1 //===------ CodeGeneration.cpp - Code generate the Scops. -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using Cloog. 12 // 13 // The Scop describes the high level memory behaviour of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. Cloog is used to generate an abstract syntax tree (clast) that 16 // reflects the updated execution order. This clast is used to create new 17 // LLVM-IR that is computational equivalent to the original control flow region, 18 // but executes its code in the new execution order defined by the changed 19 // scattering. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "polly-codegen" 24 25 #include "polly/Cloog.h" 26 #include "polly/CodeGeneration.h" 27 #include "polly/Dependences.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/ScopInfo.h" 30 #include "polly/TempScopInfo.h" 31 #include "polly/Support/GICHelper.h" 32 #include "polly/LoopGenerators.h" 33 34 #include "llvm/Module.h" 35 #include "llvm/ADT/SetVector.h" 36 #include "llvm/ADT/PostOrderIterator.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/ScalarEvolutionExpander.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/IRBuilder.h" 42 #include "llvm/Target/TargetData.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 45 #define CLOOG_INT_GMP 1 46 #include "cloog/cloog.h" 47 #include "cloog/isl/cloog.h" 48 49 #include "isl/aff.h" 50 51 #include <vector> 52 #include <utility> 53 54 using namespace polly; 55 using namespace llvm; 56 57 struct isl_set; 58 59 namespace polly { 60 61 bool EnablePollyVector; 62 63 static cl::opt<bool, true> 64 Vector("enable-polly-vector", 65 cl::desc("Enable polly vector code generation"), cl::Hidden, 66 cl::location(EnablePollyVector), cl::init(false), cl::ZeroOrMore); 67 68 static cl::opt<bool> 69 OpenMP("enable-polly-openmp", 70 cl::desc("Generate OpenMP parallel code"), cl::Hidden, 71 cl::value_desc("OpenMP code generation enabled if true"), 72 cl::init(false), cl::ZeroOrMore); 73 74 static cl::opt<bool> 75 AtLeastOnce("enable-polly-atLeastOnce", 76 cl::desc("Give polly the hint, that every loop is executed at least" 77 "once"), cl::Hidden, 78 cl::value_desc("OpenMP code generation enabled if true"), 79 cl::init(false), cl::ZeroOrMore); 80 81 static cl::opt<bool> 82 Aligned("enable-polly-aligned", 83 cl::desc("Assumed aligned memory accesses."), cl::Hidden, 84 cl::value_desc("OpenMP code generation enabled if true"), 85 cl::init(false), cl::ZeroOrMore); 86 87 static cl::opt<bool> 88 GroupedUnrolling("enable-polly-grouped-unroll", 89 cl::desc("Perform grouped unrolling, but don't generate SIMD " 90 "instuctions"), cl::Hidden, cl::init(false), 91 cl::ZeroOrMore); 92 93 typedef DenseMap<const Value*, Value*> ValueMapT; 94 typedef DenseMap<const char*, Value*> CharMapT; 95 typedef std::vector<ValueMapT> VectorValueMapT; 96 97 class IslGenerator { 98 public: 99 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) : 100 Builder(Builder), IVS(IVS) {} 101 Value *generateIslInt(__isl_take isl_int Int); 102 Value *generateIslAff(__isl_take isl_aff *Aff); 103 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 104 105 private: 106 typedef struct { 107 Value *Result; 108 class IslGenerator *Generator; 109 } IslGenInfo; 110 111 IRBuilder<> &Builder; 112 std::vector<Value *> &IVS; 113 static int mergeIslAffValues(__isl_take isl_set *Set, 114 __isl_take isl_aff *Aff, void *User); 115 }; 116 117 Value *IslGenerator::generateIslInt(isl_int Int) { 118 mpz_t IntMPZ; 119 mpz_init(IntMPZ); 120 isl_int_get_gmp(Int, IntMPZ); 121 Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ)); 122 mpz_clear(IntMPZ); 123 return IntValue; 124 } 125 126 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 127 Value *Result; 128 Value *ConstValue; 129 isl_int ConstIsl; 130 131 isl_int_init(ConstIsl); 132 isl_aff_get_constant(Aff, &ConstIsl); 133 ConstValue = generateIslInt(ConstIsl); 134 Type *Ty = Builder.getInt64Ty(); 135 136 // FIXME: We should give the constant and coefficients the right type. Here 137 // we force it into i64. 138 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 139 140 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 141 142 assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables" 143 "must match the dimension of the affine space."); 144 145 isl_int CoefficientIsl; 146 isl_int_init(CoefficientIsl); 147 148 for (unsigned int i = 0; i < NbInputDims; ++i) { 149 Value *CoefficientValue; 150 isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl); 151 152 if (isl_int_is_zero(CoefficientIsl)) 153 continue; 154 155 CoefficientValue = generateIslInt(CoefficientIsl); 156 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 157 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 158 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 159 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 160 } 161 162 isl_int_clear(CoefficientIsl); 163 isl_int_clear(ConstIsl); 164 isl_aff_free(Aff); 165 166 return Result; 167 } 168 169 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 170 __isl_take isl_aff *Aff, void *User) { 171 IslGenInfo *GenInfo = (IslGenInfo *)User; 172 173 assert((GenInfo->Result == NULL) && "Result is already set." 174 "Currently only single isl_aff is supported"); 175 assert(isl_set_plain_is_universe(Set) 176 && "Code generation failed because the set is not universe"); 177 178 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 179 180 isl_set_free(Set); 181 return 0; 182 } 183 184 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 185 IslGenInfo User; 186 User.Result = NULL; 187 User.Generator = this; 188 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 189 assert(User.Result && "Code generation for isl_pw_aff failed"); 190 191 isl_pw_aff_free(PwAff); 192 return User.Result; 193 } 194 195 /// @brief Generate a new basic block for a polyhedral statement. 196 /// 197 /// The only public function exposed is generate(). 198 class BlockGenerator { 199 public: 200 /// @brief Generate a new BasicBlock for a ScopStmt. 201 /// 202 /// @param Builder The LLVM-IR Builder used to generate the statement. The 203 /// code is generated at the location, the Builder points to. 204 /// @param Stmt The statement to code generate. 205 /// @param GlobalMap A map that defines for certain Values referenced from the 206 /// original code new Values they should be replaced with. 207 /// @param P A reference to the pass this function is called from. 208 /// The pass is needed to update other analysis. 209 static void generate(IRBuilder<> &Builder, ScopStmt &Stmt, 210 ValueMapT &GlobalMap, Pass *P) { 211 BlockGenerator Generator(Builder, Stmt, P); 212 Generator.copyBB(GlobalMap); 213 } 214 215 protected: 216 IRBuilder<> &Builder; 217 ScopStmt &Statement; 218 Pass *P; 219 220 BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P); 221 222 /// @brief Get the new version of a Value. 223 /// 224 /// @param Old The old Value. 225 /// @param BBMap A mapping from old values to their new values 226 /// (for values recalculated within this basic block). 227 /// @param GlobalMap A mapping from old values to their new values 228 /// (for values recalculated in the new ScoP, but not 229 /// within this basic block). 230 /// 231 /// @returns o The old value, if it is still valid. 232 /// o The new value, if available. 233 /// o NULL, if no value is found. 234 Value *getNewValue(const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap); 235 236 void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 237 ValueMapT &GlobalMap); 238 239 /// @brief Get the memory access offset to be added to the base address 240 std::vector<Value*> getMemoryAccessIndex(__isl_keep isl_map *AccessRelation, 241 Value *BaseAddress, ValueMapT &BBMap, 242 ValueMapT &GlobalMap); 243 244 /// @brief Get the new operand address according to the changed access in 245 /// JSCOP file. 246 Value *getNewAccessOperand(__isl_keep isl_map *NewAccessRelation, 247 Value *BaseAddress, ValueMapT &BBMap, 248 ValueMapT &GlobalMap); 249 250 /// @brief Generate the operand address 251 Value *generateLocationAccessed(const Instruction *Inst, 252 const Value *Pointer, ValueMapT &BBMap, 253 ValueMapT &GlobalMap); 254 255 Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap, 256 ValueMapT &GlobalMap); 257 258 Value *generateScalarStore(const StoreInst *store, ValueMapT &BBMap, 259 ValueMapT &GlobalMap); 260 261 /// @brief Copy a single Instruction. 262 /// 263 /// This copies a single Instruction and updates references to old values 264 /// with references to new values, as defined by GlobalMap and BBMap. 265 /// 266 /// @param BBMap A mapping from old values to their new values 267 /// (for values recalculated within this basic block). 268 /// @param GlobalMap A mapping from old values to their new values 269 /// (for values recalculated in the new ScoP, but not 270 /// within this basic block). 271 void copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 272 ValueMapT &GlobalMap); 273 274 /// @brief Copy the basic block. 275 /// 276 /// This copies the entire basic block and updates references to old values 277 /// with references to new values, as defined by GlobalMap. 278 /// 279 /// @param GlobalMap A mapping from old values to their new values 280 /// (for values recalculated in the new ScoP, but not 281 /// within this basic block). 282 void copyBB(ValueMapT &GlobalMap); 283 }; 284 285 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P): 286 Builder(B), Statement(Stmt), P(P) {} 287 288 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 289 ValueMapT &GlobalMap) { 290 // We assume constants never change. 291 // This avoids map lookups for many calls to this function. 292 if (isa<Constant>(Old)) 293 return const_cast<Value*>(Old); 294 295 if (GlobalMap.count(Old)) { 296 Value *New = GlobalMap[Old]; 297 298 if (Old->getType()->getScalarSizeInBits() 299 < New->getType()->getScalarSizeInBits()) 300 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 301 302 return New; 303 } 304 305 if (BBMap.count(Old)) { 306 return BBMap[Old]; 307 } 308 309 // 'Old' is within the original SCoP, but was not rewritten. 310 // 311 // Such values appear, if they only calculate information already available in 312 // the polyhedral description (e.g. an induction variable increment). They 313 // can be safely ignored. 314 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 315 if (Statement.getParent()->getRegion().contains(Inst->getParent())) 316 return NULL; 317 318 // Everything else is probably a scop-constant value defined as global, 319 // function parameter or an instruction not within the scop. 320 return const_cast<Value*>(Old); 321 } 322 323 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 324 ValueMapT &GlobalMap) { 325 Instruction *NewInst = Inst->clone(); 326 327 // Replace old operands with the new ones. 328 for (Instruction::const_op_iterator OI = Inst->op_begin(), 329 OE = Inst->op_end(); OI != OE; ++OI) { 330 Value *OldOperand = *OI; 331 Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap); 332 333 if (!NewOperand) { 334 assert(!isa<StoreInst>(NewInst) 335 && "Store instructions are always needed!"); 336 delete NewInst; 337 return; 338 } 339 340 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 341 } 342 343 Builder.Insert(NewInst); 344 BBMap[Inst] = NewInst; 345 346 if (!NewInst->getType()->isVoidTy()) 347 NewInst->setName("p_" + Inst->getName()); 348 } 349 350 std::vector<Value*> BlockGenerator::getMemoryAccessIndex( 351 __isl_keep isl_map *AccessRelation, Value *BaseAddress, 352 ValueMapT &BBMap, ValueMapT &GlobalMap) { 353 354 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) 355 && "Only single dimensional access functions supported"); 356 357 std::vector<Value *> IVS; 358 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 359 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 360 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap); 361 IVS.push_back(NewIV); 362 } 363 364 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 365 IslGenerator IslGen(Builder, IVS); 366 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 367 368 Type *Ty = Builder.getInt64Ty(); 369 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 370 371 std::vector<Value*> IndexArray; 372 Value *NullValue = Constant::getNullValue(Ty); 373 IndexArray.push_back(NullValue); 374 IndexArray.push_back(OffsetValue); 375 return IndexArray; 376 } 377 378 Value *BlockGenerator::getNewAccessOperand( 379 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, 380 ValueMapT &BBMap, ValueMapT &GlobalMap) { 381 std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation, 382 BaseAddress, 383 BBMap, GlobalMap); 384 Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, 385 "p_newarrayidx_"); 386 return NewOperand; 387 } 388 389 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, 390 const Value *Pointer, 391 ValueMapT &BBMap, 392 ValueMapT &GlobalMap) { 393 MemoryAccess &Access = Statement.getAccessFor(Inst); 394 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 395 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 396 397 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) 398 && "Current and new access function use different spaces"); 399 400 Value *NewPointer; 401 402 if (!NewAccessRelation) { 403 NewPointer = getNewValue(Pointer, BBMap, GlobalMap); 404 } else { 405 Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr()); 406 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, 407 BBMap, GlobalMap); 408 } 409 410 isl_map_free(CurrentAccessRelation); 411 isl_map_free(NewAccessRelation); 412 return NewPointer; 413 } 414 415 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, 416 ValueMapT &BBMap, 417 ValueMapT &GlobalMap) { 418 const Value *Pointer = Load->getPointerOperand(); 419 const Instruction *Inst = dyn_cast<Instruction>(Load); 420 Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap); 421 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 422 Load->getName() + "_p_scalar_"); 423 return ScalarLoad; 424 } 425 426 Value *BlockGenerator::generateScalarStore(const StoreInst *Store, 427 ValueMapT &BBMap, 428 ValueMapT &GlobalMap) { 429 const Value *Pointer = Store->getPointerOperand(); 430 Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap, 431 GlobalMap); 432 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap); 433 434 return Builder.CreateStore(ValueOperand, NewPointer); 435 } 436 437 void BlockGenerator::copyInstruction(const Instruction *Inst, 438 ValueMapT &BBMap, ValueMapT &GlobalMap) { 439 // Terminator instructions control the control flow. They are explicitly 440 // expressed in the clast and do not need to be copied. 441 if (Inst->isTerminator()) 442 return; 443 444 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 445 BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap); 446 return; 447 } 448 449 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 450 BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap); 451 return; 452 } 453 454 copyInstScalar(Inst, BBMap, GlobalMap); 455 } 456 457 458 void BlockGenerator::copyBB(ValueMapT &GlobalMap) { 459 BasicBlock *BB = Statement.getBasicBlock(); 460 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 461 Builder.GetInsertPoint(), P); 462 CopyBB->setName("polly.stmt." + BB->getName()); 463 Builder.SetInsertPoint(CopyBB->begin()); 464 465 ValueMapT BBMap; 466 467 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 468 ++II) 469 copyInstruction(II, BBMap, GlobalMap); 470 } 471 472 /// @brief Generate a new vector basic block for a polyhedral statement. 473 /// 474 /// The only public function exposed is generate(). 475 class VectorBlockGenerator : BlockGenerator { 476 public: 477 /// @brief Generate a new vector basic block for a ScoPStmt. 478 /// 479 /// This code generation is similar to the normal, scalar code generation, 480 /// except that each instruction is code generated for several vector lanes 481 /// at a time. If possible instructions are issued as actual vector 482 /// instructions, but e.g. for address calculation instructions we currently 483 /// generate scalar instructions for each vector lane. 484 /// 485 /// @param Builder The LLVM-IR Builder used to generate the statement. The 486 /// code is generated at the location, the builder points 487 /// to. 488 /// @param Stmt The statement to code generate. 489 /// @param GlobalMaps A vector of maps that define for certain Values 490 /// referenced from the original code new Values they should 491 /// be replaced with. Each map in the vector of maps is 492 /// used for one vector lane. The number of elements in the 493 /// vector defines the width of the generated vector 494 /// instructions. 495 /// @param P A reference to the pass this function is called from. 496 /// The pass is needed to update other analysis. 497 static void generate(IRBuilder<> &B, ScopStmt &Stmt, 498 VectorValueMapT &GlobalMaps, __isl_keep isl_set *Domain, 499 Pass *P) { 500 VectorBlockGenerator Generator(B, GlobalMaps, Stmt, Domain, P); 501 Generator.copyBB(); 502 } 503 504 private: 505 // This is a vector of global value maps. The first map is used for the first 506 // vector lane, ... 507 // Each map, contains information about Instructions in the old ScoP, which 508 // are recalculated in the new SCoP. When copying the basic block, we replace 509 // all referenes to the old instructions with their recalculated values. 510 VectorValueMapT &GlobalMaps; 511 512 isl_set *Domain; 513 514 VectorBlockGenerator(IRBuilder<> &B, VectorValueMapT &GlobalMaps, 515 ScopStmt &Stmt, __isl_keep isl_set *Domain, Pass *P); 516 517 int getVectorWidth(); 518 519 Value *getVectorValue(const Value *Old, ValueMapT &VectorMap, 520 VectorValueMapT &ScalarMaps); 521 522 Type *getVectorPtrTy(const Value *V, int Width); 523 524 /// @brief Load a vector from a set of adjacent scalars 525 /// 526 /// In case a set of scalars is known to be next to each other in memory, 527 /// create a vector load that loads those scalars 528 /// 529 /// %vector_ptr= bitcast double* %p to <4 x double>* 530 /// %vec_full = load <4 x double>* %vector_ptr 531 /// 532 Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap); 533 534 /// @brief Load a vector initialized from a single scalar in memory 535 /// 536 /// In case all elements of a vector are initialized to the same 537 /// scalar value, this value is loaded and shuffeled into all elements 538 /// of the vector. 539 /// 540 /// %splat_one = load <1 x double>* %p 541 /// %splat = shufflevector <1 x double> %splat_one, <1 x 542 /// double> %splat_one, <4 x i32> zeroinitializer 543 /// 544 Value *generateStrideZeroLoad(const LoadInst *Load, ValueMapT &BBMap); 545 546 /// @Load a vector from scalars distributed in memory 547 /// 548 /// In case some scalars a distributed randomly in memory. Create a vector 549 /// by loading each scalar and by inserting one after the other into the 550 /// vector. 551 /// 552 /// %scalar_1= load double* %p_1 553 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 554 /// %scalar 2 = load double* %p_2 555 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 556 /// 557 Value *generateUnknownStrideLoad(const LoadInst *Load, 558 VectorValueMapT &ScalarMaps); 559 560 void generateLoad(const LoadInst *Load, ValueMapT &VectorMap, 561 VectorValueMapT &ScalarMaps); 562 563 void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &VectorMap, 564 VectorValueMapT &ScalarMaps); 565 566 void copyBinaryInst(const BinaryOperator *Inst, ValueMapT &VectorMap, 567 VectorValueMapT &ScalarMaps); 568 569 void copyStore(const StoreInst *Store, ValueMapT &VectorMap, 570 VectorValueMapT &ScalarMaps); 571 572 void copyInstScalarized(const Instruction *Inst, ValueMapT &VectorMap, 573 VectorValueMapT &ScalarMaps); 574 575 bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap, 576 VectorValueMapT &ScalarMaps); 577 578 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); 579 580 void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap, 581 VectorValueMapT &ScalarMaps); 582 583 void copyBB(); 584 }; 585 586 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, 587 VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain, 588 Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), 589 Domain(Domain) { 590 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 591 assert(Domain && "No statement domain provided"); 592 } 593 594 Value *VectorBlockGenerator::getVectorValue(const Value *Old, 595 ValueMapT &VectorMap, 596 VectorValueMapT &ScalarMaps) { 597 if (VectorMap.count(Old)) 598 return VectorMap[Old]; 599 600 int Width = getVectorWidth(); 601 602 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 603 604 for (int Lane = 0; Lane < Width; Lane++) 605 Vector = Builder.CreateInsertElement(Vector, 606 getNewValue(Old, 607 ScalarMaps[Lane], 608 GlobalMaps[Lane]), 609 Builder.getInt32(Lane)); 610 611 VectorMap[Old] = Vector; 612 613 return Vector; 614 } 615 616 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 617 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 618 assert(PointerTy && "PointerType expected"); 619 620 Type *ScalarType = PointerTy->getElementType(); 621 VectorType *VectorType = VectorType::get(ScalarType, Width); 622 623 return PointerType::getUnqual(VectorType); 624 } 625 626 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 627 ValueMapT &BBMap) { 628 const Value *Pointer = Load->getPointerOperand(); 629 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 630 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 631 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 632 "vector_ptr"); 633 LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, 634 Load->getName() + "_p_vec_full"); 635 if (!Aligned) 636 VecLoad->setAlignment(8); 637 638 return VecLoad; 639 } 640 641 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 642 ValueMapT &BBMap) { 643 const Value *Pointer = Load->getPointerOperand(); 644 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 645 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 646 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 647 Load->getName() + "_p_vec_p"); 648 LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr, 649 Load->getName() + "_p_splat_one"); 650 651 if (!Aligned) 652 ScalarLoad->setAlignment(8); 653 654 Constant *SplatVector = 655 Constant::getNullValue(VectorType::get(Builder.getInt32Ty(), 656 getVectorWidth())); 657 658 Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, 659 SplatVector, 660 Load->getName() 661 + "_p_splat"); 662 return VectorLoad; 663 } 664 665 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, 666 VectorValueMapT &ScalarMaps) { 667 int VectorWidth = getVectorWidth(); 668 const Value *Pointer = Load->getPointerOperand(); 669 VectorType *VectorType = VectorType::get( 670 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 671 672 Value *Vector = UndefValue::get(VectorType); 673 674 for (int i = 0; i < VectorWidth; i++) { 675 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 676 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 677 Load->getName() + "_p_scalar_"); 678 Vector = Builder.CreateInsertElement(Vector, ScalarLoad, 679 Builder.getInt32(i), 680 Load->getName() + "_p_vec_"); 681 } 682 683 return Vector; 684 } 685 686 void VectorBlockGenerator::generateLoad(const LoadInst *Load, 687 ValueMapT &VectorMap, 688 VectorValueMapT &ScalarMaps) { 689 if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) { 690 for (int i = 0; i < getVectorWidth(); i++) 691 ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i], 692 GlobalMaps[i]); 693 return; 694 } 695 696 MemoryAccess &Access = Statement.getAccessFor(Load); 697 698 Value *NewLoad; 699 if (Access.isStrideZero(isl_set_copy(Domain))) 700 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 701 else if (Access.isStrideOne(isl_set_copy(Domain))) 702 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 703 else 704 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 705 706 VectorMap[Load] = NewLoad; 707 } 708 709 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 710 ValueMapT &VectorMap, 711 VectorValueMapT &ScalarMaps) { 712 int VectorWidth = getVectorWidth(); 713 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, 714 ScalarMaps); 715 716 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 717 718 const CastInst *Cast = dyn_cast<CastInst>(Inst); 719 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 720 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 721 } 722 723 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 724 ValueMapT &VectorMap, 725 VectorValueMapT &ScalarMaps) { 726 Value *OpZero = Inst->getOperand(0); 727 Value *OpOne = Inst->getOperand(1); 728 729 Value *NewOpZero, *NewOpOne; 730 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps); 731 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps); 732 733 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, 734 NewOpOne, 735 Inst->getName() + "p_vec"); 736 VectorMap[Inst] = NewInst; 737 } 738 739 void VectorBlockGenerator::copyStore(const StoreInst *Store, 740 ValueMapT &VectorMap, 741 VectorValueMapT &ScalarMaps) { 742 int VectorWidth = getVectorWidth(); 743 744 MemoryAccess &Access = Statement.getAccessFor(Store); 745 746 const Value *Pointer = Store->getPointerOperand(); 747 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, 748 ScalarMaps); 749 750 if (Access.isStrideOne(isl_set_copy(Domain))) { 751 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 752 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]); 753 754 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 755 "vector_ptr"); 756 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 757 758 if (!Aligned) 759 Store->setAlignment(8); 760 } else { 761 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 762 Value *Scalar = Builder.CreateExtractElement(Vector, 763 Builder.getInt32(i)); 764 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 765 Builder.CreateStore(Scalar, NewPointer); 766 } 767 } 768 } 769 770 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 771 ValueMapT &VectorMap) { 772 for (Instruction::const_op_iterator OI = Inst->op_begin(), 773 OE = Inst->op_end(); OI != OE; ++OI) 774 if (VectorMap.count(*OI)) 775 return true; 776 return false; 777 } 778 779 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 780 ValueMapT &VectorMap, 781 VectorValueMapT &ScalarMaps) { 782 bool HasVectorOperand = false; 783 int VectorWidth = getVectorWidth(); 784 785 for (Instruction::const_op_iterator OI = Inst->op_begin(), 786 OE = Inst->op_end(); OI != OE; ++OI) { 787 ValueMapT::iterator VecOp = VectorMap.find(*OI); 788 789 if (VecOp == VectorMap.end()) 790 continue; 791 792 HasVectorOperand = true; 793 Value *NewVector = VecOp->second; 794 795 for (int i = 0; i < VectorWidth; ++i) { 796 ValueMapT &SM = ScalarMaps[i]; 797 798 // If there is one scalar extracted, all scalar elements should have 799 // already been extracted by the code here. So no need to check for the 800 // existance of all of them. 801 if (SM.count(*OI)) 802 break; 803 804 SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 805 } 806 } 807 808 return HasVectorOperand; 809 } 810 811 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst, 812 ValueMapT &VectorMap, 813 VectorValueMapT &ScalarMaps) { 814 bool HasVectorOperand; 815 int VectorWidth = getVectorWidth(); 816 817 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 818 819 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 820 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]); 821 822 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 823 return; 824 825 // Make the result available as vector value. 826 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 827 Value *Vector = UndefValue::get(VectorType); 828 829 for (int i = 0; i < VectorWidth; i++) 830 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 831 Builder.getInt32(i)); 832 833 VectorMap[Inst] = Vector; 834 } 835 836 int VectorBlockGenerator::getVectorWidth() { 837 return GlobalMaps.size(); 838 } 839 840 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 841 ValueMapT &VectorMap, 842 VectorValueMapT &ScalarMaps) { 843 // Terminator instructions control the control flow. They are explicitly 844 // expressed in the clast and do not need to be copied. 845 if (Inst->isTerminator()) 846 return; 847 848 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 849 generateLoad(Load, VectorMap, ScalarMaps); 850 return; 851 } 852 853 if (hasVectorOperands(Inst, VectorMap)) { 854 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 855 copyStore(Store, VectorMap, ScalarMaps); 856 return; 857 } 858 859 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 860 copyUnaryInst(Unary, VectorMap, ScalarMaps); 861 return; 862 } 863 864 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 865 copyBinaryInst(Binary, VectorMap, ScalarMaps); 866 return; 867 } 868 869 // Falltrough: We generate scalar instructions, if we don't know how to 870 // generate vector code. 871 } 872 873 copyInstScalarized(Inst, VectorMap, ScalarMaps); 874 } 875 876 void VectorBlockGenerator::copyBB() { 877 BasicBlock *BB = Statement.getBasicBlock(); 878 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 879 Builder.GetInsertPoint(), P); 880 CopyBB->setName("polly.stmt." + BB->getName()); 881 Builder.SetInsertPoint(CopyBB->begin()); 882 883 // Create two maps that store the mapping from the original instructions of 884 // the old basic block to their copies in the new basic block. Those maps 885 // are basic block local. 886 // 887 // As vector code generation is supported there is one map for scalar values 888 // and one for vector values. 889 // 890 // In case we just do scalar code generation, the vectorMap is not used and 891 // the scalarMap has just one dimension, which contains the mapping. 892 // 893 // In case vector code generation is done, an instruction may either appear 894 // in the vector map once (as it is calculating >vectorwidth< values at a 895 // time. Or (if the values are calculated using scalar operations), it 896 // appears once in every dimension of the scalarMap. 897 VectorValueMapT ScalarBlockMap(getVectorWidth()); 898 ValueMapT VectorBlockMap; 899 900 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); 901 II != IE; ++II) 902 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 903 } 904 905 /// Class to generate LLVM-IR that calculates the value of a clast_expr. 906 class ClastExpCodeGen { 907 IRBuilder<> &Builder; 908 const CharMapT &IVS; 909 910 Value *codegen(const clast_name *e, Type *Ty); 911 Value *codegen(const clast_term *e, Type *Ty); 912 Value *codegen(const clast_binary *e, Type *Ty); 913 Value *codegen(const clast_reduction *r, Type *Ty); 914 public: 915 916 // A generator for clast expressions. 917 // 918 // @param B The IRBuilder that defines where the code to calculate the 919 // clast expressions should be inserted. 920 // @param IVMAP A Map that translates strings describing the induction 921 // variables to the Values* that represent these variables 922 // on the LLVM side. 923 ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap); 924 925 // Generates code to calculate a given clast expression. 926 // 927 // @param e The expression to calculate. 928 // @return The Value that holds the result. 929 Value *codegen(const clast_expr *e, Type *Ty); 930 }; 931 932 Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) { 933 CharMapT::const_iterator I = IVS.find(e->name); 934 935 assert(I != IVS.end() && "Clast name not found"); 936 937 return Builder.CreateSExtOrBitCast(I->second, Ty); 938 } 939 940 Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) { 941 APInt a = APInt_from_MPZ(e->val); 942 943 Value *ConstOne = ConstantInt::get(Builder.getContext(), a); 944 ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); 945 946 if (!e->var) 947 return ConstOne; 948 949 Value *var = codegen(e->var, Ty); 950 return Builder.CreateMul(ConstOne, var); 951 } 952 953 Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { 954 Value *LHS = codegen(e->LHS, Ty); 955 956 APInt RHS_AP = APInt_from_MPZ(e->RHS); 957 958 Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); 959 RHS = Builder.CreateSExtOrBitCast(RHS, Ty); 960 961 switch (e->type) { 962 case clast_bin_mod: 963 return Builder.CreateSRem(LHS, RHS); 964 case clast_bin_fdiv: 965 { 966 // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d 967 Value *One = ConstantInt::get(Ty, 1); 968 Value *Zero = ConstantInt::get(Ty, 0); 969 Value *Sum1 = Builder.CreateSub(LHS, RHS); 970 Value *Sum2 = Builder.CreateAdd(Sum1, One); 971 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 972 Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); 973 return Builder.CreateSDiv(Dividend, RHS); 974 } 975 case clast_bin_cdiv: 976 { 977 // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d 978 Value *One = ConstantInt::get(Ty, 1); 979 Value *Zero = ConstantInt::get(Ty, 0); 980 Value *Sum1 = Builder.CreateAdd(LHS, RHS); 981 Value *Sum2 = Builder.CreateSub(Sum1, One); 982 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 983 Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); 984 return Builder.CreateSDiv(Dividend, RHS); 985 } 986 case clast_bin_div: 987 return Builder.CreateSDiv(LHS, RHS); 988 }; 989 990 llvm_unreachable("Unknown clast binary expression type"); 991 } 992 993 Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { 994 assert(( r->type == clast_red_min 995 || r->type == clast_red_max 996 || r->type == clast_red_sum) 997 && "Clast reduction type not supported"); 998 Value *old = codegen(r->elts[0], Ty); 999 1000 for (int i=1; i < r->n; ++i) { 1001 Value *exprValue = codegen(r->elts[i], Ty); 1002 1003 switch (r->type) { 1004 case clast_red_min: 1005 { 1006 Value *cmp = Builder.CreateICmpSLT(old, exprValue); 1007 old = Builder.CreateSelect(cmp, old, exprValue); 1008 break; 1009 } 1010 case clast_red_max: 1011 { 1012 Value *cmp = Builder.CreateICmpSGT(old, exprValue); 1013 old = Builder.CreateSelect(cmp, old, exprValue); 1014 break; 1015 } 1016 case clast_red_sum: 1017 old = Builder.CreateAdd(old, exprValue); 1018 break; 1019 } 1020 } 1021 1022 return old; 1023 } 1024 1025 ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap) 1026 : Builder(B), IVS(IVMap) {} 1027 1028 Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { 1029 switch(e->type) { 1030 case clast_expr_name: 1031 return codegen((const clast_name *)e, Ty); 1032 case clast_expr_term: 1033 return codegen((const clast_term *)e, Ty); 1034 case clast_expr_bin: 1035 return codegen((const clast_binary *)e, Ty); 1036 case clast_expr_red: 1037 return codegen((const clast_reduction *)e, Ty); 1038 } 1039 1040 llvm_unreachable("Unknown clast expression!"); 1041 } 1042 1043 class ClastStmtCodeGen { 1044 public: 1045 const std::vector<std::string> &getParallelLoops(); 1046 1047 private: 1048 // The Scop we code generate. 1049 Scop *S; 1050 Pass *P; 1051 1052 // The Builder specifies the current location to code generate at. 1053 IRBuilder<> &Builder; 1054 1055 // Map the Values from the old code to their counterparts in the new code. 1056 ValueMapT ValueMap; 1057 1058 // clastVars maps from the textual representation of a clast variable to its 1059 // current *Value. clast variables are scheduling variables, original 1060 // induction variables or parameters. They are used either in loop bounds or 1061 // to define the statement instance that is executed. 1062 // 1063 // for (s = 0; s < n + 3; ++i) 1064 // for (t = s; t < m; ++j) 1065 // Stmt(i = s + 3 * m, j = t); 1066 // 1067 // {s,t,i,j,n,m} is the set of clast variables in this clast. 1068 CharMapT ClastVars; 1069 1070 // Codegenerator for clast expressions. 1071 ClastExpCodeGen ExpGen; 1072 1073 // Do we currently generate parallel code? 1074 bool parallelCodeGeneration; 1075 1076 std::vector<std::string> parallelLoops; 1077 1078 void codegen(const clast_assignment *a); 1079 1080 void codegen(const clast_assignment *a, ScopStmt *Statement, 1081 unsigned Dimension, int vectorDim, 1082 std::vector<ValueMapT> *VectorVMap = 0); 1083 1084 void codegenSubstitutions(const clast_stmt *Assignment, 1085 ScopStmt *Statement, int vectorDim = 0, 1086 std::vector<ValueMapT> *VectorVMap = 0); 1087 1088 void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL, 1089 const char *iterator = NULL, isl_set *scatteringDomain = 0); 1090 1091 void codegen(const clast_block *b); 1092 1093 /// @brief Create a classical sequential loop. 1094 void codegenForSequential(const clast_for *f); 1095 1096 /// @brief Create OpenMP structure values. 1097 /// 1098 /// Create a list of values that has to be stored into the OpenMP subfuncition 1099 /// structure. 1100 SetVector<Value*> getOMPValues(); 1101 1102 /// @brief Update the internal structures according to a Value Map. 1103 /// 1104 /// @param VMap A map from old to new values. 1105 /// @param Reverse If true, we assume the update should be reversed. 1106 void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1107 bool Reverse); 1108 1109 /// @brief Create an OpenMP parallel for loop. 1110 /// 1111 /// This loop reflects a loop as if it would have been created by an OpenMP 1112 /// statement. 1113 void codegenForOpenMP(const clast_for *f); 1114 1115 bool isInnermostLoop(const clast_for *f); 1116 1117 /// @brief Get the number of loop iterations for this loop. 1118 /// @param f The clast for loop to check. 1119 int getNumberOfIterations(const clast_for *f); 1120 1121 /// @brief Create vector instructions for this loop. 1122 void codegenForVector(const clast_for *f); 1123 1124 void codegen(const clast_for *f); 1125 1126 Value *codegen(const clast_equation *eq); 1127 1128 void codegen(const clast_guard *g); 1129 1130 void codegen(const clast_stmt *stmt); 1131 1132 void addParameters(const CloogNames *names); 1133 1134 IntegerType *getIntPtrTy(); 1135 1136 public: 1137 void codegen(const clast_root *r); 1138 1139 ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P); 1140 }; 1141 } 1142 1143 IntegerType *ClastStmtCodeGen::getIntPtrTy() { 1144 return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext()); 1145 } 1146 1147 const std::vector<std::string> &ClastStmtCodeGen::getParallelLoops() { 1148 return parallelLoops; 1149 } 1150 1151 void ClastStmtCodeGen::codegen(const clast_assignment *a) { 1152 Value *V= ExpGen.codegen(a->RHS, getIntPtrTy()); 1153 ClastVars[a->LHS] = V; 1154 } 1155 1156 void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt, 1157 unsigned Dim, int VectorDim, 1158 std::vector<ValueMapT> *VectorVMap) { 1159 const PHINode *PN; 1160 Value *RHS; 1161 1162 assert(!A->LHS && "Statement assignments do not have left hand side"); 1163 1164 PN = Stmt->getInductionVariableForDimension(Dim); 1165 RHS = ExpGen.codegen(A->RHS, Builder.getInt64Ty()); 1166 RHS = Builder.CreateTruncOrBitCast(RHS, PN->getType()); 1167 1168 if (VectorVMap) 1169 (*VectorVMap)[VectorDim][PN] = RHS; 1170 1171 ValueMap[PN] = RHS; 1172 } 1173 1174 void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, 1175 ScopStmt *Statement, int vectorDim, 1176 std::vector<ValueMapT> *VectorVMap) { 1177 int Dimension = 0; 1178 1179 while (Assignment) { 1180 assert(CLAST_STMT_IS_A(Assignment, stmt_ass) 1181 && "Substitions are expected to be assignments"); 1182 codegen((const clast_assignment *)Assignment, Statement, Dimension, 1183 vectorDim, VectorVMap); 1184 Assignment = Assignment->next; 1185 Dimension++; 1186 } 1187 } 1188 1189 void ClastStmtCodeGen::codegen(const clast_user_stmt *u, 1190 std::vector<Value*> *IVS , const char *iterator, 1191 isl_set *Domain) { 1192 ScopStmt *Statement = (ScopStmt *)u->statement->usr; 1193 1194 if (u->substitutions) 1195 codegenSubstitutions(u->substitutions, Statement); 1196 1197 int VectorDimensions = IVS ? IVS->size() : 1; 1198 1199 if (VectorDimensions == 1) { 1200 BlockGenerator::generate(Builder, *Statement, ValueMap, P); 1201 return; 1202 } 1203 1204 VectorValueMapT VectorMap(VectorDimensions); 1205 1206 if (IVS) { 1207 assert (u->substitutions && "Substitutions expected!"); 1208 int i = 0; 1209 for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end(); 1210 II != IE; ++II) { 1211 ClastVars[iterator] = *II; 1212 codegenSubstitutions(u->substitutions, Statement, i, &VectorMap); 1213 i++; 1214 } 1215 } 1216 1217 VectorBlockGenerator::generate(Builder, *Statement, VectorMap, Domain, P); 1218 } 1219 1220 void ClastStmtCodeGen::codegen(const clast_block *b) { 1221 if (b->body) 1222 codegen(b->body); 1223 } 1224 1225 void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { 1226 Value *LowerBound, *UpperBound, *IV, *Stride; 1227 BasicBlock *AfterBB; 1228 Type *IntPtrTy = getIntPtrTy(); 1229 1230 LowerBound = ExpGen.codegen(f->LB, IntPtrTy); 1231 UpperBound = ExpGen.codegen(f->UB, IntPtrTy); 1232 Stride = Builder.getInt(APInt_from_MPZ(f->stride)); 1233 1234 IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB); 1235 1236 // Add loop iv to symbols. 1237 ClastVars[f->iterator] = IV; 1238 1239 if (f->body) 1240 codegen(f->body); 1241 1242 // Loop is finished, so remove its iv from the live symbols. 1243 ClastVars.erase(f->iterator); 1244 Builder.SetInsertPoint(AfterBB->begin()); 1245 } 1246 1247 SetVector<Value*> ClastStmtCodeGen::getOMPValues() { 1248 SetVector<Value*> Values; 1249 1250 // The clast variables 1251 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1252 I != E; I++) 1253 Values.insert(I->second); 1254 1255 // The memory reference base addresses 1256 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { 1257 ScopStmt *Stmt = *SI; 1258 for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(), 1259 E = Stmt->memacc_end(); I != E; ++I) { 1260 Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr()); 1261 Values.insert((BaseAddr)); 1262 } 1263 } 1264 1265 return Values; 1266 } 1267 1268 void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1269 bool Reverse) { 1270 std::set<Value*> Inserted; 1271 1272 if (Reverse) { 1273 OMPGenerator::ValueToValueMapTy ReverseMap; 1274 1275 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1276 I != E; ++I) 1277 ReverseMap.insert(std::make_pair(I->second, I->first)); 1278 1279 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1280 I != E; I++) { 1281 ClastVars[I->first] = ReverseMap[I->second]; 1282 Inserted.insert(I->second); 1283 } 1284 1285 /// FIXME: At the moment we do not reverse the update of the ValueMap. 1286 /// This is incomplet, but the failure should be obvious, such that 1287 /// we can fix this later. 1288 return; 1289 } 1290 1291 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1292 I != E; I++) { 1293 ClastVars[I->first] = VMap[I->second]; 1294 Inserted.insert(I->second); 1295 } 1296 1297 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1298 I != E; ++I) { 1299 if (Inserted.count(I->first)) 1300 continue; 1301 1302 ValueMap[I->first] = I->second; 1303 } 1304 } 1305 1306 static void clearDomtree(Function *F, DominatorTree &DT) { 1307 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 1308 std::vector<BasicBlock*> Nodes; 1309 for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I) 1310 Nodes.push_back(I->getBlock()); 1311 1312 for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end(); 1313 I != E; ++I) 1314 DT.eraseNode(*I); 1315 } 1316 1317 void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { 1318 Value *Stride, *LB, *UB, *IV; 1319 BasicBlock::iterator LoopBody; 1320 IntegerType *IntPtrTy = getIntPtrTy(); 1321 SetVector<Value*> Values; 1322 OMPGenerator::ValueToValueMapTy VMap; 1323 OMPGenerator OMPGen(Builder, P); 1324 1325 Stride = Builder.getInt(APInt_from_MPZ(For->stride)); 1326 Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy); 1327 LB = ExpGen.codegen(For->LB, IntPtrTy); 1328 UB = ExpGen.codegen(For->UB, IntPtrTy); 1329 1330 Values = getOMPValues(); 1331 1332 IV = OMPGen.createParallelLoop(LB, UB, Stride, Values, VMap, &LoopBody); 1333 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 1334 Builder.SetInsertPoint(LoopBody); 1335 1336 updateWithValueMap(VMap, /* reverse */ false); 1337 ClastVars[For->iterator] = IV; 1338 1339 if (For->body) 1340 codegen(For->body); 1341 1342 ClastVars.erase(For->iterator); 1343 updateWithValueMap(VMap, /* reverse */ true); 1344 1345 clearDomtree((*LoopBody).getParent()->getParent(), 1346 P->getAnalysis<DominatorTree>()); 1347 1348 Builder.SetInsertPoint(AfterLoop); 1349 } 1350 1351 bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { 1352 const clast_stmt *stmt = f->body; 1353 1354 while (stmt) { 1355 if (!CLAST_STMT_IS_A(stmt, stmt_user)) 1356 return false; 1357 1358 stmt = stmt->next; 1359 } 1360 1361 return true; 1362 } 1363 1364 int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) { 1365 isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); 1366 isl_set *tmp = isl_set_copy(loopDomain); 1367 1368 // Calculate a map similar to the identity map, but with the last input 1369 // and output dimension not related. 1370 // [i0, i1, i2, i3] -> [i0, i1, i2, o0] 1371 isl_space *Space = isl_set_get_space(loopDomain); 1372 Space = isl_space_drop_outputs(Space, 1373 isl_set_dim(loopDomain, isl_dim_set) - 2, 1); 1374 Space = isl_space_map_from_set(Space); 1375 isl_map *identity = isl_map_identity(Space); 1376 identity = isl_map_add_dims(identity, isl_dim_in, 1); 1377 identity = isl_map_add_dims(identity, isl_dim_out, 1); 1378 1379 isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); 1380 map = isl_map_intersect(map, identity); 1381 1382 isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); 1383 isl_map *lexmin = isl_map_lexmin(map); 1384 isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); 1385 1386 isl_set *elements = isl_map_range(sub); 1387 1388 if (!isl_set_is_singleton(elements)) { 1389 isl_set_free(elements); 1390 return -1; 1391 } 1392 1393 isl_point *p = isl_set_sample_point(elements); 1394 1395 isl_int v; 1396 isl_int_init(v); 1397 isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); 1398 int numberIterations = isl_int_get_si(v); 1399 isl_int_clear(v); 1400 isl_point_free(p); 1401 1402 return (numberIterations) / isl_int_get_si(f->stride) + 1; 1403 } 1404 1405 void ClastStmtCodeGen::codegenForVector(const clast_for *F) { 1406 DEBUG(dbgs() << "Vectorizing loop '" << F->iterator << "'\n";); 1407 int VectorWidth = getNumberOfIterations(F); 1408 1409 Value *LB = ExpGen.codegen(F->LB, getIntPtrTy()); 1410 1411 APInt Stride = APInt_from_MPZ(F->stride); 1412 IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType()); 1413 Stride = Stride.zext(LoopIVType->getBitWidth()); 1414 Value *StrideValue = ConstantInt::get(LoopIVType, Stride); 1415 1416 std::vector<Value*> IVS(VectorWidth); 1417 IVS[0] = LB; 1418 1419 for (int i = 1; i < VectorWidth; i++) 1420 IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); 1421 1422 isl_set *Domain = isl_set_from_cloog_domain(F->domain); 1423 1424 // Add loop iv to symbols. 1425 ClastVars[F->iterator] = LB; 1426 1427 const clast_stmt *Stmt = F->body; 1428 1429 while (Stmt) { 1430 codegen((const clast_user_stmt *)Stmt, &IVS, F->iterator, 1431 isl_set_copy(Domain)); 1432 Stmt = Stmt->next; 1433 } 1434 1435 // Loop is finished, so remove its iv from the live symbols. 1436 isl_set_free(Domain); 1437 ClastVars.erase(F->iterator); 1438 } 1439 1440 void ClastStmtCodeGen::codegen(const clast_for *f) { 1441 if ((Vector || OpenMP) && P->getAnalysis<Dependences>().isParallelFor(f)) { 1442 if (Vector && isInnermostLoop(f) && (-1 != getNumberOfIterations(f)) 1443 && (getNumberOfIterations(f) <= 16)) { 1444 codegenForVector(f); 1445 return; 1446 } 1447 1448 if (OpenMP && !parallelCodeGeneration) { 1449 parallelCodeGeneration = true; 1450 parallelLoops.push_back(f->iterator); 1451 codegenForOpenMP(f); 1452 parallelCodeGeneration = false; 1453 return; 1454 } 1455 } 1456 1457 codegenForSequential(f); 1458 } 1459 1460 Value *ClastStmtCodeGen::codegen(const clast_equation *eq) { 1461 Value *LHS = ExpGen.codegen(eq->LHS, getIntPtrTy()); 1462 Value *RHS = ExpGen.codegen(eq->RHS, getIntPtrTy()); 1463 CmpInst::Predicate P; 1464 1465 if (eq->sign == 0) 1466 P = ICmpInst::ICMP_EQ; 1467 else if (eq->sign > 0) 1468 P = ICmpInst::ICMP_SGE; 1469 else 1470 P = ICmpInst::ICMP_SLE; 1471 1472 return Builder.CreateICmp(P, LHS, RHS); 1473 } 1474 1475 void ClastStmtCodeGen::codegen(const clast_guard *g) { 1476 Function *F = Builder.GetInsertBlock()->getParent(); 1477 LLVMContext &Context = F->getContext(); 1478 1479 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1480 Builder.GetInsertPoint(), P); 1481 CondBB->setName("polly.cond"); 1482 BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); 1483 MergeBB->setName("polly.merge"); 1484 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 1485 1486 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 1487 DT.addNewBlock(ThenBB, CondBB); 1488 DT.changeImmediateDominator(MergeBB, CondBB); 1489 1490 CondBB->getTerminator()->eraseFromParent(); 1491 1492 Builder.SetInsertPoint(CondBB); 1493 1494 Value *Predicate = codegen(&(g->eq[0])); 1495 1496 for (int i = 1; i < g->n; ++i) { 1497 Value *TmpPredicate = codegen(&(g->eq[i])); 1498 Predicate = Builder.CreateAnd(Predicate, TmpPredicate); 1499 } 1500 1501 Builder.CreateCondBr(Predicate, ThenBB, MergeBB); 1502 Builder.SetInsertPoint(ThenBB); 1503 Builder.CreateBr(MergeBB); 1504 Builder.SetInsertPoint(ThenBB->begin()); 1505 1506 codegen(g->then); 1507 1508 Builder.SetInsertPoint(MergeBB->begin()); 1509 } 1510 1511 void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { 1512 if (CLAST_STMT_IS_A(stmt, stmt_root)) 1513 assert(false && "No second root statement expected"); 1514 else if (CLAST_STMT_IS_A(stmt, stmt_ass)) 1515 codegen((const clast_assignment *)stmt); 1516 else if (CLAST_STMT_IS_A(stmt, stmt_user)) 1517 codegen((const clast_user_stmt *)stmt); 1518 else if (CLAST_STMT_IS_A(stmt, stmt_block)) 1519 codegen((const clast_block *)stmt); 1520 else if (CLAST_STMT_IS_A(stmt, stmt_for)) 1521 codegen((const clast_for *)stmt); 1522 else if (CLAST_STMT_IS_A(stmt, stmt_guard)) 1523 codegen((const clast_guard *)stmt); 1524 1525 if (stmt->next) 1526 codegen(stmt->next); 1527 } 1528 1529 void ClastStmtCodeGen::addParameters(const CloogNames *names) { 1530 SCEVExpander Rewriter(P->getAnalysis<ScalarEvolution>(), "polly"); 1531 1532 int i = 0; 1533 for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); 1534 PI != PE; ++PI) { 1535 assert(i < names->nb_parameters && "Not enough parameter names"); 1536 1537 const SCEV *Param = *PI; 1538 Type *Ty = Param->getType(); 1539 1540 Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); 1541 Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); 1542 ClastVars[names->parameters[i]] = V; 1543 1544 ++i; 1545 } 1546 } 1547 1548 void ClastStmtCodeGen::codegen(const clast_root *r) { 1549 addParameters(r->names); 1550 1551 parallelCodeGeneration = false; 1552 1553 const clast_stmt *stmt = (const clast_stmt*) r; 1554 if (stmt->next) 1555 codegen(stmt->next); 1556 } 1557 1558 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P) : 1559 S(scop), P(P), Builder(B), ExpGen(Builder, ClastVars) {} 1560 1561 namespace { 1562 class CodeGeneration : public ScopPass { 1563 Region *region; 1564 Scop *S; 1565 DominatorTree *DT; 1566 RegionInfo *RI; 1567 1568 std::vector<std::string> parallelLoops; 1569 1570 public: 1571 static char ID; 1572 1573 CodeGeneration() : ScopPass(ID) {} 1574 1575 // Split the entry edge of the region and generate a new basic block on this 1576 // edge. This function also updates ScopInfo and RegionInfo. 1577 // 1578 // @param region The region where the entry edge will be splitted. 1579 BasicBlock *splitEdgeAdvanced(Region *region) { 1580 BasicBlock *newBlock; 1581 BasicBlock *splitBlock; 1582 1583 newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this); 1584 1585 if (DT->dominates(region->getEntry(), newBlock)) { 1586 BasicBlock *OldBlock = region->getEntry(); 1587 std::string OldName = OldBlock->getName(); 1588 1589 // Update ScopInfo. 1590 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) 1591 if ((*SI)->getBasicBlock() == OldBlock) { 1592 (*SI)->setBasicBlock(newBlock); 1593 break; 1594 } 1595 1596 // Update RegionInfo. 1597 splitBlock = OldBlock; 1598 OldBlock->setName("polly.split"); 1599 newBlock->setName(OldName); 1600 region->replaceEntry(newBlock); 1601 RI->setRegionFor(newBlock, region); 1602 } else { 1603 RI->setRegionFor(newBlock, region->getParent()); 1604 splitBlock = newBlock; 1605 } 1606 1607 return splitBlock; 1608 } 1609 1610 // Create a split block that branches either to the old code or to a new basic 1611 // block where the new code can be inserted. 1612 // 1613 // @param Builder A builder that will be set to point to a basic block, where 1614 // the new code can be generated. 1615 // @return The split basic block. 1616 BasicBlock *addSplitAndStartBlock(IRBuilder<> *Builder) { 1617 BasicBlock *StartBlock, *SplitBlock; 1618 1619 SplitBlock = splitEdgeAdvanced(region); 1620 SplitBlock->setName("polly.split_new_and_old"); 1621 Function *F = SplitBlock->getParent(); 1622 StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F); 1623 SplitBlock->getTerminator()->eraseFromParent(); 1624 Builder->SetInsertPoint(SplitBlock); 1625 Builder->CreateCondBr(Builder->getTrue(), StartBlock, region->getEntry()); 1626 DT->addNewBlock(StartBlock, SplitBlock); 1627 Builder->SetInsertPoint(StartBlock); 1628 return SplitBlock; 1629 } 1630 1631 // Merge the control flow of the newly generated code with the existing code. 1632 // 1633 // @param SplitBlock The basic block where the control flow was split between 1634 // old and new version of the Scop. 1635 // @param Builder An IRBuilder that points to the last instruction of the 1636 // newly generated code. 1637 void mergeControlFlow(BasicBlock *SplitBlock, IRBuilder<> *Builder) { 1638 BasicBlock *MergeBlock; 1639 Region *R = region; 1640 1641 if (R->getExit()->getSinglePredecessor()) 1642 // No splitEdge required. A block with a single predecessor cannot have 1643 // PHI nodes that would complicate life. 1644 MergeBlock = R->getExit(); 1645 else { 1646 MergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this); 1647 // SplitEdge will never split R->getExit(), as R->getExit() has more than 1648 // one predecessor. Hence, mergeBlock is always a newly generated block. 1649 R->replaceExit(MergeBlock); 1650 } 1651 1652 Builder->CreateBr(MergeBlock); 1653 MergeBlock->setName("polly.merge_new_and_old"); 1654 1655 if (DT->dominates(SplitBlock, MergeBlock)) 1656 DT->changeImmediateDominator(MergeBlock, SplitBlock); 1657 } 1658 1659 bool runOnScop(Scop &scop) { 1660 S = &scop; 1661 region = &S->getRegion(); 1662 DT = &getAnalysis<DominatorTree>(); 1663 RI = &getAnalysis<RegionInfo>(); 1664 1665 parallelLoops.clear(); 1666 1667 assert(region->isSimple() && "Only simple regions are supported"); 1668 1669 // In the CFG the optimized code of the SCoP is generated next to the 1670 // original code. Both the new and the original version of the code remain 1671 // in the CFG. A branch statement decides which version is executed. 1672 // For now, we always execute the new version (the old one is dead code 1673 // eliminated by the cleanup passes). In the future we may decide to execute 1674 // the new version only if certain run time checks succeed. This will be 1675 // useful to support constructs for which we cannot prove all assumptions at 1676 // compile time. 1677 // 1678 // Before transformation: 1679 // 1680 // bb0 1681 // | 1682 // orig_scop 1683 // | 1684 // bb1 1685 // 1686 // After transformation: 1687 // bb0 1688 // | 1689 // polly.splitBlock 1690 // / \. 1691 // | startBlock 1692 // | | 1693 // orig_scop new_scop 1694 // \ / 1695 // \ / 1696 // bb1 (joinBlock) 1697 IRBuilder<> builder(region->getEntry()); 1698 1699 // The builder will be set to startBlock. 1700 BasicBlock *splitBlock = addSplitAndStartBlock(&builder); 1701 BasicBlock *StartBlock = builder.GetInsertBlock(); 1702 1703 mergeControlFlow(splitBlock, &builder); 1704 builder.SetInsertPoint(StartBlock->begin()); 1705 1706 ClastStmtCodeGen CodeGen(S, builder, this); 1707 CloogInfo &C = getAnalysis<CloogInfo>(); 1708 CodeGen.codegen(C.getClast()); 1709 1710 parallelLoops.insert(parallelLoops.begin(), 1711 CodeGen.getParallelLoops().begin(), 1712 CodeGen.getParallelLoops().end()); 1713 1714 return true; 1715 } 1716 1717 virtual void printScop(raw_ostream &OS) const { 1718 for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(), 1719 PE = parallelLoops.end(); PI != PE; ++PI) 1720 OS << "Parallel loop with iterator '" << *PI << "' generated\n"; 1721 } 1722 1723 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 1724 AU.addRequired<CloogInfo>(); 1725 AU.addRequired<Dependences>(); 1726 AU.addRequired<DominatorTree>(); 1727 AU.addRequired<RegionInfo>(); 1728 AU.addRequired<ScalarEvolution>(); 1729 AU.addRequired<ScopDetection>(); 1730 AU.addRequired<ScopInfo>(); 1731 AU.addRequired<TargetData>(); 1732 1733 AU.addPreserved<CloogInfo>(); 1734 AU.addPreserved<Dependences>(); 1735 1736 // FIXME: We do not create LoopInfo for the newly generated loops. 1737 AU.addPreserved<LoopInfo>(); 1738 AU.addPreserved<DominatorTree>(); 1739 AU.addPreserved<ScopDetection>(); 1740 AU.addPreserved<ScalarEvolution>(); 1741 1742 // FIXME: We do not yet add regions for the newly generated code to the 1743 // region tree. 1744 AU.addPreserved<RegionInfo>(); 1745 AU.addPreserved<TempScopInfo>(); 1746 AU.addPreserved<ScopInfo>(); 1747 AU.addPreservedID(IndependentBlocksID); 1748 } 1749 }; 1750 } 1751 1752 char CodeGeneration::ID = 1; 1753 1754 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 1755 "Polly - Create LLVM-IR from SCoPs", false, false) 1756 INITIALIZE_PASS_DEPENDENCY(CloogInfo) 1757 INITIALIZE_PASS_DEPENDENCY(Dependences) 1758 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 1759 INITIALIZE_PASS_DEPENDENCY(RegionInfo) 1760 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1761 INITIALIZE_PASS_DEPENDENCY(ScopDetection) 1762 INITIALIZE_PASS_DEPENDENCY(TargetData) 1763 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 1764 "Polly - Create LLVM-IR from SCoPs", false, false) 1765 1766 Pass *polly::createCodeGenerationPass() { 1767 return new CodeGeneration(); 1768 } 1769