1 //===------ CodeGeneration.cpp - Code generate the Scops. -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using Cloog. 12 // 13 // The Scop describes the high level memory behaviour of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. Cloog is used to generate an abstract syntax tree (clast) that 16 // reflects the updated execution order. This clast is used to create new 17 // LLVM-IR that is computational equivalent to the original control flow region, 18 // but executes its code in the new execution order defined by the changed 19 // scattering. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "polly-codegen" 24 25 #include "polly/Cloog.h" 26 #include "polly/CodeGeneration.h" 27 #include "polly/Dependences.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/ScopInfo.h" 30 #include "polly/TempScopInfo.h" 31 #include "polly/Support/GICHelper.h" 32 #include "polly/LoopGenerators.h" 33 34 #include "llvm/Module.h" 35 #include "llvm/ADT/SetVector.h" 36 #include "llvm/ADT/PostOrderIterator.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/ScalarEvolutionExpander.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/IRBuilder.h" 42 #include "llvm/Target/TargetData.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 45 #define CLOOG_INT_GMP 1 46 #include "cloog/cloog.h" 47 #include "cloog/isl/cloog.h" 48 49 #include "isl/aff.h" 50 51 #include <vector> 52 #include <utility> 53 54 using namespace polly; 55 using namespace llvm; 56 57 struct isl_set; 58 59 namespace polly { 60 61 bool EnablePollyVector; 62 63 static cl::opt<bool, true> 64 Vector("enable-polly-vector", 65 cl::desc("Enable polly vector code generation"), cl::Hidden, 66 cl::location(EnablePollyVector), cl::init(false), cl::ZeroOrMore); 67 68 static cl::opt<bool> 69 OpenMP("enable-polly-openmp", 70 cl::desc("Generate OpenMP parallel code"), cl::Hidden, 71 cl::value_desc("OpenMP code generation enabled if true"), 72 cl::init(false), cl::ZeroOrMore); 73 74 static cl::opt<bool> 75 AtLeastOnce("enable-polly-atLeastOnce", 76 cl::desc("Give polly the hint, that every loop is executed at least" 77 "once"), cl::Hidden, 78 cl::value_desc("OpenMP code generation enabled if true"), 79 cl::init(false), cl::ZeroOrMore); 80 81 static cl::opt<bool> 82 Aligned("enable-polly-aligned", 83 cl::desc("Assumed aligned memory accesses."), cl::Hidden, 84 cl::value_desc("OpenMP code generation enabled if true"), 85 cl::init(false), cl::ZeroOrMore); 86 87 typedef DenseMap<const Value*, Value*> ValueMapT; 88 typedef DenseMap<const char*, Value*> CharMapT; 89 typedef std::vector<ValueMapT> VectorValueMapT; 90 91 class IslGenerator { 92 public: 93 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) : 94 Builder(Builder), IVS(IVS) {} 95 Value *generateIslInt(__isl_take isl_int Int); 96 Value *generateIslAff(__isl_take isl_aff *Aff); 97 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 98 99 private: 100 typedef struct { 101 Value *Result; 102 class IslGenerator *Generator; 103 } IslGenInfo; 104 105 IRBuilder<> &Builder; 106 std::vector<Value *> &IVS; 107 static int mergeIslAffValues(__isl_take isl_set *Set, 108 __isl_take isl_aff *Aff, void *User); 109 }; 110 111 Value *IslGenerator::generateIslInt(isl_int Int) { 112 mpz_t IntMPZ; 113 mpz_init(IntMPZ); 114 isl_int_get_gmp(Int, IntMPZ); 115 Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ)); 116 mpz_clear(IntMPZ); 117 return IntValue; 118 } 119 120 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 121 Value *Result; 122 Value *ConstValue; 123 isl_int ConstIsl; 124 125 isl_int_init(ConstIsl); 126 isl_aff_get_constant(Aff, &ConstIsl); 127 ConstValue = generateIslInt(ConstIsl); 128 Type *Ty = Builder.getInt64Ty(); 129 130 // FIXME: We should give the constant and coefficients the right type. Here 131 // we force it into i64. 132 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 133 134 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 135 136 assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables" 137 "must match the dimension of the affine space."); 138 139 isl_int CoefficientIsl; 140 isl_int_init(CoefficientIsl); 141 142 for (unsigned int i = 0; i < NbInputDims; ++i) { 143 Value *CoefficientValue; 144 isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl); 145 146 if (isl_int_is_zero(CoefficientIsl)) 147 continue; 148 149 CoefficientValue = generateIslInt(CoefficientIsl); 150 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 151 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 152 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 153 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 154 } 155 156 isl_int_clear(CoefficientIsl); 157 isl_int_clear(ConstIsl); 158 isl_aff_free(Aff); 159 160 return Result; 161 } 162 163 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 164 __isl_take isl_aff *Aff, void *User) { 165 IslGenInfo *GenInfo = (IslGenInfo *)User; 166 167 assert((GenInfo->Result == NULL) && "Result is already set." 168 "Currently only single isl_aff is supported"); 169 assert(isl_set_plain_is_universe(Set) 170 && "Code generation failed because the set is not universe"); 171 172 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 173 174 isl_set_free(Set); 175 return 0; 176 } 177 178 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 179 IslGenInfo User; 180 User.Result = NULL; 181 User.Generator = this; 182 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 183 assert(User.Result && "Code generation for isl_pw_aff failed"); 184 185 isl_pw_aff_free(PwAff); 186 return User.Result; 187 } 188 189 /// @brief Generate a new basic block for a polyhedral statement. 190 /// 191 /// The only public function exposed is generate(). 192 class BlockGenerator { 193 public: 194 /// @brief Generate a new BasicBlock for a ScopStmt. 195 /// 196 /// @param Builder The LLVM-IR Builder used to generate the statement. The 197 /// code is generated at the location, the Builder points to. 198 /// @param Stmt The statement to code generate. 199 /// @param GlobalMap A map that defines for certain Values referenced from the 200 /// original code new Values they should be replaced with. 201 /// @param P A reference to the pass this function is called from. 202 /// The pass is needed to update other analysis. 203 static void generate(IRBuilder<> &Builder, ScopStmt &Stmt, 204 ValueMapT &GlobalMap, Pass *P) { 205 BlockGenerator Generator(Builder, Stmt, P); 206 Generator.copyBB(GlobalMap); 207 } 208 209 protected: 210 IRBuilder<> &Builder; 211 ScopStmt &Statement; 212 Pass *P; 213 214 BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P); 215 216 /// @brief Get the new version of a Value. 217 /// 218 /// @param Old The old Value. 219 /// @param BBMap A mapping from old values to their new values 220 /// (for values recalculated within this basic block). 221 /// @param GlobalMap A mapping from old values to their new values 222 /// (for values recalculated in the new ScoP, but not 223 /// within this basic block). 224 /// 225 /// @returns o The old value, if it is still valid. 226 /// o The new value, if available. 227 /// o NULL, if no value is found. 228 Value *getNewValue(const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap); 229 230 void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 231 ValueMapT &GlobalMap); 232 233 /// @brief Get the memory access offset to be added to the base address 234 std::vector<Value*> getMemoryAccessIndex(__isl_keep isl_map *AccessRelation, 235 Value *BaseAddress, ValueMapT &BBMap, 236 ValueMapT &GlobalMap); 237 238 /// @brief Get the new operand address according to the changed access in 239 /// JSCOP file. 240 Value *getNewAccessOperand(__isl_keep isl_map *NewAccessRelation, 241 Value *BaseAddress, ValueMapT &BBMap, 242 ValueMapT &GlobalMap); 243 244 /// @brief Generate the operand address 245 Value *generateLocationAccessed(const Instruction *Inst, 246 const Value *Pointer, ValueMapT &BBMap, 247 ValueMapT &GlobalMap); 248 249 Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap, 250 ValueMapT &GlobalMap); 251 252 Value *generateScalarStore(const StoreInst *store, ValueMapT &BBMap, 253 ValueMapT &GlobalMap); 254 255 /// @brief Copy a single Instruction. 256 /// 257 /// This copies a single Instruction and updates references to old values 258 /// with references to new values, as defined by GlobalMap and BBMap. 259 /// 260 /// @param BBMap A mapping from old values to their new values 261 /// (for values recalculated within this basic block). 262 /// @param GlobalMap A mapping from old values to their new values 263 /// (for values recalculated in the new ScoP, but not 264 /// within this basic block). 265 void copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 266 ValueMapT &GlobalMap); 267 268 /// @brief Copy the basic block. 269 /// 270 /// This copies the entire basic block and updates references to old values 271 /// with references to new values, as defined by GlobalMap. 272 /// 273 /// @param GlobalMap A mapping from old values to their new values 274 /// (for values recalculated in the new ScoP, but not 275 /// within this basic block). 276 void copyBB(ValueMapT &GlobalMap); 277 }; 278 279 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P): 280 Builder(B), Statement(Stmt), P(P) {} 281 282 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 283 ValueMapT &GlobalMap) { 284 // We assume constants never change. 285 // This avoids map lookups for many calls to this function. 286 if (isa<Constant>(Old)) 287 return const_cast<Value*>(Old); 288 289 if (GlobalMap.count(Old)) { 290 Value *New = GlobalMap[Old]; 291 292 if (Old->getType()->getScalarSizeInBits() 293 < New->getType()->getScalarSizeInBits()) 294 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 295 296 return New; 297 } 298 299 if (BBMap.count(Old)) { 300 return BBMap[Old]; 301 } 302 303 // 'Old' is within the original SCoP, but was not rewritten. 304 // 305 // Such values appear, if they only calculate information already available in 306 // the polyhedral description (e.g. an induction variable increment). They 307 // can be safely ignored. 308 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 309 if (Statement.getParent()->getRegion().contains(Inst->getParent())) 310 return NULL; 311 312 // Everything else is probably a scop-constant value defined as global, 313 // function parameter or an instruction not within the scop. 314 return const_cast<Value*>(Old); 315 } 316 317 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 318 ValueMapT &GlobalMap) { 319 Instruction *NewInst = Inst->clone(); 320 321 // Replace old operands with the new ones. 322 for (Instruction::const_op_iterator OI = Inst->op_begin(), 323 OE = Inst->op_end(); OI != OE; ++OI) { 324 Value *OldOperand = *OI; 325 Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap); 326 327 if (!NewOperand) { 328 assert(!isa<StoreInst>(NewInst) 329 && "Store instructions are always needed!"); 330 delete NewInst; 331 return; 332 } 333 334 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 335 } 336 337 Builder.Insert(NewInst); 338 BBMap[Inst] = NewInst; 339 340 if (!NewInst->getType()->isVoidTy()) 341 NewInst->setName("p_" + Inst->getName()); 342 } 343 344 std::vector<Value*> BlockGenerator::getMemoryAccessIndex( 345 __isl_keep isl_map *AccessRelation, Value *BaseAddress, 346 ValueMapT &BBMap, ValueMapT &GlobalMap) { 347 348 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) 349 && "Only single dimensional access functions supported"); 350 351 std::vector<Value *> IVS; 352 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 353 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 354 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap); 355 IVS.push_back(NewIV); 356 } 357 358 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 359 IslGenerator IslGen(Builder, IVS); 360 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 361 362 Type *Ty = Builder.getInt64Ty(); 363 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 364 365 std::vector<Value*> IndexArray; 366 Value *NullValue = Constant::getNullValue(Ty); 367 IndexArray.push_back(NullValue); 368 IndexArray.push_back(OffsetValue); 369 return IndexArray; 370 } 371 372 Value *BlockGenerator::getNewAccessOperand( 373 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, 374 ValueMapT &BBMap, ValueMapT &GlobalMap) { 375 std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation, 376 BaseAddress, 377 BBMap, GlobalMap); 378 Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, 379 "p_newarrayidx_"); 380 return NewOperand; 381 } 382 383 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, 384 const Value *Pointer, 385 ValueMapT &BBMap, 386 ValueMapT &GlobalMap) { 387 MemoryAccess &Access = Statement.getAccessFor(Inst); 388 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 389 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 390 391 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) 392 && "Current and new access function use different spaces"); 393 394 Value *NewPointer; 395 396 if (!NewAccessRelation) { 397 NewPointer = getNewValue(Pointer, BBMap, GlobalMap); 398 } else { 399 Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr()); 400 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, 401 BBMap, GlobalMap); 402 } 403 404 isl_map_free(CurrentAccessRelation); 405 isl_map_free(NewAccessRelation); 406 return NewPointer; 407 } 408 409 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, 410 ValueMapT &BBMap, 411 ValueMapT &GlobalMap) { 412 const Value *Pointer = Load->getPointerOperand(); 413 const Instruction *Inst = dyn_cast<Instruction>(Load); 414 Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap); 415 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 416 Load->getName() + "_p_scalar_"); 417 return ScalarLoad; 418 } 419 420 Value *BlockGenerator::generateScalarStore(const StoreInst *Store, 421 ValueMapT &BBMap, 422 ValueMapT &GlobalMap) { 423 const Value *Pointer = Store->getPointerOperand(); 424 Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap, 425 GlobalMap); 426 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap); 427 428 return Builder.CreateStore(ValueOperand, NewPointer); 429 } 430 431 void BlockGenerator::copyInstruction(const Instruction *Inst, 432 ValueMapT &BBMap, ValueMapT &GlobalMap) { 433 // Terminator instructions control the control flow. They are explicitly 434 // expressed in the clast and do not need to be copied. 435 if (Inst->isTerminator()) 436 return; 437 438 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 439 BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap); 440 return; 441 } 442 443 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 444 BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap); 445 return; 446 } 447 448 copyInstScalar(Inst, BBMap, GlobalMap); 449 } 450 451 452 void BlockGenerator::copyBB(ValueMapT &GlobalMap) { 453 BasicBlock *BB = Statement.getBasicBlock(); 454 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 455 Builder.GetInsertPoint(), P); 456 CopyBB->setName("polly.stmt." + BB->getName()); 457 Builder.SetInsertPoint(CopyBB->begin()); 458 459 ValueMapT BBMap; 460 461 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 462 ++II) 463 copyInstruction(II, BBMap, GlobalMap); 464 } 465 466 /// @brief Generate a new vector basic block for a polyhedral statement. 467 /// 468 /// The only public function exposed is generate(). 469 class VectorBlockGenerator : BlockGenerator { 470 public: 471 /// @brief Generate a new vector basic block for a ScoPStmt. 472 /// 473 /// This code generation is similar to the normal, scalar code generation, 474 /// except that each instruction is code generated for several vector lanes 475 /// at a time. If possible instructions are issued as actual vector 476 /// instructions, but e.g. for address calculation instructions we currently 477 /// generate scalar instructions for each vector lane. 478 /// 479 /// @param Builder The LLVM-IR Builder used to generate the statement. The 480 /// code is generated at the location, the builder points 481 /// to. 482 /// @param Stmt The statement to code generate. 483 /// @param GlobalMaps A vector of maps that define for certain Values 484 /// referenced from the original code new Values they should 485 /// be replaced with. Each map in the vector of maps is 486 /// used for one vector lane. The number of elements in the 487 /// vector defines the width of the generated vector 488 /// instructions. 489 /// @param P A reference to the pass this function is called from. 490 /// The pass is needed to update other analysis. 491 static void generate(IRBuilder<> &B, ScopStmt &Stmt, 492 VectorValueMapT &GlobalMaps, __isl_keep isl_set *Domain, 493 Pass *P) { 494 VectorBlockGenerator Generator(B, GlobalMaps, Stmt, Domain, P); 495 Generator.copyBB(); 496 } 497 498 private: 499 // This is a vector of global value maps. The first map is used for the first 500 // vector lane, ... 501 // Each map, contains information about Instructions in the old ScoP, which 502 // are recalculated in the new SCoP. When copying the basic block, we replace 503 // all referenes to the old instructions with their recalculated values. 504 VectorValueMapT &GlobalMaps; 505 506 isl_set *Domain; 507 508 VectorBlockGenerator(IRBuilder<> &B, VectorValueMapT &GlobalMaps, 509 ScopStmt &Stmt, __isl_keep isl_set *Domain, Pass *P); 510 511 int getVectorWidth(); 512 513 Value *getVectorValue(const Value *Old, ValueMapT &VectorMap, 514 VectorValueMapT &ScalarMaps); 515 516 Type *getVectorPtrTy(const Value *V, int Width); 517 518 /// @brief Load a vector from a set of adjacent scalars 519 /// 520 /// In case a set of scalars is known to be next to each other in memory, 521 /// create a vector load that loads those scalars 522 /// 523 /// %vector_ptr= bitcast double* %p to <4 x double>* 524 /// %vec_full = load <4 x double>* %vector_ptr 525 /// 526 Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap); 527 528 /// @brief Load a vector initialized from a single scalar in memory 529 /// 530 /// In case all elements of a vector are initialized to the same 531 /// scalar value, this value is loaded and shuffeled into all elements 532 /// of the vector. 533 /// 534 /// %splat_one = load <1 x double>* %p 535 /// %splat = shufflevector <1 x double> %splat_one, <1 x 536 /// double> %splat_one, <4 x i32> zeroinitializer 537 /// 538 Value *generateStrideZeroLoad(const LoadInst *Load, ValueMapT &BBMap); 539 540 /// @Load a vector from scalars distributed in memory 541 /// 542 /// In case some scalars a distributed randomly in memory. Create a vector 543 /// by loading each scalar and by inserting one after the other into the 544 /// vector. 545 /// 546 /// %scalar_1= load double* %p_1 547 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 548 /// %scalar 2 = load double* %p_2 549 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 550 /// 551 Value *generateUnknownStrideLoad(const LoadInst *Load, 552 VectorValueMapT &ScalarMaps); 553 554 void generateLoad(const LoadInst *Load, ValueMapT &VectorMap, 555 VectorValueMapT &ScalarMaps); 556 557 void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &VectorMap, 558 VectorValueMapT &ScalarMaps); 559 560 void copyBinaryInst(const BinaryOperator *Inst, ValueMapT &VectorMap, 561 VectorValueMapT &ScalarMaps); 562 563 void copyStore(const StoreInst *Store, ValueMapT &VectorMap, 564 VectorValueMapT &ScalarMaps); 565 566 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); 567 568 void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap, 569 VectorValueMapT &ScalarMaps); 570 571 void copyBB(); 572 }; 573 574 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, 575 VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain, 576 Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), 577 Domain(Domain) { 578 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 579 assert(Domain && "No statement domain provided"); 580 } 581 582 Value *VectorBlockGenerator::getVectorValue(const Value *Old, 583 ValueMapT &VectorMap, 584 VectorValueMapT &ScalarMaps) { 585 if (VectorMap.count(Old)) 586 return VectorMap[Old]; 587 588 int Width = getVectorWidth(); 589 590 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 591 592 for (int Lane = 0; Lane < Width; Lane++) 593 Vector = Builder.CreateInsertElement(Vector, 594 getNewValue(Old, 595 ScalarMaps[Lane], 596 GlobalMaps[Lane]), 597 Builder.getInt32(Lane)); 598 599 VectorMap[Old] = Vector; 600 601 return Vector; 602 } 603 604 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 605 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 606 assert(PointerTy && "PointerType expected"); 607 608 Type *ScalarType = PointerTy->getElementType(); 609 VectorType *VectorType = VectorType::get(ScalarType, Width); 610 611 return PointerType::getUnqual(VectorType); 612 } 613 614 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 615 ValueMapT &BBMap) { 616 const Value *Pointer = Load->getPointerOperand(); 617 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 618 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 619 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 620 "vector_ptr"); 621 LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, 622 Load->getName() + "_p_vec_full"); 623 if (!Aligned) 624 VecLoad->setAlignment(8); 625 626 return VecLoad; 627 } 628 629 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 630 ValueMapT &BBMap) { 631 const Value *Pointer = Load->getPointerOperand(); 632 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 633 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 634 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 635 Load->getName() + "_p_vec_p"); 636 LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr, 637 Load->getName() + "_p_splat_one"); 638 639 if (!Aligned) 640 ScalarLoad->setAlignment(8); 641 642 Constant *SplatVector = 643 Constant::getNullValue(VectorType::get(Builder.getInt32Ty(), 644 getVectorWidth())); 645 646 Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, 647 SplatVector, 648 Load->getName() 649 + "_p_splat"); 650 return VectorLoad; 651 } 652 653 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, 654 VectorValueMapT &ScalarMaps) { 655 int VectorWidth = getVectorWidth(); 656 const Value *Pointer = Load->getPointerOperand(); 657 VectorType *VectorType = VectorType::get( 658 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 659 660 Value *Vector = UndefValue::get(VectorType); 661 662 for (int i = 0; i < VectorWidth; i++) { 663 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 664 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 665 Load->getName() + "_p_scalar_"); 666 Vector = Builder.CreateInsertElement(Vector, ScalarLoad, 667 Builder.getInt32(i), 668 Load->getName() + "_p_vec_"); 669 } 670 671 return Vector; 672 } 673 674 void VectorBlockGenerator::generateLoad(const LoadInst *Load, 675 ValueMapT &VectorMap, 676 VectorValueMapT &ScalarMaps) { 677 Value *NewLoad; 678 679 MemoryAccess &Access = Statement.getAccessFor(Load); 680 681 if (Access.isStrideZero(isl_set_copy(Domain))) 682 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 683 else if (Access.isStrideOne(isl_set_copy(Domain))) 684 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 685 else 686 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 687 688 VectorMap[Load] = NewLoad; 689 } 690 691 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 692 ValueMapT &VectorMap, 693 VectorValueMapT &ScalarMaps) { 694 int VectorWidth = getVectorWidth(); 695 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, 696 ScalarMaps); 697 698 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 699 700 const CastInst *Cast = dyn_cast<CastInst>(Inst); 701 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 702 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 703 } 704 705 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 706 ValueMapT &VectorMap, 707 VectorValueMapT &ScalarMaps) { 708 Value *OpZero = Inst->getOperand(0); 709 Value *OpOne = Inst->getOperand(1); 710 711 Value *NewOpZero, *NewOpOne; 712 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps); 713 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps); 714 715 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, 716 NewOpOne, 717 Inst->getName() + "p_vec"); 718 VectorMap[Inst] = NewInst; 719 } 720 721 void VectorBlockGenerator::copyStore(const StoreInst *Store, 722 ValueMapT &VectorMap, 723 VectorValueMapT &ScalarMaps) { 724 int VectorWidth = getVectorWidth(); 725 726 MemoryAccess &Access = Statement.getAccessFor(Store); 727 728 const Value *Pointer = Store->getPointerOperand(); 729 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, 730 ScalarMaps); 731 732 if (Access.isStrideOne(isl_set_copy(Domain))) { 733 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 734 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]); 735 736 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 737 "vector_ptr"); 738 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 739 740 if (!Aligned) 741 Store->setAlignment(8); 742 } else { 743 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 744 Value *Scalar = Builder.CreateExtractElement(Vector, 745 Builder.getInt32(i)); 746 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 747 Builder.CreateStore(Scalar, NewPointer); 748 } 749 } 750 } 751 752 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 753 ValueMapT &VectorMap) { 754 for (Instruction::const_op_iterator OI = Inst->op_begin(), 755 OE = Inst->op_end(); OI != OE; ++OI) 756 if (VectorMap.count(*OI)) 757 return true; 758 return false; 759 } 760 761 int VectorBlockGenerator::getVectorWidth() { 762 return GlobalMaps.size(); 763 } 764 765 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 766 ValueMapT &VectorMap, 767 VectorValueMapT &ScalarMaps) { 768 // Terminator instructions control the control flow. They are explicitly 769 // expressed in the clast and do not need to be copied. 770 if (Inst->isTerminator()) 771 return; 772 773 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 774 generateLoad(Load, VectorMap, ScalarMaps); 775 return; 776 } 777 778 if (hasVectorOperands(Inst, VectorMap)) { 779 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 780 copyStore(Store, VectorMap, ScalarMaps); 781 return; 782 } 783 784 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 785 copyUnaryInst(Unary, VectorMap, ScalarMaps); 786 return; 787 } 788 789 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 790 copyBinaryInst(Binary, VectorMap, ScalarMaps); 791 return; 792 } 793 794 llvm_unreachable("Cannot issue vector code for this instruction"); 795 } 796 797 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 798 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]); 799 } 800 801 void VectorBlockGenerator::copyBB() { 802 BasicBlock *BB = Statement.getBasicBlock(); 803 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 804 Builder.GetInsertPoint(), P); 805 CopyBB->setName("polly.stmt." + BB->getName()); 806 Builder.SetInsertPoint(CopyBB->begin()); 807 808 // Create two maps that store the mapping from the original instructions of 809 // the old basic block to their copies in the new basic block. Those maps 810 // are basic block local. 811 // 812 // As vector code generation is supported there is one map for scalar values 813 // and one for vector values. 814 // 815 // In case we just do scalar code generation, the vectorMap is not used and 816 // the scalarMap has just one dimension, which contains the mapping. 817 // 818 // In case vector code generation is done, an instruction may either appear 819 // in the vector map once (as it is calculating >vectorwidth< values at a 820 // time. Or (if the values are calculated using scalar operations), it 821 // appears once in every dimension of the scalarMap. 822 VectorValueMapT ScalarBlockMap(getVectorWidth()); 823 ValueMapT VectorBlockMap; 824 825 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); 826 II != IE; ++II) 827 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 828 } 829 830 /// Class to generate LLVM-IR that calculates the value of a clast_expr. 831 class ClastExpCodeGen { 832 IRBuilder<> &Builder; 833 const CharMapT &IVS; 834 835 Value *codegen(const clast_name *e, Type *Ty); 836 Value *codegen(const clast_term *e, Type *Ty); 837 Value *codegen(const clast_binary *e, Type *Ty); 838 Value *codegen(const clast_reduction *r, Type *Ty); 839 public: 840 841 // A generator for clast expressions. 842 // 843 // @param B The IRBuilder that defines where the code to calculate the 844 // clast expressions should be inserted. 845 // @param IVMAP A Map that translates strings describing the induction 846 // variables to the Values* that represent these variables 847 // on the LLVM side. 848 ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap); 849 850 // Generates code to calculate a given clast expression. 851 // 852 // @param e The expression to calculate. 853 // @return The Value that holds the result. 854 Value *codegen(const clast_expr *e, Type *Ty); 855 }; 856 857 Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) { 858 CharMapT::const_iterator I = IVS.find(e->name); 859 860 assert(I != IVS.end() && "Clast name not found"); 861 862 return Builder.CreateSExtOrBitCast(I->second, Ty); 863 } 864 865 Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) { 866 APInt a = APInt_from_MPZ(e->val); 867 868 Value *ConstOne = ConstantInt::get(Builder.getContext(), a); 869 ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); 870 871 if (!e->var) 872 return ConstOne; 873 874 Value *var = codegen(e->var, Ty); 875 return Builder.CreateMul(ConstOne, var); 876 } 877 878 Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { 879 Value *LHS = codegen(e->LHS, Ty); 880 881 APInt RHS_AP = APInt_from_MPZ(e->RHS); 882 883 Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); 884 RHS = Builder.CreateSExtOrBitCast(RHS, Ty); 885 886 switch (e->type) { 887 case clast_bin_mod: 888 return Builder.CreateSRem(LHS, RHS); 889 case clast_bin_fdiv: 890 { 891 // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d 892 Value *One = ConstantInt::get(Ty, 1); 893 Value *Zero = ConstantInt::get(Ty, 0); 894 Value *Sum1 = Builder.CreateSub(LHS, RHS); 895 Value *Sum2 = Builder.CreateAdd(Sum1, One); 896 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 897 Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); 898 return Builder.CreateSDiv(Dividend, RHS); 899 } 900 case clast_bin_cdiv: 901 { 902 // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d 903 Value *One = ConstantInt::get(Ty, 1); 904 Value *Zero = ConstantInt::get(Ty, 0); 905 Value *Sum1 = Builder.CreateAdd(LHS, RHS); 906 Value *Sum2 = Builder.CreateSub(Sum1, One); 907 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 908 Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); 909 return Builder.CreateSDiv(Dividend, RHS); 910 } 911 case clast_bin_div: 912 return Builder.CreateSDiv(LHS, RHS); 913 }; 914 915 llvm_unreachable("Unknown clast binary expression type"); 916 } 917 918 Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { 919 assert(( r->type == clast_red_min 920 || r->type == clast_red_max 921 || r->type == clast_red_sum) 922 && "Clast reduction type not supported"); 923 Value *old = codegen(r->elts[0], Ty); 924 925 for (int i=1; i < r->n; ++i) { 926 Value *exprValue = codegen(r->elts[i], Ty); 927 928 switch (r->type) { 929 case clast_red_min: 930 { 931 Value *cmp = Builder.CreateICmpSLT(old, exprValue); 932 old = Builder.CreateSelect(cmp, old, exprValue); 933 break; 934 } 935 case clast_red_max: 936 { 937 Value *cmp = Builder.CreateICmpSGT(old, exprValue); 938 old = Builder.CreateSelect(cmp, old, exprValue); 939 break; 940 } 941 case clast_red_sum: 942 old = Builder.CreateAdd(old, exprValue); 943 break; 944 } 945 } 946 947 return old; 948 } 949 950 ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap) 951 : Builder(B), IVS(IVMap) {} 952 953 Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { 954 switch(e->type) { 955 case clast_expr_name: 956 return codegen((const clast_name *)e, Ty); 957 case clast_expr_term: 958 return codegen((const clast_term *)e, Ty); 959 case clast_expr_bin: 960 return codegen((const clast_binary *)e, Ty); 961 case clast_expr_red: 962 return codegen((const clast_reduction *)e, Ty); 963 } 964 965 llvm_unreachable("Unknown clast expression!"); 966 } 967 968 class ClastStmtCodeGen { 969 public: 970 const std::vector<std::string> &getParallelLoops(); 971 972 private: 973 // The Scop we code generate. 974 Scop *S; 975 Pass *P; 976 977 // The Builder specifies the current location to code generate at. 978 IRBuilder<> &Builder; 979 980 // Map the Values from the old code to their counterparts in the new code. 981 ValueMapT ValueMap; 982 983 // clastVars maps from the textual representation of a clast variable to its 984 // current *Value. clast variables are scheduling variables, original 985 // induction variables or parameters. They are used either in loop bounds or 986 // to define the statement instance that is executed. 987 // 988 // for (s = 0; s < n + 3; ++i) 989 // for (t = s; t < m; ++j) 990 // Stmt(i = s + 3 * m, j = t); 991 // 992 // {s,t,i,j,n,m} is the set of clast variables in this clast. 993 CharMapT ClastVars; 994 995 // Codegenerator for clast expressions. 996 ClastExpCodeGen ExpGen; 997 998 // Do we currently generate parallel code? 999 bool parallelCodeGeneration; 1000 1001 std::vector<std::string> parallelLoops; 1002 1003 void codegen(const clast_assignment *a); 1004 1005 void codegen(const clast_assignment *a, ScopStmt *Statement, 1006 unsigned Dimension, int vectorDim, 1007 std::vector<ValueMapT> *VectorVMap = 0); 1008 1009 void codegenSubstitutions(const clast_stmt *Assignment, 1010 ScopStmt *Statement, int vectorDim = 0, 1011 std::vector<ValueMapT> *VectorVMap = 0); 1012 1013 void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL, 1014 const char *iterator = NULL, isl_set *scatteringDomain = 0); 1015 1016 void codegen(const clast_block *b); 1017 1018 /// @brief Create a classical sequential loop. 1019 void codegenForSequential(const clast_for *f); 1020 1021 /// @brief Create OpenMP structure values. 1022 /// 1023 /// Create a list of values that has to be stored into the OpenMP subfuncition 1024 /// structure. 1025 SetVector<Value*> getOMPValues(); 1026 1027 /// @brief Update the internal structures according to a Value Map. 1028 /// 1029 /// @param VMap A map from old to new values. 1030 /// @param Reverse If true, we assume the update should be reversed. 1031 void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1032 bool Reverse); 1033 1034 /// @brief Create an OpenMP parallel for loop. 1035 /// 1036 /// This loop reflects a loop as if it would have been created by an OpenMP 1037 /// statement. 1038 void codegenForOpenMP(const clast_for *f); 1039 1040 bool isInnermostLoop(const clast_for *f); 1041 1042 /// @brief Get the number of loop iterations for this loop. 1043 /// @param f The clast for loop to check. 1044 int getNumberOfIterations(const clast_for *f); 1045 1046 /// @brief Create vector instructions for this loop. 1047 void codegenForVector(const clast_for *f); 1048 1049 void codegen(const clast_for *f); 1050 1051 Value *codegen(const clast_equation *eq); 1052 1053 void codegen(const clast_guard *g); 1054 1055 void codegen(const clast_stmt *stmt); 1056 1057 void addParameters(const CloogNames *names); 1058 1059 IntegerType *getIntPtrTy(); 1060 1061 public: 1062 void codegen(const clast_root *r); 1063 1064 ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P); 1065 }; 1066 } 1067 1068 IntegerType *ClastStmtCodeGen::getIntPtrTy() { 1069 return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext()); 1070 } 1071 1072 const std::vector<std::string> &ClastStmtCodeGen::getParallelLoops() { 1073 return parallelLoops; 1074 } 1075 1076 void ClastStmtCodeGen::codegen(const clast_assignment *a) { 1077 Value *V= ExpGen.codegen(a->RHS, getIntPtrTy()); 1078 ClastVars[a->LHS] = V; 1079 } 1080 1081 void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt, 1082 unsigned Dim, int VectorDim, 1083 std::vector<ValueMapT> *VectorVMap) { 1084 const PHINode *PN; 1085 Value *RHS; 1086 1087 assert(!A->LHS && "Statement assignments do not have left hand side"); 1088 1089 PN = Stmt->getInductionVariableForDimension(Dim); 1090 RHS = ExpGen.codegen(A->RHS, Builder.getInt64Ty()); 1091 RHS = Builder.CreateTruncOrBitCast(RHS, PN->getType()); 1092 1093 if (VectorVMap) 1094 (*VectorVMap)[VectorDim][PN] = RHS; 1095 1096 ValueMap[PN] = RHS; 1097 } 1098 1099 void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, 1100 ScopStmt *Statement, int vectorDim, 1101 std::vector<ValueMapT> *VectorVMap) { 1102 int Dimension = 0; 1103 1104 while (Assignment) { 1105 assert(CLAST_STMT_IS_A(Assignment, stmt_ass) 1106 && "Substitions are expected to be assignments"); 1107 codegen((const clast_assignment *)Assignment, Statement, Dimension, 1108 vectorDim, VectorVMap); 1109 Assignment = Assignment->next; 1110 Dimension++; 1111 } 1112 } 1113 1114 void ClastStmtCodeGen::codegen(const clast_user_stmt *u, 1115 std::vector<Value*> *IVS , const char *iterator, 1116 isl_set *Domain) { 1117 ScopStmt *Statement = (ScopStmt *)u->statement->usr; 1118 1119 if (u->substitutions) 1120 codegenSubstitutions(u->substitutions, Statement); 1121 1122 int VectorDimensions = IVS ? IVS->size() : 1; 1123 1124 if (VectorDimensions == 1) { 1125 BlockGenerator::generate(Builder, *Statement, ValueMap, P); 1126 return; 1127 } 1128 1129 VectorValueMapT VectorMap(VectorDimensions); 1130 1131 if (IVS) { 1132 assert (u->substitutions && "Substitutions expected!"); 1133 int i = 0; 1134 for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end(); 1135 II != IE; ++II) { 1136 ClastVars[iterator] = *II; 1137 codegenSubstitutions(u->substitutions, Statement, i, &VectorMap); 1138 i++; 1139 } 1140 } 1141 1142 VectorBlockGenerator::generate(Builder, *Statement, VectorMap, Domain, P); 1143 } 1144 1145 void ClastStmtCodeGen::codegen(const clast_block *b) { 1146 if (b->body) 1147 codegen(b->body); 1148 } 1149 1150 void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { 1151 Value *LowerBound, *UpperBound, *IV, *Stride; 1152 BasicBlock *AfterBB; 1153 Type *IntPtrTy = getIntPtrTy(); 1154 1155 LowerBound = ExpGen.codegen(f->LB, IntPtrTy); 1156 UpperBound = ExpGen.codegen(f->UB, IntPtrTy); 1157 Stride = Builder.getInt(APInt_from_MPZ(f->stride)); 1158 1159 IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB); 1160 1161 // Add loop iv to symbols. 1162 ClastVars[f->iterator] = IV; 1163 1164 if (f->body) 1165 codegen(f->body); 1166 1167 // Loop is finished, so remove its iv from the live symbols. 1168 ClastVars.erase(f->iterator); 1169 Builder.SetInsertPoint(AfterBB->begin()); 1170 } 1171 1172 SetVector<Value*> ClastStmtCodeGen::getOMPValues() { 1173 SetVector<Value*> Values; 1174 1175 // The clast variables 1176 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1177 I != E; I++) 1178 Values.insert(I->second); 1179 1180 // The memory reference base addresses 1181 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { 1182 ScopStmt *Stmt = *SI; 1183 for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(), 1184 E = Stmt->memacc_end(); I != E; ++I) { 1185 Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr()); 1186 Values.insert((BaseAddr)); 1187 } 1188 } 1189 1190 return Values; 1191 } 1192 1193 void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1194 bool Reverse) { 1195 std::set<Value*> Inserted; 1196 1197 if (Reverse) { 1198 OMPGenerator::ValueToValueMapTy ReverseMap; 1199 1200 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1201 I != E; ++I) 1202 ReverseMap.insert(std::make_pair(I->second, I->first)); 1203 1204 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1205 I != E; I++) { 1206 ClastVars[I->first] = ReverseMap[I->second]; 1207 Inserted.insert(I->second); 1208 } 1209 1210 /// FIXME: At the moment we do not reverse the update of the ValueMap. 1211 /// This is incomplet, but the failure should be obvious, such that 1212 /// we can fix this later. 1213 return; 1214 } 1215 1216 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1217 I != E; I++) { 1218 ClastVars[I->first] = VMap[I->second]; 1219 Inserted.insert(I->second); 1220 } 1221 1222 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1223 I != E; ++I) { 1224 if (Inserted.count(I->first)) 1225 continue; 1226 1227 ValueMap[I->first] = I->second; 1228 } 1229 } 1230 1231 static void clearDomtree(Function *F, DominatorTree &DT) { 1232 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 1233 std::vector<BasicBlock*> Nodes; 1234 for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I) 1235 Nodes.push_back(I->getBlock()); 1236 1237 for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end(); 1238 I != E; ++I) 1239 DT.eraseNode(*I); 1240 } 1241 1242 void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { 1243 Value *Stride, *LB, *UB, *IV; 1244 BasicBlock::iterator LoopBody; 1245 IntegerType *IntPtrTy = getIntPtrTy(); 1246 SetVector<Value*> Values; 1247 OMPGenerator::ValueToValueMapTy VMap; 1248 OMPGenerator OMPGen(Builder, P); 1249 1250 Stride = Builder.getInt(APInt_from_MPZ(For->stride)); 1251 Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy); 1252 LB = ExpGen.codegen(For->LB, IntPtrTy); 1253 UB = ExpGen.codegen(For->UB, IntPtrTy); 1254 1255 Values = getOMPValues(); 1256 1257 IV = OMPGen.createParallelLoop(LB, UB, Stride, Values, VMap, &LoopBody); 1258 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 1259 Builder.SetInsertPoint(LoopBody); 1260 1261 updateWithValueMap(VMap, /* reverse */ false); 1262 ClastVars[For->iterator] = IV; 1263 1264 if (For->body) 1265 codegen(For->body); 1266 1267 ClastVars.erase(For->iterator); 1268 updateWithValueMap(VMap, /* reverse */ true); 1269 1270 clearDomtree((*LoopBody).getParent()->getParent(), 1271 P->getAnalysis<DominatorTree>()); 1272 1273 Builder.SetInsertPoint(AfterLoop); 1274 } 1275 1276 bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { 1277 const clast_stmt *stmt = f->body; 1278 1279 while (stmt) { 1280 if (!CLAST_STMT_IS_A(stmt, stmt_user)) 1281 return false; 1282 1283 stmt = stmt->next; 1284 } 1285 1286 return true; 1287 } 1288 1289 int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) { 1290 isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); 1291 isl_set *tmp = isl_set_copy(loopDomain); 1292 1293 // Calculate a map similar to the identity map, but with the last input 1294 // and output dimension not related. 1295 // [i0, i1, i2, i3] -> [i0, i1, i2, o0] 1296 isl_space *Space = isl_set_get_space(loopDomain); 1297 Space = isl_space_drop_outputs(Space, 1298 isl_set_dim(loopDomain, isl_dim_set) - 2, 1); 1299 Space = isl_space_map_from_set(Space); 1300 isl_map *identity = isl_map_identity(Space); 1301 identity = isl_map_add_dims(identity, isl_dim_in, 1); 1302 identity = isl_map_add_dims(identity, isl_dim_out, 1); 1303 1304 isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); 1305 map = isl_map_intersect(map, identity); 1306 1307 isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); 1308 isl_map *lexmin = isl_map_lexmin(map); 1309 isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); 1310 1311 isl_set *elements = isl_map_range(sub); 1312 1313 if (!isl_set_is_singleton(elements)) { 1314 isl_set_free(elements); 1315 return -1; 1316 } 1317 1318 isl_point *p = isl_set_sample_point(elements); 1319 1320 isl_int v; 1321 isl_int_init(v); 1322 isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); 1323 int numberIterations = isl_int_get_si(v); 1324 isl_int_clear(v); 1325 isl_point_free(p); 1326 1327 return (numberIterations) / isl_int_get_si(f->stride) + 1; 1328 } 1329 1330 void ClastStmtCodeGen::codegenForVector(const clast_for *F) { 1331 DEBUG(dbgs() << "Vectorizing loop '" << F->iterator << "'\n";); 1332 int VectorWidth = getNumberOfIterations(F); 1333 1334 Value *LB = ExpGen.codegen(F->LB, getIntPtrTy()); 1335 1336 APInt Stride = APInt_from_MPZ(F->stride); 1337 IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType()); 1338 Stride = Stride.zext(LoopIVType->getBitWidth()); 1339 Value *StrideValue = ConstantInt::get(LoopIVType, Stride); 1340 1341 std::vector<Value*> IVS(VectorWidth); 1342 IVS[0] = LB; 1343 1344 for (int i = 1; i < VectorWidth; i++) 1345 IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); 1346 1347 isl_set *Domain = isl_set_from_cloog_domain(F->domain); 1348 1349 // Add loop iv to symbols. 1350 ClastVars[F->iterator] = LB; 1351 1352 const clast_stmt *Stmt = F->body; 1353 1354 while (Stmt) { 1355 codegen((const clast_user_stmt *)Stmt, &IVS, F->iterator, 1356 isl_set_copy(Domain)); 1357 Stmt = Stmt->next; 1358 } 1359 1360 // Loop is finished, so remove its iv from the live symbols. 1361 isl_set_free(Domain); 1362 ClastVars.erase(F->iterator); 1363 } 1364 1365 void ClastStmtCodeGen::codegen(const clast_for *f) { 1366 if ((Vector || OpenMP) && P->getAnalysis<Dependences>().isParallelFor(f)) { 1367 if (Vector && isInnermostLoop(f) && (-1 != getNumberOfIterations(f)) 1368 && (getNumberOfIterations(f) <= 16)) { 1369 codegenForVector(f); 1370 return; 1371 } 1372 1373 if (OpenMP && !parallelCodeGeneration) { 1374 parallelCodeGeneration = true; 1375 parallelLoops.push_back(f->iterator); 1376 codegenForOpenMP(f); 1377 parallelCodeGeneration = false; 1378 return; 1379 } 1380 } 1381 1382 codegenForSequential(f); 1383 } 1384 1385 Value *ClastStmtCodeGen::codegen(const clast_equation *eq) { 1386 Value *LHS = ExpGen.codegen(eq->LHS, getIntPtrTy()); 1387 Value *RHS = ExpGen.codegen(eq->RHS, getIntPtrTy()); 1388 CmpInst::Predicate P; 1389 1390 if (eq->sign == 0) 1391 P = ICmpInst::ICMP_EQ; 1392 else if (eq->sign > 0) 1393 P = ICmpInst::ICMP_SGE; 1394 else 1395 P = ICmpInst::ICMP_SLE; 1396 1397 return Builder.CreateICmp(P, LHS, RHS); 1398 } 1399 1400 void ClastStmtCodeGen::codegen(const clast_guard *g) { 1401 Function *F = Builder.GetInsertBlock()->getParent(); 1402 LLVMContext &Context = F->getContext(); 1403 1404 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1405 Builder.GetInsertPoint(), P); 1406 CondBB->setName("polly.cond"); 1407 BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); 1408 MergeBB->setName("polly.merge"); 1409 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 1410 1411 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 1412 DT.addNewBlock(ThenBB, CondBB); 1413 DT.changeImmediateDominator(MergeBB, CondBB); 1414 1415 CondBB->getTerminator()->eraseFromParent(); 1416 1417 Builder.SetInsertPoint(CondBB); 1418 1419 Value *Predicate = codegen(&(g->eq[0])); 1420 1421 for (int i = 1; i < g->n; ++i) { 1422 Value *TmpPredicate = codegen(&(g->eq[i])); 1423 Predicate = Builder.CreateAnd(Predicate, TmpPredicate); 1424 } 1425 1426 Builder.CreateCondBr(Predicate, ThenBB, MergeBB); 1427 Builder.SetInsertPoint(ThenBB); 1428 Builder.CreateBr(MergeBB); 1429 Builder.SetInsertPoint(ThenBB->begin()); 1430 1431 codegen(g->then); 1432 1433 Builder.SetInsertPoint(MergeBB->begin()); 1434 } 1435 1436 void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { 1437 if (CLAST_STMT_IS_A(stmt, stmt_root)) 1438 assert(false && "No second root statement expected"); 1439 else if (CLAST_STMT_IS_A(stmt, stmt_ass)) 1440 codegen((const clast_assignment *)stmt); 1441 else if (CLAST_STMT_IS_A(stmt, stmt_user)) 1442 codegen((const clast_user_stmt *)stmt); 1443 else if (CLAST_STMT_IS_A(stmt, stmt_block)) 1444 codegen((const clast_block *)stmt); 1445 else if (CLAST_STMT_IS_A(stmt, stmt_for)) 1446 codegen((const clast_for *)stmt); 1447 else if (CLAST_STMT_IS_A(stmt, stmt_guard)) 1448 codegen((const clast_guard *)stmt); 1449 1450 if (stmt->next) 1451 codegen(stmt->next); 1452 } 1453 1454 void ClastStmtCodeGen::addParameters(const CloogNames *names) { 1455 SCEVExpander Rewriter(P->getAnalysis<ScalarEvolution>(), "polly"); 1456 1457 int i = 0; 1458 for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); 1459 PI != PE; ++PI) { 1460 assert(i < names->nb_parameters && "Not enough parameter names"); 1461 1462 const SCEV *Param = *PI; 1463 Type *Ty = Param->getType(); 1464 1465 Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); 1466 Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); 1467 ClastVars[names->parameters[i]] = V; 1468 1469 ++i; 1470 } 1471 } 1472 1473 void ClastStmtCodeGen::codegen(const clast_root *r) { 1474 addParameters(r->names); 1475 1476 parallelCodeGeneration = false; 1477 1478 const clast_stmt *stmt = (const clast_stmt*) r; 1479 if (stmt->next) 1480 codegen(stmt->next); 1481 } 1482 1483 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P) : 1484 S(scop), P(P), Builder(B), ExpGen(Builder, ClastVars) {} 1485 1486 namespace { 1487 class CodeGeneration : public ScopPass { 1488 Region *region; 1489 Scop *S; 1490 DominatorTree *DT; 1491 RegionInfo *RI; 1492 1493 std::vector<std::string> parallelLoops; 1494 1495 public: 1496 static char ID; 1497 1498 CodeGeneration() : ScopPass(ID) {} 1499 1500 // Split the entry edge of the region and generate a new basic block on this 1501 // edge. This function also updates ScopInfo and RegionInfo. 1502 // 1503 // @param region The region where the entry edge will be splitted. 1504 BasicBlock *splitEdgeAdvanced(Region *region) { 1505 BasicBlock *newBlock; 1506 BasicBlock *splitBlock; 1507 1508 newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this); 1509 1510 if (DT->dominates(region->getEntry(), newBlock)) { 1511 BasicBlock *OldBlock = region->getEntry(); 1512 std::string OldName = OldBlock->getName(); 1513 1514 // Update ScopInfo. 1515 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) 1516 if ((*SI)->getBasicBlock() == OldBlock) { 1517 (*SI)->setBasicBlock(newBlock); 1518 break; 1519 } 1520 1521 // Update RegionInfo. 1522 splitBlock = OldBlock; 1523 OldBlock->setName("polly.split"); 1524 newBlock->setName(OldName); 1525 region->replaceEntry(newBlock); 1526 RI->setRegionFor(newBlock, region); 1527 } else { 1528 RI->setRegionFor(newBlock, region->getParent()); 1529 splitBlock = newBlock; 1530 } 1531 1532 return splitBlock; 1533 } 1534 1535 // Create a split block that branches either to the old code or to a new basic 1536 // block where the new code can be inserted. 1537 // 1538 // @param Builder A builder that will be set to point to a basic block, where 1539 // the new code can be generated. 1540 // @return The split basic block. 1541 BasicBlock *addSplitAndStartBlock(IRBuilder<> *Builder) { 1542 BasicBlock *StartBlock, *SplitBlock; 1543 1544 SplitBlock = splitEdgeAdvanced(region); 1545 SplitBlock->setName("polly.split_new_and_old"); 1546 Function *F = SplitBlock->getParent(); 1547 StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F); 1548 SplitBlock->getTerminator()->eraseFromParent(); 1549 Builder->SetInsertPoint(SplitBlock); 1550 Builder->CreateCondBr(Builder->getTrue(), StartBlock, region->getEntry()); 1551 DT->addNewBlock(StartBlock, SplitBlock); 1552 Builder->SetInsertPoint(StartBlock); 1553 return SplitBlock; 1554 } 1555 1556 // Merge the control flow of the newly generated code with the existing code. 1557 // 1558 // @param SplitBlock The basic block where the control flow was split between 1559 // old and new version of the Scop. 1560 // @param Builder An IRBuilder that points to the last instruction of the 1561 // newly generated code. 1562 void mergeControlFlow(BasicBlock *SplitBlock, IRBuilder<> *Builder) { 1563 BasicBlock *MergeBlock; 1564 Region *R = region; 1565 1566 if (R->getExit()->getSinglePredecessor()) 1567 // No splitEdge required. A block with a single predecessor cannot have 1568 // PHI nodes that would complicate life. 1569 MergeBlock = R->getExit(); 1570 else { 1571 MergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this); 1572 // SplitEdge will never split R->getExit(), as R->getExit() has more than 1573 // one predecessor. Hence, mergeBlock is always a newly generated block. 1574 R->replaceExit(MergeBlock); 1575 } 1576 1577 Builder->CreateBr(MergeBlock); 1578 MergeBlock->setName("polly.merge_new_and_old"); 1579 1580 if (DT->dominates(SplitBlock, MergeBlock)) 1581 DT->changeImmediateDominator(MergeBlock, SplitBlock); 1582 } 1583 1584 bool runOnScop(Scop &scop) { 1585 S = &scop; 1586 region = &S->getRegion(); 1587 DT = &getAnalysis<DominatorTree>(); 1588 RI = &getAnalysis<RegionInfo>(); 1589 1590 parallelLoops.clear(); 1591 1592 assert(region->isSimple() && "Only simple regions are supported"); 1593 1594 // In the CFG the optimized code of the SCoP is generated next to the 1595 // original code. Both the new and the original version of the code remain 1596 // in the CFG. A branch statement decides which version is executed. 1597 // For now, we always execute the new version (the old one is dead code 1598 // eliminated by the cleanup passes). In the future we may decide to execute 1599 // the new version only if certain run time checks succeed. This will be 1600 // useful to support constructs for which we cannot prove all assumptions at 1601 // compile time. 1602 // 1603 // Before transformation: 1604 // 1605 // bb0 1606 // | 1607 // orig_scop 1608 // | 1609 // bb1 1610 // 1611 // After transformation: 1612 // bb0 1613 // | 1614 // polly.splitBlock 1615 // / \. 1616 // | startBlock 1617 // | | 1618 // orig_scop new_scop 1619 // \ / 1620 // \ / 1621 // bb1 (joinBlock) 1622 IRBuilder<> builder(region->getEntry()); 1623 1624 // The builder will be set to startBlock. 1625 BasicBlock *splitBlock = addSplitAndStartBlock(&builder); 1626 BasicBlock *StartBlock = builder.GetInsertBlock(); 1627 1628 mergeControlFlow(splitBlock, &builder); 1629 builder.SetInsertPoint(StartBlock->begin()); 1630 1631 ClastStmtCodeGen CodeGen(S, builder, this); 1632 CloogInfo &C = getAnalysis<CloogInfo>(); 1633 CodeGen.codegen(C.getClast()); 1634 1635 parallelLoops.insert(parallelLoops.begin(), 1636 CodeGen.getParallelLoops().begin(), 1637 CodeGen.getParallelLoops().end()); 1638 1639 return true; 1640 } 1641 1642 virtual void printScop(raw_ostream &OS) const { 1643 for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(), 1644 PE = parallelLoops.end(); PI != PE; ++PI) 1645 OS << "Parallel loop with iterator '" << *PI << "' generated\n"; 1646 } 1647 1648 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 1649 AU.addRequired<CloogInfo>(); 1650 AU.addRequired<Dependences>(); 1651 AU.addRequired<DominatorTree>(); 1652 AU.addRequired<RegionInfo>(); 1653 AU.addRequired<ScalarEvolution>(); 1654 AU.addRequired<ScopDetection>(); 1655 AU.addRequired<ScopInfo>(); 1656 AU.addRequired<TargetData>(); 1657 1658 AU.addPreserved<CloogInfo>(); 1659 AU.addPreserved<Dependences>(); 1660 1661 // FIXME: We do not create LoopInfo for the newly generated loops. 1662 AU.addPreserved<LoopInfo>(); 1663 AU.addPreserved<DominatorTree>(); 1664 AU.addPreserved<ScopDetection>(); 1665 AU.addPreserved<ScalarEvolution>(); 1666 1667 // FIXME: We do not yet add regions for the newly generated code to the 1668 // region tree. 1669 AU.addPreserved<RegionInfo>(); 1670 AU.addPreserved<TempScopInfo>(); 1671 AU.addPreserved<ScopInfo>(); 1672 AU.addPreservedID(IndependentBlocksID); 1673 } 1674 }; 1675 } 1676 1677 char CodeGeneration::ID = 1; 1678 1679 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 1680 "Polly - Create LLVM-IR from SCoPs", false, false) 1681 INITIALIZE_PASS_DEPENDENCY(CloogInfo) 1682 INITIALIZE_PASS_DEPENDENCY(Dependences) 1683 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 1684 INITIALIZE_PASS_DEPENDENCY(RegionInfo) 1685 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1686 INITIALIZE_PASS_DEPENDENCY(ScopDetection) 1687 INITIALIZE_PASS_DEPENDENCY(TargetData) 1688 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 1689 "Polly - Create LLVM-IR from SCoPs", false, false) 1690 1691 Pass *polly::createCodeGenerationPass() { 1692 return new CodeGeneration(); 1693 } 1694