1 //===------ CodeGeneration.cpp - Code generate the Scops. -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using Cloog. 12 // 13 // The Scop describes the high level memory behaviour of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. Cloog is used to generate an abstract syntax tree (clast) that 16 // reflects the updated execution order. This clast is used to create new 17 // LLVM-IR that is computational equivalent to the original control flow region, 18 // but executes its code in the new execution order defined by the changed 19 // scattering. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "polly-codegen" 24 25 #include "polly/Cloog.h" 26 #include "polly/CodeGeneration.h" 27 #include "polly/Dependences.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/ScopInfo.h" 30 #include "polly/TempScopInfo.h" 31 #include "polly/Support/GICHelper.h" 32 #include "polly/LoopGenerators.h" 33 34 #include "llvm/Module.h" 35 #include "llvm/ADT/SetVector.h" 36 #include "llvm/ADT/PostOrderIterator.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/ScalarEvolutionExpander.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/IRBuilder.h" 42 #include "llvm/Target/TargetData.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 45 #define CLOOG_INT_GMP 1 46 #include "cloog/cloog.h" 47 #include "cloog/isl/cloog.h" 48 49 #include "isl/aff.h" 50 51 #include <vector> 52 #include <utility> 53 54 using namespace polly; 55 using namespace llvm; 56 57 struct isl_set; 58 59 namespace polly { 60 61 bool EnablePollyVector; 62 63 static cl::opt<bool, true> 64 Vector("enable-polly-vector", 65 cl::desc("Enable polly vector code generation"), cl::Hidden, 66 cl::location(EnablePollyVector), cl::init(false), cl::ZeroOrMore); 67 68 static cl::opt<bool> 69 OpenMP("enable-polly-openmp", 70 cl::desc("Generate OpenMP parallel code"), cl::Hidden, 71 cl::value_desc("OpenMP code generation enabled if true"), 72 cl::init(false), cl::ZeroOrMore); 73 74 static cl::opt<bool> 75 AtLeastOnce("enable-polly-atLeastOnce", 76 cl::desc("Give polly the hint, that every loop is executed at least" 77 "once"), cl::Hidden, 78 cl::value_desc("OpenMP code generation enabled if true"), 79 cl::init(false), cl::ZeroOrMore); 80 81 static cl::opt<bool> 82 Aligned("enable-polly-aligned", 83 cl::desc("Assumed aligned memory accesses."), cl::Hidden, 84 cl::value_desc("OpenMP code generation enabled if true"), 85 cl::init(false), cl::ZeroOrMore); 86 87 typedef DenseMap<const Value*, Value*> ValueMapT; 88 typedef DenseMap<const char*, Value*> CharMapT; 89 typedef std::vector<ValueMapT> VectorValueMapT; 90 91 class IslGenerator; 92 93 class IslGenerator { 94 public: 95 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) : 96 Builder(Builder), IVS(IVS) {} 97 Value *generateIslInt(__isl_take isl_int Int); 98 Value *generateIslAff(__isl_take isl_aff *Aff); 99 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 100 101 private: 102 typedef struct { 103 Value *Result; 104 class IslGenerator *Generator; 105 } IslGenInfo; 106 107 IRBuilder<> &Builder; 108 std::vector<Value *> &IVS; 109 static int mergeIslAffValues(__isl_take isl_set *Set, 110 __isl_take isl_aff *Aff, void *User); 111 }; 112 113 Value *IslGenerator::generateIslInt(isl_int Int) { 114 mpz_t IntMPZ; 115 mpz_init(IntMPZ); 116 isl_int_get_gmp(Int, IntMPZ); 117 Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ)); 118 mpz_clear(IntMPZ); 119 return IntValue; 120 } 121 122 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 123 Value *Result; 124 Value *ConstValue; 125 isl_int ConstIsl; 126 127 isl_int_init(ConstIsl); 128 isl_aff_get_constant(Aff, &ConstIsl); 129 ConstValue = generateIslInt(ConstIsl); 130 Type *Ty = Builder.getInt64Ty(); 131 132 // FIXME: We should give the constant and coefficients the right type. Here 133 // we force it into i64. 134 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 135 136 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 137 138 assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables" 139 "must match the dimension of the affine space."); 140 141 isl_int CoefficientIsl; 142 isl_int_init(CoefficientIsl); 143 144 for (unsigned int i = 0; i < NbInputDims; ++i) { 145 Value *CoefficientValue; 146 isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl); 147 148 if (isl_int_is_zero(CoefficientIsl)) 149 continue; 150 151 CoefficientValue = generateIslInt(CoefficientIsl); 152 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 153 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 154 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 155 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 156 } 157 158 isl_int_clear(CoefficientIsl); 159 isl_int_clear(ConstIsl); 160 isl_aff_free(Aff); 161 162 return Result; 163 } 164 165 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 166 __isl_take isl_aff *Aff, void *User) { 167 IslGenInfo *GenInfo = (IslGenInfo *)User; 168 169 assert((GenInfo->Result == NULL) && "Result is already set." 170 "Currently only single isl_aff is supported"); 171 assert(isl_set_plain_is_universe(Set) 172 && "Code generation failed because the set is not universe"); 173 174 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 175 176 isl_set_free(Set); 177 return 0; 178 } 179 180 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 181 IslGenInfo User; 182 User.Result = NULL; 183 User.Generator = this; 184 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 185 assert(User.Result && "Code generation for isl_pw_aff failed"); 186 187 isl_pw_aff_free(PwAff); 188 return User.Result; 189 } 190 191 /// @brief Generate a new basic block for a polyhedral statement. 192 /// 193 /// The only public function exposed is generate(). 194 class BlockGenerator { 195 public: 196 /// @brief Generate a new BasicBlock for a ScopStmt. 197 /// 198 /// @param Builder The LLVM-IR Builder used to generate the statement. The 199 /// code is generated at the location, the Builder points to. 200 /// @param Stmt The statement to code generate. 201 /// @param GlobalMap A map that defines for certain Values referenced from the 202 /// original code new Values they should be replaced with. 203 /// @param P A reference to the pass this function is called from. 204 /// The pass is needed to update other analysis. 205 static void generate(IRBuilder<> &Builder, ScopStmt &Stmt, 206 ValueMapT &GlobalMap, Pass *P) { 207 BlockGenerator Generator(Builder, Stmt, P); 208 Generator.copyBB(GlobalMap); 209 } 210 211 protected: 212 IRBuilder<> &Builder; 213 ScopStmt &Statement; 214 Pass *P; 215 216 BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P); 217 218 /// @brief Get the new version of a Value. 219 /// 220 /// @param Old The old Value. 221 /// @param BBMap A mapping from old values to their new values 222 /// (for values recalculated within this basic block). 223 /// @param GlobalMap A mapping from old values to their new values 224 /// (for values recalculated in the new ScoP, but not 225 /// within this basic block). 226 /// 227 /// @returns o The old value, if it is still valid. 228 /// o The new value, if available. 229 /// o NULL, if no value is found. 230 Value *getNewValue(const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap); 231 232 void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 233 ValueMapT &GlobalMap); 234 235 /// @brief Get the memory access offset to be added to the base address 236 std::vector<Value*> getMemoryAccessIndex(__isl_keep isl_map *AccessRelation, 237 Value *BaseAddress, ValueMapT &BBMap, 238 ValueMapT &GlobalMap); 239 240 /// @brief Get the new operand address according to the changed access in 241 /// JSCOP file. 242 Value *getNewAccessOperand(__isl_keep isl_map *NewAccessRelation, 243 Value *BaseAddress, const Value *OldOperand, 244 ValueMapT &BBMap, ValueMapT &GlobalMap); 245 246 /// @brief Generate the operand address 247 Value *generateLocationAccessed(const Instruction *Inst, 248 const Value *Pointer, ValueMapT &BBMap, 249 ValueMapT &GlobalMap); 250 251 Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap, 252 ValueMapT &GlobalMap); 253 254 Value *generateScalarStore(const StoreInst *store, ValueMapT &BBMap, 255 ValueMapT &GlobalMap); 256 257 /// @brief Copy a single Instruction. 258 /// 259 /// This copies a single Instruction and updates references to old values 260 /// with references to new values, as defined by GlobalMap and BBMap. 261 /// 262 /// @param BBMap A mapping from old values to their new values 263 /// (for values recalculated within this basic block). 264 /// @param GlobalMap A mapping from old values to their new values 265 /// (for values recalculated in the new ScoP, but not 266 /// within this basic block). 267 void copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 268 ValueMapT &GlobalMap); 269 270 /// @brief Copy the basic block. 271 /// 272 /// This copies the entire basic block and updates references to old values 273 /// with references to new values, as defined by GlobalMap. 274 /// 275 /// @param GlobalMap A mapping from old values to their new values 276 /// (for values recalculated in the new ScoP, but not 277 /// within this basic block). 278 void copyBB(ValueMapT &GlobalMap); 279 }; 280 281 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P): 282 Builder(B), Statement(Stmt), P(P) {} 283 284 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 285 ValueMapT &GlobalMap) { 286 // We assume constants never change. 287 // This avoids map lookups for many calls to this function. 288 if (isa<Constant>(Old)) 289 return const_cast<Value*>(Old); 290 291 if (GlobalMap.count(Old)) { 292 Value *New = GlobalMap[Old]; 293 294 if (Old->getType()->getScalarSizeInBits() 295 < New->getType()->getScalarSizeInBits()) 296 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 297 298 return New; 299 } 300 301 if (BBMap.count(Old)) { 302 return BBMap[Old]; 303 } 304 305 // 'Old' is within the original SCoP, but was not rewritten. 306 // 307 // Such values appear, if they only calculate information already available in 308 // the polyhedral description (e.g. an induction variable increment). They 309 // can be safely ignored. 310 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) 311 if (Statement.getParent()->getRegion().contains(Inst->getParent())) 312 return NULL; 313 314 // Everything else is probably a scop-constant value defined as global, 315 // function parameter or an instruction not within the scop. 316 return const_cast<Value*>(Old); 317 } 318 319 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 320 ValueMapT &GlobalMap) { 321 Instruction *NewInst = Inst->clone(); 322 323 // Replace old operands with the new ones. 324 for (Instruction::const_op_iterator OI = Inst->op_begin(), 325 OE = Inst->op_end(); OI != OE; ++OI) { 326 Value *OldOperand = *OI; 327 Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap); 328 329 if (!NewOperand) { 330 assert(!isa<StoreInst>(NewInst) 331 && "Store instructions are always needed!"); 332 delete NewInst; 333 return; 334 } 335 336 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 337 } 338 339 Builder.Insert(NewInst); 340 BBMap[Inst] = NewInst; 341 342 if (!NewInst->getType()->isVoidTy()) 343 NewInst->setName("p_" + Inst->getName()); 344 } 345 346 std::vector<Value*> BlockGenerator::getMemoryAccessIndex( 347 __isl_keep isl_map *AccessRelation, Value *BaseAddress, 348 ValueMapT &BBMap, ValueMapT &GlobalMap) { 349 350 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) 351 && "Only single dimensional access functions supported"); 352 353 std::vector<Value *> IVS; 354 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 355 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 356 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap); 357 IVS.push_back(NewIV); 358 } 359 360 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 361 IslGenerator IslGen(Builder, IVS); 362 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 363 364 Type *Ty = Builder.getInt64Ty(); 365 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 366 367 std::vector<Value*> IndexArray; 368 Value *NullValue = Constant::getNullValue(Ty); 369 IndexArray.push_back(NullValue); 370 IndexArray.push_back(OffsetValue); 371 return IndexArray; 372 } 373 374 Value *BlockGenerator::getNewAccessOperand( 375 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, const Value 376 *OldOperand, ValueMapT &BBMap, ValueMapT &GlobalMap) { 377 std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation, 378 BaseAddress, 379 BBMap, GlobalMap); 380 Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, 381 "p_newarrayidx_"); 382 return NewOperand; 383 } 384 385 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, 386 const Value *Pointer, 387 ValueMapT &BBMap, 388 ValueMapT &GlobalMap) { 389 MemoryAccess &Access = Statement.getAccessFor(Inst); 390 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 391 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 392 393 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) 394 && "Current and new access function use different spaces"); 395 396 Value *NewPointer; 397 398 if (!NewAccessRelation) { 399 NewPointer = getNewValue(Pointer, BBMap, GlobalMap); 400 } else { 401 Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr()); 402 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, Pointer, 403 BBMap, GlobalMap); 404 } 405 406 isl_map_free(CurrentAccessRelation); 407 isl_map_free(NewAccessRelation); 408 return NewPointer; 409 } 410 411 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, 412 ValueMapT &BBMap, 413 ValueMapT &GlobalMap) { 414 const Value *Pointer = Load->getPointerOperand(); 415 const Instruction *Inst = dyn_cast<Instruction>(Load); 416 Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap); 417 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 418 Load->getName() + "_p_scalar_"); 419 return ScalarLoad; 420 } 421 422 Value *BlockGenerator::generateScalarStore(const StoreInst *Store, 423 ValueMapT &BBMap, 424 ValueMapT &GlobalMap) { 425 const Value *Pointer = Store->getPointerOperand(); 426 Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap, 427 GlobalMap); 428 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap); 429 430 return Builder.CreateStore(ValueOperand, NewPointer); 431 } 432 433 void BlockGenerator::copyInstruction(const Instruction *Inst, 434 ValueMapT &BBMap, ValueMapT &GlobalMap) { 435 // Terminator instructions control the control flow. They are explicitly 436 // expressed in the clast and do not need to be copied. 437 if (Inst->isTerminator()) 438 return; 439 440 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 441 BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap); 442 return; 443 } 444 445 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 446 BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap); 447 return; 448 } 449 450 copyInstScalar(Inst, BBMap, GlobalMap); 451 } 452 453 454 void BlockGenerator::copyBB(ValueMapT &GlobalMap) { 455 BasicBlock *BB = Statement.getBasicBlock(); 456 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 457 Builder.GetInsertPoint(), P); 458 CopyBB->setName("polly.stmt." + BB->getName()); 459 Builder.SetInsertPoint(CopyBB->begin()); 460 461 ValueMapT BBMap; 462 463 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 464 ++II) 465 copyInstruction(II, BBMap, GlobalMap); 466 } 467 468 /// @brief Generate a new vector basic block for a polyhedral statement. 469 /// 470 /// The only public function exposed is generate(). 471 class VectorBlockGenerator : BlockGenerator { 472 public: 473 /// @brief Generate a new vector basic block for a ScoPStmt. 474 /// 475 /// This code generation is similar to the normal, scalar code generation, 476 /// except that each instruction is code generated for several vector lanes 477 /// at a time. If possible instructions are issued as actual vector 478 /// instructions, but e.g. for address calculation instructions we currently 479 /// generate scalar instructions for each vector lane. 480 /// 481 /// @param Builder The LLVM-IR Builder used to generate the statement. The 482 /// code is generated at the location, the builder points 483 /// to. 484 /// @param Stmt The statement to code generate. 485 /// @param GlobalMaps A vector of maps that define for certain Values 486 /// referenced from the original code new Values they should 487 /// be replaced with. Each map in the vector of maps is 488 /// used for one vector lane. The number of elements in the 489 /// vector defines the width of the generated vector 490 /// instructions. 491 /// @param P A reference to the pass this function is called from. 492 /// The pass is needed to update other analysis. 493 static void generate(IRBuilder<> &B, ScopStmt &Stmt, 494 VectorValueMapT &GlobalMaps, __isl_keep isl_set *Domain, 495 Pass *P) { 496 VectorBlockGenerator Generator(B, GlobalMaps, Stmt, Domain, P); 497 Generator.copyBB(); 498 } 499 500 private: 501 // This is a vector of global value maps. The first map is used for the first 502 // vector lane, ... 503 // Each map, contains information about Instructions in the old ScoP, which 504 // are recalculated in the new SCoP. When copying the basic block, we replace 505 // all referenes to the old instructions with their recalculated values. 506 VectorValueMapT &GlobalMaps; 507 508 isl_set *Domain; 509 510 VectorBlockGenerator(IRBuilder<> &B, VectorValueMapT &GlobalMaps, 511 ScopStmt &Stmt, __isl_keep isl_set *Domain, Pass *P); 512 513 int getVectorWidth(); 514 515 Value *getVectorValue(const Value *Old, ValueMapT &VectorMap, 516 VectorValueMapT &ScalarMaps); 517 518 Type *getVectorPtrTy(const Value *V, int Width); 519 520 /// @brief Load a vector from a set of adjacent scalars 521 /// 522 /// In case a set of scalars is known to be next to each other in memory, 523 /// create a vector load that loads those scalars 524 /// 525 /// %vector_ptr= bitcast double* %p to <4 x double>* 526 /// %vec_full = load <4 x double>* %vector_ptr 527 /// 528 Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap); 529 530 /// @brief Load a vector initialized from a single scalar in memory 531 /// 532 /// In case all elements of a vector are initialized to the same 533 /// scalar value, this value is loaded and shuffeled into all elements 534 /// of the vector. 535 /// 536 /// %splat_one = load <1 x double>* %p 537 /// %splat = shufflevector <1 x double> %splat_one, <1 x 538 /// double> %splat_one, <4 x i32> zeroinitializer 539 /// 540 Value *generateStrideZeroLoad(const LoadInst *Load, ValueMapT &BBMap); 541 542 /// @Load a vector from scalars distributed in memory 543 /// 544 /// In case some scalars a distributed randomly in memory. Create a vector 545 /// by loading each scalar and by inserting one after the other into the 546 /// vector. 547 /// 548 /// %scalar_1= load double* %p_1 549 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 550 /// %scalar 2 = load double* %p_2 551 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 552 /// 553 Value *generateUnknownStrideLoad(const LoadInst *Load, 554 VectorValueMapT &ScalarMaps); 555 556 void generateLoad(const LoadInst *Load, ValueMapT &VectorMap, 557 VectorValueMapT &ScalarMaps); 558 559 void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &VectorMap, 560 VectorValueMapT &ScalarMaps); 561 562 void copyBinaryInst(const BinaryOperator *Inst, ValueMapT &VectorMap, 563 VectorValueMapT &ScalarMaps); 564 565 void copyStore(const StoreInst *Store, ValueMapT &VectorMap, 566 VectorValueMapT &ScalarMaps); 567 568 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); 569 570 void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap, 571 VectorValueMapT &ScalarMaps); 572 573 void copyBB(); 574 }; 575 576 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, 577 VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain, 578 Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), 579 Domain(Domain) { 580 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 581 assert(Domain && "No statement domain provided"); 582 } 583 584 Value *VectorBlockGenerator::getVectorValue(const Value *Old, 585 ValueMapT &VectorMap, 586 VectorValueMapT &ScalarMaps) { 587 if (VectorMap.count(Old)) 588 return VectorMap[Old]; 589 590 int Width = getVectorWidth(); 591 592 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 593 594 for (int Lane = 0; Lane < Width; Lane++) 595 Vector = Builder.CreateInsertElement(Vector, 596 getNewValue(Old, 597 ScalarMaps[Lane], 598 GlobalMaps[Lane]), 599 Builder.getInt32(Lane)); 600 601 VectorMap[Old] = Vector; 602 603 return Vector; 604 } 605 606 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 607 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 608 assert(PointerTy && "PointerType expected"); 609 610 Type *ScalarType = PointerTy->getElementType(); 611 VectorType *VectorType = VectorType::get(ScalarType, Width); 612 613 return PointerType::getUnqual(VectorType); 614 } 615 616 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 617 ValueMapT &BBMap) { 618 const Value *Pointer = Load->getPointerOperand(); 619 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 620 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 621 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 622 "vector_ptr"); 623 LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, 624 Load->getName() + "_p_vec_full"); 625 if (!Aligned) 626 VecLoad->setAlignment(8); 627 628 return VecLoad; 629 } 630 631 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 632 ValueMapT &BBMap) { 633 const Value *Pointer = Load->getPointerOperand(); 634 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 635 Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); 636 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 637 Load->getName() + "_p_vec_p"); 638 LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr, 639 Load->getName() + "_p_splat_one"); 640 641 if (!Aligned) 642 ScalarLoad->setAlignment(8); 643 644 Constant *SplatVector = 645 Constant::getNullValue(VectorType::get(Builder.getInt32Ty(), 646 getVectorWidth())); 647 648 Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, 649 SplatVector, 650 Load->getName() 651 + "_p_splat"); 652 return VectorLoad; 653 } 654 655 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, 656 VectorValueMapT &ScalarMaps) { 657 int VectorWidth = getVectorWidth(); 658 const Value *Pointer = Load->getPointerOperand(); 659 VectorType *VectorType = VectorType::get( 660 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 661 662 Value *Vector = UndefValue::get(VectorType); 663 664 for (int i = 0; i < VectorWidth; i++) { 665 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 666 Value *ScalarLoad = Builder.CreateLoad(NewPointer, 667 Load->getName() + "_p_scalar_"); 668 Vector = Builder.CreateInsertElement(Vector, ScalarLoad, 669 Builder.getInt32(i), 670 Load->getName() + "_p_vec_"); 671 } 672 673 return Vector; 674 } 675 676 void VectorBlockGenerator::generateLoad(const LoadInst *Load, 677 ValueMapT &VectorMap, 678 VectorValueMapT &ScalarMaps) { 679 Value *NewLoad; 680 681 MemoryAccess &Access = Statement.getAccessFor(Load); 682 683 if (Access.isStrideZero(isl_set_copy(Domain))) 684 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 685 else if (Access.isStrideOne(isl_set_copy(Domain))) 686 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 687 else 688 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 689 690 VectorMap[Load] = NewLoad; 691 } 692 693 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 694 ValueMapT &VectorMap, 695 VectorValueMapT &ScalarMaps) { 696 int VectorWidth = getVectorWidth(); 697 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, 698 ScalarMaps); 699 700 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 701 702 const CastInst *Cast = dyn_cast<CastInst>(Inst); 703 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 704 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 705 } 706 707 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 708 ValueMapT &VectorMap, 709 VectorValueMapT &ScalarMaps) { 710 Value *OpZero = Inst->getOperand(0); 711 Value *OpOne = Inst->getOperand(1); 712 713 Value *NewOpZero, *NewOpOne; 714 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps); 715 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps); 716 717 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, 718 NewOpOne, 719 Inst->getName() + "p_vec"); 720 VectorMap[Inst] = NewInst; 721 } 722 723 void VectorBlockGenerator::copyStore(const StoreInst *Store, 724 ValueMapT &VectorMap, 725 VectorValueMapT &ScalarMaps) { 726 int VectorWidth = getVectorWidth(); 727 728 MemoryAccess &Access = Statement.getAccessFor(Store); 729 730 const Value *Pointer = Store->getPointerOperand(); 731 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, 732 ScalarMaps); 733 734 if (Access.isStrideOne(isl_set_copy(Domain))) { 735 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 736 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]); 737 738 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 739 "vector_ptr"); 740 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 741 742 if (!Aligned) 743 Store->setAlignment(8); 744 } else { 745 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 746 Value *Scalar = Builder.CreateExtractElement(Vector, 747 Builder.getInt32(i)); 748 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); 749 Builder.CreateStore(Scalar, NewPointer); 750 } 751 } 752 } 753 754 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 755 ValueMapT &VectorMap) { 756 for (Instruction::const_op_iterator OI = Inst->op_begin(), 757 OE = Inst->op_end(); OI != OE; ++OI) 758 if (VectorMap.count(*OI)) 759 return true; 760 return false; 761 } 762 763 int VectorBlockGenerator::getVectorWidth() { 764 return GlobalMaps.size(); 765 } 766 767 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 768 ValueMapT &VectorMap, 769 VectorValueMapT &ScalarMaps) { 770 // Terminator instructions control the control flow. They are explicitly 771 // expressed in the clast and do not need to be copied. 772 if (Inst->isTerminator()) 773 return; 774 775 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 776 generateLoad(Load, VectorMap, ScalarMaps); 777 return; 778 } 779 780 if (hasVectorOperands(Inst, VectorMap)) { 781 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 782 copyStore(Store, VectorMap, ScalarMaps); 783 return; 784 } 785 786 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 787 copyUnaryInst(Unary, VectorMap, ScalarMaps); 788 return; 789 } 790 791 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 792 copyBinaryInst(Binary, VectorMap, ScalarMaps); 793 return; 794 } 795 796 llvm_unreachable("Cannot issue vector code for this instruction"); 797 } 798 799 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 800 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]); 801 } 802 803 void VectorBlockGenerator::copyBB() { 804 BasicBlock *BB = Statement.getBasicBlock(); 805 BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), 806 Builder.GetInsertPoint(), P); 807 CopyBB->setName("polly.stmt." + BB->getName()); 808 Builder.SetInsertPoint(CopyBB->begin()); 809 810 // Create two maps that store the mapping from the original instructions of 811 // the old basic block to their copies in the new basic block. Those maps 812 // are basic block local. 813 // 814 // As vector code generation is supported there is one map for scalar values 815 // and one for vector values. 816 // 817 // In case we just do scalar code generation, the vectorMap is not used and 818 // the scalarMap has just one dimension, which contains the mapping. 819 // 820 // In case vector code generation is done, an instruction may either appear 821 // in the vector map once (as it is calculating >vectorwidth< values at a 822 // time. Or (if the values are calculated using scalar operations), it 823 // appears once in every dimension of the scalarMap. 824 VectorValueMapT ScalarBlockMap(getVectorWidth()); 825 ValueMapT VectorBlockMap; 826 827 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); 828 II != IE; ++II) 829 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 830 } 831 832 /// Class to generate LLVM-IR that calculates the value of a clast_expr. 833 class ClastExpCodeGen { 834 IRBuilder<> &Builder; 835 const CharMapT &IVS; 836 837 Value *codegen(const clast_name *e, Type *Ty); 838 Value *codegen(const clast_term *e, Type *Ty); 839 Value *codegen(const clast_binary *e, Type *Ty); 840 Value *codegen(const clast_reduction *r, Type *Ty); 841 public: 842 843 // A generator for clast expressions. 844 // 845 // @param B The IRBuilder that defines where the code to calculate the 846 // clast expressions should be inserted. 847 // @param IVMAP A Map that translates strings describing the induction 848 // variables to the Values* that represent these variables 849 // on the LLVM side. 850 ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap); 851 852 // Generates code to calculate a given clast expression. 853 // 854 // @param e The expression to calculate. 855 // @return The Value that holds the result. 856 Value *codegen(const clast_expr *e, Type *Ty); 857 }; 858 859 Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) { 860 CharMapT::const_iterator I = IVS.find(e->name); 861 862 assert(I != IVS.end() && "Clast name not found"); 863 864 return Builder.CreateSExtOrBitCast(I->second, Ty); 865 } 866 867 Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) { 868 APInt a = APInt_from_MPZ(e->val); 869 870 Value *ConstOne = ConstantInt::get(Builder.getContext(), a); 871 ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); 872 873 if (!e->var) 874 return ConstOne; 875 876 Value *var = codegen(e->var, Ty); 877 return Builder.CreateMul(ConstOne, var); 878 } 879 880 Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { 881 Value *LHS = codegen(e->LHS, Ty); 882 883 APInt RHS_AP = APInt_from_MPZ(e->RHS); 884 885 Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); 886 RHS = Builder.CreateSExtOrBitCast(RHS, Ty); 887 888 switch (e->type) { 889 case clast_bin_mod: 890 return Builder.CreateSRem(LHS, RHS); 891 case clast_bin_fdiv: 892 { 893 // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d 894 Value *One = ConstantInt::get(Ty, 1); 895 Value *Zero = ConstantInt::get(Ty, 0); 896 Value *Sum1 = Builder.CreateSub(LHS, RHS); 897 Value *Sum2 = Builder.CreateAdd(Sum1, One); 898 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 899 Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); 900 return Builder.CreateSDiv(Dividend, RHS); 901 } 902 case clast_bin_cdiv: 903 { 904 // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d 905 Value *One = ConstantInt::get(Ty, 1); 906 Value *Zero = ConstantInt::get(Ty, 0); 907 Value *Sum1 = Builder.CreateAdd(LHS, RHS); 908 Value *Sum2 = Builder.CreateSub(Sum1, One); 909 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 910 Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); 911 return Builder.CreateSDiv(Dividend, RHS); 912 } 913 case clast_bin_div: 914 return Builder.CreateSDiv(LHS, RHS); 915 }; 916 917 llvm_unreachable("Unknown clast binary expression type"); 918 } 919 920 Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { 921 assert(( r->type == clast_red_min 922 || r->type == clast_red_max 923 || r->type == clast_red_sum) 924 && "Clast reduction type not supported"); 925 Value *old = codegen(r->elts[0], Ty); 926 927 for (int i=1; i < r->n; ++i) { 928 Value *exprValue = codegen(r->elts[i], Ty); 929 930 switch (r->type) { 931 case clast_red_min: 932 { 933 Value *cmp = Builder.CreateICmpSLT(old, exprValue); 934 old = Builder.CreateSelect(cmp, old, exprValue); 935 break; 936 } 937 case clast_red_max: 938 { 939 Value *cmp = Builder.CreateICmpSGT(old, exprValue); 940 old = Builder.CreateSelect(cmp, old, exprValue); 941 break; 942 } 943 case clast_red_sum: 944 old = Builder.CreateAdd(old, exprValue); 945 break; 946 } 947 } 948 949 return old; 950 } 951 952 ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap) 953 : Builder(B), IVS(IVMap) {} 954 955 Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { 956 switch(e->type) { 957 case clast_expr_name: 958 return codegen((const clast_name *)e, Ty); 959 case clast_expr_term: 960 return codegen((const clast_term *)e, Ty); 961 case clast_expr_bin: 962 return codegen((const clast_binary *)e, Ty); 963 case clast_expr_red: 964 return codegen((const clast_reduction *)e, Ty); 965 } 966 967 llvm_unreachable("Unknown clast expression!"); 968 } 969 970 class ClastStmtCodeGen { 971 public: 972 const std::vector<std::string> &getParallelLoops(); 973 974 private: 975 // The Scop we code generate. 976 Scop *S; 977 Pass *P; 978 979 // The Builder specifies the current location to code generate at. 980 IRBuilder<> &Builder; 981 982 // Map the Values from the old code to their counterparts in the new code. 983 ValueMapT ValueMap; 984 985 // clastVars maps from the textual representation of a clast variable to its 986 // current *Value. clast variables are scheduling variables, original 987 // induction variables or parameters. They are used either in loop bounds or 988 // to define the statement instance that is executed. 989 // 990 // for (s = 0; s < n + 3; ++i) 991 // for (t = s; t < m; ++j) 992 // Stmt(i = s + 3 * m, j = t); 993 // 994 // {s,t,i,j,n,m} is the set of clast variables in this clast. 995 CharMapT ClastVars; 996 997 // Codegenerator for clast expressions. 998 ClastExpCodeGen ExpGen; 999 1000 // Do we currently generate parallel code? 1001 bool parallelCodeGeneration; 1002 1003 std::vector<std::string> parallelLoops; 1004 1005 void codegen(const clast_assignment *a); 1006 1007 void codegen(const clast_assignment *a, ScopStmt *Statement, 1008 unsigned Dimension, int vectorDim, 1009 std::vector<ValueMapT> *VectorVMap = 0); 1010 1011 void codegenSubstitutions(const clast_stmt *Assignment, 1012 ScopStmt *Statement, int vectorDim = 0, 1013 std::vector<ValueMapT> *VectorVMap = 0); 1014 1015 void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL, 1016 const char *iterator = NULL, isl_set *scatteringDomain = 0); 1017 1018 void codegen(const clast_block *b); 1019 1020 /// @brief Create a classical sequential loop. 1021 void codegenForSequential(const clast_for *f); 1022 1023 /// @brief Create OpenMP structure values. 1024 /// 1025 /// Create a list of values that has to be stored into the OpenMP subfuncition 1026 /// structure. 1027 SetVector<Value*> getOMPValues(); 1028 1029 /// @brief Update the internal structures according to a Value Map. 1030 /// 1031 /// @param VMap A map from old to new values. 1032 /// @param Reverse If true, we assume the update should be reversed. 1033 void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1034 bool Reverse); 1035 1036 /// @brief Create an OpenMP parallel for loop. 1037 /// 1038 /// This loop reflects a loop as if it would have been created by an OpenMP 1039 /// statement. 1040 void codegenForOpenMP(const clast_for *f); 1041 1042 bool isInnermostLoop(const clast_for *f); 1043 1044 /// @brief Get the number of loop iterations for this loop. 1045 /// @param f The clast for loop to check. 1046 int getNumberOfIterations(const clast_for *f); 1047 1048 /// @brief Create vector instructions for this loop. 1049 void codegenForVector(const clast_for *f); 1050 1051 void codegen(const clast_for *f); 1052 1053 Value *codegen(const clast_equation *eq); 1054 1055 void codegen(const clast_guard *g); 1056 1057 void codegen(const clast_stmt *stmt); 1058 1059 void addParameters(const CloogNames *names); 1060 1061 IntegerType *getIntPtrTy(); 1062 1063 public: 1064 void codegen(const clast_root *r); 1065 1066 ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P); 1067 }; 1068 } 1069 1070 IntegerType *ClastStmtCodeGen::getIntPtrTy() { 1071 return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext()); 1072 } 1073 1074 const std::vector<std::string> &ClastStmtCodeGen::getParallelLoops() { 1075 return parallelLoops; 1076 } 1077 1078 void ClastStmtCodeGen::codegen(const clast_assignment *a) { 1079 Value *V= ExpGen.codegen(a->RHS, getIntPtrTy()); 1080 ClastVars[a->LHS] = V; 1081 } 1082 1083 void ClastStmtCodeGen::codegen(const clast_assignment *a, ScopStmt *Statement, 1084 unsigned Dimension, int vectorDim, 1085 std::vector<ValueMapT> *VectorVMap) { 1086 Value *RHS = ExpGen.codegen(a->RHS, getIntPtrTy()); 1087 1088 assert(!a->LHS && "Statement assignments do not have left hand side"); 1089 const PHINode *PN; 1090 PN = Statement->getInductionVariableForDimension(Dimension); 1091 const Value *V = PN; 1092 1093 if (VectorVMap) 1094 (*VectorVMap)[vectorDim][V] = RHS; 1095 1096 ValueMap[V] = RHS; 1097 } 1098 1099 void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, 1100 ScopStmt *Statement, int vectorDim, 1101 std::vector<ValueMapT> *VectorVMap) { 1102 int Dimension = 0; 1103 1104 while (Assignment) { 1105 assert(CLAST_STMT_IS_A(Assignment, stmt_ass) 1106 && "Substitions are expected to be assignments"); 1107 codegen((const clast_assignment *)Assignment, Statement, Dimension, 1108 vectorDim, VectorVMap); 1109 Assignment = Assignment->next; 1110 Dimension++; 1111 } 1112 } 1113 1114 void ClastStmtCodeGen::codegen(const clast_user_stmt *u, 1115 std::vector<Value*> *IVS , const char *iterator, 1116 isl_set *Domain) { 1117 ScopStmt *Statement = (ScopStmt *)u->statement->usr; 1118 1119 if (u->substitutions) 1120 codegenSubstitutions(u->substitutions, Statement); 1121 1122 int VectorDimensions = IVS ? IVS->size() : 1; 1123 1124 if (VectorDimensions == 1) { 1125 BlockGenerator::generate(Builder, *Statement, ValueMap, P); 1126 return; 1127 } 1128 1129 VectorValueMapT VectorMap(VectorDimensions); 1130 1131 if (IVS) { 1132 assert (u->substitutions && "Substitutions expected!"); 1133 int i = 0; 1134 for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end(); 1135 II != IE; ++II) { 1136 ClastVars[iterator] = *II; 1137 codegenSubstitutions(u->substitutions, Statement, i, &VectorMap); 1138 i++; 1139 } 1140 } 1141 1142 VectorBlockGenerator::generate(Builder, *Statement, VectorMap, Domain, P); 1143 } 1144 1145 void ClastStmtCodeGen::codegen(const clast_block *b) { 1146 if (b->body) 1147 codegen(b->body); 1148 } 1149 1150 void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { 1151 Value *LowerBound, *UpperBound, *IV, *Stride; 1152 BasicBlock *AfterBB; 1153 Type *IntPtrTy = getIntPtrTy(); 1154 1155 LowerBound = ExpGen.codegen(f->LB, IntPtrTy); 1156 UpperBound = ExpGen.codegen(f->UB, IntPtrTy); 1157 Stride = Builder.getInt(APInt_from_MPZ(f->stride)); 1158 1159 IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB); 1160 1161 // Add loop iv to symbols. 1162 ClastVars[f->iterator] = IV; 1163 1164 if (f->body) 1165 codegen(f->body); 1166 1167 // Loop is finished, so remove its iv from the live symbols. 1168 ClastVars.erase(f->iterator); 1169 Builder.SetInsertPoint(AfterBB->begin()); 1170 } 1171 1172 SetVector<Value*> ClastStmtCodeGen::getOMPValues() { 1173 SetVector<Value*> Values; 1174 1175 // The clast variables 1176 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1177 I != E; I++) 1178 Values.insert(I->second); 1179 1180 // The memory reference base addresses 1181 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { 1182 ScopStmt *Stmt = *SI; 1183 for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(), 1184 E = Stmt->memacc_end(); I != E; ++I) { 1185 Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr()); 1186 Values.insert((BaseAddr)); 1187 } 1188 } 1189 1190 return Values; 1191 } 1192 1193 void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 1194 bool Reverse) { 1195 std::set<Value*> Inserted; 1196 1197 if (Reverse) { 1198 OMPGenerator::ValueToValueMapTy ReverseMap; 1199 1200 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1201 I != E; ++I) 1202 ReverseMap.insert(std::make_pair(I->second, I->first)); 1203 1204 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1205 I != E; I++) { 1206 ClastVars[I->first] = ReverseMap[I->second]; 1207 Inserted.insert(I->second); 1208 } 1209 1210 /// FIXME: At the moment we do not reverse the update of the ValueMap. 1211 /// This is incomplet, but the failure should be obvious, such that 1212 /// we can fix this later. 1213 return; 1214 } 1215 1216 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 1217 I != E; I++) { 1218 ClastVars[I->first] = VMap[I->second]; 1219 Inserted.insert(I->second); 1220 } 1221 1222 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 1223 I != E; ++I) { 1224 if (Inserted.count(I->first)) 1225 continue; 1226 1227 ValueMap[I->first] = I->second; 1228 } 1229 } 1230 1231 static void clearDomtree(Function *F, DominatorTree &DT) { 1232 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 1233 std::vector<BasicBlock*> Nodes; 1234 for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I) 1235 Nodes.push_back(I->getBlock()); 1236 1237 for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end(); 1238 I != E; ++I) 1239 DT.eraseNode(*I); 1240 } 1241 1242 void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { 1243 Value *Stride, *LB, *UB, *IV; 1244 BasicBlock::iterator LoopBody; 1245 IntegerType *IntPtrTy = getIntPtrTy(); 1246 SetVector<Value*> Values; 1247 OMPGenerator::ValueToValueMapTy VMap; 1248 OMPGenerator OMPGen(Builder, P); 1249 1250 Stride = Builder.getInt(APInt_from_MPZ(For->stride)); 1251 Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy); 1252 LB = ExpGen.codegen(For->LB, IntPtrTy); 1253 UB = ExpGen.codegen(For->UB, IntPtrTy); 1254 1255 Values = getOMPValues(); 1256 1257 IV = OMPGen.createParallelLoop(LB, UB, Stride, Values, VMap, &LoopBody); 1258 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 1259 Builder.SetInsertPoint(LoopBody); 1260 1261 updateWithValueMap(VMap, /* reverse */ false); 1262 ClastVars[For->iterator] = IV; 1263 1264 if (For->body) 1265 codegen(For->body); 1266 1267 ClastVars.erase(For->iterator); 1268 updateWithValueMap(VMap, /* reverse */ true); 1269 1270 clearDomtree((*LoopBody).getParent()->getParent(), 1271 P->getAnalysis<DominatorTree>()); 1272 1273 Builder.SetInsertPoint(AfterLoop); 1274 } 1275 1276 bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { 1277 const clast_stmt *stmt = f->body; 1278 1279 while (stmt) { 1280 if (!CLAST_STMT_IS_A(stmt, stmt_user)) 1281 return false; 1282 1283 stmt = stmt->next; 1284 } 1285 1286 return true; 1287 } 1288 1289 int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) { 1290 isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); 1291 isl_set *tmp = isl_set_copy(loopDomain); 1292 1293 // Calculate a map similar to the identity map, but with the last input 1294 // and output dimension not related. 1295 // [i0, i1, i2, i3] -> [i0, i1, i2, o0] 1296 isl_space *Space = isl_set_get_space(loopDomain); 1297 Space = isl_space_drop_outputs(Space, 1298 isl_set_dim(loopDomain, isl_dim_set) - 2, 1); 1299 Space = isl_space_map_from_set(Space); 1300 isl_map *identity = isl_map_identity(Space); 1301 identity = isl_map_add_dims(identity, isl_dim_in, 1); 1302 identity = isl_map_add_dims(identity, isl_dim_out, 1); 1303 1304 isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); 1305 map = isl_map_intersect(map, identity); 1306 1307 isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); 1308 isl_map *lexmin = isl_map_lexmin(map); 1309 isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); 1310 1311 isl_set *elements = isl_map_range(sub); 1312 1313 if (!isl_set_is_singleton(elements)) { 1314 isl_set_free(elements); 1315 return -1; 1316 } 1317 1318 isl_point *p = isl_set_sample_point(elements); 1319 1320 isl_int v; 1321 isl_int_init(v); 1322 isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); 1323 int numberIterations = isl_int_get_si(v); 1324 isl_int_clear(v); 1325 isl_point_free(p); 1326 1327 return (numberIterations) / isl_int_get_si(f->stride) + 1; 1328 } 1329 1330 void ClastStmtCodeGen::codegenForVector(const clast_for *F) { 1331 DEBUG(dbgs() << "Vectorizing loop '" << F->iterator << "'\n";); 1332 int VectorWidth = getNumberOfIterations(F); 1333 1334 Value *LB = ExpGen.codegen(F->LB, getIntPtrTy()); 1335 1336 APInt Stride = APInt_from_MPZ(F->stride); 1337 IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType()); 1338 Stride = Stride.zext(LoopIVType->getBitWidth()); 1339 Value *StrideValue = ConstantInt::get(LoopIVType, Stride); 1340 1341 std::vector<Value*> IVS(VectorWidth); 1342 IVS[0] = LB; 1343 1344 for (int i = 1; i < VectorWidth; i++) 1345 IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); 1346 1347 isl_set *Domain = isl_set_from_cloog_domain(F->domain); 1348 1349 // Add loop iv to symbols. 1350 ClastVars[F->iterator] = LB; 1351 1352 const clast_stmt *Stmt = F->body; 1353 1354 while (Stmt) { 1355 codegen((const clast_user_stmt *)Stmt, &IVS, F->iterator, 1356 isl_set_copy(Domain)); 1357 Stmt = Stmt->next; 1358 } 1359 1360 // Loop is finished, so remove its iv from the live symbols. 1361 isl_set_free(Domain); 1362 ClastVars.erase(F->iterator); 1363 } 1364 1365 void ClastStmtCodeGen::codegen(const clast_for *f) { 1366 if ((Vector || OpenMP) && P->getAnalysis<Dependences>().isParallelFor(f)) { 1367 if (Vector && isInnermostLoop(f) && (-1 != getNumberOfIterations(f)) 1368 && (getNumberOfIterations(f) <= 16)) { 1369 codegenForVector(f); 1370 return; 1371 } 1372 1373 if (OpenMP && !parallelCodeGeneration) { 1374 parallelCodeGeneration = true; 1375 parallelLoops.push_back(f->iterator); 1376 codegenForOpenMP(f); 1377 parallelCodeGeneration = false; 1378 return; 1379 } 1380 } 1381 1382 codegenForSequential(f); 1383 } 1384 1385 Value *ClastStmtCodeGen::codegen(const clast_equation *eq) { 1386 Value *LHS = ExpGen.codegen(eq->LHS, getIntPtrTy()); 1387 Value *RHS = ExpGen.codegen(eq->RHS, getIntPtrTy()); 1388 CmpInst::Predicate P; 1389 1390 if (eq->sign == 0) 1391 P = ICmpInst::ICMP_EQ; 1392 else if (eq->sign > 0) 1393 P = ICmpInst::ICMP_SGE; 1394 else 1395 P = ICmpInst::ICMP_SLE; 1396 1397 return Builder.CreateICmp(P, LHS, RHS); 1398 } 1399 1400 void ClastStmtCodeGen::codegen(const clast_guard *g) { 1401 Function *F = Builder.GetInsertBlock()->getParent(); 1402 LLVMContext &Context = F->getContext(); 1403 1404 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1405 Builder.GetInsertPoint(), P); 1406 CondBB->setName("polly.cond"); 1407 BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); 1408 MergeBB->setName("polly.merge"); 1409 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 1410 1411 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 1412 DT.addNewBlock(ThenBB, CondBB); 1413 DT.changeImmediateDominator(MergeBB, CondBB); 1414 1415 CondBB->getTerminator()->eraseFromParent(); 1416 1417 Builder.SetInsertPoint(CondBB); 1418 1419 Value *Predicate = codegen(&(g->eq[0])); 1420 1421 for (int i = 1; i < g->n; ++i) { 1422 Value *TmpPredicate = codegen(&(g->eq[i])); 1423 Predicate = Builder.CreateAnd(Predicate, TmpPredicate); 1424 } 1425 1426 Builder.CreateCondBr(Predicate, ThenBB, MergeBB); 1427 Builder.SetInsertPoint(ThenBB); 1428 Builder.CreateBr(MergeBB); 1429 Builder.SetInsertPoint(ThenBB->begin()); 1430 1431 codegen(g->then); 1432 1433 Builder.SetInsertPoint(MergeBB->begin()); 1434 } 1435 1436 void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { 1437 if (CLAST_STMT_IS_A(stmt, stmt_root)) 1438 assert(false && "No second root statement expected"); 1439 else if (CLAST_STMT_IS_A(stmt, stmt_ass)) 1440 codegen((const clast_assignment *)stmt); 1441 else if (CLAST_STMT_IS_A(stmt, stmt_user)) 1442 codegen((const clast_user_stmt *)stmt); 1443 else if (CLAST_STMT_IS_A(stmt, stmt_block)) 1444 codegen((const clast_block *)stmt); 1445 else if (CLAST_STMT_IS_A(stmt, stmt_for)) 1446 codegen((const clast_for *)stmt); 1447 else if (CLAST_STMT_IS_A(stmt, stmt_guard)) 1448 codegen((const clast_guard *)stmt); 1449 1450 if (stmt->next) 1451 codegen(stmt->next); 1452 } 1453 1454 void ClastStmtCodeGen::addParameters(const CloogNames *names) { 1455 SCEVExpander Rewriter(P->getAnalysis<ScalarEvolution>(), "polly"); 1456 1457 int i = 0; 1458 for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); 1459 PI != PE; ++PI) { 1460 assert(i < names->nb_parameters && "Not enough parameter names"); 1461 1462 const SCEV *Param = *PI; 1463 Type *Ty = Param->getType(); 1464 1465 Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); 1466 Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); 1467 ClastVars[names->parameters[i]] = V; 1468 1469 ++i; 1470 } 1471 } 1472 1473 void ClastStmtCodeGen::codegen(const clast_root *r) { 1474 addParameters(r->names); 1475 1476 parallelCodeGeneration = false; 1477 1478 const clast_stmt *stmt = (const clast_stmt*) r; 1479 if (stmt->next) 1480 codegen(stmt->next); 1481 } 1482 1483 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P) : 1484 S(scop), P(P), Builder(B), ExpGen(Builder, ClastVars) {} 1485 1486 namespace { 1487 class CodeGeneration : public ScopPass { 1488 Region *region; 1489 Scop *S; 1490 DominatorTree *DT; 1491 RegionInfo *RI; 1492 1493 std::vector<std::string> parallelLoops; 1494 1495 public: 1496 static char ID; 1497 1498 CodeGeneration() : ScopPass(ID) {} 1499 1500 // Split the entry edge of the region and generate a new basic block on this 1501 // edge. This function also updates ScopInfo and RegionInfo. 1502 // 1503 // @param region The region where the entry edge will be splitted. 1504 BasicBlock *splitEdgeAdvanced(Region *region) { 1505 BasicBlock *newBlock; 1506 BasicBlock *splitBlock; 1507 1508 newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this); 1509 1510 if (DT->dominates(region->getEntry(), newBlock)) { 1511 BasicBlock *OldBlock = region->getEntry(); 1512 std::string OldName = OldBlock->getName(); 1513 1514 // Update ScopInfo. 1515 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) 1516 if ((*SI)->getBasicBlock() == OldBlock) { 1517 (*SI)->setBasicBlock(newBlock); 1518 break; 1519 } 1520 1521 // Update RegionInfo. 1522 splitBlock = OldBlock; 1523 OldBlock->setName("polly.split"); 1524 newBlock->setName(OldName); 1525 region->replaceEntry(newBlock); 1526 RI->setRegionFor(newBlock, region); 1527 } else { 1528 RI->setRegionFor(newBlock, region->getParent()); 1529 splitBlock = newBlock; 1530 } 1531 1532 return splitBlock; 1533 } 1534 1535 // Create a split block that branches either to the old code or to a new basic 1536 // block where the new code can be inserted. 1537 // 1538 // @param Builder A builder that will be set to point to a basic block, where 1539 // the new code can be generated. 1540 // @return The split basic block. 1541 BasicBlock *addSplitAndStartBlock(IRBuilder<> *Builder) { 1542 BasicBlock *StartBlock, *SplitBlock; 1543 1544 SplitBlock = splitEdgeAdvanced(region); 1545 SplitBlock->setName("polly.split_new_and_old"); 1546 Function *F = SplitBlock->getParent(); 1547 StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F); 1548 SplitBlock->getTerminator()->eraseFromParent(); 1549 Builder->SetInsertPoint(SplitBlock); 1550 Builder->CreateCondBr(Builder->getTrue(), StartBlock, region->getEntry()); 1551 DT->addNewBlock(StartBlock, SplitBlock); 1552 Builder->SetInsertPoint(StartBlock); 1553 return SplitBlock; 1554 } 1555 1556 // Merge the control flow of the newly generated code with the existing code. 1557 // 1558 // @param SplitBlock The basic block where the control flow was split between 1559 // old and new version of the Scop. 1560 // @param Builder An IRBuilder that points to the last instruction of the 1561 // newly generated code. 1562 void mergeControlFlow(BasicBlock *SplitBlock, IRBuilder<> *Builder) { 1563 BasicBlock *MergeBlock; 1564 Region *R = region; 1565 1566 if (R->getExit()->getSinglePredecessor()) 1567 // No splitEdge required. A block with a single predecessor cannot have 1568 // PHI nodes that would complicate life. 1569 MergeBlock = R->getExit(); 1570 else { 1571 MergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this); 1572 // SplitEdge will never split R->getExit(), as R->getExit() has more than 1573 // one predecessor. Hence, mergeBlock is always a newly generated block. 1574 R->replaceExit(MergeBlock); 1575 } 1576 1577 Builder->CreateBr(MergeBlock); 1578 MergeBlock->setName("polly.merge_new_and_old"); 1579 1580 if (DT->dominates(SplitBlock, MergeBlock)) 1581 DT->changeImmediateDominator(MergeBlock, SplitBlock); 1582 } 1583 1584 bool runOnScop(Scop &scop) { 1585 S = &scop; 1586 region = &S->getRegion(); 1587 DT = &getAnalysis<DominatorTree>(); 1588 RI = &getAnalysis<RegionInfo>(); 1589 1590 parallelLoops.clear(); 1591 1592 assert(region->isSimple() && "Only simple regions are supported"); 1593 1594 // In the CFG the optimized code of the SCoP is generated next to the 1595 // original code. Both the new and the original version of the code remain 1596 // in the CFG. A branch statement decides which version is executed. 1597 // For now, we always execute the new version (the old one is dead code 1598 // eliminated by the cleanup passes). In the future we may decide to execute 1599 // the new version only if certain run time checks succeed. This will be 1600 // useful to support constructs for which we cannot prove all assumptions at 1601 // compile time. 1602 // 1603 // Before transformation: 1604 // 1605 // bb0 1606 // | 1607 // orig_scop 1608 // | 1609 // bb1 1610 // 1611 // After transformation: 1612 // bb0 1613 // | 1614 // polly.splitBlock 1615 // / \. 1616 // | startBlock 1617 // | | 1618 // orig_scop new_scop 1619 // \ / 1620 // \ / 1621 // bb1 (joinBlock) 1622 IRBuilder<> builder(region->getEntry()); 1623 1624 // The builder will be set to startBlock. 1625 BasicBlock *splitBlock = addSplitAndStartBlock(&builder); 1626 BasicBlock *StartBlock = builder.GetInsertBlock(); 1627 1628 mergeControlFlow(splitBlock, &builder); 1629 builder.SetInsertPoint(StartBlock->begin()); 1630 1631 ClastStmtCodeGen CodeGen(S, builder, this); 1632 CloogInfo &C = getAnalysis<CloogInfo>(); 1633 CodeGen.codegen(C.getClast()); 1634 1635 parallelLoops.insert(parallelLoops.begin(), 1636 CodeGen.getParallelLoops().begin(), 1637 CodeGen.getParallelLoops().end()); 1638 1639 return true; 1640 } 1641 1642 virtual void printScop(raw_ostream &OS) const { 1643 for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(), 1644 PE = parallelLoops.end(); PI != PE; ++PI) 1645 OS << "Parallel loop with iterator '" << *PI << "' generated\n"; 1646 } 1647 1648 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 1649 AU.addRequired<CloogInfo>(); 1650 AU.addRequired<Dependences>(); 1651 AU.addRequired<DominatorTree>(); 1652 AU.addRequired<RegionInfo>(); 1653 AU.addRequired<ScalarEvolution>(); 1654 AU.addRequired<ScopDetection>(); 1655 AU.addRequired<ScopInfo>(); 1656 AU.addRequired<TargetData>(); 1657 1658 AU.addPreserved<CloogInfo>(); 1659 AU.addPreserved<Dependences>(); 1660 1661 // FIXME: We do not create LoopInfo for the newly generated loops. 1662 AU.addPreserved<LoopInfo>(); 1663 AU.addPreserved<DominatorTree>(); 1664 AU.addPreserved<ScopDetection>(); 1665 AU.addPreserved<ScalarEvolution>(); 1666 1667 // FIXME: We do not yet add regions for the newly generated code to the 1668 // region tree. 1669 AU.addPreserved<RegionInfo>(); 1670 AU.addPreserved<TempScopInfo>(); 1671 AU.addPreserved<ScopInfo>(); 1672 AU.addPreservedID(IndependentBlocksID); 1673 } 1674 }; 1675 } 1676 1677 char CodeGeneration::ID = 1; 1678 1679 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 1680 "Polly - Create LLVM-IR from SCoPs", false, false) 1681 INITIALIZE_PASS_DEPENDENCY(CloogInfo) 1682 INITIALIZE_PASS_DEPENDENCY(Dependences) 1683 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 1684 INITIALIZE_PASS_DEPENDENCY(RegionInfo) 1685 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1686 INITIALIZE_PASS_DEPENDENCY(ScopDetection) 1687 INITIALIZE_PASS_DEPENDENCY(TargetData) 1688 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 1689 "Polly - Create LLVM-IR from SCoPs", false, false) 1690 1691 Pass *polly::createCodeGenerationPass() { 1692 return new CodeGeneration(); 1693 } 1694