//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume aligned memory accesses."), cl::Hidden,
        cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
GroupedUnrolling("enable-polly-grouped-unroll",
                 cl::desc("Perform grouped unrolling, but don't generate SIMD "
                          "instructions"), cl::Hidden, cl::init(false),
                 cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in two
///   Value to Value maps (GlobalMap and BBMap). If they are found they are
///   replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
///
/// How can this be used?
/// =====================
///
/// SCEVRewriter-based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
/// o Values calculated within a basic block
/// o Values representable by SCEV
///
/// During code generation we can ignore all instructions except:
///
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block
///   - Store instructions
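///
/// For illustration, assume a loop %loop whose canonical induction variable
/// has been mapped to a new value %civ via GlobalMap. Conceptually, the
/// rewriter then performs
///
///   {%base, +, %step}<%loop>   ==>   %base + %civ * %step
///
/// so that SCEVExpander can emit code for the expression without referencing
/// the original induction variable. (The names %loop, %base, %step and %civ
/// are made up for this example.)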
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it does not get parameters that are just used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
  }

  const SCEV *visitConstant(const SCEVConstant *Constant) {
    return Constant;
  }

  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }
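  // visitAddRecExpr rewrites a recurrence over an original loop into a plain
  // expression over the new induction variable. For illustration: given a new
  // IV %civ of type i32 and a constant step 4 of type i64, the recurrence
  // {%base,+,4}<%loop> becomes %base + 4 * sext(%civ to i64). The names and
  // types here are made up; the shape of the result is what the code below
  // produces.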
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMaps. So do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2 &&
           "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}
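// generateIslAff materializes an affine function as LLVM IR. For an affine
// function f(i_0, ..., i_n) = c + a_0 * i_0 + ... + a_n * i_n over the
// induction variables in IVS, it emits the constant c followed by one
// multiply-add per non-zero coefficient, i.e. code computing
//
//   c + a_0 * IVS[0] + ... + a_n * IVS[n]
//
// with all values cast to i64 (see the FIXME below).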
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) && "The dimension of the induction "
         "variables must match the dimension of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) && "Result is already set. "
         "Currently only a single isl_aff is supported");
  assert(isl_set_plain_is_universe(Set)
         && "Code generation failed because the set is not universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}

bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}
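// getNewValue resolves the copy of an old value. The lookup order implemented
// below is: constants are reused as-is; then GlobalMap and BBMap are
// consulted; then, under -polly-codegen-scev, the value's SCEV expression is
// rewritten and expanded; instructions defined inside the SCoP that survive
// all of this only recompute information already available in the polyhedral
// description and yield NULL; anything else is SCoP-invariant and can be
// referenced directly.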
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old))
    return BBMap[Old];

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only calculate information already available
  // in the polyhedral description (e.g. an induction variable increment).
  // They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress,
    ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
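// getNewAccessOperand turns the {0, Offset} index pair built above into a
// GEP. Following the usual LLVM GEP idiom, the leading zero steps through
// the base pointer itself and Offset selects the element, so for an
// array-typed base the GEP computes &BaseAddress[0][Offset].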
Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
    ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
                                                        BaseAddress,
                                                        BBMap, GlobalMap);
  Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
                                        "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  const Instruction *Inst = dyn_cast<Instruction>(Load);
  Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                         Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
                                               GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}
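// copyBB splits the basic block at the current insertion point, so the new
// 'polly.stmt.*' block is correctly linked into the surrounding CFG before
// the statement's instructions are copied into it one by one.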
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
  VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
  Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
  Domain(Domain) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Domain && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(Vector,
                                         getNewValue(Old,
                                                     ScalarMaps[Lane],
                                                     GlobalMaps[Lane]),
                                         Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}
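// generateStrideOneLoad replaces the per-lane scalar loads of a consecutive
// access by a single wide load. For a four-lane access to doubles it emits
// IR along these lines (names illustrative):
//
//   %vector_ptr      = bitcast double* %p to <4 x double>*
//   %tmp_p_vec_full  = load <4 x double>* %vector_ptr, align 8
//
// Unless -enable-polly-aligned promises aligned accesses, the alignment is
// conservatively set to 8.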
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
                                         Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad = Builder.CreateLoad(VectorPtr,
                                            Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
    VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
    dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                           Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
                                         Builder.getInt32(i),
                                         Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_set_copy(Domain)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_set_copy(Domain)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
                                     ScalarMaps);

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
                                       NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps);

  if (Access.isStrideOne(isl_set_copy(Domain))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                             "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector,
                                                   Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}
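// extractScalarValues makes vector operands usable by scalarized copies: for
// every operand of Inst that is only available as a vector, lane i is
// extracted into ScalarMaps[i]. It returns true iff at least one vector
// operand was found.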
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So there is no need to
      // check for the existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
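// copyBB drives vector code generation for one statement: it splits off a
// new 'polly.stmt.*' block and copies each instruction, deciding per
// instruction whether it becomes a single vector operation or one scalar
// copy per lane (see copyInstruction above).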
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it is calculating <vectorwidth> values at a
  // time) or, if the values are calculated using scalar operations, once in
  // every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}