1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "polly/CodeGen/CodeGeneration.h" 18 #include "polly/CodeGen/BlockGenerators.h" 19 #include "polly/Support/GICHelper.h" 20 21 #include "llvm/Analysis/LoopInfo.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionExpander.h" 24 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 25 #include "llvm/Support/CommandLine.h" 26 27 #include "isl/aff.h" 28 #include "isl/set.h" 29 30 using namespace llvm; 31 using namespace polly; 32 33 static cl::opt<bool> 34 Aligned("enable-polly-aligned", 35 cl::desc("Assumed aligned memory accesses."), cl::Hidden, 36 cl::value_desc("OpenMP code generation enabled if true"), 37 cl::init(false), cl::ZeroOrMore); 38 39 static cl::opt<bool> 40 SCEVCodegen("polly-codegen-scev", 41 cl::desc("Use SCEV based code generation."), cl::Hidden, 42 cl::init(false), cl::ZeroOrMore); 43 44 /// The SCEVRewriter takes a scalar evolution expression and updates the 45 /// following components: 46 /// 47 /// - SCEVUnknown 48 /// 49 /// Values referenced in SCEVUnknown subexpressions are looked up in 50 /// two Value to Value maps (GlobalMap and BBMap). If they are found they are 51 /// replaced by a reference to the value they map to. 
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExpr's with more than two operands can not be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
///
/// How can this be used?
/// ====================
///
/// SCEVRewrite based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
/// o Values calculated within a basic block
/// o Values representable by SCEV
///
/// During code generation we can ignore all instructions:
///
/// - Ignore all instructions except:
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block.
///   - Store instructions
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
public:
  /// Rewrite @p scev for the new code region: a convenience wrapper that
  /// builds a temporary rewriter and visits the expression once.
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  /// Dispatch to the per-kind visitors below; SCoP parameters are returned
  /// unchanged (they are loop-invariant in the generated code).
  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly does only detect parameters in Access function and loop iteration
    // counters, but it does not get parameters that are just used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      // getIdForParam returns an owned reference; release it, we only needed
      // the membership test.
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
  }

  // Constants never need rewriting.
  const SCEV *visitConstant(const SCEVConstant *Constant) {
    return Constant;
  }

  // The cast visitors rebuild the cast around the rewritten operand.
  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  // N-ary expressions: rewrite every operand, then rebuild the expression.
  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

  /// Rewrite {Base,+,Step}<L> to Base + NewIV * Step, where NewIV is the
  /// remapped induction variable of L (see getNewIV and the FIXME above).
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMaps. So do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2
           && "An AddRecExpr with more than two operands can not be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  /// Replace a referenced Value by its copy, preferring the global map over
  /// the basic-block-local map; unknown values are left untouched.
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;  // Region-wide old-value -> new-value mapping.
  ValueMapT &BBMap;      // Mapping local to the block being copied.
};

// Helper class to generate memory location.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // Callback state for mergeIslAffValues: the (single) generated value and a
  // back-pointer to the generator doing the work.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

// Materialize an isl integer as an LLVM constant (round-tripped through GMP).
Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

// Emit IR computing the value of the affine expression Aff at the current
// induction variable values (IVS): constant + sum(coefficient_i * IV_i).
// Consumes (frees) Aff.
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
         "must match the dimension of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    // Zero coefficients contribute nothing; skip the multiply/add.
    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

// isl_pw_aff_foreach_piece callback: generate code for a single piece of a
// piecewise affine function. Only a single universe piece is supported.
int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) && "Result is already set."
         "Currently only single isl_aff is supported");
  assert(isl_set_plain_is_universe(Set)
         && "Code generation failed because the set is not universe");

  // generateIslAff consumes Aff; we only need to free the (universe) set.
  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

// Emit IR for a piecewise affine function. Consumes (frees) PwAff.
Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}


BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}

// Under -polly-codegen-scev, return true if Inst need not be copied because
// it is fully described by a SCEV expression (one that is not just a
// SCEVUnknown wrapping Inst itself) and will be regenerated on demand by
// getNewValue.
bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}

// Map an operand of the original statement to its counterpart in the
// generated code. Lookup order: constants (unchanged), GlobalMap, BBMap,
// then (under -polly-codegen-scev) regeneration via SCEVRewriter/SCEVExpander.
// Returns NULL for unmapped values defined inside the SCoP region (these are
// redundant with the polyhedral description and may be dropped by the
// caller); values defined outside the region are returned unchanged.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    // The mapped value may be wider than the original (e.g. a 64-bit loop
    // counter standing in for a 32-bit IV); truncate it back.
    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        // Rewrite the expression in terms of the new values, then expand it
        // to IR at the current insertion point. Cache the result in BBMap so
        // later operands of this block reuse it.
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear, if they only calculate information already available in
  // the polyhedral description (e.g. an induction variable increment). They
  // can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}

// Clone Inst into the current block, remapping each operand via getNewValue.
// If any operand maps to NULL the clone is dropped entirely (stores excluded
// by assertion -- their effect cannot be dropped).
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

// Build the GEP index list {0, offset} for a (single-dimensional) access
// relation, where offset is the access function evaluated at the new
// induction variable values.
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  // dim_max of the single output dimension yields the access function as a
  // piecewise affine expression; a copy is taken as dim_max consumes its
  // argument.
  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

// Emit a GEP computing the address described by NewAccessRelation relative to
// BaseAddress.
Value *BlockGenerator::getNewAccessOperand(
  __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
                                                        BaseAddress,
                                                        BBMap, GlobalMap);
  Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
                                        "p_newarrayidx_");
  return NewOperand;
}

// Compute the pointer a load/store should use in the generated code: either
// the remapped original pointer, or -- if the access has been given a new
// access relation -- a freshly computed address.
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  // Both relations are owned copies; release them.
  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

// Copy a load as a plain scalar load from the (possibly redirected) address.
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  const Instruction *Inst = dyn_cast<Instruction>(Load);
  Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                         Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

// Copy a store, remapping both the stored value and the address.
Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
                                               GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

// Copy a single instruction of the statement into the generated block,
// dispatching loads and stores to the specialized generators above.
void BlockGenerator::copyInstruction(const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap) {
  // Terminator instructions control the control flow.
  // They are explicitly expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Instructions regenerable from SCEV are materialized lazily by
  // getNewValue instead of being copied here.
  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

// Copy the statement's basic block: split a fresh block off at the current
// insertion point and copy every instruction into it, threading a
// block-local value map alongside the caller-provided global map.
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

// GlobalMaps holds one value map per vector lane; Domain is the statement's
// iteration domain (kept, not owned) used for stride analysis.
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
  VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
  Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
  Domain(Domain) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Domain && "No statement domain provided");
}

// Get (or build and cache) the vector value for Old: if no vector value
// exists yet, gather the per-lane scalar values into a new vector.
Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(Vector,
                                         getNewValue(Old,
                                                     ScalarMaps[Lane],
                                                     GlobalMaps[Lane]),
                                         Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

// For a pointer-typed Val, return the type 'pointer to <Width x elem>'.
Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

// Consecutive access: emit one full-width vector load through a bitcast
// pointer. Without -enable-polly-aligned a conservative alignment of 8 is
// attached.
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
                                         Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

// All lanes read the same location: load one element and splat it across the
// vector with an all-zero shuffle mask.
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
                                           Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}

// Non-contiguous access: load each lane through its own remapped pointer and
// assemble the result element by element.
Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                       VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
    dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad = Builder.CreateLoad(NewPointer,
                                           Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
                                         Builder.getInt32(i),
                                         Load->getName() + "_p_vec_");
  }

  return Vector;
}

// Vectorize a load according to its stride within the statement domain, or
// fall back to one scalar load per lane when grouped unrolling is requested
// or the element type cannot live in a vector.
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  // isStrideZero/isStrideOne consume the domain; pass copies.
  Value *NewLoad;
  if (Access.isStrideZero(isl_set_copy(Domain)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_set_copy(Domain)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

// Vectorize a unary instruction (only casts are supported) by casting the
// vectorized operand to the corresponding vector type.
void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
                                     ScalarMaps);

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

// Vectorize a binary operator by applying the same opcode to the vectorized
// operands.
void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
                                       NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

// Vectorize a store: one full-width store for stride-one accesses, otherwise
// extract and store each lane separately.
void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps);

  if (Access.isStrideOne(isl_set_copy(Domain))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                             "vector_ptr");
    // NOTE(review): this inner 'Store' shadows the parameter of the same
    // name; it refers to the newly created vector store from here on.
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector,
                                                   Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

// Return true if at least one operand of Inst already has a vector value.
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

// For every operand of Inst that has a vector value, extract the per-lane
// scalars into the scalar maps (so Inst can be executed scalarized). Returns
// true if any vector operand was found.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

// Execute Inst once per lane as scalar code, then -- if possible and any
// operand was vectorized -- regroup the per-lane results into a vector.
void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

// The vector width equals the number of per-lane global maps.
int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}

// Copy a single instruction, preferring vector code (loads, stores, casts,
// binary operators) and falling back to scalarized execution otherwise.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Instructions regenerable from SCEV are materialized lazily by
  // getNewValue instead of being copied here.
  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

// Copy the statement's basic block, producing vector code where possible.
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time). Or (if the values are calculated using scalar operations), it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}