//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume aligned memory accesses."), cl::Hidden,
        cl::value_desc("Memory accesses are assumed to be aligned if true"),
        cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in two
///   Value to Value maps (GlobalMap and BBMap). If they are found, they are
///   replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
///
/// How can this be used?
/// =====================
///
/// SCEVRewrite based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
/// o Values calculated within a basic block
/// o Values representable by SCEV
///
/// During code generation we can ignore all instructions except:
///
/// - Load instructions
/// - Instructions that reference operands already calculated within the
///   basic block
/// - Store instructions
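///
/// Example (illustrative only): given a loop L with canonical induction
/// variable %iv and a mapping GlobalMap[%iv] = %iv.new, the expression
/// {%base, +, %step}<L> is rewritten to %base + %iv.new * %step.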
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV *> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap)
      : S(S), SE(SE), GlobalMap(GlobalMap), BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it does not detect parameters that are merely used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
  }

  const SCEV *visitConstant(const SCEVConstant *Constant) {
    return Constant;
  }

  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

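  // Rewrite an AddRecExpr {%Base, +, %Step}<L> to %Base + NewIV * %Step,
  // where NewIV is the new value of the canonical induction variable of L
  // (see the class comment above). If no new induction variable is
  // available, the expression is returned unchanged.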
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMap, so do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2 &&
           "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

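// Generate code that computes the value of an affine expression. The result
// is calculated as the constant term plus, for each input dimension i, the
// product Coefficient_i * IV_i, with all operands forced to i64 (see the
// FIXME below).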
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The dimension of the induction variables must match the dimension "
         "of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported.");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P),
      SE(P->getAnalysis<ScalarEvolution>()) {}

bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction *>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}

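// Get the new version of a value. Values are looked up in the following
// order:
//
//  o Constants are returned unchanged.
//  o Values with an entry in GlobalMap are replaced by that entry (and
//    truncated if the mapped value is wider than the original).
//  o Values with an entry in BBMap are replaced by that entry.
//  o If SCEV based code generation is enabled, SCEVable values are rewritten
//    and expanded at the current insert point.
//  o Instructions defined inside the SCoP that were not rewritten yield
//    NULL; everything else is assumed to be SCoP-invariant and is returned
//    unchanged.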
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old))
    return BBMap[Old];

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(
            Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only calculate information already available
  // in the polyhedral description (e.g. an induction variable increment).
  // They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a SCoP-constant value defined as a global,
  // a function parameter or an instruction not within the SCoP.
  return const_cast<Value *>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

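// Compute the index vector used to address a memory access. For the
// single-dimensional access functions supported here this is {0, Offset},
// where Offset is the value of the access relation evaluated at the new
// induction variables.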
std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {
  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
    ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value *> IndexArray =
      getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert((!NewAccessRelation ||
          isl_map_has_equal_space(CurrentAccessRelation,
                                  NewAccessRelation)) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer =
        getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap);
  Value *ValueOperand =
      getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

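// Copy the basic block of the statement. The copy is created by splitting
// the builder's current block at the insert point; the instructions of the
// original block are then copied one by one, with BBMap recording the
// block-local mapping from old to new values.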
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
                                           VectorValueMapT &GlobalMaps,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement schedule provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(),
      VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
                                         Builder.getInt32(i),
                                         Load->getName() + "_p_vec_");
  }

  return Vector;
}

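// Generate a load by selecting a strategy based on the access stride:
// stride-zero accesses load a single element and splat it across the vector,
// stride-one accesses become one wide vector load, and all other strides
// fall back to a scalar load per lane that is gathered into a vector.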
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand =
      getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);

  assert(isa<CastInst>(Inst) &&
         "Cannot generate vector code for this instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand,
                                       DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector =
      getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar =
          Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

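// Make the per-lane scalar values of all vector operands of Inst available
// in the scalar maps by extracting each lane from the corresponding vector.
// Returns true if at least one vector operand was found.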
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar value has already been extracted, all scalar elements
      // have already been extracted by this code, so there is no need to
      // check for the existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. These maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vector map is not used and
  // the scalar map has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it calculates <vectorwidth> values at a time)
  // or, if the values are calculated using scalar operations, once in every
  // dimension of the scalar map.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}