//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned", cl::desc("Assume aligned memory accesses."),
        cl::Hidden, cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
            cl::Hidden, cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in two
///   Value to Value maps (GlobalMap and BBMap). If they are found, they are
///   replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
/// FIXME: The comment above is not yet reality. At the moment we derive
/// %Value by looking up the canonical IV of the loop and by defining
/// %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
/// canonical induction variables.
///
///
/// How can this be used?
/// =====================
///
/// SCEVRewrite based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
///   o Values calculated within a basic block
///   o Values representable by SCEV
///
/// During code generation we can ignore all instructions except:
///
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block
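///   - Store instructions
///
/// For example (illustrative, with hypothetical value names): assuming
/// GlobalMap maps the canonical induction variable %iv of Loop_1 to a newly
/// generated value %polly.iv, the expression
///
///   {%base, +, %step}<Loop_1>
///
/// is rewritten to the SCEV of '%base + %polly.iv * %step'.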
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV *> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap)
      : S(S), SE(SE), GlobalMap(GlobalMap), BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it does not get parameters that are just used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
  }

  const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }

  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within GlobalMap. Do not rewrite it and also do not
    // rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2 &&
           "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}
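
// Create LLVM-IR that evaluates the affine function 'Aff' at the current
// induction variable values: starting from the constant term, it adds
// Coefficient_i * IV_i for every input dimension with a non-zero
// coefficient. All values are currently forced to i64 (see the FIXME below).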
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The dimension of the induction variables must match the dimension "
         "of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported.");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P),
      SE(P->getAnalysis<ScalarEvolution>()) {}

bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction *>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}
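
// Look up the new version of 'Old' that is valid in the generated code. The
// lookup order is: constants are returned unchanged, then GlobalMap, then
// BBMap, then (with -polly-codegen-scev) an expression rewritten by the
// SCEVRewriter and expanded with a SCEVExpander. Instructions defined inside
// the SCoP that were not rewritten yield NULL; everything else is assumed to
// be a SCoP-invariant value and is returned unchanged.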
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change. This avoids map lookups for many calls
  // to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(
            Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only calculate information already available
  // in the polyhedral description (e.g. an induction variable increment).
  // They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value *>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
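
// Build the new address 'BaseAddress[Index]' for an access whose access
// relation was changed, by emitting a GEP with the index computed from the
// new access relation.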
Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
    ValueMapT &BBMap, ValueMapT &GlobalMap) {
  std::vector<Value *> IndexArray =
      getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(
    const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer =
        getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Value *BlockGenerator::generateScalarLoad(
    const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  const Instruction *Inst = dyn_cast<Instruction>(Load);
  Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(
    const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}
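
// A VectorBlockGenerator copies a statement for several vector lanes at once.
// GlobalMaps contains one value map per lane, so the number of maps defines
// the vector width (see getVectorWidth()).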
VectorBlockGenerator::VectorBlockGenerator(
    IRBuilder<> &B, VectorValueMapT &GlobalMaps, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(
    const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
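
// Load a vector from memory for which no stride information is known, by
// emitting one scalar load per lane and gathering the results into a vector
// with insertelement instructions.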
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(),
      VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(
    const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand =
      getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(
    const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector =
      getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}
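
// For each operand of 'Inst' that is available as a vector, make its scalar
// lanes available in the per-lane scalar maps by emitting extractelement
// instructions. Returns true if at least one vector operand was found.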
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar has been extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it calculates <vectorwidth> values at a time)
  // or, if the values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}