//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume memory accesses to be aligned."), cl::Hidden,
        cl::init(false), cl::ZeroOrMore);

static cl::opt<bool, true>
SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
             cl::Hidden, cl::location(SCEVCodegen), cl::init(false),
             cl::ZeroOrMore);

bool polly::SCEVCodegen;

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (SCEVCodegen) {
    if (!I || !SE->isSCEVable(I->getType()))
      return false;

    if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
      if (!isa<SCEVCouldNotCompute>(Scev))
        if (!hasScalarDepsInsideRegion(Scev, R))
          return true;

    return false;
  }

  Loop *L = LI->getLoopFor(I->getParent());
  return L && I == L->getCanonicalInductionVariable();
}

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and the coefficients the right type.
  // Here we force it into i64.
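  //
  // The code below evaluates the affine function
  //
  //   Aff(IV_0, ..., IV_{n-1}) = Const + Coeff_0 * IV_0 + ...
  //                                    + Coeff_{n-1} * IV_{n-1}
  //
  // by starting from the constant and accumulating one multiply-add per
  // non-zero coefficient.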
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The number of induction variables must match the dimension of the "
         "affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P),
      SE(P->getAnalysis<ScalarEvolution>()) {}

Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
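  //
  // Apart from that, the lookup order is: GlobalMap (values remapped for the
  // whole scop, e.g. new induction variables) takes precedence over BBMap
  // (values copied earlier within the current basic block); with
  // -polly-codegen-scev, values SCEV can recompute at the new loop scope are
  // re-expanded with a SCEVExpander; anything else is returned unchanged.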
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old))
    return BBMap[Old];

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  if (const Instruction *Inst = dyn_cast<Instruction>(Old)) {
    (void)Inst;
    assert(!Statement.getParent()->getRegion().contains(Inst->getParent()) &&
           "Unexpected scalar dependence in region");
  }

  // Everything else is probably a scop-constant value defined as a global, a
  // function parameter, or an instruction not within the scop.
  return const_cast<Value *>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap,
                                    LoopToScevMapT &LTS) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

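// getNewAccessOperand turns the two-element index array computed by
// getMemoryAccessIndex into a GEP off the access's base address, roughly:
//
//   %p_newarrayidx_ = getelementptr %BaseAddress, i64 0, i64 %Offset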
Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
    ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  std::vector<Value *> IndexArray = getMemoryAccessIndex(
      NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(
    const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap,
                                     GlobalMap, LTS, getLoopForInst(Inst));
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
                                    LTS, getLoopForInst(Store));

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions are explicitly expressed in the clast and do not
  // need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap, LTS);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap, LTS);
}

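// copyBB splits the block at the current insertion point, names the copy
// after the original statement ("polly.stmt.<name>"), and then runs
// copyInstruction over the statement's instructions, threading the
// block-local BBMap through all of them.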
void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(
    IRBuilder<> &B, VectorValueMapT &GlobalMaps,
    std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement schedule provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector,
        getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0],
                                  getLoopForInst(Load));
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0],
                                  getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

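// For accesses whose stride is neither zero nor one, no single wide load
// works: generateUnknownStrideLoad emits one scalar load per lane and
// assembles the result with insertelement. Schematically, for a width of 4:
//
//   %v0 = insertelement <4 x ty> undef, ty %s0, i32 0
//   ...
//   %v3 = insertelement <4 x ty> %v2,   ty %s3, i32 3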
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(),
      VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                    VLTS[i], getLoopForInst(Load));
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) &&
         "Cannot generate vector code for this instruction");

  const CastInst *Cast = cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand,
                                       DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
                                    VLTS[0], getLoopForInst(Store));

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar =
          Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                      VLTS[i], getLoopForInst(Store));
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

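// hasVectorOperands and extractScalarValues are a pair: the former checks
// whether any operand of an instruction is already available as a vector
// value, and the latter breaks such vector operands back into per-lane
// scalars so that an instruction we cannot vectorize can still be copied
// lane by lane.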
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has been extracted, all of them have already
      // been extracted by this code, so there is no need to check for the
      // existence of each of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
                   VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions are explicitly expressed in the clast and do not
  // need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

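// As an illustration: with a vector width of 4, a load feeding an addition
// appears once in the vector map (as a <4 x ty> value), whereas an
// instruction that has to be scalarized appears in each of the four scalar
// maps, once per lane.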
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it is calculating <vectorwidth> values at a
  // time), or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
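
// A rough usage sketch, assuming a driver that has already positioned the
// IRBuilder and filled GlobalMap/LTS with the new induction variables (the
// real driver lives elsewhere in Polly's code generation):
//
//   BlockGenerator Generator(Builder, Stmt, P);
//   Generator.copyBB(GlobalMap, LTS);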