1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the BlockGenerator and VectorBlockGenerator classes, 11 // which generate sequential code and vectorized code for a polyhedral 12 // statement, respectively. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopInfo.h" 17 #include "isl/aff.h" 18 #include "isl/set.h" 19 #include "polly/CodeGen/BlockGenerators.h" 20 #include "polly/CodeGen/CodeGeneration.h" 21 #include "polly/Options.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/SCEVValidator.h" 24 #include "polly/Support/ScopHelper.h" 25 #include "llvm/Analysis/LoopInfo.h" 26 #include "llvm/Analysis/ScalarEvolution.h" 27 #include "llvm/Analysis/ScalarEvolutionExpander.h" 28 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 29 30 using namespace llvm; 31 using namespace polly; 32 33 static cl::opt<bool> 34 Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."), 35 cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"), 36 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 37 38 static cl::opt<bool, true> 39 SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."), 40 cl::Hidden, cl::location(SCEVCodegen), cl::init(false), 41 cl::ZeroOrMore, cl::cat(PollyCategory)); 42 43 bool polly::SCEVCodegen; 44 45 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI, 46 ScalarEvolution *SE, const Region *R) { 47 if (SCEVCodegen) { 48 if (!I || !SE->isSCEVable(I->getType())) 49 return false; 50 51 if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I))) 52 if (!isa<SCEVCouldNotCompute>(Scev)) 53 if (!hasScalarDepsInsideRegion(Scev, R)) 54 return true; 55 56 return false; 57 } 58 59 Loop *L = LI->getLoopFor(I->getParent()); 60 return L && I == L->getCanonicalInductionVariable() && R->contains(L); 61 } 62 63 // Helper class to generate memory location. 64 namespace { 65 class IslGenerator { 66 public: 67 IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) 68 : Builder(Builder), IVS(IVS) {} 69 Value *generateIslVal(__isl_take isl_val *Val); 70 Value *generateIslAff(__isl_take isl_aff *Aff); 71 Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff); 72 73 private: 74 typedef struct { 75 Value *Result; 76 class IslGenerator *Generator; 77 } IslGenInfo; 78 79 IRBuilder<> &Builder; 80 std::vector<Value *> &IVS; 81 static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff, 82 void *User); 83 }; 84 } 85 86 Value *IslGenerator::generateIslVal(__isl_take isl_val *Val) { 87 Value *IntValue = Builder.getInt(APIntFromVal(Val)); 88 return IntValue; 89 } 90 91 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) { 92 Value *Result; 93 Value *ConstValue; 94 isl_val *Val; 95 96 Val = isl_aff_get_constant_val(Aff); 97 ConstValue = generateIslVal(Val); 98 Type *Ty = Builder.getInt64Ty(); 99 100 // FIXME: We should give the constant and coefficients the right type. Here 101 // we force it into i64. 102 Result = Builder.CreateSExtOrBitCast(ConstValue, Ty); 103 104 unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in); 105 106 assert((IVS.size() == NbInputDims) && 107 "The Dimension of Induction Variables must match the dimension of the " 108 "affine space."); 109 110 for (unsigned int i = 0; i < NbInputDims; ++i) { 111 Value *CoefficientValue; 112 Val = isl_aff_get_coefficient_val(Aff, isl_dim_in, i); 113 114 if (isl_val_is_zero(Val)) { 115 isl_val_free(Val); 116 continue; 117 } 118 119 CoefficientValue = generateIslVal(Val); 120 CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true); 121 Value *IV = Builder.CreateIntCast(IVS[i], Ty, true); 122 Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff"); 123 Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff"); 124 } 125 126 isl_aff_free(Aff); 127 128 return Result; 129 } 130 131 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set, 132 __isl_take isl_aff *Aff, void *User) { 133 IslGenInfo *GenInfo = (IslGenInfo *)User; 134 135 assert((GenInfo->Result == NULL) && 136 "Result is already set. Currently only single isl_aff is supported"); 137 assert(isl_set_plain_is_universe(Set) && 138 "Code generation failed because the set is not universe"); 139 140 GenInfo->Result = GenInfo->Generator->generateIslAff(Aff); 141 142 isl_set_free(Set); 143 return 0; 144 } 145 146 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) { 147 IslGenInfo User; 148 User.Result = NULL; 149 User.Generator = this; 150 isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User); 151 assert(User.Result && "Code generation for isl_pw_aff failed"); 152 153 isl_pw_aff_free(PwAff); 154 return User.Result; 155 } 156 157 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P) 158 : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) { 159 } 160 161 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, 162 ValueMapT &GlobalMap, LoopToScevMapT <S, 163 Loop *L) { 164 // We assume constants never change. 165 // This avoids map lookups for many calls to this function. 166 if (isa<Constant>(Old)) 167 return const_cast<Value *>(Old); 168 169 if (GlobalMap.count(Old)) { 170 Value *New = GlobalMap[Old]; 171 172 if (Old->getType()->getScalarSizeInBits() < 173 New->getType()->getScalarSizeInBits()) 174 New = Builder.CreateTruncOrBitCast(New, Old->getType()); 175 176 return New; 177 } 178 179 if (BBMap.count(Old)) { 180 assert(BBMap[Old] && "BBMap[Old] should not be NULL!"); 181 return BBMap[Old]; 182 } 183 184 if (SCEVCodegen && SE.isSCEVable(Old->getType())) 185 if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) { 186 if (!isa<SCEVCouldNotCompute>(Scev)) { 187 const SCEV *NewScev = apply(Scev, LTS, SE); 188 ValueToValueMap VTV; 189 VTV.insert(BBMap.begin(), BBMap.end()); 190 VTV.insert(GlobalMap.begin(), GlobalMap.end()); 191 NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); 192 SCEVExpander Expander(SE, "polly"); 193 Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), 194 Builder.GetInsertPoint()); 195 196 BBMap[Old] = Expanded; 197 return Expanded; 198 } 199 } 200 201 if (const Instruction *Inst = dyn_cast<Instruction>(Old)) { 202 (void)Inst; 203 assert(!Statement.getParent()->getRegion().contains(Inst->getParent()) && 204 "unexpected scalar dependence in region"); 205 } 206 207 // Everything else is probably a scop-constant value defined as global, 208 // function parameter or an instruction not within the scop. 209 return const_cast<Value *>(Old); 210 } 211 212 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, 213 ValueMapT &GlobalMap, LoopToScevMapT <S) { 214 Instruction *NewInst = Inst->clone(); 215 216 // Replace old operands with the new ones. 217 for (Instruction::const_op_iterator OI = Inst->op_begin(), 218 OE = Inst->op_end(); 219 OI != OE; ++OI) { 220 Value *OldOperand = *OI; 221 Value *NewOperand = 222 getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 223 224 if (!NewOperand) { 225 assert(!isa<StoreInst>(NewInst) && 226 "Store instructions are always needed!"); 227 delete NewInst; 228 return; 229 } 230 231 NewInst->replaceUsesOfWith(OldOperand, NewOperand); 232 } 233 234 Builder.Insert(NewInst); 235 BBMap[Inst] = NewInst; 236 237 if (!NewInst->getType()->isVoidTy()) 238 NewInst->setName("p_" + Inst->getName()); 239 } 240 241 std::vector<Value *> BlockGenerator::getMemoryAccessIndex( 242 __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap, 243 ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) { 244 245 assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) && 246 "Only single dimensional access functions supported"); 247 248 std::vector<Value *> IVS; 249 for (unsigned i = 0; i < Statement.getNumIterators(); ++i) { 250 const Value *OriginalIV = Statement.getInductionVariableForDimension(i); 251 Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L); 252 IVS.push_back(NewIV); 253 } 254 255 isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0); 256 IslGenerator IslGen(Builder, IVS); 257 Value *OffsetValue = IslGen.generateIslPwAff(PwAff); 258 259 Type *Ty = Builder.getInt64Ty(); 260 OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); 261 262 std::vector<Value *> IndexArray; 263 Value *NullValue = Constant::getNullValue(Ty); 264 IndexArray.push_back(NullValue); 265 IndexArray.push_back(OffsetValue); 266 return IndexArray; 267 } 268 269 Value *BlockGenerator::getNewAccessOperand( 270 __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap, 271 ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) { 272 std::vector<Value *> IndexArray = getMemoryAccessIndex( 273 NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L); 274 Value *NewOperand = 275 Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_"); 276 return NewOperand; 277 } 278 279 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, 280 const Value *Pointer, 281 ValueMapT &BBMap, 282 ValueMapT &GlobalMap, 283 LoopToScevMapT <S) { 284 MemoryAccess &Access = Statement.getAccessFor(Inst); 285 isl_map *CurrentAccessRelation = Access.getAccessRelation(); 286 isl_map *NewAccessRelation = Access.getNewAccessRelation(); 287 288 assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) && 289 "Current and new access function use different spaces"); 290 291 Value *NewPointer; 292 293 if (!NewAccessRelation) { 294 NewPointer = 295 getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); 296 } else { 297 Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr()); 298 NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, 299 GlobalMap, LTS, getLoopForInst(Inst)); 300 } 301 302 isl_map_free(CurrentAccessRelation); 303 isl_map_free(NewAccessRelation); 304 return NewPointer; 305 } 306 307 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { 308 return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent()); 309 } 310 311 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, 312 ValueMapT &BBMap, 313 ValueMapT &GlobalMap, 314 LoopToScevMapT <S) { 315 const Value *Pointer = Load->getPointerOperand(); 316 const Instruction *Inst = dyn_cast<Instruction>(Load); 317 Value *NewPointer = 318 generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS); 319 Value *ScalarLoad = 320 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 321 return ScalarLoad; 322 } 323 324 Value *BlockGenerator::generateScalarStore(const StoreInst *Store, 325 ValueMapT &BBMap, 326 ValueMapT &GlobalMap, 327 LoopToScevMapT <S) { 328 const Value *Pointer = Store->getPointerOperand(); 329 Value *NewPointer = 330 generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS); 331 Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap, 332 LTS, getLoopForInst(Store)); 333 334 return Builder.CreateStore(ValueOperand, NewPointer); 335 } 336 337 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap, 338 ValueMapT &GlobalMap, 339 LoopToScevMapT <S) { 340 // Terminator instructions control the control flow. They are explicitly 341 // expressed in the clast and do not need to be copied. 342 if (Inst->isTerminator()) 343 return; 344 345 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 346 &Statement.getParent()->getRegion())) 347 return; 348 349 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 350 Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS); 351 // Compute NewLoad before its insertion in BBMap to make the insertion 352 // deterministic. 353 BBMap[Load] = NewLoad; 354 return; 355 } 356 357 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 358 Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS); 359 // Compute NewStore before its insertion in BBMap to make the insertion 360 // deterministic. 361 BBMap[Store] = NewStore; 362 return; 363 } 364 365 copyInstScalar(Inst, BBMap, GlobalMap, LTS); 366 } 367 368 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT <S) { 369 BasicBlock *BB = Statement.getBasicBlock(); 370 BasicBlock *CopyBB = 371 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 372 CopyBB->setName("polly.stmt." + BB->getName()); 373 Builder.SetInsertPoint(CopyBB->begin()); 374 375 ValueMapT BBMap; 376 377 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 378 ++II) 379 copyInstruction(II, BBMap, GlobalMap, LTS); 380 } 381 382 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, 383 VectorValueMapT &GlobalMaps, 384 std::vector<LoopToScevMapT> &VLTS, 385 ScopStmt &Stmt, 386 __isl_keep isl_map *Schedule, 387 Pass *P) 388 : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS), 389 Schedule(Schedule) { 390 assert(GlobalMaps.size() > 1 && "Only one vector lane found"); 391 assert(Schedule && "No statement domain provided"); 392 } 393 394 Value *VectorBlockGenerator::getVectorValue(const Value *Old, 395 ValueMapT &VectorMap, 396 VectorValueMapT &ScalarMaps, 397 Loop *L) { 398 if (VectorMap.count(Old)) 399 return VectorMap[Old]; 400 401 int Width = getVectorWidth(); 402 403 Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); 404 405 for (int Lane = 0; Lane < Width; Lane++) 406 Vector = Builder.CreateInsertElement( 407 Vector, 408 getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L), 409 Builder.getInt32(Lane)); 410 411 VectorMap[Old] = Vector; 412 413 return Vector; 414 } 415 416 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { 417 PointerType *PointerTy = dyn_cast<PointerType>(Val->getType()); 418 assert(PointerTy && "PointerType expected"); 419 420 Type *ScalarType = PointerTy->getElementType(); 421 VectorType *VectorType = VectorType::get(ScalarType, Width); 422 423 return PointerType::getUnqual(VectorType); 424 } 425 426 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, 427 ValueMapT &BBMap) { 428 const Value *Pointer = Load->getPointerOperand(); 429 Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); 430 Value *NewPointer = 431 getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); 432 Value *VectorPtr = 433 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 434 LoadInst *VecLoad = 435 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); 436 if (!Aligned) 437 VecLoad->setAlignment(8); 438 439 return VecLoad; 440 } 441 442 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, 443 ValueMapT &BBMap) { 444 const Value *Pointer = Load->getPointerOperand(); 445 Type *VectorPtrType = getVectorPtrTy(Pointer, 1); 446 Value *NewPointer = 447 getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); 448 Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, 449 Load->getName() + "_p_vec_p"); 450 LoadInst *ScalarLoad = 451 Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); 452 453 if (!Aligned) 454 ScalarLoad->setAlignment(8); 455 456 Constant *SplatVector = Constant::getNullValue( 457 VectorType::get(Builder.getInt32Ty(), getVectorWidth())); 458 459 Value *VectorLoad = Builder.CreateShuffleVector( 460 ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); 461 return VectorLoad; 462 } 463 464 Value * 465 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, 466 VectorValueMapT &ScalarMaps) { 467 int VectorWidth = getVectorWidth(); 468 const Value *Pointer = Load->getPointerOperand(); 469 VectorType *VectorType = VectorType::get( 470 dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth); 471 472 Value *Vector = UndefValue::get(VectorType); 473 474 for (int i = 0; i < VectorWidth; i++) { 475 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], 476 VLTS[i], getLoopForInst(Load)); 477 Value *ScalarLoad = 478 Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); 479 Vector = Builder.CreateInsertElement( 480 Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); 481 } 482 483 return Vector; 484 } 485 486 void VectorBlockGenerator::generateLoad(const LoadInst *Load, 487 ValueMapT &VectorMap, 488 VectorValueMapT &ScalarMaps) { 489 if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || 490 !VectorType::isValidElementType(Load->getType())) { 491 for (int i = 0; i < getVectorWidth(); i++) 492 ScalarMaps[i][Load] = 493 generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); 494 return; 495 } 496 497 MemoryAccess &Access = Statement.getAccessFor(Load); 498 499 Value *NewLoad; 500 if (Access.isStrideZero(isl_map_copy(Schedule))) 501 NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); 502 else if (Access.isStrideOne(isl_map_copy(Schedule))) 503 NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); 504 else 505 NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); 506 507 VectorMap[Load] = NewLoad; 508 } 509 510 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, 511 ValueMapT &VectorMap, 512 VectorValueMapT &ScalarMaps) { 513 int VectorWidth = getVectorWidth(); 514 Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps, 515 getLoopForInst(Inst)); 516 517 assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); 518 519 const CastInst *Cast = dyn_cast<CastInst>(Inst); 520 VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth); 521 VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); 522 } 523 524 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, 525 ValueMapT &VectorMap, 526 VectorValueMapT &ScalarMaps) { 527 Loop *L = getLoopForInst(Inst); 528 Value *OpZero = Inst->getOperand(0); 529 Value *OpOne = Inst->getOperand(1); 530 531 Value *NewOpZero, *NewOpOne; 532 NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L); 533 NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L); 534 535 Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, 536 Inst->getName() + "p_vec"); 537 VectorMap[Inst] = NewInst; 538 } 539 540 void VectorBlockGenerator::copyStore(const StoreInst *Store, 541 ValueMapT &VectorMap, 542 VectorValueMapT &ScalarMaps) { 543 int VectorWidth = getVectorWidth(); 544 545 MemoryAccess &Access = Statement.getAccessFor(Store); 546 547 const Value *Pointer = Store->getPointerOperand(); 548 Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, 549 ScalarMaps, getLoopForInst(Store)); 550 551 if (Access.isStrideOne(isl_map_copy(Schedule))) { 552 Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); 553 Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0], 554 VLTS[0], getLoopForInst(Store)); 555 556 Value *VectorPtr = 557 Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); 558 StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); 559 560 if (!Aligned) 561 Store->setAlignment(8); 562 } else { 563 for (unsigned i = 0; i < ScalarMaps.size(); i++) { 564 Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); 565 Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], 566 VLTS[i], getLoopForInst(Store)); 567 Builder.CreateStore(Scalar, NewPointer); 568 } 569 } 570 } 571 572 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, 573 ValueMapT &VectorMap) { 574 for (Instruction::const_op_iterator OI = Inst->op_begin(), 575 OE = Inst->op_end(); 576 OI != OE; ++OI) 577 if (VectorMap.count(*OI)) 578 return true; 579 return false; 580 } 581 582 bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, 583 ValueMapT &VectorMap, 584 VectorValueMapT &ScalarMaps) { 585 bool HasVectorOperand = false; 586 int VectorWidth = getVectorWidth(); 587 588 for (Instruction::const_op_iterator OI = Inst->op_begin(), 589 OE = Inst->op_end(); 590 OI != OE; ++OI) { 591 ValueMapT::iterator VecOp = VectorMap.find(*OI); 592 593 if (VecOp == VectorMap.end()) 594 continue; 595 596 HasVectorOperand = true; 597 Value *NewVector = VecOp->second; 598 599 for (int i = 0; i < VectorWidth; ++i) { 600 ValueMapT &SM = ScalarMaps[i]; 601 602 // If there is one scalar extracted, all scalar elements should have 603 // already been extracted by the code here. So no need to check for the 604 // existance of all of them. 605 if (SM.count(*OI)) 606 break; 607 608 SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); 609 } 610 } 611 612 return HasVectorOperand; 613 } 614 615 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst, 616 ValueMapT &VectorMap, 617 VectorValueMapT &ScalarMaps) { 618 bool HasVectorOperand; 619 int VectorWidth = getVectorWidth(); 620 621 HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); 622 623 for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) 624 copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], 625 VLTS[VectorLane]); 626 627 if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) 628 return; 629 630 // Make the result available as vector value. 631 VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth); 632 Value *Vector = UndefValue::get(VectorType); 633 634 for (int i = 0; i < VectorWidth; i++) 635 Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], 636 Builder.getInt32(i)); 637 638 VectorMap[Inst] = Vector; 639 } 640 641 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } 642 643 void VectorBlockGenerator::copyInstruction(const Instruction *Inst, 644 ValueMapT &VectorMap, 645 VectorValueMapT &ScalarMaps) { 646 // Terminator instructions control the control flow. They are explicitly 647 // expressed in the clast and do not need to be copied. 648 if (Inst->isTerminator()) 649 return; 650 651 if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE, 652 &Statement.getParent()->getRegion())) 653 return; 654 655 if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 656 generateLoad(Load, VectorMap, ScalarMaps); 657 return; 658 } 659 660 if (hasVectorOperands(Inst, VectorMap)) { 661 if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 662 copyStore(Store, VectorMap, ScalarMaps); 663 return; 664 } 665 666 if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { 667 copyUnaryInst(Unary, VectorMap, ScalarMaps); 668 return; 669 } 670 671 if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { 672 copyBinaryInst(Binary, VectorMap, ScalarMaps); 673 return; 674 } 675 676 // Falltrough: We generate scalar instructions, if we don't know how to 677 // generate vector code. 678 } 679 680 copyInstScalarized(Inst, VectorMap, ScalarMaps); 681 } 682 683 void VectorBlockGenerator::copyBB() { 684 BasicBlock *BB = Statement.getBasicBlock(); 685 BasicBlock *CopyBB = 686 SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); 687 CopyBB->setName("polly.stmt." + BB->getName()); 688 Builder.SetInsertPoint(CopyBB->begin()); 689 690 // Create two maps that store the mapping from the original instructions of 691 // the old basic block to their copies in the new basic block. Those maps 692 // are basic block local. 693 // 694 // As vector code generation is supported there is one map for scalar values 695 // and one for vector values. 696 // 697 // In case we just do scalar code generation, the vectorMap is not used and 698 // the scalarMap has just one dimension, which contains the mapping. 699 // 700 // In case vector code generation is done, an instruction may either appear 701 // in the vector map once (as it is calculating >vectorwidth< values at a 702 // time. Or (if the values are calculated using scalar operations), it 703 // appears once in every dimension of the scalarMap. 704 VectorValueMapT ScalarBlockMap(getVectorWidth()); 705 ValueMapT VectorBlockMap; 706 707 for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; 708 ++II) 709 copyInstruction(II, VectorBlockMap, ScalarBlockMap); 710 } 711