//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "isl/aff.h"
#include "isl/set.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned", cl::desc("Assume aligned memory accesses."),
        cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool, true>
SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
             cl::Hidden, cl::location(SCEVCodegen), cl::init(false),
             cl::ZeroOrMore, cl::cat(PollyCategory));

bool polly::SCEVCodegen;

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (SCEVCodegen) {
    if (!I || !SE->isSCEVable(I->getType()))
      return false;

    if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
      if (!isa<SCEVCouldNotCompute>(Scev))
        if (!hasScalarDepsInsideRegion(Scev, R))
          return true;

    return false;
  }

  Loop *L = LI->getLoopFor(I->getParent());
  return L && I == L->getCanonicalInductionVariable() && R->contains(L);
}

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslVal(__isl_take isl_val *Val);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}

Value *IslGenerator::generateIslVal(__isl_take isl_val *Val) {
  Value *IntValue = Builder.getInt(APIntFromVal(Val));
  return IntValue;
}

Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_val *Val;

  Val = isl_aff_get_constant_val(Aff);
  ConstValue = generateIslVal(Val);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
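  // Evaluate the affine function: start from the constant term and add one
  // product <coefficient> * <induction variable> for every input dimension
  // that has a non-zero coefficient.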
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The dimension of the induction variables must match the dimension "
         "of the affine space.");

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    Val = isl_aff_get_coefficient_val(Aff, isl_dim_in, i);

    if (isl_val_is_zero(Val)) {
      isl_val_free(Val);
      continue;
    }

    CoefficientValue = generateIslVal(Val);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {
}

Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  if (const Instruction *Inst = dyn_cast<Instruction>(Old)) {
    (void)Inst;
    assert(!Statement.getParent()->getRegion().contains(Inst->getParent()) &&
           "unexpected scalar dependence in region");
  }

  // Everything else is probably a scop-constant value defined as a global,
  // a function parameter or an instruction not within the scop.
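  // Such values are invariant inside the scop, so they can be reused in the
  // generated code without remapping.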
  return const_cast<Value *>(Old);
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  std::vector<Value *> IndexArray = getMemoryAccessIndex(
      NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap,
                                                LoopToScevMapT &LTS) {
  const MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap,
                                     GlobalMap, LTS, getLoopForInst(Inst));
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent());
}

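// Generate the sequential load for one statement instance: compute the
// (possibly remapped) address via generateLocationAccessed() and emit a plain
// scalar LoadInst from it.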
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  const Instruction *Inst = dyn_cast<Instruction>(Load);
  Value *NewPointer =
      generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
                                    LTS, getLoopForInst(Store));

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap, LTS);
}

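// Copy the basic block of this statement into the currently generated code:
// split off a new block named "polly.stmt.<original name>" and copy every
// instruction into it, using a fresh block-local value map.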
void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector,
        getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *
VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

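  // As nothing is known about the stride of this access, load every vector
  // lane with a separate scalar load and assemble the result vector one
  // element at a time.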
  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                    VLTS[i], getLoopForInst(Load));
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
                                     getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  const MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
                                    VLTS[0], getLoopForInst(Store));

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                      VLTS[i], getLoopForInst(Store));
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

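// Check whether at least one operand of Inst already has a vectorized
// counterpart in VectorMap.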
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one lane of this operand has already been extracted, all lanes
      // have been extracted by this code, so there is no need to check for
      // the existence of the remaining ones.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
                   VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

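  // If at least one operand already has a vector version, try to emit a real
  // vector instruction for the operations we know how to widen (stores, casts
  // and binary operators); anything else falls through to scalarized code
  // generation below.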
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it computes <vectorwidth> values at a time)
  // or, if its values are calculated with scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}