//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume aligned memory accesses."), cl::Hidden,
        cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool, true>
SCEVCodegenF("polly-codegen-scev",
             cl::desc("Use SCEV based code generation."), cl::Hidden,
             cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore,
             cl::cat(PollyCategory));

bool polly::SCEVCodegen;

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (SCEVCodegen) {
    if (!I || !SE->isSCEVable(I->getType()))
      return false;

    if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
      if (!isa<SCEVCouldNotCompute>(Scev))
        if (!hasScalarDepsInsideRegion(Scev, R))
          return true;

    return false;
  }

  Loop *L = LI->getLoopFor(I->getParent());
  return L && I == L->getCanonicalInductionVariable() && R->contains(L);
}

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(PollyIRBuilder &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  Value *generateIslVal(__isl_take isl_val *Val);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  PollyIRBuilder &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}

Value *IslGenerator::generateIslVal(__isl_take isl_val *Val) {
  Value *IntValue = Builder.getInt(APIntFromVal(Val));
  return IntValue;
}

Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_val *Val;

  Val = isl_aff_get_constant_val(Aff);
  ConstValue = generateIslVal(Val);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and the coefficients the right type.
  // Here we force them into i64.
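  // As an illustrative example (hypothetical values): for an isl_aff such as
  //   { [i0, i1] -> [(4 + 2*i0 + 3*i1)] }
  // the code below emits IR computing
  //   p_sum_coeff = 4 + 2 * p_i0 + 3 * p_i1
  // where p_i0 and p_i1 are the new induction variables taken from IVS.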
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The number of induction variables must match the dimension of the "
         "affine space.");

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    Val = isl_aff_get_coefficient_val(Aff, isl_dim_in, i);

    if (isl_val_is_zero(Val)) {
      isl_val_free(Val);
      continue;
    }

    CoefficientValue = generateIslVal(Val);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == nullptr) &&
         "Result is already set. Currently only a single isl_aff is supported");
  assert(isl_set_plain_is_universe(Set) &&
         "Code generation failed because the set is not the universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = nullptr;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {
}

Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap,
                                            ValueMapT &GlobalMap) const {
  // We assume constants never change. This avoids map lookups for many calls
  // to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  // Otherwise it is probably a scop-constant value defined as a global, a
  // function parameter, or an instruction not within the scop.
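  // For example, an array base pointer passed in as a function argument or
  // defined as a global variable is not touched by the transformation and can
  // be reused as-is (illustrative case).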
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  if (Value *New = BBMap.lookup(Old))
    return New;

  return nullptr;
}

Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // At this point the scalar dependence is neither available nor can it be
  // synthesized with SCEV. This should never happen in the current code
  // generator.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. In the current state, they just crash the code
  // generation as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
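  // For a hypothetical original instruction '%add = add i64 %i, %val', the
  // loop below looks up %i and %val in the maps and the clone becomes
  // '%p_add = add i64 %p_i, %p_val' (names are illustrative).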
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
  std::vector<Value *> IndexArray = getMemoryAccessIndex(
      NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap,
                                                LoopToScevMapT &LTS) {
  const MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap,
                                     GlobalMap, LTS, getLoopForInst(Inst));
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent());
}
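// The rewritten scalar load reads through the remapped pointer; e.g., an
// original '%val = load double* %arrayidx' becomes
// '%val_p_scalar_ = load double* %p_newarrayidx_' (illustrative names).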
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
                                    LTS, getLoopForInst(Store));

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions are explicitly expressed in the clast and do not
  // need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap, LTS);
}
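// copyBB splits the current block at the insertion point and fills the new
// block with copies of the statement's instructions; a fresh BBMap is used so
// that mappings do not leak between statement instances (explanatory note).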
void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (Instruction &Inst : *BB)
    copyInstruction(&Inst, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(PollyIRBuilder &B,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           ScopStmt &Stmt,
                                           __isl_keep isl_map *Schedule,
                                           Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement schedule provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector,
        getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}
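// A stride-one access can be implemented as a single wide load. Illustrative
// IR for a vector width of 4 (names are hypothetical):
//   %vector_ptr     = bitcast double* %p to <4 x double>*
//   %val_p_vec_full = load <4 x double>* %vector_ptr
// For a stride of -1 the loaded vector is additionally reversed with a
// shufflevector.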
Value *
VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                            VectorValueMapT &ScalarMaps,
                                            bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = getNewValue(Pointer, ScalarMaps[Offset],
                                  GlobalMaps[Offset], VLTS[Offset],
                                  getLoopForInst(Load));
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *
VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                    VLTS[i], getLoopForInst(Load));
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
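// How a load is vectorized depends on the stride of its access relation under
// the given schedule. Illustrative examples (assuming i is the vectorized
// dimension): A[i] has stride one, A[0] has stride zero (splat), A[i] scanned
// backwards has stride -1 (reverse), and A[2*i] has an unknown stride and is
// assembled element by element.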
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
                                     getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
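// Stores mirror the load strategy: a stride-one store becomes one wide store
// through a bitcast pointer, anything else extracts the lanes and stores them
// one by one (explanatory note).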
void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  const MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
                                    VLTS[0], getLoopForInst(Store));

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
                                      VLTS[i], getLoopForInst(Store));
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has been extracted, all scalar elements have
      // already been extracted by the code here, so there is no need to check
      // for the existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
                   VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
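// Dispatch order for the vector code generator: loads get dedicated handling;
// stores, casts, and binary operators are vectorized if at least one operand
// is already a vector; everything else is scalarized lane by lane
// (explanatory note).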
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions are explicitly expressed in the clast and do not
  // need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it is calculating <vectorwidth> values at a
  // time) or, if the values are calculated using scalar operations, once in
  // every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
}
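// Usage sketch (illustrative; how these generators are exposed publicly, e.g.
// via static helpers in BlockGenerators.h, is defined outside this file):
//
//   BlockGenerator Generator(Builder, Stmt, P);
//   Generator.copyBB(GlobalMap, LTS);   // copy one statement instance
//
// The vector variant is constructed with one GlobalMap and one LoopToScevMapT
// per vector lane plus the statement schedule, and is driven through its
// parameterless copyBB().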