//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assume aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (!I || !SE->isSCEVable(I->getType()))
    return false;

  if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
    if (!isa<SCEVCouldNotCompute>(Scev))
      if (!hasScalarDepsInsideRegion(Scev, R))
        return true;

  return false;
}

bool polly::isIgnoredIntrinsic(const Value *V) {
  if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are supported/ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are supported/ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are supported/ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return true;
    default:
      break;
    }
  }
  return false;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P,
                               LoopInfo &LI, ScalarEvolution &SE,
                               isl_ast_build *Build,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), Statement(Stmt), P(P), LI(LI), SE(SE), Build(Build),
      ExprBuilder(ExprBuilder) {}
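// Look up the value that should be used for Old in the generated code. The
// order of the checks below matters: constants are returned unchanged, then
// the global and block-local maps are consulted, then SCEVable values are
// re-synthesized for the new loop structure, and finally scop-constant
// values (globals, arguments, and instructions defined outside the scop)
// are reused as-is.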
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
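// Create a scalar copy of Inst at the current insertion point. Each operand
// is remapped through getNewValue, and the new instruction is registered in
// BBMap so that later instructions of the same statement can refer to it.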
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics, as we have not investigated how to
  // copy them correctly. Currently, they just crash the code generation
  // because their metadata operands are not copied correctly.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *BlockGenerator::getNewAccessOperand(const MemoryAccess &MA) {
  isl_pw_multi_aff *PWAccRel;
  isl_union_map *Schedule;
  isl_ast_expr *Expr;

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  Schedule = isl_ast_build_get_schedule(Build);
  PWAccRel = MA.applyScheduleToAccessRelation(Schedule);

  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
  Expr = isl_ast_expr_address_of(Expr);

  return ExprBuilder->create(Expr);
}

Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap,
                                                LoopToScevMapT &LTS) {
  const MemoryAccess &MA = Statement.getAccessFor(Inst);

  Value *NewPointer;
  if (MA.hasNewAccessRelation())
    NewPointer = getNewAccessOperand(MA);
  else
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
                                    LTS, getLoopForInst(Store));

  Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
                                               Store->getAlignment());
  return NewStore;
}
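// Copy a single instruction of the statement. Terminators and values that
// can be re-synthesized from SCEV are skipped, loads and stores are handled
// separately so their access functions can be rewritten, ignored intrinsics
// are dropped, and everything else is copied via copyInstScalar.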
void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                    &SE, &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  // The list of ignored intrinsics is shared with isIgnoredIntrinsic above.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Inst, BBMap, GlobalMap, LTS);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;

  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (Instruction &Inst : *BB)
    copyInstruction(&Inst, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(
    PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
    std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE,
    __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder)
    : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder),
      GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector,
        getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}
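// Load a vector of consecutive memory locations with a single wide load.
// For a stride of minus one, the load starts at the last accessed element
// and the lanes are reversed with a shuffle afterwards.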
Value *
VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                            VectorValueMapT &ScalarMaps,
                                            bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(
      Load, Pointer, ScalarMaps[Offset], GlobalMaps[Offset], VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMaps[0], VLTS[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *
VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
                                                VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i],
                                                 GlobalMaps[i], VLTS[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
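// Generate a vector value for Load. Depending on the stride of the access
// relation under the current schedule, this is a broadcast of one element
// (stride zero), a single wide load (stride one or minus one), or a gather
// of per-lane scalar loads (unknown stride). If the chosen vectorizer needs
// grouped unrolling or the element type is not vectorizable, one plain
// scalar load per lane is emitted instead.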
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
                                     getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
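// Generate code for a store with a vector value operand. Stride-one
// accesses are written back with a single wide store; for all other
// strides, each lane is extracted and stored through its own scalar
// address.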
void VectorBlockGenerator::copyStore(const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  const MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Store, Pointer, ScalarMaps[0],
                                                 GlobalMaps[0], VLTS[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(
          Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has already been extracted, all of them have
      // been extracted by this loop before, so there is no need to check for
      // the existence of the remaining ones.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane],
                                    GlobalMaps[VectorLane], VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
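// Copy a single instruction as part of vector code generation. Loads create
// vector values from memory and are handled first. Stores, casts, and
// binary operators with at least one vector operand are widened directly;
// every other instruction is scalarized, i.e., copied once per vector lane.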
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                    &SE, &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;

  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it computes <vectorwidth> values at a time)
  // or, if its values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
}