//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assume aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (!I || !SE->isSCEVable(I->getType()))
    return false;

  if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
    if (!isa<SCEVCouldNotCompute>(Scev))
      if (!hasScalarDepsInsideRegion(Scev, R))
        return true;

  return false;
}

bool polly::isIgnoredIntrinsic(const Value *V) {
  if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are supported/ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are supported/ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are supported/ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return true;
    default:
      break;
    }
  }
  return false;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, Pass *P, LoopInfo &LI,
                               ScalarEvolution &SE, IslExprBuilder *ExprBuilder)
    : Builder(B), P(P), LI(LI), SE(SE), ExprBuilder(ExprBuilder) {}

Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
                                   ValueMapT &BBMap, ValueMapT &GlobalMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
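  // For all other values the lookup order below matters: GlobalMap is
  // consulted before the block-local BBMap, so globally remapped values take
  // precedence over block-local copies.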
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst,
                                    ValueMapT &BBMap, ValueMapT &GlobalMap,
                                    LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. Currently, they just crash the code generation
  // as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
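  // If an operand cannot be remapped, getNewValue returns null; in that case
  // the half-initialized clone is deleted again below and nothing is emitted.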
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS,
                                    getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt,
                                           const MemoryAccess &MA) {
  isl_pw_multi_aff *PWAccRel;
  isl_union_map *Schedule;
  isl_ast_expr *Expr;
  isl_ast_build *Build = Stmt.getAstBuild();

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  Schedule = isl_ast_build_get_schedule(Build);
  PWAccRel = MA.applyScheduleToAccessRelation(Schedule);

  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
  Expr = isl_ast_expr_address_of(Expr);

  return ExprBuilder->create(Expr);
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer,
    ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  const MemoryAccess &MA = Stmt.getAccessFor(Inst);

  Value *NewPointer;
  if (MA.hasNewAccessRelation())
    NewPointer = getNewAccessOperand(Stmt, MA);
  else
    NewPointer =
        getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt,
                                           const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                    GlobalMap, LTS, getLoopForInst(Store));

  Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
                                               Store->getAlignment());
  return NewStore;
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions direct the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
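    // (In a combined expression the map subscript and the call could be
    // evaluated in either order, and generateScalarLoad may itself insert
    // into BBMap while synthesizing the address.)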
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return;
    default:
      // Other intrinsics are copied.
      break;
    }
  }

  copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS);
}

void BlockGenerator::copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap,
                            LoopToScevMapT &LTS) {
  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane],
                            VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps,
    bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
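  // A stride -1 access runs backwards through memory: the last vector lane
  // reads the lowest address. Generate the pointer for that lane, load a full
  // vector upwards from it, and reverse the loaded vector afterwards.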
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(
      Stmt, Load, Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
      VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt,
                                                    const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap,
                                               GlobalMaps[0], VLTS[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(
        Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Stmt.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
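  // (If the address was computed by vectorized instructions it exists only as
  // a vector value so far; extractScalarValues materializes the per-lane
  // pointers in the scalar maps.)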
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt,
                                         const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt,
                                          const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  const MemoryAccess &Access = Stmt.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(
        Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(
          Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has already been extracted, all of them have
      // been, as the loop below always extracts every lane at once. So there
      // is no need to check for the existence of the remaining ones.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt,
                                              const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    GlobalMaps[VectorLane], VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt,
                                           const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions direct the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB(ScopStmt &Stmt) {
  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating <vectorwidth> values at a
  // time) or, if the values are calculated using scalar operations, it appears
  // once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap);
}