//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assume aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (!I || !SE->isSCEVable(I->getType()))
    return false;

  if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
    if (!isa<SCEVCouldNotCompute>(Scev))
      if (!hasScalarDepsInsideRegion(Scev, R))
        return true;

  return false;
}
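// For illustration (value names are hypothetical): an affine induction
// variable such as %i.0 whose SCEV is {0,+,1}<%for.body> can be synthesized
// directly from the new schedule, whereas the result of a load is only a
// SCEVUnknown referring to a value defined inside the region, so
// hasScalarDepsInsideRegion() rejects it and the load must be copied
// explicitly.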
bool polly::isIgnoredIntrinsic(const Value *V) {
  if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are supported/ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are supported/ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are supported/ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return true;
    default:
      break;
    }
  }
  return false;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI,
                               ScalarEvolution &SE, DominatorTree &DT,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT) {}

Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
                                   ValueMapT &BBMap, ValueMapT &GlobalMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // We assume constants never change. This avoids map lookups for many calls
  // to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEV-codegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst,
                                    ValueMapT &BBMap, ValueMapT &GlobalMap,
                                    LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to copy
  // them correctly. In the current state, they just crash the code generation
  // because their metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS,
                                    getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
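// To illustrate the copy logic above with hypothetical value names: a
// statement instruction such as
//
//   %add = add nsw i32 %i.0, %n
//
// is cloned, its operands are remapped through getNewValue() (here %i.0 to
// the recomputed induction value, while the scop-invariant %n is reused
// as-is), and the clone is inserted under the fixed "p_" prefix:
//
//   %p_add = add nsw i32 %p_i.0, %n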
Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt,
                                           const MemoryAccess &MA) {
  isl_pw_multi_aff *PWAccRel;
  isl_union_map *Schedule;
  isl_ast_expr *Expr;
  isl_ast_build *Build = Stmt.getAstBuild();

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  Schedule = isl_ast_build_get_schedule(Build);
  PWAccRel = MA.applyScheduleToAccessRelation(Schedule);

  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
  Expr = isl_ast_expr_address_of(Expr);

  return ExprBuilder->create(Expr);
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer,
    ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  const MemoryAccess &MA = Stmt.getAccessFor(Inst);

  Value *NewPointer;
  if (MA.hasNewAccessRelation())
    NewPointer = getNewAccessOperand(Stmt, MA);
  else
    NewPointer =
        getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt,
                                           const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                    GlobalMap, LTS, getLoopForInst(Store));

  Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
                                               Store->getAlignment());
  return NewStore;
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions steer the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  // The set of skipped intrinsics is exactly the one accepted by
  // isIgnoredIntrinsic() above.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS);
}

void BlockGenerator::copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap,
                            LoopToScevMapT &LTS) {
  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS);
}
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane],
                            VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}
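// Sketch of the IR getVectorValue() builds for a scalar %val at vector
// width 4 (value names and the element type are hypothetical):
//
//   %v0 = insertelement <4 x double> undef, double %val.lane0, i32 0
//   %v1 = insertelement <4 x double> %v0, double %val.lane1, i32 1
//   %v2 = insertelement <4 x double> %v1, double %val.lane2, i32 2
//   %v3 = insertelement <4 x double> %v2, double %val.lane3, i32 3
//
// Each %val.laneN is the per-lane scalar produced by getNewValue() with the
// maps of lane N.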
Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps,
    bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(
      Stmt, Load, Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
      VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt,
                                                    const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap,
                                               GlobalMaps[0], VLTS[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(
        Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
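// The three load strategies above produce, for an element type of double at
// vector width 4, IR roughly along these lines (value names illustrative,
// old typed-pointer syntax):
//
//   Stride one:   %v = load <4 x double>* %vector_ptr       ; one wide load
//   Stride zero:  %s = load <1 x double>* %vector_ptr       ; one element
//                 %v = shufflevector <1 x double> %s, ... zeroinitializer
//   Unknown:      four scalar loads combined with insertelement
//
// A stride of -1 takes the stride-one path with NegativeStride set and then
// reverses the lanes with a shufflevector whose mask is <3, 2, 1, 0>.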
void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Stmt.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt,
                                         const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Cannot generate vector code for instruction");

  const CastInst *Cast = cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt,
                                          const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
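// For example, at vector width 4 a scalar floating-point addition
//
//   %sum = fadd float %a, %b
//
// is turned by copyBinaryInst() into a single wide operation on the vector
// values of both operands (names illustrative):
//
//   %sump_vec = fadd <4 x float> %a.vec, %b.vec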
void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  const MemoryAccess &Access = Stmt.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(
        Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(
          Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element was already extracted, all of them have been
      // extracted by this code before, so there is no need to check for the
      // existence of each individual one.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt,
                                              const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < VectorWidth; VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    GlobalMaps[VectorLane], VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
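// Sketch of the scalarization fallback for an instruction we cannot widen,
// e.g. a call, at vector width 4 (value names and the callee @f are
// hypothetical): the vector operand is decomposed with extractelement, the
// instruction is copied once per lane, and, if the result type is a valid
// vector element type, the lane results are recombined:
//
//   %a0 = extractelement <4 x float> %a.vec, i32 0
//   %p_r0 = call float @f(float %a0)
//   ...                                       ; lanes 1 to 3 analogously
//   %r.vec = insertelement <4 x float> %r.vec2, float %p_r3, i32 3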
void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt,
                                           const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions steer the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB(ScopStmt &Stmt) {
  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it computes <vectorwidth> values at a time) or,
  // if its values are calculated using scalar operations, once in every
  // dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap);
}