//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/set.h"
#include "isl/ast_build.h"

#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assume aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (!I || !SE->isSCEVable(I->getType()))
    return false;

  if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
    if (!isa<SCEVCouldNotCompute>(Scev))
      if (!hasScalarDepsInsideRegion(Scev, R))
        return true;

  return false;
}

bool polly::isIgnoredIntrinsic(const Value *V) {
  if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are supported/ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are supported/ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are supported/ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return true;
    default:
      break;
    }
  }
  return false;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI,
                               ScalarEvolution &SE, DominatorTree &DT,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT) {}

Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
                                   ValueMapT &BBMap, ValueMapT &GlobalMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
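  // The remaining lookup order is significant: scop-wide replacements in
  // GlobalMap take precedence over the block-local BBMap, and only if both
  // maps miss do we try to synthesize the value from its SCEV expression.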
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, Stmt.getParent()
                                      ->getRegion()
                                      .getEntry()
                                      ->getParent()
                                      ->getParent()
                                      ->getDataLayout(),
                              "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst,
                                    ValueMapT &BBMap, ValueMapT &GlobalMap,
                                    LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
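  // For example, cloning "%add = add i32 %a, %b" yields an instruction named
  // "p_add" whose operands have been remapped through BBMap and GlobalMap.
  // If an operand cannot be remapped, the clone is dropped again; the assert
  // below guards stores, which must never be dropped.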
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS,
                                    getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt,
                                           const MemoryAccess &MA) {
  isl_pw_multi_aff *PWAccRel;
  isl_union_map *Schedule;
  isl_ast_expr *Expr;
  isl_ast_build *Build = Stmt.getAstBuild();

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  Schedule = isl_ast_build_get_schedule(Build);
  PWAccRel = MA.applyScheduleToAccessRelation(Schedule);

  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
  Expr = isl_ast_expr_address_of(Expr);

  return ExprBuilder->create(Expr);
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer,
    ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  const MemoryAccess &MA = Stmt.getAccessFor(Inst);

  Value *NewPointer;
  if (MA.hasNewAccessRelation())
    NewPointer = getNewAccessOperand(Stmt, MA);
  else
    NewPointer =
        getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

  return NewPointer;
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
                                          ValueMapT &BBMap,
                                          ValueMapT &GlobalMap,
                                          LoopToScevMapT &LTS) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt,
                                           const StoreInst *Store,
                                           ValueMapT &BBMap,
                                           ValueMapT &GlobalMap,
                                           LoopToScevMapT &LTS) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                    GlobalMap, LTS, getLoopForInst(Store));

  Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
                                               Store->getAlignment());
  return NewStore;
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
                                     ValueMapT &BBMap, ValueMapT &GlobalMap,
                                     LoopToScevMapT &LTS) {
  // Terminator instructions direct the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
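    // (Writing "BBMap[Load] = generateScalarLoad(...)" directly would leave
    // the order of the map subscript and the call unspecified, and the
    // subscript may modify the map that the call itself reads.)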
    BBMap[Load] = NewLoad;
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS);
    // Compute NewStore before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Store] = NewStore;
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
      return;
    default:
      // Other intrinsics are copied.
      break;
    }
  }

  copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS);
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
                              LoopToScevMapT &LTS) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, GlobalMap, LTS);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, ValueMapT &GlobalMap,
                                   LoopToScevMapT &LTS) {
  BasicBlock *CopyBB = splitBB(BB);
  copyBB(Stmt, BB, CopyBB, BBMap, GlobalMap, LTS);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, ValueMapT &GlobalMap,
                            LoopToScevMapT &LTS) {
  Builder.SetInsertPoint(CopyBB->begin());
  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane],
                            VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());

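  // Vector accesses are emitted through a pointer to <Width x ElemTy>, so
  // the value we are rebasing must itself be pointer-typed.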
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps,
    bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(
      Stmt, Load, Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
      VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt,
                                                    const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap,
                                               GlobalMaps[0], VLTS[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(
        Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Stmt.getAccessFor(Load);

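  // The stride of the access along the vectorized dimension selects the code
  // below: stride zero becomes one scalar load splatted across all lanes,
  // stride one a single wide load, stride minus one a wide load followed by
  // a reversing shuffle, and any other stride a lane-by-lane gather.
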
  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt,
                                         const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt,
                                          const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store,
                                     ValueMapT &VectorMap,
                                     VectorValueMapT &ScalarMaps) {
  const MemoryAccess &Access = Stmt.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

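  // Stride-one stores become a single wide store through a bitcast pointer;
  // every other stride falls back to extracting each lane and storing it
  // separately.
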
  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(
        Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(
          Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element has been extracted, all of them have already
      // been extracted by this code, so there is no need to check for the
      // existence of the rest.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt,
                                              const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    GlobalMaps[VectorLane], VLTS[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt,
                                           const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
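  // Copy the instruction in the most vectorized form available: loads get
  // dedicated handling, stores, casts, and binary operators are vectorized
  // when their operands already are, and everything else is scalarized lane
  // by lane.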
  // Terminator instructions direct the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) {
  assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
                               "the vector block generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vector map is not used and
  // the scalar map has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction either appears
  // once in the vector map (as it computes <vectorwidth> values at a time)
  // or, if its values are computed using scalar operations, once in every
  // dimension of the scalar map.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap);
}

BasicBlock *RegionGenerator::repairDominance(
    BasicBlock *BB, BasicBlock *BBCopy,
    DenseMap<BasicBlock *, BasicBlock *> &BlockMap) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return BBCopyIDom;
}

void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
                               LoopToScevMapT &LTS) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // The "BBMaps" for the whole region.
  DenseMap<BasicBlock *, ValueMapT> RegionMaps;

  // A map from old to new blocks in the region.
  DenseMap<BasicBlock *, BasicBlock *> BlockMap;

  // Iterate over all blocks in the region in a breadth-first search.
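  // Every path from the entry to a block passes through the block's
  // immediate dominator, so a breadth-first traversal reaches the dominator
  // first and its copy already exists when repairDominance is called.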
  std::deque<BasicBlock *> Blocks;
  SmallPtrSet<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(R->getEntry());
  SeenBlocks.insert(R->getEntry());

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy, BlockMap);

    // Get the mapping for this block and initialize it with the mapping
    // available at its immediate dominator (in the new region).
    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap = RegionMaps[BBCopyIDom];

    // Copy the block with the BlockGenerator.
    copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS);

    // And continue with the new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI).second)
        Blocks.push_back(*SI);

    // In order to remap PHI nodes we also store the basic block mappings.
    BlockMap[BB] = BBCopy;
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".as.exit");
  BlockMap[R->getExit()] = ExitBBCopy;

  repairDominance(R->getExit(), ExitBBCopy, BlockMap);

  // As the block generator does not handle control flow, we need to add the
  // region's control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    BranchInst *BI = cast<BranchInst>(BB->getTerminator());

    BasicBlock *BBCopy = BlockMap[BB];
    Instruction *BICopy = BBCopy->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap.insert(BlockMap.begin(), BlockMap.end());

    Builder.SetInsertPoint(BBCopy);
    copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS);
    BICopy->eraseFromParent();
  }

  // Reset the insert point of the builder to the new region exit block.
  Builder.SetInsertPoint(ExitBBCopy->begin());
}