//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

static cl::opt<bool> DebugPrinting(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI,
                               ScalarEvolution &SE, DominatorTree &DT,
                               ScalarAllocaMapTy &ScalarMap,
                               ScalarAllocaMapTy &PHIOpMap,
                               EscapeUsersAllocaMapTy &EscapeMap,
                               ValueMapT &GlobalMap,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
      EscapeMap(EscapeMap), GlobalMap(GlobalMap) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV);

  BBMap[Old] = Expanded;
  return Expanded;
}

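// Look up the new value of @p Old. The lookup order implemented below is:
// first the GlobalMap (values replaced for the whole scop, e.g., hoisted
// loads), then the basic-block local BBMap, then an attempt to synthesize the
// value from its SCEV; values that are constant within the scop are returned
// unchanged.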
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // Constants that do not reference any named value can always remain
  // unchanged. Handle them early to avoid expensive map lookups. We do not
  // take the fast-path for external constants which are referenced through
  // globals as these may need to be rewritten when distributing code across
  // different LLVM modules.
  if (isa<Constant>(Old) && !isa<GlobalValue>(Old))
    return Old;

  // Inline asm is like a constant to us.
  if (isa<InlineAsm>(Old))
    return Old;

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
    return New;

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return Old;

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->contains(Inst->getParent()))
      return Old;

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

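// Compute the address the copied access instruction should use. If a new
// access relation was provided through @p NewAccesses, the address is built
// from the corresponding isl AST expression; otherwise the original pointer
// operand is simply remapped into the generated code.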
Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);

  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, MA.getId());

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = MA.getAccessValue()->getType()->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }

  return getNewValue(Stmt, Inst.getPointerOperand(), BBMap, LTS,
                     getLoopForStmt(Stmt));
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, LoadInst *Load,
                                          ValueMapT &BBMap, LoopToScevMapT &LTS,
                                          isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateScalarStore(ScopStmt &Stmt, StoreInst *Store,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, LTS,
                                    getLoopForStmt(Stmt));

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                          ": ", ValueOperand, "\n");

  Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
}

bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &LI, &SE, L);
}

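// Copy a single instruction into the generated statement, dispatching to the
// specialized handlers for loads, stores, and PHI nodes; all remaining
// instructions are cloned operand-by-operand in copyInstScalar().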
void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable statements will be generated on-demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    generateScalarStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block in
  // their alloca.
  generateScalarStores(Stmt, LTS, BBMap);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase,
                                         ScalarAllocaMapTy &Map,
                                         const char *NameExt) {
  // If no alloca was found create one and insert it in the entry block.
  if (!Map.count(ScalarBase)) {
    auto *Ty = ScalarBase->getType();
    auto NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
    EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
    NewAddr->insertBefore(&*EntryBB->getFirstInsertionPt());
    Map[ScalarBase] = NewAddr;
  }

  auto Addr = Map[ScalarBase];

  if (auto NewAddr = GlobalMap.lookup(Addr))
    return NewAddr;

  return Addr;
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  if (Access.isPHIKind())
    return getOrCreatePHIAlloca(Access.getBaseAddr());
  else
    return getOrCreateScalarAlloca(Access.getBaseAddr());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  if (Array->isPHIKind())
    return getOrCreatePHIAlloca(Array->getBasePtr());
  else
    return getOrCreateScalarAlloca(Array->getBasePtr());
}

Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
}

Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
}

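// Note on the demotion scheme implemented by the helpers above: a scalar
// value used across statement boundaries is kept in a function-local alloca
// (".s2a" for ordinary values, ".phiops" for PHI operands). Statements reload
// such scalars on entry (generateScalarLoads) and write them back on exit
// (generateScalarStores); values escaping the scop are reloaded once more
// after the scop during finalization (createScalarFinalization).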
void BlockGenerator::handleOutsideUsers(const Scop &S, Instruction *Inst) {
  // If the escape users of this instruction have already been collected,
  // e.g., because the statement is copied multiple times, we can exit early.
  // Otherwise, collect all escape users and, if any exist, remember them
  // together with the escape alloca in the EscapeMap for later finalization.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction users will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateScalarAlloca(Inst);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getBaseAddr()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}

void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                          ValueMapT &BBMap) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() && "Region statements need to use the "
                               "generateScalarStores() function in the "
                               "RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    Value *Val = MA->getAccessValue();
    if (MA->isAnyPHIKind()) {
      assert(MA->getIncoming().size() >= 1 &&
             "Block statements have exactly one exiting block, or multiple but "
             "with same incoming block and value");
      assert(std::all_of(MA->getIncoming().begin(), MA->getIncoming().end(),
                         [&](std::pair<BasicBlock *, Value *> p) -> bool {
                           return p.first == Stmt.getBasicBlock();
                         }) &&
             "Incoming block must be statement's block");
      Val = MA->getIncoming()[0].second;
    }
    auto *Address = getOrCreateAlloca(*MA);

    Val = getNewValue(Stmt, Val, BBMap, LTS, L);
    assert((!isa<Instruction>(Val) ||
            DT.dominates(cast<Instruction>(Val)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(Val, Address);
  }
}

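// Before the optimized region is executed, all demoted scalars and PHI
// operands that carry a value into the scop must be written to their allocas,
// so that the reloads inside the generated statements observe the correct
// initial values.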
void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();

  // The split block __just before__ the region and optimized region.
  BasicBlock *SplitBB = S.getEnteringBlock();
  BranchInst *SplitBBTerm = cast<BranchInst>(SplitBB->getTerminator());
  assert(SplitBBTerm->getNumSuccessors() == 2 && "Bad region entering block!");

  // Get the start block of the __optimized__ region.
  BasicBlock *StartBB = SplitBBTerm->getSuccessor(0);
  if (StartBB == S.getEntry())
    StartBB = SplitBBTerm->getSuccessor(1);

  Builder.SetInsertPoint(StartBB->getTerminator());

  for (auto &Pair : S.arrays()) {
    auto &Array = Pair.second;
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'SplitBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != SplitBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from SplitBB");

      int Idx = PHI->getBasicBlockIndex(SplitBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either
    // exit PHI nodes we model as common scalars but without initialization,
    // or incoming PHI nodes that need to be initialized. Check if the first
    // is the case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(),
                        getOrCreateScalarAlloca(Array->getBasePtr()));
  }
}

void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.getFirst();
    const auto &EscapeMappingValue = EscapeMapping.getSecond();
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}

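// The control flow produced by createScalarFinalization() looks roughly as
// follows (block and value names are illustrative only):
//
//   polly.exiting:                ; optimized version of the scop
//     %v.final_reload = load ...  ; reload of the demoted value
//     br label %merge
//   scop.exiting:                 ; original version of the scop
//     br label %merge
//   merge:
//     %v.merge = phi [ %v.final_reload, %polly.exiting ],
//                    [ %v, %scop.exiting ]
//
// All escaping users are then rewritten to use %v.merge.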
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Pair : S.arrays()) {
    auto &Array = Pair.second;

    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers
    // the relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Inst);
  }
}

void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &Pair : S.arrays()) {
    auto &SAI = Pair.second;
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive
    // either the original PHI's value or the reloaded incoming values from
    // the generated code; an llvm::Value is merged between the original
    // code's value and the generated one.
    if (!SAI->isValueKind() && !SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName();
    Value *ScalarAddr = getOrCreateScalarAlloca(PHI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}

void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}

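// Widen a scalar value to a vector of getVectorWidth() lanes. If no vector
// value has been cached for @p Old yet, the per-lane scalar values are
// computed with getNewValue() and combined with insertelement instructions.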
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

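// Generate a vector load by dispatching on the access stride with respect to
// the vectorized schedule dimension: stride zero becomes a single scalar load
// followed by a splat shuffle, stride one (or minus one) becomes one wide
// load (reversed via a shuffle for the negative case), and any other stride
// falls back to element-wise scalar loads.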
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

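// Stores mirror the load strategy: a stride-one access is written back with a
// single wide store, while all other strides extract the individual lanes
// from the value vector and store them one by one.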
void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

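// Copy one instruction of the statement, generating vector code where a
// vector lowering exists (loads, stores, casts, and binary operators) and
// falling back to per-lane scalar copies for everything else.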
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getBaseAddr()] = VectorVal;
  }
}

void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}

void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
                               "the vector block generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time) or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}

BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return BBCopyIDom;
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable
// when leaving a subregion. The straightforward
// DT.dominates(BB, R->getExitBlock()) does not work in cases where the exit
// block has edges from outside the region. In that case the llvm::Value would
// never be usable in the exit block. The RegionGenerator however creates a
// new exit block ('ExitBBCopy') for the subregion's exiting edges only. We
// need to determine whether an llvm::Value is usable in there. We do this by
// checking whether it dominates all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}

// Find the direct dominator of the subregion's exit block if the subregion
// was simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}

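// Copy a non-affine subregion statement. The blocks of the subregion are
// visited in a breadth-first order and copied one by one with the
// BlockGenerator; each copy inherits the value map of its copied immediate
// dominator. Control flow between the copies is reconstructed afterwards, and
// a counting PHI node is added to every copied loop header so SCEVs referring
// to the old induction variables can be rewritten.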
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, EntryBBMap);

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI))
      BlockMap[*PI] = EntryBBCopy;

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallPtrSet<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    BlockMap[BB] = BBCopy;

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI).second)
        Blocks.push_back(*SI);

    // Remember value in case it is visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  BlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = BlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy && "Common exit dominator must be within region; at "
                          "least the entry node must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    BasicBlock *BBCopy = BlockMap[BB];
    TerminatorInst *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopy->empty())
        BBCopy->begin()->eraseFromParent();
      new UnreachableInst(BBCopy->getContext(), BBCopy);
      continue;
    }

    Instruction *BICopy = BBCopy->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap.insert(BlockMap.begin(), BlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = BlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, BlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap);
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}

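// Reconstruct a PHI node in the generated subregion exit block that merges
// the values recorded as incoming in the PHI memory access. Each incoming
// value is remapped with the value map of its (copied) incoming block.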
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = BlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlock = BlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlock->getTerminator());
    assert(RegionMaps.count(NewIncomingBlock));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlock];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlock);
  }

  return NewPHI;
}

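// Compute the value a scalar write access carries out of the subregion: for
// PHI kinds with a single incoming value the remapped value itself, for
// multiple incoming values a freshly built exit PHI, and for plain values the
// remapped access value.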
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MK_Value accesses leaving the subregion must dominate the exit block;
  // just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}

void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                           ValueMapT &BBMap) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    Value *Address = getOrCreateAlloca(*MA);
    assert((!isa<Instruction>(NewVal) ||
            DT.dominates(cast<Instruction>(NewVal)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    Builder.CreateStore(NewVal, Address);
  }
}

void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  Region *StmtR = Stmt.getRegion();

  // If the incoming block was not yet copied mark this PHI as incomplete.
  // Once the block will be copied the incoming value will be added.
  BasicBlock *BBCopy = BlockMap[IncomingBB];
  if (!BBCopy) {
    assert(StmtR->contains(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  Value *OpCopy = nullptr;
  if (StmtR->contains(IncomingBB)) {
    assert(RegionMaps.count(BBCopy) &&
           "Incoming PHI block did not have a BBMap");
    ValueMapT &BBCopyMap = RegionMaps[BBCopy];

    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block
    // change it, otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(BBCopy->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopy)
      Builder.SetInsertPoint(&*IP);
  } else {

    if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
      return;

    Value *PHIOpAddr = getOrCreatePHIAlloca(const_cast<PHINode *>(PHI));
    OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
                          BlockMap[IncomingBB]->getTerminator());
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  assert(BBCopy && "Incoming PHI block was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopy);
}

void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (unsigned u = 0; u < NumIncoming; u++)
    addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), LTS);
}