//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/aff.h"
#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

static cl::opt<bool> DebugPrinting(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

bool polly::canSynthesize(const Value *V, const llvm::LoopInfo *LI,
                          ScalarEvolution *SE, const Region *R) {
  if (!V || !SE->isSCEVable(V->getType()))
    return false;

  if (const SCEV *Scev = SE->getSCEV(const_cast<Value *>(V)))
    if (!isa<SCEVCouldNotCompute>(Scev))
      if (!hasScalarDepsInsideRegion(Scev, R))
        return true;

  return false;
}

bool polly::isIgnoredIntrinsic(const Value *V) {
  if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
    switch (IT->getIntrinsicID()) {
    // Lifetime markers are supported/ignored.
    case llvm::Intrinsic::lifetime_start:
    case llvm::Intrinsic::lifetime_end:
    // Invariant markers are supported/ignored.
    case llvm::Intrinsic::invariant_start:
    case llvm::Intrinsic::invariant_end:
    // Some misc annotations are supported/ignored.
    case llvm::Intrinsic::var_annotation:
    case llvm::Intrinsic::ptr_annotation:
    case llvm::Intrinsic::annotation:
    case llvm::Intrinsic::donothing:
    case llvm::Intrinsic::assume:
    case llvm::Intrinsic::expect:
    // Some debug info intrinsics are supported/ignored.
    case llvm::Intrinsic::dbg_value:
    case llvm::Intrinsic::dbg_declare:
      return true;
    default:
      break;
    }
  }
  return false;
}

BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI,
                               ScalarEvolution &SE, DominatorTree &DT,
                               ScalarAllocaMapTy &ScalarMap,
                               ScalarAllocaMapTy &PHIOpMap,
                               EscapeUsersAllocaMapTy &EscapeMap,
                               ValueMapT &GlobalMap,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
      EscapeMap(EscapeMap), GlobalMap(GlobalMap) {}

Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        llvm::ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());

        Scop &S = *Stmt.getParent();
        const DataLayout &DL =
            S.getRegion().getEntry()->getParent()->getParent()->getDataLayout();
        auto IP = Builder.GetInsertPoint();

        assert(IP != Builder.GetInsertBlock()->end() &&
               "Only instructions can be insert points for SCEVExpander");
        Value *Expanded = expandCodeFor(S, SE, DL, "polly", NewScev,
                                        Old->getType(), IP, &VTV);

        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  return nullptr;
}

Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
    return New;

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}

void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, const Instruction *Inst, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getAccessFor(Inst);

  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, MA.getId());

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = MA.getAccessValue()->getType()->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy) {
      assert(OldPtrTy->getPointerElementType()->getPrimitiveSizeInBits() ==
                 NewPtrTy->getPointerElementType()->getPrimitiveSizeInBits() &&
             "Pointer types to elements with different size found");
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    }
    return Address;
  }

  return getNewValue(Stmt, Pointer, BBMap, LTS, getLoopForInst(Inst));
}

Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}

Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, LoadInst *Load,
                                          ValueMapT &BBMap, LoopToScevMapT &LTS,
                                          isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  auto *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateScalarStore(ScopStmt &Stmt, StoreInst *Store,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Stmt, Store, Pointer, BBMap, LTS, NewAccesses);
  Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, LTS,
                                    getLoopForInst(Store));

  if (DebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                          ": ", ValueOperand, "\n");

  Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {

  // First check for possible scalar dependences for this instruction.
  generateScalarLoads(Stmt, Inst, BBMap);

  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  Loop *L = getLoopForInst(Inst);
  if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
      canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) {
    // Synthesizable statements will be generated on-demand.
    return;
  }

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    generateScalarStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  Builder.SetInsertPoint(CopyBB->begin());
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);

  // After a basic block was copied store all scalars that escape this block
  // in their alloca. First the scalars that have dependences inside the SCoP,
  // then the ones that might escape the SCoP.
  generateScalarStores(Stmt, BB, LTS, BBMap);

  const Region &R = Stmt.getParent()->getRegion();
  for (Instruction &Inst : *BB)
    handleOutsideUsers(R, &Inst, BBMap[&Inst]);
}

Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase,
                                         ScalarAllocaMapTy &Map,
                                         const char *NameExt) {
  // If no alloca was found create one and insert it in the entry block.
  if (!Map.count(ScalarBase)) {
    auto *Ty = ScalarBase->getType();
    auto NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
    EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
    NewAddr->insertBefore(EntryBB->getFirstInsertionPt());
    Map[ScalarBase] = NewAddr;
  }

  auto Addr = Map[ScalarBase];

  if (GlobalMap.count(Addr))
    return GlobalMap[Addr];

  return Addr;
}

Value *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access) {
  if (Access.getScopArrayInfo()->isPHI())
    return getOrCreatePHIAlloca(Access.getBaseAddr());
  else
    return getOrCreateScalarAlloca(Access.getBaseAddr());
}

Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
}

Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) {
  return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
}

void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst,
                                        Value *InstCopy, Value *Address) {
  // If there are escape users we get the alloca for this instruction and put
  // it in the EscapeMap for later finalization. Lastly, if the instruction
  // was copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction user will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (R.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = Address ? Address : getOrCreateScalarAlloca(Inst);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}

void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
                                         const Instruction *Inst,
                                         ValueMapT &BBMap) {
  auto *MAL = Stmt.lookupAccessesFor(Inst);

  if (!MAL)
    return;

  for (MemoryAccess *MA : *MAL) {
    if (MA->isExplicit() || !MA->isRead())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    BBMap[MA->getBaseAddr()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}

Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R,
                                         ScopStmt &Stmt, LoopToScevMapT &LTS,
                                         ValueMapT &BBMap) {
  // If the value we want to store is an instruction we might have demoted it
  // in order to make it accessible here. In such a case a reload is
  // necessary. If it is no instruction it will always be a value that
  // dominates the current point and we can just use it. In total there are 4
  // options:
  //  (1) The value is no instruction ==> use the value.
  //  (2) The value is an instruction that was split out of the region prior to
  //      code generation ==> use the instruction as it dominates the region.
  //  (3) The value is an instruction:
  //      (a) The value was defined in the current block, thus a copy is in
  //          the BBMap ==> use the mapped value.
  //      (b) The value was defined in a previous block, thus we demoted it
  //          earlier ==> use the reloaded value.
  Instruction *ScalarValueInst = dyn_cast<Instruction>(ScalarValue);
  if (!ScalarValueInst)
    return ScalarValue;

  if (!R.contains(ScalarValueInst)) {
    if (Value *ScalarValueCopy = GlobalMap.lookup(ScalarValueInst))
      return /* Case (3a) */ ScalarValueCopy;
    else
      return /* Case 2 */ ScalarValue;
  }

  if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst))
    return /* Case (3a) */ ScalarValueCopy;

  if ((Stmt.isBlockStmt() &&
       Stmt.getBasicBlock() == ScalarValueInst->getParent()) ||
      (Stmt.isRegionStmt() && Stmt.getRegion()->contains(ScalarValueInst))) {
    auto SynthesizedValue = trySynthesizeNewValue(
        Stmt, ScalarValueInst, BBMap, LTS, getLoopForInst(ScalarValueInst));

    if (SynthesizedValue)
      return SynthesizedValue;
  }

  // Case (3b)
  Value *Address = getOrCreateScalarAlloca(ScalarValueInst);
  ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload");

  return ScalarValue;
}

void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
                                          LoopToScevMapT &LTS,
                                          ValueMapT &BBMap) {
  const Region &R = Stmt.getParent()->getRegion();

  assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() &&
         "Region statements need to use the generateScalarStores() "
         "function in the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isExplicit() || MA->isRead())
      continue;

    Value *Val = MA->getAccessValue();
    auto *Address = getOrCreateAlloca(*MA);

    Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap);
    Builder.CreateStore(Val, Address);
  }
}

void BlockGenerator::createScalarInitialization(Scop &S) {
  Region &R = S.getRegion();
  // The split block __just before__ the region and optimized region.
  BasicBlock *SplitBB = R.getEnteringBlock();
  BranchInst *SplitBBTerm = cast<BranchInst>(SplitBB->getTerminator());
  assert(SplitBBTerm->getNumSuccessors() == 2 && "Bad region entering block!");

  // Get the start block of the __optimized__ region.
  BasicBlock *StartBB = SplitBBTerm->getSuccessor(0);
  if (StartBB == R.getEntry())
    StartBB = SplitBBTerm->getSuccessor(1);

  Builder.SetInsertPoint(StartBB->getTerminator());

  for (auto &Pair : S.arrays()) {
    auto &Array = Pair.second;
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHI()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'SplitBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!R.contains(*BI) && *BI != SplitBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from SplitBB");

      int Idx = PHI->getBasicBlockIndex(SplitBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && R.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are exit PHI
    // nodes we model as common scalars but do not need to initialize.
    if (Inst && isa<PHINode>(Inst))
      continue;

    ValueMapT EmptyMap;
    Builder.CreateStore(Array->getBasePtr(),
                        getOrCreateScalarAlloca(Array->getBasePtr()));
  }
}

void BlockGenerator::createScalarFinalization(Region &R) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = R.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = R.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.getFirst();
    const auto &EscapeMappingValue = EscapeMapping.getSecond();
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Instruction *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}

void BlockGenerator::finalizeSCoP(Scop &S) {

  // Handle PHI nodes that were in the original exit and are now
  // moved into the region exiting block.
  if (!S.hasSingleExitEdge()) {
    for (Instruction &I : *S.getRegion().getExitingBlock()) {
      PHINode *PHI = dyn_cast<PHINode>(&I);
      if (!PHI)
        break;

      assert(PHI->getNumUses() == 1);
      assert(ScalarMap.count(PHI->user_back()));

      handleOutsideUsers(S.getRegion(), PHI, nullptr,
                         ScalarMap[PHI->user_back()]);
    }
  }

  createScalarInitialization(S);
  createScalarFinalization(S.getRegion());
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}

Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = nullptr;
  NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset],
                                        VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}

Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap,
                                               VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(
        Stmt, Load, Pointer, ScalarMaps[i], VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForInst(Inst));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForInst(Inst);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForInst(Store));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(
        Stmt, Store, Pointer, ScalarMaps[0], VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(
          Stmt, Store, Pointer, ScalarMaps[i], VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}

bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }

void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
                               "the vector block generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time), or (if the values are calculated using scalar operations) it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);
}

BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return BBCopyIDom;
}

void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(EntryBBCopy->begin());

  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI))
      BlockMap[*PI] = EntryBBCopy;

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallPtrSet<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // In order to remap PHI nodes we store also basic block mappings.
    BlockMap[BB] = BBCopy;

    // Get the mapping for this block and initialize it with the mapping
    // available at its immediate dominator (in the new region).
    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap = RegionMaps[BBCopyIDom];

    // Copy the block with the BlockGenerator.
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we store also basic block mappings.
    BlockMap[BB] = BBCopy;

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI).second)
        Blocks.push_back(*SI);
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  BlockMap[R->getExit()] = ExitBBCopy;

  repairDominance(R->getExit(), ExitBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    BasicBlock *BBCopy = BlockMap[BB];
    TerminatorInst *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopy->empty())
        BBCopy->begin()->eraseFromParent();
      new UnreachableInst(BBCopy->getContext(), BBCopy);
      continue;
    }

    Instruction *BICopy = BBCopy->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap.insert(BlockMap.begin(), BlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB)
      continue;

    BasicBlock *BBCopy = BlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(BBCopy->begin());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, BlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Reset the old insert point for the build.
  Builder.SetInsertPoint(ExitBBCopy->begin());
}

void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
                                          const Instruction *Inst,
                                          ValueMapT &BBMap) {

  // Inside a non-affine region PHI nodes are copied, not demoted. Once the
  // phi is copied it will reload all inputs from outside the region, hence
  // we do not need to generate code for the read access of the operands of a
  // PHI.
  if (isa<PHINode>(Inst))
    return;

  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap);
}

void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
                                           LoopToScevMapT &LTS,
                                           ValueMapT &BBMap) {
  const Region &R = Stmt.getParent()->getRegion();

  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  for (MemoryAccess *MA : Stmt) {

    if (MA->isExplicit() || MA->isRead())
      continue;

    Instruction *ScalarInst = MA->getAccessInstruction();

    // Only generate accesses that belong to this basic block.
    if (ScalarInst->getParent() != BB)
      continue;

    Value *Val = MA->getAccessValue();

    auto Address = getOrCreateAlloca(*MA);

    Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap);
    Builder.CreateStore(Val, Address);
  }
}

void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  Region *StmtR = Stmt.getRegion();

  // If the incoming block was not yet copied mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopy = BlockMap[IncomingBB];
  if (!BBCopy) {
    assert(StmtR->contains(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  Value *OpCopy = nullptr;
  if (StmtR->contains(IncomingBB)) {
    assert(RegionMaps.count(BBCopy) &&
           "Incoming PHI block did not have a BBMap");
    ValueMapT &BBCopyMap = RegionMaps[BBCopy];

    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForInst(PHI));
  } else {

    if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
      return;

    Value *PHIOpAddr = getOrCreatePHIAlloca(const_cast<PHINode *>(PHI));
    OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
                          BlockMap[IncomingBB]->getTerminator());
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  assert(BBCopy && "Incoming PHI block was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopy);
}

Value *RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                           ValueMapT &BBMap,
                                           LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (unsigned u = 0; u < NumIncoming; u++)
    addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), LTS);
  return PHICopy;
}