1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion, attempting to remove as much 11 // code from the body of a loop as possible. It does this by either hoisting 12 // code into the preheader block, or by sinking code to the exit blocks if it is 13 // safe. This pass also promotes must-aliased memory locations in the loop to 14 // live in registers, thus hoisting and sinking "invariant" loads and stores. 15 // 16 // This pass uses alias analysis for two purposes: 17 // 18 // 1. Moving loop invariant loads and calls out of loops. If we can determine 19 // that a load or call inside of a loop never aliases anything stored to, 20 // we can hoist it or sink it like any other instruction. 21 // 2. Scalar Promotion of Memory - If there is a store instruction inside of 22 // the loop, we try to move the store to happen AFTER the loop instead of 23 // inside of the loop. This can only happen if a few conditions are true: 24 // A. The pointer stored through is loop invariant 25 // B. There are no stores or loads in the loop which _may_ alias the 26 // pointer. There are no calls in the loop which mod/ref the pointer. 27 // If these conditions are true, we can promote the loads and stores in the 28 // loop of the pointer to use a temporary alloca'd variable. We then use 29 // the SSAUpdater to construct the appropriate SSA form for the value. 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Transforms/Scalar/LICM.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/BasicAliasAnalysis.h" 38 #include "llvm/Analysis/CaptureTracking.h" 39 #include "llvm/Analysis/ConstantFolding.h" 40 #include "llvm/Analysis/GlobalsModRef.h" 41 #include "llvm/Analysis/Loads.h" 42 #include "llvm/Analysis/LoopInfo.h" 43 #include "llvm/Analysis/LoopPass.h" 44 #include "llvm/Analysis/MemoryBuiltins.h" 45 #include "llvm/Analysis/MemorySSA.h" 46 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 47 #include "llvm/Analysis/ScalarEvolution.h" 48 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 49 #include "llvm/Analysis/TargetLibraryInfo.h" 50 #include "llvm/Analysis/Utils/Local.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/CFG.h" 53 #include "llvm/IR/Constants.h" 54 #include "llvm/IR/DataLayout.h" 55 #include "llvm/IR/DerivedTypes.h" 56 #include "llvm/IR/Dominators.h" 57 #include "llvm/IR/Instructions.h" 58 #include "llvm/IR/IntrinsicInst.h" 59 #include "llvm/IR/LLVMContext.h" 60 #include "llvm/IR/Metadata.h" 61 #include "llvm/IR/PredIteratorCache.h" 62 #include "llvm/Support/CommandLine.h" 63 #include "llvm/Support/Debug.h" 64 #include "llvm/Support/raw_ostream.h" 65 #include "llvm/Transforms/Scalar.h" 66 #include "llvm/Transforms/Scalar/LoopPassManager.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 #include "llvm/Transforms/Utils/LoopUtils.h" 69 #include "llvm/Transforms/Utils/SSAUpdater.h" 70 #include <algorithm> 71 #include <utility> 72 using namespace llvm; 73 74 #define DEBUG_TYPE "licm" 75 76 STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 77 STATISTIC(NumHoisted, 
"Number of instructions hoisted out of loop"); 78 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 79 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 80 STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); 81 82 /// Memory promotion is enabled by default. 83 static cl::opt<bool> 84 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), 85 cl::desc("Disable memory promotion in LICM pass")); 86 87 static cl::opt<uint32_t> MaxNumUsesTraversed( 88 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), 89 cl::desc("Max num uses visited for identifying load " 90 "invariance in loop using invariant start (default = 8)")); 91 92 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); 93 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 94 const LoopSafetyInfo *SafetyInfo, 95 TargetTransformInfo *TTI, bool &FreeInLoop); 96 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 97 const LoopSafetyInfo *SafetyInfo, 98 OptimizationRemarkEmitter *ORE); 99 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 100 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, 101 OptimizationRemarkEmitter *ORE, bool FreeInLoop); 102 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 103 const DominatorTree *DT, 104 const Loop *CurLoop, 105 const LoopSafetyInfo *SafetyInfo, 106 OptimizationRemarkEmitter *ORE, 107 const Instruction *CtxI = nullptr); 108 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 109 const AAMDNodes &AAInfo, 110 AliasSetTracker *CurAST); 111 static Instruction * 112 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 113 const LoopInfo *LI, 114 const LoopSafetyInfo *SafetyInfo); 115 116 namespace { 117 struct LoopInvariantCodeMotion { 118 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 119 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, 120 ScalarEvolution *SE, MemorySSA *MSSA, 121 OptimizationRemarkEmitter *ORE, bool DeleteAST); 122 123 DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { 124 return LoopToAliasSetMap; 125 } 126 127 private: 128 DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; 129 130 AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 131 AliasAnalysis *AA); 132 }; 133 134 struct LegacyLICMPass : public LoopPass { 135 static char ID; // Pass identification, replacement for typeid 136 LegacyLICMPass() : LoopPass(ID) { 137 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 141 if (skipLoop(L)) { 142 // If we have run LICM on a previous loop but now we are skipping 143 // (because we've hit the opt-bisect limit), we need to clear the 144 // loop alias information. 145 for (auto <AS : LICM.getLoopToAliasSetMap()) 146 delete LTAS.second; 147 LICM.getLoopToAliasSetMap().clear(); 148 return false; 149 } 150 151 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 152 MemorySSA *MSSA = EnableMSSALoopDependency 153 ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) 154 : nullptr; 155 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 156 // pass. Function analyses need to be preserved across loop transformations 157 // but ORE cannot be preserved (see comment before the pass definition). 
158 OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 159 return LICM.runOnLoop(L, 160 &getAnalysis<AAResultsWrapperPass>().getAAResults(), 161 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), 162 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 163 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 164 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 165 *L->getHeader()->getParent()), 166 SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); 167 } 168 169 /// This transformation requires natural loop information & requires that 170 /// loop preheaders be inserted into the CFG... 171 /// 172 void getAnalysisUsage(AnalysisUsage &AU) const override { 173 AU.setPreservesCFG(); 174 AU.addRequired<TargetLibraryInfoWrapperPass>(); 175 if (EnableMSSALoopDependency) 176 AU.addRequired<MemorySSAWrapperPass>(); 177 AU.addRequired<TargetTransformInfoWrapperPass>(); 178 getLoopAnalysisUsage(AU); 179 } 180 181 using llvm::Pass::doFinalization; 182 183 bool doFinalization() override { 184 assert(LICM.getLoopToAliasSetMap().empty() && 185 "Didn't free loop alias sets"); 186 return false; 187 } 188 189 private: 190 LoopInvariantCodeMotion LICM; 191 192 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 193 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 194 Loop *L) override; 195 196 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 197 /// set. 198 void deleteAnalysisValue(Value *V, Loop *L) override; 199 200 /// Simple Analysis hook. Delete loop L from alias set map. 201 void deleteAnalysisLoop(Loop *L) override; 202 }; 203 } // namespace 204 205 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, 206 LoopStandardAnalysisResults &AR, LPMUpdater &) { 207 const auto &FAM = 208 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 209 Function *F = L.getHeader()->getParent(); 210 211 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); 212 // FIXME: This should probably be optional rather than required. 213 if (!ORE) 214 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " 215 "cached at a higher level"); 216 217 LoopInvariantCodeMotion LICM; 218 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, 219 AR.MSSA, ORE, true)) 220 return PreservedAnalyses::all(); 221 222 auto PA = getLoopPassPreservedAnalyses(); 223 PA.preserveSet<CFGAnalyses>(); 224 return PA; 225 } 226 227 char LegacyLICMPass::ID = 0; 228 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", 229 false, false) 230 INITIALIZE_PASS_DEPENDENCY(LoopPass) 231 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 232 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 233 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 234 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, 235 false) 236 237 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } 238 239 /// Hoist expressions out of the specified loop. Note, alias info for inner 240 /// loop is not preserved so it is not a good idea to run LICM multiple 241 /// times on one loop. 242 /// We should delete AST for inner loops in the new pass manager to avoid 243 /// memory leak. 
244 /// 245 bool LoopInvariantCodeMotion::runOnLoop( 246 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 247 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, 248 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { 249 bool Changed = false; 250 251 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); 252 253 AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); 254 255 // Get the preheader block to move instructions into... 256 BasicBlock *Preheader = L->getLoopPreheader(); 257 258 // Compute loop safety information. 259 LoopSafetyInfo SafetyInfo; 260 computeLoopSafetyInfo(&SafetyInfo, L); 261 262 // We want to visit all of the instructions in this loop... that are not parts 263 // of our subloops (they have already had their invariants hoisted out of 264 // their loop, into this loop, so there is no need to process the BODIES of 265 // the subloops). 266 // 267 // Traverse the body of the loop in depth first order on the dominator tree so 268 // that we are guaranteed to see definitions before we see uses. This allows 269 // us to sink instructions in one pass, without iteration. After sinking 270 // instructions, we perform another pass to hoist them out of the loop. 271 // 272 if (L->hasDedicatedExits()) 273 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, 274 CurAST, &SafetyInfo, ORE); 275 if (Preheader) 276 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, 277 CurAST, &SafetyInfo, ORE); 278 279 // Now that all loop invariants have been removed from the loop, promote any 280 // memory references to scalars that we can. 281 // Don't sink stores from loops without dedicated block exits. Exits 282 // containing indirect branches are not transformed by loop simplify, 283 // make sure we catch that. An additional load may be generated in the 284 // preheader for SSA updater, so also avoid sinking when no preheader 285 // is available. 286 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 287 // Figure out the loop exits and their insertion points 288 SmallVector<BasicBlock *, 8> ExitBlocks; 289 L->getUniqueExitBlocks(ExitBlocks); 290 291 // We can't insert into a catchswitch. 292 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { 293 return isa<CatchSwitchInst>(Exit->getTerminator()); 294 }); 295 296 if (!HasCatchSwitch) { 297 SmallVector<Instruction *, 8> InsertPts; 298 InsertPts.reserve(ExitBlocks.size()); 299 for (BasicBlock *ExitBlock : ExitBlocks) 300 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 301 302 PredIteratorCache PIC; 303 304 bool Promoted = false; 305 306 // Loop over all of the alias sets in the tracker object. 307 for (AliasSet &AS : *CurAST) { 308 // We can promote this alias set if it has a store, if it is a "Must" 309 // alias set, if the pointer is loop invariant, and if we are not 310 // eliminating any volatile loads or stores. 
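// A minimal illustrative sketch (hypothetical IR, not taken from this file):
// a loop whose body is just
//   %v = load i32, i32* @g
//   %inc = add i32 %v, 1
//   store i32 %inc, i32* @g
// yields a must-alias, modified, non-volatile alias set for the loop-invariant
// pointer @g, so the checks below would allow @g to be promoted to a register
// for the duration of the loop.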
311 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 312 AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) 313 continue; 314 315 assert( 316 !AS.empty() && 317 "Must alias set should have at least one pointer element in it!"); 318 319 SmallSetVector<Value *, 8> PointerMustAliases; 320 for (const auto &ASI : AS) 321 PointerMustAliases.insert(ASI.getValue()); 322 323 Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, 324 InsertPts, PIC, LI, DT, TLI, L, 325 CurAST, &SafetyInfo, ORE); 326 } 327 328 // Once we have promoted values across the loop body we have to 329 // recursively reform LCSSA as any nested loop may now have values defined 330 // within the loop used in the outer loop. 331 // FIXME: This is really heavy handed. It would be a bit better to use an 332 // SSAUpdater strategy during promotion that was LCSSA aware and reformed 333 // it as it went. 334 if (Promoted) 335 formLCSSARecursively(*L, *DT, LI, SE); 336 337 Changed |= Promoted; 338 } 339 } 340 341 // Check that neither this loop nor its parent have had LCSSA broken. LICM is 342 // specifically moving instructions across the loop boundary and so it is 343 // especially in need of sanity checking here. 344 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); 345 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && 346 "Parent loop not left in LCSSA form after LICM!"); 347 348 // If this loop is nested inside of another one, save the alias information 349 // for when we process the outer loop. 350 if (L->getParentLoop() && !DeleteAST) 351 LoopToAliasSetMap[L] = CurAST; 352 else 353 delete CurAST; 354 355 if (Changed && SE) 356 SE->forgetLoopDispositions(L); 357 return Changed; 358 } 359 360 /// Walk the specified region of the CFG (defined by all blocks dominated by 361 /// the specified block, and that are in the current loop) in reverse depth 362 /// first order w.r.t the DominatorTree. This allows us to visit uses before 363 /// definitions, allowing us to sink a loop body in one pass without iteration. 364 /// 365 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 366 DominatorTree *DT, TargetLibraryInfo *TLI, 367 TargetTransformInfo *TTI, Loop *CurLoop, 368 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 369 OptimizationRemarkEmitter *ORE) { 370 371 // Verify inputs. 372 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 373 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 374 "Unexpected input to sinkRegion"); 375 376 // We want to visit children before parents. We will enqueue all the parents 377 // before their children in the worklist and process the worklist in reverse 378 // order. 379 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 380 381 bool Changed = false; 382 for (DomTreeNode *DTN : reverse(Worklist)) { 383 BasicBlock *BB = DTN->getBlock(); 384 // Only need to process the contents of this block if it is not part of a 385 // subloop (which would already have been processed). 386 if (inSubLoop(BB, CurLoop, LI)) 387 continue; 388 389 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { 390 Instruction &I = *--II; 391 392 // If the instruction is dead, we would try to sink it because it isn't 393 // used in the loop; instead, just delete it.
394 if (isInstructionTriviallyDead(&I, TLI)) { 395 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 396 salvageDebugInfo(I); 397 ++II; 398 CurAST->deleteValue(&I); 399 I.eraseFromParent(); 400 Changed = true; 401 continue; 402 } 403 404 // Check to see if we can sink this instruction to the exit blocks 405 // of the loop. We can do this if all users of the instruction are 406 // outside of the loop. In this case, it doesn't even matter if the 407 // operands of the instruction are loop invariant. 408 // 409 bool FreeInLoop = false; 410 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && 411 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { 412 if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { 413 if (!FreeInLoop) { 414 ++II; 415 CurAST->deleteValue(&I); 416 I.eraseFromParent(); 417 } 418 Changed = true; 419 } 420 } 421 } 422 } 423 return Changed; 424 } 425 426 /// Walk the specified region of the CFG (defined by all blocks dominated by 427 /// the specified block, and that are in the current loop) in depth first 428 /// order w.r.t the DominatorTree. This allows us to visit definitions before 429 /// uses, allowing us to hoist a loop body in one pass without iteration. 430 /// 431 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 432 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, 433 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 434 OptimizationRemarkEmitter *ORE) { 435 // Verify inputs. 436 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 437 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 438 "Unexpected input to hoistRegion"); 439 440 // We want to visit parents before children. We will enqueue all the parents 441 // before their children in the worklist and process the worklist in order. 442 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 443 444 bool Changed = false; 445 for (DomTreeNode *DTN : Worklist) { 446 BasicBlock *BB = DTN->getBlock(); 447 // Only need to process the contents of this block if it is not part of a 448 // subloop (which would already have been processed). 449 if (inSubLoop(BB, CurLoop, LI)) 450 continue; 451 452 // Keep track of whether the prefix of instructions visited so far are such 453 // that the next instruction visited is guaranteed to execute if the loop 454 // is entered. 455 bool IsMustExecute = CurLoop->getHeader() == BB; 456 457 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { 458 Instruction &I = *II++; 459 // Try constant folding this instruction. If all the operands are 460 // constants, it is technically hoistable, but it would be better to 461 // just fold it. 462 if (Constant *C = ConstantFoldInstruction( 463 &I, I.getModule()->getDataLayout(), TLI)) { 464 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 465 CurAST->copyValue(&I, C); 466 I.replaceAllUsesWith(C); 467 if (isInstructionTriviallyDead(&I, TLI)) { 468 CurAST->deleteValue(&I); 469 I.eraseFromParent(); 470 } 471 Changed = true; 472 continue; 473 } 474 475 // Attempt to remove floating point division out of the loop by 476 // converting it to a reciprocal multiplication.
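// As an illustrative sketch (the IR names here are hypothetical, not taken
// from this file): given a loop body containing
//   %q = fdiv arcp float %x, %d        ; %d is loop invariant
// we build
//   %recip = fdiv arcp float 1.0, %d   ; hoisted to the preheader below
//   %q = fmul arcp float %x, %recip
// so the loop-invariant reciprocal can be moved out of the loop while only
// the cheap multiply remains inside it.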
477 if (I.getOpcode() == Instruction::FDiv && 478 CurLoop->isLoopInvariant(I.getOperand(1)) && 479 I.hasAllowReciprocal()) { 480 auto Divisor = I.getOperand(1); 481 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); 482 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); 483 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); 484 ReciprocalDivisor->insertBefore(&I); 485 486 auto Product = 487 BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); 488 Product->setFastMathFlags(I.getFastMathFlags()); 489 Product->insertAfter(&I); 490 I.replaceAllUsesWith(Product); 491 I.eraseFromParent(); 492 493 hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); 494 Changed = true; 495 continue; 496 } 497 498 // Try hoisting the instruction out to the preheader. We can only do 499 // this if all of the operands of the instruction are loop invariant and 500 // if it is safe to hoist the instruction. 501 // 502 if (CurLoop->hasLoopInvariantOperands(&I) && 503 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && 504 (IsMustExecute || 505 isSafeToExecuteUnconditionally( 506 I, DT, CurLoop, SafetyInfo, ORE, 507 CurLoop->getLoopPreheader()->getTerminator()))) { 508 Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); 509 continue; 510 } 511 512 if (IsMustExecute) 513 IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I); 514 } 515 } 516 517 return Changed; 518 } 519 520 // Return true if LI is invariant within scope of the loop. LI is invariant if 521 // CurLoop is dominated by an invariant.start representing the same memory 522 // location and size as the memory location LI loads from, and also the 523 // invariant.start has no uses. 524 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, 525 Loop *CurLoop) { 526 Value *Addr = LI->getOperand(0); 527 const DataLayout &DL = LI->getModule()->getDataLayout(); 528 const uint32_t LocSizeInBits = DL.getTypeSizeInBits( 529 cast<PointerType>(Addr->getType())->getElementType()); 530 531 // if the type is i8 addrspace(x)*, we know this is the type of 532 // llvm.invariant.start operand 533 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), 534 LI->getPointerAddressSpace()); 535 unsigned BitcastsVisited = 0; 536 // Look through bitcasts until we reach the i8* type (this is invariant.start 537 // operand type). 538 while (Addr->getType() != PtrInt8Ty) { 539 auto *BC = dyn_cast<BitCastInst>(Addr); 540 // Avoid traversing high number of bitcast uses. 541 if (++BitcastsVisited > MaxNumUsesTraversed || !BC) 542 return false; 543 Addr = BC->getOperand(0); 544 } 545 546 unsigned UsesVisited = 0; 547 // Traverse all uses of the load operand value, to see if invariant.start is 548 // one of the uses, and whether it dominates the load instruction. 549 for (auto *U : Addr->users()) { 550 // Avoid traversing for Load operand with high number of users. 551 if (++UsesVisited > MaxNumUsesTraversed) 552 return false; 553 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 554 // If there are escaping uses of invariant.start instruction, the load maybe 555 // non-invariant. 556 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || 557 !II->use_empty()) 558 continue; 559 unsigned InvariantSizeInBits = 560 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; 561 // Confirm the invariant.start location size contains the load operand size 562 // in bits. 
Also, the invariant.start should dominate the load, and we 563 // should not hoist the load out of a loop that contains this dominating 564 // invariant.start. 565 if (LocSizeInBits <= InvariantSizeInBits && 566 DT->properlyDominates(II->getParent(), CurLoop->getHeader())) 567 return true; 568 } 569 570 return false; 571 } 572 573 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, 574 Loop *CurLoop, AliasSetTracker *CurAST, 575 LoopSafetyInfo *SafetyInfo, 576 OptimizationRemarkEmitter *ORE) { 577 // SafetyInfo is nullptr if we are checking for sinking from preheader to 578 // loop body. 579 const bool SinkingToLoopBody = !SafetyInfo; 580 // Loads have extra constraints we have to verify before we can hoist them. 581 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 582 if (!LI->isUnordered()) 583 return false; // Don't sink/hoist volatile or ordered atomic loads! 584 585 // Loads from constant memory are always safe to move, even if they end up 586 // in the same alias set as something that ends up being modified. 587 if (AA->pointsToConstantMemory(LI->getOperand(0))) 588 return true; 589 if (LI->getMetadata(LLVMContext::MD_invariant_load)) 590 return true; 591 592 if (LI->isAtomic() && SinkingToLoopBody) 593 return false; // Don't sink unordered atomic loads to loop body. 594 595 // This checks for an invariant.start dominating the load. 596 if (isLoadInvariantInLoop(LI, DT, CurLoop)) 597 return true; 598 599 // Don't hoist loads which have may-aliased stores in loop. 600 uint64_t Size = 0; 601 if (LI->getType()->isSized()) 602 Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); 603 604 AAMDNodes AAInfo; 605 LI->getAAMetadata(AAInfo); 606 607 bool Invalidated = 608 pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); 609 // Check loop-invariant address because this may also be a sinkable load 610 // whose address is not necessarily loop-invariant. 611 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) 612 ORE->emit([&]() { 613 return OptimizationRemarkMissed( 614 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) 615 << "failed to move load with loop-invariant address " 616 "because the loop may invalidate its value"; 617 }); 618 619 return !Invalidated; 620 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 621 // Don't sink or hoist dbg info; it's legal, but not useful. 622 if (isa<DbgInfoIntrinsic>(I)) 623 return false; 624 625 // Don't sink calls which can throw. 626 if (CI->mayThrow()) 627 return false; 628 629 // Handle simple cases by querying alias analysis. 630 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); 631 if (Behavior == FMRB_DoesNotAccessMemory) 632 return true; 633 if (AliasAnalysis::onlyReadsMemory(Behavior)) { 634 // A readonly argmemonly function only reads from memory pointed to by 635 // it's arguments with arbitrary offsets. If we can prove there are no 636 // writes to this memory in the loop, we can hoist or sink. 637 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { 638 for (Value *Op : CI->arg_operands()) 639 if (Op->getType()->isPointerTy() && 640 pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, 641 AAMDNodes(), CurAST)) 642 return false; 643 return true; 644 } 645 // If this call only reads from memory and there are no writes to memory 646 // in the loop, we can hoist or sink the call as appropriate. 
647 bool FoundMod = false; 648 for (AliasSet &AS : *CurAST) { 649 if (!AS.isForwardingAliasSet() && AS.isMod()) { 650 FoundMod = true; 651 break; 652 } 653 } 654 if (!FoundMod) 655 return true; 656 } 657 658 // FIXME: This should use mod/ref information to see if we can hoist or 659 // sink the call. 660 661 return false; 662 } 663 664 // Only these instructions are hoistable/sinkable. 665 if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && 666 !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && 667 !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && 668 !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && 669 !isa<InsertValueInst>(I)) 670 return false; 671 672 // If we are checking for sinking from preheader to loop body it will be 673 // always safe as there is no speculative execution. 674 if (SinkingToLoopBody) 675 return true; 676 677 // TODO: Plumb the context instruction through to make hoisting and sinking 678 // more powerful. Hoisting of loads already works due to the special casing 679 // above. 680 return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); 681 } 682 683 /// Returns true if a PHINode is a trivially replaceable with an 684 /// Instruction. 685 /// This is true when all incoming values are that instruction. 686 /// This pattern occurs most often with LCSSA PHI nodes. 687 /// 688 static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { 689 for (const Value *IncValue : PN.incoming_values()) 690 if (IncValue != &I) 691 return false; 692 693 return true; 694 } 695 696 /// Return true if the instruction is free in the loop. 697 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, 698 const TargetTransformInfo *TTI) { 699 700 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 701 if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) 702 return false; 703 // For a GEP, we cannot simply use getUserCost because currently it 704 // optimistically assume that a GEP will fold into addressing mode 705 // regardless of its users. 706 const BasicBlock *BB = GEP->getParent(); 707 for (const User *U : GEP->users()) { 708 const Instruction *UI = cast<Instruction>(U); 709 if (CurLoop->contains(UI) && 710 (BB != UI->getParent() || 711 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) 712 return false; 713 } 714 return true; 715 } else 716 return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; 717 } 718 719 /// Return true if the only users of this instruction are outside of 720 /// the loop. If this is true, we can sink the instruction to the exit 721 /// blocks of the loop. 722 /// 723 /// We also return true if the instruction could be folded away in lowering. 724 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 725 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 726 const LoopSafetyInfo *SafetyInfo, 727 TargetTransformInfo *TTI, bool &FreeInLoop) { 728 const auto &BlockColors = SafetyInfo->BlockColors; 729 bool IsFree = isFreeInLoop(I, CurLoop, TTI); 730 for (const User *U : I.users()) { 731 const Instruction *UI = cast<Instruction>(U); 732 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 733 const BasicBlock *BB = PN->getParent(); 734 // We cannot sink uses in catchswitches. 735 if (isa<CatchSwitchInst>(BB->getTerminator())) 736 return false; 737 738 // We need to sink a callsite to a unique funclet. Avoid sinking if the 739 // phi use is too muddled. 
740 if (isa<CallInst>(I)) 741 if (!BlockColors.empty() && 742 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 743 return false; 744 } 745 746 if (CurLoop->contains(UI)) { 747 if (IsFree) { 748 FreeInLoop = true; 749 continue; 750 } 751 return false; 752 } 753 } 754 return true; 755 } 756 757 static Instruction * 758 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 759 const LoopInfo *LI, 760 const LoopSafetyInfo *SafetyInfo) { 761 Instruction *New; 762 if (auto *CI = dyn_cast<CallInst>(&I)) { 763 const auto &BlockColors = SafetyInfo->BlockColors; 764 765 // Sinking call-sites need to be handled differently from other 766 // instructions. The cloned call-site needs a funclet bundle operand 767 // appropriate for it's location in the CFG. 768 SmallVector<OperandBundleDef, 1> OpBundles; 769 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); 770 BundleIdx != BundleEnd; ++BundleIdx) { 771 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); 772 if (Bundle.getTagID() == LLVMContext::OB_funclet) 773 continue; 774 775 OpBundles.emplace_back(Bundle); 776 } 777 778 if (!BlockColors.empty()) { 779 const ColorVector &CV = BlockColors.find(&ExitBlock)->second; 780 assert(CV.size() == 1 && "non-unique color for exit block!"); 781 BasicBlock *BBColor = CV.front(); 782 Instruction *EHPad = BBColor->getFirstNonPHI(); 783 if (EHPad->isEHPad()) 784 OpBundles.emplace_back("funclet", EHPad); 785 } 786 787 New = CallInst::Create(CI, OpBundles); 788 } else { 789 New = I.clone(); 790 } 791 792 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); 793 if (!I.getName().empty()) 794 New->setName(I.getName() + ".le"); 795 796 // Build LCSSA PHI nodes for any in-loop operands. Note that this is 797 // particularly cheap because we can rip off the PHI node that we're 798 // replacing for the number and blocks of the predecessors. 799 // OPT: If this shows up in a profile, we can instead finish sinking all 800 // invariant instructions, and then walk their operands to re-establish 801 // LCSSA. That will eliminate creating PHI nodes just to nuke them when 802 // sinking bottom-up. 
803 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; 804 ++OI) 805 if (Instruction *OInst = dyn_cast<Instruction>(*OI)) 806 if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) 807 if (!OLoop->contains(&PN)) { 808 PHINode *OpPN = 809 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), 810 OInst->getName() + ".lcssa", &ExitBlock.front()); 811 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 812 OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); 813 *OI = OpPN; 814 } 815 return New; 816 } 817 818 static Instruction *sinkThroughTriviallyReplacablePHI( 819 PHINode *TPN, Instruction *I, LoopInfo *LI, 820 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, 821 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { 822 assert(isTriviallyReplacablePHI(*TPN, *I) && 823 "Expect only trivially replacable PHI"); 824 BasicBlock *ExitBlock = TPN->getParent(); 825 Instruction *New; 826 auto It = SunkCopies.find(ExitBlock); 827 if (It != SunkCopies.end()) 828 New = It->second; 829 else 830 New = SunkCopies[ExitBlock] = 831 CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); 832 return New; 833 } 834 835 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { 836 BasicBlock *BB = PN->getParent(); 837 if (!BB->canSplitPredecessors()) 838 return false; 839 // It's not impossible to split EHPad blocks, but if BlockColors already exist 840 // it requires updating BlockColors for all offspring blocks accordingly. By 841 // skipping such corner cases, we can make updating BlockColors after splitting 842 // predecessors fairly simple. 843 if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad()) 844 return false; 845 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 846 BasicBlock *BBPred = *PI; 847 if (isa<IndirectBrInst>(BBPred->getTerminator())) 848 return false; 849 } 850 return true; 851 } 852 853 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, 854 LoopInfo *LI, const Loop *CurLoop, 855 LoopSafetyInfo *SafetyInfo) { 856 #ifndef NDEBUG 857 SmallVector<BasicBlock *, 32> ExitBlocks; 858 CurLoop->getUniqueExitBlocks(ExitBlocks); 859 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 860 ExitBlocks.end()); 861 #endif 862 BasicBlock *ExitBB = PN->getParent(); 863 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); 864 865 // Split predecessors of the loop exit so that instructions in the loop are 866 // exposed to exit blocks through trivially replacable PHIs while keeping the 867 // loop in the canonical form where each predecessor of each exit block should 868 // be contained within the loop.
For example, this will convert the loop below 869 // from 870 // 871 // LB1: 872 // %v1 = 873 // br %LE, %LB2 874 // LB2: 875 // %v2 = 876 // br %LE, %LB1 877 // LE: 878 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replacable 879 // 880 // to 881 // 882 // LB1: 883 // %v1 = 884 // br %LE.split, %LB2 885 // LB2: 886 // %v2 = 887 // br %LE.split2, %LB1 888 // LE.split: 889 // %p1 = phi [%v1, %LB1] <-- trivially replacable 890 // br %LE 891 // LE.split2: 892 // %p2 = phi [%v2, %LB2] <-- trivially replacable 893 // br %LE 894 // LE: 895 // %p = phi [%p1, %LE.split], [%p2, %LE.split2] 896 // 897 auto &BlockColors = SafetyInfo->BlockColors; 898 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); 899 while (!PredBBs.empty()) { 900 BasicBlock *PredBB = *PredBBs.begin(); 901 assert(CurLoop->contains(PredBB) && 902 "Expect all predecessors are in the loop"); 903 if (PN->getBasicBlockIndex(PredBB) >= 0) { 904 BasicBlock *NewPred = SplitBlockPredecessors( 905 ExitBB, PredBB, ".split.loop.exit", DT, LI, true); 906 // Since we do not allow splitting EH-block with BlockColors in 907 // canSplitPredecessors(), we can simply assign predecessor's color to 908 // the new block. 909 if (!BlockColors.empty()) { 910 // Grab a reference to the ColorVector to be inserted before getting the 911 // reference to the vector we are copying because inserting the new 912 // element in BlockColors might cause the map to be reallocated. 913 ColorVector &ColorsForNewBlock = BlockColors[NewPred]; 914 ColorVector &ColorsForOldBlock = BlockColors[PredBB]; 915 ColorsForNewBlock = ColorsForOldBlock; 916 } 917 } 918 PredBBs.remove(PredBB); 919 } 920 } 921 922 /// When an instruction is found to only be used outside of the loop, this 923 /// function moves it to the exit blocks and patches up SSA form as needed. 924 /// This method is guaranteed to remove the original instruction from its 925 /// position, and may either delete it or move it to outside of the loop. 926 /// 927 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 928 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, 929 OptimizationRemarkEmitter *ORE, bool FreeInLoop) { 930 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 931 ORE->emit([&]() { 932 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 933 << "sinking " << ore::NV("Inst", &I); 934 }); 935 bool Changed = false; 936 if (isa<LoadInst>(I)) 937 ++NumMovedLoads; 938 else if (isa<CallInst>(I)) 939 ++NumMovedCalls; 940 ++NumSunk; 941 942 // Iterate over users to be ready for actual sinking. Replace users in 943 // unreachable blocks with undef and make all user PHIs trivially replacable. 944 SmallPtrSet<Instruction *, 8> VisitedUsers; 945 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { 946 auto *User = cast<Instruction>(*UI); 947 Use &U = UI.getUse(); 948 ++UI; 949 950 if (VisitedUsers.count(User) || CurLoop->contains(User)) 951 continue; 952 953 if (!DT->isReachableFromEntry(User->getParent())) { 954 U = UndefValue::get(I.getType()); 955 Changed = true; 956 continue; 957 } 958 959 // The user must be a PHI node. 960 PHINode *PN = cast<PHINode>(User); 961 962 // Surprisingly, instructions can be used outside of loops without any 963 // exits. This can only happen in PHI nodes if the incoming block is 964 // unreachable.
965 BasicBlock *BB = PN->getIncomingBlock(U); 966 if (!DT->isReachableFromEntry(BB)) { 967 U = UndefValue::get(I.getType()); 968 Changed = true; 969 continue; 970 } 971 972 VisitedUsers.insert(PN); 973 if (isTriviallyReplacablePHI(*PN, I)) 974 continue; 975 976 if (!canSplitPredecessors(PN, SafetyInfo)) 977 return Changed; 978 979 // Split predecessors of the PHI so that we can make users trivially 980 // replacable. 981 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); 982 983 // Should rebuild the iterators, as they may be invalidated by 984 // splitPredecessorsOfLoopExit(). 985 UI = I.user_begin(); 986 UE = I.user_end(); 987 } 988 989 if (VisitedUsers.empty()) 990 return Changed; 991 992 #ifndef NDEBUG 993 SmallVector<BasicBlock *, 32> ExitBlocks; 994 CurLoop->getUniqueExitBlocks(ExitBlocks); 995 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 996 ExitBlocks.end()); 997 #endif 998 999 // Clones of this instruction. Don't create more than one per exit block! 1000 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 1001 1002 // If this instruction is only used outside of the loop, then all users are 1003 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 1004 // the instruction. 1005 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); 1006 for (auto *UI : Users) { 1007 auto *User = cast<Instruction>(UI); 1008 1009 if (CurLoop->contains(User)) 1010 continue; 1011 1012 PHINode *PN = cast<PHINode>(User); 1013 assert(ExitBlockSet.count(PN->getParent()) && 1014 "The LCSSA PHI is not in an exit block!"); 1015 // The PHI must be trivially replacable. 1016 Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, 1017 SafetyInfo, CurLoop); 1018 PN->replaceAllUsesWith(New); 1019 PN->eraseFromParent(); 1020 Changed = true; 1021 } 1022 return Changed; 1023 } 1024 1025 /// When an instruction is found to only use loop invariant operands that 1026 /// are safe to hoist, this function is called to do the dirty work. 1027 /// 1028 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1029 const LoopSafetyInfo *SafetyInfo, 1030 OptimizationRemarkEmitter *ORE) { 1031 auto *Preheader = CurLoop->getLoopPreheader(); 1032 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I 1033 << "\n"); 1034 ORE->emit([&]() { 1035 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " 1036 << ore::NV("Inst", &I); 1037 }); 1038 1039 // Metadata can be dependent on conditions we are hoisting above. 1040 // Conservatively strip all metadata on the instruction unless we were 1041 // guaranteed to execute I if we entered the loop, in which case the metadata 1042 // is valid in the loop preheader. 1043 if (I.hasMetadataOtherThanDebugLoc() && 1044 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 1045 // time in isGuaranteedToExecute if we don't actually have anything to 1046 // drop. It is a compile time optimization, not required for correctness. 1047 !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) 1048 I.dropUnknownNonDebugMetadata(); 1049 1050 // Move the new node to the Preheader, before its terminator. 1051 I.moveBefore(Preheader->getTerminator()); 1052 1053 // Do not retain debug locations when we are moving instructions to different 1054 // basic blocks, because we want to avoid jumpy line tables. Calls, however, 1055 // need to retain their debug locs because they may be inlined.
1056 // FIXME: How do we retain source locations without causing poor debugging 1057 // behavior? 1058 if (!isa<CallInst>(I)) 1059 I.setDebugLoc(DebugLoc()); 1060 1061 if (isa<LoadInst>(I)) 1062 ++NumMovedLoads; 1063 else if (isa<CallInst>(I)) 1064 ++NumMovedCalls; 1065 ++NumHoisted; 1066 return true; 1067 } 1068 1069 /// Only sink or hoist an instruction if it is not a trapping instruction, 1070 /// if the instruction is known not to trap when moved to the preheader, 1071 /// or if it is a trapping instruction that is guaranteed to execute. 1072 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 1073 const DominatorTree *DT, 1074 const Loop *CurLoop, 1075 const LoopSafetyInfo *SafetyInfo, 1076 OptimizationRemarkEmitter *ORE, 1077 const Instruction *CtxI) { 1078 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) 1079 return true; 1080 1081 bool GuaranteedToExecute = 1082 isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); 1083 1084 if (!GuaranteedToExecute) { 1085 auto *LI = dyn_cast<LoadInst>(&Inst); 1086 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 1087 ORE->emit([&]() { 1088 return OptimizationRemarkMissed( 1089 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 1090 << "failed to hoist load with loop-invariant address " 1091 "because load is conditionally executed"; 1092 }); 1093 } 1094 1095 return GuaranteedToExecute; 1096 } 1097 1098 namespace { 1099 class LoopPromoter : public LoadAndStorePromoter { 1100 Value *SomePtr; // Designated pointer to store to. 1101 const SmallSetVector<Value *, 8> &PointerMustAliases; 1102 SmallVectorImpl<BasicBlock *> &LoopExitBlocks; 1103 SmallVectorImpl<Instruction *> &LoopInsertPts; 1104 PredIteratorCache &PredCache; 1105 AliasSetTracker &AST; 1106 LoopInfo &LI; 1107 DebugLoc DL; 1108 int Alignment; 1109 bool UnorderedAtomic; 1110 AAMDNodes AATags; 1111 1112 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { 1113 if (Instruction *I = dyn_cast<Instruction>(V)) 1114 if (Loop *L = LI.getLoopFor(I->getParent())) 1115 if (!L->contains(BB)) { 1116 // We need to create an LCSSA PHI node for the incoming value and 1117 // store that. 1118 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), 1119 I->getName() + ".lcssa", &BB->front()); 1120 for (BasicBlock *Pred : PredCache.get(BB)) 1121 PN->addIncoming(I, Pred); 1122 return PN; 1123 } 1124 return V; 1125 } 1126 1127 public: 1128 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, 1129 const SmallSetVector<Value *, 8> &PMA, 1130 SmallVectorImpl<BasicBlock *> &LEB, 1131 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, 1132 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, 1133 bool UnorderedAtomic, const AAMDNodes &AATags) 1134 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), 1135 LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), 1136 LI(li), DL(std::move(dl)), Alignment(alignment), 1137 UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} 1138 1139 bool isInstInList(Instruction *I, 1140 const SmallVectorImpl<Instruction *> &) const override { 1141 Value *Ptr; 1142 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 1143 Ptr = LI->getOperand(0); 1144 else 1145 Ptr = cast<StoreInst>(I)->getPointerOperand(); 1146 return PointerMustAliases.count(Ptr); 1147 } 1148 1149 void doExtraRewritesBeforeFinalDeletion() const override { 1150 // Insert stores in the loop exit blocks. Each exit block gets a 1151 // store of the live-out values that feed them.
Since we've already told 1152 // the SSA updater about the defs in the loop and the preheader 1153 // definition, it is all set and we can start using it. 1154 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 1155 BasicBlock *ExitBlock = LoopExitBlocks[i]; 1156 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 1157 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 1158 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 1159 Instruction *InsertPos = LoopInsertPts[i]; 1160 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 1161 if (UnorderedAtomic) 1162 NewSI->setOrdering(AtomicOrdering::Unordered); 1163 NewSI->setAlignment(Alignment); 1164 NewSI->setDebugLoc(DL); 1165 if (AATags) 1166 NewSI->setAAMetadata(AATags); 1167 } 1168 } 1169 1170 void replaceLoadWithValue(LoadInst *LI, Value *V) const override { 1171 // Update alias analysis. 1172 AST.copyValue(LI, V); 1173 } 1174 void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } 1175 }; 1176 1177 1178 /// Return true iff we can prove that a caller of this function can not inspect 1179 /// the contents of the provided object in a well defined program. 1180 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { 1181 if (isa<AllocaInst>(Object)) 1182 // Since the alloca goes out of scope, we know the caller can't retain a 1183 // reference to it and be well defined. Thus, we don't need to check for 1184 // capture. 1185 return true; 1186 1187 // For all other objects we need to know that the caller can't possibly 1188 // have gotten a reference to the object. There are two components of 1189 // that: 1190 // 1) Object can't be escaped by this function. This is what 1191 // PointerMayBeCaptured checks. 1192 // 2) Object can't have been captured at definition site. For this, we 1193 // need to know the return value is noalias. At the moment, we use a 1194 // weaker condition and handle only AllocLikeFunctions (which are 1195 // known to be noalias). TODO 1196 return isAllocLikeFn(Object, TLI) && 1197 !PointerMayBeCaptured(Object, true, true); 1198 } 1199 1200 } // namespace 1201 1202 /// Try to promote memory values to scalars by sinking stores out of the 1203 /// loop and moving loads to before the loop. We do this by looping over 1204 /// the stores in the loop, looking for stores to Must pointers which are 1205 /// loop invariant. 1206 /// 1207 bool llvm::promoteLoopAccessesToScalars( 1208 const SmallSetVector<Value *, 8> &PointerMustAliases, 1209 SmallVectorImpl<BasicBlock *> &ExitBlocks, 1210 SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, 1211 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1212 Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 1213 OptimizationRemarkEmitter *ORE) { 1214 // Verify inputs. 1215 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1216 CurAST != nullptr && SafetyInfo != nullptr && 1217 "Unexpected Input to promoteLoopAccessesToScalars"); 1218 1219 Value *SomePtr = *PointerMustAliases.begin(); 1220 BasicBlock *Preheader = CurLoop->getLoopPreheader(); 1221 1222 // It is not safe to promote a load/store from the loop if the load/store is 1223 // conditional. For example, turning: 1224 // 1225 // for () { if (c) *P += 1; } 1226 // 1227 // into: 1228 // 1229 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 1230 // 1231 // is not safe, because *P may only be valid to access if 'c' is true. 
1232 // 1233 // The safety property divides into two parts: 1234 // p1) The memory may not be dereferenceable on entry to the loop. In this 1235 // case, we can't insert the required load in the preheader. 1236 // p2) The memory model does not allow us to insert a store along any dynamic 1237 // path which did not originally have one. 1238 // 1239 // If at least one store is guaranteed to execute, both properties are 1240 // satisfied, and promotion is legal. 1241 // 1242 // This, however, is not a necessary condition. Even if no store/load is 1243 // guaranteed to execute, we can still establish these properties. 1244 // We can establish (p1) by proving that hoisting the load into the preheader 1245 // is safe (i.e. proving dereferenceability on all paths through the loop). We 1246 // can use any access within the alias set to prove dereferenceability, 1247 // since they're all must alias. 1248 // 1249 // There are two ways to establish (p2): 1250 // a) Prove the location is thread-local. In this case the memory model 1251 // requirement does not apply, and stores are safe to insert. 1252 // b) Prove a store dominates every exit block. In this case, if an exit 1253 // block is reached, the original dynamic path would have taken us through 1254 // the store, so inserting a store into the exit block is safe. Note that this 1255 // is different from the store being guaranteed to execute. For instance, 1256 // if an exception is thrown on the first iteration of the loop, the original 1257 // store is never executed, but the exit blocks are not executed either. 1258 1259 bool DereferenceableInPH = false; 1260 bool SafeToInsertStore = false; 1261 1262 SmallVector<Instruction *, 64> LoopUses; 1263 1264 // We start with an alignment of one and try to find instructions that allow 1265 // us to prove better alignment. 1266 unsigned Alignment = 1; 1267 // Keep track of which types of access we see. 1268 bool SawUnorderedAtomic = false; 1269 bool SawNotAtomic = false; 1270 AAMDNodes AATags; 1271 1272 const DataLayout &MDL = Preheader->getModule()->getDataLayout(); 1273 1274 bool IsKnownThreadLocalObject = false; 1275 if (SafetyInfo->MayThrow) { 1276 // If a loop can throw, we have to insert a store along each unwind edge. 1277 // That said, we can't actually make the unwind edge explicit. Therefore, 1278 // we have to prove that the store is dead along the unwind edge. We do 1279 // this by proving that the caller can't have a reference to the object 1280 // after return and thus can't possibly load from the object. 1281 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1282 if (!isKnownNonEscaping(Object, TLI)) 1283 return false; 1284 // Subtlety: Allocas aren't visible to callers, but *are* potentially 1285 // visible to other threads if captured and used during their lifetimes. 1286 IsKnownThreadLocalObject = !isa<AllocaInst>(Object); 1287 } 1288 1289 // Check that all of the pointers in the alias set have the same type. We 1290 // cannot (yet) promote a memory location that is loaded and stored in 1291 // different sizes. While we are at it, collect alignment and AA info. 1292 for (Value *ASIV : PointerMustAliases) { 1293 // Check that all of the pointers in the alias set have the same type. We 1294 // cannot (yet) promote a memory location that is loaded and stored in 1295 // different sizes. 1296 if (SomePtr->getType() != ASIV->getType()) 1297 return false; 1298 1299 for (User *U : ASIV->users()) { 1300 // Ignore instructions that are outside the loop.
1301 Instruction *UI = dyn_cast<Instruction>(U); 1302 if (!UI || !CurLoop->contains(UI)) 1303 continue; 1304 1305 // If there is an non-load/store instruction in the loop, we can't promote 1306 // it. 1307 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 1308 assert(!Load->isVolatile() && "AST broken"); 1309 if (!Load->isUnordered()) 1310 return false; 1311 1312 SawUnorderedAtomic |= Load->isAtomic(); 1313 SawNotAtomic |= !Load->isAtomic(); 1314 1315 if (!DereferenceableInPH) 1316 DereferenceableInPH = isSafeToExecuteUnconditionally( 1317 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); 1318 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 1319 // Stores *of* the pointer are not interesting, only stores *to* the 1320 // pointer. 1321 if (UI->getOperand(1) != ASIV) 1322 continue; 1323 assert(!Store->isVolatile() && "AST broken"); 1324 if (!Store->isUnordered()) 1325 return false; 1326 1327 SawUnorderedAtomic |= Store->isAtomic(); 1328 SawNotAtomic |= !Store->isAtomic(); 1329 1330 // If the store is guaranteed to execute, both properties are satisfied. 1331 // We may want to check if a store is guaranteed to execute even if we 1332 // already know that promotion is safe, since it may have higher 1333 // alignment than any other guaranteed stores, in which case we can 1334 // raise the alignment on the promoted store. 1335 unsigned InstAlignment = Store->getAlignment(); 1336 if (!InstAlignment) 1337 InstAlignment = 1338 MDL.getABITypeAlignment(Store->getValueOperand()->getType()); 1339 1340 if (!DereferenceableInPH || !SafeToInsertStore || 1341 (InstAlignment > Alignment)) { 1342 if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { 1343 DereferenceableInPH = true; 1344 SafeToInsertStore = true; 1345 Alignment = std::max(Alignment, InstAlignment); 1346 } 1347 } 1348 1349 // If a store dominates all exit blocks, it is safe to sink. 1350 // As explained above, if an exit block was executed, a dominating 1351 // store must have been executed at least once, so we are not 1352 // introducing stores on paths that did not have them. 1353 // Note that this only looks at explicit exit blocks. If we ever 1354 // start sinking stores into unwind edges (see above), this will break. 1355 if (!SafeToInsertStore) 1356 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 1357 return DT->dominates(Store->getParent(), Exit); 1358 }); 1359 1360 // If the store is not guaranteed to execute, we may still get 1361 // deref info through it. 1362 if (!DereferenceableInPH) { 1363 DereferenceableInPH = isDereferenceableAndAlignedPointer( 1364 Store->getPointerOperand(), Store->getAlignment(), MDL, 1365 Preheader->getTerminator(), DT); 1366 } 1367 } else 1368 return false; // Not a load or store. 1369 1370 // Merge the AA tags. 1371 if (LoopUses.empty()) { 1372 // On the first load/store, just take its AA tags. 1373 UI->getAAMetadata(AATags); 1374 } else if (AATags) { 1375 UI->getAAMetadata(AATags, /* Merge = */ true); 1376 } 1377 1378 LoopUses.push_back(UI); 1379 } 1380 } 1381 1382 // If we found both an unordered atomic instruction and a non-atomic memory 1383 // access, bail. We can't blindly promote non-atomic to atomic since we 1384 // might not be able to lower the result. We can't downgrade since that 1385 // would violate memory model. Also, align 0 is an error for atomics. 1386 if (SawUnorderedAtomic && SawNotAtomic) 1387 return false; 1388 1389 // If we couldn't prove we can hoist the load, bail. 
1390 if (!DereferenceableInPH) 1391 return false; 1392 1393 // We know we can hoist the load, but don't have a guaranteed store. 1394 // Check whether the location is thread-local. If it is, then we can insert 1395 // stores along paths which originally didn't have them without violating the 1396 // memory model. 1397 if (!SafeToInsertStore) { 1398 if (IsKnownThreadLocalObject) 1399 SafeToInsertStore = true; 1400 else { 1401 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1402 SafeToInsertStore = 1403 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && 1404 !PointerMayBeCaptured(Object, true, true); 1405 } 1406 } 1407 1408 // If we've still failed to prove we can sink the store, give up. 1409 if (!SafeToInsertStore) 1410 return false; 1411 1412 // Otherwise, this is safe to promote, lets do it! 1413 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr 1414 << '\n'); 1415 ORE->emit([&]() { 1416 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 1417 LoopUses[0]) 1418 << "Moving accesses to memory location out of the loop"; 1419 }); 1420 ++NumPromoted; 1421 1422 // Grab a debug location for the inserted loads/stores; given that the 1423 // inserted loads/stores have little relation to the original loads/stores, 1424 // this code just arbitrarily picks a location from one, since any debug 1425 // location is better than none. 1426 DebugLoc DL = LoopUses[0]->getDebugLoc(); 1427 1428 // We use the SSAUpdater interface to insert phi nodes as required. 1429 SmallVector<PHINode *, 16> NewPHIs; 1430 SSAUpdater SSA(&NewPHIs); 1431 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 1432 InsertPts, PIC, *CurAST, *LI, DL, Alignment, 1433 SawUnorderedAtomic, AATags); 1434 1435 // Set up the preheader to have a definition of the value. It is the live-out 1436 // value from the preheader that uses in the loop will use. 1437 LoadInst *PreheaderLoad = new LoadInst( 1438 SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); 1439 if (SawUnorderedAtomic) 1440 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 1441 PreheaderLoad->setAlignment(Alignment); 1442 PreheaderLoad->setDebugLoc(DL); 1443 if (AATags) 1444 PreheaderLoad->setAAMetadata(AATags); 1445 SSA.AddAvailableValue(Preheader, PreheaderLoad); 1446 1447 // Rewrite all the loads in the loop and remember all the definitions from 1448 // stores in the loop. 1449 Promoter.run(LoopUses); 1450 1451 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 1452 if (PreheaderLoad->use_empty()) 1453 PreheaderLoad->eraseFromParent(); 1454 1455 return true; 1456 } 1457 1458 /// Returns an owning pointer to an alias set which incorporates aliasing info 1459 /// from L and all subloops of L. 1460 /// FIXME: In new pass manager, there is no helper function to handle loop 1461 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed 1462 /// from scratch for every loop. Hook up with the helper functions when 1463 /// available in the new pass manager to avoid redundant computation. 
1464 AliasSetTracker * 1465 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 1466 AliasAnalysis *AA) { 1467 AliasSetTracker *CurAST = nullptr; 1468 SmallVector<Loop *, 4> RecomputeLoops; 1469 for (Loop *InnerL : L->getSubLoops()) { 1470 auto MapI = LoopToAliasSetMap.find(InnerL); 1471 // If the AST for this inner loop is missing it may have been merged into 1472 // some other loop's AST and then that loop unrolled, and so we need to 1473 // recompute it. 1474 if (MapI == LoopToAliasSetMap.end()) { 1475 RecomputeLoops.push_back(InnerL); 1476 continue; 1477 } 1478 AliasSetTracker *InnerAST = MapI->second; 1479 1480 if (CurAST != nullptr) { 1481 // What if InnerLoop was modified by other passes ? 1482 CurAST->add(*InnerAST); 1483 1484 // Once we've incorporated the inner loop's AST into ours, we don't need 1485 // the subloop's anymore. 1486 delete InnerAST; 1487 } else { 1488 CurAST = InnerAST; 1489 } 1490 LoopToAliasSetMap.erase(MapI); 1491 } 1492 if (CurAST == nullptr) 1493 CurAST = new AliasSetTracker(*AA); 1494 1495 auto mergeLoop = [&](Loop *L) { 1496 // Loop over the body of this loop, looking for calls, invokes, and stores. 1497 for (BasicBlock *BB : L->blocks()) 1498 CurAST->add(*BB); // Incorporate the specified basic block 1499 }; 1500 1501 // Add everything from the sub loops that are no longer directly available. 1502 for (Loop *InnerL : RecomputeLoops) 1503 mergeLoop(InnerL); 1504 1505 // And merge in this loop. 1506 mergeLoop(L); 1507 1508 return CurAST; 1509 } 1510 1511 /// Simple analysis hook. Clone alias set info. 1512 /// 1513 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 1514 Loop *L) { 1515 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1516 if (!AST) 1517 return; 1518 1519 AST->copyValue(From, To); 1520 } 1521 1522 /// Simple Analysis hook. Delete value V from alias set 1523 /// 1524 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { 1525 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1526 if (!AST) 1527 return; 1528 1529 AST->deleteValue(V); 1530 } 1531 1532 /// Simple Analysis hook. Delete value L from alias set map. 1533 /// 1534 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { 1535 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1536 if (!AST) 1537 return; 1538 1539 delete AST; 1540 LICM.getLoopToAliasSetMap().erase(L); 1541 } 1542 1543 /// Return true if the body of this loop may store into the memory 1544 /// location pointed to by V. 1545 /// 1546 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 1547 const AAMDNodes &AAInfo, 1548 AliasSetTracker *CurAST) { 1549 // Check to see if any of the basic blocks in CurLoop invalidate *V. 1550 return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); 1551 } 1552 1553 /// Little predicate that returns true if the specified basic block is in 1554 /// a subloop of the current one, not the current one itself. 1555 /// 1556 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 1557 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 1558 return LI->getLoopFor(BB) != CurLoop; 1559 } 1560