1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion, attempting to remove as much 11 // code from the body of a loop as possible. It does this by either hoisting 12 // code into the preheader block, or by sinking code to the exit blocks if it is 13 // safe. This pass also promotes must-aliased memory locations in the loop to 14 // live in registers, thus hoisting and sinking "invariant" loads and stores. 15 // 16 // This pass uses alias analysis for two purposes: 17 // 18 // 1. Moving loop invariant loads and calls out of loops. If we can determine 19 // that a load or call inside of a loop never aliases anything stored to, 20 // we can hoist it or sink it like any other instruction. 21 // 2. Scalar Promotion of Memory - If there is a store instruction inside of 22 // the loop, we try to move the store to happen AFTER the loop instead of 23 // inside of the loop. This can only happen if a few conditions are true: 24 // A. The pointer stored through is loop invariant 25 // B. There are no stores or loads in the loop which _may_ alias the 26 // pointer. There are no calls in the loop which mod/ref the pointer. 27 // If these conditions are true, we can promote the loads and stores in the 28 // loop of the pointer to use a temporary alloca'd variable. We then use 29 // the SSAUpdater to construct the appropriate SSA form for the value. 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Transforms/Scalar/LICM.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/BasicAliasAnalysis.h" 38 #include "llvm/Analysis/CaptureTracking.h" 39 #include "llvm/Analysis/ConstantFolding.h" 40 #include "llvm/Analysis/GlobalsModRef.h" 41 #include "llvm/Analysis/Loads.h" 42 #include "llvm/Analysis/LoopInfo.h" 43 #include "llvm/Analysis/LoopPass.h" 44 #include "llvm/Analysis/MemoryBuiltins.h" 45 #include "llvm/Analysis/MemorySSA.h" 46 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 47 #include "llvm/Analysis/ScalarEvolution.h" 48 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 49 #include "llvm/Analysis/TargetLibraryInfo.h" 50 #include "llvm/Analysis/ValueTracking.h" 51 #include "llvm/IR/CFG.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DerivedTypes.h" 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Instructions.h" 57 #include "llvm/IR/IntrinsicInst.h" 58 #include "llvm/IR/LLVMContext.h" 59 #include "llvm/IR/Metadata.h" 60 #include "llvm/IR/PredIteratorCache.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Scalar.h" 65 #include "llvm/Transforms/Scalar/LoopPassManager.h" 66 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 67 #include "llvm/Transforms/Utils/Local.h" 68 #include "llvm/Transforms/Utils/LoopUtils.h" 69 #include "llvm/Transforms/Utils/SSAUpdater.h" 70 #include <algorithm> 71 #include <utility> 72 using namespace llvm; 73 74 #define DEBUG_TYPE "licm" 75 76 STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 77 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); 78 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 79 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 80 STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); 81 82 /// Memory promotion is enabled by default. 83 static cl::opt<bool> 84 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), 85 cl::desc("Disable memory promotion in LICM pass")); 86 87 static cl::opt<uint32_t> MaxNumUsesTraversed( 88 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), 89 cl::desc("Max num uses visited for identifying load " 90 "invariance in loop using invariant start (default = 8)")); 91 92 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); 93 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 94 const LoopSafetyInfo *SafetyInfo, 95 TargetTransformInfo *TTI, bool &FreeInLoop); 96 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 97 const LoopSafetyInfo *SafetyInfo, 98 OptimizationRemarkEmitter *ORE); 99 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 100 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, 101 OptimizationRemarkEmitter *ORE, bool FreeInLoop); 102 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 103 const DominatorTree *DT, 104 const Loop *CurLoop, 105 const LoopSafetyInfo *SafetyInfo, 106 OptimizationRemarkEmitter *ORE, 107 const Instruction *CtxI = nullptr); 108 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 109 const AAMDNodes &AAInfo, 110 AliasSetTracker *CurAST); 111 static Instruction * 112 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 113 const LoopInfo *LI, 114 const LoopSafetyInfo *SafetyInfo); 115 116 namespace { 117 struct LoopInvariantCodeMotion { 118 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 119 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, 120 ScalarEvolution *SE, MemorySSA *MSSA, 121 OptimizationRemarkEmitter *ORE, bool DeleteAST); 122 123 DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { 124 return LoopToAliasSetMap; 125 } 126 127 private: 128 DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; 129 130 AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 131 AliasAnalysis *AA); 132 }; 133 134 struct LegacyLICMPass : public LoopPass { 135 static char ID; // Pass identification, replacement for typeid 136 LegacyLICMPass() : LoopPass(ID) { 137 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 141 if (skipLoop(L)) { 142 // If we have run LICM on a previous loop but now we are skipping 143 // (because we've hit the opt-bisect limit), we need to clear the 144 // loop alias information. 145 for (auto <AS : LICM.getLoopToAliasSetMap()) 146 delete LTAS.second; 147 LICM.getLoopToAliasSetMap().clear(); 148 return false; 149 } 150 151 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 152 MemorySSA *MSSA = EnableMSSALoopDependency 153 ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) 154 : nullptr; 155 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 156 // pass. Function analyses need to be preserved across loop transformations 157 // but ORE cannot be preserved (see comment before the pass definition). 158 OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 159 return LICM.runOnLoop(L, 160 &getAnalysis<AAResultsWrapperPass>().getAAResults(), 161 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), 162 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 163 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 164 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 165 *L->getHeader()->getParent()), 166 SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); 167 } 168 169 /// This transformation requires natural loop information & requires that 170 /// loop preheaders be inserted into the CFG... 171 /// 172 void getAnalysisUsage(AnalysisUsage &AU) const override { 173 AU.setPreservesCFG(); 174 AU.addRequired<TargetLibraryInfoWrapperPass>(); 175 if (EnableMSSALoopDependency) 176 AU.addRequired<MemorySSAWrapperPass>(); 177 AU.addRequired<TargetTransformInfoWrapperPass>(); 178 getLoopAnalysisUsage(AU); 179 } 180 181 using llvm::Pass::doFinalization; 182 183 bool doFinalization() override { 184 assert(LICM.getLoopToAliasSetMap().empty() && 185 "Didn't free loop alias sets"); 186 return false; 187 } 188 189 private: 190 LoopInvariantCodeMotion LICM; 191 192 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 193 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 194 Loop *L) override; 195 196 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 197 /// set. 198 void deleteAnalysisValue(Value *V, Loop *L) override; 199 200 /// Simple Analysis hook. Delete loop L from alias set map. 201 void deleteAnalysisLoop(Loop *L) override; 202 }; 203 } // namespace 204 205 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, 206 LoopStandardAnalysisResults &AR, LPMUpdater &) { 207 const auto &FAM = 208 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 209 Function *F = L.getHeader()->getParent(); 210 211 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); 212 // FIXME: This should probably be optional rather than required. 213 if (!ORE) 214 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " 215 "cached at a higher level"); 216 217 LoopInvariantCodeMotion LICM; 218 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, 219 AR.MSSA, ORE, true)) 220 return PreservedAnalyses::all(); 221 222 auto PA = getLoopPassPreservedAnalyses(); 223 PA.preserveSet<CFGAnalyses>(); 224 return PA; 225 } 226 227 char LegacyLICMPass::ID = 0; 228 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", 229 false, false) 230 INITIALIZE_PASS_DEPENDENCY(LoopPass) 231 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 232 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 233 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 234 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, 235 false) 236 237 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } 238 239 /// Hoist expressions out of the specified loop. Note, alias info for inner 240 /// loop is not preserved so it is not a good idea to run LICM multiple 241 /// times on one loop. 242 /// We should delete AST for inner loops in the new pass manager to avoid 243 /// memory leak. 244 /// 245 bool LoopInvariantCodeMotion::runOnLoop( 246 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 247 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, 248 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { 249 bool Changed = false; 250 251 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); 252 253 AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); 254 255 // Get the preheader block to move instructions into... 256 BasicBlock *Preheader = L->getLoopPreheader(); 257 258 // Compute loop safety information. 259 LoopSafetyInfo SafetyInfo; 260 computeLoopSafetyInfo(&SafetyInfo, L); 261 262 // We want to visit all of the instructions in this loop... that are not parts 263 // of our subloops (they have already had their invariants hoisted out of 264 // their loop, into this loop, so there is no need to process the BODIES of 265 // the subloops). 266 // 267 // Traverse the body of the loop in depth first order on the dominator tree so 268 // that we are guaranteed to see definitions before we see uses. This allows 269 // us to sink instructions in one pass, without iteration. After sinking 270 // instructions, we perform another pass to hoist them out of the loop. 271 // 272 if (L->hasDedicatedExits()) 273 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, 274 CurAST, &SafetyInfo, ORE); 275 if (Preheader) 276 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, 277 CurAST, &SafetyInfo, ORE); 278 279 // Now that all loop invariants have been removed from the loop, promote any 280 // memory references to scalars that we can. 281 // Don't sink stores from loops without dedicated block exits. Exits 282 // containing indirect branches are not transformed by loop simplify, 283 // make sure we catch that. An additional load may be generated in the 284 // preheader for SSA updater, so also avoid sinking when no preheader 285 // is available. 286 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 287 // Figure out the loop exits and their insertion points 288 SmallVector<BasicBlock *, 8> ExitBlocks; 289 L->getUniqueExitBlocks(ExitBlocks); 290 291 // We can't insert into a catchswitch. 292 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { 293 return isa<CatchSwitchInst>(Exit->getTerminator()); 294 }); 295 296 if (!HasCatchSwitch) { 297 SmallVector<Instruction *, 8> InsertPts; 298 InsertPts.reserve(ExitBlocks.size()); 299 for (BasicBlock *ExitBlock : ExitBlocks) 300 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 301 302 PredIteratorCache PIC; 303 304 bool Promoted = false; 305 306 // Loop over all of the alias sets in the tracker object. 307 for (AliasSet &AS : *CurAST) { 308 // We can promote this alias set if it has a store, if it is a "Must" 309 // alias set, if the pointer is loop invariant, and if we are not 310 // eliminating any volatile loads or stores. 311 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 312 AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) 313 continue; 314 315 assert( 316 !AS.empty() && 317 "Must alias set should have at least one pointer element in it!"); 318 319 SmallSetVector<Value *, 8> PointerMustAliases; 320 for (const auto &ASI : AS) 321 PointerMustAliases.insert(ASI.getValue()); 322 323 Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, 324 InsertPts, PIC, LI, DT, TLI, L, 325 CurAST, &SafetyInfo, ORE); 326 } 327 328 // Once we have promoted values across the loop body we have to 329 // recursively reform LCSSA as any nested loop may now have values defined 330 // within the loop used in the outer loop. 331 // FIXME: This is really heavy handed. It would be a bit better to use an 332 // SSAUpdater strategy during promotion that was LCSSA aware and reformed 333 // it as it went. 334 if (Promoted) 335 formLCSSARecursively(*L, *DT, LI, SE); 336 337 Changed |= Promoted; 338 } 339 } 340 341 // Check that neither this loop nor its parent have had LCSSA broken. LICM is 342 // specifically moving instructions across the loop boundary and so it is 343 // especially in need of sanity checking here. 344 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); 345 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && 346 "Parent loop not left in LCSSA form after LICM!"); 347 348 // If this loop is nested inside of another one, save the alias information 349 // for when we process the outer loop. 350 if (L->getParentLoop() && !DeleteAST) 351 LoopToAliasSetMap[L] = CurAST; 352 else 353 delete CurAST; 354 355 if (Changed && SE) 356 SE->forgetLoopDispositions(L); 357 return Changed; 358 } 359 360 /// Walk the specified region of the CFG (defined by all blocks dominated by 361 /// the specified block, and that are in the current loop) in reverse depth 362 /// first order w.r.t the DominatorTree. This allows us to visit uses before 363 /// definitions, allowing us to sink a loop body in one pass without iteration. 364 /// 365 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 366 DominatorTree *DT, TargetLibraryInfo *TLI, 367 TargetTransformInfo *TTI, Loop *CurLoop, 368 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 369 OptimizationRemarkEmitter *ORE) { 370 371 // Verify inputs. 372 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 373 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 374 "Unexpected input to sinkRegion"); 375 376 // We want to visit children before parents. We will enque all the parents 377 // before their children in the worklist and process the worklist in reverse 378 // order. 379 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 380 381 bool Changed = false; 382 for (DomTreeNode *DTN : reverse(Worklist)) { 383 BasicBlock *BB = DTN->getBlock(); 384 // Only need to process the contents of this block if it is not part of a 385 // subloop (which would already have been processed). 386 if (inSubLoop(BB, CurLoop, LI)) 387 continue; 388 389 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { 390 Instruction &I = *--II; 391 392 // If the instruction is dead, we would try to sink it because it isn't 393 // used in the loop, instead, just delete it. 394 if (isInstructionTriviallyDead(&I, TLI)) { 395 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 396 ++II; 397 CurAST->deleteValue(&I); 398 I.eraseFromParent(); 399 Changed = true; 400 continue; 401 } 402 403 // Check to see if we can sink this instruction to the exit blocks 404 // of the loop. We can do this if the all users of the instruction are 405 // outside of the loop. In this case, it doesn't even matter if the 406 // operands of the instruction are loop invariant. 407 // 408 bool FreeInLoop = false; 409 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && 410 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { 411 if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { 412 if (!FreeInLoop) { 413 ++II; 414 CurAST->deleteValue(&I); 415 I.eraseFromParent(); 416 } 417 Changed = true; 418 } 419 } 420 } 421 } 422 return Changed; 423 } 424 425 /// Walk the specified region of the CFG (defined by all blocks dominated by 426 /// the specified block, and that are in the current loop) in depth first 427 /// order w.r.t the DominatorTree. This allows us to visit definitions before 428 /// uses, allowing us to hoist a loop body in one pass without iteration. 429 /// 430 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 431 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, 432 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 433 OptimizationRemarkEmitter *ORE) { 434 // Verify inputs. 435 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 436 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 437 "Unexpected input to hoistRegion"); 438 439 // We want to visit parents before children. We will enque all the parents 440 // before their children in the worklist and process the worklist in order. 441 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 442 443 bool Changed = false; 444 for (DomTreeNode *DTN : Worklist) { 445 BasicBlock *BB = DTN->getBlock(); 446 // Only need to process the contents of this block if it is not part of a 447 // subloop (which would already have been processed). 448 if (!inSubLoop(BB, CurLoop, LI)) 449 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { 450 Instruction &I = *II++; 451 // Try constant folding this instruction. If all the operands are 452 // constants, it is technically hoistable, but it would be better to 453 // just fold it. 454 if (Constant *C = ConstantFoldInstruction( 455 &I, I.getModule()->getDataLayout(), TLI)) { 456 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 457 CurAST->copyValue(&I, C); 458 I.replaceAllUsesWith(C); 459 if (isInstructionTriviallyDead(&I, TLI)) { 460 CurAST->deleteValue(&I); 461 I.eraseFromParent(); 462 } 463 Changed = true; 464 continue; 465 } 466 467 // Attempt to remove floating point division out of the loop by 468 // converting it to a reciprocal multiplication. 469 if (I.getOpcode() == Instruction::FDiv && 470 CurLoop->isLoopInvariant(I.getOperand(1)) && 471 I.hasAllowReciprocal()) { 472 auto Divisor = I.getOperand(1); 473 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); 474 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); 475 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); 476 ReciprocalDivisor->insertBefore(&I); 477 478 auto Product = 479 BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); 480 Product->setFastMathFlags(I.getFastMathFlags()); 481 Product->insertAfter(&I); 482 I.replaceAllUsesWith(Product); 483 I.eraseFromParent(); 484 485 hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); 486 Changed = true; 487 continue; 488 } 489 490 // Try hoisting the instruction out to the preheader. We can only do 491 // this if all of the operands of the instruction are loop invariant and 492 // if it is safe to hoist the instruction. 493 // 494 if (CurLoop->hasLoopInvariantOperands(&I) && 495 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && 496 isSafeToExecuteUnconditionally( 497 I, DT, CurLoop, SafetyInfo, ORE, 498 CurLoop->getLoopPreheader()->getTerminator())) 499 Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); 500 } 501 } 502 503 return Changed; 504 } 505 506 /// Computes loop safety information, checks loop body & header 507 /// for the possibility of may throw exception. 508 /// 509 void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { 510 assert(CurLoop != nullptr && "CurLoop cant be null"); 511 BasicBlock *Header = CurLoop->getHeader(); 512 // Setting default safety values. 513 SafetyInfo->MayThrow = false; 514 SafetyInfo->HeaderMayThrow = false; 515 // Iterate over header and compute safety info. 516 for (BasicBlock::iterator I = Header->begin(), E = Header->end(); 517 (I != E) && !SafetyInfo->HeaderMayThrow; ++I) 518 SafetyInfo->HeaderMayThrow |= 519 !isGuaranteedToTransferExecutionToSuccessor(&*I); 520 521 SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; 522 // Iterate over loop instructions and compute safety info. 523 // Skip header as it has been computed and stored in HeaderMayThrow. 524 // The first block in loopinfo.Blocks is guaranteed to be the header. 525 assert(Header == *CurLoop->getBlocks().begin() && 526 "First block must be header"); 527 for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), 528 BBE = CurLoop->block_end(); 529 (BB != BBE) && !SafetyInfo->MayThrow; ++BB) 530 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); 531 (I != E) && !SafetyInfo->MayThrow; ++I) 532 SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); 533 534 // Compute funclet colors if we might sink/hoist in a function with a funclet 535 // personality routine. 536 Function *Fn = CurLoop->getHeader()->getParent(); 537 if (Fn->hasPersonalityFn()) 538 if (Constant *PersonalityFn = Fn->getPersonalityFn()) 539 if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) 540 SafetyInfo->BlockColors = colorEHFunclets(*Fn); 541 } 542 543 // Return true if LI is invariant within scope of the loop. LI is invariant if 544 // CurLoop is dominated by an invariant.start representing the same memory 545 // location and size as the memory location LI loads from, and also the 546 // invariant.start has no uses. 547 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, 548 Loop *CurLoop) { 549 Value *Addr = LI->getOperand(0); 550 const DataLayout &DL = LI->getModule()->getDataLayout(); 551 const uint32_t LocSizeInBits = DL.getTypeSizeInBits( 552 cast<PointerType>(Addr->getType())->getElementType()); 553 554 // if the type is i8 addrspace(x)*, we know this is the type of 555 // llvm.invariant.start operand 556 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), 557 LI->getPointerAddressSpace()); 558 unsigned BitcastsVisited = 0; 559 // Look through bitcasts until we reach the i8* type (this is invariant.start 560 // operand type). 561 while (Addr->getType() != PtrInt8Ty) { 562 auto *BC = dyn_cast<BitCastInst>(Addr); 563 // Avoid traversing high number of bitcast uses. 564 if (++BitcastsVisited > MaxNumUsesTraversed || !BC) 565 return false; 566 Addr = BC->getOperand(0); 567 } 568 569 unsigned UsesVisited = 0; 570 // Traverse all uses of the load operand value, to see if invariant.start is 571 // one of the uses, and whether it dominates the load instruction. 572 for (auto *U : Addr->users()) { 573 // Avoid traversing for Load operand with high number of users. 574 if (++UsesVisited > MaxNumUsesTraversed) 575 return false; 576 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 577 // If there are escaping uses of invariant.start instruction, the load maybe 578 // non-invariant. 579 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || 580 !II->use_empty()) 581 continue; 582 unsigned InvariantSizeInBits = 583 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; 584 // Confirm the invariant.start location size contains the load operand size 585 // in bits. Also, the invariant.start should dominate the load, and we 586 // should not hoist the load out of a loop that contains this dominating 587 // invariant.start. 588 if (LocSizeInBits <= InvariantSizeInBits && 589 DT->properlyDominates(II->getParent(), CurLoop->getHeader())) 590 return true; 591 } 592 593 return false; 594 } 595 596 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, 597 Loop *CurLoop, AliasSetTracker *CurAST, 598 LoopSafetyInfo *SafetyInfo, 599 OptimizationRemarkEmitter *ORE) { 600 // SafetyInfo is nullptr if we are checking for sinking from preheader to 601 // loop body. 602 const bool SinkingToLoopBody = !SafetyInfo; 603 // Loads have extra constraints we have to verify before we can hoist them. 604 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 605 if (!LI->isUnordered()) 606 return false; // Don't sink/hoist volatile or ordered atomic loads! 607 608 // Loads from constant memory are always safe to move, even if they end up 609 // in the same alias set as something that ends up being modified. 610 if (AA->pointsToConstantMemory(LI->getOperand(0))) 611 return true; 612 if (LI->getMetadata(LLVMContext::MD_invariant_load)) 613 return true; 614 615 if (LI->isAtomic() && SinkingToLoopBody) 616 return false; // Don't sink unordered atomic loads to loop body. 617 618 // This checks for an invariant.start dominating the load. 619 if (isLoadInvariantInLoop(LI, DT, CurLoop)) 620 return true; 621 622 // Don't hoist loads which have may-aliased stores in loop. 623 uint64_t Size = 0; 624 if (LI->getType()->isSized()) 625 Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); 626 627 AAMDNodes AAInfo; 628 LI->getAAMetadata(AAInfo); 629 630 bool Invalidated = 631 pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); 632 // Check loop-invariant address because this may also be a sinkable load 633 // whose address is not necessarily loop-invariant. 634 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) 635 ORE->emit([&]() { 636 return OptimizationRemarkMissed( 637 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) 638 << "failed to move load with loop-invariant address " 639 "because the loop may invalidate its value"; 640 }); 641 642 return !Invalidated; 643 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 644 // Don't sink or hoist dbg info; it's legal, but not useful. 645 if (isa<DbgInfoIntrinsic>(I)) 646 return false; 647 648 // Don't sink calls which can throw. 649 if (CI->mayThrow()) 650 return false; 651 652 // Handle simple cases by querying alias analysis. 653 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); 654 if (Behavior == FMRB_DoesNotAccessMemory) 655 return true; 656 if (AliasAnalysis::onlyReadsMemory(Behavior)) { 657 // A readonly argmemonly function only reads from memory pointed to by 658 // it's arguments with arbitrary offsets. If we can prove there are no 659 // writes to this memory in the loop, we can hoist or sink. 660 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { 661 for (Value *Op : CI->arg_operands()) 662 if (Op->getType()->isPointerTy() && 663 pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, 664 AAMDNodes(), CurAST)) 665 return false; 666 return true; 667 } 668 // If this call only reads from memory and there are no writes to memory 669 // in the loop, we can hoist or sink the call as appropriate. 670 bool FoundMod = false; 671 for (AliasSet &AS : *CurAST) { 672 if (!AS.isForwardingAliasSet() && AS.isMod()) { 673 FoundMod = true; 674 break; 675 } 676 } 677 if (!FoundMod) 678 return true; 679 } 680 681 // FIXME: This should use mod/ref information to see if we can hoist or 682 // sink the call. 683 684 return false; 685 } 686 687 // Only these instructions are hoistable/sinkable. 688 if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && 689 !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && 690 !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && 691 !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && 692 !isa<InsertValueInst>(I)) 693 return false; 694 695 // If we are checking for sinking from preheader to loop body it will be 696 // always safe as there is no speculative execution. 697 if (SinkingToLoopBody) 698 return true; 699 700 // TODO: Plumb the context instruction through to make hoisting and sinking 701 // more powerful. Hoisting of loads already works due to the special casing 702 // above. 703 return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); 704 } 705 706 /// Returns true if a PHINode is a trivially replaceable with an 707 /// Instruction. 708 /// This is true when all incoming values are that instruction. 709 /// This pattern occurs most often with LCSSA PHI nodes. 710 /// 711 static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { 712 for (const Value *IncValue : PN.incoming_values()) 713 if (IncValue != &I) 714 return false; 715 716 return true; 717 } 718 719 /// Return true if the instruction is free in the loop. 720 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, 721 const TargetTransformInfo *TTI) { 722 723 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 724 if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) 725 return false; 726 // For a GEP, we cannot simply use getUserCost because currently it 727 // optimistically assume that a GEP will fold into addressing mode 728 // regardless of its users. 729 const BasicBlock *BB = GEP->getParent(); 730 for (const User *U : GEP->users()) { 731 const Instruction *UI = cast<Instruction>(U); 732 if (CurLoop->contains(UI) && 733 (BB != UI->getParent() || 734 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) 735 return false; 736 } 737 return true; 738 } else 739 return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; 740 } 741 742 /// Return true if the only users of this instruction are outside of 743 /// the loop. If this is true, we can sink the instruction to the exit 744 /// blocks of the loop. 745 /// 746 /// We also return true if the instruction could be folded away in lowering. 747 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 748 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 749 const LoopSafetyInfo *SafetyInfo, 750 TargetTransformInfo *TTI, bool &FreeInLoop) { 751 const auto &BlockColors = SafetyInfo->BlockColors; 752 bool IsFree = isFreeInLoop(I, CurLoop, TTI); 753 for (const User *U : I.users()) { 754 const Instruction *UI = cast<Instruction>(U); 755 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 756 const BasicBlock *BB = PN->getParent(); 757 // We cannot sink uses in catchswitches. 758 if (isa<CatchSwitchInst>(BB->getTerminator())) 759 return false; 760 761 // We need to sink a callsite to a unique funclet. Avoid sinking if the 762 // phi use is too muddled. 763 if (isa<CallInst>(I)) 764 if (!BlockColors.empty() && 765 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 766 return false; 767 } 768 769 if (CurLoop->contains(UI)) { 770 if (IsFree) { 771 FreeInLoop = true; 772 continue; 773 } 774 return false; 775 } 776 } 777 return true; 778 } 779 780 static Instruction * 781 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 782 const LoopInfo *LI, 783 const LoopSafetyInfo *SafetyInfo) { 784 Instruction *New; 785 if (auto *CI = dyn_cast<CallInst>(&I)) { 786 const auto &BlockColors = SafetyInfo->BlockColors; 787 788 // Sinking call-sites need to be handled differently from other 789 // instructions. The cloned call-site needs a funclet bundle operand 790 // appropriate for it's location in the CFG. 791 SmallVector<OperandBundleDef, 1> OpBundles; 792 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); 793 BundleIdx != BundleEnd; ++BundleIdx) { 794 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); 795 if (Bundle.getTagID() == LLVMContext::OB_funclet) 796 continue; 797 798 OpBundles.emplace_back(Bundle); 799 } 800 801 if (!BlockColors.empty()) { 802 const ColorVector &CV = BlockColors.find(&ExitBlock)->second; 803 assert(CV.size() == 1 && "non-unique color for exit block!"); 804 BasicBlock *BBColor = CV.front(); 805 Instruction *EHPad = BBColor->getFirstNonPHI(); 806 if (EHPad->isEHPad()) 807 OpBundles.emplace_back("funclet", EHPad); 808 } 809 810 New = CallInst::Create(CI, OpBundles); 811 } else { 812 New = I.clone(); 813 } 814 815 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); 816 if (!I.getName().empty()) 817 New->setName(I.getName() + ".le"); 818 819 // Build LCSSA PHI nodes for any in-loop operands. Note that this is 820 // particularly cheap because we can rip off the PHI node that we're 821 // replacing for the number and blocks of the predecessors. 822 // OPT: If this shows up in a profile, we can instead finish sinking all 823 // invariant instructions, and then walk their operands to re-establish 824 // LCSSA. That will eliminate creating PHI nodes just to nuke them when 825 // sinking bottom-up. 826 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; 827 ++OI) 828 if (Instruction *OInst = dyn_cast<Instruction>(*OI)) 829 if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) 830 if (!OLoop->contains(&PN)) { 831 PHINode *OpPN = 832 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), 833 OInst->getName() + ".lcssa", &ExitBlock.front()); 834 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 835 OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); 836 *OI = OpPN; 837 } 838 return New; 839 } 840 841 static Instruction *sinkThroughTriviallyReplacablePHI( 842 PHINode *TPN, Instruction *I, LoopInfo *LI, 843 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, 844 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { 845 assert(isTriviallyReplacablePHI(*TPN, *I) && 846 "Expect only trivially replacalbe PHI"); 847 BasicBlock *ExitBlock = TPN->getParent(); 848 Instruction *New; 849 auto It = SunkCopies.find(ExitBlock); 850 if (It != SunkCopies.end()) 851 New = It->second; 852 else 853 New = SunkCopies[ExitBlock] = 854 CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); 855 return New; 856 } 857 858 static bool canSplitPredecessors(PHINode *PN) { 859 BasicBlock *BB = PN->getParent(); 860 if (!BB->canSplitPredecessors()) 861 return false; 862 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 863 BasicBlock *BBPred = *PI; 864 if (isa<IndirectBrInst>(BBPred->getTerminator())) 865 return false; 866 } 867 return true; 868 } 869 870 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, 871 LoopInfo *LI, const Loop *CurLoop) { 872 #ifndef NDEBUG 873 SmallVector<BasicBlock *, 32> ExitBlocks; 874 CurLoop->getUniqueExitBlocks(ExitBlocks); 875 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 876 ExitBlocks.end()); 877 #endif 878 BasicBlock *ExitBB = PN->getParent(); 879 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); 880 881 // Split predecessors of the loop exit to make instructions in the loop are 882 // exposed to exit blocks through trivially replacable PHIs while keeping the 883 // loop in the canonical form where each predecessor of each exit block should 884 // be contained within the loop. For example, this will convert the loop below 885 // from 886 // 887 // LB1: 888 // %v1 = 889 // br %LE, %LB2 890 // LB2: 891 // %v2 = 892 // br %LE, %LB1 893 // LE: 894 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replacable 895 // 896 // to 897 // 898 // LB1: 899 // %v1 = 900 // br %LE.split, %LB2 901 // LB2: 902 // %v2 = 903 // br %LE.split2, %LB1 904 // LE.split: 905 // %p1 = phi [%v1, %LB1] <-- trivially replacable 906 // br %LE 907 // LE.split2: 908 // %p2 = phi [%v2, %LB2] <-- trivially replacable 909 // br %LE 910 // LE: 911 // %p = phi [%p1, %LE.split], [%p2, %LE.split2] 912 // 913 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); 914 while (!PredBBs.empty()) { 915 BasicBlock *PredBB = *PredBBs.begin(); 916 assert(CurLoop->contains(PredBB) && 917 "Expect all predecessors are in the loop"); 918 if (PN->getBasicBlockIndex(PredBB) >= 0) 919 SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true); 920 PredBBs.remove(PredBB); 921 } 922 } 923 924 /// When an instruction is found to only be used outside of the loop, this 925 /// function moves it to the exit blocks and patches up SSA form as needed. 926 /// This method is guaranteed to remove the original instruction from its 927 /// position, and may either delete it or move it to outside of the loop. 928 /// 929 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 930 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, 931 OptimizationRemarkEmitter *ORE, bool FreeInLoop) { 932 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 933 ORE->emit([&]() { 934 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 935 << "sinking " << ore::NV("Inst", &I); 936 }); 937 bool Changed = false; 938 if (isa<LoadInst>(I)) 939 ++NumMovedLoads; 940 else if (isa<CallInst>(I)) 941 ++NumMovedCalls; 942 ++NumSunk; 943 944 // Iterate over users to be ready for actual sinking. Replace users via 945 // unrechable blocks with undef and make all user PHIs trivially replcable. 946 SmallPtrSet<Instruction *, 8> VisitedUsers; 947 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { 948 auto *User = cast<Instruction>(*UI); 949 Use &U = UI.getUse(); 950 ++UI; 951 952 if (VisitedUsers.count(User) || CurLoop->contains(User)) 953 continue; 954 955 if (!DT->isReachableFromEntry(User->getParent())) { 956 U = UndefValue::get(I.getType()); 957 Changed = true; 958 continue; 959 } 960 961 // The user must be a PHI node. 962 PHINode *PN = cast<PHINode>(User); 963 964 // Surprisingly, instructions can be used outside of loops without any 965 // exits. This can only happen in PHI nodes if the incoming block is 966 // unreachable. 967 BasicBlock *BB = PN->getIncomingBlock(U); 968 if (!DT->isReachableFromEntry(BB)) { 969 U = UndefValue::get(I.getType()); 970 Changed = true; 971 continue; 972 } 973 974 VisitedUsers.insert(PN); 975 if (isTriviallyReplacablePHI(*PN, I)) 976 continue; 977 978 if (!canSplitPredecessors(PN)) 979 return Changed; 980 981 // Split predecessors of the PHI so that we can make users trivially 982 // replacable. 983 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop); 984 985 // Should rebuild the iterators, as they may be invalidated by 986 // splitPredecessorsOfLoopExit(). 987 UI = I.user_begin(); 988 UE = I.user_end(); 989 } 990 991 if (VisitedUsers.empty()) 992 return Changed; 993 994 #ifndef NDEBUG 995 SmallVector<BasicBlock *, 32> ExitBlocks; 996 CurLoop->getUniqueExitBlocks(ExitBlocks); 997 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 998 ExitBlocks.end()); 999 #endif 1000 1001 // Clones of this instruction. Don't create more than one per exit block! 1002 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 1003 1004 // If this instruction is only used outside of the loop, then all users are 1005 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 1006 // the instruction. 1007 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); 1008 for (auto *UI : Users) { 1009 auto *User = cast<Instruction>(UI); 1010 1011 if (CurLoop->contains(User)) 1012 continue; 1013 1014 PHINode *PN = cast<PHINode>(User); 1015 assert(ExitBlockSet.count(PN->getParent()) && 1016 "The LCSSA PHI is not in an exit block!"); 1017 // The PHI must be trivially replacable. 1018 Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, 1019 SafetyInfo, CurLoop); 1020 PN->replaceAllUsesWith(New); 1021 PN->eraseFromParent(); 1022 Changed = true; 1023 } 1024 return Changed; 1025 } 1026 1027 /// When an instruction is found to only use loop invariant operands that 1028 /// is safe to hoist, this instruction is called to do the dirty work. 1029 /// 1030 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1031 const LoopSafetyInfo *SafetyInfo, 1032 OptimizationRemarkEmitter *ORE) { 1033 auto *Preheader = CurLoop->getLoopPreheader(); 1034 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I 1035 << "\n"); 1036 ORE->emit([&]() { 1037 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " 1038 << ore::NV("Inst", &I); 1039 }); 1040 1041 // Metadata can be dependent on conditions we are hoisting above. 1042 // Conservatively strip all metadata on the instruction unless we were 1043 // guaranteed to execute I if we entered the loop, in which case the metadata 1044 // is valid in the loop preheader. 1045 if (I.hasMetadataOtherThanDebugLoc() && 1046 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 1047 // time in isGuaranteedToExecute if we don't actually have anything to 1048 // drop. It is a compile time optimization, not required for correctness. 1049 !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) 1050 I.dropUnknownNonDebugMetadata(); 1051 1052 // Move the new node to the Preheader, before its terminator. 1053 I.moveBefore(Preheader->getTerminator()); 1054 1055 // Do not retain debug locations when we are moving instructions to different 1056 // basic blocks, because we want to avoid jumpy line tables. Calls, however, 1057 // need to retain their debug locs because they may be inlined. 1058 // FIXME: How do we retain source locations without causing poor debugging 1059 // behavior? 1060 if (!isa<CallInst>(I)) 1061 I.setDebugLoc(DebugLoc()); 1062 1063 if (isa<LoadInst>(I)) 1064 ++NumMovedLoads; 1065 else if (isa<CallInst>(I)) 1066 ++NumMovedCalls; 1067 ++NumHoisted; 1068 return true; 1069 } 1070 1071 /// Only sink or hoist an instruction if it is not a trapping instruction, 1072 /// or if the instruction is known not to trap when moved to the preheader. 1073 /// or if it is a trapping instruction and is guaranteed to execute. 1074 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 1075 const DominatorTree *DT, 1076 const Loop *CurLoop, 1077 const LoopSafetyInfo *SafetyInfo, 1078 OptimizationRemarkEmitter *ORE, 1079 const Instruction *CtxI) { 1080 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) 1081 return true; 1082 1083 bool GuaranteedToExecute = 1084 isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); 1085 1086 if (!GuaranteedToExecute) { 1087 auto *LI = dyn_cast<LoadInst>(&Inst); 1088 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 1089 ORE->emit([&]() { 1090 return OptimizationRemarkMissed( 1091 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 1092 << "failed to hoist load with loop-invariant address " 1093 "because load is conditionally executed"; 1094 }); 1095 } 1096 1097 return GuaranteedToExecute; 1098 } 1099 1100 namespace { 1101 class LoopPromoter : public LoadAndStorePromoter { 1102 Value *SomePtr; // Designated pointer to store to. 1103 const SmallSetVector<Value *, 8> &PointerMustAliases; 1104 SmallVectorImpl<BasicBlock *> &LoopExitBlocks; 1105 SmallVectorImpl<Instruction *> &LoopInsertPts; 1106 PredIteratorCache &PredCache; 1107 AliasSetTracker &AST; 1108 LoopInfo &LI; 1109 DebugLoc DL; 1110 int Alignment; 1111 bool UnorderedAtomic; 1112 AAMDNodes AATags; 1113 1114 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { 1115 if (Instruction *I = dyn_cast<Instruction>(V)) 1116 if (Loop *L = LI.getLoopFor(I->getParent())) 1117 if (!L->contains(BB)) { 1118 // We need to create an LCSSA PHI node for the incoming value and 1119 // store that. 1120 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), 1121 I->getName() + ".lcssa", &BB->front()); 1122 for (BasicBlock *Pred : PredCache.get(BB)) 1123 PN->addIncoming(I, Pred); 1124 return PN; 1125 } 1126 return V; 1127 } 1128 1129 public: 1130 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, 1131 const SmallSetVector<Value *, 8> &PMA, 1132 SmallVectorImpl<BasicBlock *> &LEB, 1133 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, 1134 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, 1135 bool UnorderedAtomic, const AAMDNodes &AATags) 1136 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), 1137 LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), 1138 LI(li), DL(std::move(dl)), Alignment(alignment), 1139 UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} 1140 1141 bool isInstInList(Instruction *I, 1142 const SmallVectorImpl<Instruction *> &) const override { 1143 Value *Ptr; 1144 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 1145 Ptr = LI->getOperand(0); 1146 else 1147 Ptr = cast<StoreInst>(I)->getPointerOperand(); 1148 return PointerMustAliases.count(Ptr); 1149 } 1150 1151 void doExtraRewritesBeforeFinalDeletion() const override { 1152 // Insert stores after in the loop exit blocks. Each exit block gets a 1153 // store of the live-out values that feed them. Since we've already told 1154 // the SSA updater about the defs in the loop and the preheader 1155 // definition, it is all set and we can start using it. 1156 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 1157 BasicBlock *ExitBlock = LoopExitBlocks[i]; 1158 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 1159 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 1160 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 1161 Instruction *InsertPos = LoopInsertPts[i]; 1162 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 1163 if (UnorderedAtomic) 1164 NewSI->setOrdering(AtomicOrdering::Unordered); 1165 NewSI->setAlignment(Alignment); 1166 NewSI->setDebugLoc(DL); 1167 if (AATags) 1168 NewSI->setAAMetadata(AATags); 1169 } 1170 } 1171 1172 void replaceLoadWithValue(LoadInst *LI, Value *V) const override { 1173 // Update alias analysis. 1174 AST.copyValue(LI, V); 1175 } 1176 void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } 1177 }; 1178 1179 1180 /// Return true iff we can prove that a caller of this function can not inspect 1181 /// the contents of the provided object in a well defined program. 1182 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { 1183 if (isa<AllocaInst>(Object)) 1184 // Since the alloca goes out of scope, we know the caller can't retain a 1185 // reference to it and be well defined. Thus, we don't need to check for 1186 // capture. 1187 return true; 1188 1189 // For all other objects we need to know that the caller can't possibly 1190 // have gotten a reference to the object. There are two components of 1191 // that: 1192 // 1) Object can't be escaped by this function. This is what 1193 // PointerMayBeCaptured checks. 1194 // 2) Object can't have been captured at definition site. For this, we 1195 // need to know the return value is noalias. At the moment, we use a 1196 // weaker condition and handle only AllocLikeFunctions (which are 1197 // known to be noalias). TODO 1198 return isAllocLikeFn(Object, TLI) && 1199 !PointerMayBeCaptured(Object, true, true); 1200 } 1201 1202 } // namespace 1203 1204 /// Try to promote memory values to scalars by sinking stores out of the 1205 /// loop and moving loads to before the loop. We do this by looping over 1206 /// the stores in the loop, looking for stores to Must pointers which are 1207 /// loop invariant. 1208 /// 1209 bool llvm::promoteLoopAccessesToScalars( 1210 const SmallSetVector<Value *, 8> &PointerMustAliases, 1211 SmallVectorImpl<BasicBlock *> &ExitBlocks, 1212 SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, 1213 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1214 Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 1215 OptimizationRemarkEmitter *ORE) { 1216 // Verify inputs. 1217 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1218 CurAST != nullptr && SafetyInfo != nullptr && 1219 "Unexpected Input to promoteLoopAccessesToScalars"); 1220 1221 Value *SomePtr = *PointerMustAliases.begin(); 1222 BasicBlock *Preheader = CurLoop->getLoopPreheader(); 1223 1224 // It isn't safe to promote a load/store from the loop if the load/store is 1225 // conditional. For example, turning: 1226 // 1227 // for () { if (c) *P += 1; } 1228 // 1229 // into: 1230 // 1231 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 1232 // 1233 // is not safe, because *P may only be valid to access if 'c' is true. 1234 // 1235 // The safety property divides into two parts: 1236 // p1) The memory may not be dereferenceable on entry to the loop. In this 1237 // case, we can't insert the required load in the preheader. 1238 // p2) The memory model does not allow us to insert a store along any dynamic 1239 // path which did not originally have one. 1240 // 1241 // If at least one store is guaranteed to execute, both properties are 1242 // satisfied, and promotion is legal. 1243 // 1244 // This, however, is not a necessary condition. Even if no store/load is 1245 // guaranteed to execute, we can still establish these properties. 1246 // We can establish (p1) by proving that hoisting the load into the preheader 1247 // is safe (i.e. proving dereferenceability on all paths through the loop). We 1248 // can use any access within the alias set to prove dereferenceability, 1249 // since they're all must alias. 1250 // 1251 // There are two ways establish (p2): 1252 // a) Prove the location is thread-local. In this case the memory model 1253 // requirement does not apply, and stores are safe to insert. 1254 // b) Prove a store dominates every exit block. In this case, if an exit 1255 // blocks is reached, the original dynamic path would have taken us through 1256 // the store, so inserting a store into the exit block is safe. Note that this 1257 // is different from the store being guaranteed to execute. For instance, 1258 // if an exception is thrown on the first iteration of the loop, the original 1259 // store is never executed, but the exit blocks are not executed either. 1260 1261 bool DereferenceableInPH = false; 1262 bool SafeToInsertStore = false; 1263 1264 SmallVector<Instruction *, 64> LoopUses; 1265 1266 // We start with an alignment of one and try to find instructions that allow 1267 // us to prove better alignment. 1268 unsigned Alignment = 1; 1269 // Keep track of which types of access we see 1270 bool SawUnorderedAtomic = false; 1271 bool SawNotAtomic = false; 1272 AAMDNodes AATags; 1273 1274 const DataLayout &MDL = Preheader->getModule()->getDataLayout(); 1275 1276 bool IsKnownThreadLocalObject = false; 1277 if (SafetyInfo->MayThrow) { 1278 // If a loop can throw, we have to insert a store along each unwind edge. 1279 // That said, we can't actually make the unwind edge explicit. Therefore, 1280 // we have to prove that the store is dead along the unwind edge. We do 1281 // this by proving that the caller can't have a reference to the object 1282 // after return and thus can't possibly load from the object. 1283 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1284 if (!isKnownNonEscaping(Object, TLI)) 1285 return false; 1286 // Subtlety: Alloca's aren't visible to callers, but *are* potentially 1287 // visible to other threads if captured and used during their lifetimes. 1288 IsKnownThreadLocalObject = !isa<AllocaInst>(Object); 1289 } 1290 1291 // Check that all of the pointers in the alias set have the same type. We 1292 // cannot (yet) promote a memory location that is loaded and stored in 1293 // different sizes. While we are at it, collect alignment and AA info. 1294 for (Value *ASIV : PointerMustAliases) { 1295 // Check that all of the pointers in the alias set have the same type. We 1296 // cannot (yet) promote a memory location that is loaded and stored in 1297 // different sizes. 1298 if (SomePtr->getType() != ASIV->getType()) 1299 return false; 1300 1301 for (User *U : ASIV->users()) { 1302 // Ignore instructions that are outside the loop. 1303 Instruction *UI = dyn_cast<Instruction>(U); 1304 if (!UI || !CurLoop->contains(UI)) 1305 continue; 1306 1307 // If there is an non-load/store instruction in the loop, we can't promote 1308 // it. 1309 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 1310 assert(!Load->isVolatile() && "AST broken"); 1311 if (!Load->isUnordered()) 1312 return false; 1313 1314 SawUnorderedAtomic |= Load->isAtomic(); 1315 SawNotAtomic |= !Load->isAtomic(); 1316 1317 if (!DereferenceableInPH) 1318 DereferenceableInPH = isSafeToExecuteUnconditionally( 1319 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); 1320 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 1321 // Stores *of* the pointer are not interesting, only stores *to* the 1322 // pointer. 1323 if (UI->getOperand(1) != ASIV) 1324 continue; 1325 assert(!Store->isVolatile() && "AST broken"); 1326 if (!Store->isUnordered()) 1327 return false; 1328 1329 SawUnorderedAtomic |= Store->isAtomic(); 1330 SawNotAtomic |= !Store->isAtomic(); 1331 1332 // If the store is guaranteed to execute, both properties are satisfied. 1333 // We may want to check if a store is guaranteed to execute even if we 1334 // already know that promotion is safe, since it may have higher 1335 // alignment than any other guaranteed stores, in which case we can 1336 // raise the alignment on the promoted store. 1337 unsigned InstAlignment = Store->getAlignment(); 1338 if (!InstAlignment) 1339 InstAlignment = 1340 MDL.getABITypeAlignment(Store->getValueOperand()->getType()); 1341 1342 if (!DereferenceableInPH || !SafeToInsertStore || 1343 (InstAlignment > Alignment)) { 1344 if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { 1345 DereferenceableInPH = true; 1346 SafeToInsertStore = true; 1347 Alignment = std::max(Alignment, InstAlignment); 1348 } 1349 } 1350 1351 // If a store dominates all exit blocks, it is safe to sink. 1352 // As explained above, if an exit block was executed, a dominating 1353 // store must have been been executed at least once, so we are not 1354 // introducing stores on paths that did not have them. 1355 // Note that this only looks at explicit exit blocks. If we ever 1356 // start sinking stores into unwind edges (see above), this will break. 1357 if (!SafeToInsertStore) 1358 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 1359 return DT->dominates(Store->getParent(), Exit); 1360 }); 1361 1362 // If the store is not guaranteed to execute, we may still get 1363 // deref info through it. 1364 if (!DereferenceableInPH) { 1365 DereferenceableInPH = isDereferenceableAndAlignedPointer( 1366 Store->getPointerOperand(), Store->getAlignment(), MDL, 1367 Preheader->getTerminator(), DT); 1368 } 1369 } else 1370 return false; // Not a load or store. 1371 1372 // Merge the AA tags. 1373 if (LoopUses.empty()) { 1374 // On the first load/store, just take its AA tags. 1375 UI->getAAMetadata(AATags); 1376 } else if (AATags) { 1377 UI->getAAMetadata(AATags, /* Merge = */ true); 1378 } 1379 1380 LoopUses.push_back(UI); 1381 } 1382 } 1383 1384 // If we found both an unordered atomic instruction and a non-atomic memory 1385 // access, bail. We can't blindly promote non-atomic to atomic since we 1386 // might not be able to lower the result. We can't downgrade since that 1387 // would violate memory model. Also, align 0 is an error for atomics. 1388 if (SawUnorderedAtomic && SawNotAtomic) 1389 return false; 1390 1391 // If we couldn't prove we can hoist the load, bail. 1392 if (!DereferenceableInPH) 1393 return false; 1394 1395 // We know we can hoist the load, but don't have a guaranteed store. 1396 // Check whether the location is thread-local. If it is, then we can insert 1397 // stores along paths which originally didn't have them without violating the 1398 // memory model. 1399 if (!SafeToInsertStore) { 1400 if (IsKnownThreadLocalObject) 1401 SafeToInsertStore = true; 1402 else { 1403 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1404 SafeToInsertStore = 1405 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && 1406 !PointerMayBeCaptured(Object, true, true); 1407 } 1408 } 1409 1410 // If we've still failed to prove we can sink the store, give up. 1411 if (!SafeToInsertStore) 1412 return false; 1413 1414 // Otherwise, this is safe to promote, lets do it! 1415 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr 1416 << '\n'); 1417 ORE->emit([&]() { 1418 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 1419 LoopUses[0]) 1420 << "Moving accesses to memory location out of the loop"; 1421 }); 1422 ++NumPromoted; 1423 1424 // Grab a debug location for the inserted loads/stores; given that the 1425 // inserted loads/stores have little relation to the original loads/stores, 1426 // this code just arbitrarily picks a location from one, since any debug 1427 // location is better than none. 1428 DebugLoc DL = LoopUses[0]->getDebugLoc(); 1429 1430 // We use the SSAUpdater interface to insert phi nodes as required. 1431 SmallVector<PHINode *, 16> NewPHIs; 1432 SSAUpdater SSA(&NewPHIs); 1433 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 1434 InsertPts, PIC, *CurAST, *LI, DL, Alignment, 1435 SawUnorderedAtomic, AATags); 1436 1437 // Set up the preheader to have a definition of the value. It is the live-out 1438 // value from the preheader that uses in the loop will use. 1439 LoadInst *PreheaderLoad = new LoadInst( 1440 SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); 1441 if (SawUnorderedAtomic) 1442 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 1443 PreheaderLoad->setAlignment(Alignment); 1444 PreheaderLoad->setDebugLoc(DL); 1445 if (AATags) 1446 PreheaderLoad->setAAMetadata(AATags); 1447 SSA.AddAvailableValue(Preheader, PreheaderLoad); 1448 1449 // Rewrite all the loads in the loop and remember all the definitions from 1450 // stores in the loop. 1451 Promoter.run(LoopUses); 1452 1453 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 1454 if (PreheaderLoad->use_empty()) 1455 PreheaderLoad->eraseFromParent(); 1456 1457 return true; 1458 } 1459 1460 /// Returns an owning pointer to an alias set which incorporates aliasing info 1461 /// from L and all subloops of L. 1462 /// FIXME: In new pass manager, there is no helper function to handle loop 1463 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed 1464 /// from scratch for every loop. Hook up with the helper functions when 1465 /// available in the new pass manager to avoid redundant computation. 1466 AliasSetTracker * 1467 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 1468 AliasAnalysis *AA) { 1469 AliasSetTracker *CurAST = nullptr; 1470 SmallVector<Loop *, 4> RecomputeLoops; 1471 for (Loop *InnerL : L->getSubLoops()) { 1472 auto MapI = LoopToAliasSetMap.find(InnerL); 1473 // If the AST for this inner loop is missing it may have been merged into 1474 // some other loop's AST and then that loop unrolled, and so we need to 1475 // recompute it. 1476 if (MapI == LoopToAliasSetMap.end()) { 1477 RecomputeLoops.push_back(InnerL); 1478 continue; 1479 } 1480 AliasSetTracker *InnerAST = MapI->second; 1481 1482 if (CurAST != nullptr) { 1483 // What if InnerLoop was modified by other passes ? 1484 CurAST->add(*InnerAST); 1485 1486 // Once we've incorporated the inner loop's AST into ours, we don't need 1487 // the subloop's anymore. 1488 delete InnerAST; 1489 } else { 1490 CurAST = InnerAST; 1491 } 1492 LoopToAliasSetMap.erase(MapI); 1493 } 1494 if (CurAST == nullptr) 1495 CurAST = new AliasSetTracker(*AA); 1496 1497 auto mergeLoop = [&](Loop *L) { 1498 // Loop over the body of this loop, looking for calls, invokes, and stores. 1499 for (BasicBlock *BB : L->blocks()) 1500 CurAST->add(*BB); // Incorporate the specified basic block 1501 }; 1502 1503 // Add everything from the sub loops that are no longer directly available. 1504 for (Loop *InnerL : RecomputeLoops) 1505 mergeLoop(InnerL); 1506 1507 // And merge in this loop. 1508 mergeLoop(L); 1509 1510 return CurAST; 1511 } 1512 1513 /// Simple analysis hook. Clone alias set info. 1514 /// 1515 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 1516 Loop *L) { 1517 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1518 if (!AST) 1519 return; 1520 1521 AST->copyValue(From, To); 1522 } 1523 1524 /// Simple Analysis hook. Delete value V from alias set 1525 /// 1526 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { 1527 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1528 if (!AST) 1529 return; 1530 1531 AST->deleteValue(V); 1532 } 1533 1534 /// Simple Analysis hook. Delete value L from alias set map. 1535 /// 1536 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { 1537 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1538 if (!AST) 1539 return; 1540 1541 delete AST; 1542 LICM.getLoopToAliasSetMap().erase(L); 1543 } 1544 1545 /// Return true if the body of this loop may store into the memory 1546 /// location pointed to by V. 1547 /// 1548 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 1549 const AAMDNodes &AAInfo, 1550 AliasSetTracker *CurAST) { 1551 // Check to see if any of the basic blocks in CurLoop invalidate *V. 1552 return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); 1553 } 1554 1555 /// Little predicate that returns true if the specified basic block is in 1556 /// a subloop of the current one, not the current one itself. 1557 /// 1558 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 1559 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 1560 return LI->getLoopFor(BB) != CurLoop; 1561 } 1562