//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion, attempting to remove as much
// code from the body of a loop as possible.  It does this by either hoisting
// code into the preheader block, or by sinking code to the exit blocks if it is
// safe.  This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
// This pass uses alias analysis for two purposes:
//
//  1. Moving loop invariant loads and calls out of loops.  If we can determine
//     that a load or call inside of a loop never aliases anything stored to,
//     we can hoist it or sink it like any other instruction.
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
//     the loop, we try to move the store to happen AFTER the loop instead of
//     inside of the loop.  This can only happen if a few conditions are true:
//       A. The pointer stored through is loop invariant
//       B. There are no stores or loads in the loop which _may_ alias the
//          pointer.  There are no calls in the loop which mod/ref the pointer.
//     If these conditions are true, we can promote the loads and stores in the
//     loop of the pointer to use a temporary alloca'd variable.  We then use
//     the SSAUpdater to construct the appropriate SSA form for the value.
30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Transforms/Scalar/LICM.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/BasicAliasAnalysis.h" 38 #include "llvm/Analysis/CaptureTracking.h" 39 #include "llvm/Analysis/ConstantFolding.h" 40 #include "llvm/Analysis/GlobalsModRef.h" 41 #include "llvm/Analysis/GuardUtils.h" 42 #include "llvm/Analysis/Loads.h" 43 #include "llvm/Analysis/LoopInfo.h" 44 #include "llvm/Analysis/LoopPass.h" 45 #include "llvm/Analysis/MemoryBuiltins.h" 46 #include "llvm/Analysis/MemorySSA.h" 47 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 48 #include "llvm/Analysis/ScalarEvolution.h" 49 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 50 #include "llvm/Analysis/TargetLibraryInfo.h" 51 #include "llvm/Transforms/Utils/Local.h" 52 #include "llvm/Analysis/ValueTracking.h" 53 #include "llvm/IR/CFG.h" 54 #include "llvm/IR/Constants.h" 55 #include "llvm/IR/DataLayout.h" 56 #include "llvm/IR/DerivedTypes.h" 57 #include "llvm/IR/Dominators.h" 58 #include "llvm/IR/Instructions.h" 59 #include "llvm/IR/IntrinsicInst.h" 60 #include "llvm/IR/LLVMContext.h" 61 #include "llvm/IR/Metadata.h" 62 #include "llvm/IR/PatternMatch.h" 63 #include "llvm/IR/PredIteratorCache.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/raw_ostream.h" 67 #include "llvm/Transforms/Scalar.h" 68 #include "llvm/Transforms/Scalar/LoopPassManager.h" 69 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 70 #include "llvm/Transforms/Utils/LoopUtils.h" 71 #include "llvm/Transforms/Utils/SSAUpdater.h" 72 #include <algorithm> 73 #include <utility> 74 using namespace llvm; 75 76 #define DEBUG_TYPE "licm" 77 78 STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 79 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); 80 
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

// Cap on how many bitcasts/users isLoadInvariantInLoop will walk when looking
// for a dominating llvm.invariant.start.
static cl::opt<uint32_t> MaxNumUsesTraversed(
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    cl::desc("Max num uses visited for identifying load "
             "invariance in loop using invariant start (default = 8)"));

// Default value of zero implies we use the regular alias set tracker mechanism
// instead of the cross product using AA to identify aliasing of the memory
// location we are interested in.
static cl::opt<int>
    LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
                   cl::desc("How many instruction to cross product using AA"));

// Forward declarations for the static helpers defined later in this file.
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  ICFLoopSafetyInfo *SafetyInfo,
                  OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           OptimizationRemarkEmitter *ORE,
                                           const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AliasAnalysis *AA);

static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo);

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             AliasSetTracker *AST);

static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo);

namespace {
/// Implementation shared by the legacy-PM and new-PM LICM drivers.
struct LoopInvariantCodeMotion {
  using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
  // When DeleteAST is true the alias set tracker for L is discarded after the
  // run; when false (legacy PM) it is saved in LoopToAliasSetMap so the
  // enclosing loop's run can reuse it.
  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
                 ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool DeleteAST);

  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }

private:
  // Alias set trackers saved for outer loops (legacy PM only).
  ASTrackerMapTy LoopToAliasSetMap;

  std::unique_ptr<AliasSetTracker>
  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
};

/// Legacy pass-manager wrapper around LoopInvariantCodeMotion.
struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass() : LoopPass(ID) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L)) {
      // If we have run LICM on a previous loop but now we are skipping
      // (because we've hit the opt-bisect limit), we need to clear the
      // loop alias information.
      LICM.getLoopToAliasSetMap().clear();
      return false;
    }

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = EnableMSSALoopDependency
                          ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
                          : nullptr;
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    // DeleteAST=false: keep the AST cached for the parent loop's run.
    return LICM.runOnLoop(L,
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
                              *L->getHeader()->getParent()),
                          SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    if (EnableMSSALoopDependency)
      AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
  }

  using llvm::Pass::doFinalization;

  bool doFinalization() override {
    // Every saved alias set must have been consumed/freed by an outer loop's
    // run (or deleteAnalysisLoop) before the pass is torn down.
    assert(LICM.getLoopToAliasSetMap().empty() &&
           "Didn't free loop alias sets");
    return false;
  }

private:
  LoopInvariantCodeMotion LICM;

  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
                               Loop *L) override;

  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
  /// set.
  void deleteAnalysisValue(Value *V, Loop *L) override;

  /// Simple Analysis hook. Delete loop L from alias set map.
  void deleteAnalysisLoop(Loop *L) override;
};
} // namespace

PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
  const auto &FAM =
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
  Function *F = L.getHeader()->getParent();

  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
  // FIXME: This should probably be optional rather than required.
  if (!ORE)
    report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
                       "cached at a higher level");

  LoopInvariantCodeMotion LICM;
  // DeleteAST=true: the new PM does not cache alias sets across loops.
  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
                      AR.MSSA, ORE, true))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();

  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();

  return PA;
}

char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
bool LoopInvariantCodeMotion::runOnLoop(
    Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  // Takes ownership of the alias set tracker for this loop; it is either
  // handed to the parent loop's entry in LoopToAliasSetMap at the end of this
  // function or destroyed when CurAST goes out of scope.
  std::unique_ptr<AliasSetTracker> CurAST = collectAliasInfoForLoop(L, LI, AA);

  // Get the preheader block to move instructions into...
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  ICFLoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L);

  // We want to visit all of the instructions in this loop... that are not parts
  // of our subloops (they have already had their invariants hoisted out of
  // their loop, into this loop, so there is no need to process the BODIES of
  // the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree so
  // that we are guaranteed to see definitions before we see uses. This allows
  // us to sink instructions in one pass, without iteration. After sinking
  // instructions, we perform another pass to hoist them out of the loop.
  //
  // Note: hoistRegion is only invoked when a preheader exists; it relies on
  // that (it inserts at the preheader terminator).
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          CurAST.get(), &SafetyInfo, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                           CurAST.get(), &SafetyInfo, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify,
  // make sure we catch that. An additional load may be generated in the
  // preheader for SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      SmallVector<Instruction *, 8> InsertPts;
      InsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks)
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());

      PredIteratorCache PIC;

      bool Promoted = false;

      // Loop over all of the alias sets in the tracker object.
      for (AliasSet &AS : *CurAST) {
        // We can promote this alias set if it has a store, if it is a "Must"
        // alias set, if the pointer is loop invariant, and if we are not
        // eliminating any volatile loads or stores.
        if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
            !L->isLoopInvariant(AS.begin()->getValue()))
          continue;

        assert(
            !AS.empty() &&
            "Must alias set should have at least one pointer element in it!");

        SmallSetVector<Value *, 8> PointerMustAliases;
        for (const auto &ASI : AS)
          PointerMustAliases.insert(ASI.getValue());

        Promoted |= promoteLoopAccessesToScalars(
            PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
            CurAST.get(), &SafetyInfo, ORE);
      }

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
      // within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
      // it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI, SE);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
  // specifically moving instructions across the loop boundary and so it is
  // especially in need of sanity checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  // If this loop is nested inside of another one, save the alias information
  // for when we process the outer loop.
  if (L->getParentLoop() && !DeleteAST)
    LoopToAliasSetMap[L] = std::move(CurAST);

  // Moving code around may have changed which blocks are in/out of the loop
  // from SCEV's point of view; drop its cached loop dispositions.
  if (Changed && SE)
    SE->forgetLoopDispositions(L);
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
                      OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion");

  // We want to visit children before parents. We will enque all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    // Walk the block bottom-up so that an instruction's users are visited
    // before the instruction itself.
    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // If the instruction is dead, we would try to sink it because it isn't
      // used in the loop, instead, just delete it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        salvageDebugInfo(I);
        // Step the iterator off of I before erasing it so it stays valid.
        ++II;
        eraseInstruction(I, *SafetyInfo, CurAST);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop. We can do this if the all users of the instruction are
      // outside of the loop. In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FreeInLoop = false;
      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
          !I.mayHaveSideEffects()) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
          // If the instruction is "free" it stays in the loop as well (its
          // in-loop users keep it); only erase the original when it is not.
          if (!FreeInLoop) {
            ++II;
            eraseInstruction(I, *SafetyInfo, CurAST);
          }
          Changed = true;
        }
      }
    }
  }
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion");

  // We want to visit parents before children. We will enque all the parents
  // before their children in the worklist and process the worklist in order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : Worklist) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
      // Advance past I now; the cases below may erase or move I.
      Instruction &I = *II++;
      // Try constant folding this instruction. If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold it.
      if (Constant *C = ConstantFoldInstruction(
              &I, I.getModule()->getDataLayout(), TLI)) {
        LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
                          << '\n');
        // Keep the alias set tracker in sync before rewriting uses of I.
        CurAST->copyValue(&I, C);
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          eraseInstruction(I, *SafetyInfo, CurAST);
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader. We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction. The caller (runOnLoop) only
      // invokes hoistRegion when a preheader exists, so the terminator
      // dereference below is safe.
      //
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator())) {
        hoist(I, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication. Only legal when the
      // divisor is loop invariant and the fast-math 'arcp' flag allows a
      // reciprocal; (1.0 / D) is hoisted, (X * recip) stays in the loop.
      if (I.getOpcode() == Instruction::FDiv &&
          CurLoop->isLoopInvariant(I.getOperand(1)) &&
          I.hasAllowReciprocal()) {
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, CurAST);

        hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }

      // Hoist unused invariant.start intrinsics and guard intrinsics that are
      // guaranteed to execute and see no prior writes in the loop.
      using namespace PatternMatch;
      if (((I.use_empty() &&
            match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
           isGuard(&I)) &&
          CurLoop->hasLoopInvariantOperands(&I) &&
          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
          SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
        hoist(I, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }
    }
  }

  return Changed;
}

// Return true if LI is invariant within scope of the loop.
// LI is invariant if CurLoop is dominated by an invariant.start representing
// the same memory location and size as the memory location LI loads from, and
// also the invariant.start has no uses.
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
      cast<PointerType>(Addr->getType())->getElementType());

  // if the type is i8 addrspace(x)*, we know this is the type of
  // llvm.invariant.start operand
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
                                     LI->getPointerAddressSpace());
  unsigned BitcastsVisited = 0;
  // Look through bitcasts until we reach the i8* type (this is invariant.start
  // operand type).
  while (Addr->getType() != PtrInt8Ty) {
    auto *BC = dyn_cast<BitCastInst>(Addr);
    // Avoid traversing high number of bitcast uses.
    if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
      return false;
    Addr = BC->getOperand(0);
  }

  unsigned UsesVisited = 0;
  // Traverse all uses of the load operand value, to see if invariant.start is
  // one of the uses, and whether it dominates the load instruction.
  for (auto *U : Addr->users()) {
    // Avoid traversing for Load operand with high number of users.
    if (++UsesVisited > MaxNumUsesTraversed)
      return false;
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    // If there are escaping uses of invariant.start instruction, the load maybe
    // non-invariant.
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
    // invariant.start's first argument is the location size in bytes.
    unsigned InvariantSizeInBits =
        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
    if (LocSizeInBits <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }

  return false;
}

namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
  // Only these instructions are hoistable/sinkable.
  return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
          isa<CallInst>(I) || isa<FenceInst>(I) ||
          isa<BinaryOperator>(I) || isa<CastInst>(I) ||
          isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
          isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
          isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
          isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
}
/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod.
bool isReadOnly(AliasSetTracker *CurAST) {
  for (AliasSet &AS : *CurAST) {
    if (!AS.isForwardingAliasSet() && AS.isMod()) {
      return false;
    }
  }
  return true;
}
} // namespace

bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              bool TargetExecutesOncePerLoop,
                              OptimizationRemarkEmitter *ORE) {
  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
    return false;

  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    bool Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI),
                                                CurAST, CurLoop, AA);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AliasAnalysis::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // it's arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->arg_operands())
          if (Op->getType()->isPointerTy() &&
              pointerInvalidatedByLoop(
                  MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
                  CurAST, CurLoop, AA))
            return false;
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(CurAST))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering. For the moment,
    // just give up if there are any other memory operations in the loop.
    // A movable fence requires its own alias set to be the only one in the
    // tracker, and the fence to be its sole instruction.
    auto Begin = CurAST->begin();
    assert(Begin != CurAST->end() && "must contain FI");
    if (std::next(Begin) != CurAST->end())
      // constant memory for instance, TODO: handle better
      return false;
    auto *UniqueI = Begin->getUniqueInstruction();
    if (!UniqueI)
      // other memory op, give up
      return false;
    (void)FI; // suppress unused variable warning
    assert(UniqueI == FI && "AS must contain FI");
    return true;
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop. For those cases, we fallback to
    // load store promotion instead. TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));

    if (AS.isRef() || !AS.isMustAlias())
      // Quick exit test, handled by the full path below as well.
      return false;
    auto *UniqueI = AS.getUniqueInstruction();
    if (!UniqueI)
      // other memory op, give up
      return false;
    assert(UniqueI == SI && "AS must contain SI");
    return true;
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing, it's up to the caller
  // to check fault safety
  return true;
}

/// Returns true if a PHINode is a trivially replaceable with an
/// Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
///
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}

/// Return true if the instruction is free in the loop.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
                         const TargetTransformInfo *TTI) {

  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
    if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
      return false;
    // For a GEP, we cannot simply use getUserCost because currently it
    // optimistically assume that a GEP will fold into addressing mode
    // regardless of its users.
    const BasicBlock *BB = GEP->getParent();
    for (const User *U : GEP->users()) {
      const Instruction *UI = cast<Instruction>(U);
      // Only consider the GEP free when every in-loop user is a load/store in
      // the same block (i.e. it can fold into the access's addressing mode).
      if (CurLoop->contains(UI) &&
          (BB != UI->getParent() ||
           (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
        return false;
    }
    return true;
  } else
    return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
}

/// Return true if the only users of this instruction are outside of
/// the loop.
/// If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
  const auto &BlockColors = SafetyInfo->getBlockColors();
  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
  for (const User *U : I.users()) {
    const Instruction *UI = cast<Instruction>(U);
    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      const BasicBlock *BB = PN->getParent();
      // We cannot sink uses in catchswitches.
      if (isa<CatchSwitchInst>(BB->getTerminator()))
        return false;

      // We need to sink a callsite to a unique funclet. Avoid sinking if the
      // phi use is too muddled.
      if (isa<CallInst>(I))
        if (!BlockColors.empty() &&
            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
          return false;
    }

    if (CurLoop->contains(UI)) {
      // A free instruction may keep its in-loop users: report it via
      // FreeInLoop rather than refusing to sink.
      if (IsFree) {
        FreeInLoop = true;
        continue;
      }
      return false;
    }
  }
  return true;
}

/// Clone I into ExitBlock (at its first insertion point), rewriting any
/// in-loop operands to fresh LCSSA PHIs modeled on PN.
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions. The cloned call-site needs a funclet bundle operand
    // appropriate for it's location in the CFG.
    // Copy all operand bundles except any existing funclet bundle; the
    // correct funclet for the exit block is re-attached below.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
  // particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
       ++OI)
    if (Instruction *OInst = dyn_cast<Instruction>(*OI))
      if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
        if (!OLoop->contains(&PN)) {
          PHINode *OpPN =
              PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                              OInst->getName() + ".lcssa", &ExitBlock.front());
          for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
            OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
          *OI = OpPN;
        }
  return New;
}

/// Remove I from the IR and keep the alias set tracker (if any) and safety
/// info in sync with the deletion.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             AliasSetTracker *AST) {
  if (AST)
    AST->deleteValue(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}

/// Move I immediately before Dest, updating the safety info for both the
/// source and destination blocks.
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo) {
  SafetyInfo.removeInstruction(&I);
  SafetyInfo.insertInstructionTo(Dest.getParent());
  I.moveBefore(&Dest);
}

/// Return (creating at most one per exit block, cached in SunkCopies) the
/// sunk copy of I in the exit block holding the trivially replaceable PHI TPN.
static Instruction *sinkThroughTriviallyReplaceablePHI(
    PHINode *TPN, Instruction *I, LoopInfo *LI,
    SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) {
  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
         "Expect only trivially replaceable PHI");
  BasicBlock *ExitBlock = TPN->getParent();
  Instruction *New;
  auto It = SunkCopies.find(ExitBlock);
  if (It != SunkCopies.end())
    New = It->second;
  else
    New = SunkCopies[ExitBlock] =
        CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo);
  return New;
}

static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
  BasicBlock *BB = PN->getParent();
  if (!BB->canSplitPredecessors())
    return false;
  // It's not impossible to split EHPad blocks, but if BlockColors already exist
  // it require updating BlockColors for all offspring blocks accordingly.
By 918 // skipping such corner case, we can make updating BlockColors after splitting 919 // predecessor fairly simple. 920 if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) 921 return false; 922 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 923 BasicBlock *BBPred = *PI; 924 if (isa<IndirectBrInst>(BBPred->getTerminator())) 925 return false; 926 } 927 return true; 928 } 929 930 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, 931 LoopInfo *LI, const Loop *CurLoop, 932 LoopSafetyInfo *SafetyInfo) { 933 #ifndef NDEBUG 934 SmallVector<BasicBlock *, 32> ExitBlocks; 935 CurLoop->getUniqueExitBlocks(ExitBlocks); 936 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 937 ExitBlocks.end()); 938 #endif 939 BasicBlock *ExitBB = PN->getParent(); 940 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); 941 942 // Split predecessors of the loop exit to make instructions in the loop are 943 // exposed to exit blocks through trivially replaceable PHIs while keeping the 944 // loop in the canonical form where each predecessor of each exit block should 945 // be contained within the loop. 
For example, this will convert the loop below 946 // from 947 // 948 // LB1: 949 // %v1 = 950 // br %LE, %LB2 951 // LB2: 952 // %v2 = 953 // br %LE, %LB1 954 // LE: 955 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable 956 // 957 // to 958 // 959 // LB1: 960 // %v1 = 961 // br %LE.split, %LB2 962 // LB2: 963 // %v2 = 964 // br %LE.split2, %LB1 965 // LE.split: 966 // %p1 = phi [%v1, %LB1] <-- trivially replaceable 967 // br %LE 968 // LE.split2: 969 // %p2 = phi [%v2, %LB2] <-- trivially replaceable 970 // br %LE 971 // LE: 972 // %p = phi [%p1, %LE.split], [%p2, %LE.split2] 973 // 974 const auto &BlockColors = SafetyInfo->getBlockColors(); 975 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); 976 while (!PredBBs.empty()) { 977 BasicBlock *PredBB = *PredBBs.begin(); 978 assert(CurLoop->contains(PredBB) && 979 "Expect all predecessors are in the loop"); 980 if (PN->getBasicBlockIndex(PredBB) >= 0) { 981 BasicBlock *NewPred = SplitBlockPredecessors( 982 ExitBB, PredBB, ".split.loop.exit", DT, LI, nullptr, true); 983 // Since we do not allow splitting EH-block with BlockColors in 984 // canSplitPredecessors(), we can simply assign predecessor's color to 985 // the new block. 986 if (!BlockColors.empty()) 987 // Grab a reference to the ColorVector to be inserted before getting the 988 // reference to the vector we are copying because inserting the new 989 // element in BlockColors might cause the map to be reallocated. 990 SafetyInfo->copyColors(NewPred, PredBB); 991 } 992 PredBBs.remove(PredBB); 993 } 994 } 995 996 /// When an instruction is found to only be used outside of the loop, this 997 /// function moves it to the exit blocks and patches up SSA form as needed. 998 /// This method is guaranteed to remove the original instruction from its 999 /// position, and may either delete it or move it to outside of the loop. 
1000 /// 1001 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 1002 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, 1003 OptimizationRemarkEmitter *ORE, bool FreeInLoop) { 1004 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 1005 ORE->emit([&]() { 1006 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 1007 << "sinking " << ore::NV("Inst", &I); 1008 }); 1009 bool Changed = false; 1010 if (isa<LoadInst>(I)) 1011 ++NumMovedLoads; 1012 else if (isa<CallInst>(I)) 1013 ++NumMovedCalls; 1014 ++NumSunk; 1015 1016 // Iterate over users to be ready for actual sinking. Replace users via 1017 // unrechable blocks with undef and make all user PHIs trivially replcable. 1018 SmallPtrSet<Instruction *, 8> VisitedUsers; 1019 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { 1020 auto *User = cast<Instruction>(*UI); 1021 Use &U = UI.getUse(); 1022 ++UI; 1023 1024 if (VisitedUsers.count(User) || CurLoop->contains(User)) 1025 continue; 1026 1027 if (!DT->isReachableFromEntry(User->getParent())) { 1028 U = UndefValue::get(I.getType()); 1029 Changed = true; 1030 continue; 1031 } 1032 1033 // The user must be a PHI node. 1034 PHINode *PN = cast<PHINode>(User); 1035 1036 // Surprisingly, instructions can be used outside of loops without any 1037 // exits. This can only happen in PHI nodes if the incoming block is 1038 // unreachable. 1039 BasicBlock *BB = PN->getIncomingBlock(U); 1040 if (!DT->isReachableFromEntry(BB)) { 1041 U = UndefValue::get(I.getType()); 1042 Changed = true; 1043 continue; 1044 } 1045 1046 VisitedUsers.insert(PN); 1047 if (isTriviallyReplaceablePHI(*PN, I)) 1048 continue; 1049 1050 if (!canSplitPredecessors(PN, SafetyInfo)) 1051 return Changed; 1052 1053 // Split predecessors of the PHI so that we can make users trivially 1054 // replaceable. 
1055 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); 1056 1057 // Should rebuild the iterators, as they may be invalidated by 1058 // splitPredecessorsOfLoopExit(). 1059 UI = I.user_begin(); 1060 UE = I.user_end(); 1061 } 1062 1063 if (VisitedUsers.empty()) 1064 return Changed; 1065 1066 #ifndef NDEBUG 1067 SmallVector<BasicBlock *, 32> ExitBlocks; 1068 CurLoop->getUniqueExitBlocks(ExitBlocks); 1069 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 1070 ExitBlocks.end()); 1071 #endif 1072 1073 // Clones of this instruction. Don't create more than one per exit block! 1074 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 1075 1076 // If this instruction is only used outside of the loop, then all users are 1077 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 1078 // the instruction. 1079 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); 1080 for (auto *UI : Users) { 1081 auto *User = cast<Instruction>(UI); 1082 1083 if (CurLoop->contains(User)) 1084 continue; 1085 1086 PHINode *PN = cast<PHINode>(User); 1087 assert(ExitBlockSet.count(PN->getParent()) && 1088 "The LCSSA PHI is not in an exit block!"); 1089 // The PHI must be trivially replaceable. 1090 Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies, 1091 SafetyInfo, CurLoop); 1092 PN->replaceAllUsesWith(New); 1093 eraseInstruction(*PN, *SafetyInfo, nullptr); 1094 Changed = true; 1095 } 1096 return Changed; 1097 } 1098 1099 /// When an instruction is found to only use loop invariant operands that 1100 /// is safe to hoist, this instruction is called to do the dirty work. 
1101 /// 1102 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1103 ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { 1104 auto *Preheader = CurLoop->getLoopPreheader(); 1105 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I 1106 << "\n"); 1107 ORE->emit([&]() { 1108 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " 1109 << ore::NV("Inst", &I); 1110 }); 1111 1112 // Metadata can be dependent on conditions we are hoisting above. 1113 // Conservatively strip all metadata on the instruction unless we were 1114 // guaranteed to execute I if we entered the loop, in which case the metadata 1115 // is valid in the loop preheader. 1116 if (I.hasMetadataOtherThanDebugLoc() && 1117 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 1118 // time in isGuaranteedToExecute if we don't actually have anything to 1119 // drop. It is a compile time optimization, not required for correctness. 1120 !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) 1121 I.dropUnknownNonDebugMetadata(); 1122 1123 // Move the new node to the Preheader, before its terminator. 1124 moveInstructionBefore(I, *Preheader->getTerminator(), *SafetyInfo); 1125 1126 // Do not retain debug locations when we are moving instructions to different 1127 // basic blocks, because we want to avoid jumpy line tables. Calls, however, 1128 // need to retain their debug locs because they may be inlined. 1129 // FIXME: How do we retain source locations without causing poor debugging 1130 // behavior? 1131 if (!isa<CallInst>(I)) 1132 I.setDebugLoc(DebugLoc()); 1133 1134 if (isa<LoadInst>(I)) 1135 ++NumMovedLoads; 1136 else if (isa<CallInst>(I)) 1137 ++NumMovedCalls; 1138 ++NumHoisted; 1139 } 1140 1141 /// Only sink or hoist an instruction if it is not a trapping instruction, 1142 /// or if the instruction is known not to trap when moved to the preheader. 
1143 /// or if it is a trapping instruction and is guaranteed to execute. 1144 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 1145 const DominatorTree *DT, 1146 const Loop *CurLoop, 1147 const LoopSafetyInfo *SafetyInfo, 1148 OptimizationRemarkEmitter *ORE, 1149 const Instruction *CtxI) { 1150 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) 1151 return true; 1152 1153 bool GuaranteedToExecute = 1154 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); 1155 1156 if (!GuaranteedToExecute) { 1157 auto *LI = dyn_cast<LoadInst>(&Inst); 1158 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 1159 ORE->emit([&]() { 1160 return OptimizationRemarkMissed( 1161 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 1162 << "failed to hoist load with loop-invariant address " 1163 "because load is conditionally executed"; 1164 }); 1165 } 1166 1167 return GuaranteedToExecute; 1168 } 1169 1170 namespace { 1171 class LoopPromoter : public LoadAndStorePromoter { 1172 Value *SomePtr; // Designated pointer to store to. 1173 const SmallSetVector<Value *, 8> &PointerMustAliases; 1174 SmallVectorImpl<BasicBlock *> &LoopExitBlocks; 1175 SmallVectorImpl<Instruction *> &LoopInsertPts; 1176 PredIteratorCache &PredCache; 1177 AliasSetTracker &AST; 1178 LoopInfo &LI; 1179 DebugLoc DL; 1180 int Alignment; 1181 bool UnorderedAtomic; 1182 AAMDNodes AATags; 1183 ICFLoopSafetyInfo &SafetyInfo; 1184 1185 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { 1186 if (Instruction *I = dyn_cast<Instruction>(V)) 1187 if (Loop *L = LI.getLoopFor(I->getParent())) 1188 if (!L->contains(BB)) { 1189 // We need to create an LCSSA PHI node for the incoming value and 1190 // store that. 
1191 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), 1192 I->getName() + ".lcssa", &BB->front()); 1193 for (BasicBlock *Pred : PredCache.get(BB)) 1194 PN->addIncoming(I, Pred); 1195 return PN; 1196 } 1197 return V; 1198 } 1199 1200 public: 1201 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, 1202 const SmallSetVector<Value *, 8> &PMA, 1203 SmallVectorImpl<BasicBlock *> &LEB, 1204 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, 1205 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, 1206 bool UnorderedAtomic, const AAMDNodes &AATags, 1207 ICFLoopSafetyInfo &SafetyInfo) 1208 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), 1209 LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), 1210 LI(li), DL(std::move(dl)), Alignment(alignment), 1211 UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo) 1212 {} 1213 1214 bool isInstInList(Instruction *I, 1215 const SmallVectorImpl<Instruction *> &) const override { 1216 Value *Ptr; 1217 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 1218 Ptr = LI->getOperand(0); 1219 else 1220 Ptr = cast<StoreInst>(I)->getPointerOperand(); 1221 return PointerMustAliases.count(Ptr); 1222 } 1223 1224 void doExtraRewritesBeforeFinalDeletion() const override { 1225 // Insert stores after in the loop exit blocks. Each exit block gets a 1226 // store of the live-out values that feed them. Since we've already told 1227 // the SSA updater about the defs in the loop and the preheader 1228 // definition, it is all set and we can start using it. 
1229 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 1230 BasicBlock *ExitBlock = LoopExitBlocks[i]; 1231 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 1232 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 1233 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 1234 Instruction *InsertPos = LoopInsertPts[i]; 1235 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 1236 if (UnorderedAtomic) 1237 NewSI->setOrdering(AtomicOrdering::Unordered); 1238 NewSI->setAlignment(Alignment); 1239 NewSI->setDebugLoc(DL); 1240 if (AATags) 1241 NewSI->setAAMetadata(AATags); 1242 } 1243 } 1244 1245 void replaceLoadWithValue(LoadInst *LI, Value *V) const override { 1246 // Update alias analysis. 1247 AST.copyValue(LI, V); 1248 } 1249 void instructionDeleted(Instruction *I) const override { 1250 SafetyInfo.removeInstruction(I); 1251 AST.deleteValue(I); 1252 } 1253 }; 1254 1255 1256 /// Return true iff we can prove that a caller of this function can not inspect 1257 /// the contents of the provided object in a well defined program. 1258 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { 1259 if (isa<AllocaInst>(Object)) 1260 // Since the alloca goes out of scope, we know the caller can't retain a 1261 // reference to it and be well defined. Thus, we don't need to check for 1262 // capture. 1263 return true; 1264 1265 // For all other objects we need to know that the caller can't possibly 1266 // have gotten a reference to the object. There are two components of 1267 // that: 1268 // 1) Object can't be escaped by this function. This is what 1269 // PointerMayBeCaptured checks. 1270 // 2) Object can't have been captured at definition site. For this, we 1271 // need to know the return value is noalias. At the moment, we use a 1272 // weaker condition and handle only AllocLikeFunctions (which are 1273 // known to be noalias). 
TODO 1274 return isAllocLikeFn(Object, TLI) && 1275 !PointerMayBeCaptured(Object, true, true); 1276 } 1277 1278 } // namespace 1279 1280 /// Try to promote memory values to scalars by sinking stores out of the 1281 /// loop and moving loads to before the loop. We do this by looping over 1282 /// the stores in the loop, looking for stores to Must pointers which are 1283 /// loop invariant. 1284 /// 1285 bool llvm::promoteLoopAccessesToScalars( 1286 const SmallSetVector<Value *, 8> &PointerMustAliases, 1287 SmallVectorImpl<BasicBlock *> &ExitBlocks, 1288 SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, 1289 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1290 Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo, 1291 OptimizationRemarkEmitter *ORE) { 1292 // Verify inputs. 1293 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1294 CurAST != nullptr && SafetyInfo != nullptr && 1295 "Unexpected Input to promoteLoopAccessesToScalars"); 1296 1297 Value *SomePtr = *PointerMustAliases.begin(); 1298 BasicBlock *Preheader = CurLoop->getLoopPreheader(); 1299 1300 // It is not safe to promote a load/store from the loop if the load/store is 1301 // conditional. For example, turning: 1302 // 1303 // for () { if (c) *P += 1; } 1304 // 1305 // into: 1306 // 1307 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 1308 // 1309 // is not safe, because *P may only be valid to access if 'c' is true. 1310 // 1311 // The safety property divides into two parts: 1312 // p1) The memory may not be dereferenceable on entry to the loop. In this 1313 // case, we can't insert the required load in the preheader. 1314 // p2) The memory model does not allow us to insert a store along any dynamic 1315 // path which did not originally have one. 1316 // 1317 // If at least one store is guaranteed to execute, both properties are 1318 // satisfied, and promotion is legal. 1319 // 1320 // This, however, is not a necessary condition. 
Even if no store/load is 1321 // guaranteed to execute, we can still establish these properties. 1322 // We can establish (p1) by proving that hoisting the load into the preheader 1323 // is safe (i.e. proving dereferenceability on all paths through the loop). We 1324 // can use any access within the alias set to prove dereferenceability, 1325 // since they're all must alias. 1326 // 1327 // There are two ways establish (p2): 1328 // a) Prove the location is thread-local. In this case the memory model 1329 // requirement does not apply, and stores are safe to insert. 1330 // b) Prove a store dominates every exit block. In this case, if an exit 1331 // blocks is reached, the original dynamic path would have taken us through 1332 // the store, so inserting a store into the exit block is safe. Note that this 1333 // is different from the store being guaranteed to execute. For instance, 1334 // if an exception is thrown on the first iteration of the loop, the original 1335 // store is never executed, but the exit blocks are not executed either. 1336 1337 bool DereferenceableInPH = false; 1338 bool SafeToInsertStore = false; 1339 1340 SmallVector<Instruction *, 64> LoopUses; 1341 1342 // We start with an alignment of one and try to find instructions that allow 1343 // us to prove better alignment. 1344 unsigned Alignment = 1; 1345 // Keep track of which types of access we see 1346 bool SawUnorderedAtomic = false; 1347 bool SawNotAtomic = false; 1348 AAMDNodes AATags; 1349 1350 const DataLayout &MDL = Preheader->getModule()->getDataLayout(); 1351 1352 bool IsKnownThreadLocalObject = false; 1353 if (SafetyInfo->anyBlockMayThrow()) { 1354 // If a loop can throw, we have to insert a store along each unwind edge. 1355 // That said, we can't actually make the unwind edge explicit. Therefore, 1356 // we have to prove that the store is dead along the unwind edge. 
We do 1357 // this by proving that the caller can't have a reference to the object 1358 // after return and thus can't possibly load from the object. 1359 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1360 if (!isKnownNonEscaping(Object, TLI)) 1361 return false; 1362 // Subtlety: Alloca's aren't visible to callers, but *are* potentially 1363 // visible to other threads if captured and used during their lifetimes. 1364 IsKnownThreadLocalObject = !isa<AllocaInst>(Object); 1365 } 1366 1367 // Check that all of the pointers in the alias set have the same type. We 1368 // cannot (yet) promote a memory location that is loaded and stored in 1369 // different sizes. While we are at it, collect alignment and AA info. 1370 for (Value *ASIV : PointerMustAliases) { 1371 // Check that all of the pointers in the alias set have the same type. We 1372 // cannot (yet) promote a memory location that is loaded and stored in 1373 // different sizes. 1374 if (SomePtr->getType() != ASIV->getType()) 1375 return false; 1376 1377 for (User *U : ASIV->users()) { 1378 // Ignore instructions that are outside the loop. 1379 Instruction *UI = dyn_cast<Instruction>(U); 1380 if (!UI || !CurLoop->contains(UI)) 1381 continue; 1382 1383 // If there is an non-load/store instruction in the loop, we can't promote 1384 // it. 1385 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 1386 if (!Load->isUnordered()) 1387 return false; 1388 1389 SawUnorderedAtomic |= Load->isAtomic(); 1390 SawNotAtomic |= !Load->isAtomic(); 1391 1392 if (!DereferenceableInPH) 1393 DereferenceableInPH = isSafeToExecuteUnconditionally( 1394 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); 1395 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 1396 // Stores *of* the pointer are not interesting, only stores *to* the 1397 // pointer. 
1398 if (UI->getOperand(1) != ASIV) 1399 continue; 1400 if (!Store->isUnordered()) 1401 return false; 1402 1403 SawUnorderedAtomic |= Store->isAtomic(); 1404 SawNotAtomic |= !Store->isAtomic(); 1405 1406 // If the store is guaranteed to execute, both properties are satisfied. 1407 // We may want to check if a store is guaranteed to execute even if we 1408 // already know that promotion is safe, since it may have higher 1409 // alignment than any other guaranteed stores, in which case we can 1410 // raise the alignment on the promoted store. 1411 unsigned InstAlignment = Store->getAlignment(); 1412 if (!InstAlignment) 1413 InstAlignment = 1414 MDL.getABITypeAlignment(Store->getValueOperand()->getType()); 1415 1416 if (!DereferenceableInPH || !SafeToInsertStore || 1417 (InstAlignment > Alignment)) { 1418 if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { 1419 DereferenceableInPH = true; 1420 SafeToInsertStore = true; 1421 Alignment = std::max(Alignment, InstAlignment); 1422 } 1423 } 1424 1425 // If a store dominates all exit blocks, it is safe to sink. 1426 // As explained above, if an exit block was executed, a dominating 1427 // store must have been executed at least once, so we are not 1428 // introducing stores on paths that did not have them. 1429 // Note that this only looks at explicit exit blocks. If we ever 1430 // start sinking stores into unwind edges (see above), this will break. 1431 if (!SafeToInsertStore) 1432 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 1433 return DT->dominates(Store->getParent(), Exit); 1434 }); 1435 1436 // If the store is not guaranteed to execute, we may still get 1437 // deref info through it. 1438 if (!DereferenceableInPH) { 1439 DereferenceableInPH = isDereferenceableAndAlignedPointer( 1440 Store->getPointerOperand(), Store->getAlignment(), MDL, 1441 Preheader->getTerminator(), DT); 1442 } 1443 } else 1444 return false; // Not a load or store. 1445 1446 // Merge the AA tags. 
1447 if (LoopUses.empty()) { 1448 // On the first load/store, just take its AA tags. 1449 UI->getAAMetadata(AATags); 1450 } else if (AATags) { 1451 UI->getAAMetadata(AATags, /* Merge = */ true); 1452 } 1453 1454 LoopUses.push_back(UI); 1455 } 1456 } 1457 1458 // If we found both an unordered atomic instruction and a non-atomic memory 1459 // access, bail. We can't blindly promote non-atomic to atomic since we 1460 // might not be able to lower the result. We can't downgrade since that 1461 // would violate memory model. Also, align 0 is an error for atomics. 1462 if (SawUnorderedAtomic && SawNotAtomic) 1463 return false; 1464 1465 // If we couldn't prove we can hoist the load, bail. 1466 if (!DereferenceableInPH) 1467 return false; 1468 1469 // We know we can hoist the load, but don't have a guaranteed store. 1470 // Check whether the location is thread-local. If it is, then we can insert 1471 // stores along paths which originally didn't have them without violating the 1472 // memory model. 1473 if (!SafeToInsertStore) { 1474 if (IsKnownThreadLocalObject) 1475 SafeToInsertStore = true; 1476 else { 1477 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1478 SafeToInsertStore = 1479 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && 1480 !PointerMayBeCaptured(Object, true, true); 1481 } 1482 } 1483 1484 // If we've still failed to prove we can sink the store, give up. 1485 if (!SafeToInsertStore) 1486 return false; 1487 1488 // Otherwise, this is safe to promote, lets do it! 
1489 LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr 1490 << '\n'); 1491 ORE->emit([&]() { 1492 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 1493 LoopUses[0]) 1494 << "Moving accesses to memory location out of the loop"; 1495 }); 1496 ++NumPromoted; 1497 1498 // Grab a debug location for the inserted loads/stores; given that the 1499 // inserted loads/stores have little relation to the original loads/stores, 1500 // this code just arbitrarily picks a location from one, since any debug 1501 // location is better than none. 1502 DebugLoc DL = LoopUses[0]->getDebugLoc(); 1503 1504 // We use the SSAUpdater interface to insert phi nodes as required. 1505 SmallVector<PHINode *, 16> NewPHIs; 1506 SSAUpdater SSA(&NewPHIs); 1507 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 1508 InsertPts, PIC, *CurAST, *LI, DL, Alignment, 1509 SawUnorderedAtomic, AATags, *SafetyInfo); 1510 1511 // Set up the preheader to have a definition of the value. It is the live-out 1512 // value from the preheader that uses in the loop will use. 1513 LoadInst *PreheaderLoad = new LoadInst( 1514 SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); 1515 if (SawUnorderedAtomic) 1516 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 1517 PreheaderLoad->setAlignment(Alignment); 1518 PreheaderLoad->setDebugLoc(DL); 1519 if (AATags) 1520 PreheaderLoad->setAAMetadata(AATags); 1521 SSA.AddAvailableValue(Preheader, PreheaderLoad); 1522 1523 // Rewrite all the loads in the loop and remember all the definitions from 1524 // stores in the loop. 1525 Promoter.run(LoopUses); 1526 1527 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 
1528 if (PreheaderLoad->use_empty()) 1529 eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST); 1530 1531 return true; 1532 } 1533 1534 /// Returns an owning pointer to an alias set which incorporates aliasing info 1535 /// from L and all subloops of L. 1536 /// FIXME: In new pass manager, there is no helper function to handle loop 1537 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed 1538 /// from scratch for every loop. Hook up with the helper functions when 1539 /// available in the new pass manager to avoid redundant computation. 1540 std::unique_ptr<AliasSetTracker> 1541 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 1542 AliasAnalysis *AA) { 1543 std::unique_ptr<AliasSetTracker> CurAST; 1544 SmallVector<Loop *, 4> RecomputeLoops; 1545 for (Loop *InnerL : L->getSubLoops()) { 1546 auto MapI = LoopToAliasSetMap.find(InnerL); 1547 // If the AST for this inner loop is missing it may have been merged into 1548 // some other loop's AST and then that loop unrolled, and so we need to 1549 // recompute it. 1550 if (MapI == LoopToAliasSetMap.end()) { 1551 RecomputeLoops.push_back(InnerL); 1552 continue; 1553 } 1554 std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second); 1555 1556 if (CurAST) { 1557 // What if InnerLoop was modified by other passes ? 1558 // Once we've incorporated the inner loop's AST into ours, we don't need 1559 // the subloop's anymore. 1560 CurAST->add(*InnerAST); 1561 } else { 1562 CurAST = std::move(InnerAST); 1563 } 1564 LoopToAliasSetMap.erase(MapI); 1565 } 1566 if (!CurAST) 1567 CurAST = make_unique<AliasSetTracker>(*AA); 1568 1569 // Add everything from the sub loops that are no longer directly available. 1570 for (Loop *InnerL : RecomputeLoops) 1571 for (BasicBlock *BB : InnerL->blocks()) 1572 CurAST->add(*BB); 1573 1574 // And merge in this loop (without anything from inner loops). 
1575 for (BasicBlock *BB : L->blocks()) 1576 if (LI->getLoopFor(BB) == L) 1577 CurAST->add(*BB); 1578 1579 return CurAST; 1580 } 1581 1582 /// Simple analysis hook. Clone alias set info. 1583 /// 1584 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 1585 Loop *L) { 1586 auto ASTIt = LICM.getLoopToAliasSetMap().find(L); 1587 if (ASTIt == LICM.getLoopToAliasSetMap().end()) 1588 return; 1589 1590 ASTIt->second->copyValue(From, To); 1591 } 1592 1593 /// Simple Analysis hook. Delete value V from alias set 1594 /// 1595 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { 1596 auto ASTIt = LICM.getLoopToAliasSetMap().find(L); 1597 if (ASTIt == LICM.getLoopToAliasSetMap().end()) 1598 return; 1599 1600 ASTIt->second->deleteValue(V); 1601 } 1602 1603 /// Simple Analysis hook. Delete value L from alias set map. 1604 /// 1605 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { 1606 if (!LICM.getLoopToAliasSetMap().count(L)) 1607 return; 1608 1609 LICM.getLoopToAliasSetMap().erase(L); 1610 } 1611 1612 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, 1613 AliasSetTracker *CurAST, Loop *CurLoop, 1614 AliasAnalysis *AA) { 1615 // First check to see if any of the basic blocks in CurLoop invalidate *V. 1616 bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod(); 1617 1618 if (!isInvalidatedAccordingToAST || !LICMN2Theshold) 1619 return isInvalidatedAccordingToAST; 1620 1621 // Check with a diagnostic analysis if we can refine the information above. 1622 // This is to identify the limitations of using the AST. 1623 // The alias set mechanism used by LICM has a major weakness in that it 1624 // combines all things which may alias into a single set *before* asking 1625 // modref questions. As a result, a single readonly call within a loop will 1626 // collapse all loads and stores into a single alias set and report 1627 // invalidation if the loop contains any store. 
For example, readonly calls 1628 // with deopt states have this form and create a general alias set with all 1629 // loads and stores. In order to get any LICM in loops containing possible 1630 // deopt states we need a more precise invalidation of checking the mod ref 1631 // info of each instruction within the loop and LI. This has a complexity of 1632 // O(N^2), so currently, it is used only as a diagnostic tool since the 1633 // default value of LICMN2Threshold is zero. 1634 1635 // Don't look at nested loops. 1636 if (CurLoop->begin() != CurLoop->end()) 1637 return true; 1638 1639 int N = 0; 1640 for (BasicBlock *BB : CurLoop->getBlocks()) 1641 for (Instruction &I : *BB) { 1642 if (N >= LICMN2Theshold) { 1643 LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for " 1644 << *(MemLoc.Ptr) << "\n"); 1645 return true; 1646 } 1647 N++; 1648 auto Res = AA->getModRefInfo(&I, MemLoc); 1649 if (isModSet(Res)) { 1650 LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for " 1651 << *(MemLoc.Ptr) << "\n"); 1652 return true; 1653 } 1654 } 1655 LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n"); 1656 return false; 1657 } 1658 1659 /// Little predicate that returns true if the specified basic block is in 1660 /// a subloop of the current one, not the current one itself. 1661 /// 1662 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 1663 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 1664 return LI->getLoopFor(BB) != CurLoop; 1665 } 1666