1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion, attempting to remove as much 11 // code from the body of a loop as possible. It does this by either hoisting 12 // code into the preheader block, or by sinking code to the exit blocks if it is 13 // safe. This pass also promotes must-aliased memory locations in the loop to 14 // live in registers, thus hoisting and sinking "invariant" loads and stores. 15 // 16 // This pass uses alias analysis for two purposes: 17 // 18 // 1. Moving loop invariant loads and calls out of loops. If we can determine 19 // that a load or call inside of a loop never aliases anything stored to, 20 // we can hoist it or sink it like any other instruction. 21 // 2. Scalar Promotion of Memory - If there is a store instruction inside of 22 // the loop, we try to move the store to happen AFTER the loop instead of 23 // inside of the loop. This can only happen if a few conditions are true: 24 // A. The pointer stored through is loop invariant 25 // B. There are no stores or loads in the loop which _may_ alias the 26 // pointer. There are no calls in the loop which mod/ref the pointer. 27 // If these conditions are true, we can promote the loads and stores in the 28 // loop of the pointer to use a temporary alloca'd variable. We then use 29 // the SSAUpdater to construct the appropriate SSA form for the value. 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Transforms/Scalar/LICM.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/BasicAliasAnalysis.h" 38 #include "llvm/Analysis/CaptureTracking.h" 39 #include "llvm/Analysis/ConstantFolding.h" 40 #include "llvm/Analysis/GlobalsModRef.h" 41 #include "llvm/Analysis/Loads.h" 42 #include "llvm/Analysis/LoopInfo.h" 43 #include "llvm/Analysis/LoopPass.h" 44 #include "llvm/Analysis/MemoryBuiltins.h" 45 #include "llvm/Analysis/MemorySSA.h" 46 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 47 #include "llvm/Analysis/ScalarEvolution.h" 48 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 49 #include "llvm/Analysis/TargetLibraryInfo.h" 50 #include "llvm/Analysis/Utils/Local.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/CFG.h" 53 #include "llvm/IR/Constants.h" 54 #include "llvm/IR/DataLayout.h" 55 #include "llvm/IR/DerivedTypes.h" 56 #include "llvm/IR/Dominators.h" 57 #include "llvm/IR/Instructions.h" 58 #include "llvm/IR/IntrinsicInst.h" 59 #include "llvm/IR/LLVMContext.h" 60 #include "llvm/IR/Metadata.h" 61 #include "llvm/IR/PredIteratorCache.h" 62 #include "llvm/Support/CommandLine.h" 63 #include "llvm/Support/Debug.h" 64 #include "llvm/Support/raw_ostream.h" 65 #include "llvm/Transforms/Scalar.h" 66 #include "llvm/Transforms/Scalar/LoopPassManager.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 #include "llvm/Transforms/Utils/LoopUtils.h" 69 #include "llvm/Transforms/Utils/SSAUpdater.h" 70 #include <algorithm> 71 #include <utility> 72 using namespace llvm; 73 74 #define DEBUG_TYPE "licm" 75 76 STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 77 STATISTIC(NumHoisted, 
"Number of instructions hoisted out of loop"); 78 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 79 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 80 STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); 81 82 /// Memory promotion is enabled by default. 83 static cl::opt<bool> 84 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), 85 cl::desc("Disable memory promotion in LICM pass")); 86 87 static cl::opt<uint32_t> MaxNumUsesTraversed( 88 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), 89 cl::desc("Max num uses visited for identifying load " 90 "invariance in loop using invariant start (default = 8)")); 91 92 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); 93 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 94 const LoopSafetyInfo *SafetyInfo, 95 TargetTransformInfo *TTI, bool &FreeInLoop); 96 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 97 const LoopSafetyInfo *SafetyInfo, 98 OptimizationRemarkEmitter *ORE); 99 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 100 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, 101 OptimizationRemarkEmitter *ORE, bool FreeInLoop); 102 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 103 const DominatorTree *DT, 104 const Loop *CurLoop, 105 const LoopSafetyInfo *SafetyInfo, 106 OptimizationRemarkEmitter *ORE, 107 const Instruction *CtxI = nullptr); 108 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 109 const AAMDNodes &AAInfo, 110 AliasSetTracker *CurAST); 111 static Instruction * 112 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 113 const LoopInfo *LI, 114 const LoopSafetyInfo *SafetyInfo); 115 116 namespace { 117 struct LoopInvariantCodeMotion { 118 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 119 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, 120 ScalarEvolution *SE, MemorySSA *MSSA, 121 OptimizationRemarkEmitter *ORE, bool DeleteAST); 122 123 DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { 124 return LoopToAliasSetMap; 125 } 126 127 private: 128 DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; 129 130 AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 131 AliasAnalysis *AA); 132 }; 133 134 struct LegacyLICMPass : public LoopPass { 135 static char ID; // Pass identification, replacement for typeid 136 LegacyLICMPass() : LoopPass(ID) { 137 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 141 if (skipLoop(L)) { 142 // If we have run LICM on a previous loop but now we are skipping 143 // (because we've hit the opt-bisect limit), we need to clear the 144 // loop alias information. 145 for (auto <AS : LICM.getLoopToAliasSetMap()) 146 delete LTAS.second; 147 LICM.getLoopToAliasSetMap().clear(); 148 return false; 149 } 150 151 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 152 MemorySSA *MSSA = EnableMSSALoopDependency 153 ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) 154 : nullptr; 155 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 156 // pass. Function analyses need to be preserved across loop transformations 157 // but ORE cannot be preserved (see comment before the pass definition). 
158 OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 159 return LICM.runOnLoop(L, 160 &getAnalysis<AAResultsWrapperPass>().getAAResults(), 161 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), 162 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 163 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 164 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 165 *L->getHeader()->getParent()), 166 SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); 167 } 168 169 /// This transformation requires natural loop information & requires that 170 /// loop preheaders be inserted into the CFG... 171 /// 172 void getAnalysisUsage(AnalysisUsage &AU) const override { 173 AU.setPreservesCFG(); 174 AU.addRequired<TargetLibraryInfoWrapperPass>(); 175 if (EnableMSSALoopDependency) 176 AU.addRequired<MemorySSAWrapperPass>(); 177 AU.addRequired<TargetTransformInfoWrapperPass>(); 178 getLoopAnalysisUsage(AU); 179 } 180 181 using llvm::Pass::doFinalization; 182 183 bool doFinalization() override { 184 assert(LICM.getLoopToAliasSetMap().empty() && 185 "Didn't free loop alias sets"); 186 return false; 187 } 188 189 private: 190 LoopInvariantCodeMotion LICM; 191 192 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 193 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 194 Loop *L) override; 195 196 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 197 /// set. 198 void deleteAnalysisValue(Value *V, Loop *L) override; 199 200 /// Simple Analysis hook. Delete loop L from alias set map. 201 void deleteAnalysisLoop(Loop *L) override; 202 }; 203 } // namespace 204 205 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, 206 LoopStandardAnalysisResults &AR, LPMUpdater &) { 207 const auto &FAM = 208 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 209 Function *F = L.getHeader()->getParent(); 210 211 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); 212 // FIXME: This should probably be optional rather than required. 213 if (!ORE) 214 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " 215 "cached at a higher level"); 216 217 LoopInvariantCodeMotion LICM; 218 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, 219 AR.MSSA, ORE, true)) 220 return PreservedAnalyses::all(); 221 222 auto PA = getLoopPassPreservedAnalyses(); 223 PA.preserveSet<CFGAnalyses>(); 224 return PA; 225 } 226 227 char LegacyLICMPass::ID = 0; 228 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", 229 false, false) 230 INITIALIZE_PASS_DEPENDENCY(LoopPass) 231 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 232 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 233 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 234 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, 235 false) 236 237 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } 238 239 /// Hoist expressions out of the specified loop. Note, alias info for inner 240 /// loop is not preserved so it is not a good idea to run LICM multiple 241 /// times on one loop. 242 /// We should delete AST for inner loops in the new pass manager to avoid 243 /// memory leak. 
244 /// 245 bool LoopInvariantCodeMotion::runOnLoop( 246 Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 247 TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, 248 MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { 249 bool Changed = false; 250 251 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); 252 253 AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); 254 255 // Get the preheader block to move instructions into... 256 BasicBlock *Preheader = L->getLoopPreheader(); 257 258 // Compute loop safety information. 259 LoopSafetyInfo SafetyInfo; 260 computeLoopSafetyInfo(&SafetyInfo, L); 261 262 // We want to visit all of the instructions in this loop... that are not parts 263 // of our subloops (they have already had their invariants hoisted out of 264 // their loop, into this loop, so there is no need to process the BODIES of 265 // the subloops). 266 // 267 // Traverse the body of the loop in depth first order on the dominator tree so 268 // that we are guaranteed to see definitions before we see uses. This allows 269 // us to sink instructions in one pass, without iteration. After sinking 270 // instructions, we perform another pass to hoist them out of the loop. 271 // 272 if (L->hasDedicatedExits()) 273 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, 274 CurAST, &SafetyInfo, ORE); 275 if (Preheader) 276 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, 277 CurAST, &SafetyInfo, ORE); 278 279 // Now that all loop invariants have been removed from the loop, promote any 280 // memory references to scalars that we can. 281 // Don't sink stores from loops without dedicated block exits. Exits 282 // containing indirect branches are not transformed by loop simplify, 283 // make sure we catch that. An additional load may be generated in the 284 // preheader for SSA updater, so also avoid sinking when no preheader 285 // is available. 286 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 287 // Figure out the loop exits and their insertion points 288 SmallVector<BasicBlock *, 8> ExitBlocks; 289 L->getUniqueExitBlocks(ExitBlocks); 290 291 // We can't insert into a catchswitch. 292 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { 293 return isa<CatchSwitchInst>(Exit->getTerminator()); 294 }); 295 296 if (!HasCatchSwitch) { 297 SmallVector<Instruction *, 8> InsertPts; 298 InsertPts.reserve(ExitBlocks.size()); 299 for (BasicBlock *ExitBlock : ExitBlocks) 300 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 301 302 PredIteratorCache PIC; 303 304 bool Promoted = false; 305 306 // Loop over all of the alias sets in the tracker object. 307 for (AliasSet &AS : *CurAST) { 308 // We can promote this alias set if it has a store, if it is a "Must" 309 // alias set, if the pointer is loop invariant, and if we are not 310 // eliminating any volatile loads or stores. 
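        // Illustrative sketch (not from the original source): a loop whose
        // only accesses to a loop-invariant pointer %p look like
        //   %v = load i32, i32* %p
        //   %v2 = add i32 %v, 1
        //   store i32 %v2, i32* %p
        // forms a single must-alias, modified, non-volatile alias set, so the
        // checks below accept it as a candidate for
        // promoteLoopAccessesToScalars, which (when its own safety checks
        // pass) ends up with one load in the preheader and stores in the exit
        // blocks.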
311 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 312 AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) 313 continue; 314 315 assert( 316 !AS.empty() && 317 "Must alias set should have at least one pointer element in it!"); 318 319 SmallSetVector<Value *, 8> PointerMustAliases; 320 for (const auto &ASI : AS) 321 PointerMustAliases.insert(ASI.getValue()); 322 323 Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, 324 InsertPts, PIC, LI, DT, TLI, L, 325 CurAST, &SafetyInfo, ORE); 326 } 327 328 // Once we have promoted values across the loop body we have to 329 // recursively reform LCSSA as any nested loop may now have values defined 330 // within the loop used in the outer loop. 331 // FIXME: This is really heavy handed. It would be a bit better to use an 332 // SSAUpdater strategy during promotion that was LCSSA aware and reformed 333 // it as it went. 334 if (Promoted) 335 formLCSSARecursively(*L, *DT, LI, SE); 336 337 Changed |= Promoted; 338 } 339 } 340 341 // Check that neither this loop nor its parent have had LCSSA broken. LICM is 342 // specifically moving instructions across the loop boundary and so it is 343 // especially in need of sanity checking here. 344 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); 345 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && 346 "Parent loop not left in LCSSA form after LICM!"); 347 348 // If this loop is nested inside of another one, save the alias information 349 // for when we process the outer loop. 350 if (L->getParentLoop() && !DeleteAST) 351 LoopToAliasSetMap[L] = CurAST; 352 else 353 delete CurAST; 354 355 if (Changed && SE) 356 SE->forgetLoopDispositions(L); 357 return Changed; 358 } 359 360 /// Walk the specified region of the CFG (defined by all blocks dominated by 361 /// the specified block, and that are in the current loop) in reverse depth 362 /// first order w.r.t the DominatorTree. This allows us to visit uses before 363 /// definitions, allowing us to sink a loop body in one pass without iteration. 364 /// 365 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 366 DominatorTree *DT, TargetLibraryInfo *TLI, 367 TargetTransformInfo *TTI, Loop *CurLoop, 368 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 369 OptimizationRemarkEmitter *ORE) { 370 371 // Verify inputs. 372 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 373 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 374 "Unexpected input to sinkRegion"); 375 376 // We want to visit children before parents. We will enque all the parents 377 // before their children in the worklist and process the worklist in reverse 378 // order. 379 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 380 381 bool Changed = false; 382 for (DomTreeNode *DTN : reverse(Worklist)) { 383 BasicBlock *BB = DTN->getBlock(); 384 // Only need to process the contents of this block if it is not part of a 385 // subloop (which would already have been processed). 386 if (inSubLoop(BB, CurLoop, LI)) 387 continue; 388 389 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { 390 Instruction &I = *--II; 391 392 // If the instruction is dead, we would try to sink it because it isn't 393 // used in the loop, instead, just delete it. 
394 if (isInstructionTriviallyDead(&I, TLI)) { 395 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 396 salvageDebugInfo(I); 397 ++II; 398 CurAST->deleteValue(&I); 399 I.eraseFromParent(); 400 Changed = true; 401 continue; 402 } 403 404 // Check to see if we can sink this instruction to the exit blocks 405 // of the loop. We can do this if the all users of the instruction are 406 // outside of the loop. In this case, it doesn't even matter if the 407 // operands of the instruction are loop invariant. 408 // 409 bool FreeInLoop = false; 410 if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && 411 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { 412 if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { 413 if (!FreeInLoop) { 414 ++II; 415 CurAST->deleteValue(&I); 416 I.eraseFromParent(); 417 } 418 Changed = true; 419 } 420 } 421 } 422 } 423 return Changed; 424 } 425 426 /// Walk the specified region of the CFG (defined by all blocks dominated by 427 /// the specified block, and that are in the current loop) in depth first 428 /// order w.r.t the DominatorTree. This allows us to visit definitions before 429 /// uses, allowing us to hoist a loop body in one pass without iteration. 430 /// 431 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 432 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, 433 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 434 OptimizationRemarkEmitter *ORE) { 435 // Verify inputs. 436 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 437 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 438 "Unexpected input to hoistRegion"); 439 440 // We want to visit parents before children. We will enque all the parents 441 // before their children in the worklist and process the worklist in order. 442 SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); 443 444 bool Changed = false; 445 for (DomTreeNode *DTN : Worklist) { 446 BasicBlock *BB = DTN->getBlock(); 447 // Only need to process the contents of this block if it is not part of a 448 // subloop (which would already have been processed). 449 if (!inSubLoop(BB, CurLoop, LI)) 450 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { 451 Instruction &I = *II++; 452 // Try constant folding this instruction. If all the operands are 453 // constants, it is technically hoistable, but it would be better to 454 // just fold it. 455 if (Constant *C = ConstantFoldInstruction( 456 &I, I.getModule()->getDataLayout(), TLI)) { 457 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 458 CurAST->copyValue(&I, C); 459 I.replaceAllUsesWith(C); 460 if (isInstructionTriviallyDead(&I, TLI)) { 461 CurAST->deleteValue(&I); 462 I.eraseFromParent(); 463 } 464 Changed = true; 465 continue; 466 } 467 468 // Attempt to remove floating point division out of the loop by 469 // converting it to a reciprocal multiplication. 
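        // A rough sketch of the rewrite (illustrative, not from the original
        // source), given a loop-invariant divisor %d and reciprocals allowed
        // by the fast-math flags:
        //   loop:      %q = fdiv fast double %x, %d
        // becomes
        //   preheader: %r = fdiv fast double 1.000000e+00, %d   ; hoisted below
        //   loop:      %q = fmul fast double %x, %r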
470 if (I.getOpcode() == Instruction::FDiv && 471 CurLoop->isLoopInvariant(I.getOperand(1)) && 472 I.hasAllowReciprocal()) { 473 auto Divisor = I.getOperand(1); 474 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); 475 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); 476 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); 477 ReciprocalDivisor->insertBefore(&I); 478 479 auto Product = 480 BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); 481 Product->setFastMathFlags(I.getFastMathFlags()); 482 Product->insertAfter(&I); 483 I.replaceAllUsesWith(Product); 484 I.eraseFromParent(); 485 486 hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); 487 Changed = true; 488 continue; 489 } 490 491 // Try hoisting the instruction out to the preheader. We can only do 492 // this if all of the operands of the instruction are loop invariant and 493 // if it is safe to hoist the instruction. 494 // 495 if (CurLoop->hasLoopInvariantOperands(&I) && 496 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && 497 isSafeToExecuteUnconditionally( 498 I, DT, CurLoop, SafetyInfo, ORE, 499 CurLoop->getLoopPreheader()->getTerminator())) 500 Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); 501 } 502 } 503 504 return Changed; 505 } 506 507 // Return true if LI is invariant within scope of the loop. LI is invariant if 508 // CurLoop is dominated by an invariant.start representing the same memory 509 // location and size as the memory location LI loads from, and also the 510 // invariant.start has no uses. 511 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, 512 Loop *CurLoop) { 513 Value *Addr = LI->getOperand(0); 514 const DataLayout &DL = LI->getModule()->getDataLayout(); 515 const uint32_t LocSizeInBits = DL.getTypeSizeInBits( 516 cast<PointerType>(Addr->getType())->getElementType()); 517 518 // if the type is i8 addrspace(x)*, we know this is the type of 519 // llvm.invariant.start operand 520 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), 521 LI->getPointerAddressSpace()); 522 unsigned BitcastsVisited = 0; 523 // Look through bitcasts until we reach the i8* type (this is invariant.start 524 // operand type). 525 while (Addr->getType() != PtrInt8Ty) { 526 auto *BC = dyn_cast<BitCastInst>(Addr); 527 // Avoid traversing high number of bitcast uses. 528 if (++BitcastsVisited > MaxNumUsesTraversed || !BC) 529 return false; 530 Addr = BC->getOperand(0); 531 } 532 533 unsigned UsesVisited = 0; 534 // Traverse all uses of the load operand value, to see if invariant.start is 535 // one of the uses, and whether it dominates the load instruction. 536 for (auto *U : Addr->users()) { 537 // Avoid traversing for Load operand with high number of users. 538 if (++UsesVisited > MaxNumUsesTraversed) 539 return false; 540 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 541 // If there are escaping uses of invariant.start instruction, the load maybe 542 // non-invariant. 543 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || 544 !II->use_empty()) 545 continue; 546 unsigned InvariantSizeInBits = 547 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; 548 // Confirm the invariant.start location size contains the load operand size 549 // in bits. Also, the invariant.start should dominate the load, and we 550 // should not hoist the load out of a loop that contains this dominating 551 // invariant.start. 
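    // For example (an illustrative sketch, not from the original source), an
    // otherwise unused call such as
    //   %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %ptr)
    // in a block that properly dominates the loop header lets a load of at
    // most 4 bytes from %ptr be treated as invariant by the check below.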
552 if (LocSizeInBits <= InvariantSizeInBits && 553 DT->properlyDominates(II->getParent(), CurLoop->getHeader())) 554 return true; 555 } 556 557 return false; 558 } 559 560 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, 561 Loop *CurLoop, AliasSetTracker *CurAST, 562 LoopSafetyInfo *SafetyInfo, 563 OptimizationRemarkEmitter *ORE) { 564 // SafetyInfo is nullptr if we are checking for sinking from preheader to 565 // loop body. 566 const bool SinkingToLoopBody = !SafetyInfo; 567 // Loads have extra constraints we have to verify before we can hoist them. 568 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 569 if (!LI->isUnordered()) 570 return false; // Don't sink/hoist volatile or ordered atomic loads! 571 572 // Loads from constant memory are always safe to move, even if they end up 573 // in the same alias set as something that ends up being modified. 574 if (AA->pointsToConstantMemory(LI->getOperand(0))) 575 return true; 576 if (LI->getMetadata(LLVMContext::MD_invariant_load)) 577 return true; 578 579 if (LI->isAtomic() && SinkingToLoopBody) 580 return false; // Don't sink unordered atomic loads to loop body. 581 582 // This checks for an invariant.start dominating the load. 583 if (isLoadInvariantInLoop(LI, DT, CurLoop)) 584 return true; 585 586 // Don't hoist loads which have may-aliased stores in loop. 587 uint64_t Size = 0; 588 if (LI->getType()->isSized()) 589 Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); 590 591 AAMDNodes AAInfo; 592 LI->getAAMetadata(AAInfo); 593 594 bool Invalidated = 595 pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); 596 // Check loop-invariant address because this may also be a sinkable load 597 // whose address is not necessarily loop-invariant. 598 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) 599 ORE->emit([&]() { 600 return OptimizationRemarkMissed( 601 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) 602 << "failed to move load with loop-invariant address " 603 "because the loop may invalidate its value"; 604 }); 605 606 return !Invalidated; 607 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 608 // Don't sink or hoist dbg info; it's legal, but not useful. 609 if (isa<DbgInfoIntrinsic>(I)) 610 return false; 611 612 // Don't sink calls which can throw. 613 if (CI->mayThrow()) 614 return false; 615 616 // Handle simple cases by querying alias analysis. 617 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); 618 if (Behavior == FMRB_DoesNotAccessMemory) 619 return true; 620 if (AliasAnalysis::onlyReadsMemory(Behavior)) { 621 // A readonly argmemonly function only reads from memory pointed to by 622 // it's arguments with arbitrary offsets. If we can prove there are no 623 // writes to this memory in the loop, we can hoist or sink. 624 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { 625 for (Value *Op : CI->arg_operands()) 626 if (Op->getType()->isPointerTy() && 627 pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, 628 AAMDNodes(), CurAST)) 629 return false; 630 return true; 631 } 632 // If this call only reads from memory and there are no writes to memory 633 // in the loop, we can hoist or sink the call as appropriate. 634 bool FoundMod = false; 635 for (AliasSet &AS : *CurAST) { 636 if (!AS.isForwardingAliasSet() && AS.isMod()) { 637 FoundMod = true; 638 break; 639 } 640 } 641 if (!FoundMod) 642 return true; 643 } 644 645 // FIXME: This should use mod/ref information to see if we can hoist or 646 // sink the call. 
647 648 return false; 649 } 650 651 // Only these instructions are hoistable/sinkable. 652 if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && 653 !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && 654 !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && 655 !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && 656 !isa<InsertValueInst>(I)) 657 return false; 658 659 // If we are checking for sinking from preheader to loop body it will be 660 // always safe as there is no speculative execution. 661 if (SinkingToLoopBody) 662 return true; 663 664 // TODO: Plumb the context instruction through to make hoisting and sinking 665 // more powerful. Hoisting of loads already works due to the special casing 666 // above. 667 return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); 668 } 669 670 /// Returns true if a PHINode is a trivially replaceable with an 671 /// Instruction. 672 /// This is true when all incoming values are that instruction. 673 /// This pattern occurs most often with LCSSA PHI nodes. 674 /// 675 static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { 676 for (const Value *IncValue : PN.incoming_values()) 677 if (IncValue != &I) 678 return false; 679 680 return true; 681 } 682 683 /// Return true if the instruction is free in the loop. 684 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, 685 const TargetTransformInfo *TTI) { 686 687 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 688 if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) 689 return false; 690 // For a GEP, we cannot simply use getUserCost because currently it 691 // optimistically assume that a GEP will fold into addressing mode 692 // regardless of its users. 693 const BasicBlock *BB = GEP->getParent(); 694 for (const User *U : GEP->users()) { 695 const Instruction *UI = cast<Instruction>(U); 696 if (CurLoop->contains(UI) && 697 (BB != UI->getParent() || 698 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) 699 return false; 700 } 701 return true; 702 } else 703 return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; 704 } 705 706 /// Return true if the only users of this instruction are outside of 707 /// the loop. If this is true, we can sink the instruction to the exit 708 /// blocks of the loop. 709 /// 710 /// We also return true if the instruction could be folded away in lowering. 711 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 712 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 713 const LoopSafetyInfo *SafetyInfo, 714 TargetTransformInfo *TTI, bool &FreeInLoop) { 715 const auto &BlockColors = SafetyInfo->BlockColors; 716 bool IsFree = isFreeInLoop(I, CurLoop, TTI); 717 for (const User *U : I.users()) { 718 const Instruction *UI = cast<Instruction>(U); 719 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 720 const BasicBlock *BB = PN->getParent(); 721 // We cannot sink uses in catchswitches. 722 if (isa<CatchSwitchInst>(BB->getTerminator())) 723 return false; 724 725 // We need to sink a callsite to a unique funclet. Avoid sinking if the 726 // phi use is too muddled. 
727 if (isa<CallInst>(I)) 728 if (!BlockColors.empty() && 729 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 730 return false; 731 } 732 733 if (CurLoop->contains(UI)) { 734 if (IsFree) { 735 FreeInLoop = true; 736 continue; 737 } 738 return false; 739 } 740 } 741 return true; 742 } 743 744 static Instruction * 745 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 746 const LoopInfo *LI, 747 const LoopSafetyInfo *SafetyInfo) { 748 Instruction *New; 749 if (auto *CI = dyn_cast<CallInst>(&I)) { 750 const auto &BlockColors = SafetyInfo->BlockColors; 751 752 // Sinking call-sites need to be handled differently from other 753 // instructions. The cloned call-site needs a funclet bundle operand 754 // appropriate for it's location in the CFG. 755 SmallVector<OperandBundleDef, 1> OpBundles; 756 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); 757 BundleIdx != BundleEnd; ++BundleIdx) { 758 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); 759 if (Bundle.getTagID() == LLVMContext::OB_funclet) 760 continue; 761 762 OpBundles.emplace_back(Bundle); 763 } 764 765 if (!BlockColors.empty()) { 766 const ColorVector &CV = BlockColors.find(&ExitBlock)->second; 767 assert(CV.size() == 1 && "non-unique color for exit block!"); 768 BasicBlock *BBColor = CV.front(); 769 Instruction *EHPad = BBColor->getFirstNonPHI(); 770 if (EHPad->isEHPad()) 771 OpBundles.emplace_back("funclet", EHPad); 772 } 773 774 New = CallInst::Create(CI, OpBundles); 775 } else { 776 New = I.clone(); 777 } 778 779 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); 780 if (!I.getName().empty()) 781 New->setName(I.getName() + ".le"); 782 783 // Build LCSSA PHI nodes for any in-loop operands. Note that this is 784 // particularly cheap because we can rip off the PHI node that we're 785 // replacing for the number and blocks of the predecessors. 786 // OPT: If this shows up in a profile, we can instead finish sinking all 787 // invariant instructions, and then walk their operands to re-establish 788 // LCSSA. That will eliminate creating PHI nodes just to nuke them when 789 // sinking bottom-up. 
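  // Sketch of the result (illustrative, not from the original source): if the
  // cloned instruction in exit block %exit uses an in-loop value %x, the loop
  // below rewrites that operand to a new PHI such as
  //   %x.lcssa = phi i32 [ %x, %pred1 ], [ %x, %pred2 ]
  // with one incoming entry per incoming block of PN, i.e. per predecessor of
  // %exit.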
790 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; 791 ++OI) 792 if (Instruction *OInst = dyn_cast<Instruction>(*OI)) 793 if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) 794 if (!OLoop->contains(&PN)) { 795 PHINode *OpPN = 796 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), 797 OInst->getName() + ".lcssa", &ExitBlock.front()); 798 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 799 OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); 800 *OI = OpPN; 801 } 802 return New; 803 } 804 805 static Instruction *sinkThroughTriviallyReplacablePHI( 806 PHINode *TPN, Instruction *I, LoopInfo *LI, 807 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, 808 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { 809 assert(isTriviallyReplacablePHI(*TPN, *I) && 810 "Expect only trivially replaceable PHI"); 811 BasicBlock *ExitBlock = TPN->getParent(); 812 Instruction *New; 813 auto It = SunkCopies.find(ExitBlock); 814 if (It != SunkCopies.end()) 815 New = It->second; 816 else 817 New = SunkCopies[ExitBlock] = 818 CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); 819 return New; 820 } 821 822 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { 823 BasicBlock *BB = PN->getParent(); 824 if (!BB->canSplitPredecessors()) 825 return false; 826 // It's not impossible to split EHPad blocks, but if BlockColors already exists 827 // it requires updating BlockColors for all offspring blocks accordingly. By 828 // skipping such a corner case, we can keep the BlockColors update after splitting 829 // a predecessor fairly simple. 830 if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad()) 831 return false; 832 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 833 BasicBlock *BBPred = *PI; 834 if (isa<IndirectBrInst>(BBPred->getTerminator())) 835 return false; 836 } 837 return true; 838 } 839 840 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, 841 LoopInfo *LI, const Loop *CurLoop, 842 LoopSafetyInfo *SafetyInfo) { 843 #ifndef NDEBUG 844 SmallVector<BasicBlock *, 32> ExitBlocks; 845 CurLoop->getUniqueExitBlocks(ExitBlocks); 846 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 847 ExitBlocks.end()); 848 #endif 849 BasicBlock *ExitBB = PN->getParent(); 850 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); 851 852 // Split predecessors of the loop exit so that instructions in the loop are 853 // exposed to exit blocks through trivially replaceable PHIs, while keeping the 854 // loop in the canonical form where each predecessor of each exit block is 855 // contained within the loop. For example, this will convert the loop below 856 // from 857 // 858 // LB1: 859 // %v1 = 860 // br %LE, %LB2 861 // LB2: 862 // %v2 = 863 // br %LE, %LB1 864 // LE: 865 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable 866 // 867 // to 868 // 869 // LB1: 870 // %v1 = 871 // br %LE.split, %LB2 872 // LB2: 873 // %v2 = 874 // br %LE.split2, %LB1 875 // LE.split: 876 // %p1 = phi [%v1, %LB1] <-- trivially replaceable 877 // br %LE 878 // LE.split2: 879 // %p2 = phi [%v2, %LB2] <-- trivially replaceable 880 // br %LE 881 // LE: 882 // %p = phi [%p1, %LE.split], [%p2, %LE.split2] 883 // 884 auto &BlockColors = SafetyInfo->BlockColors; 885 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); 886 while (!PredBBs.empty()) { 887 BasicBlock *PredBB = *PredBBs.begin(); 888 assert(CurLoop->contains(PredBB) && 889 "Expect all predecessors to be in the loop"); 890 if (PN->getBasicBlockIndex(PredBB) >= 0) { 891 BasicBlock *NewPred = SplitBlockPredecessors( 892 ExitBB, PredBB, ".split.loop.exit", DT, LI, true); 893 // Since we do not allow splitting EH-pad blocks with BlockColors in 894 // canSplitPredecessors(), we can simply assign the predecessor's color to 895 // the new block. 896 if (!BlockColors.empty()) { 897 // Grab a reference to the ColorVector to be inserted before getting the 898 // reference to the vector we are copying because inserting the new 899 // element in BlockColors might cause the map to be reallocated. 900 ColorVector &ColorsForNewBlock = BlockColors[NewPred]; 901 ColorVector &ColorsForOldBlock = BlockColors[PredBB]; 902 ColorsForNewBlock = ColorsForOldBlock; 903 } 904 } 905 PredBBs.remove(PredBB); 906 } 907 } 908 909 /// When an instruction is found to be used only outside of the loop, this 910 /// function moves it to the exit blocks and patches up SSA form as needed. 911 /// This method is guaranteed to remove the original instruction from its 912 /// position, and may either delete it or move it to outside of the loop. 913 /// 914 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 915 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, 916 OptimizationRemarkEmitter *ORE, bool FreeInLoop) { 917 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 918 ORE->emit([&]() { 919 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 920 << "sinking " << ore::NV("Inst", &I); 921 }); 922 bool Changed = false; 923 if (isa<LoadInst>(I)) 924 ++NumMovedLoads; 925 else if (isa<CallInst>(I)) 926 ++NumMovedCalls; 927 ++NumSunk; 928 929 // Iterate over users to be ready for actual sinking. Replace uses in 930 // unreachable blocks with undef and make all user PHIs trivially replaceable. 931 SmallPtrSet<Instruction *, 8> VisitedUsers; 932 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { 933 auto *User = cast<Instruction>(*UI); 934 Use &U = UI.getUse(); 935 ++UI; 936 937 if (VisitedUsers.count(User) || CurLoop->contains(User)) 938 continue; 939 940 if (!DT->isReachableFromEntry(User->getParent())) { 941 U = UndefValue::get(I.getType()); 942 Changed = true; 943 continue; 944 } 945 946 // The user must be a PHI node. 947 PHINode *PN = cast<PHINode>(User); 948 949 // Surprisingly, instructions can be used outside of loops without any 950 // exits. This can only happen in PHI nodes if the incoming block is 951 // unreachable.
952 BasicBlock *BB = PN->getIncomingBlock(U); 953 if (!DT->isReachableFromEntry(BB)) { 954 U = UndefValue::get(I.getType()); 955 Changed = true; 956 continue; 957 } 958 959 VisitedUsers.insert(PN); 960 if (isTriviallyReplacablePHI(*PN, I)) 961 continue; 962 963 if (!canSplitPredecessors(PN, SafetyInfo)) 964 return Changed; 965 966 // Split predecessors of the PHI so that we can make users trivially 967 // replaceable. 968 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); 969 970 // Should rebuild the iterators, as they may be invalidated by 971 // splitPredecessorsOfLoopExit(). 972 UI = I.user_begin(); 973 UE = I.user_end(); 974 } 975 976 if (VisitedUsers.empty()) 977 return Changed; 978 979 #ifndef NDEBUG 980 SmallVector<BasicBlock *, 32> ExitBlocks; 981 CurLoop->getUniqueExitBlocks(ExitBlocks); 982 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 983 ExitBlocks.end()); 984 #endif 985 986 // Clones of this instruction. Don't create more than one per exit block! 987 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 988 989 // If this instruction is only used outside of the loop, then all users are 990 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 991 // the instruction. 992 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); 993 for (auto *UI : Users) { 994 auto *User = cast<Instruction>(UI); 995 996 if (CurLoop->contains(User)) 997 continue; 998 999 PHINode *PN = cast<PHINode>(User); 1000 assert(ExitBlockSet.count(PN->getParent()) && 1001 "The LCSSA PHI is not in an exit block!"); 1002 // The PHI must be trivially replaceable. 1003 Instruction *New = sinkThroughTriviallyReplacablePHI(PN, &I, LI, SunkCopies, 1004 SafetyInfo, CurLoop); 1005 PN->replaceAllUsesWith(New); 1006 PN->eraseFromParent(); 1007 Changed = true; 1008 } 1009 return Changed; 1010 } 1011 1012 /// When an instruction is found to use only loop-invariant operands and it is 1013 /// safe to hoist, this function is called to do the dirty work. 1014 /// 1015 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1016 const LoopSafetyInfo *SafetyInfo, 1017 OptimizationRemarkEmitter *ORE) { 1018 auto *Preheader = CurLoop->getLoopPreheader(); 1019 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I 1020 << "\n"); 1021 ORE->emit([&]() { 1022 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " 1023 << ore::NV("Inst", &I); 1024 }); 1025 1026 // Metadata can be dependent on conditions we are hoisting above. 1027 // Conservatively strip all metadata on the instruction unless we were 1028 // guaranteed to execute I if we entered the loop, in which case the metadata 1029 // is valid in the loop preheader. 1030 if (I.hasMetadataOtherThanDebugLoc() && 1031 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 1032 // time in isGuaranteedToExecute if we don't actually have anything to 1033 // drop. It is a compile time optimization, not required for correctness. 1034 !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) 1035 I.dropUnknownNonDebugMetadata(); 1036 1037 // Move the new node to the Preheader, before its terminator. 1038 I.moveBefore(Preheader->getTerminator()); 1039 1040 // Do not retain debug locations when we are moving instructions to different 1041 // basic blocks, because we want to avoid jumpy line tables. Calls, however, 1042 // need to retain their debug locs because they may be inlined.
1043 // FIXME: How do we retain source locations without causing poor debugging 1044 // behavior? 1045 if (!isa<CallInst>(I)) 1046 I.setDebugLoc(DebugLoc()); 1047 1048 if (isa<LoadInst>(I)) 1049 ++NumMovedLoads; 1050 else if (isa<CallInst>(I)) 1051 ++NumMovedCalls; 1052 ++NumHoisted; 1053 return true; 1054 } 1055 1056 /// Only sink or hoist an instruction if it is not a trapping instruction, 1057 /// or if the instruction is known not to trap when moved to the preheader. 1058 /// or if it is a trapping instruction and is guaranteed to execute. 1059 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 1060 const DominatorTree *DT, 1061 const Loop *CurLoop, 1062 const LoopSafetyInfo *SafetyInfo, 1063 OptimizationRemarkEmitter *ORE, 1064 const Instruction *CtxI) { 1065 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) 1066 return true; 1067 1068 bool GuaranteedToExecute = 1069 isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); 1070 1071 if (!GuaranteedToExecute) { 1072 auto *LI = dyn_cast<LoadInst>(&Inst); 1073 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 1074 ORE->emit([&]() { 1075 return OptimizationRemarkMissed( 1076 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 1077 << "failed to hoist load with loop-invariant address " 1078 "because load is conditionally executed"; 1079 }); 1080 } 1081 1082 return GuaranteedToExecute; 1083 } 1084 1085 namespace { 1086 class LoopPromoter : public LoadAndStorePromoter { 1087 Value *SomePtr; // Designated pointer to store to. 1088 const SmallSetVector<Value *, 8> &PointerMustAliases; 1089 SmallVectorImpl<BasicBlock *> &LoopExitBlocks; 1090 SmallVectorImpl<Instruction *> &LoopInsertPts; 1091 PredIteratorCache &PredCache; 1092 AliasSetTracker &AST; 1093 LoopInfo &LI; 1094 DebugLoc DL; 1095 int Alignment; 1096 bool UnorderedAtomic; 1097 AAMDNodes AATags; 1098 1099 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { 1100 if (Instruction *I = dyn_cast<Instruction>(V)) 1101 if (Loop *L = LI.getLoopFor(I->getParent())) 1102 if (!L->contains(BB)) { 1103 // We need to create an LCSSA PHI node for the incoming value and 1104 // store that. 1105 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), 1106 I->getName() + ".lcssa", &BB->front()); 1107 for (BasicBlock *Pred : PredCache.get(BB)) 1108 PN->addIncoming(I, Pred); 1109 return PN; 1110 } 1111 return V; 1112 } 1113 1114 public: 1115 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, 1116 const SmallSetVector<Value *, 8> &PMA, 1117 SmallVectorImpl<BasicBlock *> &LEB, 1118 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, 1119 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, 1120 bool UnorderedAtomic, const AAMDNodes &AATags) 1121 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), 1122 LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), 1123 LI(li), DL(std::move(dl)), Alignment(alignment), 1124 UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} 1125 1126 bool isInstInList(Instruction *I, 1127 const SmallVectorImpl<Instruction *> &) const override { 1128 Value *Ptr; 1129 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 1130 Ptr = LI->getOperand(0); 1131 else 1132 Ptr = cast<StoreInst>(I)->getPointerOperand(); 1133 return PointerMustAliases.count(Ptr); 1134 } 1135 1136 void doExtraRewritesBeforeFinalDeletion() const override { 1137 // Insert stores after in the loop exit blocks. Each exit block gets a 1138 // store of the live-out values that feed them. 
Since we've already told 1139 // the SSA updater about the defs in the loop and the preheader 1140 // definition, it is all set and we can start using it. 1141 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 1142 BasicBlock *ExitBlock = LoopExitBlocks[i]; 1143 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 1144 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 1145 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 1146 Instruction *InsertPos = LoopInsertPts[i]; 1147 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 1148 if (UnorderedAtomic) 1149 NewSI->setOrdering(AtomicOrdering::Unordered); 1150 NewSI->setAlignment(Alignment); 1151 NewSI->setDebugLoc(DL); 1152 if (AATags) 1153 NewSI->setAAMetadata(AATags); 1154 } 1155 } 1156 1157 void replaceLoadWithValue(LoadInst *LI, Value *V) const override { 1158 // Update alias analysis. 1159 AST.copyValue(LI, V); 1160 } 1161 void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } 1162 }; 1163 1164 1165 /// Return true iff we can prove that a caller of this function can not inspect 1166 /// the contents of the provided object in a well defined program. 1167 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { 1168 if (isa<AllocaInst>(Object)) 1169 // Since the alloca goes out of scope, we know the caller can't retain a 1170 // reference to it and be well defined. Thus, we don't need to check for 1171 // capture. 1172 return true; 1173 1174 // For all other objects we need to know that the caller can't possibly 1175 // have gotten a reference to the object. There are two components of 1176 // that: 1177 // 1) Object can't be escaped by this function. This is what 1178 // PointerMayBeCaptured checks. 1179 // 2) Object can't have been captured at definition site. For this, we 1180 // need to know the return value is noalias. At the moment, we use a 1181 // weaker condition and handle only AllocLikeFunctions (which are 1182 // known to be noalias). TODO 1183 return isAllocLikeFn(Object, TLI) && 1184 !PointerMayBeCaptured(Object, true, true); 1185 } 1186 1187 } // namespace 1188 1189 /// Try to promote memory values to scalars by sinking stores out of the 1190 /// loop and moving loads to before the loop. We do this by looping over 1191 /// the stores in the loop, looking for stores to Must pointers which are 1192 /// loop invariant. 1193 /// 1194 bool llvm::promoteLoopAccessesToScalars( 1195 const SmallSetVector<Value *, 8> &PointerMustAliases, 1196 SmallVectorImpl<BasicBlock *> &ExitBlocks, 1197 SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, 1198 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1199 Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 1200 OptimizationRemarkEmitter *ORE) { 1201 // Verify inputs. 1202 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1203 CurAST != nullptr && SafetyInfo != nullptr && 1204 "Unexpected Input to promoteLoopAccessesToScalars"); 1205 1206 Value *SomePtr = *PointerMustAliases.begin(); 1207 BasicBlock *Preheader = CurLoop->getLoopPreheader(); 1208 1209 // It is not safe to promote a load/store from the loop if the load/store is 1210 // conditional. For example, turning: 1211 // 1212 // for () { if (c) *P += 1; } 1213 // 1214 // into: 1215 // 1216 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 1217 // 1218 // is not safe, because *P may only be valid to access if 'c' is true. 
1219 // 1220 // The safety property divides into two parts: 1221 // p1) The memory may not be dereferenceable on entry to the loop. In this 1222 // case, we can't insert the required load in the preheader. 1223 // p2) The memory model does not allow us to insert a store along any dynamic 1224 // path which did not originally have one. 1225 // 1226 // If at least one store is guaranteed to execute, both properties are 1227 // satisfied, and promotion is legal. 1228 // 1229 // This, however, is not a necessary condition. Even if no store/load is 1230 // guaranteed to execute, we can still establish these properties. 1231 // We can establish (p1) by proving that hoisting the load into the preheader 1232 // is safe (i.e. proving dereferenceability on all paths through the loop). We 1233 // can use any access within the alias set to prove dereferenceability, 1234 // since they're all must alias. 1235 // 1236 // There are two ways to establish (p2): 1237 // a) Prove the location is thread-local. In this case the memory model 1238 // requirement does not apply, and stores are safe to insert. 1239 // b) Prove a store dominates every exit block. In this case, if an exit 1240 // block is reached, the original dynamic path would have taken us through 1241 // the store, so inserting a store into the exit block is safe. Note that this 1242 // is different from the store being guaranteed to execute. For instance, 1243 // if an exception is thrown on the first iteration of the loop, the original 1244 // store is never executed, but the exit blocks are not executed either. 1245 1246 bool DereferenceableInPH = false; 1247 bool SafeToInsertStore = false; 1248 1249 SmallVector<Instruction *, 64> LoopUses; 1250 1251 // We start with an alignment of one and try to find instructions that allow 1252 // us to prove better alignment. 1253 unsigned Alignment = 1; 1254 // Keep track of which types of access we see. 1255 bool SawUnorderedAtomic = false; 1256 bool SawNotAtomic = false; 1257 AAMDNodes AATags; 1258 1259 const DataLayout &MDL = Preheader->getModule()->getDataLayout(); 1260 1261 bool IsKnownThreadLocalObject = false; 1262 if (SafetyInfo->MayThrow) { 1263 // If a loop can throw, we have to insert a store along each unwind edge. 1264 // That said, we can't actually make the unwind edge explicit. Therefore, 1265 // we have to prove that the store is dead along the unwind edge. We do 1266 // this by proving that the caller can't have a reference to the object 1267 // after return and thus can't possibly load from the object. 1268 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1269 if (!isKnownNonEscaping(Object, TLI)) 1270 return false; 1271 // Subtlety: Allocas aren't visible to callers, but *are* potentially 1272 // visible to other threads if captured and used during their lifetimes. 1273 IsKnownThreadLocalObject = !isa<AllocaInst>(Object); 1274 } 1275 1276 // Check that all of the pointers in the alias set have the same type. We 1277 // cannot (yet) promote a memory location that is loaded and stored in 1278 // different sizes. While we are at it, collect alignment and AA info. 1279 for (Value *ASIV : PointerMustAliases) { 1280 // Check that all of the pointers in the alias set have the same type. We 1281 // cannot (yet) promote a memory location that is loaded and stored in 1282 // different sizes. 1283 if (SomePtr->getType() != ASIV->getType()) 1284 return false; 1285 1286 for (User *U : ASIV->users()) { 1287 // Ignore instructions that are outside the loop.
1288 Instruction *UI = dyn_cast<Instruction>(U); 1289 if (!UI || !CurLoop->contains(UI)) 1290 continue; 1291 1292 // If there is an non-load/store instruction in the loop, we can't promote 1293 // it. 1294 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 1295 assert(!Load->isVolatile() && "AST broken"); 1296 if (!Load->isUnordered()) 1297 return false; 1298 1299 SawUnorderedAtomic |= Load->isAtomic(); 1300 SawNotAtomic |= !Load->isAtomic(); 1301 1302 if (!DereferenceableInPH) 1303 DereferenceableInPH = isSafeToExecuteUnconditionally( 1304 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); 1305 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 1306 // Stores *of* the pointer are not interesting, only stores *to* the 1307 // pointer. 1308 if (UI->getOperand(1) != ASIV) 1309 continue; 1310 assert(!Store->isVolatile() && "AST broken"); 1311 if (!Store->isUnordered()) 1312 return false; 1313 1314 SawUnorderedAtomic |= Store->isAtomic(); 1315 SawNotAtomic |= !Store->isAtomic(); 1316 1317 // If the store is guaranteed to execute, both properties are satisfied. 1318 // We may want to check if a store is guaranteed to execute even if we 1319 // already know that promotion is safe, since it may have higher 1320 // alignment than any other guaranteed stores, in which case we can 1321 // raise the alignment on the promoted store. 1322 unsigned InstAlignment = Store->getAlignment(); 1323 if (!InstAlignment) 1324 InstAlignment = 1325 MDL.getABITypeAlignment(Store->getValueOperand()->getType()); 1326 1327 if (!DereferenceableInPH || !SafeToInsertStore || 1328 (InstAlignment > Alignment)) { 1329 if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { 1330 DereferenceableInPH = true; 1331 SafeToInsertStore = true; 1332 Alignment = std::max(Alignment, InstAlignment); 1333 } 1334 } 1335 1336 // If a store dominates all exit blocks, it is safe to sink. 1337 // As explained above, if an exit block was executed, a dominating 1338 // store must have been executed at least once, so we are not 1339 // introducing stores on paths that did not have them. 1340 // Note that this only looks at explicit exit blocks. If we ever 1341 // start sinking stores into unwind edges (see above), this will break. 1342 if (!SafeToInsertStore) 1343 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 1344 return DT->dominates(Store->getParent(), Exit); 1345 }); 1346 1347 // If the store is not guaranteed to execute, we may still get 1348 // deref info through it. 1349 if (!DereferenceableInPH) { 1350 DereferenceableInPH = isDereferenceableAndAlignedPointer( 1351 Store->getPointerOperand(), Store->getAlignment(), MDL, 1352 Preheader->getTerminator(), DT); 1353 } 1354 } else 1355 return false; // Not a load or store. 1356 1357 // Merge the AA tags. 1358 if (LoopUses.empty()) { 1359 // On the first load/store, just take its AA tags. 1360 UI->getAAMetadata(AATags); 1361 } else if (AATags) { 1362 UI->getAAMetadata(AATags, /* Merge = */ true); 1363 } 1364 1365 LoopUses.push_back(UI); 1366 } 1367 } 1368 1369 // If we found both an unordered atomic instruction and a non-atomic memory 1370 // access, bail. We can't blindly promote non-atomic to atomic since we 1371 // might not be able to lower the result. We can't downgrade since that 1372 // would violate memory model. Also, align 0 is an error for atomics. 1373 if (SawUnorderedAtomic && SawNotAtomic) 1374 return false; 1375 1376 // If we couldn't prove we can hoist the load, bail. 
1377 if (!DereferenceableInPH) 1378 return false; 1379 1380 // We know we can hoist the load, but don't have a guaranteed store. 1381 // Check whether the location is thread-local. If it is, then we can insert 1382 // stores along paths which originally didn't have them without violating the 1383 // memory model. 1384 if (!SafeToInsertStore) { 1385 if (IsKnownThreadLocalObject) 1386 SafeToInsertStore = true; 1387 else { 1388 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1389 SafeToInsertStore = 1390 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && 1391 !PointerMayBeCaptured(Object, true, true); 1392 } 1393 } 1394 1395 // If we've still failed to prove we can sink the store, give up. 1396 if (!SafeToInsertStore) 1397 return false; 1398 1399 // Otherwise, this is safe to promote, lets do it! 1400 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr 1401 << '\n'); 1402 ORE->emit([&]() { 1403 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 1404 LoopUses[0]) 1405 << "Moving accesses to memory location out of the loop"; 1406 }); 1407 ++NumPromoted; 1408 1409 // Grab a debug location for the inserted loads/stores; given that the 1410 // inserted loads/stores have little relation to the original loads/stores, 1411 // this code just arbitrarily picks a location from one, since any debug 1412 // location is better than none. 1413 DebugLoc DL = LoopUses[0]->getDebugLoc(); 1414 1415 // We use the SSAUpdater interface to insert phi nodes as required. 1416 SmallVector<PHINode *, 16> NewPHIs; 1417 SSAUpdater SSA(&NewPHIs); 1418 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 1419 InsertPts, PIC, *CurAST, *LI, DL, Alignment, 1420 SawUnorderedAtomic, AATags); 1421 1422 // Set up the preheader to have a definition of the value. It is the live-out 1423 // value from the preheader that uses in the loop will use. 1424 LoadInst *PreheaderLoad = new LoadInst( 1425 SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); 1426 if (SawUnorderedAtomic) 1427 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 1428 PreheaderLoad->setAlignment(Alignment); 1429 PreheaderLoad->setDebugLoc(DL); 1430 if (AATags) 1431 PreheaderLoad->setAAMetadata(AATags); 1432 SSA.AddAvailableValue(Preheader, PreheaderLoad); 1433 1434 // Rewrite all the loads in the loop and remember all the definitions from 1435 // stores in the loop. 1436 Promoter.run(LoopUses); 1437 1438 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 1439 if (PreheaderLoad->use_empty()) 1440 PreheaderLoad->eraseFromParent(); 1441 1442 return true; 1443 } 1444 1445 /// Returns an owning pointer to an alias set which incorporates aliasing info 1446 /// from L and all subloops of L. 1447 /// FIXME: In new pass manager, there is no helper function to handle loop 1448 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed 1449 /// from scratch for every loop. Hook up with the helper functions when 1450 /// available in the new pass manager to avoid redundant computation. 
1451 AliasSetTracker * 1452 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 1453 AliasAnalysis *AA) { 1454 AliasSetTracker *CurAST = nullptr; 1455 SmallVector<Loop *, 4> RecomputeLoops; 1456 for (Loop *InnerL : L->getSubLoops()) { 1457 auto MapI = LoopToAliasSetMap.find(InnerL); 1458 // If the AST for this inner loop is missing it may have been merged into 1459 // some other loop's AST and then that loop unrolled, and so we need to 1460 // recompute it. 1461 if (MapI == LoopToAliasSetMap.end()) { 1462 RecomputeLoops.push_back(InnerL); 1463 continue; 1464 } 1465 AliasSetTracker *InnerAST = MapI->second; 1466 1467 if (CurAST != nullptr) { 1468 // What if InnerLoop was modified by other passes ? 1469 CurAST->add(*InnerAST); 1470 1471 // Once we've incorporated the inner loop's AST into ours, we don't need 1472 // the subloop's anymore. 1473 delete InnerAST; 1474 } else { 1475 CurAST = InnerAST; 1476 } 1477 LoopToAliasSetMap.erase(MapI); 1478 } 1479 if (CurAST == nullptr) 1480 CurAST = new AliasSetTracker(*AA); 1481 1482 auto mergeLoop = [&](Loop *L) { 1483 // Loop over the body of this loop, looking for calls, invokes, and stores. 1484 for (BasicBlock *BB : L->blocks()) 1485 CurAST->add(*BB); // Incorporate the specified basic block 1486 }; 1487 1488 // Add everything from the sub loops that are no longer directly available. 1489 for (Loop *InnerL : RecomputeLoops) 1490 mergeLoop(InnerL); 1491 1492 // And merge in this loop. 1493 mergeLoop(L); 1494 1495 return CurAST; 1496 } 1497 1498 /// Simple analysis hook. Clone alias set info. 1499 /// 1500 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 1501 Loop *L) { 1502 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1503 if (!AST) 1504 return; 1505 1506 AST->copyValue(From, To); 1507 } 1508 1509 /// Simple Analysis hook. Delete value V from alias set 1510 /// 1511 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { 1512 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1513 if (!AST) 1514 return; 1515 1516 AST->deleteValue(V); 1517 } 1518 1519 /// Simple Analysis hook. Delete value L from alias set map. 1520 /// 1521 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { 1522 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1523 if (!AST) 1524 return; 1525 1526 delete AST; 1527 LICM.getLoopToAliasSetMap().erase(L); 1528 } 1529 1530 /// Return true if the body of this loop may store into the memory 1531 /// location pointed to by V. 1532 /// 1533 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 1534 const AAMDNodes &AAInfo, 1535 AliasSetTracker *CurAST) { 1536 // Check to see if any of the basic blocks in CurLoop invalidate *V. 1537 return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); 1538 } 1539 1540 /// Little predicate that returns true if the specified basic block is in 1541 /// a subloop of the current one, not the current one itself. 1542 /// 1543 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 1544 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 1545 return LI->getLoopFor(BB) != CurLoop; 1546 } 1547