//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion, attempting to remove as much
// code from the body of a loop as possible. It does this by either hoisting
// code into the preheader block, or by sinking code to the exit blocks if it is
// safe. This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
// Hoisting operations out of loops is a canonicalization transform. It
// enables and simplifies subsequent optimizations in the middle-end.
// Rematerialization of hoisted instructions to reduce register pressure is the
// responsibility of the back-end, which has more accurate information about
// register pressure and also handles optimizations other than LICM that
// increase live-ranges.
//
// This pass uses alias analysis for two purposes:
//
//  1. Moving loop invariant loads and calls out of loops. If we can determine
//     that a load or call inside of a loop never aliases anything stored to,
//     we can hoist it or sink it like any other instruction.
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
//     the loop, we try to move the store to happen AFTER the loop instead of
//     inside of the loop. This can only happen if a few conditions are true:
//       A. The pointer stored through is loop invariant.
//       B. There are no stores or loads in the loop which _may_ alias the
//          pointer. There are no calls in the loop which mod/ref the pointer.
//     If these conditions are true, we can promote the loads and stores in the
//     loop of the pointer to use a temporary alloca'd variable. We then use
//     the SSAUpdater to construct the appropriate SSA form for the value.
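//
// As an illustrative sketch (not actual compiler output), scalar promotion
// turns a loop such as
//     for (i = 0; i < n; ++i) *p += a[i];   // p loop invariant, no aliases
// into roughly
//     t = *p; for (i = 0; i < n; ++i) t += a[i]; *p = t;
// so the memory location lives in a register for the duration of the loop.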
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "licm"

STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

static cl::opt<bool> ControlFlowHoisting(
    "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
    cl::desc("Enable control flow (and PHI) hoisting in LICM"));

static cl::opt<uint32_t> MaxNumUsesTraversed(
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    cl::desc("Max num uses visited for identifying load "
             "invariance in loop using invariant start (default = 8)"));
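
// These options can be toggled from the opt command line, e.g. (illustrative
// invocation): opt -passes='loop-mssa(licm)' -licm-control-flow-hoisting
//              -disable-licm-promotion input.ll -S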

// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. This flag applies only when LICM uses MemorySSA
// instead of AliasSetTracker. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
cl::opt<unsigned> llvm::SetLicmMssaOptCap(
    "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
    cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
    "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
    cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
             "effect. When MSSA in LICM is enabled, then this is the maximum "
             "number of accesses allowed to be present in a loop in order to "
             "enable memory promotion."));

static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop,
                                  bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
                  OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 BlockFrequencyInfo *BFI, const Loop *CurLoop,
                 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
                 OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(
    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
    bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AAResults *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                             Loop *CurLoop, Instruction &I,
                                             SinkAndHoistLICMFlags &Flags);
static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
                                              MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater *MSSAU);

static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater *MSSAU, ScalarEvolution *SE);

static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                function_ref<void(Instruction *)> Fn);
static SmallVector<SmallSetVector<Value *, 8>, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);

namespace {
struct LoopInvariantCodeMotion {
  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
                 BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);

  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
                          unsigned LicmMssaNoAccForPromotionCap,
                          bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
        LicmAllowSpeculation(LicmAllowSpeculation) {}

private:
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
  bool LicmAllowSpeculation;
};

struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass(
      unsigned LicmMssaOptCap = SetLicmMssaOptCap,
      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
      bool LicmAllowSpeculation = true)
      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                           LicmAllowSpeculation) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
                      << L->getHeader()->getNameOrAsOperand() << "\n");

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
    bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
    BlockFrequencyInfo *BFI =
        hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
                       : nullptr;
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(
        L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
        &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
            *L->getHeader()->getParent()),
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
            *L->getHeader()->getParent()),
        SE ? &SE->getSE() : nullptr, MSSA, &ORE);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
    AU.addPreserved<LazyBlockFrequencyInfoPass>();
    AU.addPreserved<LazyBranchProbabilityInfoPass>();
  }

private:
  LoopInvariantCodeMotion LICM;
};
} // namespace

PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
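  // LICM must be nested in a loop pass manager that provides MemorySSA, e.g.
  // (illustrative invocation): opt -passes='loop-mssa(licm)' input.ll -S.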
  if (!AR.MSSA)
    report_fatal_error("LICM requires MemorySSA (loop-mssa)");

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                               LicmAllowSpeculation);
  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
                      &AR.SE, AR.MSSA, &ORE))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();

  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  PA.preserve<MemorySSAAnalysis>();

  return PA;
}

PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
                                 LoopStandardAnalysisResults &AR,
                                 LPMUpdater &) {
  if (!AR.MSSA)
    report_fatal_error("LNICM requires MemorySSA (loop-mssa)");

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(LN.getParent());

  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                               LicmAllowSpeculation);

  Loop &OutermostLoop = LN.getOutermostLoop();
  bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
                                &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);

  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();

  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  PA.preserve<MemorySSAAnalysis>();

  return PA;
}

char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
                           unsigned LicmMssaNoAccForPromotionCap,
                           bool LicmAllowSpeculation) {
  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                            LicmAllowSpeculation);
}

llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
                                                   MemorySSA *MSSA)
    : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
                            IsSink, L, MSSA) {}

llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
    unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
    Loop *L, MemorySSA *MSSA)
    : LicmMssaOptCap(LicmMssaOptCap),
      LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
      IsSink(IsSink) {
  assert(((L != nullptr) == (MSSA != nullptr)) &&
         "Unexpected values for SinkAndHoistLICMFlags");
  if (!MSSA)
    return;

  unsigned AccessCapCount = 0;
  for (auto *BB : L->getBlocks())
    if (const auto *Accesses = MSSA->getBlockAccesses(BB))
      for (const auto &MA : *Accesses) {
        (void)MA;
        ++AccessCapCount;
        if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
          NoOfMemAccTooLarge = true;
          return;
        }
      }
}

/// Hoist expressions out of the specified loop. Note, alias info for the inner
/// loop is not preserved, so it is not a good idea to run LICM multiple
/// times on one loop.
bool LoopInvariantCodeMotion::runOnLoop(
    Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
    bool LoopNestMode) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  // If this loop has metadata indicating that LICM is not to be performed then
  // just exit.
  if (hasDisableLICMTransformsHint(L)) {
    return false;
  }

  // Don't sink stores from loops with coroutine suspend instructions.
  // LICM would sink instructions into the default destination of
  // the coroutine switch. The default destination of the switch is to
  // handle the case where the coroutine is suspended, by which point the
  // coroutine frame may have been destroyed. No instruction can be sunk there.
  // FIXME: This unfortunately hurts the performance of coroutines; however,
  // there is currently no general solution for this. Similar issues could also
  // potentially happen in other passes where instructions are being moved
  // across that edge.
  bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
    return llvm::any_of(*BB, [](Instruction &I) {
      IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
      return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
    });
  });

  MemorySSAUpdater MSSAU(MSSA);
  SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                              /*IsSink=*/true, L, MSSA);

  // Get the preheader block to move instructions into...
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  ICFLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(L);

  // We want to visit all of the instructions in this loop... that are not part
  // of our subloops (they have already had their invariants hoisted out of
  // their loop, into this loop, so there is no need to process the BODIES of
  // the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree so
  // that we are guaranteed to see definitions before we see uses. This allows
  // us to sink instructions in one pass, without iteration. After sinking
  // instructions, we perform another pass to hoist them out of the loop.
  if (L->hasDedicatedExits())
    Changed |= LoopNestMode
                   ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
                                           DT, BFI, TLI, TTI, L, &MSSAU,
                                           &SafetyInfo, Flags, ORE)
                   : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
                                TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
  Flags.setIsSink(false);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
                           &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
                           LicmAllowSpeculation);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify, so
  // make sure we catch that. An additional load may be generated in the
  // preheader for the SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
      !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      SmallVector<Instruction *, 8> InsertPts;
      SmallVector<MemoryAccess *, 8> MSSAInsertPts;
      InsertPts.reserve(ExitBlocks.size());
      MSSAInsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks) {
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
        MSSAInsertPts.push_back(nullptr);
      }

      PredIteratorCache PIC;

      // Promoting one set of accesses may make the pointers for another set
      // loop invariant, so run this in a loop (with the MaybePromotable set
      // decreasing in size over time).
      bool Promoted = false;
      bool LocalPromoted;
      do {
        LocalPromoted = false;
        for (const SmallSetVector<Value *, 8> &PointerMustAliases :
             collectPromotionCandidates(MSSA, AA, L)) {
          LocalPromoted |= promoteLoopAccessesToScalars(
              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
              DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
        }
        Promoted |= LocalPromoted;
      } while (LocalPromoted);

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
      // within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
      // it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI, SE);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
  // specifically moving instructions across the loop boundary and so it is
  // especially in need of basic functional correctness checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  if (VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  if (Changed && SE)
    SE->forgetLoopDispositions(L);
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                      DominatorTree *DT, BlockFrequencyInfo *BFI,
                      TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
                      Loop *CurLoop, MemorySSAUpdater *MSSAU,
                      ICFLoopSafetyInfo *SafetyInfo,
                      SinkAndHoistLICMFlags &Flags,
                      OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion.");

  // We want to visit children before parents. We will enqueue all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // The instruction is not used in the loop if it is dead. In this case,
      // we just delete it instead of sinking it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        salvageKnowledge(&I);
        salvageDebugInfo(I);
        ++II;
        eraseInstruction(I, *SafetyInfo, MSSAU);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop. We can do this if all of the users of the instruction are
      // outside of the loop. In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FreeInLoop = false;
      bool LoopNestMode = OutermostLoop != nullptr;
      if (!I.mayHaveSideEffects() &&
          isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
                                SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU, true,
                             &Flags, ORE)) {
        if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
          if (!FreeInLoop) {
            ++II;
            salvageDebugInfo(I);
            eraseInstruction(I, *SafetyInfo, MSSAU);
          }
          Changed = true;
        }
      }
    }
  }
  if (VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  return Changed;
}

bool llvm::sinkRegionForLoopNest(
    DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
    SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {

  bool Changed = false;
  SmallPriorityWorklist<Loop *, 4> Worklist;
  Worklist.insert(CurLoop);
  appendLoopsToWorklist(*CurLoop, Worklist);
  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
                          TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
  }
  return Changed;
}

namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
// start hoisting to the loop preheader, and when we see a loop invariant branch
// we make note of this. When we then come to hoist an instruction that's
// conditional on such a branch we duplicate the branch and the relevant control
// flow, then hoist the instruction into the block corresponding to its original
// block in the duplicated control flow.
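//
// As an illustrative sketch (not actual output): for an in-loop diamond
//   header:  br i1 %inv.cond, label %then, label %else
//   then:    br label %join
//   else:    br label %join
//   join:    %p = phi i32 [ %a, %then ], [ %b, %else ]
// where %inv.cond, %a and %b are loop invariant, the branch and empty
// then/else/join blocks are recreated above the loop so that %p can be
// hoisted into the duplicated join block.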
class ControlFlowHoister {
private:
  // Information about the loop we are hoisting from
  LoopInfo *LI;
  DominatorTree *DT;
  Loop *CurLoop;
  MemorySSAUpdater *MSSAU;

  // A map of blocks in the loop to the block their instructions will be hoisted
  // to.
  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;

  // The branches that we can hoist, mapped to the block that marks a
  // convergence point of their control flow.
  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;

public:
  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
                     MemorySSAUpdater *MSSAU)
      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}

  void registerPossiblyHoistableBranch(BranchInst *BI) {
    // We can only hoist conditional branches with loop invariant operands.
    if (!ControlFlowHoisting || !BI->isConditional() ||
        !CurLoop->hasLoopInvariantOperands(BI))
      return;

    // The branch destinations need to be in the loop, and we don't gain
    // anything by duplicating conditional branches with duplicate successors,
    // as it's essentially the same as an unconditional branch.
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
        TrueDest == FalseDest)
      return;

    // We can hoist BI if one branch destination is the successor of the other,
    // or both have a common successor, which we check by seeing if the
    // intersection of their successors is non-empty.
    // TODO: This could be expanded to allowing branches where both ends
    // eventually converge to a single block.
    SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
    TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
    FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
    BasicBlock *CommonSucc = nullptr;
    if (TrueDestSucc.count(FalseDest)) {
      CommonSucc = FalseDest;
    } else if (FalseDestSucc.count(TrueDest)) {
      CommonSucc = TrueDest;
    } else {
      set_intersect(TrueDestSucc, FalseDestSucc);
      // If there's one common successor use that.
      if (TrueDestSucc.size() == 1)
        CommonSucc = *TrueDestSucc.begin();
      // If there's more than one, pick whichever appears first in the block
      // list (we can't use the value returned by TrueDestSucc.begin() as it's
      // unpredictable which element gets returned).
      else if (!TrueDestSucc.empty()) {
        Function *F = TrueDest->getParent();
        auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
        auto It = llvm::find_if(*F, IsSucc);
        assert(It != F->end() && "Could not find successor in function");
        CommonSucc = &*It;
      }
    }
    // The common successor has to be dominated by the branch, as otherwise
    // there will be some other path to the successor that will not be
    // controlled by this branch so any phi we hoist would be controlled by the
    // wrong condition. This also takes care of avoiding hoisting of loop back
    // edges.
    // TODO: In some cases this could be relaxed if the successor is dominated
    // by another block that's been hoisted and we can guarantee that the
    // control flow has been replicated exactly.
    if (CommonSucc && DT->dominates(BI, CommonSucc))
      HoistableBranches[BI] = CommonSucc;
  }

  bool canHoistPHI(PHINode *PN) {
    // The phi must have loop invariant operands.
    if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
      return false;
    // We can hoist phis if the block they are in is the target of hoistable
    // branches which cover all of the predecessors of the block.
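    // (A hoistable branch whose taken or fallthrough successor is the block
    // itself forms a "triangle" and accounts for its parent block plus the
    // other successor as predecessors; otherwise it forms a "diamond" and
    // accounts for both successors. The loop below erases whichever
    // predecessors each hoistable branch accounts for.)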
    SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
    BasicBlock *BB = PN->getParent();
    for (BasicBlock *PredBB : predecessors(BB))
      PredecessorBlocks.insert(PredBB);
    // If we have fewer predecessor blocks than predecessors then the phi will
    // have more than one incoming value for the same block which we can't
    // handle.
    // TODO: This could be handled by erasing some of the duplicate incoming
    // values.
    if (PredecessorBlocks.size() != pred_size(BB))
      return false;
    for (auto &Pair : HoistableBranches) {
      if (Pair.second == BB) {
        // Which blocks are predecessors via this branch depends on if the
        // branch is triangle-like or diamond-like.
        if (Pair.first->getSuccessor(0) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        } else if (Pair.first->getSuccessor(1) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
        } else {
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        }
      }
    }
    // PredecessorBlocks will now be empty if for every predecessor of BB we
    // found a hoistable branch source.
    return PredecessorBlocks.empty();
  }

  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
    if (!ControlFlowHoisting)
      return CurLoop->getLoopPreheader();
    // If BB has already been hoisted, return that
    if (HoistDestinationMap.count(BB))
      return HoistDestinationMap[BB];

    // Check if this block is conditional based on a pending branch
    auto HasBBAsSuccessor =
        [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
          return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
                                       Pair.first->getSuccessor(1) == BB);
        };
    auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);

    // If not involved in a pending branch, hoist to preheader
    BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
    if (It == HoistableBranches.end()) {
      LLVM_DEBUG(dbgs() << "LICM using "
                        << InitialPreheader->getNameOrAsOperand()
                        << " as hoist destination for "
                        << BB->getNameOrAsOperand() << "\n");
      HoistDestinationMap[BB] = InitialPreheader;
      return InitialPreheader;
    }
    BranchInst *BI = It->first;
    assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
               HoistableBranches.end() &&
           "BB is expected to be the target of at most one branch");

    LLVMContext &C = BB->getContext();
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    BasicBlock *CommonSucc = HoistableBranches[BI];
    BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());

    // Create hoisted versions of blocks that currently don't have them
    auto CreateHoistedBlock = [&](BasicBlock *Orig) {
      if (HoistDestinationMap.count(Orig))
        return HoistDestinationMap[Orig];
      BasicBlock *New =
          BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
      HoistDestinationMap[Orig] = New;
      DT->addNewBlock(New, HoistTarget);
      if (CurLoop->getParentLoop())
        CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
      ++NumCreatedBlocks;
      LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
                        << " as hoist destination for " << Orig->getName()
                        << "\n");
      return New;
    };
    BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
    BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
    BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);

    // Link up these blocks with branches.
    if (!HoistCommonSucc->getTerminator()) {
      // The new common successor we've generated will branch to whatever that
      // hoist target branched to.
      BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
      assert(TargetSucc && "Expected hoist target to have a single successor");
      HoistCommonSucc->moveBefore(TargetSucc);
      BranchInst::Create(TargetSucc, HoistCommonSucc);
    }
    if (!HoistTrueDest->getTerminator()) {
      HoistTrueDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistTrueDest);
    }
    if (!HoistFalseDest->getTerminator()) {
      HoistFalseDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistFalseDest);
    }

    // If BI is being cloned to what was originally the preheader then
    // HoistCommonSucc will now be the new preheader.
    if (HoistTarget == InitialPreheader) {
      // Phis in the loop header now need to use the new preheader.
      InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
      MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
          HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
      // The new preheader dominates the loop header.
      DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
      DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
      DT->changeImmediateDominator(HeaderNode, PreheaderNode);
      // The preheader hoist destination is now the new preheader, with the
      // exception of the hoist destination of this branch.
      for (auto &Pair : HoistDestinationMap)
        if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
          Pair.second = HoistCommonSucc;
    }

    // Now finally clone BI.
    ReplaceInstWithInst(
        HoistTarget->getTerminator(),
        BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
    ++NumClonedBranches;

    assert(CurLoop->getLoopPreheader() &&
           "Hoisting blocks should not have destroyed preheader");
    return HoistDestinationMap[BB];
  }
};
} // namespace

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                       DominatorTree *DT, BlockFrequencyInfo *BFI,
                       TargetLibraryInfo *TLI, Loop *CurLoop,
                       MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
                       ICFLoopSafetyInfo *SafetyInfo,
                       SinkAndHoistLICMFlags &Flags,
                       OptimizationRemarkEmitter *ORE, bool LoopNestMode,
                       bool AllowSpeculation) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion.");

  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);

  // Keep track of instructions that have been hoisted, as they may need to be
  // re-hoisted if they end up not dominating all of their uses.
  SmallVector<Instruction *, 16> HoistedInstructions;

  // For PHI hoisting to work we need to hoist blocks before their successors.
  // We can do this by iterating through the blocks in the loop in reverse
  // post-order.
  LoopBlocksRPO Worklist(CurLoop);
  Worklist.perform(LI);
  bool Changed = false;
  for (BasicBlock *BB : Worklist) {
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
      continue;

    for (Instruction &I : llvm::make_early_inc_range(*BB)) {
      // Try constant folding this instruction. If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold it.
      if (Constant *C = ConstantFoldInstruction(
              &I, I.getModule()->getDataLayout(), TLI)) {
        LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
                          << '\n');
        // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          eraseInstruction(I, *SafetyInfo, MSSAU);
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader. We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction. We also check block frequency
      // to make sure the instruction only gets hoisted into colder blocks.
      // TODO: It may be safe to hoist if we are hoisting to a conditional block
      // and we have accurately duplicated the control flow from the loop header
      // to that block.
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
                             true, &Flags, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, TLI, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, SE, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication.
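      // For example (illustrative): with the allow-reciprocal fast-math flag
      // set and a loop-invariant divisor %d, "%q = fdiv fast float %x, %d"
      // becomes "%r = fdiv fast float 1.0, %d" hoisted out of the loop and
      // "%q = fmul fast float %x, %r" left in place.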
      if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
          CurLoop->isLoopInvariant(I.getOperand(1))) {
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(Product, I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, MSSAU);

        hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
              SafetyInfo, MSSAU, SE, ORE);
        HoistedInstructions.push_back(ReciprocalDivisor);
        Changed = true;
        continue;
      }

      auto IsInvariantStart = [&](Instruction &I) {
        using namespace PatternMatch;
        return I.use_empty() &&
               match(&I, m_Intrinsic<Intrinsic::invariant_start>());
      };
      auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
        return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
               SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
      };
      if ((IsInvariantStart(I) || isGuard(&I)) &&
          CurLoop->hasLoopInvariantOperands(&I) &&
          MustExecuteWithoutWritesBefore(I)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, SE, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      if (PHINode *PN = dyn_cast<PHINode>(&I)) {
        if (CFH.canHoistPHI(PN)) {
          // Redirect incoming blocks first to ensure that we create hoisted
          // versions of those blocks before we hoist the phi.
          for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
            PN->setIncomingBlock(
                i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
          hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
                MSSAU, SE, ORE);
          assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
          Changed = true;
          continue;
        }
      }

      // Remember possibly hoistable branches so we can actually hoist them
      // later if needed.
      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
        CFH.registerPossiblyHoistableBranch(BI);
    }
  }

  // If we hoisted instructions to a conditional block they may not dominate
  // their uses that weren't hoisted (such as phis where some operands are not
  // loop invariant). If so make them unconditional by moving them to their
  // immediate dominator. We iterate through the instructions in reverse order
  // which ensures that when we rehoist an instruction we rehoist its operands,
  // and we also keep track of where in the block we are rehoisting to, to make
  // sure that we rehoist instructions before the instructions that use them.
  Instruction *HoistPoint = nullptr;
  if (ControlFlowHoisting) {
    for (Instruction *I : reverse(HoistedInstructions)) {
      if (!llvm::all_of(I->uses(),
                        [&](Use &U) { return DT->dominates(I, U); })) {
        BasicBlock *Dominator =
            DT->getNode(I->getParent())->getIDom()->getBlock();
        if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
          if (HoistPoint)
            assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
                   "New hoist point expected to dominate old hoist point");
          HoistPoint = Dominator->getTerminator();
        }
        LLVM_DEBUG(dbgs() << "LICM rehoisting to "
                          << HoistPoint->getParent()->getNameOrAsOperand()
                          << ": " << *I << "\n");
        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
        HoistPoint = I;
        Changed = true;
      }
    }
  }
  if (VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  // Now that we've finished hoisting make sure that LI and DT are still
  // valid.
#ifdef EXPENSIVE_CHECKS
  if (Changed) {
    assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
           "Dominator tree verification failed");
    LI->verify(*DT);
  }
#endif

  return Changed;
}

// Return true if LI is invariant within the scope of the loop. LI is invariant
// if CurLoop is dominated by an invariant.start representing the same memory
// location and size as the memory location LI loads from, and also the
// invariant.start has no uses.
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());

  // It is not currently possible for clang to generate an invariant.start
  // intrinsic with scalable vector types because we don't support thread local
  // sizeless types and we don't permit sizeless types in structs or classes.
  // Furthermore, even if support is added for this in future the intrinsic
  // itself is defined to have a size of -1 for variable sized objects. This
  // makes it impossible to verify if the intrinsic envelops our region of
  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
  // types would have a -1 parameter, but the former is clearly double the size
  // of the latter.
  if (LocSizeInBits.isScalable())
    return false;

  // If the type is i8 addrspace(x)*, we know this is the type of the
  // llvm.invariant.start operand.
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
                                     LI->getPointerAddressSpace());
  unsigned BitcastsVisited = 0;
  // Look through bitcasts until we reach the i8* type (this is invariant.start
  // operand type).
  while (Addr->getType() != PtrInt8Ty) {
    auto *BC = dyn_cast<BitCastInst>(Addr);
    // Avoid traversing a high number of bitcast uses.
    if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
      return false;
    Addr = BC->getOperand(0);
  }
  // If we've ended up at a global/constant, bail. We shouldn't be looking at
  // uselists for non-local Values in a loop pass.
  if (isa<Constant>(Addr))
    return false;

  unsigned UsesVisited = 0;
  // Traverse all uses of the load operand value, to see if invariant.start is
  // one of the uses, and whether it dominates the load instruction.
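  // As an illustrative sketch: a call such as
  //   %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %p)
  // in a block properly dominating the loop header, with no uses of %inv,
  // makes a load of up to 4 bytes from %p eligible here.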
  for (auto *U : Addr->users()) {
    // Avoid traversing when the load operand has a high number of users.
    if (++UsesVisited > MaxNumUsesTraversed)
      return false;
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    // If there are escaping uses of the invariant.start instruction, the load
    // may be non-invariant.
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
    // The intrinsic supports having a -1 argument for variable sized objects
    // so we should check for that here.
    if (InvariantSize->isNegative())
      continue;
    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
    if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }

  return false;
}

namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
  // Only these instructions are hoistable/sinkable.
  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
          isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
          isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
          isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
          isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
          isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
          isa<InsertValueInst>(I) || isa<FreezeInst>(I));
}

/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
                const Loop *L) {
  if (CurAST) {
    for (AliasSet &AS : *CurAST) {
      if (!AS.isForwardingAliasSet() && AS.isMod()) {
        return false;
      }
    }
    return true;
  } else { /*MSSAU*/
    for (auto *BB : L->getBlocks())
      if (MSSAU->getMemorySSA()->getBlockDefs(BB))
        return false;
    return true;
  }
}

/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
                        const MemorySSAUpdater *MSSAU) {
  for (auto *BB : L->getBlocks())
    if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
      int NotAPhi = 0;
      for (const auto &Acc : *Accs) {
        if (isa<MemoryPhi>(&Acc))
          continue;
        const auto *MUD = cast<MemoryUseOrDef>(&Acc);
        if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
          return false;
      }
    }
  return true;
}
} // namespace

bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              MemorySSAUpdater *MSSAU,
                              bool TargetExecutesOncePerLoop,
                              SinkAndHoistLICMFlags *Flags,
                              OptimizationRemarkEmitter *ORE) {
  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
         "Either AliasSetTracker or MemorySSA should be initialized.");

  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
    return false;

  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
  if (MSSA)
    assert(Flags != nullptr && "Flags cannot be null.");

  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    bool Invalidated;
    if (CurAST)
      Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
                                             CurLoop, AA);
    else
      Invalidated = pointerInvalidatedByLoopWithMSSA(
          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // The convergent attribute is used on operations that involve inter-thread
    // communication, whose results are implicitly affected by the enclosing
    // control flow. It is not safe to hoist or sink such operations across
    // control flow.
    if (CI->isConvergent())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
      // Widenable conditions don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AAResults::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AAResults::onlyAccessesArgPointees(Behavior)) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->args())
          if (Op->getType()->isPointerTy()) {
            bool Invalidated;
            if (CurAST)
              Invalidated = pointerInvalidatedByLoop(
                  MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
            else
              Invalidated = pointerInvalidatedByLoopWithMSSA(
                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
                  *Flags);
            if (Invalidated)
              return false;
          }
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(CurAST, MSSAU, CurLoop))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering. For the moment,
    // just give up if there are any other memory operations in the loop.
    if (CurAST) {
      auto Begin = CurAST->begin();
      assert(Begin != CurAST->end() && "must contain FI");
      if (std::next(Begin) != CurAST->end())
        // constant memory for instance, TODO: handle better
        return false;
      auto *UniqueI = Begin->getUniqueInstruction();
      if (!UniqueI)
        // other memory op, give up
        return false;
      (void)FI; // suppress unused variable warning
      assert(UniqueI == FI && "AS must contain FI");
      return true;
    } else // MSSAU
      return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop. For those cases, we fall back to
    // load/store promotion instead. TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    if (CurAST) {
      auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));

      if (AS.isRef() || !AS.isMustAlias())
        // Quick exit test, handled by the full path below as well.
        return false;
      auto *UniqueI = AS.getUniqueInstruction();
      if (!UniqueI)
        // other memory op, give up
        return false;
      assert(UniqueI == SI && "AS must contain SI");
      return true;
    } else { // MSSAU
      if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
        return true;
      // If there are more accesses than the Promotion cap or no "quota" to
      // check clobber, then give up as we're not walking a list that long.
      if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
        return false;
      // If there are interfering Uses (i.e. their defining access is in the
      // loop), or ordered loads (stored as Defs!), don't move this store.
      // Could do better here, but this is conservatively correct.
      // TODO: Cache set of Uses on the first walk in runOnLoop, update when
      // moving accesses. Can also extend to dominating uses.
      auto *SIMD = MSSA->getMemoryAccess(SI);
      for (auto *BB : CurLoop->getBlocks())
        if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
          for (const auto &MA : *Accesses)
            if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
              auto *MD = MU->getDefiningAccess();
              if (!MSSA->isLiveOnEntryDef(MD) &&
                  CurLoop->contains(MD->getBlock()))
                return false;
              // Disable hoisting past potentially interfering loads. Optimized
              // Uses may point to an access outside the loop, as getClobbering
              // checks the previous iteration when walking the backedge.
              // FIXME: More precise: no Uses that alias SI.
              if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
                return false;
            } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
              if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
                (void)LI; // Silence warning.
                assert(!LI->isUnordered() && "Expected ordered load");
                return false;
              }
              // Any call, while it may not be clobbering SI, may be a use.
              if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
                // Check if the call may read from the memory location written
                // to by SI. Check CI's attributes and arguments; the number of
                // such checks performed is limited above by NoOfMemAccTooLarge.
                ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
                if (isModOrRefSet(MRI))
                  return false;
              }
            }
        }
      auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
      Flags->incrementClobberingCalls();
      // If there are no clobbering Defs in the loop, store is safe to hoist.
      return MSSA->isLiveOnEntryDef(Source) ||
             !CurLoop->contains(Source->getBlock());
    }
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing; it's up to the caller
  // to check fault safety.
  return true;
}

/// Returns true if a PHINode is trivially replaceable with an Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
///
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}

/// Return true if the instruction is free in the loop.
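/// "Free" here means TTI considers it free for size/latency purposes, e.g. a
/// GEP that is expected to fold into the addressing mode of its in-loop
/// load/store users (see the GEP special case below).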
1360 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, 1361 const TargetTransformInfo *TTI) { 1362 1363 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 1364 if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != 1365 TargetTransformInfo::TCC_Free) 1366 return false; 1367 // For a GEP, we cannot simply use getUserCost because currently it 1368 // optimistically assumes that a GEP will fold into addressing mode 1369 // regardless of its users. 1370 const BasicBlock *BB = GEP->getParent(); 1371 for (const User *U : GEP->users()) { 1372 const Instruction *UI = cast<Instruction>(U); 1373 if (CurLoop->contains(UI) && 1374 (BB != UI->getParent() || 1375 (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) 1376 return false; 1377 } 1378 return true; 1379 } else 1380 return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == 1381 TargetTransformInfo::TCC_Free; 1382 } 1383 1384 /// Return true if the only users of this instruction are outside of 1385 /// the loop. If this is true, we can sink the instruction to the exit 1386 /// blocks of the loop. 1387 /// 1388 /// We also return true if the instruction could be folded away in lowering. 1389 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). 1390 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, 1391 const LoopSafetyInfo *SafetyInfo, 1392 TargetTransformInfo *TTI, bool &FreeInLoop, 1393 bool LoopNestMode) { 1394 const auto &BlockColors = SafetyInfo->getBlockColors(); 1395 bool IsFree = isFreeInLoop(I, CurLoop, TTI); 1396 for (const User *U : I.users()) { 1397 const Instruction *UI = cast<Instruction>(U); 1398 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 1399 const BasicBlock *BB = PN->getParent(); 1400 // We cannot sink uses in catchswitches. 1401 if (isa<CatchSwitchInst>(BB->getTerminator())) 1402 return false; 1403 1404 // We need to sink a callsite to a unique funclet. Avoid sinking if the 1405 // phi use is too muddled. 1406 if (isa<CallInst>(I)) 1407 if (!BlockColors.empty() && 1408 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 1409 return false; 1410 1411 if (LoopNestMode) { 1412 while (isa<PHINode>(UI) && UI->hasOneUser() && 1413 UI->getNumOperands() == 1) { 1414 if (!CurLoop->contains(UI)) 1415 break; 1416 UI = cast<Instruction>(UI->user_back()); 1417 } 1418 } 1419 } 1420 1421 if (CurLoop->contains(UI)) { 1422 if (IsFree) { 1423 FreeInLoop = true; 1424 continue; 1425 } 1426 return false; 1427 } 1428 } 1429 return true; 1430 } 1431 1432 static Instruction *cloneInstructionInExitBlock( 1433 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, 1434 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { 1435 Instruction *New; 1436 if (auto *CI = dyn_cast<CallInst>(&I)) { 1437 const auto &BlockColors = SafetyInfo->getBlockColors(); 1438 1439 // Sinking call-sites need to be handled differently from other 1440 // instructions. The cloned call-site needs a funclet bundle operand 1441 // appropriate for its location in the CFG. 
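    //
    // For illustration only (hypothetical IR): a call cloned into an exit
    // block colored by a cleanup funclet needs a bundle naming that pad, e.g.
    //
    //   %pad = cleanuppad within none []
    //   call void @f() [ "funclet"(token %pad) ]
    //
    // so any funclet bundle from the original call site is dropped below and,
    // if the exit block has an EH pad color, a bundle for that pad is added.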
1442 SmallVector<OperandBundleDef, 1> OpBundles; 1443 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); 1444 BundleIdx != BundleEnd; ++BundleIdx) { 1445 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); 1446 if (Bundle.getTagID() == LLVMContext::OB_funclet) 1447 continue; 1448 1449 OpBundles.emplace_back(Bundle); 1450 } 1451 1452 if (!BlockColors.empty()) { 1453 const ColorVector &CV = BlockColors.find(&ExitBlock)->second; 1454 assert(CV.size() == 1 && "non-unique color for exit block!"); 1455 BasicBlock *BBColor = CV.front(); 1456 Instruction *EHPad = BBColor->getFirstNonPHI(); 1457 if (EHPad->isEHPad()) 1458 OpBundles.emplace_back("funclet", EHPad); 1459 } 1460 1461 New = CallInst::Create(CI, OpBundles); 1462 } else { 1463 New = I.clone(); 1464 } 1465 1466 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); 1467 if (!I.getName().empty()) 1468 New->setName(I.getName() + ".le"); 1469 1470 if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { 1471 // Create a new MemoryAccess and let MemorySSA set its defining access. 1472 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( 1473 New, nullptr, New->getParent(), MemorySSA::Beginning); 1474 if (NewMemAcc) { 1475 if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) 1476 MSSAU->insertDef(MemDef, /*RenameUses=*/true); 1477 else { 1478 auto *MemUse = cast<MemoryUse>(NewMemAcc); 1479 MSSAU->insertUse(MemUse, /*RenameUses=*/true); 1480 } 1481 } 1482 } 1483 1484 // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that 1485 // this is particularly cheap because we can rip off the PHI node that we're 1486 // replacing for the number and blocks of the predecessors. 1487 // OPT: If this shows up in a profile, we can instead finish sinking all 1488 // invariant instructions, and then walk their operands to re-establish 1489 // LCSSA. That will eliminate creating PHI nodes just to nuke them when 1490 // sinking bottom-up. 
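  //
  // For illustration only (hypothetical IR): if the clone uses a value %x
  // defined inside the loop, the loop below reroutes that operand through a
  // fresh LCSSA phi so the clone does not create a new out-of-loop use of %x:
  //
  //   exit:
  //     %x.lcssa = phi i32 [ %x, %latch ]
  //     %sunk = add i32 %x.lcssa, 1        ; the cloned instruction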
1491   for (Use &Op : New->operands())
1492     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1493       auto *OInst = cast<Instruction>(Op.get());
1494       PHINode *OpPN =
1495           PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1496                           OInst->getName() + ".lcssa", &ExitBlock.front());
1497       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1498         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1499       Op = OpPN;
1500     }
1501   return New;
1502 }
1503 
1504 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1505                              MemorySSAUpdater *MSSAU) {
1506   if (MSSAU)
1507     MSSAU->removeMemoryAccess(&I);
1508   SafetyInfo.removeInstruction(&I);
1509   I.eraseFromParent();
1510 }
1511 
1512 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1513                                   ICFLoopSafetyInfo &SafetyInfo,
1514                                   MemorySSAUpdater *MSSAU,
1515                                   ScalarEvolution *SE) {
1516   SafetyInfo.removeInstruction(&I);
1517   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1518   I.moveBefore(&Dest);
1519   if (MSSAU)
1520     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1521             MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1522       MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
1523                          MemorySSA::BeforeTerminator);
1524   if (SE)
1525     SE->forgetValue(&I);
1526 }
1527 
1528 static Instruction *sinkThroughTriviallyReplaceablePHI(
1529     PHINode *TPN, Instruction *I, LoopInfo *LI,
1530     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1531     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1532     MemorySSAUpdater *MSSAU) {
1533   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1534          "Expect only trivially replaceable PHI");
1535   BasicBlock *ExitBlock = TPN->getParent();
1536   Instruction *New;
1537   auto It = SunkCopies.find(ExitBlock);
1538   if (It != SunkCopies.end())
1539     New = It->second;
1540   else
1541     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1542         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1543   return New;
1544 }
1545 
1546 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1547   BasicBlock *BB = PN->getParent();
1548   if (!BB->canSplitPredecessors())
1549     return false;
1550   // It is not impossible to split EHPad blocks, but if BlockColors already
1551   // exist, doing so requires updating BlockColors for all offspring blocks
1552   // accordingly. By skipping this corner case, we keep the BlockColors update
1553   // after splitting a predecessor fairly simple.
1554 if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) 1555 return false; 1556 for (BasicBlock *BBPred : predecessors(BB)) { 1557 if (isa<IndirectBrInst>(BBPred->getTerminator()) || 1558 isa<CallBrInst>(BBPred->getTerminator())) 1559 return false; 1560 } 1561 return true; 1562 } 1563 1564 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, 1565 LoopInfo *LI, const Loop *CurLoop, 1566 LoopSafetyInfo *SafetyInfo, 1567 MemorySSAUpdater *MSSAU) { 1568 #ifndef NDEBUG 1569 SmallVector<BasicBlock *, 32> ExitBlocks; 1570 CurLoop->getUniqueExitBlocks(ExitBlocks); 1571 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 1572 ExitBlocks.end()); 1573 #endif 1574 BasicBlock *ExitBB = PN->getParent(); 1575 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); 1576 1577 // Split predecessors of the loop exit to make instructions in the loop are 1578 // exposed to exit blocks through trivially replaceable PHIs while keeping the 1579 // loop in the canonical form where each predecessor of each exit block should 1580 // be contained within the loop. For example, this will convert the loop below 1581 // from 1582 // 1583 // LB1: 1584 // %v1 = 1585 // br %LE, %LB2 1586 // LB2: 1587 // %v2 = 1588 // br %LE, %LB1 1589 // LE: 1590 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable 1591 // 1592 // to 1593 // 1594 // LB1: 1595 // %v1 = 1596 // br %LE.split, %LB2 1597 // LB2: 1598 // %v2 = 1599 // br %LE.split2, %LB1 1600 // LE.split: 1601 // %p1 = phi [%v1, %LB1] <-- trivially replaceable 1602 // br %LE 1603 // LE.split2: 1604 // %p2 = phi [%v2, %LB2] <-- trivially replaceable 1605 // br %LE 1606 // LE: 1607 // %p = phi [%p1, %LE.split], [%p2, %LE.split2] 1608 // 1609 const auto &BlockColors = SafetyInfo->getBlockColors(); 1610 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); 1611 while (!PredBBs.empty()) { 1612 BasicBlock *PredBB = *PredBBs.begin(); 1613 assert(CurLoop->contains(PredBB) && 1614 "Expect all predecessors are in the loop"); 1615 if (PN->getBasicBlockIndex(PredBB) >= 0) { 1616 BasicBlock *NewPred = SplitBlockPredecessors( 1617 ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); 1618 // Since we do not allow splitting EH-block with BlockColors in 1619 // canSplitPredecessors(), we can simply assign predecessor's color to 1620 // the new block. 1621 if (!BlockColors.empty()) 1622 // Grab a reference to the ColorVector to be inserted before getting the 1623 // reference to the vector we are copying because inserting the new 1624 // element in BlockColors might cause the map to be reallocated. 1625 SafetyInfo->copyColors(NewPred, PredBB); 1626 } 1627 PredBBs.remove(PredBB); 1628 } 1629 } 1630 1631 /// When an instruction is found to only be used outside of the loop, this 1632 /// function moves it to the exit blocks and patches up SSA form as needed. 1633 /// This method is guaranteed to remove the original instruction from its 1634 /// position, and may either delete it or move it to outside of the loop. 1635 /// 1636 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, 1637 BlockFrequencyInfo *BFI, const Loop *CurLoop, 1638 ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, 1639 OptimizationRemarkEmitter *ORE) { 1640 bool Changed = false; 1641 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 1642 1643 // Iterate over users to be ready for actual sinking. 
Replace users via 1644 // unreachable blocks with undef and make all user PHIs trivially replaceable. 1645 SmallPtrSet<Instruction *, 8> VisitedUsers; 1646 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { 1647 auto *User = cast<Instruction>(*UI); 1648 Use &U = UI.getUse(); 1649 ++UI; 1650 1651 if (VisitedUsers.count(User) || CurLoop->contains(User)) 1652 continue; 1653 1654 if (!DT->isReachableFromEntry(User->getParent())) { 1655 U = UndefValue::get(I.getType()); 1656 Changed = true; 1657 continue; 1658 } 1659 1660 // The user must be a PHI node. 1661 PHINode *PN = cast<PHINode>(User); 1662 1663 // Surprisingly, instructions can be used outside of loops without any 1664 // exits. This can only happen in PHI nodes if the incoming block is 1665 // unreachable. 1666 BasicBlock *BB = PN->getIncomingBlock(U); 1667 if (!DT->isReachableFromEntry(BB)) { 1668 U = UndefValue::get(I.getType()); 1669 Changed = true; 1670 continue; 1671 } 1672 1673 VisitedUsers.insert(PN); 1674 if (isTriviallyReplaceablePHI(*PN, I)) 1675 continue; 1676 1677 if (!canSplitPredecessors(PN, SafetyInfo)) 1678 return Changed; 1679 1680 // Split predecessors of the PHI so that we can make users trivially 1681 // replaceable. 1682 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); 1683 1684 // Should rebuild the iterators, as they may be invalidated by 1685 // splitPredecessorsOfLoopExit(). 1686 UI = I.user_begin(); 1687 UE = I.user_end(); 1688 } 1689 1690 if (VisitedUsers.empty()) 1691 return Changed; 1692 1693 ORE->emit([&]() { 1694 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 1695 << "sinking " << ore::NV("Inst", &I); 1696 }); 1697 if (isa<LoadInst>(I)) 1698 ++NumMovedLoads; 1699 else if (isa<CallInst>(I)) 1700 ++NumMovedCalls; 1701 ++NumSunk; 1702 1703 #ifndef NDEBUG 1704 SmallVector<BasicBlock *, 32> ExitBlocks; 1705 CurLoop->getUniqueExitBlocks(ExitBlocks); 1706 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 1707 ExitBlocks.end()); 1708 #endif 1709 1710 // Clones of this instruction. Don't create more than one per exit block! 1711 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 1712 1713 // If this instruction is only used outside of the loop, then all users are 1714 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 1715 // the instruction. 1716 // First check if I is worth sinking for all uses. Sink only when it is worth 1717 // across all uses. 1718 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); 1719 for (auto *UI : Users) { 1720 auto *User = cast<Instruction>(UI); 1721 1722 if (CurLoop->contains(User)) 1723 continue; 1724 1725 PHINode *PN = cast<PHINode>(User); 1726 assert(ExitBlockSet.count(PN->getParent()) && 1727 "The LCSSA PHI is not in an exit block!"); 1728 1729 // The PHI must be trivially replaceable. 1730 Instruction *New = sinkThroughTriviallyReplaceablePHI( 1731 PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); 1732 PN->replaceAllUsesWith(New); 1733 eraseInstruction(*PN, *SafetyInfo, nullptr); 1734 Changed = true; 1735 } 1736 return Changed; 1737 } 1738 1739 /// When an instruction is found to only use loop invariant operands that 1740 /// is safe to hoist, this instruction is called to do the dirty work. 
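///
/// For illustration only (hypothetical IR): a computation whose operands are
/// all defined outside the loop, such as
///
///   loop:
///     %inv = mul i32 %a, %b
///
/// is moved to the destination block (typically the preheader), so it runs
/// once per loop entry rather than once per iteration.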
1741 /// 1742 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 1743 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, 1744 MemorySSAUpdater *MSSAU, ScalarEvolution *SE, 1745 OptimizationRemarkEmitter *ORE) { 1746 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " 1747 << I << "\n"); 1748 ORE->emit([&]() { 1749 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " 1750 << ore::NV("Inst", &I); 1751 }); 1752 1753 // Metadata can be dependent on conditions we are hoisting above. 1754 // Conservatively strip all metadata on the instruction unless we were 1755 // guaranteed to execute I if we entered the loop, in which case the metadata 1756 // is valid in the loop preheader. 1757 // Similarly, If I is a call and it is not guaranteed to execute in the loop, 1758 // then moving to the preheader means we should strip attributes on the call 1759 // that can cause UB since we may be hoisting above conditions that allowed 1760 // inferring those attributes. They may not be valid at the preheader. 1761 if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) && 1762 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 1763 // time in isGuaranteedToExecute if we don't actually have anything to 1764 // drop. It is a compile time optimization, not required for correctness. 1765 !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) 1766 I.dropUndefImplyingAttrsAndUnknownMetadata(); 1767 1768 if (isa<PHINode>(I)) 1769 // Move the new node to the end of the phi list in the destination block. 1770 moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE); 1771 else 1772 // Move the new node to the destination block, before its terminator. 1773 moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); 1774 1775 I.updateLocationAfterHoist(); 1776 1777 if (isa<LoadInst>(I)) 1778 ++NumMovedLoads; 1779 else if (isa<CallInst>(I)) 1780 ++NumMovedCalls; 1781 ++NumHoisted; 1782 } 1783 1784 /// Only sink or hoist an instruction if it is not a trapping instruction, 1785 /// or if the instruction is known not to trap when moved to the preheader. 1786 /// or if it is a trapping instruction and is guaranteed to execute. 1787 static bool isSafeToExecuteUnconditionally( 1788 Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, 1789 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, 1790 OptimizationRemarkEmitter *ORE, const Instruction *CtxI, 1791 bool AllowSpeculation) { 1792 if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) 1793 return true; 1794 1795 bool GuaranteedToExecute = 1796 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); 1797 1798 if (!GuaranteedToExecute) { 1799 auto *LI = dyn_cast<LoadInst>(&Inst); 1800 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 1801 ORE->emit([&]() { 1802 return OptimizationRemarkMissed( 1803 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 1804 << "failed to hoist load with loop-invariant address " 1805 "because load is conditionally executed"; 1806 }); 1807 } 1808 1809 return GuaranteedToExecute; 1810 } 1811 1812 namespace { 1813 class LoopPromoter : public LoadAndStorePromoter { 1814 Value *SomePtr; // Designated pointer to store to. 
1815   const SmallSetVector<Value *, 8> &PointerMustAliases;
1816   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1817   SmallVectorImpl<Instruction *> &LoopInsertPts;
1818   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1819   PredIteratorCache &PredCache;
1820   MemorySSAUpdater *MSSAU;
1821   LoopInfo &LI;
1822   DebugLoc DL;
1823   Align Alignment;
1824   bool UnorderedAtomic;
1825   AAMDNodes AATags;
1826   ICFLoopSafetyInfo &SafetyInfo;
1827   bool CanInsertStoresInExitBlocks;
1828 
1829   // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1830   // (if legal) if doing so would add an out-of-loop use to an instruction
1831   // defined in-loop.
1832   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1833     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1834       return V;
1835 
1836     Instruction *I = cast<Instruction>(V);
1837     // We need to create an LCSSA PHI node for the incoming value and
1838     // store that.
1839     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1840                                   I->getName() + ".lcssa", &BB->front());
1841     for (BasicBlock *Pred : PredCache.get(BB))
1842       PN->addIncoming(I, Pred);
1843     return PN;
1844   }
1845 
1846 public:
1847   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1848                const SmallSetVector<Value *, 8> &PMA,
1849                SmallVectorImpl<BasicBlock *> &LEB,
1850                SmallVectorImpl<Instruction *> &LIP,
1851                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1852                MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
1853                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1854                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1855       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1856         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1857         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1858         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1859         SafetyInfo(SafetyInfo),
1860         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
1861 
1862   bool isInstInList(Instruction *I,
1863                     const SmallVectorImpl<Instruction *> &) const override {
1864     Value *Ptr;
1865     if (LoadInst *LI = dyn_cast<LoadInst>(I))
1866       Ptr = LI->getOperand(0);
1867     else
1868       Ptr = cast<StoreInst>(I)->getPointerOperand();
1869     return PointerMustAliases.count(Ptr);
1870   }
1871 
1872   void insertStoresInLoopExitBlocks() {
1873     // Insert stores in the loop exit blocks. Each exit block gets a store of
1874     // the live-out value that feeds it. Since we've already told the SSA
1875     // updater about the defs in the loop and the preheader definition, it is
1876     // all set and we can start using it.
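    //
    // For illustration only (hypothetical IR): after promoting accesses to
    // *%P, each exit block ends up with roughly
    //
    //   exit:
    //     store i32 %lastValue, i32* %P
    //
    // where %lastValue is the value the SSA updater reports as live out of the
    // loop on that path, so the store happens once instead of per iteration.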
1877 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 1878 BasicBlock *ExitBlock = LoopExitBlocks[i]; 1879 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 1880 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 1881 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 1882 Instruction *InsertPos = LoopInsertPts[i]; 1883 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 1884 if (UnorderedAtomic) 1885 NewSI->setOrdering(AtomicOrdering::Unordered); 1886 NewSI->setAlignment(Alignment); 1887 NewSI->setDebugLoc(DL); 1888 if (AATags) 1889 NewSI->setAAMetadata(AATags); 1890 1891 MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; 1892 MemoryAccess *NewMemAcc; 1893 if (!MSSAInsertPoint) { 1894 NewMemAcc = MSSAU->createMemoryAccessInBB( 1895 NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); 1896 } else { 1897 NewMemAcc = 1898 MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); 1899 } 1900 MSSAInsertPts[i] = NewMemAcc; 1901 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); 1902 // FIXME: true for safety, false may still be correct. 1903 } 1904 } 1905 1906 void doExtraRewritesBeforeFinalDeletion() override { 1907 if (CanInsertStoresInExitBlocks) 1908 insertStoresInLoopExitBlocks(); 1909 } 1910 1911 void instructionDeleted(Instruction *I) const override { 1912 SafetyInfo.removeInstruction(I); 1913 MSSAU->removeMemoryAccess(I); 1914 } 1915 1916 bool shouldDelete(Instruction *I) const override { 1917 if (isa<StoreInst>(I)) 1918 return CanInsertStoresInExitBlocks; 1919 return true; 1920 } 1921 }; 1922 1923 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L, 1924 DominatorTree *DT) { 1925 // We can perform the captured-before check against any instruction in the 1926 // loop header, as the loop header is reachable from any instruction inside 1927 // the loop. 1928 // TODO: ReturnCaptures=true shouldn't be necessary here. 1929 return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true, 1930 /* StoreCaptures */ true, 1931 L->getHeader()->getTerminator(), DT); 1932 } 1933 1934 /// Return true if we can prove that a caller cannot inspect the object if an 1935 /// unwind occurs inside the loop. 1936 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L, 1937 DominatorTree *DT) { 1938 bool RequiresNoCaptureBeforeUnwind; 1939 if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind)) 1940 return false; 1941 1942 return !RequiresNoCaptureBeforeUnwind || 1943 isNotCapturedBeforeOrInLoop(Object, L, DT); 1944 } 1945 1946 } // namespace 1947 1948 /// Try to promote memory values to scalars by sinking stores out of the 1949 /// loop and moving loads to before the loop. We do this by looping over 1950 /// the stores in the loop, looking for stores to Must pointers which are 1951 /// loop invariant. 1952 /// 1953 bool llvm::promoteLoopAccessesToScalars( 1954 const SmallSetVector<Value *, 8> &PointerMustAliases, 1955 SmallVectorImpl<BasicBlock *> &ExitBlocks, 1956 SmallVectorImpl<Instruction *> &InsertPts, 1957 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC, 1958 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1959 Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, 1960 OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { 1961 // Verify inputs. 
1962   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1963          SafetyInfo != nullptr &&
1964          "Unexpected Input to promoteLoopAccessesToScalars");
1965 
1966   Value *SomePtr = *PointerMustAliases.begin();
1967   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1968 
1969   // It is not safe to promote a load/store from the loop if the load/store is
1970   // conditional. For example, turning:
1971   //
1972   //    for () { if (c) *P += 1; }
1973   //
1974   // into:
1975   //
1976   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1977   //
1978   // is not safe, because *P may only be valid to access if 'c' is true.
1979   //
1980   // The safety property divides into two parts:
1981   // p1) The memory may not be dereferenceable on entry to the loop. In this
1982   //     case, we can't insert the required load in the preheader.
1983   // p2) The memory model does not allow us to insert a store along any dynamic
1984   //     path which did not originally have one.
1985   //
1986   // If at least one store is guaranteed to execute, both properties are
1987   // satisfied, and promotion is legal.
1988   //
1989   // This, however, is not a necessary condition. Even if no store/load is
1990   // guaranteed to execute, we can still establish these properties.
1991   // We can establish (p1) by proving that hoisting the load into the preheader
1992   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1993   // can use any access within the alias set to prove dereferenceability,
1994   // since they're all must alias.
1995   //
1996   // There are two ways to establish (p2):
1997   // a) Prove the location is thread-local. In this case the memory model
1998   // requirement does not apply, and stores are safe to insert.
1999   // b) Prove a store dominates every exit block. In this case, if an exit
2000   // block is reached, the original dynamic path would have taken us through
2001   // the store, so inserting a store into the exit block is safe. Note that this
2002   // is different from the store being guaranteed to execute. For instance,
2003   // if an exception is thrown on the first iteration of the loop, the original
2004   // store is never executed, but the exit blocks are not executed either.
2005 
2006   bool DereferenceableInPH = false;
2007   bool SafeToInsertStore = false;
2008   bool FoundLoadToPromote = false;
2009 
2010   SmallVector<Instruction *, 64> LoopUses;
2011 
2012   // We start with an alignment of one and try to find instructions that allow
2013   // us to prove better alignment.
2014   Align Alignment;
2015   // Keep track of which types of access we see.
2016   bool SawUnorderedAtomic = false;
2017   bool SawNotAtomic = false;
2018   AAMDNodes AATags;
2019 
2020   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2021 
2022   bool IsKnownThreadLocalObject = false;
2023   if (SafetyInfo->anyBlockMayThrow()) {
2024     // If a loop can throw, we have to insert a store along each unwind edge.
2025     // That said, we can't actually make the unwind edge explicit. Therefore,
2026     // we have to prove that the store is dead along the unwind edge. We do
2027     // this by proving that the caller can't have a reference to the object
2028     // after return and thus can't possibly load from the object.
2029     Value *Object = getUnderlyingObject(SomePtr);
2030     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2031       return false;
2032     // Subtlety: Alloca's aren't visible to callers, but *are* potentially
2033     // visible to other threads if captured and used during their lifetimes.
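    // For example (hypothetical C, for illustration only): if a stack object's
    // address has been handed to another thread,
    //
    //   spawn_worker(&local);
    //   for (...) { if (c) local = v; }
    //
    // that thread may observe stores to it, so an alloca cannot be assumed
    // thread-local here even though our caller cannot inspect it on unwind.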
2034     IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
2035   }
2036 
2037   // Check that all accesses to pointers in the alias set use the same type.
2038   // We cannot (yet) promote a memory location that is loaded and stored in
2039   // different sizes. While we are at it, collect alignment and AA info.
2040   Type *AccessTy = nullptr;
2041   for (Value *ASIV : PointerMustAliases) {
2042     for (User *U : ASIV->users()) {
2043       // Ignore instructions that are outside the loop.
2044       Instruction *UI = dyn_cast<Instruction>(U);
2045       if (!UI || !CurLoop->contains(UI))
2046         continue;
2047 
2048       // If there is a non-load/store instruction in the loop, we can't promote
2049       // the memory location.
2050       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2051         if (!Load->isUnordered())
2052           return false;
2053 
2054         SawUnorderedAtomic |= Load->isAtomic();
2055         SawNotAtomic |= !Load->isAtomic();
2056         FoundLoadToPromote = true;
2057 
2058         Align InstAlignment = Load->getAlign();
2059 
2060         // Note that proving a load safe to speculate requires proving
2061         // sufficient alignment at the target location. Proving it guaranteed
2062         // to execute does as well. Thus we can increase our guaranteed
2063         // alignment as well.
2064         if (!DereferenceableInPH || (InstAlignment > Alignment))
2065           if (isSafeToExecuteUnconditionally(
2066                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2067                   Preheader->getTerminator(), AllowSpeculation)) {
2068             DereferenceableInPH = true;
2069             Alignment = std::max(Alignment, InstAlignment);
2070           }
2071       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2072         // Stores *of* the pointer are not interesting, only stores *to* the
2073         // pointer.
2074         if (UI->getOperand(1) != ASIV)
2075           continue;
2076         if (!Store->isUnordered())
2077           return false;
2078 
2079         SawUnorderedAtomic |= Store->isAtomic();
2080         SawNotAtomic |= !Store->isAtomic();
2081 
2082         // If the store is guaranteed to execute, both properties are satisfied.
2083         // We may want to check if a store is guaranteed to execute even if we
2084         // already know that promotion is safe, since it may have higher
2085         // alignment than any other guaranteed stores, in which case we can
2086         // raise the alignment on the promoted store.
2087         Align InstAlignment = Store->getAlign();
2088 
2089         if (!DereferenceableInPH || !SafeToInsertStore ||
2090             (InstAlignment > Alignment)) {
2091           if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2092             DereferenceableInPH = true;
2093             SafeToInsertStore = true;
2094             Alignment = std::max(Alignment, InstAlignment);
2095           }
2096         }
2097 
2098         // If a store dominates all exit blocks, it is safe to sink.
2099         // As explained above, if an exit block was executed, a dominating
2100         // store must have been executed at least once, so we are not
2101         // introducing stores on paths that did not have them.
2102         // Note that this only looks at explicit exit blocks. If we ever
2103         // start sinking stores into unwind edges (see above), this will break.
2104         if (!SafeToInsertStore)
2105           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2106             return DT->dominates(Store->getParent(), Exit);
2107           });
2108 
2109         // If the store is not guaranteed to execute, we may still get
2110         // deref info through it.
2111         if (!DereferenceableInPH) {
2112           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2113               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2114               Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
2115         }
2116       } else
2117         return false; // Not a load or store.
2118 2119 if (!AccessTy) 2120 AccessTy = getLoadStoreType(UI); 2121 else if (AccessTy != getLoadStoreType(UI)) 2122 return false; 2123 2124 // Merge the AA tags. 2125 if (LoopUses.empty()) { 2126 // On the first load/store, just take its AA tags. 2127 AATags = UI->getAAMetadata(); 2128 } else if (AATags) { 2129 AATags = AATags.merge(UI->getAAMetadata()); 2130 } 2131 2132 LoopUses.push_back(UI); 2133 } 2134 } 2135 2136 // If we found both an unordered atomic instruction and a non-atomic memory 2137 // access, bail. We can't blindly promote non-atomic to atomic since we 2138 // might not be able to lower the result. We can't downgrade since that 2139 // would violate memory model. Also, align 0 is an error for atomics. 2140 if (SawUnorderedAtomic && SawNotAtomic) 2141 return false; 2142 2143 // If we're inserting an atomic load in the preheader, we must be able to 2144 // lower it. We're only guaranteed to be able to lower naturally aligned 2145 // atomics. 2146 if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy)) 2147 return false; 2148 2149 // If we couldn't prove we can hoist the load, bail. 2150 if (!DereferenceableInPH) 2151 return false; 2152 2153 // We know we can hoist the load, but don't have a guaranteed store. 2154 // Check whether the location is thread-local. If it is, then we can insert 2155 // stores along paths which originally didn't have them without violating the 2156 // memory model. 2157 if (!SafeToInsertStore) { 2158 if (IsKnownThreadLocalObject) 2159 SafeToInsertStore = true; 2160 else { 2161 Value *Object = getUnderlyingObject(SomePtr); 2162 SafeToInsertStore = 2163 (isNoAliasCall(Object) || isa<AllocaInst>(Object)) && 2164 isNotCapturedBeforeOrInLoop(Object, CurLoop, DT); 2165 } 2166 } 2167 2168 // If we've still failed to prove we can sink the store, hoist the load 2169 // only, if possible. 2170 if (!SafeToInsertStore && !FoundLoadToPromote) 2171 // If we cannot hoist the load either, give up. 2172 return false; 2173 2174 // Lets do the promotion! 2175 if (SafeToInsertStore) 2176 LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr 2177 << '\n'); 2178 else 2179 LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr 2180 << '\n'); 2181 2182 ORE->emit([&]() { 2183 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", 2184 LoopUses[0]) 2185 << "Moving accesses to memory location out of the loop"; 2186 }); 2187 ++NumPromoted; 2188 2189 // Look at all the loop uses, and try to merge their locations. 2190 std::vector<const DILocation *> LoopUsesLocs; 2191 for (auto U : LoopUses) 2192 LoopUsesLocs.push_back(U->getDebugLoc().get()); 2193 auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); 2194 2195 // We use the SSAUpdater interface to insert phi nodes as required. 2196 SmallVector<PHINode *, 16> NewPHIs; 2197 SSAUpdater SSA(&NewPHIs); 2198 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 2199 InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, 2200 Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, 2201 SafeToInsertStore); 2202 2203 // Set up the preheader to have a definition of the value. It is the live-out 2204 // value from the preheader that uses in the loop will use. 
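  //
  // For illustration only (hypothetical IR): this introduces
  //
  //   preheader:
  //     %P.promoted = load i32, i32* %P
  //
  // and registers it with the SSA updater, so loads of *%P inside the loop can
  // be rewritten to use SSA values instead of re-reading memory.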
2205 LoadInst *PreheaderLoad = new LoadInst( 2206 AccessTy, SomePtr, SomePtr->getName() + ".promoted", 2207 Preheader->getTerminator()); 2208 if (SawUnorderedAtomic) 2209 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 2210 PreheaderLoad->setAlignment(Alignment); 2211 PreheaderLoad->setDebugLoc(DebugLoc()); 2212 if (AATags) 2213 PreheaderLoad->setAAMetadata(AATags); 2214 SSA.AddAvailableValue(Preheader, PreheaderLoad); 2215 2216 MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( 2217 PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); 2218 MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); 2219 MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); 2220 2221 if (VerifyMemorySSA) 2222 MSSAU->getMemorySSA()->verifyMemorySSA(); 2223 // Rewrite all the loads in the loop and remember all the definitions from 2224 // stores in the loop. 2225 Promoter.run(LoopUses); 2226 2227 if (VerifyMemorySSA) 2228 MSSAU->getMemorySSA()->verifyMemorySSA(); 2229 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 2230 if (PreheaderLoad->use_empty()) 2231 eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); 2232 2233 return true; 2234 } 2235 2236 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, 2237 function_ref<void(Instruction *)> Fn) { 2238 for (const BasicBlock *BB : L->blocks()) 2239 if (const auto *Accesses = MSSA->getBlockAccesses(BB)) 2240 for (const auto &Access : *Accesses) 2241 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access)) 2242 Fn(MUD->getMemoryInst()); 2243 } 2244 2245 static SmallVector<SmallSetVector<Value *, 8>, 0> 2246 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { 2247 AliasSetTracker AST(*AA); 2248 2249 auto IsPotentiallyPromotable = [L](const Instruction *I) { 2250 if (const auto *SI = dyn_cast<StoreInst>(I)) 2251 return L->isLoopInvariant(SI->getPointerOperand()); 2252 if (const auto *LI = dyn_cast<LoadInst>(I)) 2253 return L->isLoopInvariant(LI->getPointerOperand()); 2254 return false; 2255 }; 2256 2257 // Populate AST with potentially promotable accesses and remove them from 2258 // MaybePromotable, so they will not be checked again on the next iteration. 2259 SmallPtrSet<Value *, 16> AttemptingPromotion; 2260 foreachMemoryAccess(MSSA, L, [&](Instruction *I) { 2261 if (IsPotentiallyPromotable(I)) { 2262 AttemptingPromotion.insert(I); 2263 AST.add(I); 2264 } 2265 }); 2266 2267 // We're only interested in must-alias sets that contain a mod. 2268 SmallVector<const AliasSet *, 8> Sets; 2269 for (AliasSet &AS : AST) 2270 if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias()) 2271 Sets.push_back(&AS); 2272 2273 if (Sets.empty()) 2274 return {}; // Nothing to promote... 2275 2276 // Discard any sets for which there is an aliasing non-promotable access. 
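  //
  // For example (hypothetical C, for illustration only): in
  //
  //   for (...) { *p += 1; q[i] = 0; }
  //
  // the store through the loop-invariant pointer p is a promotion candidate,
  // but if the store to q[i] may alias p and q[i]'s address is not loop
  // invariant, p's alias set is discarded here.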
2277 foreachMemoryAccess(MSSA, L, [&](Instruction *I) { 2278 if (AttemptingPromotion.contains(I)) 2279 return; 2280 2281 llvm::erase_if(Sets, [&](const AliasSet *AS) { 2282 return AS->aliasesUnknownInst(I, *AA); 2283 }); 2284 }); 2285 2286 SmallVector<SmallSetVector<Value *, 8>, 0> Result; 2287 for (const AliasSet *Set : Sets) { 2288 SmallSetVector<Value *, 8> PointerMustAliases; 2289 for (const auto &ASI : *Set) 2290 PointerMustAliases.insert(ASI.getValue()); 2291 Result.push_back(std::move(PointerMustAliases)); 2292 } 2293 2294 return Result; 2295 } 2296 2297 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, 2298 AliasSetTracker *CurAST, Loop *CurLoop, 2299 AAResults *AA) { 2300 return CurAST->getAliasSetFor(MemLoc).isMod(); 2301 } 2302 2303 bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, 2304 Loop *CurLoop, Instruction &I, 2305 SinkAndHoistLICMFlags &Flags) { 2306 // For hoisting, use the walker to determine safety 2307 if (!Flags.getIsSink()) { 2308 MemoryAccess *Source; 2309 // See declaration of SetLicmMssaOptCap for usage details. 2310 if (Flags.tooManyClobberingCalls()) 2311 Source = MU->getDefiningAccess(); 2312 else { 2313 Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); 2314 Flags.incrementClobberingCalls(); 2315 } 2316 return !MSSA->isLiveOnEntryDef(Source) && 2317 CurLoop->contains(Source->getBlock()); 2318 } 2319 2320 // For sinking, we'd need to check all Defs below this use. The getClobbering 2321 // call will look on the backedge of the loop, but will check aliasing with 2322 // the instructions on the previous iteration. 2323 // For example: 2324 // for (i ... ) 2325 // load a[i] ( Use (LoE) 2326 // store a[i] ( 1 = Def (2), with 2 = Phi for the loop. 2327 // i++; 2328 // The load sees no clobbering inside the loop, as the backedge alias check 2329 // does phi translation, and will check aliasing against store a[i-1]. 2330 // However sinking the load outside the loop, below the store is incorrect. 2331 2332 // For now, only sink if there are no Defs in the loop, and the existing ones 2333 // precede the use and are in the same block. 2334 // FIXME: Increase precision: Safe to sink if Use post dominates the Def; 2335 // needs PostDominatorTreeAnalysis. 2336 // FIXME: More precise: no Defs that alias this Use. 2337 if (Flags.tooManyMemoryAccesses()) 2338 return true; 2339 for (auto *BB : CurLoop->getBlocks()) 2340 if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) 2341 return true; 2342 // When sinking, the source block may not be part of the loop so check it. 2343 if (!CurLoop->contains(&I)) 2344 return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); 2345 2346 return false; 2347 } 2348 2349 bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, 2350 MemoryUse &MU) { 2351 if (const auto *Accesses = MSSA.getBlockDefs(&BB)) 2352 for (const auto &MA : *Accesses) 2353 if (const auto *MD = dyn_cast<MemoryDef>(&MA)) 2354 if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) 2355 return true; 2356 return false; 2357 } 2358 2359 /// Little predicate that returns true if the specified basic block is in 2360 /// a subloop of the current one, not the current one itself. 2361 /// 2362 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 2363 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 2364 return LI->getLoopFor(BB) != CurLoop; 2365 } 2366