1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs loop invariant code motion, attempting to remove as much 11 // code from the body of a loop as possible. It does this by either hoisting 12 // code into the preheader block, or by sinking code to the exit blocks if it is 13 // safe. This pass also promotes must-aliased memory locations in the loop to 14 // live in registers, thus hoisting and sinking "invariant" loads and stores. 15 // 16 // This pass uses alias analysis for two purposes: 17 // 18 // 1. Moving loop invariant loads and calls out of loops. If we can determine 19 // that a load or call inside of a loop never aliases anything stored to, 20 // we can hoist it or sink it like any other instruction. 21 // 2. Scalar Promotion of Memory - If there is a store instruction inside of 22 // the loop, we try to move the store to happen AFTER the loop instead of 23 // inside of the loop. This can only happen if a few conditions are true: 24 // A. The pointer stored through is loop invariant 25 // B. There are no stores or loads in the loop which _may_ alias the 26 // pointer. There are no calls in the loop which mod/ref the pointer. 27 // If these conditions are true, we can promote the loads and stores in the 28 // loop of the pointer to use a temporary alloca'd variable. We then use 29 // the SSAUpdater to construct the appropriate SSA form for the value. 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "llvm/Transforms/Scalar/LICM.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/AliasSetTracker.h" 37 #include "llvm/Analysis/BasicAliasAnalysis.h" 38 #include "llvm/Analysis/CaptureTracking.h" 39 #include "llvm/Analysis/ConstantFolding.h" 40 #include "llvm/Analysis/GlobalsModRef.h" 41 #include "llvm/Analysis/Loads.h" 42 #include "llvm/Analysis/LoopInfo.h" 43 #include "llvm/Analysis/LoopPass.h" 44 #include "llvm/Analysis/MemoryBuiltins.h" 45 #include "llvm/Analysis/OptimizationDiagnosticInfo.h" 46 #include "llvm/Analysis/ScalarEvolution.h" 47 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 48 #include "llvm/Analysis/TargetLibraryInfo.h" 49 #include "llvm/Analysis/ValueTracking.h" 50 #include "llvm/IR/CFG.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/LLVMContext.h" 58 #include "llvm/IR/Metadata.h" 59 #include "llvm/IR/PredIteratorCache.h" 60 #include "llvm/Support/CommandLine.h" 61 #include "llvm/Support/Debug.h" 62 #include "llvm/Support/raw_ostream.h" 63 #include "llvm/Transforms/Scalar.h" 64 #include "llvm/Transforms/Scalar/LoopPassManager.h" 65 #include "llvm/Transforms/Utils/Local.h" 66 #include "llvm/Transforms/Utils/LoopUtils.h" 67 #include "llvm/Transforms/Utils/SSAUpdater.h" 68 #include <algorithm> 69 #include <utility> 70 using namespace llvm; 71 72 #define DEBUG_TYPE "licm" 73 74 STATISTIC(NumSunk, "Number of instructions sunk out of loop"); 75 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); 76 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 77 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 78 STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); 79 80 /// Memory promotion is enabled by default. 81 static cl::opt<bool> 82 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), 83 cl::desc("Disable memory promotion in LICM pass")); 84 85 static cl::opt<uint32_t> MaxNumUsesTraversed( 86 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), 87 cl::desc("Max num uses visited for identifying load " 88 "invariance in loop using invariant start (default = 8)")); 89 90 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); 91 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, 92 const LoopSafetyInfo *SafetyInfo); 93 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 94 const LoopSafetyInfo *SafetyInfo, 95 OptimizationRemarkEmitter *ORE); 96 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, 97 const Loop *CurLoop, AliasSetTracker *CurAST, 98 const LoopSafetyInfo *SafetyInfo, 99 OptimizationRemarkEmitter *ORE); 100 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 101 const DominatorTree *DT, 102 const Loop *CurLoop, 103 const LoopSafetyInfo *SafetyInfo, 104 OptimizationRemarkEmitter *ORE, 105 const Instruction *CtxI = nullptr); 106 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 107 const AAMDNodes &AAInfo, 108 AliasSetTracker *CurAST); 109 static Instruction * 110 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 111 const LoopInfo *LI, 112 const LoopSafetyInfo *SafetyInfo); 113 114 namespace { 115 struct LoopInvariantCodeMotion { 116 bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, 117 TargetLibraryInfo *TLI, ScalarEvolution *SE, 118 OptimizationRemarkEmitter *ORE, bool DeleteAST); 119 120 DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { 121 return LoopToAliasSetMap; 122 } 123 124 private: 125 DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; 126 127 AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 128 AliasAnalysis *AA); 129 }; 130 131 struct LegacyLICMPass : public LoopPass { 132 static char ID; // Pass identification, replacement for typeid 133 LegacyLICMPass() : LoopPass(ID) { 134 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); 135 } 136 137 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 138 if (skipLoop(L)) { 139 // If we have run LICM on a previous loop but now we are skipping 140 // (because we've hit the opt-bisect limit), we need to clear the 141 // loop alias information. 142 for (auto <AS : LICM.getLoopToAliasSetMap()) 143 delete LTAS.second; 144 LICM.getLoopToAliasSetMap().clear(); 145 return false; 146 } 147 148 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 149 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 150 // pass. Function analyses need to be preserved across loop transformations 151 // but ORE cannot be preserved (see comment before the pass definition). 152 OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 153 return LICM.runOnLoop(L, 154 &getAnalysis<AAResultsWrapperPass>().getAAResults(), 155 &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), 156 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 157 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), 158 SE ? &SE->getSE() : nullptr, &ORE, false); 159 } 160 161 /// This transformation requires natural loop information & requires that 162 /// loop preheaders be inserted into the CFG... 163 /// 164 void getAnalysisUsage(AnalysisUsage &AU) const override { 165 AU.setPreservesCFG(); 166 AU.addRequired<TargetLibraryInfoWrapperPass>(); 167 getLoopAnalysisUsage(AU); 168 } 169 170 using llvm::Pass::doFinalization; 171 172 bool doFinalization() override { 173 assert(LICM.getLoopToAliasSetMap().empty() && 174 "Didn't free loop alias sets"); 175 return false; 176 } 177 178 private: 179 LoopInvariantCodeMotion LICM; 180 181 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 182 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 183 Loop *L) override; 184 185 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 186 /// set. 187 void deleteAnalysisValue(Value *V, Loop *L) override; 188 189 /// Simple Analysis hook. Delete loop L from alias set map. 190 void deleteAnalysisLoop(Loop *L) override; 191 }; 192 } 193 194 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, 195 LoopStandardAnalysisResults &AR, LPMUpdater &) { 196 const auto &FAM = 197 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 198 Function *F = L.getHeader()->getParent(); 199 200 auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); 201 // FIXME: This should probably be optional rather than required. 202 if (!ORE) 203 report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " 204 "cached at a higher level"); 205 206 LoopInvariantCodeMotion LICM; 207 if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) 208 return PreservedAnalyses::all(); 209 210 auto PA = getLoopPassPreservedAnalyses(); 211 PA.preserveSet<CFGAnalyses>(); 212 return PA; 213 } 214 215 char LegacyLICMPass::ID = 0; 216 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", 217 false, false) 218 INITIALIZE_PASS_DEPENDENCY(LoopPass) 219 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 220 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, 221 false) 222 223 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } 224 225 /// Hoist expressions out of the specified loop. Note, alias info for inner 226 /// loop is not preserved so it is not a good idea to run LICM multiple 227 /// times on one loop. 228 /// We should delete AST for inner loops in the new pass manager to avoid 229 /// memory leak. 230 /// 231 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, 232 LoopInfo *LI, DominatorTree *DT, 233 TargetLibraryInfo *TLI, 234 ScalarEvolution *SE, 235 OptimizationRemarkEmitter *ORE, 236 bool DeleteAST) { 237 bool Changed = false; 238 239 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); 240 241 AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); 242 243 // Get the preheader block to move instructions into... 244 BasicBlock *Preheader = L->getLoopPreheader(); 245 246 // Compute loop safety information. 247 LoopSafetyInfo SafetyInfo; 248 computeLoopSafetyInfo(&SafetyInfo, L); 249 250 // We want to visit all of the instructions in this loop... that are not parts 251 // of our subloops (they have already had their invariants hoisted out of 252 // their loop, into this loop, so there is no need to process the BODIES of 253 // the subloops). 254 // 255 // Traverse the body of the loop in depth first order on the dominator tree so 256 // that we are guaranteed to see definitions before we see uses. This allows 257 // us to sink instructions in one pass, without iteration. After sinking 258 // instructions, we perform another pass to hoist them out of the loop. 259 // 260 if (L->hasDedicatedExits()) 261 Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, 262 CurAST, &SafetyInfo, ORE); 263 if (Preheader) 264 Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, 265 CurAST, &SafetyInfo, ORE); 266 267 // Now that all loop invariants have been removed from the loop, promote any 268 // memory references to scalars that we can. 269 // Don't sink stores from loops without dedicated block exits. Exits 270 // containing indirect branches are not transformed by loop simplify, 271 // make sure we catch that. An additional load may be generated in the 272 // preheader for SSA updater, so also avoid sinking when no preheader 273 // is available. 274 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 275 // Figure out the loop exits and their insertion points 276 SmallVector<BasicBlock *, 8> ExitBlocks; 277 L->getUniqueExitBlocks(ExitBlocks); 278 279 // We can't insert into a catchswitch. 280 bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { 281 return isa<CatchSwitchInst>(Exit->getTerminator()); 282 }); 283 284 if (!HasCatchSwitch) { 285 SmallVector<Instruction *, 8> InsertPts; 286 InsertPts.reserve(ExitBlocks.size()); 287 for (BasicBlock *ExitBlock : ExitBlocks) 288 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 289 290 PredIteratorCache PIC; 291 292 bool Promoted = false; 293 294 // Loop over all of the alias sets in the tracker object. 295 for (AliasSet &AS : *CurAST) 296 Promoted |= 297 promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT, 298 TLI, L, CurAST, &SafetyInfo, ORE); 299 300 // Once we have promoted values across the loop body we have to 301 // recursively reform LCSSA as any nested loop may now have values defined 302 // within the loop used in the outer loop. 303 // FIXME: This is really heavy handed. It would be a bit better to use an 304 // SSAUpdater strategy during promotion that was LCSSA aware and reformed 305 // it as it went. 306 if (Promoted) 307 formLCSSARecursively(*L, *DT, LI, SE); 308 309 Changed |= Promoted; 310 } 311 } 312 313 // Check that neither this loop nor its parent have had LCSSA broken. LICM is 314 // specifically moving instructions across the loop boundary and so it is 315 // especially in need of sanity checking here. 316 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); 317 assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && 318 "Parent loop not left in LCSSA form after LICM!"); 319 320 // If this loop is nested inside of another one, save the alias information 321 // for when we process the outer loop. 322 if (L->getParentLoop() && !DeleteAST) 323 LoopToAliasSetMap[L] = CurAST; 324 else 325 delete CurAST; 326 327 if (Changed && SE) 328 SE->forgetLoopDispositions(L); 329 return Changed; 330 } 331 332 /// Walk the specified region of the CFG (defined by all blocks dominated by 333 /// the specified block, and that are in the current loop) in reverse depth 334 /// first order w.r.t the DominatorTree. This allows us to visit uses before 335 /// definitions, allowing us to sink a loop body in one pass without iteration. 336 /// 337 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 338 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, 339 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 340 OptimizationRemarkEmitter *ORE) { 341 342 // Verify inputs. 343 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 344 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 345 "Unexpected input to sinkRegion"); 346 347 BasicBlock *BB = N->getBlock(); 348 // If this subregion is not in the top level loop at all, exit. 349 if (!CurLoop->contains(BB)) 350 return false; 351 352 // We are processing blocks in reverse dfo, so process children first. 353 bool Changed = false; 354 const std::vector<DomTreeNode *> &Children = N->getChildren(); 355 for (DomTreeNode *Child : Children) 356 Changed |= 357 sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); 358 359 // Only need to process the contents of this block if it is not part of a 360 // subloop (which would already have been processed). 361 if (inSubLoop(BB, CurLoop, LI)) 362 return Changed; 363 364 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { 365 Instruction &I = *--II; 366 367 // If the instruction is dead, we would try to sink it because it isn't used 368 // in the loop, instead, just delete it. 369 if (isInstructionTriviallyDead(&I, TLI)) { 370 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 371 ++II; 372 CurAST->deleteValue(&I); 373 I.eraseFromParent(); 374 Changed = true; 375 continue; 376 } 377 378 // Check to see if we can sink this instruction to the exit blocks 379 // of the loop. We can do this if the all users of the instruction are 380 // outside of the loop. In this case, it doesn't even matter if the 381 // operands of the instruction are loop invariant. 382 // 383 if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && 384 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { 385 ++II; 386 Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); 387 } 388 } 389 return Changed; 390 } 391 392 /// Walk the specified region of the CFG (defined by all blocks dominated by 393 /// the specified block, and that are in the current loop) in depth first 394 /// order w.r.t the DominatorTree. This allows us to visit definitions before 395 /// uses, allowing us to hoist a loop body in one pass without iteration. 396 /// 397 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, 398 DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, 399 AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 400 OptimizationRemarkEmitter *ORE) { 401 // Verify inputs. 402 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && 403 CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && 404 "Unexpected input to hoistRegion"); 405 406 BasicBlock *BB = N->getBlock(); 407 408 // If this subregion is not in the top level loop at all, exit. 409 if (!CurLoop->contains(BB)) 410 return false; 411 412 // Only need to process the contents of this block if it is not part of a 413 // subloop (which would already have been processed). 414 bool Changed = false; 415 if (!inSubLoop(BB, CurLoop, LI)) 416 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { 417 Instruction &I = *II++; 418 // Try constant folding this instruction. If all the operands are 419 // constants, it is technically hoistable, but it would be better to just 420 // fold it. 421 if (Constant *C = ConstantFoldInstruction( 422 &I, I.getModule()->getDataLayout(), TLI)) { 423 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 424 CurAST->copyValue(&I, C); 425 I.replaceAllUsesWith(C); 426 if (isInstructionTriviallyDead(&I, TLI)) { 427 CurAST->deleteValue(&I); 428 I.eraseFromParent(); 429 } 430 Changed = true; 431 continue; 432 } 433 434 // Attempt to remove floating point division out of the loop by converting 435 // it to a reciprocal multiplication. 436 if (I.getOpcode() == Instruction::FDiv && 437 CurLoop->isLoopInvariant(I.getOperand(1)) && 438 I.hasAllowReciprocal()) { 439 auto Divisor = I.getOperand(1); 440 auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); 441 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); 442 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); 443 ReciprocalDivisor->insertBefore(&I); 444 445 auto Product = BinaryOperator::CreateFMul(I.getOperand(0), 446 ReciprocalDivisor); 447 Product->setFastMathFlags(I.getFastMathFlags()); 448 Product->insertAfter(&I); 449 I.replaceAllUsesWith(Product); 450 I.eraseFromParent(); 451 452 hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); 453 Changed = true; 454 continue; 455 } 456 457 // Try hoisting the instruction out to the preheader. We can only do this 458 // if all of the operands of the instruction are loop invariant and if it 459 // is safe to hoist the instruction. 460 // 461 if (CurLoop->hasLoopInvariantOperands(&I) && 462 canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && 463 isSafeToExecuteUnconditionally( 464 I, DT, CurLoop, SafetyInfo, ORE, 465 CurLoop->getLoopPreheader()->getTerminator())) 466 Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); 467 } 468 469 const std::vector<DomTreeNode *> &Children = N->getChildren(); 470 for (DomTreeNode *Child : Children) 471 Changed |= 472 hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); 473 return Changed; 474 } 475 476 /// Computes loop safety information, checks loop body & header 477 /// for the possibility of may throw exception. 478 /// 479 void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { 480 assert(CurLoop != nullptr && "CurLoop cant be null"); 481 BasicBlock *Header = CurLoop->getHeader(); 482 // Setting default safety values. 483 SafetyInfo->MayThrow = false; 484 SafetyInfo->HeaderMayThrow = false; 485 // Iterate over header and compute safety info. 486 for (BasicBlock::iterator I = Header->begin(), E = Header->end(); 487 (I != E) && !SafetyInfo->HeaderMayThrow; ++I) 488 SafetyInfo->HeaderMayThrow |= 489 !isGuaranteedToTransferExecutionToSuccessor(&*I); 490 491 SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; 492 // Iterate over loop instructions and compute safety info. 493 // Skip header as it has been computed and stored in HeaderMayThrow. 494 // The first block in loopinfo.Blocks is guaranteed to be the header. 495 assert(Header == *CurLoop->getBlocks().begin() && "First block must be header"); 496 for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), 497 BBE = CurLoop->block_end(); 498 (BB != BBE) && !SafetyInfo->MayThrow; ++BB) 499 for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); 500 (I != E) && !SafetyInfo->MayThrow; ++I) 501 SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); 502 503 // Compute funclet colors if we might sink/hoist in a function with a funclet 504 // personality routine. 505 Function *Fn = CurLoop->getHeader()->getParent(); 506 if (Fn->hasPersonalityFn()) 507 if (Constant *PersonalityFn = Fn->getPersonalityFn()) 508 if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) 509 SafetyInfo->BlockColors = colorEHFunclets(*Fn); 510 } 511 512 // Return true if LI is invariant within scope of the loop. LI is invariant if 513 // CurLoop is dominated by an invariant.start representing the same memory location 514 // and size as the memory location LI loads from, and also the invariant.start 515 // has no uses. 516 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, 517 Loop *CurLoop) { 518 Value *Addr = LI->getOperand(0); 519 const DataLayout &DL = LI->getModule()->getDataLayout(); 520 const uint32_t LocSizeInBits = DL.getTypeSizeInBits( 521 cast<PointerType>(Addr->getType())->getElementType()); 522 523 // if the type is i8 addrspace(x)*, we know this is the type of 524 // llvm.invariant.start operand 525 auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), 526 LI->getPointerAddressSpace()); 527 unsigned BitcastsVisited = 0; 528 // Look through bitcasts until we reach the i8* type (this is invariant.start 529 // operand type). 530 while (Addr->getType() != PtrInt8Ty) { 531 auto *BC = dyn_cast<BitCastInst>(Addr); 532 // Avoid traversing high number of bitcast uses. 533 if (++BitcastsVisited > MaxNumUsesTraversed || !BC) 534 return false; 535 Addr = BC->getOperand(0); 536 } 537 538 unsigned UsesVisited = 0; 539 // Traverse all uses of the load operand value, to see if invariant.start is 540 // one of the uses, and whether it dominates the load instruction. 541 for (auto *U : Addr->users()) { 542 // Avoid traversing for Load operand with high number of users. 543 if (++UsesVisited > MaxNumUsesTraversed) 544 return false; 545 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 546 // If there are escaping uses of invariant.start instruction, the load maybe 547 // non-invariant. 548 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || 549 !II->use_empty()) 550 continue; 551 unsigned InvariantSizeInBits = 552 cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; 553 // Confirm the invariant.start location size contains the load operand size 554 // in bits. Also, the invariant.start should dominate the load, and we 555 // should not hoist the load out of a loop that contains this dominating 556 // invariant.start. 557 if (LocSizeInBits <= InvariantSizeInBits && 558 DT->properlyDominates(II->getParent(), CurLoop->getHeader())) 559 return true; 560 } 561 562 return false; 563 } 564 565 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, 566 Loop *CurLoop, AliasSetTracker *CurAST, 567 LoopSafetyInfo *SafetyInfo, 568 OptimizationRemarkEmitter *ORE) { 569 // Loads have extra constraints we have to verify before we can hoist them. 570 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 571 if (!LI->isUnordered()) 572 return false; // Don't hoist volatile/atomic loads! 573 574 // Loads from constant memory are always safe to move, even if they end up 575 // in the same alias set as something that ends up being modified. 576 if (AA->pointsToConstantMemory(LI->getOperand(0))) 577 return true; 578 if (LI->getMetadata(LLVMContext::MD_invariant_load)) 579 return true; 580 581 // This checks for an invariant.start dominating the load. 582 if (isLoadInvariantInLoop(LI, DT, CurLoop)) 583 return true; 584 585 // Don't hoist loads which have may-aliased stores in loop. 586 uint64_t Size = 0; 587 if (LI->getType()->isSized()) 588 Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); 589 590 AAMDNodes AAInfo; 591 LI->getAAMetadata(AAInfo); 592 593 bool Invalidated = 594 pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); 595 // Check loop-invariant address because this may also be a sinkable load 596 // whose address is not necessarily loop-invariant. 597 if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) 598 ORE->emit(OptimizationRemarkMissed( 599 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) 600 << "failed to move load with loop-invariant address " 601 "because the loop may invalidate its value"); 602 603 return !Invalidated; 604 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 605 // Don't sink or hoist dbg info; it's legal, but not useful. 606 if (isa<DbgInfoIntrinsic>(I)) 607 return false; 608 609 // Don't sink calls which can throw. 610 if (CI->mayThrow()) 611 return false; 612 613 // Handle simple cases by querying alias analysis. 614 FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); 615 if (Behavior == FMRB_DoesNotAccessMemory) 616 return true; 617 if (AliasAnalysis::onlyReadsMemory(Behavior)) { 618 // A readonly argmemonly function only reads from memory pointed to by 619 // it's arguments with arbitrary offsets. If we can prove there are no 620 // writes to this memory in the loop, we can hoist or sink. 621 if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { 622 for (Value *Op : CI->arg_operands()) 623 if (Op->getType()->isPointerTy() && 624 pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, 625 AAMDNodes(), CurAST)) 626 return false; 627 return true; 628 } 629 // If this call only reads from memory and there are no writes to memory 630 // in the loop, we can hoist or sink the call as appropriate. 631 bool FoundMod = false; 632 for (AliasSet &AS : *CurAST) { 633 if (!AS.isForwardingAliasSet() && AS.isMod()) { 634 FoundMod = true; 635 break; 636 } 637 } 638 if (!FoundMod) 639 return true; 640 } 641 642 // FIXME: This should use mod/ref information to see if we can hoist or 643 // sink the call. 644 645 return false; 646 } 647 648 // Only these instructions are hoistable/sinkable. 649 if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && 650 !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && 651 !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && 652 !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && 653 !isa<InsertValueInst>(I)) 654 return false; 655 656 // SafetyInfo is nullptr if we are checking for sinking from preheader to 657 // loop body. It will be always safe as there is no speculative execution. 658 if (!SafetyInfo) 659 return true; 660 661 // TODO: Plumb the context instruction through to make hoisting and sinking 662 // more powerful. Hoisting of loads already works due to the special casing 663 // above. 664 return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); 665 } 666 667 /// Returns true if a PHINode is a trivially replaceable with an 668 /// Instruction. 669 /// This is true when all incoming values are that instruction. 670 /// This pattern occurs most often with LCSSA PHI nodes. 671 /// 672 static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { 673 for (const Value *IncValue : PN.incoming_values()) 674 if (IncValue != &I) 675 return false; 676 677 return true; 678 } 679 680 /// Return true if the only users of this instruction are outside of 681 /// the loop. If this is true, we can sink the instruction to the exit 682 /// blocks of the loop. 683 /// 684 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, 685 const LoopSafetyInfo *SafetyInfo) { 686 const auto &BlockColors = SafetyInfo->BlockColors; 687 for (const User *U : I.users()) { 688 const Instruction *UI = cast<Instruction>(U); 689 if (const PHINode *PN = dyn_cast<PHINode>(UI)) { 690 const BasicBlock *BB = PN->getParent(); 691 // We cannot sink uses in catchswitches. 692 if (isa<CatchSwitchInst>(BB->getTerminator())) 693 return false; 694 695 // We need to sink a callsite to a unique funclet. Avoid sinking if the 696 // phi use is too muddled. 697 if (isa<CallInst>(I)) 698 if (!BlockColors.empty() && 699 BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) 700 return false; 701 702 // A PHI node where all of the incoming values are this instruction are 703 // special -- they can just be RAUW'ed with the instruction and thus 704 // don't require a use in the predecessor. This is a particular important 705 // special case because it is the pattern found in LCSSA form. 706 if (isTriviallyReplacablePHI(*PN, I)) { 707 if (CurLoop->contains(PN)) 708 return false; 709 else 710 continue; 711 } 712 713 // Otherwise, PHI node uses occur in predecessor blocks if the incoming 714 // values. Check for such a use being inside the loop. 715 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 716 if (PN->getIncomingValue(i) == &I) 717 if (CurLoop->contains(PN->getIncomingBlock(i))) 718 return false; 719 720 continue; 721 } 722 723 if (CurLoop->contains(UI)) 724 return false; 725 } 726 return true; 727 } 728 729 static Instruction * 730 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, 731 const LoopInfo *LI, 732 const LoopSafetyInfo *SafetyInfo) { 733 Instruction *New; 734 if (auto *CI = dyn_cast<CallInst>(&I)) { 735 const auto &BlockColors = SafetyInfo->BlockColors; 736 737 // Sinking call-sites need to be handled differently from other 738 // instructions. The cloned call-site needs a funclet bundle operand 739 // appropriate for it's location in the CFG. 740 SmallVector<OperandBundleDef, 1> OpBundles; 741 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); 742 BundleIdx != BundleEnd; ++BundleIdx) { 743 OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); 744 if (Bundle.getTagID() == LLVMContext::OB_funclet) 745 continue; 746 747 OpBundles.emplace_back(Bundle); 748 } 749 750 if (!BlockColors.empty()) { 751 const ColorVector &CV = BlockColors.find(&ExitBlock)->second; 752 assert(CV.size() == 1 && "non-unique color for exit block!"); 753 BasicBlock *BBColor = CV.front(); 754 Instruction *EHPad = BBColor->getFirstNonPHI(); 755 if (EHPad->isEHPad()) 756 OpBundles.emplace_back("funclet", EHPad); 757 } 758 759 New = CallInst::Create(CI, OpBundles); 760 } else { 761 New = I.clone(); 762 } 763 764 ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); 765 if (!I.getName().empty()) 766 New->setName(I.getName() + ".le"); 767 768 // Build LCSSA PHI nodes for any in-loop operands. Note that this is 769 // particularly cheap because we can rip off the PHI node that we're 770 // replacing for the number and blocks of the predecessors. 771 // OPT: If this shows up in a profile, we can instead finish sinking all 772 // invariant instructions, and then walk their operands to re-establish 773 // LCSSA. That will eliminate creating PHI nodes just to nuke them when 774 // sinking bottom-up. 775 for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; 776 ++OI) 777 if (Instruction *OInst = dyn_cast<Instruction>(*OI)) 778 if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) 779 if (!OLoop->contains(&PN)) { 780 PHINode *OpPN = 781 PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), 782 OInst->getName() + ".lcssa", &ExitBlock.front()); 783 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 784 OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); 785 *OI = OpPN; 786 } 787 return New; 788 } 789 790 /// When an instruction is found to only be used outside of the loop, this 791 /// function moves it to the exit blocks and patches up SSA form as needed. 792 /// This method is guaranteed to remove the original instruction from its 793 /// position, and may either delete it or move it to outside of the loop. 794 /// 795 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, 796 const Loop *CurLoop, AliasSetTracker *CurAST, 797 const LoopSafetyInfo *SafetyInfo, 798 OptimizationRemarkEmitter *ORE) { 799 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 800 ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) 801 << "sinking " << ore::NV("Inst", &I)); 802 bool Changed = false; 803 if (isa<LoadInst>(I)) 804 ++NumMovedLoads; 805 else if (isa<CallInst>(I)) 806 ++NumMovedCalls; 807 ++NumSunk; 808 Changed = true; 809 810 #ifndef NDEBUG 811 SmallVector<BasicBlock *, 32> ExitBlocks; 812 CurLoop->getUniqueExitBlocks(ExitBlocks); 813 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), 814 ExitBlocks.end()); 815 #endif 816 817 // Clones of this instruction. Don't create more than one per exit block! 818 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; 819 820 // If this instruction is only used outside of the loop, then all users are 821 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of 822 // the instruction. 823 while (!I.use_empty()) { 824 Value::user_iterator UI = I.user_begin(); 825 auto *User = cast<Instruction>(*UI); 826 if (!DT->isReachableFromEntry(User->getParent())) { 827 User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); 828 continue; 829 } 830 // The user must be a PHI node. 831 PHINode *PN = cast<PHINode>(User); 832 833 // Surprisingly, instructions can be used outside of loops without any 834 // exits. This can only happen in PHI nodes if the incoming block is 835 // unreachable. 836 Use &U = UI.getUse(); 837 BasicBlock *BB = PN->getIncomingBlock(U); 838 if (!DT->isReachableFromEntry(BB)) { 839 U = UndefValue::get(I.getType()); 840 continue; 841 } 842 843 BasicBlock *ExitBlock = PN->getParent(); 844 assert(ExitBlockSet.count(ExitBlock) && 845 "The LCSSA PHI is not in an exit block!"); 846 847 Instruction *New; 848 auto It = SunkCopies.find(ExitBlock); 849 if (It != SunkCopies.end()) 850 New = It->second; 851 else 852 New = SunkCopies[ExitBlock] = 853 CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); 854 855 PN->replaceAllUsesWith(New); 856 PN->eraseFromParent(); 857 } 858 859 CurAST->deleteValue(&I); 860 I.eraseFromParent(); 861 return Changed; 862 } 863 864 /// When an instruction is found to only use loop invariant operands that 865 /// is safe to hoist, this instruction is called to do the dirty work. 866 /// 867 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, 868 const LoopSafetyInfo *SafetyInfo, 869 OptimizationRemarkEmitter *ORE) { 870 auto *Preheader = CurLoop->getLoopPreheader(); 871 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I 872 << "\n"); 873 ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) 874 << "hoisting " << ore::NV("Inst", &I)); 875 876 // Metadata can be dependent on conditions we are hoisting above. 877 // Conservatively strip all metadata on the instruction unless we were 878 // guaranteed to execute I if we entered the loop, in which case the metadata 879 // is valid in the loop preheader. 880 if (I.hasMetadataOtherThanDebugLoc() && 881 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning 882 // time in isGuaranteedToExecute if we don't actually have anything to 883 // drop. It is a compile time optimization, not required for correctness. 884 !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) 885 I.dropUnknownNonDebugMetadata(); 886 887 // Move the new node to the Preheader, before its terminator. 888 I.moveBefore(Preheader->getTerminator()); 889 890 // Do not retain debug locations when we are moving instructions to different 891 // basic blocks, because we want to avoid jumpy line tables. Calls, however, 892 // need to retain their debug locs because they may be inlined. 893 // FIXME: How do we retain source locations without causing poor debugging 894 // behavior? 895 if (!isa<CallInst>(I)) 896 I.setDebugLoc(DebugLoc()); 897 898 if (isa<LoadInst>(I)) 899 ++NumMovedLoads; 900 else if (isa<CallInst>(I)) 901 ++NumMovedCalls; 902 ++NumHoisted; 903 return true; 904 } 905 906 /// Only sink or hoist an instruction if it is not a trapping instruction, 907 /// or if the instruction is known not to trap when moved to the preheader. 908 /// or if it is a trapping instruction and is guaranteed to execute. 909 static bool isSafeToExecuteUnconditionally(Instruction &Inst, 910 const DominatorTree *DT, 911 const Loop *CurLoop, 912 const LoopSafetyInfo *SafetyInfo, 913 OptimizationRemarkEmitter *ORE, 914 const Instruction *CtxI) { 915 if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) 916 return true; 917 918 bool GuaranteedToExecute = 919 isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); 920 921 if (!GuaranteedToExecute) { 922 auto *LI = dyn_cast<LoadInst>(&Inst); 923 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) 924 ORE->emit(OptimizationRemarkMissed( 925 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) 926 << "failed to hoist load with loop-invariant address " 927 "because load is conditionally executed"); 928 } 929 930 return GuaranteedToExecute; 931 } 932 933 namespace { 934 class LoopPromoter : public LoadAndStorePromoter { 935 Value *SomePtr; // Designated pointer to store to. 936 SmallPtrSetImpl<Value *> &PointerMustAliases; 937 SmallVectorImpl<BasicBlock *> &LoopExitBlocks; 938 SmallVectorImpl<Instruction *> &LoopInsertPts; 939 PredIteratorCache &PredCache; 940 AliasSetTracker &AST; 941 LoopInfo &LI; 942 DebugLoc DL; 943 int Alignment; 944 bool UnorderedAtomic; 945 AAMDNodes AATags; 946 947 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { 948 if (Instruction *I = dyn_cast<Instruction>(V)) 949 if (Loop *L = LI.getLoopFor(I->getParent())) 950 if (!L->contains(BB)) { 951 // We need to create an LCSSA PHI node for the incoming value and 952 // store that. 953 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), 954 I->getName() + ".lcssa", &BB->front()); 955 for (BasicBlock *Pred : PredCache.get(BB)) 956 PN->addIncoming(I, Pred); 957 return PN; 958 } 959 return V; 960 } 961 962 public: 963 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, 964 SmallPtrSetImpl<Value *> &PMA, 965 SmallVectorImpl<BasicBlock *> &LEB, 966 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, 967 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, 968 bool UnorderedAtomic, const AAMDNodes &AATags) 969 : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), 970 LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), 971 LI(li), DL(std::move(dl)), Alignment(alignment), 972 UnorderedAtomic(UnorderedAtomic),AATags(AATags) {} 973 974 bool isInstInList(Instruction *I, 975 const SmallVectorImpl<Instruction *> &) const override { 976 Value *Ptr; 977 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 978 Ptr = LI->getOperand(0); 979 else 980 Ptr = cast<StoreInst>(I)->getPointerOperand(); 981 return PointerMustAliases.count(Ptr); 982 } 983 984 void doExtraRewritesBeforeFinalDeletion() const override { 985 // Insert stores after in the loop exit blocks. Each exit block gets a 986 // store of the live-out values that feed them. Since we've already told 987 // the SSA updater about the defs in the loop and the preheader 988 // definition, it is all set and we can start using it. 989 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 990 BasicBlock *ExitBlock = LoopExitBlocks[i]; 991 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 992 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); 993 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); 994 Instruction *InsertPos = LoopInsertPts[i]; 995 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); 996 if (UnorderedAtomic) 997 NewSI->setOrdering(AtomicOrdering::Unordered); 998 NewSI->setAlignment(Alignment); 999 NewSI->setDebugLoc(DL); 1000 if (AATags) 1001 NewSI->setAAMetadata(AATags); 1002 } 1003 } 1004 1005 void replaceLoadWithValue(LoadInst *LI, Value *V) const override { 1006 // Update alias analysis. 1007 AST.copyValue(LI, V); 1008 } 1009 void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } 1010 }; 1011 } // end anon namespace 1012 1013 /// Try to promote memory values to scalars by sinking stores out of the 1014 /// loop and moving loads to before the loop. We do this by looping over 1015 /// the stores in the loop, looking for stores to Must pointers which are 1016 /// loop invariant. 1017 /// 1018 bool llvm::promoteLoopAccessesToScalars( 1019 AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks, 1020 SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, 1021 LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, 1022 Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, 1023 OptimizationRemarkEmitter *ORE) { 1024 // Verify inputs. 1025 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && 1026 CurAST != nullptr && SafetyInfo != nullptr && 1027 "Unexpected Input to promoteLoopAccessesToScalars"); 1028 1029 // We can promote this alias set if it has a store, if it is a "Must" alias 1030 // set, if the pointer is loop invariant, and if we are not eliminating any 1031 // volatile loads or stores. 1032 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 1033 AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) 1034 return false; 1035 1036 assert(!AS.empty() && 1037 "Must alias set should have at least one pointer element in it!"); 1038 1039 Value *SomePtr = AS.begin()->getValue(); 1040 BasicBlock *Preheader = CurLoop->getLoopPreheader(); 1041 1042 // It isn't safe to promote a load/store from the loop if the load/store is 1043 // conditional. For example, turning: 1044 // 1045 // for () { if (c) *P += 1; } 1046 // 1047 // into: 1048 // 1049 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 1050 // 1051 // is not safe, because *P may only be valid to access if 'c' is true. 1052 // 1053 // The safety property divides into two parts: 1054 // p1) The memory may not be dereferenceable on entry to the loop. In this 1055 // case, we can't insert the required load in the preheader. 1056 // p2) The memory model does not allow us to insert a store along any dynamic 1057 // path which did not originally have one. 1058 // 1059 // If at least one store is guaranteed to execute, both properties are 1060 // satisfied, and promotion is legal. 1061 // 1062 // This, however, is not a necessary condition. Even if no store/load is 1063 // guaranteed to execute, we can still establish these properties. 1064 // We can establish (p1) by proving that hoisting the load into the preheader 1065 // is safe (i.e. proving dereferenceability on all paths through the loop). We 1066 // can use any access within the alias set to prove dereferenceability, 1067 // since they're all must alias. 1068 // 1069 // There are two ways establish (p2): 1070 // a) Prove the location is thread-local. In this case the memory model 1071 // requirement does not apply, and stores are safe to insert. 1072 // b) Prove a store dominates every exit block. In this case, if an exit 1073 // blocks is reached, the original dynamic path would have taken us through 1074 // the store, so inserting a store into the exit block is safe. Note that this 1075 // is different from the store being guaranteed to execute. For instance, 1076 // if an exception is thrown on the first iteration of the loop, the original 1077 // store is never executed, but the exit blocks are not executed either. 1078 1079 bool DereferenceableInPH = false; 1080 bool SafeToInsertStore = false; 1081 1082 SmallVector<Instruction *, 64> LoopUses; 1083 SmallPtrSet<Value *, 4> PointerMustAliases; 1084 1085 // We start with an alignment of one and try to find instructions that allow 1086 // us to prove better alignment. 1087 unsigned Alignment = 1; 1088 // Keep track of which types of access we see 1089 bool SawUnorderedAtomic = false; 1090 bool SawNotAtomic = false; 1091 AAMDNodes AATags; 1092 1093 const DataLayout &MDL = Preheader->getModule()->getDataLayout(); 1094 1095 // Do we know this object does not escape ? 1096 bool IsKnownNonEscapingObject = false; 1097 if (SafetyInfo->MayThrow) { 1098 // If a loop can throw, we have to insert a store along each unwind edge. 1099 // That said, we can't actually make the unwind edge explicit. Therefore, 1100 // we have to prove that the store is dead along the unwind edge. 1101 // 1102 // If the underlying object is not an alloca, nor a pointer that does not 1103 // escape, then we can not effectively prove that the store is dead along 1104 // the unwind edge. i.e. the caller of this function could have ways to 1105 // access the pointed object. 1106 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1107 // If this is a base pointer we do not understand, simply bail. 1108 // We only handle alloca and return value from alloc-like fn right now. 1109 if (!isa<AllocaInst>(Object)) { 1110 if (!isAllocLikeFn(Object, TLI)) 1111 return false; 1112 // If this is an alloc like fn. There are more constraints we need to verify. 1113 // More specifically, we must make sure that the pointer can not escape. 1114 // 1115 // NOTE: PointerMayBeCaptured is not enough as the pointer may have escaped 1116 // even though its not captured by the enclosing function. Standard allocation 1117 // functions like malloc, calloc, and operator new return values which can 1118 // be assumed not to have previously escaped. 1119 if (PointerMayBeCaptured(Object, true, true)) 1120 return false; 1121 IsKnownNonEscapingObject = true; 1122 } 1123 } 1124 1125 // Check that all of the pointers in the alias set have the same type. We 1126 // cannot (yet) promote a memory location that is loaded and stored in 1127 // different sizes. While we are at it, collect alignment and AA info. 1128 for (const auto &ASI : AS) { 1129 Value *ASIV = ASI.getValue(); 1130 PointerMustAliases.insert(ASIV); 1131 1132 // Check that all of the pointers in the alias set have the same type. We 1133 // cannot (yet) promote a memory location that is loaded and stored in 1134 // different sizes. 1135 if (SomePtr->getType() != ASIV->getType()) 1136 return false; 1137 1138 for (User *U : ASIV->users()) { 1139 // Ignore instructions that are outside the loop. 1140 Instruction *UI = dyn_cast<Instruction>(U); 1141 if (!UI || !CurLoop->contains(UI)) 1142 continue; 1143 1144 // If there is an non-load/store instruction in the loop, we can't promote 1145 // it. 1146 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { 1147 assert(!Load->isVolatile() && "AST broken"); 1148 if (!Load->isUnordered()) 1149 return false; 1150 1151 SawUnorderedAtomic |= Load->isAtomic(); 1152 SawNotAtomic |= !Load->isAtomic(); 1153 1154 if (!DereferenceableInPH) 1155 DereferenceableInPH = isSafeToExecuteUnconditionally( 1156 *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); 1157 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { 1158 // Stores *of* the pointer are not interesting, only stores *to* the 1159 // pointer. 1160 if (UI->getOperand(1) != ASIV) 1161 continue; 1162 assert(!Store->isVolatile() && "AST broken"); 1163 if (!Store->isUnordered()) 1164 return false; 1165 1166 SawUnorderedAtomic |= Store->isAtomic(); 1167 SawNotAtomic |= !Store->isAtomic(); 1168 1169 // If the store is guaranteed to execute, both properties are satisfied. 1170 // We may want to check if a store is guaranteed to execute even if we 1171 // already know that promotion is safe, since it may have higher 1172 // alignment than any other guaranteed stores, in which case we can 1173 // raise the alignment on the promoted store. 1174 unsigned InstAlignment = Store->getAlignment(); 1175 if (!InstAlignment) 1176 InstAlignment = 1177 MDL.getABITypeAlignment(Store->getValueOperand()->getType()); 1178 1179 if (!DereferenceableInPH || !SafeToInsertStore || 1180 (InstAlignment > Alignment)) { 1181 if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) { 1182 DereferenceableInPH = true; 1183 SafeToInsertStore = true; 1184 Alignment = std::max(Alignment, InstAlignment); 1185 } 1186 } 1187 1188 // If a store dominates all exit blocks, it is safe to sink. 1189 // As explained above, if an exit block was executed, a dominating 1190 // store must have been been executed at least once, so we are not 1191 // introducing stores on paths that did not have them. 1192 // Note that this only looks at explicit exit blocks. If we ever 1193 // start sinking stores into unwind edges (see above), this will break. 1194 if (!SafeToInsertStore) 1195 SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { 1196 return DT->dominates(Store->getParent(), Exit); 1197 }); 1198 1199 // If the store is not guaranteed to execute, we may still get 1200 // deref info through it. 1201 if (!DereferenceableInPH) { 1202 DereferenceableInPH = isDereferenceableAndAlignedPointer( 1203 Store->getPointerOperand(), Store->getAlignment(), MDL, 1204 Preheader->getTerminator(), DT); 1205 } 1206 } else 1207 return false; // Not a load or store. 1208 1209 // Merge the AA tags. 1210 if (LoopUses.empty()) { 1211 // On the first load/store, just take its AA tags. 1212 UI->getAAMetadata(AATags); 1213 } else if (AATags) { 1214 UI->getAAMetadata(AATags, /* Merge = */ true); 1215 } 1216 1217 LoopUses.push_back(UI); 1218 } 1219 } 1220 1221 // If we found both an unordered atomic instruction and a non-atomic memory 1222 // access, bail. We can't blindly promote non-atomic to atomic since we 1223 // might not be able to lower the result. We can't downgrade since that 1224 // would violate memory model. Also, align 0 is an error for atomics. 1225 if (SawUnorderedAtomic && SawNotAtomic) 1226 return false; 1227 1228 // If we couldn't prove we can hoist the load, bail. 1229 if (!DereferenceableInPH) 1230 return false; 1231 1232 // We know we can hoist the load, but don't have a guaranteed store. 1233 // Check whether the location is thread-local. If it is, then we can insert 1234 // stores along paths which originally didn't have them without violating the 1235 // memory model. 1236 if (!SafeToInsertStore) { 1237 // If this is a known non-escaping object, it is safe to insert the stores. 1238 if (IsKnownNonEscapingObject) 1239 SafeToInsertStore = true; 1240 else { 1241 Value *Object = GetUnderlyingObject(SomePtr, MDL); 1242 SafeToInsertStore = 1243 (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && 1244 !PointerMayBeCaptured(Object, true, true); 1245 } 1246 } 1247 1248 // If we've still failed to prove we can sink the store, give up. 1249 if (!SafeToInsertStore) 1250 return false; 1251 1252 // Otherwise, this is safe to promote, lets do it! 1253 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr 1254 << '\n'); 1255 ORE->emit( 1256 OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) 1257 << "Moving accesses to memory location out of the loop"); 1258 ++NumPromoted; 1259 1260 // Grab a debug location for the inserted loads/stores; given that the 1261 // inserted loads/stores have little relation to the original loads/stores, 1262 // this code just arbitrarily picks a location from one, since any debug 1263 // location is better than none. 1264 DebugLoc DL = LoopUses[0]->getDebugLoc(); 1265 1266 // We use the SSAUpdater interface to insert phi nodes as required. 1267 SmallVector<PHINode *, 16> NewPHIs; 1268 SSAUpdater SSA(&NewPHIs); 1269 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 1270 InsertPts, PIC, *CurAST, *LI, DL, Alignment, 1271 SawUnorderedAtomic, AATags); 1272 1273 // Set up the preheader to have a definition of the value. It is the live-out 1274 // value from the preheader that uses in the loop will use. 1275 LoadInst *PreheaderLoad = new LoadInst( 1276 SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); 1277 if (SawUnorderedAtomic) 1278 PreheaderLoad->setOrdering(AtomicOrdering::Unordered); 1279 PreheaderLoad->setAlignment(Alignment); 1280 PreheaderLoad->setDebugLoc(DL); 1281 if (AATags) 1282 PreheaderLoad->setAAMetadata(AATags); 1283 SSA.AddAvailableValue(Preheader, PreheaderLoad); 1284 1285 // Rewrite all the loads in the loop and remember all the definitions from 1286 // stores in the loop. 1287 Promoter.run(LoopUses); 1288 1289 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 1290 if (PreheaderLoad->use_empty()) 1291 PreheaderLoad->eraseFromParent(); 1292 1293 return true; 1294 } 1295 1296 /// Returns an owning pointer to an alias set which incorporates aliasing info 1297 /// from L and all subloops of L. 1298 /// FIXME: In new pass manager, there is no helper function to handle loop 1299 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed 1300 /// from scratch for every loop. Hook up with the helper functions when 1301 /// available in the new pass manager to avoid redundant computation. 1302 AliasSetTracker * 1303 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, 1304 AliasAnalysis *AA) { 1305 AliasSetTracker *CurAST = nullptr; 1306 SmallVector<Loop *, 4> RecomputeLoops; 1307 for (Loop *InnerL : L->getSubLoops()) { 1308 auto MapI = LoopToAliasSetMap.find(InnerL); 1309 // If the AST for this inner loop is missing it may have been merged into 1310 // some other loop's AST and then that loop unrolled, and so we need to 1311 // recompute it. 1312 if (MapI == LoopToAliasSetMap.end()) { 1313 RecomputeLoops.push_back(InnerL); 1314 continue; 1315 } 1316 AliasSetTracker *InnerAST = MapI->second; 1317 1318 if (CurAST != nullptr) { 1319 // What if InnerLoop was modified by other passes ? 1320 CurAST->add(*InnerAST); 1321 1322 // Once we've incorporated the inner loop's AST into ours, we don't need 1323 // the subloop's anymore. 1324 delete InnerAST; 1325 } else { 1326 CurAST = InnerAST; 1327 } 1328 LoopToAliasSetMap.erase(MapI); 1329 } 1330 if (CurAST == nullptr) 1331 CurAST = new AliasSetTracker(*AA); 1332 1333 auto mergeLoop = [&](Loop *L) { 1334 // Loop over the body of this loop, looking for calls, invokes, and stores. 1335 for (BasicBlock *BB : L->blocks()) 1336 CurAST->add(*BB); // Incorporate the specified basic block 1337 }; 1338 1339 // Add everything from the sub loops that are no longer directly available. 1340 for (Loop *InnerL : RecomputeLoops) 1341 mergeLoop(InnerL); 1342 1343 // And merge in this loop. 1344 mergeLoop(L); 1345 1346 return CurAST; 1347 } 1348 1349 /// Simple analysis hook. Clone alias set info. 1350 /// 1351 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, 1352 Loop *L) { 1353 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1354 if (!AST) 1355 return; 1356 1357 AST->copyValue(From, To); 1358 } 1359 1360 /// Simple Analysis hook. Delete value V from alias set 1361 /// 1362 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { 1363 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1364 if (!AST) 1365 return; 1366 1367 AST->deleteValue(V); 1368 } 1369 1370 /// Simple Analysis hook. Delete value L from alias set map. 1371 /// 1372 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { 1373 AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); 1374 if (!AST) 1375 return; 1376 1377 delete AST; 1378 LICM.getLoopToAliasSetMap().erase(L); 1379 } 1380 1381 /// Return true if the body of this loop may store into the memory 1382 /// location pointed to by V. 1383 /// 1384 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 1385 const AAMDNodes &AAInfo, 1386 AliasSetTracker *CurAST) { 1387 // Check to see if any of the basic blocks in CurLoop invalidate *V. 1388 return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); 1389 } 1390 1391 /// Little predicate that returns true if the specified basic block is in 1392 /// a subloop of the current one, not the current one itself. 1393 /// 1394 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { 1395 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 1396 return LI->getLoopFor(BB) != CurLoop; 1397 } 1398