1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion, attempting to remove as much
11 // code from the body of a loop as possible.  It does this by either hoisting
12 // code into the preheader block, or by sinking code to the exit blocks if it is
13 // safe.  This pass also promotes must-aliased memory locations in the loop to
14 // live in registers, thus hoisting and sinking "invariant" loads and stores.
15 //
16 // This pass uses alias analysis for two purposes:
17 //
18 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
19 //     that a load or call inside of a loop never aliases anything stored to,
20 //     we can hoist it or sink it like any other instruction.
21 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
22 //     the loop, we try to move the store to happen AFTER the loop instead of
23 //     inside of the loop.  This can only happen if a few conditions are true:
24 //       A. The pointer stored through is loop invariant
25 //       B. There are no stores or loads in the loop which _may_ alias the
26 //          pointer.  There are no calls in the loop which mod/ref the pointer.
27 //     If these conditions are true, we can promote the loads and stores in the
28 //     loop of the pointer to use a temporary alloca'd variable.  We then use
29 //     the SSAUpdater to construct the appropriate SSA form for the value.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "llvm/Transforms/Scalar/LICM.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/Analysis/AliasAnalysis.h"
36 #include "llvm/Analysis/AliasSetTracker.h"
37 #include "llvm/Analysis/BasicAliasAnalysis.h"
38 #include "llvm/Analysis/CaptureTracking.h"
39 #include "llvm/Analysis/ConstantFolding.h"
40 #include "llvm/Analysis/GlobalsModRef.h"
41 #include "llvm/Analysis/Loads.h"
42 #include "llvm/Analysis/LoopInfo.h"
43 #include "llvm/Analysis/LoopPass.h"
44 #include "llvm/Analysis/MemoryBuiltins.h"
45 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
46 #include "llvm/Analysis/ScalarEvolution.h"
47 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
48 #include "llvm/Analysis/TargetLibraryInfo.h"
49 #include "llvm/Analysis/ValueTracking.h"
50 #include "llvm/IR/CFG.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/PredIteratorCache.h"
60 #include "llvm/Support/CommandLine.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/raw_ostream.h"
63 #include "llvm/Transforms/Scalar.h"
64 #include "llvm/Transforms/Scalar/LoopPassManager.h"
65 #include "llvm/Transforms/Utils/Local.h"
66 #include "llvm/Transforms/Utils/LoopUtils.h"
67 #include "llvm/Transforms/Utils/SSAUpdater.h"
68 #include <algorithm>
69 #include <utility>
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "licm"
73 
74 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
75 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
76 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
77 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
78 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
79 
80 /// Memory promotion is enabled by default.
81 static cl::opt<bool>
82     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
83                      cl::desc("Disable memory promotion in LICM pass"));
84 
85 static cl::opt<uint32_t> MaxNumUsesTraversed(
86     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
87     cl::desc("Max num uses visited for identifying load "
88              "invariance in loop using invariant start (default = 8)"));
89 
90 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
91 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
92                             const LoopSafetyInfo *SafetyInfo);
93 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
94                   const LoopSafetyInfo *SafetyInfo,
95                   OptimizationRemarkEmitter *ORE);
96 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
97                  const Loop *CurLoop, AliasSetTracker *CurAST,
98                  const LoopSafetyInfo *SafetyInfo,
99                  OptimizationRemarkEmitter *ORE);
100 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
101                                            const DominatorTree *DT,
102                                            const Loop *CurLoop,
103                                            const LoopSafetyInfo *SafetyInfo,
104                                            OptimizationRemarkEmitter *ORE,
105                                            const Instruction *CtxI = nullptr);
106 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
107                                      const AAMDNodes &AAInfo,
108                                      AliasSetTracker *CurAST);
109 static Instruction *
110 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
111                             const LoopInfo *LI,
112                             const LoopSafetyInfo *SafetyInfo);
113 
114 namespace {
115 struct LoopInvariantCodeMotion {
116   bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
117                  TargetLibraryInfo *TLI, ScalarEvolution *SE,
118                  OptimizationRemarkEmitter *ORE, bool DeleteAST);
119 
120   DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
121     return LoopToAliasSetMap;
122   }
123 
124 private:
125   DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
126 
127   AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
128                                            AliasAnalysis *AA);
129 };
130 
131 struct LegacyLICMPass : public LoopPass {
132   static char ID; // Pass identification, replacement for typeid
133   LegacyLICMPass() : LoopPass(ID) {
134     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
135   }
136 
137   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
138     if (skipLoop(L)) {
139       // If we have run LICM on a previous loop but now we are skipping
140       // (because we've hit the opt-bisect limit), we need to clear the
141       // loop alias information.
142       for (auto &LTAS : LICM.getLoopToAliasSetMap())
143         delete LTAS.second;
144       LICM.getLoopToAliasSetMap().clear();
145       return false;
146     }
147 
148     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
149     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
150     // pass.  Function analyses need to be preserved across loop transformations
151     // but ORE cannot be preserved (see comment before the pass definition).
152     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
153     return LICM.runOnLoop(L,
154                           &getAnalysis<AAResultsWrapperPass>().getAAResults(),
155                           &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
156                           &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
157                           &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
158                           SE ? &SE->getSE() : nullptr, &ORE, false);
159   }
160 
161   /// This transformation requires natural loop information & requires that
162   /// loop preheaders be inserted into the CFG...
163   ///
164   void getAnalysisUsage(AnalysisUsage &AU) const override {
165     AU.setPreservesCFG();
166     AU.addRequired<TargetLibraryInfoWrapperPass>();
167     getLoopAnalysisUsage(AU);
168   }
169 
170   using llvm::Pass::doFinalization;
171 
172   bool doFinalization() override {
173     assert(LICM.getLoopToAliasSetMap().empty() &&
174            "Didn't free loop alias sets");
175     return false;
176   }
177 
178 private:
179   LoopInvariantCodeMotion LICM;
180 
181   /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
182   void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
183                                Loop *L) override;
184 
185   /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
186   /// set.
187   void deleteAnalysisValue(Value *V, Loop *L) override;
188 
189   /// Simple Analysis hook. Delete loop L from alias set map.
190   void deleteAnalysisLoop(Loop *L) override;
191 };
192 }
193 
194 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
195                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
196   const auto &FAM =
197       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
198   Function *F = L.getHeader()->getParent();
199 
200   auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
201   // FIXME: This should probably be optional rather than required.
202   if (!ORE)
203     report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
204                        "cached at a higher level");
205 
206   LoopInvariantCodeMotion LICM;
207   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
208     return PreservedAnalyses::all();
209 
210   auto PA = getLoopPassPreservedAnalyses();
211   PA.preserveSet<CFGAnalyses>();
212   return PA;
213 }
214 
215 char LegacyLICMPass::ID = 0;
216 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
217                       false, false)
218 INITIALIZE_PASS_DEPENDENCY(LoopPass)
219 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
220 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
221                     false)
222 
223 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
224 
225 /// Hoist expressions out of the specified loop. Note, alias info for inner
226 /// loop is not preserved so it is not a good idea to run LICM multiple
227 /// times on one loop.
228 /// We should delete AST for inner loops in the new pass manager to avoid
229 /// memory leak.
230 ///
231 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
232                                         LoopInfo *LI, DominatorTree *DT,
233                                         TargetLibraryInfo *TLI,
234                                         ScalarEvolution *SE,
235                                         OptimizationRemarkEmitter *ORE,
236                                         bool DeleteAST) {
237   bool Changed = false;
238 
239   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
240 
241   AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);
242 
243   // Get the preheader block to move instructions into...
244   BasicBlock *Preheader = L->getLoopPreheader();
245 
246   // Compute loop safety information.
247   LoopSafetyInfo SafetyInfo;
248   computeLoopSafetyInfo(&SafetyInfo, L);
249 
250   // We want to visit all of the instructions in this loop... that are not parts
251   // of our subloops (they have already had their invariants hoisted out of
252   // their loop, into this loop, so there is no need to process the BODIES of
253   // the subloops).
254   //
255   // Traverse the body of the loop in depth first order on the dominator tree so
256   // that we are guaranteed to see definitions before we see uses.  This allows
257   // us to sink instructions in one pass, without iteration.  After sinking
258   // instructions, we perform another pass to hoist them out of the loop.
259   //
260   if (L->hasDedicatedExits())
261     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
262                           CurAST, &SafetyInfo, ORE);
263   if (Preheader)
264     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
265                            CurAST, &SafetyInfo, ORE);
266 
267   // Now that all loop invariants have been removed from the loop, promote any
268   // memory references to scalars that we can.
269   // Don't sink stores from loops without dedicated block exits. Exits
270   // containing indirect branches are not transformed by loop simplify,
271   // make sure we catch that. An additional load may be generated in the
272   // preheader for SSA updater, so also avoid sinking when no preheader
273   // is available.
274   if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
275     // Figure out the loop exits and their insertion points
276     SmallVector<BasicBlock *, 8> ExitBlocks;
277     L->getUniqueExitBlocks(ExitBlocks);
278 
279     // We can't insert into a catchswitch.
280     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
281       return isa<CatchSwitchInst>(Exit->getTerminator());
282     });
283 
284     if (!HasCatchSwitch) {
285       SmallVector<Instruction *, 8> InsertPts;
286       InsertPts.reserve(ExitBlocks.size());
287       for (BasicBlock *ExitBlock : ExitBlocks)
288         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
289 
290       PredIteratorCache PIC;
291 
292       bool Promoted = false;
293 
294       // Loop over all of the alias sets in the tracker object.
295       for (AliasSet &AS : *CurAST)
296         Promoted |=
297             promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
298                                          TLI, L, CurAST, &SafetyInfo, ORE);
299 
300       // Once we have promoted values across the loop body we have to
301       // recursively reform LCSSA as any nested loop may now have values defined
302       // within the loop used in the outer loop.
303       // FIXME: This is really heavy handed. It would be a bit better to use an
304       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
305       // it as it went.
306       if (Promoted)
307         formLCSSARecursively(*L, *DT, LI, SE);
308 
309       Changed |= Promoted;
310     }
311   }
312 
313   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
314   // specifically moving instructions across the loop boundary and so it is
315   // especially in need of sanity checking here.
316   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
317   assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
318          "Parent loop not left in LCSSA form after LICM!");
319 
320   // If this loop is nested inside of another one, save the alias information
321   // for when we process the outer loop.
322   if (L->getParentLoop() && !DeleteAST)
323     LoopToAliasSetMap[L] = CurAST;
324   else
325     delete CurAST;
326 
327   if (Changed && SE)
328     SE->forgetLoopDispositions(L);
329   return Changed;
330 }
331 
332 /// Walk the specified region of the CFG (defined by all blocks dominated by
333 /// the specified block, and that are in the current loop) in reverse depth
334 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
335 /// definitions, allowing us to sink a loop body in one pass without iteration.
336 ///
337 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
338                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
339                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
340                       OptimizationRemarkEmitter *ORE) {
341 
342   // Verify inputs.
343   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
344          CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
345          "Unexpected input to sinkRegion");
346 
347   BasicBlock *BB = N->getBlock();
348   // If this subregion is not in the top level loop at all, exit.
349   if (!CurLoop->contains(BB))
350     return false;
351 
352   // We are processing blocks in reverse dfo, so process children first.
353   bool Changed = false;
354   const std::vector<DomTreeNode *> &Children = N->getChildren();
355   for (DomTreeNode *Child : Children)
356     Changed |=
357         sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);
358 
359   // Only need to process the contents of this block if it is not part of a
360   // subloop (which would already have been processed).
361   if (inSubLoop(BB, CurLoop, LI))
362     return Changed;
363 
364   for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
365     Instruction &I = *--II;
366 
367     // If the instruction is dead, we would try to sink it because it isn't used
368     // in the loop, instead, just delete it.
369     if (isInstructionTriviallyDead(&I, TLI)) {
370       DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
371       ++II;
372       CurAST->deleteValue(&I);
373       I.eraseFromParent();
374       Changed = true;
375       continue;
376     }
377 
378     // Check to see if we can sink this instruction to the exit blocks
379     // of the loop.  We can do this if the all users of the instruction are
380     // outside of the loop.  In this case, it doesn't even matter if the
381     // operands of the instruction are loop invariant.
382     //
383     if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
384         canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
385       ++II;
386       Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);
387     }
388   }
389   return Changed;
390 }
391 
392 /// Walk the specified region of the CFG (defined by all blocks dominated by
393 /// the specified block, and that are in the current loop) in depth first
394 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
395 /// uses, allowing us to hoist a loop body in one pass without iteration.
396 ///
397 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
398                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
399                        AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
400                        OptimizationRemarkEmitter *ORE) {
401   // Verify inputs.
402   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
403          CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
404          "Unexpected input to hoistRegion");
405 
406   BasicBlock *BB = N->getBlock();
407 
408   // If this subregion is not in the top level loop at all, exit.
409   if (!CurLoop->contains(BB))
410     return false;
411 
412   // Only need to process the contents of this block if it is not part of a
413   // subloop (which would already have been processed).
414   bool Changed = false;
415   if (!inSubLoop(BB, CurLoop, LI))
416     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
417       Instruction &I = *II++;
418       // Try constant folding this instruction.  If all the operands are
419       // constants, it is technically hoistable, but it would be better to just
420       // fold it.
421       if (Constant *C = ConstantFoldInstruction(
422               &I, I.getModule()->getDataLayout(), TLI)) {
423         DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
424         CurAST->copyValue(&I, C);
425         I.replaceAllUsesWith(C);
426         if (isInstructionTriviallyDead(&I, TLI)) {
427           CurAST->deleteValue(&I);
428           I.eraseFromParent();
429         }
430         Changed = true;
431         continue;
432       }
433 
434       // Attempt to remove floating point division out of the loop by converting
435       // it to a reciprocal multiplication.
436       if (I.getOpcode() == Instruction::FDiv &&
437           CurLoop->isLoopInvariant(I.getOperand(1)) &&
438           I.hasAllowReciprocal()) {
439         auto Divisor = I.getOperand(1);
440         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
441         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
442         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
443         ReciprocalDivisor->insertBefore(&I);
444 
445         auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
446                                                   ReciprocalDivisor);
447         Product->setFastMathFlags(I.getFastMathFlags());
448         Product->insertAfter(&I);
449         I.replaceAllUsesWith(Product);
450         I.eraseFromParent();
451 
452         hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
453         Changed = true;
454         continue;
455       }
456 
457       // Try hoisting the instruction out to the preheader.  We can only do this
458       // if all of the operands of the instruction are loop invariant and if it
459       // is safe to hoist the instruction.
460       //
461       if (CurLoop->hasLoopInvariantOperands(&I) &&
462           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
463           isSafeToExecuteUnconditionally(
464               I, DT, CurLoop, SafetyInfo, ORE,
465               CurLoop->getLoopPreheader()->getTerminator()))
466         Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
467     }
468 
469   const std::vector<DomTreeNode *> &Children = N->getChildren();
470   for (DomTreeNode *Child : Children)
471     Changed |=
472         hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);
473   return Changed;
474 }
475 
476 /// Computes loop safety information, checks loop body & header
477 /// for the possibility of may throw exception.
478 ///
479 void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
480   assert(CurLoop != nullptr && "CurLoop cant be null");
481   BasicBlock *Header = CurLoop->getHeader();
482   // Setting default safety values.
483   SafetyInfo->MayThrow = false;
484   SafetyInfo->HeaderMayThrow = false;
485   // Iterate over header and compute safety info.
486   for (BasicBlock::iterator I = Header->begin(), E = Header->end();
487        (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
488     SafetyInfo->HeaderMayThrow |=
489         !isGuaranteedToTransferExecutionToSuccessor(&*I);
490 
491   SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
492   // Iterate over loop instructions and compute safety info.
493   // Skip header as it has been computed and stored in HeaderMayThrow.
494   // The first block in loopinfo.Blocks is guaranteed to be the header.
495   assert(Header == *CurLoop->getBlocks().begin() && "First block must be header");
496   for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
497                             BBE = CurLoop->block_end();
498        (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
499     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
500          (I != E) && !SafetyInfo->MayThrow; ++I)
501       SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
502 
503   // Compute funclet colors if we might sink/hoist in a function with a funclet
504   // personality routine.
505   Function *Fn = CurLoop->getHeader()->getParent();
506   if (Fn->hasPersonalityFn())
507     if (Constant *PersonalityFn = Fn->getPersonalityFn())
508       if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
509         SafetyInfo->BlockColors = colorEHFunclets(*Fn);
510 }
511 
512 // Return true if LI is invariant within scope of the loop. LI is invariant if
513 // CurLoop is dominated by an invariant.start representing the same memory location
514 // and size as the memory location LI loads from, and also the invariant.start
515 // has no uses.
516 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
517                                   Loop *CurLoop) {
518   Value *Addr = LI->getOperand(0);
519   const DataLayout &DL = LI->getModule()->getDataLayout();
520   const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
521       cast<PointerType>(Addr->getType())->getElementType());
522 
523   // if the type is i8 addrspace(x)*, we know this is the type of
524   // llvm.invariant.start operand
525   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
526                                      LI->getPointerAddressSpace());
527   unsigned BitcastsVisited = 0;
528   // Look through bitcasts until we reach the i8* type (this is invariant.start
529   // operand type).
530   while (Addr->getType() != PtrInt8Ty) {
531     auto *BC = dyn_cast<BitCastInst>(Addr);
532     // Avoid traversing high number of bitcast uses.
533     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
534       return false;
535     Addr = BC->getOperand(0);
536   }
537 
538   unsigned UsesVisited = 0;
539   // Traverse all uses of the load operand value, to see if invariant.start is
540   // one of the uses, and whether it dominates the load instruction.
541   for (auto *U : Addr->users()) {
542     // Avoid traversing for Load operand with high number of users.
543     if (++UsesVisited > MaxNumUsesTraversed)
544       return false;
545     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
546     // If there are escaping uses of invariant.start instruction, the load maybe
547     // non-invariant.
548     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
549         !II->use_empty())
550       continue;
551     unsigned InvariantSizeInBits =
552         cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
553     // Confirm the invariant.start location size contains the load operand size
554     // in bits. Also, the invariant.start should dominate the load, and we
555     // should not hoist the load out of a loop that contains this dominating
556     // invariant.start.
557     if (LocSizeInBits <= InvariantSizeInBits &&
558         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
559       return true;
560   }
561 
562   return false;
563 }
564 
565 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
566                               Loop *CurLoop, AliasSetTracker *CurAST,
567                               LoopSafetyInfo *SafetyInfo,
568                               OptimizationRemarkEmitter *ORE) {
569   // Loads have extra constraints we have to verify before we can hoist them.
570   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
571     if (!LI->isUnordered())
572       return false; // Don't hoist volatile/atomic loads!
573 
574     // Loads from constant memory are always safe to move, even if they end up
575     // in the same alias set as something that ends up being modified.
576     if (AA->pointsToConstantMemory(LI->getOperand(0)))
577       return true;
578     if (LI->getMetadata(LLVMContext::MD_invariant_load))
579       return true;
580 
581     // This checks for an invariant.start dominating the load.
582     if (isLoadInvariantInLoop(LI, DT, CurLoop))
583       return true;
584 
585     // Don't hoist loads which have may-aliased stores in loop.
586     uint64_t Size = 0;
587     if (LI->getType()->isSized())
588       Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
589 
590     AAMDNodes AAInfo;
591     LI->getAAMetadata(AAInfo);
592 
593     bool Invalidated =
594         pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
595     // Check loop-invariant address because this may also be a sinkable load
596     // whose address is not necessarily loop-invariant.
597     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
598       ORE->emit(OptimizationRemarkMissed(
599                     DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
600                 << "failed to move load with loop-invariant address "
601                    "because the loop may invalidate its value");
602 
603     return !Invalidated;
604   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
605     // Don't sink or hoist dbg info; it's legal, but not useful.
606     if (isa<DbgInfoIntrinsic>(I))
607       return false;
608 
609     // Don't sink calls which can throw.
610     if (CI->mayThrow())
611       return false;
612 
613     // Handle simple cases by querying alias analysis.
614     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
615     if (Behavior == FMRB_DoesNotAccessMemory)
616       return true;
617     if (AliasAnalysis::onlyReadsMemory(Behavior)) {
618       // A readonly argmemonly function only reads from memory pointed to by
619       // it's arguments with arbitrary offsets.  If we can prove there are no
620       // writes to this memory in the loop, we can hoist or sink.
621       if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
622         for (Value *Op : CI->arg_operands())
623           if (Op->getType()->isPointerTy() &&
624               pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
625                                        AAMDNodes(), CurAST))
626             return false;
627         return true;
628       }
629       // If this call only reads from memory and there are no writes to memory
630       // in the loop, we can hoist or sink the call as appropriate.
631       bool FoundMod = false;
632       for (AliasSet &AS : *CurAST) {
633         if (!AS.isForwardingAliasSet() && AS.isMod()) {
634           FoundMod = true;
635           break;
636         }
637       }
638       if (!FoundMod)
639         return true;
640     }
641 
642     // FIXME: This should use mod/ref information to see if we can hoist or
643     // sink the call.
644 
645     return false;
646   }
647 
648   // Only these instructions are hoistable/sinkable.
649   if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
650       !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
651       !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
652       !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
653       !isa<InsertValueInst>(I))
654     return false;
655 
656   // SafetyInfo is nullptr if we are checking for sinking from preheader to
657   // loop body. It will be always safe as there is no speculative execution.
658   if (!SafetyInfo)
659     return true;
660 
661   // TODO: Plumb the context instruction through to make hoisting and sinking
662   // more powerful. Hoisting of loads already works due to the special casing
663   // above.
664   return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
665 }
666 
667 /// Returns true if a PHINode is a trivially replaceable with an
668 /// Instruction.
669 /// This is true when all incoming values are that instruction.
670 /// This pattern occurs most often with LCSSA PHI nodes.
671 ///
672 static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
673   for (const Value *IncValue : PN.incoming_values())
674     if (IncValue != &I)
675       return false;
676 
677   return true;
678 }
679 
680 /// Return true if the only users of this instruction are outside of
681 /// the loop. If this is true, we can sink the instruction to the exit
682 /// blocks of the loop.
683 ///
684 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
685                             const LoopSafetyInfo *SafetyInfo) {
686   const auto &BlockColors = SafetyInfo->BlockColors;
687   for (const User *U : I.users()) {
688     const Instruction *UI = cast<Instruction>(U);
689     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
690       const BasicBlock *BB = PN->getParent();
691       // We cannot sink uses in catchswitches.
692       if (isa<CatchSwitchInst>(BB->getTerminator()))
693         return false;
694 
695       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
696       // phi use is too muddled.
697       if (isa<CallInst>(I))
698         if (!BlockColors.empty() &&
699             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
700           return false;
701 
702       // A PHI node where all of the incoming values are this instruction are
703       // special -- they can just be RAUW'ed with the instruction and thus
704       // don't require a use in the predecessor. This is a particular important
705       // special case because it is the pattern found in LCSSA form.
706       if (isTriviallyReplacablePHI(*PN, I)) {
707         if (CurLoop->contains(PN))
708           return false;
709         else
710           continue;
711       }
712 
713       // Otherwise, PHI node uses occur in predecessor blocks if the incoming
714       // values. Check for such a use being inside the loop.
715       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
716         if (PN->getIncomingValue(i) == &I)
717           if (CurLoop->contains(PN->getIncomingBlock(i)))
718             return false;
719 
720       continue;
721     }
722 
723     if (CurLoop->contains(UI))
724       return false;
725   }
726   return true;
727 }
728 
729 static Instruction *
730 CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
731                             const LoopInfo *LI,
732                             const LoopSafetyInfo *SafetyInfo) {
733   Instruction *New;
734   if (auto *CI = dyn_cast<CallInst>(&I)) {
735     const auto &BlockColors = SafetyInfo->BlockColors;
736 
737     // Sinking call-sites need to be handled differently from other
738     // instructions.  The cloned call-site needs a funclet bundle operand
739     // appropriate for it's location in the CFG.
740     SmallVector<OperandBundleDef, 1> OpBundles;
741     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
742          BundleIdx != BundleEnd; ++BundleIdx) {
743       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
744       if (Bundle.getTagID() == LLVMContext::OB_funclet)
745         continue;
746 
747       OpBundles.emplace_back(Bundle);
748     }
749 
750     if (!BlockColors.empty()) {
751       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
752       assert(CV.size() == 1 && "non-unique color for exit block!");
753       BasicBlock *BBColor = CV.front();
754       Instruction *EHPad = BBColor->getFirstNonPHI();
755       if (EHPad->isEHPad())
756         OpBundles.emplace_back("funclet", EHPad);
757     }
758 
759     New = CallInst::Create(CI, OpBundles);
760   } else {
761     New = I.clone();
762   }
763 
764   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
765   if (!I.getName().empty())
766     New->setName(I.getName() + ".le");
767 
768   // Build LCSSA PHI nodes for any in-loop operands. Note that this is
769   // particularly cheap because we can rip off the PHI node that we're
770   // replacing for the number and blocks of the predecessors.
771   // OPT: If this shows up in a profile, we can instead finish sinking all
772   // invariant instructions, and then walk their operands to re-establish
773   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
774   // sinking bottom-up.
775   for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
776        ++OI)
777     if (Instruction *OInst = dyn_cast<Instruction>(*OI))
778       if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
779         if (!OLoop->contains(&PN)) {
780           PHINode *OpPN =
781               PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
782                               OInst->getName() + ".lcssa", &ExitBlock.front());
783           for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
784             OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
785           *OI = OpPN;
786         }
787   return New;
788 }
789 
790 /// When an instruction is found to only be used outside of the loop, this
791 /// function moves it to the exit blocks and patches up SSA form as needed.
792 /// This method is guaranteed to remove the original instruction from its
793 /// position, and may either delete it or move it to outside of the loop.
794 ///
795 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
796                  const Loop *CurLoop, AliasSetTracker *CurAST,
797                  const LoopSafetyInfo *SafetyInfo,
798                  OptimizationRemarkEmitter *ORE) {
799   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
800   ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
801             << "sinking " << ore::NV("Inst", &I));
802   bool Changed = false;
803   if (isa<LoadInst>(I))
804     ++NumMovedLoads;
805   else if (isa<CallInst>(I))
806     ++NumMovedCalls;
807   ++NumSunk;
808   Changed = true;
809 
810 #ifndef NDEBUG
811   SmallVector<BasicBlock *, 32> ExitBlocks;
812   CurLoop->getUniqueExitBlocks(ExitBlocks);
813   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
814                                              ExitBlocks.end());
815 #endif
816 
817   // Clones of this instruction. Don't create more than one per exit block!
818   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
819 
820   // If this instruction is only used outside of the loop, then all users are
821   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
822   // the instruction.
823   while (!I.use_empty()) {
824     Value::user_iterator UI = I.user_begin();
825     auto *User = cast<Instruction>(*UI);
826     if (!DT->isReachableFromEntry(User->getParent())) {
827       User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
828       continue;
829     }
830     // The user must be a PHI node.
831     PHINode *PN = cast<PHINode>(User);
832 
833     // Surprisingly, instructions can be used outside of loops without any
834     // exits.  This can only happen in PHI nodes if the incoming block is
835     // unreachable.
836     Use &U = UI.getUse();
837     BasicBlock *BB = PN->getIncomingBlock(U);
838     if (!DT->isReachableFromEntry(BB)) {
839       U = UndefValue::get(I.getType());
840       continue;
841     }
842 
843     BasicBlock *ExitBlock = PN->getParent();
844     assert(ExitBlockSet.count(ExitBlock) &&
845            "The LCSSA PHI is not in an exit block!");
846 
847     Instruction *New;
848     auto It = SunkCopies.find(ExitBlock);
849     if (It != SunkCopies.end())
850       New = It->second;
851     else
852       New = SunkCopies[ExitBlock] =
853           CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);
854 
855     PN->replaceAllUsesWith(New);
856     PN->eraseFromParent();
857   }
858 
859   CurAST->deleteValue(&I);
860   I.eraseFromParent();
861   return Changed;
862 }
863 
864 /// When an instruction is found to only use loop invariant operands that
865 /// is safe to hoist, this instruction is called to do the dirty work.
866 ///
867 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
868                   const LoopSafetyInfo *SafetyInfo,
869                   OptimizationRemarkEmitter *ORE) {
870   auto *Preheader = CurLoop->getLoopPreheader();
871   DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
872                << "\n");
873   ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
874             << "hoisting " << ore::NV("Inst", &I));
875 
876   // Metadata can be dependent on conditions we are hoisting above.
877   // Conservatively strip all metadata on the instruction unless we were
878   // guaranteed to execute I if we entered the loop, in which case the metadata
879   // is valid in the loop preheader.
880   if (I.hasMetadataOtherThanDebugLoc() &&
881       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
882       // time in isGuaranteedToExecute if we don't actually have anything to
883       // drop.  It is a compile time optimization, not required for correctness.
884       !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
885     I.dropUnknownNonDebugMetadata();
886 
887   // Move the new node to the Preheader, before its terminator.
888   I.moveBefore(Preheader->getTerminator());
889 
890   // Do not retain debug locations when we are moving instructions to different
891   // basic blocks, because we want to avoid jumpy line tables. Calls, however,
892   // need to retain their debug locs because they may be inlined.
893   // FIXME: How do we retain source locations without causing poor debugging
894   // behavior?
895   if (!isa<CallInst>(I))
896     I.setDebugLoc(DebugLoc());
897 
898   if (isa<LoadInst>(I))
899     ++NumMovedLoads;
900   else if (isa<CallInst>(I))
901     ++NumMovedCalls;
902   ++NumHoisted;
903   return true;
904 }
905 
906 /// Only sink or hoist an instruction if it is not a trapping instruction,
907 /// or if the instruction is known not to trap when moved to the preheader.
908 /// or if it is a trapping instruction and is guaranteed to execute.
909 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
910                                            const DominatorTree *DT,
911                                            const Loop *CurLoop,
912                                            const LoopSafetyInfo *SafetyInfo,
913                                            OptimizationRemarkEmitter *ORE,
914                                            const Instruction *CtxI) {
915   if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
916     return true;
917 
918   bool GuaranteedToExecute =
919       isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
920 
921   if (!GuaranteedToExecute) {
922     auto *LI = dyn_cast<LoadInst>(&Inst);
923     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
924       ORE->emit(OptimizationRemarkMissed(
925                     DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
926                 << "failed to hoist load with loop-invariant address "
927                    "because load is conditionally executed");
928   }
929 
930   return GuaranteedToExecute;
931 }
932 
933 namespace {
934 class LoopPromoter : public LoadAndStorePromoter {
935   Value *SomePtr; // Designated pointer to store to.
936   SmallPtrSetImpl<Value *> &PointerMustAliases;
937   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
938   SmallVectorImpl<Instruction *> &LoopInsertPts;
939   PredIteratorCache &PredCache;
940   AliasSetTracker &AST;
941   LoopInfo &LI;
942   DebugLoc DL;
943   int Alignment;
944   bool UnorderedAtomic;
945   AAMDNodes AATags;
946 
947   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
948     if (Instruction *I = dyn_cast<Instruction>(V))
949       if (Loop *L = LI.getLoopFor(I->getParent()))
950         if (!L->contains(BB)) {
951           // We need to create an LCSSA PHI node for the incoming value and
952           // store that.
953           PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
954                                         I->getName() + ".lcssa", &BB->front());
955           for (BasicBlock *Pred : PredCache.get(BB))
956             PN->addIncoming(I, Pred);
957           return PN;
958         }
959     return V;
960   }
961 
962 public:
963   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
964                SmallPtrSetImpl<Value *> &PMA,
965                SmallVectorImpl<BasicBlock *> &LEB,
966                SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
967                AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
968                bool UnorderedAtomic, const AAMDNodes &AATags)
969       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
970         LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
971         LI(li), DL(std::move(dl)), Alignment(alignment),
972         UnorderedAtomic(UnorderedAtomic),AATags(AATags) {}
973 
974   bool isInstInList(Instruction *I,
975                     const SmallVectorImpl<Instruction *> &) const override {
976     Value *Ptr;
977     if (LoadInst *LI = dyn_cast<LoadInst>(I))
978       Ptr = LI->getOperand(0);
979     else
980       Ptr = cast<StoreInst>(I)->getPointerOperand();
981     return PointerMustAliases.count(Ptr);
982   }
983 
984   void doExtraRewritesBeforeFinalDeletion() const override {
985     // Insert stores after in the loop exit blocks.  Each exit block gets a
986     // store of the live-out values that feed them.  Since we've already told
987     // the SSA updater about the defs in the loop and the preheader
988     // definition, it is all set and we can start using it.
989     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
990       BasicBlock *ExitBlock = LoopExitBlocks[i];
991       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
992       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
993       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
994       Instruction *InsertPos = LoopInsertPts[i];
995       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
996       if (UnorderedAtomic)
997         NewSI->setOrdering(AtomicOrdering::Unordered);
998       NewSI->setAlignment(Alignment);
999       NewSI->setDebugLoc(DL);
1000       if (AATags)
1001         NewSI->setAAMetadata(AATags);
1002     }
1003   }
1004 
1005   void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1006     // Update alias analysis.
1007     AST.copyValue(LI, V);
1008   }
1009   void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
1010 };
1011 } // end anon namespace
1012 
1013 /// Try to promote memory values to scalars by sinking stores out of the
1014 /// loop and moving loads to before the loop.  We do this by looping over
1015 /// the stores in the loop, looking for stores to Must pointers which are
1016 /// loop invariant.
1017 ///
1018 bool llvm::promoteLoopAccessesToScalars(
1019     AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
1020     SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
1021     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1022     Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
1023     OptimizationRemarkEmitter *ORE) {
1024   // Verify inputs.
1025   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1026          CurAST != nullptr && SafetyInfo != nullptr &&
1027          "Unexpected Input to promoteLoopAccessesToScalars");
1028 
1029   // We can promote this alias set if it has a store, if it is a "Must" alias
1030   // set, if the pointer is loop invariant, and if we are not eliminating any
1031   // volatile loads or stores.
1032   if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
1033       AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
1034     return false;
1035 
1036   assert(!AS.empty() &&
1037          "Must alias set should have at least one pointer element in it!");
1038 
1039   Value *SomePtr = AS.begin()->getValue();
1040   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1041 
1042   // It isn't safe to promote a load/store from the loop if the load/store is
1043   // conditional.  For example, turning:
1044   //
1045   //    for () { if (c) *P += 1; }
1046   //
1047   // into:
1048   //
1049   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1050   //
1051   // is not safe, because *P may only be valid to access if 'c' is true.
1052   //
1053   // The safety property divides into two parts:
1054   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1055   //    case, we can't insert the required load in the preheader.
1056   // p2) The memory model does not allow us to insert a store along any dynamic
1057   //    path which did not originally have one.
1058   //
1059   // If at least one store is guaranteed to execute, both properties are
1060   // satisfied, and promotion is legal.
1061   //
1062   // This, however, is not a necessary condition. Even if no store/load is
1063   // guaranteed to execute, we can still establish these properties.
1064   // We can establish (p1) by proving that hoisting the load into the preheader
1065   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1066   // can use any access within the alias set to prove dereferenceability,
1067   // since they're all must alias.
1068   //
1069   // There are two ways establish (p2):
1070   // a) Prove the location is thread-local. In this case the memory model
1071   // requirement does not apply, and stores are safe to insert.
1072   // b) Prove a store dominates every exit block. In this case, if an exit
1073   // blocks is reached, the original dynamic path would have taken us through
1074   // the store, so inserting a store into the exit block is safe. Note that this
1075   // is different from the store being guaranteed to execute. For instance,
1076   // if an exception is thrown on the first iteration of the loop, the original
1077   // store is never executed, but the exit blocks are not executed either.
1078 
1079   bool DereferenceableInPH = false;
1080   bool SafeToInsertStore = false;
1081 
1082   SmallVector<Instruction *, 64> LoopUses;
1083   SmallPtrSet<Value *, 4> PointerMustAliases;
1084 
1085   // We start with an alignment of one and try to find instructions that allow
1086   // us to prove better alignment.
1087   unsigned Alignment = 1;
1088   // Keep track of which types of access we see
1089   bool SawUnorderedAtomic = false;
1090   bool SawNotAtomic = false;
1091   AAMDNodes AATags;
1092 
1093   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1094 
1095   // Do we know this object does not escape ?
1096   bool IsKnownNonEscapingObject = false;
1097   if (SafetyInfo->MayThrow) {
1098     // If a loop can throw, we have to insert a store along each unwind edge.
1099     // That said, we can't actually make the unwind edge explicit. Therefore,
1100     // we have to prove that the store is dead along the unwind edge.
1101     //
1102     // If the underlying object is not an alloca, nor a pointer that does not
1103     // escape, then we can not effectively prove that the store is dead along
1104     // the unwind edge. i.e. the caller of this function could have ways to
1105     // access the pointed object.
1106     Value *Object = GetUnderlyingObject(SomePtr, MDL);
1107     // If this is a base pointer we do not understand, simply bail.
1108     // We only handle alloca and return value from alloc-like fn right now.
1109     if (!isa<AllocaInst>(Object)) {
1110         if (!isAllocLikeFn(Object, TLI))
1111           return false;
1112       // If this is an alloc like fn. There are more constraints we need to verify.
1113       // More specifically, we must make sure that the pointer can not escape.
1114       //
1115       // NOTE: PointerMayBeCaptured is not enough as the pointer may have escaped
1116       // even though its not captured by the enclosing function. Standard allocation
1117       // functions like malloc, calloc, and operator new return values which can
1118       // be assumed not to have previously escaped.
1119       if (PointerMayBeCaptured(Object, true, true))
1120         return false;
1121       IsKnownNonEscapingObject = true;
1122     }
1123   }
1124 
1125   // Check that all of the pointers in the alias set have the same type.  We
1126   // cannot (yet) promote a memory location that is loaded and stored in
1127   // different sizes.  While we are at it, collect alignment and AA info.
1128   for (const auto &ASI : AS) {
1129     Value *ASIV = ASI.getValue();
1130     PointerMustAliases.insert(ASIV);
1131 
1132     // Check that all of the pointers in the alias set have the same type.  We
1133     // cannot (yet) promote a memory location that is loaded and stored in
1134     // different sizes.
1135     if (SomePtr->getType() != ASIV->getType())
1136       return false;
1137 
1138     for (User *U : ASIV->users()) {
1139       // Ignore instructions that are outside the loop.
1140       Instruction *UI = dyn_cast<Instruction>(U);
1141       if (!UI || !CurLoop->contains(UI))
1142         continue;
1143 
1144       // If there is an non-load/store instruction in the loop, we can't promote
1145       // it.
1146       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1147         assert(!Load->isVolatile() && "AST broken");
1148         if (!Load->isUnordered())
1149           return false;
1150 
1151         SawUnorderedAtomic |= Load->isAtomic();
1152         SawNotAtomic |= !Load->isAtomic();
1153 
1154         if (!DereferenceableInPH)
1155           DereferenceableInPH = isSafeToExecuteUnconditionally(
1156               *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
1157       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1158         // Stores *of* the pointer are not interesting, only stores *to* the
1159         // pointer.
1160         if (UI->getOperand(1) != ASIV)
1161           continue;
1162         assert(!Store->isVolatile() && "AST broken");
1163         if (!Store->isUnordered())
1164           return false;
1165 
1166         SawUnorderedAtomic |= Store->isAtomic();
1167         SawNotAtomic |= !Store->isAtomic();
1168 
1169         // If the store is guaranteed to execute, both properties are satisfied.
1170         // We may want to check if a store is guaranteed to execute even if we
1171         // already know that promotion is safe, since it may have higher
1172         // alignment than any other guaranteed stores, in which case we can
1173         // raise the alignment on the promoted store.
1174         unsigned InstAlignment = Store->getAlignment();
1175         if (!InstAlignment)
1176           InstAlignment =
1177               MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1178 
1179         if (!DereferenceableInPH || !SafeToInsertStore ||
1180             (InstAlignment > Alignment)) {
1181           if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
1182             DereferenceableInPH = true;
1183             SafeToInsertStore = true;
1184             Alignment = std::max(Alignment, InstAlignment);
1185           }
1186         }
1187 
1188         // If a store dominates all exit blocks, it is safe to sink.
1189         // As explained above, if an exit block was executed, a dominating
1190         // store must have been been executed at least once, so we are not
1191         // introducing stores on paths that did not have them.
1192         // Note that this only looks at explicit exit blocks. If we ever
1193         // start sinking stores into unwind edges (see above), this will break.
1194         if (!SafeToInsertStore)
1195           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
1196             return DT->dominates(Store->getParent(), Exit);
1197           });
1198 
1199         // If the store is not guaranteed to execute, we may still get
1200         // deref info through it.
1201         if (!DereferenceableInPH) {
1202           DereferenceableInPH = isDereferenceableAndAlignedPointer(
1203               Store->getPointerOperand(), Store->getAlignment(), MDL,
1204               Preheader->getTerminator(), DT);
1205         }
1206       } else
1207         return false; // Not a load or store.
1208 
1209       // Merge the AA tags.
1210       if (LoopUses.empty()) {
1211         // On the first load/store, just take its AA tags.
1212         UI->getAAMetadata(AATags);
1213       } else if (AATags) {
1214         UI->getAAMetadata(AATags, /* Merge = */ true);
1215       }
1216 
1217       LoopUses.push_back(UI);
1218     }
1219   }
1220 
1221   // If we found both an unordered atomic instruction and a non-atomic memory
1222   // access, bail.  We can't blindly promote non-atomic to atomic since we
1223   // might not be able to lower the result.  We can't downgrade since that
1224   // would violate memory model.  Also, align 0 is an error for atomics.
1225   if (SawUnorderedAtomic && SawNotAtomic)
1226     return false;
1227 
1228   // If we couldn't prove we can hoist the load, bail.
1229   if (!DereferenceableInPH)
1230     return false;
1231 
1232   // We know we can hoist the load, but don't have a guaranteed store.
1233   // Check whether the location is thread-local. If it is, then we can insert
1234   // stores along paths which originally didn't have them without violating the
1235   // memory model.
1236   if (!SafeToInsertStore) {
1237     // If this is a known non-escaping object, it is safe to insert the stores.
1238     if (IsKnownNonEscapingObject)
1239       SafeToInsertStore = true;
1240     else {
1241       Value *Object = GetUnderlyingObject(SomePtr, MDL);
1242       SafeToInsertStore =
1243         (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
1244         !PointerMayBeCaptured(Object, true, true);
1245     }
1246   }
1247 
1248   // If we've still failed to prove we can sink the store, give up.
1249   if (!SafeToInsertStore)
1250     return false;
1251 
1252   // Otherwise, this is safe to promote, lets do it!
1253   DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
1254                << '\n');
1255   ORE->emit(
1256       OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0])
1257       << "Moving accesses to memory location out of the loop");
1258   ++NumPromoted;
1259 
1260   // Grab a debug location for the inserted loads/stores; given that the
1261   // inserted loads/stores have little relation to the original loads/stores,
1262   // this code just arbitrarily picks a location from one, since any debug
1263   // location is better than none.
1264   DebugLoc DL = LoopUses[0]->getDebugLoc();
1265 
1266   // We use the SSAUpdater interface to insert phi nodes as required.
1267   SmallVector<PHINode *, 16> NewPHIs;
1268   SSAUpdater SSA(&NewPHIs);
1269   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
1270                         InsertPts, PIC, *CurAST, *LI, DL, Alignment,
1271                         SawUnorderedAtomic, AATags);
1272 
1273   // Set up the preheader to have a definition of the value.  It is the live-out
1274   // value from the preheader that uses in the loop will use.
1275   LoadInst *PreheaderLoad = new LoadInst(
1276       SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
1277   if (SawUnorderedAtomic)
1278     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
1279   PreheaderLoad->setAlignment(Alignment);
1280   PreheaderLoad->setDebugLoc(DL);
1281   if (AATags)
1282     PreheaderLoad->setAAMetadata(AATags);
1283   SSA.AddAvailableValue(Preheader, PreheaderLoad);
1284 
1285   // Rewrite all the loads in the loop and remember all the definitions from
1286   // stores in the loop.
1287   Promoter.run(LoopUses);
1288 
1289   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
1290   if (PreheaderLoad->use_empty())
1291     PreheaderLoad->eraseFromParent();
1292 
1293   return true;
1294 }
1295 
1296 /// Returns an owning pointer to an alias set which incorporates aliasing info
1297 /// from L and all subloops of L.
1298 /// FIXME: In new pass manager, there is no helper function to handle loop
1299 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
1300 /// from scratch for every loop. Hook up with the helper functions when
1301 /// available in the new pass manager to avoid redundant computation.
1302 AliasSetTracker *
1303 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
1304                                                  AliasAnalysis *AA) {
1305   AliasSetTracker *CurAST = nullptr;
1306   SmallVector<Loop *, 4> RecomputeLoops;
1307   for (Loop *InnerL : L->getSubLoops()) {
1308     auto MapI = LoopToAliasSetMap.find(InnerL);
1309     // If the AST for this inner loop is missing it may have been merged into
1310     // some other loop's AST and then that loop unrolled, and so we need to
1311     // recompute it.
1312     if (MapI == LoopToAliasSetMap.end()) {
1313       RecomputeLoops.push_back(InnerL);
1314       continue;
1315     }
1316     AliasSetTracker *InnerAST = MapI->second;
1317 
1318     if (CurAST != nullptr) {
1319       // What if InnerLoop was modified by other passes ?
1320       CurAST->add(*InnerAST);
1321 
1322       // Once we've incorporated the inner loop's AST into ours, we don't need
1323       // the subloop's anymore.
1324       delete InnerAST;
1325     } else {
1326       CurAST = InnerAST;
1327     }
1328     LoopToAliasSetMap.erase(MapI);
1329   }
1330   if (CurAST == nullptr)
1331     CurAST = new AliasSetTracker(*AA);
1332 
1333   auto mergeLoop = [&](Loop *L) {
1334     // Loop over the body of this loop, looking for calls, invokes, and stores.
1335     for (BasicBlock *BB : L->blocks())
1336         CurAST->add(*BB);          // Incorporate the specified basic block
1337   };
1338 
1339   // Add everything from the sub loops that are no longer directly available.
1340   for (Loop *InnerL : RecomputeLoops)
1341     mergeLoop(InnerL);
1342 
1343   // And merge in this loop.
1344   mergeLoop(L);
1345 
1346   return CurAST;
1347 }
1348 
1349 /// Simple analysis hook. Clone alias set info.
1350 ///
1351 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
1352                                              Loop *L) {
1353   AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1354   if (!AST)
1355     return;
1356 
1357   AST->copyValue(From, To);
1358 }
1359 
1360 /// Simple Analysis hook. Delete value V from alias set
1361 ///
1362 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
1363   AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1364   if (!AST)
1365     return;
1366 
1367   AST->deleteValue(V);
1368 }
1369 
1370 /// Simple Analysis hook. Delete value L from alias set map.
1371 ///
1372 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
1373   AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1374   if (!AST)
1375     return;
1376 
1377   delete AST;
1378   LICM.getLoopToAliasSetMap().erase(L);
1379 }
1380 
1381 /// Return true if the body of this loop may store into the memory
1382 /// location pointed to by V.
1383 ///
1384 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
1385                                      const AAMDNodes &AAInfo,
1386                                      AliasSetTracker *CurAST) {
1387   // Check to see if any of the basic blocks in CurLoop invalidate *V.
1388   return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
1389 }
1390 
1391 /// Little predicate that returns true if the specified basic block is in
1392 /// a subloop of the current one, not the current one itself.
1393 ///
1394 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
1395   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
1396   return LI->getLoopFor(BB) != CurLoop;
1397 }
1398