1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs loop invariant code motion, attempting to remove as much
11 // code from the body of a loop as possible.  It does this by either hoisting
12 // code into the preheader block, or by sinking code to the exit blocks if it is
13 // safe.  This pass also promotes must-aliased memory locations in the loop to
14 // live in registers, thus hoisting and sinking "invariant" loads and stores.
15 //
16 // This pass uses alias analysis for two purposes:
17 //
18 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
19 //     that a load or call inside of a loop never aliases anything stored to,
20 //     we can hoist it or sink it like any other instruction.
21 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
22 //     the loop, we try to move the store to happen AFTER the loop instead of
23 //     inside of the loop.  This can only happen if a few conditions are true:
24 //       A. The pointer stored through is loop invariant
25 //       B. There are no stores or loads in the loop which _may_ alias the
26 //          pointer.  There are no calls in the loop which mod/ref the pointer.
27 //     If these conditions are true, we can promote the loads and stores in the
28 //     loop of the pointer to use a temporary alloca'd variable.  We then use
29 //     the SSAUpdater to construct the appropriate SSA form for the value.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "llvm/Transforms/Scalar/LICM.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/Analysis/AliasAnalysis.h"
36 #include "llvm/Analysis/AliasSetTracker.h"
37 #include "llvm/Analysis/BasicAliasAnalysis.h"
38 #include "llvm/Analysis/CaptureTracking.h"
39 #include "llvm/Analysis/ConstantFolding.h"
40 #include "llvm/Analysis/GlobalsModRef.h"
41 #include "llvm/Analysis/GuardUtils.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
44 #include "llvm/Analysis/LoopPass.h"
45 #include "llvm/Analysis/MemoryBuiltins.h"
46 #include "llvm/Analysis/MemorySSA.h"
47 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
48 #include "llvm/Analysis/ScalarEvolution.h"
49 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
50 #include "llvm/Analysis/TargetLibraryInfo.h"
51 #include "llvm/Transforms/Utils/Local.h"
52 #include "llvm/Analysis/ValueTracking.h"
53 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DerivedTypes.h"
57 #include "llvm/IR/Dominators.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/LLVMContext.h"
61 #include "llvm/IR/Metadata.h"
62 #include "llvm/IR/PatternMatch.h"
63 #include "llvm/IR/PredIteratorCache.h"
64 #include "llvm/Support/CommandLine.h"
65 #include "llvm/Support/Debug.h"
66 #include "llvm/Support/raw_ostream.h"
67 #include "llvm/Transforms/Scalar.h"
68 #include "llvm/Transforms/Scalar/LoopPassManager.h"
69 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
70 #include "llvm/Transforms/Utils/LoopUtils.h"
71 #include "llvm/Transforms/Utils/SSAUpdater.h"
72 #include <algorithm>
73 #include <utility>
74 using namespace llvm;
75 
#define DEBUG_TYPE "licm"

STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");

/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

// Cap on the number of bitcasts/users walked by isLoadInvariantInLoop when
// searching for a dominating invariant.start; keeps that query cheap.
static cl::opt<uint32_t> MaxNumUsesTraversed(
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    cl::desc("Max num uses visited for identifying load "
             "invariance in loop using invariant start (default = 8)"));

// Default value of zero implies we use the regular alias set tracker mechanism
// instead of the cross product using AA to identify aliasing of the memory
// location we are interested in.
static cl::opt<int>
LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
               cl::desc("How many instruction to cross product using AA"));
100 
// Forward declarations of the file-local helpers implemented later in this
// file; see each definition for the full documentation.

static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                  const LoopSafetyInfo *SafetyInfo,
                                  TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  ICFLoopSafetyInfo *SafetyInfo,
                  OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           OptimizationRemarkEmitter *ORE,
                                           const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AliasAnalysis *AA);

static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo);

// Removes I and keeps SafetyInfo/AST bookkeeping in sync (see definition).
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             AliasSetTracker *AST);

// Moves I before Dest and keeps SafetyInfo bookkeeping in sync.
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo);
131 
namespace {
/// Shared implementation of the LICM transform, driven by both the legacy and
/// the new pass manager wrappers below.
struct LoopInvariantCodeMotion {
  /// Map from loops to the alias set tracker computed for them, so that the
  /// information gathered for an inner loop can be reused when its parent
  /// loop is processed (see the stash at the end of runOnLoop).
  using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;

  /// Run LICM over \p L; returns true if the loop was changed.  When
  /// \p DeleteAST is false and \p L is nested, the loop's alias set tracker
  /// is saved in the map for the enclosing loop.
  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
                 ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool DeleteAST);

  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }

private:
  ASTrackerMapTy LoopToAliasSetMap;

  /// Build the alias set tracker describing the memory accessed in \p L
  /// (implemented later in this file).
  std::unique_ptr<AliasSetTracker>
  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
};

/// Legacy (old pass manager) wrapper around LoopInvariantCodeMotion.
struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass() : LoopPass(ID) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L)) {
      // If we have run LICM on a previous loop but now we are skipping
      // (because we've hit the opt-bisect limit), we need to clear the
      // loop alias information.
      LICM.getLoopToAliasSetMap().clear();
      return false;
    }

    // ScalarEvolution is optional for this pass; only fetch it if available.
    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = EnableMSSALoopDependency
                          ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
                          : nullptr;
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass.  Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(L,
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
                              *L->getHeader()->getParent()),
                          SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    if (EnableMSSALoopDependency)
      AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
  }

  using llvm::Pass::doFinalization;

  bool doFinalization() override {
    // By the time the pass is torn down, every stashed alias set tracker
    // must have been consumed (or cleared); anything left would leak across
    // invocations.
    assert(LICM.getLoopToAliasSetMap().empty() &&
           "Didn't free loop alias sets");
    return false;
  }

private:
  LoopInvariantCodeMotion LICM;

  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
                               Loop *L) override;

  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
  /// set.
  void deleteAnalysisValue(Value *V, Loop *L) override;

  /// Simple Analysis hook. Delete loop L from alias set map.
  void deleteAnalysisLoop(Loop *L) override;
};
} // namespace
218 
219 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
220                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
221   const auto &FAM =
222       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
223   Function *F = L.getHeader()->getParent();
224 
225   auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
226   // FIXME: This should probably be optional rather than required.
227   if (!ORE)
228     report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
229                        "cached at a higher level");
230 
231   LoopInvariantCodeMotion LICM;
232   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
233                       AR.MSSA, ORE, true))
234     return PreservedAnalyses::all();
235 
236   auto PA = getLoopPassPreservedAnalyses();
237 
238   PA.preserve<DominatorTreeAnalysis>();
239   PA.preserve<LoopAnalysis>();
240 
241   return PA;
242 }
243 
// Legacy pass-manager registration of "licm" together with the analysis
// passes it depends on.
char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

// Factory function used when building legacy pass pipelines.
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
255 
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
/// Returns true if the loop was changed (something was sunk, hoisted, or
/// promoted).  When \p DeleteAST is false and the loop is nested, the alias
/// set tracker built here is stashed in LoopToAliasSetMap for the parent.
/// NOTE(review): MSSA is accepted but not referenced in this function body.
bool LoopInvariantCodeMotion::runOnLoop(
    Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  // Build (or reuse sub-loop) alias information for the memory accessed in
  // this loop.
  std::unique_ptr<AliasSetTracker> CurAST = collectAliasInfoForLoop(L, LI, AA);

  // Get the preheader block to move instructions into...
  // (May be null; hoisting and promotion are gated on its presence below.)
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  ICFLoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L);

  // We want to visit all of the instructions in this loop... that are not parts
  // of our subloops (they have already had their invariants hoisted out of
  // their loop, into this loop, so there is no need to process the BODIES of
  // the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree so
  // that we are guaranteed to see definitions before we see uses.  This allows
  // us to sink instructions in one pass, without iteration.  After sinking
  // instructions, we perform another pass to hoist them out of the loop.
  //
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          CurAST.get(), &SafetyInfo, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                           CurAST.get(), &SafetyInfo, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify,
  // make sure we catch that. An additional load may be generated in the
  // preheader for SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      // Per-exit-block insertion points, parallel to ExitBlocks.
      SmallVector<Instruction *, 8> InsertPts;
      InsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks)
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());

      PredIteratorCache PIC;

      bool Promoted = false;

      // Loop over all of the alias sets in the tracker object.
      for (AliasSet &AS : *CurAST) {
        // We can promote this alias set if it has a store, if it is a "Must"
        // alias set, if the pointer is loop invariant, and if we are not
        // eliminating any volatile loads or stores.
        if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
            !L->isLoopInvariant(AS.begin()->getValue()))
          continue;

        assert(
            !AS.empty() &&
            "Must alias set should have at least one pointer element in it!");

        SmallSetVector<Value *, 8> PointerMustAliases;
        for (const auto &ASI : AS)
          PointerMustAliases.insert(ASI.getValue());

        Promoted |= promoteLoopAccessesToScalars(
            PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
            CurAST.get(), &SafetyInfo, ORE);
      }

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
      // within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
      // it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI, SE);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
  // specifically moving instructions across the loop boundary and so it is
  // especially in need of sanity checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  // If this loop is nested inside of another one, save the alias information
  // for when we process the outer loop.
  if (L->getParentLoop() && !DeleteAST)
    LoopToAliasSetMap[L] = std::move(CurAST);

  // Loop dispositions cached in SCEV may no longer hold after moving code.
  if (Changed && SE)
    SE->forgetLoopDispositions(L);
  return Changed;
}
374 
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree.  This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
/// Returns true if any instruction was deleted or sunk out of the loop.
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
                      OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion");

  // We want to visit children before parents. We will enque all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    // Walk the block bottom-up so uses are seen before definitions.
    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // If the instruction is dead, we would try to sink it because it isn't
      // used in the loop, instead, just delete it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        salvageDebugInfo(I);
        // Advance the iterator past I before erasing it so it stays valid.
        ++II;
        eraseInstruction(I, *SafetyInfo, CurAST);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop.  We can do this if the all users of the instruction are
      // outside of the loop.  In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FreeInLoop = false;
      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
          !I.mayHaveSideEffects()) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
          // A "free" instruction is left in place for in-loop users;
          // otherwise the original copy is erased after sinking.
          if (!FreeInLoop) {
            // Advance past I before erasing it so the iterator stays valid.
            ++II;
            eraseInstruction(I, *SafetyInfo, CurAST);
          }
          Changed = true;
        }
      }
    }
  }
  return Changed;
}
439 
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree.  This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
/// Returns true if any instruction was folded, hoisted, or rewritten.
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion");

  // We want to visit parents before children. We will enque all the parents
  // before their children in the worklist and process the worklist in order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : Worklist) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    // II is advanced before I is (possibly) erased, so the walk stays valid.
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
      Instruction &I = *II++;
      // Try constant folding this instruction.  If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold it.
      if (Constant *C = ConstantFoldInstruction(
              &I, I.getModule()->getDataLayout(), TLI)) {
        LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
                          << '\n');
        // Mirror the replacement in the alias sets before rewriting uses.
        CurAST->copyValue(&I, C);
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          eraseInstruction(I, *SafetyInfo, CurAST);
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader.  We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction.
      //
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator())) {
        hoist(I, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication.
      // Requires a loop-invariant divisor and the 'arcp' (allow reciprocal)
      // fast-math flag; the flags are propagated to both new instructions.
      if (I.getOpcode() == Instruction::FDiv &&
          CurLoop->isLoopInvariant(I.getOperand(1)) &&
          I.hasAllowReciprocal()) {
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, CurAST);

        // Only the (loop-invariant) reciprocal is hoisted; the multiply
        // stays in the loop.
        hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }

      // Hoist unused invariant.start intrinsics and guards when their
      // operands are invariant, they are guaranteed to execute, and no
      // write occurs before them in the loop.
      using namespace PatternMatch;
      if (((I.use_empty() &&
            match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
           isGuard(&I)) &&
          CurLoop->hasLoopInvariantOperands(&I) &&
          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
          SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
        hoist(I, DT, CurLoop, SafetyInfo, ORE);
        Changed = true;
        continue;
      }
    }
  }

  return Changed;
}
538 
539 // Return true if LI is invariant within scope of the loop. LI is invariant if
540 // CurLoop is dominated by an invariant.start representing the same memory
541 // location and size as the memory location LI loads from, and also the
542 // invariant.start has no uses.
543 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
544                                   Loop *CurLoop) {
545   Value *Addr = LI->getOperand(0);
546   const DataLayout &DL = LI->getModule()->getDataLayout();
547   const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
548       cast<PointerType>(Addr->getType())->getElementType());
549 
550   // if the type is i8 addrspace(x)*, we know this is the type of
551   // llvm.invariant.start operand
552   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
553                                      LI->getPointerAddressSpace());
554   unsigned BitcastsVisited = 0;
555   // Look through bitcasts until we reach the i8* type (this is invariant.start
556   // operand type).
557   while (Addr->getType() != PtrInt8Ty) {
558     auto *BC = dyn_cast<BitCastInst>(Addr);
559     // Avoid traversing high number of bitcast uses.
560     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
561       return false;
562     Addr = BC->getOperand(0);
563   }
564 
565   unsigned UsesVisited = 0;
566   // Traverse all uses of the load operand value, to see if invariant.start is
567   // one of the uses, and whether it dominates the load instruction.
568   for (auto *U : Addr->users()) {
569     // Avoid traversing for Load operand with high number of users.
570     if (++UsesVisited > MaxNumUsesTraversed)
571       return false;
572     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
573     // If there are escaping uses of invariant.start instruction, the load maybe
574     // non-invariant.
575     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
576         !II->use_empty())
577       continue;
578     unsigned InvariantSizeInBits =
579         cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
580     // Confirm the invariant.start location size contains the load operand size
581     // in bits. Also, the invariant.start should dominate the load, and we
582     // should not hoist the load out of a loop that contains this dominating
583     // invariant.start.
584     if (LocSizeInBits <= InvariantSizeInBits &&
585         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
586       return true;
587   }
588 
589   return false;
590 }
591 
592 namespace {
593 /// Return true if-and-only-if we know how to (mechanically) both hoist and
594 /// sink a given instruction out of a loop.  Does not address legality
595 /// concerns such as aliasing or speculation safety.
596 bool isHoistableAndSinkableInst(Instruction &I) {
597   // Only these instructions are hoistable/sinkable.
598   return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
599           isa<CallInst>(I) || isa<FenceInst>(I) ||
600           isa<BinaryOperator>(I) || isa<CastInst>(I) ||
601           isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
602           isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
603           isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
604           isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
605 }
606 /// Return true if all of the alias sets within this AST are known not to
607 /// contain a Mod.
608 bool isReadOnly(AliasSetTracker *CurAST) {
609   for (AliasSet &AS : *CurAST) {
610     if (!AS.isForwardingAliasSet() && AS.isMod()) {
611       return false;
612     }
613   }
614   return true;
615 }
616 }
617 
618 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
619                               Loop *CurLoop, AliasSetTracker *CurAST,
620                               bool TargetExecutesOncePerLoop,
621                               OptimizationRemarkEmitter *ORE) {
622   // If we don't understand the instruction, bail early.
623   if (!isHoistableAndSinkableInst(I))
624     return false;
625 
626   // Loads have extra constraints we have to verify before we can hoist them.
627   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
628     if (!LI->isUnordered())
629       return false; // Don't sink/hoist volatile or ordered atomic loads!
630 
631     // Loads from constant memory are always safe to move, even if they end up
632     // in the same alias set as something that ends up being modified.
633     if (AA->pointsToConstantMemory(LI->getOperand(0)))
634       return true;
635     if (LI->getMetadata(LLVMContext::MD_invariant_load))
636       return true;
637 
638     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
639       return false; // Don't risk duplicating unordered loads
640 
641     // This checks for an invariant.start dominating the load.
642     if (isLoadInvariantInLoop(LI, DT, CurLoop))
643       return true;
644 
645     bool Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI),
646                                                 CurAST, CurLoop, AA);
647     // Check loop-invariant address because this may also be a sinkable load
648     // whose address is not necessarily loop-invariant.
649     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
650       ORE->emit([&]() {
651         return OptimizationRemarkMissed(
652                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
653                << "failed to move load with loop-invariant address "
654                   "because the loop may invalidate its value";
655       });
656 
657     return !Invalidated;
658   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
659     // Don't sink or hoist dbg info; it's legal, but not useful.
660     if (isa<DbgInfoIntrinsic>(I))
661       return false;
662 
663     // Don't sink calls which can throw.
664     if (CI->mayThrow())
665       return false;
666 
667     using namespace PatternMatch;
668     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
669       // Assumes don't actually alias anything or throw
670       return true;
671 
672     // Handle simple cases by querying alias analysis.
673     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
674     if (Behavior == FMRB_DoesNotAccessMemory)
675       return true;
676     if (AliasAnalysis::onlyReadsMemory(Behavior)) {
677       // A readonly argmemonly function only reads from memory pointed to by
678       // it's arguments with arbitrary offsets.  If we can prove there are no
679       // writes to this memory in the loop, we can hoist or sink.
680       if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
681         // TODO: expand to writeable arguments
682         for (Value *Op : CI->arg_operands())
683           if (Op->getType()->isPointerTy() &&
684               pointerInvalidatedByLoop(
685                   MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
686                   CurAST, CurLoop, AA))
687             return false;
688         return true;
689       }
690 
691       // If this call only reads from memory and there are no writes to memory
692       // in the loop, we can hoist or sink the call as appropriate.
693       if (isReadOnly(CurAST))
694         return true;
695     }
696 
697     // FIXME: This should use mod/ref information to see if we can hoist or
698     // sink the call.
699 
700     return false;
701   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
702     // Fences alias (most) everything to provide ordering.  For the moment,
703     // just give up if there are any other memory operations in the loop.
704     auto Begin = CurAST->begin();
705     assert(Begin != CurAST->end() && "must contain FI");
706     if (std::next(Begin) != CurAST->end())
707       // constant memory for instance, TODO: handle better
708       return false;
709     auto *UniqueI = Begin->getUniqueInstruction();
710     if (!UniqueI)
711       // other memory op, give up
712       return false;
713     (void)FI; //suppress unused variable warning
714     assert(UniqueI == FI && "AS must contain FI");
715     return true;
716   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
717     if (!SI->isUnordered())
718       return false; // Don't sink/hoist volatile or ordered atomic store!
719 
720     // We can only hoist a store that we can prove writes a value which is not
721     // read or overwritten within the loop.  For those cases, we fallback to
722     // load store promotion instead.  TODO: We can extend this to cases where
723     // there is exactly one write to the location and that write dominates an
724     // arbitrary number of reads in the loop.
725     auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
726 
727     if (AS.isRef() || !AS.isMustAlias())
728       // Quick exit test, handled by the full path below as well.
729       return false;
730     auto *UniqueI = AS.getUniqueInstruction();
731     if (!UniqueI)
732       // other memory op, give up
733       return false;
734     assert(UniqueI == SI && "AS must contain SI");
735     return true;
736   }
737 
738   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
739 
740   // We've established mechanical ability and aliasing, it's up to the caller
741   // to check fault safety
742   return true;
743 }
744 
745 /// Returns true if a PHINode is a trivially replaceable with an
746 /// Instruction.
747 /// This is true when all incoming values are that instruction.
748 /// This pattern occurs most often with LCSSA PHI nodes.
749 ///
750 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
751   for (const Value *IncValue : PN.incoming_values())
752     if (IncValue != &I)
753       return false;
754 
755   return true;
756 }
757 
758 /// Return true if the instruction is free in the loop.
759 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
760                          const TargetTransformInfo *TTI) {
761 
762   if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
763     if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free)
764       return false;
765     // For a GEP, we cannot simply use getUserCost because currently it
766     // optimistically assume that a GEP will fold into addressing mode
767     // regardless of its users.
768     const BasicBlock *BB = GEP->getParent();
769     for (const User *U : GEP->users()) {
770       const Instruction *UI = cast<Instruction>(U);
771       if (CurLoop->contains(UI) &&
772           (BB != UI->getParent() ||
773            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
774         return false;
775     }
776     return true;
777   } else
778     return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
779 }
780 
781 /// Return true if the only users of this instruction are outside of
782 /// the loop. If this is true, we can sink the instruction to the exit
783 /// blocks of the loop.
784 ///
785 /// We also return true if the instruction could be folded away in lowering.
786 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
787 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
788                                   const LoopSafetyInfo *SafetyInfo,
789                                   TargetTransformInfo *TTI, bool &FreeInLoop) {
790   const auto &BlockColors = SafetyInfo->getBlockColors();
791   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
792   for (const User *U : I.users()) {
793     const Instruction *UI = cast<Instruction>(U);
794     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
795       const BasicBlock *BB = PN->getParent();
796       // We cannot sink uses in catchswitches.
797       if (isa<CatchSwitchInst>(BB->getTerminator()))
798         return false;
799 
800       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
801       // phi use is too muddled.
802       if (isa<CallInst>(I))
803         if (!BlockColors.empty() &&
804             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
805           return false;
806     }
807 
808     if (CurLoop->contains(UI)) {
809       if (IsFree) {
810         FreeInLoop = true;
811         continue;
812       }
813       return false;
814     }
815   }
816   return true;
817 }
818 
/// Clone \p I into \p ExitBlock at its first insertion point so the clone can
/// stand in for the LCSSA PHI \p PN during sinking.  Call-sites get a funclet
/// operand bundle recomputed for the exit block's color; any operand of the
/// clone defined in a loop not containing \p PN gets a fresh LCSSA PHI built
/// at the top of \p ExitBlock.  Returns the inserted clone.
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions.  The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      // Drop the old funclet bundle; the right one for the new location is
      // re-attached below from the exit block's color.
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
  // particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
       ++OI)
    if (Instruction *OInst = dyn_cast<Instruction>(*OI))
      if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
        if (!OLoop->contains(&PN)) {
          PHINode *OpPN =
              PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                              OInst->getName() + ".lcssa", &ExitBlock.front());
          for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
            OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
          *OI = OpPN;
        }
  return New;
}
879 
880 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
881                              AliasSetTracker *AST) {
882   if (AST)
883     AST->deleteValue(&I);
884   SafetyInfo.removeInstruction(&I);
885   I.eraseFromParent();
886 }
887 
888 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
889                                   ICFLoopSafetyInfo &SafetyInfo) {
890   SafetyInfo.removeInstruction(&I);
891   SafetyInfo.insertInstructionTo(Dest.getParent());
892   I.moveBefore(&Dest);
893 }
894 
895 static Instruction *sinkThroughTriviallyReplaceablePHI(
896     PHINode *TPN, Instruction *I, LoopInfo *LI,
897     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
898     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) {
899   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
900          "Expect only trivially replaceable PHI");
901   BasicBlock *ExitBlock = TPN->getParent();
902   Instruction *New;
903   auto It = SunkCopies.find(ExitBlock);
904   if (It != SunkCopies.end())
905     New = It->second;
906   else
907     New = SunkCopies[ExitBlock] =
908         CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo);
909   return New;
910 }
911 
912 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
913   BasicBlock *BB = PN->getParent();
914   if (!BB->canSplitPredecessors())
915     return false;
916   // It's not impossible to split EHPad blocks, but if BlockColors already exist
917   // it require updating BlockColors for all offspring blocks accordingly. By
918   // skipping such corner case, we can make updating BlockColors after splitting
919   // predecessor fairly simple.
920   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
921     return false;
922   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
923     BasicBlock *BBPred = *PI;
924     if (isa<IndirectBrInst>(BBPred->getTerminator()))
925       return false;
926   }
927   return true;
928 }
929 
/// Split the in-loop predecessors of \p PN's exit block so that each incoming
/// value reaches the exit through its own single-predecessor block, turning
/// \p PN's incoming values into trivially replaceable PHIs (see the example
/// below) while keeping every predecessor of each exit block inside the loop.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
                                        LoopInfo *LI, const Loop *CurLoop,
                                        LoopSafetyInfo *SafetyInfo) {
#ifndef NDEBUG
  // Assert-only: gather the unique exit blocks to check PN lives in one.
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif
  BasicBlock *ExitBB = PN->getParent();
  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");

  // Split predecessors of the loop exit so that instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs while keeping the
  // loop in the canonical form where each predecessor of each exit block should
  // be contained within the loop. For example, this will convert the loop below
  // from
  //
  // LB1:
  //   %v1 =
  //   br %LE, %LB2
  // LB2:
  //   %v2 =
  //   br %LE, %LB1
  // LE:
  //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
  //
  // to
  //
  // LB1:
  //   %v1 =
  //   br %LE.split, %LB2
  // LB2:
  //   %v2 =
  //   br %LE.split2, %LB1
  // LE.split:
  //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
  //   br %LE
  // LE.split2:
  //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
  //   br %LE
  // LE:
  //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
  const auto &BlockColors = SafetyInfo->getBlockColors();
  // Snapshot the current predecessors up front; splitting rewrites the CFG
  // edges as we go, so we drain this worklist rather than re-query.
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
    // Only split edges that actually feed an incoming value of PN.
    if (PN->getBasicBlockIndex(PredBB) >= 0) {
      BasicBlock *NewPred = SplitBlockPredecessors(
          ExitBB, PredBB, ".split.loop.exit", DT, LI, nullptr, true);
      // Since we do not allow splitting EH-block with BlockColors in
      // canSplitPredecessors(), we can simply assign predecessor's color to
      // the new block.
      if (!BlockColors.empty())
        // Grab a reference to the ColorVector to be inserted before getting the
        // reference to the vector we are copying because inserting the new
        // element in BlockColors might cause the map to be reallocated.
        SafetyInfo->copyColors(NewPred, PredBB);
    }
    PredBBs.remove(PredBB);
  }
}
995 
/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
/// Returns true if any IR was changed (undef rewrites, predecessor splits,
/// or exit PHIs replaced by clones).
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
           << "sinking " << ore::NV("Inst", &I);
  });
  bool Changed = false;
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;

  // Iterate over users to be ready for actual sinking. Replace users via
  // unreachable blocks with undef and make all user PHIs trivially replaceable.
  SmallPtrSet<Instruction *, 8> VisitedUsers;
  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
    auto *User = cast<Instruction>(*UI);
    Use &U = UI.getUse();
    // Advance now: U may be overwritten with undef below.
    ++UI;

    if (VisitedUsers.count(User) || CurLoop->contains(User))
      continue;

    // A user in an unreachable block never executes; undef suffices.
    if (!DT->isReachableFromEntry(User->getParent())) {
      U = UndefValue::get(I.getType());
      Changed = true;
      continue;
    }

    // The user must be a PHI node.
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits.  This can only happen in PHI nodes if the incoming block is
    // unreachable.
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = UndefValue::get(I.getType());
      Changed = true;
      continue;
    }

    VisitedUsers.insert(PN);
    if (isTriviallyReplaceablePHI(*PN, I))
      continue;

    if (!canSplitPredecessors(PN, SafetyInfo))
      return Changed;

    // Split predecessors of the PHI so that we can make users trivially
    // replaceable.
    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);

    // Should rebuild the iterators, as they may be invalidated by
    // splitPredecessorsOfLoopExit().
    UI = I.user_begin();
    UE = I.user_end();
  }

  // No out-of-loop PHI users survived the pass above: nothing to clone.
  if (VisitedUsers.empty())
    return Changed;

#ifndef NDEBUG
  // Assert-only: exit blocks used to validate LCSSA PHI placement below.
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
  for (auto *UI : Users) {
    auto *User = cast<Instruction>(UI);

    if (CurLoop->contains(User))
      continue;

    PHINode *PN = cast<PHINode>(User);
    assert(ExitBlockSet.count(PN->getParent()) &&
           "The LCSSA PHI is not in an exit block!");
    // The PHI must be trivially replaceable.
    Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies,
                                                          SafetyInfo, CurLoop);
    PN->replaceAllUsesWith(New);
    eraseInstruction(*PN, *SafetyInfo, nullptr);
    Changed = true;
  }
  return Changed;
}
1098 
1099 /// When an instruction is found to only use loop invariant operands that
1100 /// is safe to hoist, this instruction is called to do the dirty work.
1101 ///
1102 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1103                   ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1104   auto *Preheader = CurLoop->getLoopPreheader();
1105   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
1106                     << "\n");
1107   ORE->emit([&]() {
1108     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1109                                                          << ore::NV("Inst", &I);
1110   });
1111 
1112   // Metadata can be dependent on conditions we are hoisting above.
1113   // Conservatively strip all metadata on the instruction unless we were
1114   // guaranteed to execute I if we entered the loop, in which case the metadata
1115   // is valid in the loop preheader.
1116   if (I.hasMetadataOtherThanDebugLoc() &&
1117       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1118       // time in isGuaranteedToExecute if we don't actually have anything to
1119       // drop.  It is a compile time optimization, not required for correctness.
1120       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1121     I.dropUnknownNonDebugMetadata();
1122 
1123   // Move the new node to the Preheader, before its terminator.
1124   moveInstructionBefore(I, *Preheader->getTerminator(), *SafetyInfo);
1125 
1126   // Do not retain debug locations when we are moving instructions to different
1127   // basic blocks, because we want to avoid jumpy line tables. Calls, however,
1128   // need to retain their debug locs because they may be inlined.
1129   // FIXME: How do we retain source locations without causing poor debugging
1130   // behavior?
1131   if (!isa<CallInst>(I))
1132     I.setDebugLoc(DebugLoc());
1133 
1134   if (isa<LoadInst>(I))
1135     ++NumMovedLoads;
1136   else if (isa<CallInst>(I))
1137     ++NumMovedCalls;
1138   ++NumHoisted;
1139 }
1140 
1141 /// Only sink or hoist an instruction if it is not a trapping instruction,
1142 /// or if the instruction is known not to trap when moved to the preheader.
1143 /// or if it is a trapping instruction and is guaranteed to execute.
1144 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
1145                                            const DominatorTree *DT,
1146                                            const Loop *CurLoop,
1147                                            const LoopSafetyInfo *SafetyInfo,
1148                                            OptimizationRemarkEmitter *ORE,
1149                                            const Instruction *CtxI) {
1150   if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1151     return true;
1152 
1153   bool GuaranteedToExecute =
1154       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1155 
1156   if (!GuaranteedToExecute) {
1157     auto *LI = dyn_cast<LoadInst>(&Inst);
1158     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1159       ORE->emit([&]() {
1160         return OptimizationRemarkMissed(
1161                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1162                << "failed to hoist load with loop-invariant address "
1163                   "because load is conditionally executed";
1164       });
1165   }
1166 
1167   return GuaranteedToExecute;
1168 }
1169 
1170 namespace {
// Helper for scalar promotion: as LoadAndStorePromoter rewrites the promoted
// loads/stores via SSAUpdater, this subclass inserts the live-out stores into
// the loop exit blocks and keeps the alias-set tracker and safety info in
// sync as instructions are replaced or deleted.
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  const SmallSetVector<Value *, 8> &PointerMustAliases; // Promoted pointers.
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks; // Where exit stores go.
  SmallVectorImpl<Instruction *> &LoopInsertPts; // Per-exit insertion points.
  PredIteratorCache &PredCache;
  AliasSetTracker &AST;
  LoopInfo &LI;
  DebugLoc DL;          // Debug location applied to the inserted exit stores.
  int Alignment;        // Alignment set on the inserted exit stores.
  bool UnorderedAtomic; // If set, exit stores are emitted unordered-atomic.
  AAMDNodes AATags;     // AA metadata applied to the inserted exit stores.
  ICFLoopSafetyInfo &SafetyInfo;

  // If V is defined inside a loop that does not contain BB, build an LCSSA
  // PHI for it at the top of BB (one incoming entry per predecessor) and
  // return the PHI; otherwise return V unchanged.
  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (Instruction *I = dyn_cast<Instruction>(V))
      if (Loop *L = LI.getLoopFor(I->getParent()))
        if (!L->contains(BB)) {
          // We need to create an LCSSA PHI node for the incoming value and
          // store that.
          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                        I->getName() + ".lcssa", &BB->front());
          for (BasicBlock *Pred : PredCache.get(BB))
            PN->addIncoming(I, Pred);
          return PN;
        }
    return V;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               const SmallSetVector<Value *, 8> &PMA,
               SmallVectorImpl<BasicBlock *> &LEB,
               SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
               AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
               bool UnorderedAtomic, const AAMDNodes &AATags,
               ICFLoopSafetyInfo &SafetyInfo)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
        LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
        LI(li), DL(std::move(dl)), Alignment(alignment),
        UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
      {}

  // Restrict rewriting to instructions whose pointer operand belongs to the
  // promoted must-alias set.
  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction *> &) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();
    return PointerMustAliases.count(Ptr);
  }

  void doExtraRewritesBeforeFinalDeletion() const override {
    // Insert stores after in the loop exit blocks.  Each exit block gets a
    // store of the live-out values that feed them.  Since we've already told
    // the SSA updater about the defs in the loop and the preheader
    // definition, it is all set and we can start using it.
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      // Both the stored value and the pointer may need LCSSA PHIs if they
      // come from an inner loop not containing this exit block.
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      Instruction *InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      if (UnorderedAtomic)
        NewSI->setOrdering(AtomicOrdering::Unordered);
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      if (AATags)
        NewSI->setAAMetadata(AATags);
    }
  }

  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    // Update alias analysis.
    AST.copyValue(LI, V);
  }
  // Keep the safety info and alias-set tracker from holding dangling
  // pointers to instructions the promoter deletes.
  void instructionDeleted(Instruction *I) const override {
    SafetyInfo.removeInstruction(I);
    AST.deleteValue(I);
  }
};
1254 
1255 
1256 /// Return true iff we can prove that a caller of this function can not inspect
1257 /// the contents of the provided object in a well defined program.
1258 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1259   if (isa<AllocaInst>(Object))
1260     // Since the alloca goes out of scope, we know the caller can't retain a
1261     // reference to it and be well defined.  Thus, we don't need to check for
1262     // capture.
1263     return true;
1264 
1265   // For all other objects we need to know that the caller can't possibly
1266   // have gotten a reference to the object.  There are two components of
1267   // that:
1268   //   1) Object can't be escaped by this function.  This is what
1269   //      PointerMayBeCaptured checks.
1270   //   2) Object can't have been captured at definition site.  For this, we
1271   //      need to know the return value is noalias.  At the moment, we use a
1272   //      weaker condition and handle only AllocLikeFunctions (which are
1273   //      known to be noalias).  TODO
1274   return isAllocLikeFn(Object, TLI) &&
1275     !PointerMayBeCaptured(Object, true, true);
1276 }
1277 
1278 } // namespace
1279 
1280 /// Try to promote memory values to scalars by sinking stores out of the
1281 /// loop and moving loads to before the loop.  We do this by looping over
1282 /// the stores in the loop, looking for stores to Must pointers which are
1283 /// loop invariant.
1284 ///
1285 bool llvm::promoteLoopAccessesToScalars(
1286     const SmallSetVector<Value *, 8> &PointerMustAliases,
1287     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1288     SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
1289     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1290     Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
1291     OptimizationRemarkEmitter *ORE) {
1292   // Verify inputs.
1293   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1294          CurAST != nullptr && SafetyInfo != nullptr &&
1295          "Unexpected Input to promoteLoopAccessesToScalars");
1296 
1297   Value *SomePtr = *PointerMustAliases.begin();
1298   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1299 
1300   // It is not safe to promote a load/store from the loop if the load/store is
1301   // conditional.  For example, turning:
1302   //
1303   //    for () { if (c) *P += 1; }
1304   //
1305   // into:
1306   //
1307   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1308   //
1309   // is not safe, because *P may only be valid to access if 'c' is true.
1310   //
1311   // The safety property divides into two parts:
1312   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1313   //    case, we can't insert the required load in the preheader.
1314   // p2) The memory model does not allow us to insert a store along any dynamic
1315   //    path which did not originally have one.
1316   //
1317   // If at least one store is guaranteed to execute, both properties are
1318   // satisfied, and promotion is legal.
1319   //
1320   // This, however, is not a necessary condition. Even if no store/load is
1321   // guaranteed to execute, we can still establish these properties.
1322   // We can establish (p1) by proving that hoisting the load into the preheader
1323   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1324   // can use any access within the alias set to prove dereferenceability,
1325   // since they're all must alias.
1326   //
1327   // There are two ways establish (p2):
1328   // a) Prove the location is thread-local. In this case the memory model
1329   // requirement does not apply, and stores are safe to insert.
1330   // b) Prove a store dominates every exit block. In this case, if an exit
1331   // blocks is reached, the original dynamic path would have taken us through
1332   // the store, so inserting a store into the exit block is safe. Note that this
1333   // is different from the store being guaranteed to execute. For instance,
1334   // if an exception is thrown on the first iteration of the loop, the original
1335   // store is never executed, but the exit blocks are not executed either.
1336 
1337   bool DereferenceableInPH = false;
1338   bool SafeToInsertStore = false;
1339 
1340   SmallVector<Instruction *, 64> LoopUses;
1341 
1342   // We start with an alignment of one and try to find instructions that allow
1343   // us to prove better alignment.
1344   unsigned Alignment = 1;
1345   // Keep track of which types of access we see
1346   bool SawUnorderedAtomic = false;
1347   bool SawNotAtomic = false;
1348   AAMDNodes AATags;
1349 
1350   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1351 
1352   bool IsKnownThreadLocalObject = false;
1353   if (SafetyInfo->anyBlockMayThrow()) {
1354     // If a loop can throw, we have to insert a store along each unwind edge.
1355     // That said, we can't actually make the unwind edge explicit. Therefore,
1356     // we have to prove that the store is dead along the unwind edge.  We do
1357     // this by proving that the caller can't have a reference to the object
1358     // after return and thus can't possibly load from the object.
1359     Value *Object = GetUnderlyingObject(SomePtr, MDL);
1360     if (!isKnownNonEscaping(Object, TLI))
1361       return false;
1362     // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1363     // visible to other threads if captured and used during their lifetimes.
1364     IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1365   }
1366 
1367   // Check that all of the pointers in the alias set have the same type.  We
1368   // cannot (yet) promote a memory location that is loaded and stored in
1369   // different sizes.  While we are at it, collect alignment and AA info.
1370   for (Value *ASIV : PointerMustAliases) {
1371     // Check that all of the pointers in the alias set have the same type.  We
1372     // cannot (yet) promote a memory location that is loaded and stored in
1373     // different sizes.
1374     if (SomePtr->getType() != ASIV->getType())
1375       return false;
1376 
1377     for (User *U : ASIV->users()) {
1378       // Ignore instructions that are outside the loop.
1379       Instruction *UI = dyn_cast<Instruction>(U);
1380       if (!UI || !CurLoop->contains(UI))
1381         continue;
1382 
1383       // If there is an non-load/store instruction in the loop, we can't promote
1384       // it.
1385       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1386         if (!Load->isUnordered())
1387           return false;
1388 
1389         SawUnorderedAtomic |= Load->isAtomic();
1390         SawNotAtomic |= !Load->isAtomic();
1391 
1392         if (!DereferenceableInPH)
1393           DereferenceableInPH = isSafeToExecuteUnconditionally(
1394               *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
1395       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1396         // Stores *of* the pointer are not interesting, only stores *to* the
1397         // pointer.
1398         if (UI->getOperand(1) != ASIV)
1399           continue;
1400         if (!Store->isUnordered())
1401           return false;
1402 
1403         SawUnorderedAtomic |= Store->isAtomic();
1404         SawNotAtomic |= !Store->isAtomic();
1405 
1406         // If the store is guaranteed to execute, both properties are satisfied.
1407         // We may want to check if a store is guaranteed to execute even if we
1408         // already know that promotion is safe, since it may have higher
1409         // alignment than any other guaranteed stores, in which case we can
1410         // raise the alignment on the promoted store.
1411         unsigned InstAlignment = Store->getAlignment();
1412         if (!InstAlignment)
1413           InstAlignment =
1414               MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1415 
1416         if (!DereferenceableInPH || !SafeToInsertStore ||
1417             (InstAlignment > Alignment)) {
1418           if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
1419             DereferenceableInPH = true;
1420             SafeToInsertStore = true;
1421             Alignment = std::max(Alignment, InstAlignment);
1422           }
1423         }
1424 
1425         // If a store dominates all exit blocks, it is safe to sink.
1426         // As explained above, if an exit block was executed, a dominating
1427         // store must have been executed at least once, so we are not
1428         // introducing stores on paths that did not have them.
1429         // Note that this only looks at explicit exit blocks. If we ever
1430         // start sinking stores into unwind edges (see above), this will break.
1431         if (!SafeToInsertStore)
1432           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
1433             return DT->dominates(Store->getParent(), Exit);
1434           });
1435 
1436         // If the store is not guaranteed to execute, we may still get
1437         // deref info through it.
1438         if (!DereferenceableInPH) {
1439           DereferenceableInPH = isDereferenceableAndAlignedPointer(
1440               Store->getPointerOperand(), Store->getAlignment(), MDL,
1441               Preheader->getTerminator(), DT);
1442         }
1443       } else
1444         return false; // Not a load or store.
1445 
1446       // Merge the AA tags.
1447       if (LoopUses.empty()) {
1448         // On the first load/store, just take its AA tags.
1449         UI->getAAMetadata(AATags);
1450       } else if (AATags) {
1451         UI->getAAMetadata(AATags, /* Merge = */ true);
1452       }
1453 
1454       LoopUses.push_back(UI);
1455     }
1456   }
1457 
1458   // If we found both an unordered atomic instruction and a non-atomic memory
1459   // access, bail.  We can't blindly promote non-atomic to atomic since we
1460   // might not be able to lower the result.  We can't downgrade since that
1461   // would violate memory model.  Also, align 0 is an error for atomics.
1462   if (SawUnorderedAtomic && SawNotAtomic)
1463     return false;
1464 
1465   // If we couldn't prove we can hoist the load, bail.
1466   if (!DereferenceableInPH)
1467     return false;
1468 
1469   // We know we can hoist the load, but don't have a guaranteed store.
1470   // Check whether the location is thread-local. If it is, then we can insert
1471   // stores along paths which originally didn't have them without violating the
1472   // memory model.
1473   if (!SafeToInsertStore) {
1474     if (IsKnownThreadLocalObject)
1475       SafeToInsertStore = true;
1476     else {
1477       Value *Object = GetUnderlyingObject(SomePtr, MDL);
1478       SafeToInsertStore =
1479           (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
1480           !PointerMayBeCaptured(Object, true, true);
1481     }
1482   }
1483 
1484   // If we've still failed to prove we can sink the store, give up.
1485   if (!SafeToInsertStore)
1486     return false;
1487 
1488   // Otherwise, this is safe to promote, lets do it!
1489   LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
1490                     << '\n');
1491   ORE->emit([&]() {
1492     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
1493                               LoopUses[0])
1494            << "Moving accesses to memory location out of the loop";
1495   });
1496   ++NumPromoted;
1497 
1498   // Grab a debug location for the inserted loads/stores; given that the
1499   // inserted loads/stores have little relation to the original loads/stores,
1500   // this code just arbitrarily picks a location from one, since any debug
1501   // location is better than none.
1502   DebugLoc DL = LoopUses[0]->getDebugLoc();
1503 
1504   // We use the SSAUpdater interface to insert phi nodes as required.
1505   SmallVector<PHINode *, 16> NewPHIs;
1506   SSAUpdater SSA(&NewPHIs);
1507   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
1508                         InsertPts, PIC, *CurAST, *LI, DL, Alignment,
1509                         SawUnorderedAtomic, AATags, *SafetyInfo);
1510 
1511   // Set up the preheader to have a definition of the value.  It is the live-out
1512   // value from the preheader that uses in the loop will use.
1513   LoadInst *PreheaderLoad = new LoadInst(
1514       SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
1515   if (SawUnorderedAtomic)
1516     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
1517   PreheaderLoad->setAlignment(Alignment);
1518   PreheaderLoad->setDebugLoc(DL);
1519   if (AATags)
1520     PreheaderLoad->setAAMetadata(AATags);
1521   SSA.AddAvailableValue(Preheader, PreheaderLoad);
1522 
1523   // Rewrite all the loads in the loop and remember all the definitions from
1524   // stores in the loop.
1525   Promoter.run(LoopUses);
1526 
1527   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
1528   if (PreheaderLoad->use_empty())
1529     eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST);
1530 
1531   return true;
1532 }
1533 
1534 /// Returns an owning pointer to an alias set which incorporates aliasing info
1535 /// from L and all subloops of L.
1536 /// FIXME: In new pass manager, there is no helper function to handle loop
1537 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
1538 /// from scratch for every loop. Hook up with the helper functions when
1539 /// available in the new pass manager to avoid redundant computation.
1540 std::unique_ptr<AliasSetTracker>
1541 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
1542                                                  AliasAnalysis *AA) {
1543   std::unique_ptr<AliasSetTracker> CurAST;
1544   SmallVector<Loop *, 4> RecomputeLoops;
1545   for (Loop *InnerL : L->getSubLoops()) {
1546     auto MapI = LoopToAliasSetMap.find(InnerL);
1547     // If the AST for this inner loop is missing it may have been merged into
1548     // some other loop's AST and then that loop unrolled, and so we need to
1549     // recompute it.
1550     if (MapI == LoopToAliasSetMap.end()) {
1551       RecomputeLoops.push_back(InnerL);
1552       continue;
1553     }
1554     std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
1555 
1556     if (CurAST) {
1557       // What if InnerLoop was modified by other passes ?
1558       // Once we've incorporated the inner loop's AST into ours, we don't need
1559       // the subloop's anymore.
1560       CurAST->add(*InnerAST);
1561     } else {
1562       CurAST = std::move(InnerAST);
1563     }
1564     LoopToAliasSetMap.erase(MapI);
1565   }
1566   if (!CurAST)
1567     CurAST = make_unique<AliasSetTracker>(*AA);
1568 
1569   // Add everything from the sub loops that are no longer directly available.
1570   for (Loop *InnerL : RecomputeLoops)
1571     for (BasicBlock *BB : InnerL->blocks())
1572       CurAST->add(*BB);
1573 
1574   // And merge in this loop (without anything from inner loops).
1575   for (BasicBlock *BB : L->blocks())
1576     if (LI->getLoopFor(BB) == L)
1577       CurAST->add(*BB);
1578 
1579   return CurAST;
1580 }
1581 
1582 /// Simple analysis hook. Clone alias set info.
1583 ///
1584 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
1585                                              Loop *L) {
1586   auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
1587   if (ASTIt == LICM.getLoopToAliasSetMap().end())
1588     return;
1589 
1590   ASTIt->second->copyValue(From, To);
1591 }
1592 
1593 /// Simple Analysis hook. Delete value V from alias set
1594 ///
1595 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
1596   auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
1597   if (ASTIt == LICM.getLoopToAliasSetMap().end())
1598     return;
1599 
1600   ASTIt->second->deleteValue(V);
1601 }
1602 
1603 /// Simple Analysis hook. Delete value L from alias set map.
1604 ///
1605 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
1606   if (!LICM.getLoopToAliasSetMap().count(L))
1607     return;
1608 
1609   LICM.getLoopToAliasSetMap().erase(L);
1610 }
1611 
1612 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
1613                                      AliasSetTracker *CurAST, Loop *CurLoop,
1614                                      AliasAnalysis *AA) {
1615   // First check to see if any of the basic blocks in CurLoop invalidate *V.
1616   bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
1617 
1618   if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
1619     return isInvalidatedAccordingToAST;
1620 
1621   // Check with a diagnostic analysis if we can refine the information above.
1622   // This is to identify the limitations of using the AST.
1623   // The alias set mechanism used by LICM has a major weakness in that it
1624   // combines all things which may alias into a single set *before* asking
1625   // modref questions. As a result, a single readonly call within a loop will
1626   // collapse all loads and stores into a single alias set and report
1627   // invalidation if the loop contains any store. For example, readonly calls
1628   // with deopt states have this form and create a general alias set with all
1629   // loads and stores.  In order to get any LICM in loops containing possible
1630   // deopt states we need a more precise invalidation of checking the mod ref
1631   // info of each instruction within the loop and LI. This has a complexity of
1632   // O(N^2), so currently, it is used only as a diagnostic tool since the
1633   // default value of LICMN2Threshold is zero.
1634 
1635   // Don't look at nested loops.
1636   if (CurLoop->begin() != CurLoop->end())
1637     return true;
1638 
1639   int N = 0;
1640   for (BasicBlock *BB : CurLoop->getBlocks())
1641     for (Instruction &I : *BB) {
1642       if (N >= LICMN2Theshold) {
1643         LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
1644                           << *(MemLoc.Ptr) << "\n");
1645         return true;
1646       }
1647       N++;
1648       auto Res = AA->getModRefInfo(&I, MemLoc);
1649       if (isModSet(Res)) {
1650         LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
1651                           << *(MemLoc.Ptr) << "\n");
1652         return true;
1653       }
1654     }
1655   LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
1656   return false;
1657 }
1658 
1659 /// Little predicate that returns true if the specified basic block is in
1660 /// a subloop of the current one, not the current one itself.
1661 ///
1662 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
1663   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
1664   return LI->getLoopFor(BB) != CurLoop;
1665 }
1666