1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/CaptureTracking.h"
46 #include "llvm/Analysis/ConstantFolding.h"
47 #include "llvm/Analysis/GuardUtils.h"
48 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
49 #include "llvm/Analysis/Loads.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/LoopIterator.h"
52 #include "llvm/Analysis/LoopNestAnalysis.h"
53 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/Analysis/MemorySSA.h"
55 #include "llvm/Analysis/MemorySSAUpdater.h"
56 #include "llvm/Analysis/MustExecute.h"
57 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
58 #include "llvm/Analysis/ScalarEvolution.h"
59 #include "llvm/Analysis/TargetLibraryInfo.h"
60 #include "llvm/Analysis/TargetTransformInfo.h"
61 #include "llvm/Analysis/ValueTracking.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugInfoMetadata.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/LLVMContext.h"
71 #include "llvm/IR/Metadata.h"
72 #include "llvm/IR/PatternMatch.h"
73 #include "llvm/IR/PredIteratorCache.h"
74 #include "llvm/InitializePasses.h"
75 #include "llvm/Support/CommandLine.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/raw_ostream.h"
78 #include "llvm/Transforms/Scalar.h"
79 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
80 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
81 #include "llvm/Transforms/Utils/Local.h"
82 #include "llvm/Transforms/Utils/LoopUtils.h"
83 #include "llvm/Transforms/Utils/SSAUpdater.h"
84 #include <algorithm>
85 #include <utility>
86 using namespace llvm;
87 
88 namespace llvm {
89 class BlockFrequencyInfo;
90 class LPMUpdater;
91 } // namespace llvm
92 
93 #define DEBUG_TYPE "licm"
94 
95 STATISTIC(NumCreatedBlocks, "Number of blocks created");
96 STATISTIC(NumClonedBranches, "Number of branches cloned");
97 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
98 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
99 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
100 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
101 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
102 
103 /// Memory promotion is enabled by default.
104 static cl::opt<bool>
105     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
106                      cl::desc("Disable memory promotion in LICM pass"));
107 
108 static cl::opt<bool> ControlFlowHoisting(
109     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
110     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
111 
112 static cl::opt<uint32_t> MaxNumUsesTraversed(
113     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
114     cl::desc("Max num uses visited for identifying load "
115              "invariance in loop using invariant start (default = 8)"));
116 
117 // Experimental option to allow imprecision in LICM in pathological cases, in
118 // exchange for faster compile. This is to be removed if MemorySSA starts to
119 // address the same issue. This flag applies only when LICM uses MemorySSA
120 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
121 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
122 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
123 // which may not be precise, since optimizeUses is capped. The result is
124 // correct, but we may not get as "far up" as possible to get which access is
125 // clobbering the one queried.
126 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
127     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
128     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
129              "for faster compile. Caps the MemorySSA clobbering calls."));
130 
131 // Experimentally, memory promotion carries less importance than sinking and
132 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
133 // compile time.
134 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
135     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
136     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
137              "effect. When MSSA in LICM is enabled, then this is the maximum "
138              "number of accesses allowed to be present in a loop in order to "
139              "enable memory promotion."));
140 
141 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
142 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
143                                   const LoopSafetyInfo *SafetyInfo,
144                                   TargetTransformInfo *TTI, bool &FreeInLoop,
145                                   bool LoopNestMode);
146 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
147                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
148                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
149                   OptimizationRemarkEmitter *ORE);
150 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
151                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
152                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
153                  OptimizationRemarkEmitter *ORE);
154 static bool isSafeToExecuteUnconditionally(
155     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
156     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
157     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
158     bool AllowSpeculation);
159 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
160                                      AliasSetTracker *CurAST, Loop *CurLoop,
161                                      AAResults *AA);
162 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
163                                              Loop *CurLoop, Instruction &I,
164                                              SinkAndHoistLICMFlags &Flags);
165 static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
166                                               MemoryUse &MU);
167 static Instruction *cloneInstructionInExitBlock(
168     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
169     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
170 
171 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
172                              MemorySSAUpdater *MSSAU);
173 
174 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
175                                   ICFLoopSafetyInfo &SafetyInfo,
176                                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
177 
178 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
179                                 function_ref<void(Instruction *)> Fn);
180 static SmallVector<SmallSetVector<Value *, 8>, 0>
181 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
182 
183 namespace {
184 struct LoopInvariantCodeMotion {
185   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
186                  BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
187                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
188                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
189 
190   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
191                           unsigned LicmMssaNoAccForPromotionCap,
192                           bool LicmAllowSpeculation)
193       : LicmMssaOptCap(LicmMssaOptCap),
194         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
195         LicmAllowSpeculation(LicmAllowSpeculation) {}
196 
197 private:
198   unsigned LicmMssaOptCap;
199   unsigned LicmMssaNoAccForPromotionCap;
200   bool LicmAllowSpeculation;
201 };
202 
203 struct LegacyLICMPass : public LoopPass {
204   static char ID; // Pass identification, replacement for typeid
205   LegacyLICMPass(
206       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
207       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
208       bool LicmAllowSpeculation = true)
209       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
210                            LicmAllowSpeculation) {
211     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
212   }
213 
214   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
215     if (skipLoop(L))
216       return false;
217 
218     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
219                       << L->getHeader()->getNameOrAsOperand() << "\n");
220 
221     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
222     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
223     bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
224     BlockFrequencyInfo *BFI =
225         hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
226                        : nullptr;
227     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
228     // pass. Function analyses need to be preserved across loop transformations
229     // but ORE cannot be preserved (see comment before the pass definition).
230     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
231     return LICM.runOnLoop(
232         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
233         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
234         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
235         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
236             *L->getHeader()->getParent()),
237         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
238             *L->getHeader()->getParent()),
239         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
240   }
241 
242   /// This transformation requires natural loop information & requires that
243   /// loop preheaders be inserted into the CFG...
244   ///
245   void getAnalysisUsage(AnalysisUsage &AU) const override {
246     AU.addPreserved<DominatorTreeWrapperPass>();
247     AU.addPreserved<LoopInfoWrapperPass>();
248     AU.addRequired<TargetLibraryInfoWrapperPass>();
249     AU.addRequired<MemorySSAWrapperPass>();
250     AU.addPreserved<MemorySSAWrapperPass>();
251     AU.addRequired<TargetTransformInfoWrapperPass>();
252     getLoopAnalysisUsage(AU);
253     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
254     AU.addPreserved<LazyBlockFrequencyInfoPass>();
255     AU.addPreserved<LazyBranchProbabilityInfoPass>();
256   }
257 
258 private:
259   LoopInvariantCodeMotion LICM;
260 };
261 } // namespace
262 
263 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
264                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
265   if (!AR.MSSA)
266     report_fatal_error("LICM requires MemorySSA (loop-mssa)");
267 
268   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
269   // pass.  Function analyses need to be preserved across loop transformations
270   // but ORE cannot be preserved (see comment before the pass definition).
271   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
272 
273   LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
274                                LicmAllowSpeculation);
275   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
276                       &AR.SE, AR.MSSA, &ORE))
277     return PreservedAnalyses::all();
278 
279   auto PA = getLoopPassPreservedAnalyses();
280 
281   PA.preserve<DominatorTreeAnalysis>();
282   PA.preserve<LoopAnalysis>();
283   PA.preserve<MemorySSAAnalysis>();
284 
285   return PA;
286 }
287 
288 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
289                                  LoopStandardAnalysisResults &AR,
290                                  LPMUpdater &) {
291   if (!AR.MSSA)
292     report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
293 
294   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
295   // pass.  Function analyses need to be preserved across loop transformations
296   // but ORE cannot be preserved (see comment before the pass definition).
297   OptimizationRemarkEmitter ORE(LN.getParent());
298 
299   LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
300                                LicmAllowSpeculation);
301 
302   Loop &OutermostLoop = LN.getOutermostLoop();
303   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
304                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
305 
306   if (!Changed)
307     return PreservedAnalyses::all();
308 
309   auto PA = getLoopPassPreservedAnalyses();
310 
311   PA.preserve<DominatorTreeAnalysis>();
312   PA.preserve<LoopAnalysis>();
313   PA.preserve<MemorySSAAnalysis>();
314 
315   return PA;
316 }
317 
318 char LegacyLICMPass::ID = 0;
319 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
320                       false, false)
321 INITIALIZE_PASS_DEPENDENCY(LoopPass)
322 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
323 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
324 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
325 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
326 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
327                     false)
328 
329 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
330 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
331                            unsigned LicmMssaNoAccForPromotionCap,
332                            bool LicmAllowSpeculation) {
333   return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
334                             LicmAllowSpeculation);
335 }
336 
337 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
338                                                    MemorySSA *MSSA)
339     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
340                             IsSink, L, MSSA) {}
341 
342 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
343     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
344     Loop *L, MemorySSA *MSSA)
345     : LicmMssaOptCap(LicmMssaOptCap),
346       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
347       IsSink(IsSink) {
348   assert(((L != nullptr) == (MSSA != nullptr)) &&
349          "Unexpected values for SinkAndHoistLICMFlags");
350   if (!MSSA)
351     return;
352 
353   unsigned AccessCapCount = 0;
354   for (auto *BB : L->getBlocks())
355     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
356       for (const auto &MA : *Accesses) {
357         (void)MA;
358         ++AccessCapCount;
359         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
360           NoOfMemAccTooLarge = true;
361           return;
362         }
363       }
364 }
365 
366 /// Hoist expressions out of the specified loop. Note, alias info for inner
367 /// loop is not preserved so it is not a good idea to run LICM multiple
368 /// times on one loop.
369 bool LoopInvariantCodeMotion::runOnLoop(
370     Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
371     BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
372     ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
373     bool LoopNestMode) {
374   bool Changed = false;
375 
376   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
377 
378   // If this loop has metadata indicating that LICM is not to be performed then
379   // just exit.
380   if (hasDisableLICMTransformsHint(L)) {
381     return false;
382   }
383 
384   // Don't sink stores from loops with coroutine suspend instructions.
385   // LICM would sink instructions into the default destination of
386   // the coroutine switch. The default destination of the switch is to
387   // handle the case where the coroutine is suspended, by which point the
388   // coroutine frame may have been destroyed. No instruction can be sunk there.
389   // FIXME: This would unfortunately hurt the performance of coroutines, however
390   // there is currently no general solution for this. Similar issues could also
391   // potentially happen in other passes where instructions are being moved
392   // across that edge.
393   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
394     return llvm::any_of(*BB, [](Instruction &I) {
395       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
396       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
397     });
398   });
399 
400   MemorySSAUpdater MSSAU(MSSA);
401   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
402                               /*IsSink=*/true, L, MSSA);
403 
404   // Get the preheader block to move instructions into...
405   BasicBlock *Preheader = L->getLoopPreheader();
406 
407   // Compute loop safety information.
408   ICFLoopSafetyInfo SafetyInfo;
409   SafetyInfo.computeLoopSafetyInfo(L);
410 
411   // We want to visit all of the instructions in this loop... that are not parts
412   // of our subloops (they have already had their invariants hoisted out of
413   // their loop, into this loop, so there is no need to process the BODIES of
414   // the subloops).
415   //
416   // Traverse the body of the loop in depth first order on the dominator tree so
417   // that we are guaranteed to see definitions before we see uses.  This allows
418   // us to sink instructions in one pass, without iteration.  After sinking
419   // instructions, we perform another pass to hoist them out of the loop.
420   if (L->hasDedicatedExits())
421     Changed |= LoopNestMode
422                    ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
423                                            DT, BFI, TLI, TTI, L, &MSSAU,
424                                            &SafetyInfo, Flags, ORE)
425                    : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
426                                 TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
427   Flags.setIsSink(false);
428   if (Preheader)
429     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
430                            &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
431                            LicmAllowSpeculation);
432 
433   // Now that all loop invariants have been removed from the loop, promote any
434   // memory references to scalars that we can.
435   // Don't sink stores from loops without dedicated block exits. Exits
436   // containing indirect branches are not transformed by loop simplify,
437   // make sure we catch that. An additional load may be generated in the
438   // preheader for SSA updater, so also avoid sinking when no preheader
439   // is available.
440   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
441       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
442     // Figure out the loop exits and their insertion points
443     SmallVector<BasicBlock *, 8> ExitBlocks;
444     L->getUniqueExitBlocks(ExitBlocks);
445 
446     // We can't insert into a catchswitch.
447     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
448       return isa<CatchSwitchInst>(Exit->getTerminator());
449     });
450 
451     if (!HasCatchSwitch) {
452       SmallVector<Instruction *, 8> InsertPts;
453       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
454       InsertPts.reserve(ExitBlocks.size());
455       MSSAInsertPts.reserve(ExitBlocks.size());
456       for (BasicBlock *ExitBlock : ExitBlocks) {
457         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
458         MSSAInsertPts.push_back(nullptr);
459       }
460 
461       PredIteratorCache PIC;
462 
463       // Promoting one set of accesses may make the pointers for another set
464       // loop invariant, so run this in a loop (with the MaybePromotable set
465       // decreasing in size over time).
466       bool Promoted = false;
467       bool LocalPromoted;
468       do {
469         LocalPromoted = false;
470         for (const SmallSetVector<Value *, 8> &PointerMustAliases :
471              collectPromotionCandidates(MSSA, AA, L)) {
472           LocalPromoted |= promoteLoopAccessesToScalars(
473               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
474               DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
475         }
476         Promoted |= LocalPromoted;
477       } while (LocalPromoted);
478 
479       // Once we have promoted values across the loop body we have to
480       // recursively reform LCSSA as any nested loop may now have values defined
481       // within the loop used in the outer loop.
482       // FIXME: This is really heavy handed. It would be a bit better to use an
483       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
484       // it as it went.
485       if (Promoted)
486         formLCSSARecursively(*L, *DT, LI, SE);
487 
488       Changed |= Promoted;
489     }
490   }
491 
492   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
493   // specifically moving instructions across the loop boundary and so it is
494   // especially in need of basic functional correctness checking here.
495   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
496   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
497          "Parent loop not left in LCSSA form after LICM!");
498 
499   if (VerifyMemorySSA)
500     MSSA->verifyMemorySSA();
501 
502   if (Changed && SE)
503     SE->forgetLoopDispositions(L);
504   return Changed;
505 }
506 
507 /// Walk the specified region of the CFG (defined by all blocks dominated by
508 /// the specified block, and that are in the current loop) in reverse depth
509 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
510 /// definitions, allowing us to sink a loop body in one pass without iteration.
511 ///
512 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
513                       DominatorTree *DT, BlockFrequencyInfo *BFI,
514                       TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
515                       Loop *CurLoop, MemorySSAUpdater *MSSAU,
516                       ICFLoopSafetyInfo *SafetyInfo,
517                       SinkAndHoistLICMFlags &Flags,
518                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
519 
520   // Verify inputs.
521   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
522          CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
523          "Unexpected input to sinkRegion.");
524 
525   // We want to visit children before parents. We will enque all the parents
526   // before their children in the worklist and process the worklist in reverse
527   // order.
528   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
529 
530   bool Changed = false;
531   for (DomTreeNode *DTN : reverse(Worklist)) {
532     BasicBlock *BB = DTN->getBlock();
533     // Only need to process the contents of this block if it is not part of a
534     // subloop (which would already have been processed).
535     if (inSubLoop(BB, CurLoop, LI))
536       continue;
537 
538     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
539       Instruction &I = *--II;
540 
541       // The instruction is not used in the loop if it is dead.  In this case,
542       // we just delete it instead of sinking it.
543       if (isInstructionTriviallyDead(&I, TLI)) {
544         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
545         salvageKnowledge(&I);
546         salvageDebugInfo(I);
547         ++II;
548         eraseInstruction(I, *SafetyInfo, MSSAU);
549         Changed = true;
550         continue;
551       }
552 
553       // Check to see if we can sink this instruction to the exit blocks
554       // of the loop.  We can do this if the all users of the instruction are
555       // outside of the loop.  In this case, it doesn't even matter if the
556       // operands of the instruction are loop invariant.
557       //
558       bool FreeInLoop = false;
559       bool LoopNestMode = OutermostLoop != nullptr;
560       if (!I.mayHaveSideEffects() &&
561           isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
562                                 SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
563           canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
564                              &Flags, ORE)) {
565         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
566           if (!FreeInLoop) {
567             ++II;
568             salvageDebugInfo(I);
569             eraseInstruction(I, *SafetyInfo, MSSAU);
570           }
571           Changed = true;
572         }
573       }
574     }
575   }
576   if (VerifyMemorySSA)
577     MSSAU->getMemorySSA()->verifyMemorySSA();
578   return Changed;
579 }
580 
581 bool llvm::sinkRegionForLoopNest(
582     DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
583     BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
584     Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
585     SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
586 
587   bool Changed = false;
588   SmallPriorityWorklist<Loop *, 4> Worklist;
589   Worklist.insert(CurLoop);
590   appendLoopsToWorklist(*CurLoop, Worklist);
591   while (!Worklist.empty()) {
592     Loop *L = Worklist.pop_back_val();
593     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
594                           TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
595   }
596   return Changed;
597 }
598 
599 namespace {
600 // This is a helper class for hoistRegion to make it able to hoist control flow
601 // in order to be able to hoist phis. The way this works is that we initially
602 // start hoisting to the loop preheader, and when we see a loop invariant branch
603 // we make note of this. When we then come to hoist an instruction that's
604 // conditional on such a branch we duplicate the branch and the relevant control
605 // flow, then hoist the instruction into the block corresponding to its original
606 // block in the duplicated control flow.
607 class ControlFlowHoister {
608 private:
609   // Information about the loop we are hoisting from
610   LoopInfo *LI;
611   DominatorTree *DT;
612   Loop *CurLoop;
613   MemorySSAUpdater *MSSAU;
614 
615   // A map of blocks in the loop to the block their instructions will be hoisted
616   // to.
617   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
618 
619   // The branches that we can hoist, mapped to the block that marks a
620   // convergence point of their control flow.
621   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
622 
623 public:
624   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
625                      MemorySSAUpdater *MSSAU)
626       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
627 
628   void registerPossiblyHoistableBranch(BranchInst *BI) {
629     // We can only hoist conditional branches with loop invariant operands.
630     if (!ControlFlowHoisting || !BI->isConditional() ||
631         !CurLoop->hasLoopInvariantOperands(BI))
632       return;
633 
634     // The branch destinations need to be in the loop, and we don't gain
635     // anything by duplicating conditional branches with duplicate successors,
636     // as it's essentially the same as an unconditional branch.
637     BasicBlock *TrueDest = BI->getSuccessor(0);
638     BasicBlock *FalseDest = BI->getSuccessor(1);
639     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
640         TrueDest == FalseDest)
641       return;
642 
643     // We can hoist BI if one branch destination is the successor of the other,
644     // or both have common successor which we check by seeing if the
645     // intersection of their successors is non-empty.
646     // TODO: This could be expanded to allowing branches where both ends
647     // eventually converge to a single block.
648     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
649     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
650     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
651     BasicBlock *CommonSucc = nullptr;
652     if (TrueDestSucc.count(FalseDest)) {
653       CommonSucc = FalseDest;
654     } else if (FalseDestSucc.count(TrueDest)) {
655       CommonSucc = TrueDest;
656     } else {
657       set_intersect(TrueDestSucc, FalseDestSucc);
658       // If there's one common successor use that.
659       if (TrueDestSucc.size() == 1)
660         CommonSucc = *TrueDestSucc.begin();
661       // If there's more than one pick whichever appears first in the block list
662       // (we can't use the value returned by TrueDestSucc.begin() as it's
663       // unpredicatable which element gets returned).
664       else if (!TrueDestSucc.empty()) {
665         Function *F = TrueDest->getParent();
666         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
667         auto It = llvm::find_if(*F, IsSucc);
668         assert(It != F->end() && "Could not find successor in function");
669         CommonSucc = &*It;
670       }
671     }
672     // The common successor has to be dominated by the branch, as otherwise
673     // there will be some other path to the successor that will not be
674     // controlled by this branch so any phi we hoist would be controlled by the
675     // wrong condition. This also takes care of avoiding hoisting of loop back
676     // edges.
677     // TODO: In some cases this could be relaxed if the successor is dominated
678     // by another block that's been hoisted and we can guarantee that the
679     // control flow has been replicated exactly.
680     if (CommonSucc && DT->dominates(BI, CommonSucc))
681       HoistableBranches[BI] = CommonSucc;
682   }
683 
684   bool canHoistPHI(PHINode *PN) {
685     // The phi must have loop invariant operands.
686     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
687       return false;
688     // We can hoist phis if the block they are in is the target of hoistable
689     // branches which cover all of the predecessors of the block.
690     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
691     BasicBlock *BB = PN->getParent();
692     for (BasicBlock *PredBB : predecessors(BB))
693       PredecessorBlocks.insert(PredBB);
694     // If we have less predecessor blocks than predecessors then the phi will
695     // have more than one incoming value for the same block which we can't
696     // handle.
697     // TODO: This could be handled be erasing some of the duplicate incoming
698     // values.
699     if (PredecessorBlocks.size() != pred_size(BB))
700       return false;
701     for (auto &Pair : HoistableBranches) {
702       if (Pair.second == BB) {
703         // Which blocks are predecessors via this branch depends on if the
704         // branch is triangle-like or diamond-like.
705         if (Pair.first->getSuccessor(0) == BB) {
706           PredecessorBlocks.erase(Pair.first->getParent());
707           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
708         } else if (Pair.first->getSuccessor(1) == BB) {
709           PredecessorBlocks.erase(Pair.first->getParent());
710           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
711         } else {
712           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
713           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
714         }
715       }
716     }
717     // PredecessorBlocks will now be empty if for every predecessor of BB we
718     // found a hoistable branch source.
719     return PredecessorBlocks.empty();
720   }
721 
722   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
723     if (!ControlFlowHoisting)
724       return CurLoop->getLoopPreheader();
725     // If BB has already been hoisted, return that
726     if (HoistDestinationMap.count(BB))
727       return HoistDestinationMap[BB];
728 
729     // Check if this block is conditional based on a pending branch
730     auto HasBBAsSuccessor =
731         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
732           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
733                                        Pair.first->getSuccessor(1) == BB);
734         };
735     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
736 
737     // If not involved in a pending branch, hoist to preheader
738     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
739     if (It == HoistableBranches.end()) {
740       LLVM_DEBUG(dbgs() << "LICM using "
741                         << InitialPreheader->getNameOrAsOperand()
742                         << " as hoist destination for "
743                         << BB->getNameOrAsOperand() << "\n");
744       HoistDestinationMap[BB] = InitialPreheader;
745       return InitialPreheader;
746     }
747     BranchInst *BI = It->first;
748     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
749                HoistableBranches.end() &&
750            "BB is expected to be the target of at most one branch");
751 
752     LLVMContext &C = BB->getContext();
753     BasicBlock *TrueDest = BI->getSuccessor(0);
754     BasicBlock *FalseDest = BI->getSuccessor(1);
755     BasicBlock *CommonSucc = HoistableBranches[BI];
756     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
757 
758     // Create hoisted versions of blocks that currently don't have them
759     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
760       if (HoistDestinationMap.count(Orig))
761         return HoistDestinationMap[Orig];
762       BasicBlock *New =
763           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
764       HoistDestinationMap[Orig] = New;
765       DT->addNewBlock(New, HoistTarget);
766       if (CurLoop->getParentLoop())
767         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
768       ++NumCreatedBlocks;
769       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
770                         << " as hoist destination for " << Orig->getName()
771                         << "\n");
772       return New;
773     };
774     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
775     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
776     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
777 
778     // Link up these blocks with branches.
779     if (!HoistCommonSucc->getTerminator()) {
780       // The new common successor we've generated will branch to whatever that
781       // hoist target branched to.
782       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
783       assert(TargetSucc && "Expected hoist target to have a single successor");
784       HoistCommonSucc->moveBefore(TargetSucc);
785       BranchInst::Create(TargetSucc, HoistCommonSucc);
786     }
787     if (!HoistTrueDest->getTerminator()) {
788       HoistTrueDest->moveBefore(HoistCommonSucc);
789       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
790     }
791     if (!HoistFalseDest->getTerminator()) {
792       HoistFalseDest->moveBefore(HoistCommonSucc);
793       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
794     }
795 
796     // If BI is being cloned to what was originally the preheader then
797     // HoistCommonSucc will now be the new preheader.
798     if (HoistTarget == InitialPreheader) {
799       // Phis in the loop header now need to use the new preheader.
800       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
801       MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
802           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
803       // The new preheader dominates the loop header.
804       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
805       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
806       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
807       // The preheader hoist destination is now the new preheader, with the
808       // exception of the hoist destination of this branch.
809       for (auto &Pair : HoistDestinationMap)
810         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
811           Pair.second = HoistCommonSucc;
812     }
813 
814     // Now finally clone BI.
815     ReplaceInstWithInst(
816         HoistTarget->getTerminator(),
817         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
818     ++NumClonedBranches;
819 
820     assert(CurLoop->getLoopPreheader() &&
821            "Hoisting blocks should not have destroyed preheader");
822     return HoistDestinationMap[BB];
823   }
824 };
825 } // namespace
826 
827 /// Walk the specified region of the CFG (defined by all blocks dominated by
828 /// the specified block, and that are in the current loop) in depth first
829 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
830 /// uses, allowing us to hoist a loop body in one pass without iteration.
831 ///
832 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
833                        DominatorTree *DT, BlockFrequencyInfo *BFI,
834                        TargetLibraryInfo *TLI, Loop *CurLoop,
835                        MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
836                        ICFLoopSafetyInfo *SafetyInfo,
837                        SinkAndHoistLICMFlags &Flags,
838                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
839                        bool AllowSpeculation) {
840   // Verify inputs.
841   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
842          CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
843          "Unexpected input to hoistRegion.");
844 
845   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
846 
847   // Keep track of instructions that have been hoisted, as they may need to be
848   // re-hoisted if they end up not dominating all of their uses.
849   SmallVector<Instruction *, 16> HoistedInstructions;
850 
851   // For PHI hoisting to work we need to hoist blocks before their successors.
852   // We can do this by iterating through the blocks in the loop in reverse
853   // post-order.
854   LoopBlocksRPO Worklist(CurLoop);
855   Worklist.perform(LI);
856   bool Changed = false;
857   for (BasicBlock *BB : Worklist) {
858     // Only need to process the contents of this block if it is not part of a
859     // subloop (which would already have been processed).
860     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
861       continue;
862 
863     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
864       // Try constant folding this instruction.  If all the operands are
865       // constants, it is technically hoistable, but it would be better to
866       // just fold it.
867       if (Constant *C = ConstantFoldInstruction(
868               &I, I.getModule()->getDataLayout(), TLI)) {
869         LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
870                           << '\n');
871         // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
872         I.replaceAllUsesWith(C);
873         if (isInstructionTriviallyDead(&I, TLI))
874           eraseInstruction(I, *SafetyInfo, MSSAU);
875         Changed = true;
876         continue;
877       }
878 
879       // Try hoisting the instruction out to the preheader.  We can only do
880       // this if all of the operands of the instruction are loop invariant and
881       // if it is safe to hoist the instruction. We also check block frequency
882       // to make sure instruction only gets hoisted into colder blocks.
883       // TODO: It may be safe to hoist if we are hoisting to a conditional block
884       // and we have accurately duplicated the control flow from the loop header
885       // to that block.
886       if (CurLoop->hasLoopInvariantOperands(&I) &&
887           canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
888                              true, &Flags, ORE) &&
889           isSafeToExecuteUnconditionally(
890               I, DT, TLI, CurLoop, SafetyInfo, ORE,
891               CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
892         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
893               MSSAU, SE, ORE);
894         HoistedInstructions.push_back(&I);
895         Changed = true;
896         continue;
897       }
898 
899       // Attempt to remove floating point division out of the loop by
900       // converting it to a reciprocal multiplication.
901       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
902           CurLoop->isLoopInvariant(I.getOperand(1))) {
903         auto Divisor = I.getOperand(1);
904         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
905         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
906         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
907         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
908         ReciprocalDivisor->insertBefore(&I);
909 
910         auto Product =
911             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
912         Product->setFastMathFlags(I.getFastMathFlags());
913         SafetyInfo->insertInstructionTo(Product, I.getParent());
914         Product->insertAfter(&I);
915         I.replaceAllUsesWith(Product);
916         eraseInstruction(I, *SafetyInfo, MSSAU);
917 
918         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
919               SafetyInfo, MSSAU, SE, ORE);
920         HoistedInstructions.push_back(ReciprocalDivisor);
921         Changed = true;
922         continue;
923       }
924 
925       auto IsInvariantStart = [&](Instruction &I) {
926         using namespace PatternMatch;
927         return I.use_empty() &&
928                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
929       };
930       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
931         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
932                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
933       };
934       if ((IsInvariantStart(I) || isGuard(&I)) &&
935           CurLoop->hasLoopInvariantOperands(&I) &&
936           MustExecuteWithoutWritesBefore(I)) {
937         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
938               MSSAU, SE, ORE);
939         HoistedInstructions.push_back(&I);
940         Changed = true;
941         continue;
942       }
943 
944       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
945         if (CFH.canHoistPHI(PN)) {
946           // Redirect incoming blocks first to ensure that we create hoisted
947           // versions of those blocks before we hoist the phi.
948           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
949             PN->setIncomingBlock(
950                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
951           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
952                 MSSAU, SE, ORE);
953           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
954           Changed = true;
955           continue;
956         }
957       }
958 
959       // Remember possibly hoistable branches so we can actually hoist them
960       // later if needed.
961       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
962         CFH.registerPossiblyHoistableBranch(BI);
963     }
964   }
965 
966   // If we hoisted instructions to a conditional block they may not dominate
967   // their uses that weren't hoisted (such as phis where some operands are not
968   // loop invariant). If so make them unconditional by moving them to their
969   // immediate dominator. We iterate through the instructions in reverse order
970   // which ensures that when we rehoist an instruction we rehoist its operands,
971   // and also keep track of where in the block we are rehoisting to to make sure
972   // that we rehoist instructions before the instructions that use them.
973   Instruction *HoistPoint = nullptr;
974   if (ControlFlowHoisting) {
975     for (Instruction *I : reverse(HoistedInstructions)) {
976       if (!llvm::all_of(I->uses(),
977                         [&](Use &U) { return DT->dominates(I, U); })) {
978         BasicBlock *Dominator =
979             DT->getNode(I->getParent())->getIDom()->getBlock();
980         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
981           if (HoistPoint)
982             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
983                    "New hoist point expected to dominate old hoist point");
984           HoistPoint = Dominator->getTerminator();
985         }
986         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
987                           << HoistPoint->getParent()->getNameOrAsOperand()
988                           << ": " << *I << "\n");
989         moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
990         HoistPoint = I;
991         Changed = true;
992       }
993     }
994   }
995   if (VerifyMemorySSA)
996     MSSAU->getMemorySSA()->verifyMemorySSA();
997 
998     // Now that we've finished hoisting make sure that LI and DT are still
999     // valid.
1000 #ifdef EXPENSIVE_CHECKS
1001   if (Changed) {
1002     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1003            "Dominator tree verification failed");
1004     LI->verify(*DT);
1005   }
1006 #endif
1007 
1008   return Changed;
1009 }
1010 
1011 // Return true if LI is invariant within scope of the loop. LI is invariant if
1012 // CurLoop is dominated by an invariant.start representing the same memory
1013 // location and size as the memory location LI loads from, and also the
1014 // invariant.start has no uses.
1015 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1016                                   Loop *CurLoop) {
1017   Value *Addr = LI->getOperand(0);
1018   const DataLayout &DL = LI->getModule()->getDataLayout();
1019   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1020 
1021   // It is not currently possible for clang to generate an invariant.start
1022   // intrinsic with scalable vector types because we don't support thread local
1023   // sizeless types and we don't permit sizeless types in structs or classes.
1024   // Furthermore, even if support is added for this in future the intrinsic
1025   // itself is defined to have a size of -1 for variable sized objects. This
1026   // makes it impossible to verify if the intrinsic envelops our region of
1027   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1028   // types would have a -1 parameter, but the former is clearly double the size
1029   // of the latter.
1030   if (LocSizeInBits.isScalable())
1031     return false;
1032 
1033   // if the type is i8 addrspace(x)*, we know this is the type of
1034   // llvm.invariant.start operand
1035   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1036                                      LI->getPointerAddressSpace());
1037   unsigned BitcastsVisited = 0;
1038   // Look through bitcasts until we reach the i8* type (this is invariant.start
1039   // operand type).
1040   while (Addr->getType() != PtrInt8Ty) {
1041     auto *BC = dyn_cast<BitCastInst>(Addr);
1042     // Avoid traversing high number of bitcast uses.
1043     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1044       return false;
1045     Addr = BC->getOperand(0);
1046   }
1047   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1048   // uselists for non-local Values in a loop pass.
1049   if (isa<Constant>(Addr))
1050     return false;
1051 
1052   unsigned UsesVisited = 0;
1053   // Traverse all uses of the load operand value, to see if invariant.start is
1054   // one of the uses, and whether it dominates the load instruction.
1055   for (auto *U : Addr->users()) {
1056     // Avoid traversing for Load operand with high number of users.
1057     if (++UsesVisited > MaxNumUsesTraversed)
1058       return false;
1059     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1060     // If there are escaping uses of invariant.start instruction, the load maybe
1061     // non-invariant.
1062     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1063         !II->use_empty())
1064       continue;
1065     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1066     // The intrinsic supports having a -1 argument for variable sized objects
1067     // so we should check for that here.
1068     if (InvariantSize->isNegative())
1069       continue;
1070     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1071     // Confirm the invariant.start location size contains the load operand size
1072     // in bits. Also, the invariant.start should dominate the load, and we
1073     // should not hoist the load out of a loop that contains this dominating
1074     // invariant.start.
1075     if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
1076         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1077       return true;
1078   }
1079 
1080   return false;
1081 }
1082 
1083 namespace {
1084 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1085 /// sink a given instruction out of a loop.  Does not address legality
1086 /// concerns such as aliasing or speculation safety.
1087 bool isHoistableAndSinkableInst(Instruction &I) {
1088   // Only these instructions are hoistable/sinkable.
1089   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1090           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1091           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1092           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1093           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1094           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1095           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1096 }
1097 /// Return true if all of the alias sets within this AST are known not to
1098 /// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
1099 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1100                 const Loop *L) {
1101   if (CurAST) {
1102     for (AliasSet &AS : *CurAST) {
1103       if (!AS.isForwardingAliasSet() && AS.isMod()) {
1104         return false;
1105       }
1106     }
1107     return true;
1108   } else { /*MSSAU*/
1109     for (auto *BB : L->getBlocks())
1110       if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1111         return false;
1112     return true;
1113   }
1114 }
1115 
1116 /// Return true if I is the only Instruction with a MemoryAccess in L.
1117 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1118                         const MemorySSAUpdater *MSSAU) {
1119   for (auto *BB : L->getBlocks())
1120     if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1121       int NotAPhi = 0;
1122       for (const auto &Acc : *Accs) {
1123         if (isa<MemoryPhi>(&Acc))
1124           continue;
1125         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1126         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1127           return false;
1128       }
1129     }
1130   return true;
1131 }
1132 }
1133 
1134 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1135                               Loop *CurLoop, AliasSetTracker *CurAST,
1136                               MemorySSAUpdater *MSSAU,
1137                               bool TargetExecutesOncePerLoop,
1138                               SinkAndHoistLICMFlags *Flags,
1139                               OptimizationRemarkEmitter *ORE) {
1140   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
1141          "Either AliasSetTracker or MemorySSA should be initialized.");
1142 
1143   // If we don't understand the instruction, bail early.
1144   if (!isHoistableAndSinkableInst(I))
1145     return false;
1146 
1147   MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1148   if (MSSA)
1149     assert(Flags != nullptr && "Flags cannot be null.");
1150 
1151   // Loads have extra constraints we have to verify before we can hoist them.
1152   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1153     if (!LI->isUnordered())
1154       return false; // Don't sink/hoist volatile or ordered atomic loads!
1155 
1156     // Loads from constant memory are always safe to move, even if they end up
1157     // in the same alias set as something that ends up being modified.
1158     if (AA->pointsToConstantMemory(LI->getOperand(0)))
1159       return true;
1160     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1161       return true;
1162 
1163     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1164       return false; // Don't risk duplicating unordered loads
1165 
1166     // This checks for an invariant.start dominating the load.
1167     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1168       return true;
1169 
1170     bool Invalidated;
1171     if (CurAST)
1172       Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1173                                              CurLoop, AA);
1174     else
1175       Invalidated = pointerInvalidatedByLoopWithMSSA(
1176           MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
1177     // Check loop-invariant address because this may also be a sinkable load
1178     // whose address is not necessarily loop-invariant.
1179     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1180       ORE->emit([&]() {
1181         return OptimizationRemarkMissed(
1182                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1183                << "failed to move load with loop-invariant address "
1184                   "because the loop may invalidate its value";
1185       });
1186 
1187     return !Invalidated;
1188   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1189     // Don't sink or hoist dbg info; it's legal, but not useful.
1190     if (isa<DbgInfoIntrinsic>(I))
1191       return false;
1192 
1193     // Don't sink calls which can throw.
1194     if (CI->mayThrow())
1195       return false;
1196 
1197     // Convergent attribute has been used on operations that involve
1198     // inter-thread communication which results are implicitly affected by the
1199     // enclosing control flows. It is not safe to hoist or sink such operations
1200     // across control flow.
1201     if (CI->isConvergent())
1202       return false;
1203 
1204     using namespace PatternMatch;
1205     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1206       // Assumes don't actually alias anything or throw
1207       return true;
1208 
1209     if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
1210       // Widenable conditions don't actually alias anything or throw
1211       return true;
1212 
1213     // Handle simple cases by querying alias analysis.
1214     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1215     if (Behavior == FMRB_DoesNotAccessMemory)
1216       return true;
1217     if (AAResults::onlyReadsMemory(Behavior)) {
1218       // A readonly argmemonly function only reads from memory pointed to by
1219       // it's arguments with arbitrary offsets.  If we can prove there are no
1220       // writes to this memory in the loop, we can hoist or sink.
1221       if (AAResults::onlyAccessesArgPointees(Behavior)) {
1222         // TODO: expand to writeable arguments
1223         for (Value *Op : CI->args())
1224           if (Op->getType()->isPointerTy()) {
1225             bool Invalidated;
1226             if (CurAST)
1227               Invalidated = pointerInvalidatedByLoop(
1228                   MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
1229             else
1230               Invalidated = pointerInvalidatedByLoopWithMSSA(
1231                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1232                   *Flags);
1233             if (Invalidated)
1234               return false;
1235           }
1236         return true;
1237       }
1238 
1239       // If this call only reads from memory and there are no writes to memory
1240       // in the loop, we can hoist or sink the call as appropriate.
1241       if (isReadOnly(CurAST, MSSAU, CurLoop))
1242         return true;
1243     }
1244 
1245     // FIXME: This should use mod/ref information to see if we can hoist or
1246     // sink the call.
1247 
1248     return false;
1249   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1250     // Fences alias (most) everything to provide ordering.  For the moment,
1251     // just give up if there are any other memory operations in the loop.
1252     if (CurAST) {
1253       auto Begin = CurAST->begin();
1254       assert(Begin != CurAST->end() && "must contain FI");
1255       if (std::next(Begin) != CurAST->end())
1256         // constant memory for instance, TODO: handle better
1257         return false;
1258       auto *UniqueI = Begin->getUniqueInstruction();
1259       if (!UniqueI)
1260         // other memory op, give up
1261         return false;
1262       (void)FI; // suppress unused variable warning
1263       assert(UniqueI == FI && "AS must contain FI");
1264       return true;
1265     } else // MSSAU
1266       return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1267   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1268     if (!SI->isUnordered())
1269       return false; // Don't sink/hoist volatile or ordered atomic store!
1270 
1271     // We can only hoist a store that we can prove writes a value which is not
1272     // read or overwritten within the loop.  For those cases, we fallback to
1273     // load store promotion instead.  TODO: We can extend this to cases where
1274     // there is exactly one write to the location and that write dominates an
1275     // arbitrary number of reads in the loop.
1276     if (CurAST) {
1277       auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1278 
1279       if (AS.isRef() || !AS.isMustAlias())
1280         // Quick exit test, handled by the full path below as well.
1281         return false;
1282       auto *UniqueI = AS.getUniqueInstruction();
1283       if (!UniqueI)
1284         // other memory op, give up
1285         return false;
1286       assert(UniqueI == SI && "AS must contain SI");
1287       return true;
1288     } else { // MSSAU
1289       if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1290         return true;
1291       // If there are more accesses than the Promotion cap or no "quota" to
1292       // check clobber, then give up as we're not walking a list that long.
1293       if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
1294         return false;
1295       // If there are interfering Uses (i.e. their defining access is in the
1296       // loop), or ordered loads (stored as Defs!), don't move this store.
1297       // Could do better here, but this is conservatively correct.
1298       // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1299       // moving accesses. Can also extend to dominating uses.
1300       auto *SIMD = MSSA->getMemoryAccess(SI);
1301       for (auto *BB : CurLoop->getBlocks())
1302         if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1303           for (const auto &MA : *Accesses)
1304             if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1305               auto *MD = MU->getDefiningAccess();
1306               if (!MSSA->isLiveOnEntryDef(MD) &&
1307                   CurLoop->contains(MD->getBlock()))
1308                 return false;
1309               // Disable hoisting past potentially interfering loads. Optimized
1310               // Uses may point to an access outside the loop, as getClobbering
1311               // checks the previous iteration when walking the backedge.
1312               // FIXME: More precise: no Uses that alias SI.
1313               if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
1314                 return false;
1315             } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1316               if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1317                 (void)LI; // Silence warning.
1318                 assert(!LI->isUnordered() && "Expected unordered load");
1319                 return false;
1320               }
1321               // Any call, while it may not be clobbering SI, it may be a use.
1322               if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1323                 // Check if the call may read from the memory location written
1324                 // to by SI. Check CI's attributes and arguments; the number of
1325                 // such checks performed is limited above by NoOfMemAccTooLarge.
1326                 ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
1327                 if (isModOrRefSet(MRI))
1328                   return false;
1329               }
1330             }
1331         }
1332       auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1333       Flags->incrementClobberingCalls();
1334       // If there are no clobbering Defs in the loop, store is safe to hoist.
1335       return MSSA->isLiveOnEntryDef(Source) ||
1336              !CurLoop->contains(Source->getBlock());
1337     }
1338   }
1339 
1340   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1341 
1342   // We've established mechanical ability and aliasing, it's up to the caller
1343   // to check fault safety
1344   return true;
1345 }
1346 
1347 /// Returns true if a PHINode is a trivially replaceable with an
1348 /// Instruction.
1349 /// This is true when all incoming values are that instruction.
1350 /// This pattern occurs most often with LCSSA PHI nodes.
1351 ///
1352 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1353   for (const Value *IncValue : PN.incoming_values())
1354     if (IncValue != &I)
1355       return false;
1356 
1357   return true;
1358 }
1359 
1360 /// Return true if the instruction is free in the loop.
1361 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1362                          const TargetTransformInfo *TTI) {
1363 
1364   if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1365     if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
1366         TargetTransformInfo::TCC_Free)
1367       return false;
1368     // For a GEP, we cannot simply use getUserCost because currently it
1369     // optimistically assumes that a GEP will fold into addressing mode
1370     // regardless of its users.
1371     const BasicBlock *BB = GEP->getParent();
1372     for (const User *U : GEP->users()) {
1373       const Instruction *UI = cast<Instruction>(U);
1374       if (CurLoop->contains(UI) &&
1375           (BB != UI->getParent() ||
1376            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1377         return false;
1378     }
1379     return true;
1380   } else
1381     return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1382            TargetTransformInfo::TCC_Free;
1383 }
1384 
1385 /// Return true if the only users of this instruction are outside of
1386 /// the loop. If this is true, we can sink the instruction to the exit
1387 /// blocks of the loop.
1388 ///
1389 /// We also return true if the instruction could be folded away in lowering.
1390 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
1391 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1392                                   const LoopSafetyInfo *SafetyInfo,
1393                                   TargetTransformInfo *TTI, bool &FreeInLoop,
1394                                   bool LoopNestMode) {
1395   const auto &BlockColors = SafetyInfo->getBlockColors();
1396   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1397   for (const User *U : I.users()) {
1398     const Instruction *UI = cast<Instruction>(U);
1399     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1400       const BasicBlock *BB = PN->getParent();
1401       // We cannot sink uses in catchswitches.
1402       if (isa<CatchSwitchInst>(BB->getTerminator()))
1403         return false;
1404 
1405       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1406       // phi use is too muddled.
1407       if (isa<CallInst>(I))
1408         if (!BlockColors.empty() &&
1409             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1410           return false;
1411 
1412       if (LoopNestMode) {
1413         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1414                UI->getNumOperands() == 1) {
1415           if (!CurLoop->contains(UI))
1416             break;
1417           UI = cast<Instruction>(UI->user_back());
1418         }
1419       }
1420     }
1421 
1422     if (CurLoop->contains(UI)) {
1423       if (IsFree) {
1424         FreeInLoop = true;
1425         continue;
1426       }
1427       return false;
1428     }
1429   }
1430   return true;
1431 }
1432 
1433 static Instruction *cloneInstructionInExitBlock(
1434     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1435     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1436   Instruction *New;
1437   if (auto *CI = dyn_cast<CallInst>(&I)) {
1438     const auto &BlockColors = SafetyInfo->getBlockColors();
1439 
1440     // Sinking call-sites need to be handled differently from other
1441     // instructions.  The cloned call-site needs a funclet bundle operand
1442     // appropriate for its location in the CFG.
1443     SmallVector<OperandBundleDef, 1> OpBundles;
1444     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1445          BundleIdx != BundleEnd; ++BundleIdx) {
1446       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1447       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1448         continue;
1449 
1450       OpBundles.emplace_back(Bundle);
1451     }
1452 
1453     if (!BlockColors.empty()) {
1454       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1455       assert(CV.size() == 1 && "non-unique color for exit block!");
1456       BasicBlock *BBColor = CV.front();
1457       Instruction *EHPad = BBColor->getFirstNonPHI();
1458       if (EHPad->isEHPad())
1459         OpBundles.emplace_back("funclet", EHPad);
1460     }
1461 
1462     New = CallInst::Create(CI, OpBundles);
1463   } else {
1464     New = I.clone();
1465   }
1466 
1467   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1468   if (!I.getName().empty())
1469     New->setName(I.getName() + ".le");
1470 
1471   if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
1472     // Create a new MemoryAccess and let MemorySSA set its defining access.
1473     MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1474         New, nullptr, New->getParent(), MemorySSA::Beginning);
1475     if (NewMemAcc) {
1476       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1477         MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1478       else {
1479         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1480         MSSAU->insertUse(MemUse, /*RenameUses=*/true);
1481       }
1482     }
1483   }
1484 
1485   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1486   // this is particularly cheap because we can rip off the PHI node that we're
1487   // replacing for the number and blocks of the predecessors.
1488   // OPT: If this shows up in a profile, we can instead finish sinking all
1489   // invariant instructions, and then walk their operands to re-establish
1490   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1491   // sinking bottom-up.
1492   for (Use &Op : New->operands())
1493     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1494       auto *OInst = cast<Instruction>(Op.get());
1495       PHINode *OpPN =
1496         PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1497                         OInst->getName() + ".lcssa", &ExitBlock.front());
1498       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1499         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1500       Op = OpPN;
1501     }
1502   return New;
1503 }
1504 
1505 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1506                              MemorySSAUpdater *MSSAU) {
1507   if (MSSAU)
1508     MSSAU->removeMemoryAccess(&I);
1509   SafetyInfo.removeInstruction(&I);
1510   I.eraseFromParent();
1511 }
1512 
1513 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1514                                   ICFLoopSafetyInfo &SafetyInfo,
1515                                   MemorySSAUpdater *MSSAU,
1516                                   ScalarEvolution *SE) {
1517   SafetyInfo.removeInstruction(&I);
1518   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1519   I.moveBefore(&Dest);
1520   if (MSSAU)
1521     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1522             MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1523       MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
1524                          MemorySSA::BeforeTerminator);
1525   if (SE)
1526     SE->forgetValue(&I);
1527 }
1528 
1529 static Instruction *sinkThroughTriviallyReplaceablePHI(
1530     PHINode *TPN, Instruction *I, LoopInfo *LI,
1531     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1532     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1533     MemorySSAUpdater *MSSAU) {
1534   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1535          "Expect only trivially replaceable PHI");
1536   BasicBlock *ExitBlock = TPN->getParent();
1537   Instruction *New;
1538   auto It = SunkCopies.find(ExitBlock);
1539   if (It != SunkCopies.end())
1540     New = It->second;
1541   else
1542     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1543         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1544   return New;
1545 }
1546 
1547 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1548   BasicBlock *BB = PN->getParent();
1549   if (!BB->canSplitPredecessors())
1550     return false;
1551   // It's not impossible to split EHPad blocks, but if BlockColors already exist
1552   // it require updating BlockColors for all offspring blocks accordingly. By
1553   // skipping such corner case, we can make updating BlockColors after splitting
1554   // predecessor fairly simple.
1555   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1556     return false;
1557   for (BasicBlock *BBPred : predecessors(BB)) {
1558     if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
1559         isa<CallBrInst>(BBPred->getTerminator()))
1560       return false;
1561   }
1562   return true;
1563 }
1564 
1565 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1566                                         LoopInfo *LI, const Loop *CurLoop,
1567                                         LoopSafetyInfo *SafetyInfo,
1568                                         MemorySSAUpdater *MSSAU) {
1569 #ifndef NDEBUG
1570   SmallVector<BasicBlock *, 32> ExitBlocks;
1571   CurLoop->getUniqueExitBlocks(ExitBlocks);
1572   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1573                                              ExitBlocks.end());
1574 #endif
1575   BasicBlock *ExitBB = PN->getParent();
1576   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1577 
1578   // Split predecessors of the loop exit to make instructions in the loop are
1579   // exposed to exit blocks through trivially replaceable PHIs while keeping the
1580   // loop in the canonical form where each predecessor of each exit block should
1581   // be contained within the loop. For example, this will convert the loop below
1582   // from
1583   //
1584   // LB1:
1585   //   %v1 =
1586   //   br %LE, %LB2
1587   // LB2:
1588   //   %v2 =
1589   //   br %LE, %LB1
1590   // LE:
1591   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1592   //
1593   // to
1594   //
1595   // LB1:
1596   //   %v1 =
1597   //   br %LE.split, %LB2
1598   // LB2:
1599   //   %v2 =
1600   //   br %LE.split2, %LB1
1601   // LE.split:
1602   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1603   //   br %LE
1604   // LE.split2:
1605   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1606   //   br %LE
1607   // LE:
1608   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1609   //
1610   const auto &BlockColors = SafetyInfo->getBlockColors();
1611   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1612   while (!PredBBs.empty()) {
1613     BasicBlock *PredBB = *PredBBs.begin();
1614     assert(CurLoop->contains(PredBB) &&
1615            "Expect all predecessors are in the loop");
1616     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1617       BasicBlock *NewPred = SplitBlockPredecessors(
1618           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1619       // Since we do not allow splitting EH-block with BlockColors in
1620       // canSplitPredecessors(), we can simply assign predecessor's color to
1621       // the new block.
1622       if (!BlockColors.empty())
1623         // Grab a reference to the ColorVector to be inserted before getting the
1624         // reference to the vector we are copying because inserting the new
1625         // element in BlockColors might cause the map to be reallocated.
1626         SafetyInfo->copyColors(NewPred, PredBB);
1627     }
1628     PredBBs.remove(PredBB);
1629   }
1630 }
1631 
1632 /// When an instruction is found to only be used outside of the loop, this
1633 /// function moves it to the exit blocks and patches up SSA form as needed.
1634 /// This method is guaranteed to remove the original instruction from its
1635 /// position, and may either delete it or move it to outside of the loop.
1636 ///
1637 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1638                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
1639                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
1640                  OptimizationRemarkEmitter *ORE) {
1641   bool Changed = false;
1642   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1643 
1644   // Iterate over users to be ready for actual sinking. Replace users via
1645   // unreachable blocks with undef and make all user PHIs trivially replaceable.
1646   SmallPtrSet<Instruction *, 8> VisitedUsers;
1647   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1648     auto *User = cast<Instruction>(*UI);
1649     Use &U = UI.getUse();
1650     ++UI;
1651 
1652     if (VisitedUsers.count(User) || CurLoop->contains(User))
1653       continue;
1654 
1655     if (!DT->isReachableFromEntry(User->getParent())) {
1656       U = UndefValue::get(I.getType());
1657       Changed = true;
1658       continue;
1659     }
1660 
1661     // The user must be a PHI node.
1662     PHINode *PN = cast<PHINode>(User);
1663 
1664     // Surprisingly, instructions can be used outside of loops without any
1665     // exits.  This can only happen in PHI nodes if the incoming block is
1666     // unreachable.
1667     BasicBlock *BB = PN->getIncomingBlock(U);
1668     if (!DT->isReachableFromEntry(BB)) {
1669       U = UndefValue::get(I.getType());
1670       Changed = true;
1671       continue;
1672     }
1673 
1674     VisitedUsers.insert(PN);
1675     if (isTriviallyReplaceablePHI(*PN, I))
1676       continue;
1677 
1678     if (!canSplitPredecessors(PN, SafetyInfo))
1679       return Changed;
1680 
1681     // Split predecessors of the PHI so that we can make users trivially
1682     // replaceable.
1683     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1684 
1685     // Should rebuild the iterators, as they may be invalidated by
1686     // splitPredecessorsOfLoopExit().
1687     UI = I.user_begin();
1688     UE = I.user_end();
1689   }
1690 
1691   if (VisitedUsers.empty())
1692     return Changed;
1693 
1694   ORE->emit([&]() {
1695     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1696            << "sinking " << ore::NV("Inst", &I);
1697   });
1698   if (isa<LoadInst>(I))
1699     ++NumMovedLoads;
1700   else if (isa<CallInst>(I))
1701     ++NumMovedCalls;
1702   ++NumSunk;
1703 
1704 #ifndef NDEBUG
1705   SmallVector<BasicBlock *, 32> ExitBlocks;
1706   CurLoop->getUniqueExitBlocks(ExitBlocks);
1707   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1708                                              ExitBlocks.end());
1709 #endif
1710 
1711   // Clones of this instruction. Don't create more than one per exit block!
1712   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1713 
1714   // If this instruction is only used outside of the loop, then all users are
1715   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1716   // the instruction.
1717   // First check if I is worth sinking for all uses. Sink only when it is worth
1718   // across all uses.
1719   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1720   for (auto *UI : Users) {
1721     auto *User = cast<Instruction>(UI);
1722 
1723     if (CurLoop->contains(User))
1724       continue;
1725 
1726     PHINode *PN = cast<PHINode>(User);
1727     assert(ExitBlockSet.count(PN->getParent()) &&
1728            "The LCSSA PHI is not in an exit block!");
1729 
1730     // The PHI must be trivially replaceable.
1731     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1732         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1733     PN->replaceAllUsesWith(New);
1734     eraseInstruction(*PN, *SafetyInfo, nullptr);
1735     Changed = true;
1736   }
1737   return Changed;
1738 }
1739 
1740 /// When an instruction is found to only use loop invariant operands that
1741 /// is safe to hoist, this instruction is called to do the dirty work.
1742 ///
1743 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1744                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1745                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
1746                   OptimizationRemarkEmitter *ORE) {
1747   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1748                     << I << "\n");
1749   ORE->emit([&]() {
1750     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1751                                                          << ore::NV("Inst", &I);
1752   });
1753 
1754   // Metadata can be dependent on conditions we are hoisting above.
1755   // Conservatively strip all metadata on the instruction unless we were
1756   // guaranteed to execute I if we entered the loop, in which case the metadata
1757   // is valid in the loop preheader.
1758   // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1759   // then moving to the preheader means we should strip attributes on the call
1760   // that can cause UB since we may be hoisting above conditions that allowed
1761   // inferring those attributes. They may not be valid at the preheader.
1762   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1763       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1764       // time in isGuaranteedToExecute if we don't actually have anything to
1765       // drop.  It is a compile time optimization, not required for correctness.
1766       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1767     I.dropUndefImplyingAttrsAndUnknownMetadata();
1768 
1769   if (isa<PHINode>(I))
1770     // Move the new node to the end of the phi list in the destination block.
1771     moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1772   else
1773     // Move the new node to the destination block, before its terminator.
1774     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1775 
1776   I.updateLocationAfterHoist();
1777 
1778   if (isa<LoadInst>(I))
1779     ++NumMovedLoads;
1780   else if (isa<CallInst>(I))
1781     ++NumMovedCalls;
1782   ++NumHoisted;
1783 }
1784 
1785 /// Only sink or hoist an instruction if it is not a trapping instruction,
1786 /// or if the instruction is known not to trap when moved to the preheader.
1787 /// or if it is a trapping instruction and is guaranteed to execute.
1788 static bool isSafeToExecuteUnconditionally(
1789     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1790     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1791     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1792     bool AllowSpeculation) {
1793   if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
1794     return true;
1795 
1796   bool GuaranteedToExecute =
1797       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1798 
1799   if (!GuaranteedToExecute) {
1800     auto *LI = dyn_cast<LoadInst>(&Inst);
1801     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1802       ORE->emit([&]() {
1803         return OptimizationRemarkMissed(
1804                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1805                << "failed to hoist load with loop-invariant address "
1806                   "because load is conditionally executed";
1807       });
1808   }
1809 
1810   return GuaranteedToExecute;
1811 }
1812 
1813 namespace {
1814 class LoopPromoter : public LoadAndStorePromoter {
1815   Value *SomePtr; // Designated pointer to store to.
1816   const SmallSetVector<Value *, 8> &PointerMustAliases;
1817   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1818   SmallVectorImpl<Instruction *> &LoopInsertPts;
1819   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1820   PredIteratorCache &PredCache;
1821   MemorySSAUpdater *MSSAU;
1822   LoopInfo &LI;
1823   DebugLoc DL;
1824   Align Alignment;
1825   bool UnorderedAtomic;
1826   AAMDNodes AATags;
1827   ICFLoopSafetyInfo &SafetyInfo;
1828   bool CanInsertStoresInExitBlocks;
1829 
1830   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1831   // (if legal) if doing so would add an out-of-loop use to an instruction
1832   // defined in-loop.
1833   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1834     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1835       return V;
1836 
1837     Instruction *I = cast<Instruction>(V);
1838     // We need to create an LCSSA PHI node for the incoming value and
1839     // store that.
1840     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1841                                   I->getName() + ".lcssa", &BB->front());
1842     for (BasicBlock *Pred : PredCache.get(BB))
1843       PN->addIncoming(I, Pred);
1844     return PN;
1845   }
1846 
1847 public:
1848   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1849                const SmallSetVector<Value *, 8> &PMA,
1850                SmallVectorImpl<BasicBlock *> &LEB,
1851                SmallVectorImpl<Instruction *> &LIP,
1852                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1853                MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
1854                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1855                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1856       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1857         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1858         PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1859         Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1860         SafetyInfo(SafetyInfo),
1861         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
1862 
1863   bool isInstInList(Instruction *I,
1864                     const SmallVectorImpl<Instruction *> &) const override {
1865     Value *Ptr;
1866     if (LoadInst *LI = dyn_cast<LoadInst>(I))
1867       Ptr = LI->getOperand(0);
1868     else
1869       Ptr = cast<StoreInst>(I)->getPointerOperand();
1870     return PointerMustAliases.count(Ptr);
1871   }
1872 
1873   void insertStoresInLoopExitBlocks() {
1874     // Insert stores after in the loop exit blocks.  Each exit block gets a
1875     // store of the live-out values that feed them.  Since we've already told
1876     // the SSA updater about the defs in the loop and the preheader
1877     // definition, it is all set and we can start using it.
1878     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1879       BasicBlock *ExitBlock = LoopExitBlocks[i];
1880       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1881       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1882       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1883       Instruction *InsertPos = LoopInsertPts[i];
1884       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1885       if (UnorderedAtomic)
1886         NewSI->setOrdering(AtomicOrdering::Unordered);
1887       NewSI->setAlignment(Alignment);
1888       NewSI->setDebugLoc(DL);
1889       if (AATags)
1890         NewSI->setAAMetadata(AATags);
1891 
1892       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1893       MemoryAccess *NewMemAcc;
1894       if (!MSSAInsertPoint) {
1895         NewMemAcc = MSSAU->createMemoryAccessInBB(
1896             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1897       } else {
1898         NewMemAcc =
1899             MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1900       }
1901       MSSAInsertPts[i] = NewMemAcc;
1902       MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1903       // FIXME: true for safety, false may still be correct.
1904     }
1905   }
1906 
1907   void doExtraRewritesBeforeFinalDeletion() override {
1908     if (CanInsertStoresInExitBlocks)
1909       insertStoresInLoopExitBlocks();
1910   }
1911 
1912   void instructionDeleted(Instruction *I) const override {
1913     SafetyInfo.removeInstruction(I);
1914     MSSAU->removeMemoryAccess(I);
1915   }
1916 
1917   bool shouldDelete(Instruction *I) const override {
1918     if (isa<StoreInst>(I))
1919       return CanInsertStoresInExitBlocks;
1920     return true;
1921   }
1922 };
1923 
1924 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1925                                  DominatorTree *DT) {
1926   // We can perform the captured-before check against any instruction in the
1927   // loop header, as the loop header is reachable from any instruction inside
1928   // the loop.
1929   // TODO: ReturnCaptures=true shouldn't be necessary here.
1930   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1931                                      /* StoreCaptures */ true,
1932                                      L->getHeader()->getTerminator(), DT);
1933 }
1934 
1935 /// Return true if we can prove that a caller cannot inspect the object if an
1936 /// unwind occurs inside the loop.
1937 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1938                                 DominatorTree *DT) {
1939   bool RequiresNoCaptureBeforeUnwind;
1940   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1941     return false;
1942 
1943   return !RequiresNoCaptureBeforeUnwind ||
1944          isNotCapturedBeforeOrInLoop(Object, L, DT);
1945 }
1946 
1947 } // namespace
1948 
1949 /// Try to promote memory values to scalars by sinking stores out of the
1950 /// loop and moving loads to before the loop.  We do this by looping over
1951 /// the stores in the loop, looking for stores to Must pointers which are
1952 /// loop invariant.
1953 ///
1954 bool llvm::promoteLoopAccessesToScalars(
1955     const SmallSetVector<Value *, 8> &PointerMustAliases,
1956     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1957     SmallVectorImpl<Instruction *> &InsertPts,
1958     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1959     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1960     Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1961     OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
1962   // Verify inputs.
1963   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1964          SafetyInfo != nullptr &&
1965          "Unexpected Input to promoteLoopAccessesToScalars");
1966 
1967   Value *SomePtr = *PointerMustAliases.begin();
1968   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1969 
1970   // It is not safe to promote a load/store from the loop if the load/store is
1971   // conditional.  For example, turning:
1972   //
1973   //    for () { if (c) *P += 1; }
1974   //
1975   // into:
1976   //
1977   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1978   //
1979   // is not safe, because *P may only be valid to access if 'c' is true.
1980   //
1981   // The safety property divides into two parts:
1982   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1983   //    case, we can't insert the required load in the preheader.
1984   // p2) The memory model does not allow us to insert a store along any dynamic
1985   //    path which did not originally have one.
1986   //
1987   // If at least one store is guaranteed to execute, both properties are
1988   // satisfied, and promotion is legal.
1989   //
1990   // This, however, is not a necessary condition. Even if no store/load is
1991   // guaranteed to execute, we can still establish these properties.
1992   // We can establish (p1) by proving that hoisting the load into the preheader
1993   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1994   // can use any access within the alias set to prove dereferenceability,
1995   // since they're all must alias.
1996   //
1997   // There are two ways establish (p2):
1998   // a) Prove the location is thread-local. In this case the memory model
1999   // requirement does not apply, and stores are safe to insert.
2000   // b) Prove a store dominates every exit block. In this case, if an exit
2001   // blocks is reached, the original dynamic path would have taken us through
2002   // the store, so inserting a store into the exit block is safe. Note that this
2003   // is different from the store being guaranteed to execute. For instance,
2004   // if an exception is thrown on the first iteration of the loop, the original
2005   // store is never executed, but the exit blocks are not executed either.
2006 
2007   bool DereferenceableInPH = false;
2008   bool SafeToInsertStore = false;
2009   bool FoundLoadToPromote = false;
2010 
2011   SmallVector<Instruction *, 64> LoopUses;
2012 
2013   // We start with an alignment of one and try to find instructions that allow
2014   // us to prove better alignment.
2015   Align Alignment;
2016   // Keep track of which types of access we see
2017   bool SawUnorderedAtomic = false;
2018   bool SawNotAtomic = false;
2019   AAMDNodes AATags;
2020 
2021   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2022 
2023   bool IsKnownThreadLocalObject = false;
2024   if (SafetyInfo->anyBlockMayThrow()) {
2025     // If a loop can throw, we have to insert a store along each unwind edge.
2026     // That said, we can't actually make the unwind edge explicit. Therefore,
2027     // we have to prove that the store is dead along the unwind edge.  We do
2028     // this by proving that the caller can't have a reference to the object
2029     // after return and thus can't possibly load from the object.
2030     Value *Object = getUnderlyingObject(SomePtr);
2031     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2032       return false;
2033     // Subtlety: Alloca's aren't visible to callers, but *are* potentially
2034     // visible to other threads if captured and used during their lifetimes.
2035     IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
2036   }
2037 
2038   // Check that all accesses to pointers in the aliass set use the same type.
2039   // We cannot (yet) promote a memory location that is loaded and stored in
2040   // different sizes.  While we are at it, collect alignment and AA info.
2041   Type *AccessTy = nullptr;
2042   for (Value *ASIV : PointerMustAliases) {
2043     for (User *U : ASIV->users()) {
2044       // Ignore instructions that are outside the loop.
2045       Instruction *UI = dyn_cast<Instruction>(U);
2046       if (!UI || !CurLoop->contains(UI))
2047         continue;
2048 
2049       // If there is an non-load/store instruction in the loop, we can't promote
2050       // it.
2051       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2052         if (!Load->isUnordered())
2053           return false;
2054 
2055         SawUnorderedAtomic |= Load->isAtomic();
2056         SawNotAtomic |= !Load->isAtomic();
2057         FoundLoadToPromote = true;
2058 
2059         Align InstAlignment = Load->getAlign();
2060 
2061         // Note that proving a load safe to speculate requires proving
2062         // sufficient alignment at the target location.  Proving it guaranteed
2063         // to execute does as well.  Thus we can increase our guaranteed
2064         // alignment as well.
2065         if (!DereferenceableInPH || (InstAlignment > Alignment))
2066           if (isSafeToExecuteUnconditionally(
2067                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2068                   Preheader->getTerminator(), AllowSpeculation)) {
2069             DereferenceableInPH = true;
2070             Alignment = std::max(Alignment, InstAlignment);
2071           }
2072       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2073         // Stores *of* the pointer are not interesting, only stores *to* the
2074         // pointer.
2075         if (UI->getOperand(1) != ASIV)
2076           continue;
2077         if (!Store->isUnordered())
2078           return false;
2079 
2080         SawUnorderedAtomic |= Store->isAtomic();
2081         SawNotAtomic |= !Store->isAtomic();
2082 
2083         // If the store is guaranteed to execute, both properties are satisfied.
2084         // We may want to check if a store is guaranteed to execute even if we
2085         // already know that promotion is safe, since it may have higher
2086         // alignment than any other guaranteed stores, in which case we can
2087         // raise the alignment on the promoted store.
2088         Align InstAlignment = Store->getAlign();
2089 
2090         if (!DereferenceableInPH || !SafeToInsertStore ||
2091             (InstAlignment > Alignment)) {
2092           if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2093             DereferenceableInPH = true;
2094             SafeToInsertStore = true;
2095             Alignment = std::max(Alignment, InstAlignment);
2096           }
2097         }
2098 
2099         // If a store dominates all exit blocks, it is safe to sink.
2100         // As explained above, if an exit block was executed, a dominating
2101         // store must have been executed at least once, so we are not
2102         // introducing stores on paths that did not have them.
2103         // Note that this only looks at explicit exit blocks. If we ever
2104         // start sinking stores into unwind edges (see above), this will break.
2105         if (!SafeToInsertStore)
2106           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2107             return DT->dominates(Store->getParent(), Exit);
2108           });
2109 
2110         // If the store is not guaranteed to execute, we may still get
2111         // deref info through it.
2112         if (!DereferenceableInPH) {
2113           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2114               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2115               Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
2116         }
2117       } else
2118         return false; // Not a load or store.
2119 
2120       if (!AccessTy)
2121         AccessTy = getLoadStoreType(UI);
2122       else if (AccessTy != getLoadStoreType(UI))
2123         return false;
2124 
2125       // Merge the AA tags.
2126       if (LoopUses.empty()) {
2127         // On the first load/store, just take its AA tags.
2128         AATags = UI->getAAMetadata();
2129       } else if (AATags) {
2130         AATags = AATags.merge(UI->getAAMetadata());
2131       }
2132 
2133       LoopUses.push_back(UI);
2134     }
2135   }
2136 
2137   // If we found both an unordered atomic instruction and a non-atomic memory
2138   // access, bail.  We can't blindly promote non-atomic to atomic since we
2139   // might not be able to lower the result.  We can't downgrade since that
2140   // would violate memory model.  Also, align 0 is an error for atomics.
2141   if (SawUnorderedAtomic && SawNotAtomic)
2142     return false;
2143 
2144   // If we're inserting an atomic load in the preheader, we must be able to
2145   // lower it.  We're only guaranteed to be able to lower naturally aligned
2146   // atomics.
2147   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2148     return false;
2149 
2150   // If we couldn't prove we can hoist the load, bail.
2151   if (!DereferenceableInPH)
2152     return false;
2153 
2154   // We know we can hoist the load, but don't have a guaranteed store.
2155   // Check whether the location is thread-local. If it is, then we can insert
2156   // stores along paths which originally didn't have them without violating the
2157   // memory model.
2158   if (!SafeToInsertStore) {
2159     if (IsKnownThreadLocalObject)
2160       SafeToInsertStore = true;
2161     else {
2162       Value *Object = getUnderlyingObject(SomePtr);
2163       SafeToInsertStore =
2164           (isNoAliasCall(Object) || isa<AllocaInst>(Object)) &&
2165           isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
2166     }
2167   }
2168 
2169   // If we've still failed to prove we can sink the store, hoist the load
2170   // only, if possible.
2171   if (!SafeToInsertStore && !FoundLoadToPromote)
2172     // If we cannot hoist the load either, give up.
2173     return false;
2174 
2175   // Lets do the promotion!
2176   if (SafeToInsertStore)
2177     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2178                       << '\n');
2179   else
2180     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2181                       << '\n');
2182 
2183   ORE->emit([&]() {
2184     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2185                               LoopUses[0])
2186            << "Moving accesses to memory location out of the loop";
2187   });
2188   ++NumPromoted;
2189 
2190   // Look at all the loop uses, and try to merge their locations.
2191   std::vector<const DILocation *> LoopUsesLocs;
2192   for (auto U : LoopUses)
2193     LoopUsesLocs.push_back(U->getDebugLoc().get());
2194   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2195 
2196   // We use the SSAUpdater interface to insert phi nodes as required.
2197   SmallVector<PHINode *, 16> NewPHIs;
2198   SSAUpdater SSA(&NewPHIs);
2199   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2200                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
2201                         Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
2202                         SafeToInsertStore);
2203 
2204   // Set up the preheader to have a definition of the value.  It is the live-out
2205   // value from the preheader that uses in the loop will use.
2206   LoadInst *PreheaderLoad = new LoadInst(
2207       AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2208       Preheader->getTerminator());
2209   if (SawUnorderedAtomic)
2210     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2211   PreheaderLoad->setAlignment(Alignment);
2212   PreheaderLoad->setDebugLoc(DebugLoc());
2213   if (AATags)
2214     PreheaderLoad->setAAMetadata(AATags);
2215   SSA.AddAvailableValue(Preheader, PreheaderLoad);
2216 
2217   MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2218       PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2219   MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2220   MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
2221 
2222   if (VerifyMemorySSA)
2223     MSSAU->getMemorySSA()->verifyMemorySSA();
2224   // Rewrite all the loads in the loop and remember all the definitions from
2225   // stores in the loop.
2226   Promoter.run(LoopUses);
2227 
2228   if (VerifyMemorySSA)
2229     MSSAU->getMemorySSA()->verifyMemorySSA();
2230   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2231   if (PreheaderLoad->use_empty())
2232     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2233 
2234   return true;
2235 }
2236 
2237 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2238                                 function_ref<void(Instruction *)> Fn) {
2239   for (const BasicBlock *BB : L->blocks())
2240     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2241       for (const auto &Access : *Accesses)
2242         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2243           Fn(MUD->getMemoryInst());
2244 }
2245 
2246 static SmallVector<SmallSetVector<Value *, 8>, 0>
2247 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2248   AliasSetTracker AST(*AA);
2249 
2250   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2251     if (const auto *SI = dyn_cast<StoreInst>(I))
2252       return L->isLoopInvariant(SI->getPointerOperand());
2253     if (const auto *LI = dyn_cast<LoadInst>(I))
2254       return L->isLoopInvariant(LI->getPointerOperand());
2255     return false;
2256   };
2257 
2258   // Populate AST with potentially promotable accesses and remove them from
2259   // MaybePromotable, so they will not be checked again on the next iteration.
2260   SmallPtrSet<Value *, 16> AttemptingPromotion;
2261   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2262     if (IsPotentiallyPromotable(I)) {
2263       AttemptingPromotion.insert(I);
2264       AST.add(I);
2265     }
2266   });
2267 
2268   // We're only interested in must-alias sets that contain a mod.
2269   SmallVector<const AliasSet *, 8> Sets;
2270   for (AliasSet &AS : AST)
2271     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2272       Sets.push_back(&AS);
2273 
2274   if (Sets.empty())
2275     return {}; // Nothing to promote...
2276 
2277   // Discard any sets for which there is an aliasing non-promotable access.
2278   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2279     if (AttemptingPromotion.contains(I))
2280       return;
2281 
2282     llvm::erase_if(Sets, [&](const AliasSet *AS) {
2283       return AS->aliasesUnknownInst(I, *AA);
2284     });
2285   });
2286 
2287   SmallVector<SmallSetVector<Value *, 8>, 0> Result;
2288   for (const AliasSet *Set : Sets) {
2289     SmallSetVector<Value *, 8> PointerMustAliases;
2290     for (const auto &ASI : *Set)
2291       PointerMustAliases.insert(ASI.getValue());
2292     Result.push_back(std::move(PointerMustAliases));
2293   }
2294 
2295   return Result;
2296 }
2297 
2298 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
2299                                      AliasSetTracker *CurAST, Loop *CurLoop,
2300                                      AAResults *AA) {
2301   return CurAST->getAliasSetFor(MemLoc).isMod();
2302 }
2303 
2304 bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
2305                                       Loop *CurLoop, Instruction &I,
2306                                       SinkAndHoistLICMFlags &Flags) {
2307   // For hoisting, use the walker to determine safety
2308   if (!Flags.getIsSink()) {
2309     MemoryAccess *Source;
2310     // See declaration of SetLicmMssaOptCap for usage details.
2311     if (Flags.tooManyClobberingCalls())
2312       Source = MU->getDefiningAccess();
2313     else {
2314       Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2315       Flags.incrementClobberingCalls();
2316     }
2317     return !MSSA->isLiveOnEntryDef(Source) &&
2318            CurLoop->contains(Source->getBlock());
2319   }
2320 
2321   // For sinking, we'd need to check all Defs below this use. The getClobbering
2322   // call will look on the backedge of the loop, but will check aliasing with
2323   // the instructions on the previous iteration.
2324   // For example:
2325   // for (i ... )
2326   //   load a[i] ( Use (LoE)
2327   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2328   //   i++;
2329   // The load sees no clobbering inside the loop, as the backedge alias check
2330   // does phi translation, and will check aliasing against store a[i-1].
2331   // However sinking the load outside the loop, below the store is incorrect.
2332 
2333   // For now, only sink if there are no Defs in the loop, and the existing ones
2334   // precede the use and are in the same block.
2335   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2336   // needs PostDominatorTreeAnalysis.
2337   // FIXME: More precise: no Defs that alias this Use.
2338   if (Flags.tooManyMemoryAccesses())
2339     return true;
2340   for (auto *BB : CurLoop->getBlocks())
2341     if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
2342       return true;
2343   // When sinking, the source block may not be part of the loop so check it.
2344   if (!CurLoop->contains(&I))
2345     return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
2346 
2347   return false;
2348 }
2349 
2350 bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
2351                                        MemoryUse &MU) {
2352   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2353     for (const auto &MA : *Accesses)
2354       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2355         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2356           return true;
2357   return false;
2358 }
2359 
2360 /// Little predicate that returns true if the specified basic block is in
2361 /// a subloop of the current one, not the current one itself.
2362 ///
2363 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2364   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2365   return LI->getLoopFor(BB) != CurLoop;
2366 }
2367