11725c8c3SChandler Carruth //===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
2b94c09baSDehao Chen //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b94c09baSDehao Chen //
7b94c09baSDehao Chen //===----------------------------------------------------------------------===//
8b94c09baSDehao Chen //
9b94c09baSDehao Chen // This pass does the inverse transformation of what LICM does.
10b94c09baSDehao Chen // It traverses all of the instructions in the loop's preheader and sinks
11b94c09baSDehao Chen // them to the loop body where frequency is lower than the loop's preheader.
12b94c09baSDehao Chen // This pass is a reverse-transformation of LICM. It differs from the Sink
13b94c09baSDehao Chen // pass in the following ways:
14b94c09baSDehao Chen //
15b94c09baSDehao Chen // * It only handles sinking of instructions from the loop's preheader to the
16b94c09baSDehao Chen //   loop's body
17b94c09baSDehao Chen // * It uses alias set tracker to get more accurate alias info
18b94c09baSDehao Chen // * It uses block frequency info to find the optimal sinking locations
19b94c09baSDehao Chen //
20b94c09baSDehao Chen // Overall algorithm:
21b94c09baSDehao Chen //
22b94c09baSDehao Chen // For I in Preheader:
23b94c09baSDehao Chen //   InsertBBs = BBs that uses I
24b94c09baSDehao Chen //   For BB in sorted(LoopBBs):
25b94c09baSDehao Chen //     DomBBs = BBs in InsertBBs that are dominated by BB
26b94c09baSDehao Chen //     if freq(DomBBs) > freq(BB)
27b94c09baSDehao Chen //       InsertBBs = UseBBs - DomBBs + BB
28b94c09baSDehao Chen //   For BB in InsertBBs:
29b94c09baSDehao Chen //     Insert I at BB's beginning
301725c8c3SChandler Carruth //
31b94c09baSDehao Chen //===----------------------------------------------------------------------===//
32b94c09baSDehao Chen 
33e9b18e3dSChandler Carruth #include "llvm/Transforms/Scalar/LoopSink.h"
34d6391209SKazu Hirata #include "llvm/ADT/SetOperations.h"
35b94c09baSDehao Chen #include "llvm/ADT/Statistic.h"
36b94c09baSDehao Chen #include "llvm/Analysis/AliasAnalysis.h"
37b94c09baSDehao Chen #include "llvm/Analysis/BlockFrequencyInfo.h"
38b94c09baSDehao Chen #include "llvm/Analysis/LoopInfo.h"
39b94c09baSDehao Chen #include "llvm/Analysis/LoopPass.h"
407f6360cdSJamie Schmeiser #include "llvm/Analysis/MemorySSA.h"
417f6360cdSJamie Schmeiser #include "llvm/Analysis/MemorySSAUpdater.h"
42b94c09baSDehao Chen #include "llvm/Analysis/ScalarEvolution.h"
43b94c09baSDehao Chen #include "llvm/IR/Dominators.h"
44b94c09baSDehao Chen #include "llvm/IR/Instructions.h"
4505da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
46a494ae43Sserge-sans-paille #include "llvm/Support/BranchProbability.h"
47b94c09baSDehao Chen #include "llvm/Support/CommandLine.h"
48b94c09baSDehao Chen #include "llvm/Transforms/Scalar.h"
4905da2fe5SReid Kleckner #include "llvm/Transforms/Utils/Local.h"
50b94c09baSDehao Chen #include "llvm/Transforms/Utils/LoopUtils.h"
51b94c09baSDehao Chen using namespace llvm;
52b94c09baSDehao Chen 
53b94c09baSDehao Chen #define DEBUG_TYPE "loopsink"
54b94c09baSDehao Chen 
55b94c09baSDehao Chen STATISTIC(NumLoopSunk, "Number of instructions sunk into loop");
56b94c09baSDehao Chen STATISTIC(NumLoopSunkCloned, "Number of cloned instructions sunk into loop");
57b94c09baSDehao Chen 
58b94c09baSDehao Chen static cl::opt<unsigned> SinkFrequencyPercentThreshold(
59b94c09baSDehao Chen     "sink-freq-percent-threshold", cl::Hidden, cl::init(90),
60b94c09baSDehao Chen     cl::desc("Do not sink instructions that require cloning unless they "
61b94c09baSDehao Chen              "execute less than this percent of the time."));
62b94c09baSDehao Chen 
63b94c09baSDehao Chen static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
64b94c09baSDehao Chen     "max-uses-for-sinking", cl::Hidden, cl::init(30),
65b94c09baSDehao Chen     cl::desc("Do not sink instructions that have too many uses."));
66b94c09baSDehao Chen 
67b94c09baSDehao Chen /// Return adjusted total frequency of \p BBs.
68b94c09baSDehao Chen ///
69b94c09baSDehao Chen /// * If there is only one BB, sinking instruction will not introduce code
70b94c09baSDehao Chen ///   size increase. Thus there is no need to adjust the frequency.
71b94c09baSDehao Chen /// * If there are more than one BB, sinking would lead to code size increase.
72b94c09baSDehao Chen ///   In this case, we add some "tax" to the total frequency to make it harder
73b94c09baSDehao Chen ///   to sink. E.g.
74b94c09baSDehao Chen ///     Freq(Preheader) = 100
75b94c09baSDehao Chen ///     Freq(BBs) = sum(50, 49) = 99
76b94c09baSDehao Chen ///   Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to
77b94c09baSDehao Chen ///   BBs as the difference is too small to justify the code size increase.
78b94c09baSDehao Chen ///   To model this, The adjusted Freq(BBs) will be:
79b94c09baSDehao Chen ///     AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
adjustedSumFreq(SmallPtrSetImpl<BasicBlock * > & BBs,BlockFrequencyInfo & BFI)80b94c09baSDehao Chen static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
81b94c09baSDehao Chen                                       BlockFrequencyInfo &BFI) {
82b94c09baSDehao Chen   BlockFrequency T = 0;
83b94c09baSDehao Chen   for (BasicBlock *B : BBs)
84b94c09baSDehao Chen     T += BFI.getBlockFreq(B);
85b94c09baSDehao Chen   if (BBs.size() > 1)
86b94c09baSDehao Chen     T /= BranchProbability(SinkFrequencyPercentThreshold, 100);
87b94c09baSDehao Chen   return T;
88b94c09baSDehao Chen }
89b94c09baSDehao Chen 
90b94c09baSDehao Chen /// Return a set of basic blocks to insert sinked instructions.
91b94c09baSDehao Chen ///
92b94c09baSDehao Chen /// The returned set of basic blocks (BBsToSinkInto) should satisfy:
93b94c09baSDehao Chen ///
94b94c09baSDehao Chen /// * Inside the loop \p L
95b94c09baSDehao Chen /// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
96b94c09baSDehao Chen ///   that domintates the UseBB
97b94c09baSDehao Chen /// * Has minimum total frequency that is no greater than preheader frequency
98b94c09baSDehao Chen ///
99b94c09baSDehao Chen /// The purpose of the function is to find the optimal sinking points to
100b94c09baSDehao Chen /// minimize execution cost, which is defined as "sum of frequency of
101b94c09baSDehao Chen /// BBsToSinkInto".
102b94c09baSDehao Chen /// As a result, the returned BBsToSinkInto needs to have minimum total
103b94c09baSDehao Chen /// frequency.
104b94c09baSDehao Chen /// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
105b94c09baSDehao Chen /// frequency, the optimal solution is not sinking (return empty set).
106b94c09baSDehao Chen ///
107b94c09baSDehao Chen /// \p ColdLoopBBs is used to help find the optimal sinking locations.
108b94c09baSDehao Chen /// It stores a list of BBs that is:
109b94c09baSDehao Chen ///
110b94c09baSDehao Chen /// * Inside the loop \p L
111b94c09baSDehao Chen /// * Has a frequency no larger than the loop's preheader
112b94c09baSDehao Chen /// * Sorted by BB frequency
113b94c09baSDehao Chen ///
114b94c09baSDehao Chen /// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
115b94c09baSDehao Chen /// To avoid expensive computation, we cap the maximum UseBBs.size() in its
116b94c09baSDehao Chen /// caller.
117b94c09baSDehao Chen static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop & L,const SmallPtrSetImpl<BasicBlock * > & UseBBs,const SmallVectorImpl<BasicBlock * > & ColdLoopBBs,DominatorTree & DT,BlockFrequencyInfo & BFI)118b94c09baSDehao Chen findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
119b94c09baSDehao Chen                   const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
120b94c09baSDehao Chen                   DominatorTree &DT, BlockFrequencyInfo &BFI) {
121b94c09baSDehao Chen   SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
122b94c09baSDehao Chen   if (UseBBs.size() == 0)
123b94c09baSDehao Chen     return BBsToSinkInto;
124b94c09baSDehao Chen 
125b94c09baSDehao Chen   BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
126b94c09baSDehao Chen   SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;
127b94c09baSDehao Chen 
128b94c09baSDehao Chen   // For every iteration:
129b94c09baSDehao Chen   //   * Pick the ColdestBB from ColdLoopBBs
130b94c09baSDehao Chen   //   * Find the set BBsDominatedByColdestBB that satisfy:
131b94c09baSDehao Chen   //     - BBsDominatedByColdestBB is a subset of BBsToSinkInto
132b94c09baSDehao Chen   //     - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
133b94c09baSDehao Chen   //   * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
134b94c09baSDehao Chen   //     BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
135b94c09baSDehao Chen   //     BBsToSinkInto
136b94c09baSDehao Chen   for (BasicBlock *ColdestBB : ColdLoopBBs) {
137b94c09baSDehao Chen     BBsDominatedByColdestBB.clear();
138b94c09baSDehao Chen     for (BasicBlock *SinkedBB : BBsToSinkInto)
139b94c09baSDehao Chen       if (DT.dominates(ColdestBB, SinkedBB))
140b94c09baSDehao Chen         BBsDominatedByColdestBB.insert(SinkedBB);
141b94c09baSDehao Chen     if (BBsDominatedByColdestBB.size() == 0)
142b94c09baSDehao Chen       continue;
143b94c09baSDehao Chen     if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
144b94c09baSDehao Chen         BFI.getBlockFreq(ColdestBB)) {
145b94c09baSDehao Chen       for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
146b94c09baSDehao Chen         BBsToSinkInto.erase(DominatedBB);
147b94c09baSDehao Chen       }
148b94c09baSDehao Chen       BBsToSinkInto.insert(ColdestBB);
149b94c09baSDehao Chen     }
150b94c09baSDehao Chen   }
151b94c09baSDehao Chen 
152e0f3e928SHans Wennborg   // Can't sink into blocks that have no valid insertion point.
153e0f3e928SHans Wennborg   for (BasicBlock *BB : BBsToSinkInto) {
154e0f3e928SHans Wennborg     if (BB->getFirstInsertionPt() == BB->end()) {
155e0f3e928SHans Wennborg       BBsToSinkInto.clear();
156e0f3e928SHans Wennborg       break;
157e0f3e928SHans Wennborg     }
158e0f3e928SHans Wennborg   }
159e0f3e928SHans Wennborg 
160b94c09baSDehao Chen   // If the total frequency of BBsToSinkInto is larger than preheader frequency,
161b94c09baSDehao Chen   // do not sink.
162b94c09baSDehao Chen   if (adjustedSumFreq(BBsToSinkInto, BFI) >
163b94c09baSDehao Chen       BFI.getBlockFreq(L.getLoopPreheader()))
164b94c09baSDehao Chen     BBsToSinkInto.clear();
165b94c09baSDehao Chen   return BBsToSinkInto;
166b94c09baSDehao Chen }
167b94c09baSDehao Chen 
168b94c09baSDehao Chen // Sinks \p I from the loop \p L's preheader to its uses. Returns true if
169b94c09baSDehao Chen // sinking is successful.
170b94c09baSDehao Chen // \p LoopBlockNumber is used to sort the insertion blocks to ensure
171b94c09baSDehao Chen // determinism.
sinkInstruction(Loop & L,Instruction & I,const SmallVectorImpl<BasicBlock * > & ColdLoopBBs,const SmallDenseMap<BasicBlock *,int,16> & LoopBlockNumber,LoopInfo & LI,DominatorTree & DT,BlockFrequencyInfo & BFI,MemorySSAUpdater * MSSAU)1727f6360cdSJamie Schmeiser static bool sinkInstruction(
1737f6360cdSJamie Schmeiser     Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
1747f6360cdSJamie Schmeiser     const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
1757f6360cdSJamie Schmeiser     DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
176b94c09baSDehao Chen   // Compute the set of blocks in loop L which contain a use of I.
177b94c09baSDehao Chen   SmallPtrSet<BasicBlock *, 2> BBs;
178b94c09baSDehao Chen   for (auto &U : I.uses()) {
179b94c09baSDehao Chen     Instruction *UI = cast<Instruction>(U.getUser());
180b94c09baSDehao Chen     // We cannot sink I to PHI-uses.
1818ed16361SKazu Hirata     if (isa<PHINode>(UI))
182b94c09baSDehao Chen       return false;
183b94c09baSDehao Chen     // We cannot sink I if it has uses outside of the loop.
184b94c09baSDehao Chen     if (!L.contains(LI.getLoopFor(UI->getParent())))
185b94c09baSDehao Chen       return false;
186b94c09baSDehao Chen     BBs.insert(UI->getParent());
187b94c09baSDehao Chen   }
188b94c09baSDehao Chen 
189b94c09baSDehao Chen   // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
190b94c09baSDehao Chen   // BBs.size() to avoid expensive computation.
191b94c09baSDehao Chen   // FIXME: Handle code size growth for min_size and opt_size.
192b94c09baSDehao Chen   if (BBs.size() > MaxNumberOfUseBBsForSinking)
193b94c09baSDehao Chen     return false;
194b94c09baSDehao Chen 
195b94c09baSDehao Chen   // Find the set of BBs that we should insert a copy of I.
196b94c09baSDehao Chen   SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
197b94c09baSDehao Chen       findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
198b94c09baSDehao Chen   if (BBsToSinkInto.empty())
199b94c09baSDehao Chen     return false;
200b94c09baSDehao Chen 
201d47d188bSMandeep Singh Grang   // Return if any of the candidate blocks to sink into is non-cold.
202d6391209SKazu Hirata   if (BBsToSinkInto.size() > 1 &&
203d6391209SKazu Hirata       !llvm::set_is_subset(BBsToSinkInto, LoopBlockNumber))
204d47d188bSMandeep Singh Grang     return false;
205d47d188bSMandeep Singh Grang 
206b94c09baSDehao Chen   // Copy the final BBs into a vector and sort them using the total ordering
207b94c09baSDehao Chen   // of the loop block numbers as iterating the set doesn't give a useful
208b94c09baSDehao Chen   // order. No need to stable sort as the block numbers are a total ordering.
209b94c09baSDehao Chen   SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
2108299fb8fSKazu Hirata   llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
2110cac726aSFangrui Song   llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
2120cac726aSFangrui Song     return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
213b94c09baSDehao Chen   });
214b94c09baSDehao Chen 
215b94c09baSDehao Chen   BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
216b94c09baSDehao Chen   // FIXME: Optimize the efficiency for cloned value replacement. The current
217b94c09baSDehao Chen   //        implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
2183687ac52SBenjamin Kramer   for (BasicBlock *N : makeArrayRef(SortedBBsToSinkInto).drop_front(1)) {
2193687ac52SBenjamin Kramer     assert(LoopBlockNumber.find(N)->second >
2203687ac52SBenjamin Kramer                LoopBlockNumber.find(MoveBB)->second &&
2213687ac52SBenjamin Kramer            "BBs not sorted!");
222b94c09baSDehao Chen     // Clone I and replace its uses.
223b94c09baSDehao Chen     Instruction *IC = I.clone();
224b94c09baSDehao Chen     IC->setName(I.getName());
225b94c09baSDehao Chen     IC->insertBefore(&*N->getFirstInsertionPt());
2267f6360cdSJamie Schmeiser 
2277f6360cdSJamie Schmeiser     if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
2287f6360cdSJamie Schmeiser       // Create a new MemoryAccess and let MemorySSA set its defining access.
2297f6360cdSJamie Schmeiser       MemoryAccess *NewMemAcc =
2307f6360cdSJamie Schmeiser           MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
2317f6360cdSJamie Schmeiser       if (NewMemAcc) {
2327f6360cdSJamie Schmeiser         if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
2337f6360cdSJamie Schmeiser           MSSAU->insertDef(MemDef, /*RenameUses=*/true);
2347f6360cdSJamie Schmeiser         else {
2357f6360cdSJamie Schmeiser           auto *MemUse = cast<MemoryUse>(NewMemAcc);
2367f6360cdSJamie Schmeiser           MSSAU->insertUse(MemUse, /*RenameUses=*/true);
2377f6360cdSJamie Schmeiser         }
2387f6360cdSJamie Schmeiser       }
2397f6360cdSJamie Schmeiser     }
2407f6360cdSJamie Schmeiser 
241b94c09baSDehao Chen     // Replaces uses of I with IC in N
242081e990dSRoman Lebedev     I.replaceUsesWithIf(IC, [N](Use &U) {
243081e990dSRoman Lebedev       return cast<Instruction>(U.getUser())->getParent() == N;
244081e990dSRoman Lebedev     });
245b94c09baSDehao Chen     // Replaces uses of I with IC in blocks dominated by N
246b94c09baSDehao Chen     replaceDominatedUsesWith(&I, IC, DT, N);
247d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
248b94c09baSDehao Chen                       << '\n');
249b94c09baSDehao Chen     NumLoopSunkCloned++;
250b94c09baSDehao Chen   }
251d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
252b94c09baSDehao Chen   NumLoopSunk++;
253b94c09baSDehao Chen   I.moveBefore(&*MoveBB->getFirstInsertionPt());
254b94c09baSDehao Chen 
2557f6360cdSJamie Schmeiser   if (MSSAU)
2567f6360cdSJamie Schmeiser     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
2577f6360cdSJamie Schmeiser             MSSAU->getMemorySSA()->getMemoryAccess(&I)))
2587f6360cdSJamie Schmeiser       MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
2597f6360cdSJamie Schmeiser 
260b94c09baSDehao Chen   return true;
261b94c09baSDehao Chen }
262b94c09baSDehao Chen 
263b94c09baSDehao Chen /// Sinks instructions from loop's preheader to the loop body if the
264b94c09baSDehao Chen /// sum frequency of inserted copy is smaller than preheader's frequency.
sinkLoopInvariantInstructions(Loop & L,AAResults & AA,LoopInfo & LI,DominatorTree & DT,BlockFrequencyInfo & BFI,MemorySSA & MSSA,ScalarEvolution * SE)265b94c09baSDehao Chen static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
266b94c09baSDehao Chen                                           DominatorTree &DT,
267b94c09baSDehao Chen                                           BlockFrequencyInfo &BFI,
2685cefe7d9SNikita Popov                                           MemorySSA &MSSA,
2695cefe7d9SNikita Popov                                           ScalarEvolution *SE) {
270b94c09baSDehao Chen   BasicBlock *Preheader = L.getLoopPreheader();
2717f6360cdSJamie Schmeiser   assert(Preheader && "Expected loop to have preheader");
272b94c09baSDehao Chen 
2737f6360cdSJamie Schmeiser   assert(Preheader->getParent()->hasProfileData() &&
2747f6360cdSJamie Schmeiser          "Unexpected call when profile data unavailable.");
275947dbe12SDehao Chen 
276b94c09baSDehao Chen   const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
277b94c09baSDehao Chen   // If there are no basic blocks with lower frequency than the preheader then
278b94c09baSDehao Chen   // we can avoid the detailed analysis as we will never find profitable sinking
279b94c09baSDehao Chen   // opportunities.
280b94c09baSDehao Chen   if (all_of(L.blocks(), [&](const BasicBlock *BB) {
281b94c09baSDehao Chen         return BFI.getBlockFreq(BB) > PreheaderFreq;
282b94c09baSDehao Chen       }))
283b94c09baSDehao Chen     return false;
284b94c09baSDehao Chen 
2855cefe7d9SNikita Popov   MemorySSAUpdater MSSAU(&MSSA);
2865cefe7d9SNikita Popov   SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA);
287cff479b1SJamie Schmeiser 
2887f6360cdSJamie Schmeiser   bool Changed = false;
289b94c09baSDehao Chen 
290b94c09baSDehao Chen   // Sort loop's basic blocks by frequency
291b94c09baSDehao Chen   SmallVector<BasicBlock *, 10> ColdLoopBBs;
292b94c09baSDehao Chen   SmallDenseMap<BasicBlock *, int, 16> LoopBlockNumber;
293b94c09baSDehao Chen   int i = 0;
294b94c09baSDehao Chen   for (BasicBlock *B : L.blocks())
295b94c09baSDehao Chen     if (BFI.getBlockFreq(B) < BFI.getBlockFreq(L.getLoopPreheader())) {
296b94c09baSDehao Chen       ColdLoopBBs.push_back(B);
297b94c09baSDehao Chen       LoopBlockNumber[B] = ++i;
298b94c09baSDehao Chen     }
299efd94c56SFangrui Song   llvm::stable_sort(ColdLoopBBs, [&](BasicBlock *A, BasicBlock *B) {
300b94c09baSDehao Chen     return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
301b94c09baSDehao Chen   });
302b94c09baSDehao Chen 
303b94c09baSDehao Chen   // Traverse preheader's instructions in reverse order becaue if A depends
304b94c09baSDehao Chen   // on B (A appears after B), A needs to be sinked first before B can be
305b94c09baSDehao Chen   // sinked.
3061b108ab9SKazu Hirata   for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
307bc00f47cSFlorian Hahn     if (isa<PHINode>(&I))
308bc00f47cSFlorian Hahn       continue;
30912c8cb37SXin Tong     // No need to check for instruction's operands are loop invariant.
3101b108ab9SKazu Hirata     assert(L.hasLoopInvariantOperands(&I) &&
31112c8cb37SXin Tong            "Insts in a loop's preheader should have loop invariant operands!");
312*c8c63625SNikita Popov     if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags))
313b94c09baSDehao Chen       continue;
3141b108ab9SKazu Hirata     if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
3155cefe7d9SNikita Popov                         &MSSAU))
316b94c09baSDehao Chen       Changed = true;
317b94c09baSDehao Chen   }
318b94c09baSDehao Chen 
319b94c09baSDehao Chen   if (Changed && SE)
320b94c09baSDehao Chen     SE->forgetLoopDispositions(&L);
321b94c09baSDehao Chen   return Changed;
322b94c09baSDehao Chen }
323b94c09baSDehao Chen 
run(Function & F,FunctionAnalysisManager & FAM)324e9b18e3dSChandler Carruth PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
325e9b18e3dSChandler Carruth   LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
326e9b18e3dSChandler Carruth   // Nothing to do if there are no loops.
327e9b18e3dSChandler Carruth   if (LI.empty())
328e9b18e3dSChandler Carruth     return PreservedAnalyses::all();
329e9b18e3dSChandler Carruth 
330e9b18e3dSChandler Carruth   AAResults &AA = FAM.getResult<AAManager>(F);
331e9b18e3dSChandler Carruth   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
332e9b18e3dSChandler Carruth   BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
3335cefe7d9SNikita Popov   MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
3347f6360cdSJamie Schmeiser 
335e9b18e3dSChandler Carruth   // We want to do a postorder walk over the loops. Since loops are a tree this
336e9b18e3dSChandler Carruth   // is equivalent to a reversed preorder walk and preorder is easy to compute
337e9b18e3dSChandler Carruth   // without recursion. Since we reverse the preorder, we will visit siblings
338e9b18e3dSChandler Carruth   // in reverse program order. This isn't expected to matter at all but is more
339e9b18e3dSChandler Carruth   // consistent with sinking algorithms which generally work bottom-up.
340e9b18e3dSChandler Carruth   SmallVector<Loop *, 4> PreorderLoops = LI.getLoopsInPreorder();
341e9b18e3dSChandler Carruth 
342e9b18e3dSChandler Carruth   bool Changed = false;
343e9b18e3dSChandler Carruth   do {
344e9b18e3dSChandler Carruth     Loop &L = *PreorderLoops.pop_back_val();
345e9b18e3dSChandler Carruth 
3467f6360cdSJamie Schmeiser     BasicBlock *Preheader = L.getLoopPreheader();
3477f6360cdSJamie Schmeiser     if (!Preheader)
3487f6360cdSJamie Schmeiser       continue;
3497f6360cdSJamie Schmeiser 
3507f6360cdSJamie Schmeiser     // Enable LoopSink only when runtime profile is available.
3517f6360cdSJamie Schmeiser     // With static profile, the sinking decision may be sub-optimal.
3527f6360cdSJamie Schmeiser     if (!Preheader->getParent()->hasProfileData())
3537f6360cdSJamie Schmeiser       continue;
3547f6360cdSJamie Schmeiser 
355e9b18e3dSChandler Carruth     // Note that we don't pass SCEV here because it is only used to invalidate
356e9b18e3dSChandler Carruth     // loops in SCEV and we don't preserve (or request) SCEV at all making that
357e9b18e3dSChandler Carruth     // unnecessary.
3585cefe7d9SNikita Popov     Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA,
3595cefe7d9SNikita Popov                                              /*ScalarEvolution*/ nullptr);
360e9b18e3dSChandler Carruth   } while (!PreorderLoops.empty());
361e9b18e3dSChandler Carruth 
362e9b18e3dSChandler Carruth   if (!Changed)
363e9b18e3dSChandler Carruth     return PreservedAnalyses::all();
364e9b18e3dSChandler Carruth 
365e9b18e3dSChandler Carruth   PreservedAnalyses PA;
366e9b18e3dSChandler Carruth   PA.preserveSet<CFGAnalyses>();
3677f6360cdSJamie Schmeiser   PA.preserve<MemorySSAAnalysis>();
3687f6360cdSJamie Schmeiser 
3697f6360cdSJamie Schmeiser   if (VerifyMemorySSA)
3705cefe7d9SNikita Popov     MSSA.verifyMemorySSA();
3717f6360cdSJamie Schmeiser 
372e9b18e3dSChandler Carruth   return PA;
373e9b18e3dSChandler Carruth }
374e9b18e3dSChandler Carruth 
375b94c09baSDehao Chen namespace {
376b94c09baSDehao Chen struct LegacyLoopSinkPass : public LoopPass {
377b94c09baSDehao Chen   static char ID;
LegacyLoopSinkPass__anon614bca720511::LegacyLoopSinkPass378b94c09baSDehao Chen   LegacyLoopSinkPass() : LoopPass(ID) {
379b94c09baSDehao Chen     initializeLegacyLoopSinkPassPass(*PassRegistry::getPassRegistry());
380b94c09baSDehao Chen   }
381b94c09baSDehao Chen 
runOnLoop__anon614bca720511::LegacyLoopSinkPass382b94c09baSDehao Chen   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
383b94c09baSDehao Chen     if (skipLoop(L))
384b94c09baSDehao Chen       return false;
385b94c09baSDehao Chen 
3867f6360cdSJamie Schmeiser     BasicBlock *Preheader = L->getLoopPreheader();
3877f6360cdSJamie Schmeiser     if (!Preheader)
3887f6360cdSJamie Schmeiser       return false;
3897f6360cdSJamie Schmeiser 
3907f6360cdSJamie Schmeiser     // Enable LoopSink only when runtime profile is available.
3917f6360cdSJamie Schmeiser     // With static profile, the sinking decision may be sub-optimal.
3927f6360cdSJamie Schmeiser     if (!Preheader->getParent()->hasProfileData())
3937f6360cdSJamie Schmeiser       return false;
3947f6360cdSJamie Schmeiser 
3957f6360cdSJamie Schmeiser     AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3965cefe7d9SNikita Popov     MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
397b94c09baSDehao Chen     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3987f6360cdSJamie Schmeiser     bool Changed = sinkLoopInvariantInstructions(
3997f6360cdSJamie Schmeiser         *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
400b94c09baSDehao Chen         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
401b94c09baSDehao Chen         getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
4025cefe7d9SNikita Popov         MSSA, SE ? &SE->getSE() : nullptr);
4037f6360cdSJamie Schmeiser 
4045cefe7d9SNikita Popov     if (VerifyMemorySSA)
4055cefe7d9SNikita Popov       MSSA.verifyMemorySSA();
4067f6360cdSJamie Schmeiser 
4077f6360cdSJamie Schmeiser     return Changed;
408b94c09baSDehao Chen   }
409b94c09baSDehao Chen 
getAnalysisUsage__anon614bca720511::LegacyLoopSinkPass410b94c09baSDehao Chen   void getAnalysisUsage(AnalysisUsage &AU) const override {
411b94c09baSDehao Chen     AU.setPreservesCFG();
412b94c09baSDehao Chen     AU.addRequired<BlockFrequencyInfoWrapperPass>();
413b94c09baSDehao Chen     getLoopAnalysisUsage(AU);
4147f6360cdSJamie Schmeiser     AU.addRequired<MemorySSAWrapperPass>();
4157f6360cdSJamie Schmeiser     AU.addPreserved<MemorySSAWrapperPass>();
4167f6360cdSJamie Schmeiser   }
417b94c09baSDehao Chen };
418b94c09baSDehao Chen }
419b94c09baSDehao Chen 
420b94c09baSDehao Chen char LegacyLoopSinkPass::ID = 0;
421b94c09baSDehao Chen INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
422b94c09baSDehao Chen                       false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)423b94c09baSDehao Chen INITIALIZE_PASS_DEPENDENCY(LoopPass)
424b94c09baSDehao Chen INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
4257f6360cdSJamie Schmeiser INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
426b94c09baSDehao Chen INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
427b94c09baSDehao Chen 
428b94c09baSDehao Chen Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
429