11725c8c3SChandler Carruth //===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
2b94c09baSDehao Chen //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b94c09baSDehao Chen //
7b94c09baSDehao Chen //===----------------------------------------------------------------------===//
8b94c09baSDehao Chen //
9b94c09baSDehao Chen // This pass does the inverse transformation of what LICM does.
10b94c09baSDehao Chen // It traverses all of the instructions in the loop's preheader and sinks
11b94c09baSDehao Chen // them to the loop body where frequency is lower than the loop's preheader.
12b94c09baSDehao Chen // This pass is a reverse-transformation of LICM. It differs from the Sink
13b94c09baSDehao Chen // pass in the following ways:
14b94c09baSDehao Chen //
15b94c09baSDehao Chen // * It only handles sinking of instructions from the loop's preheader to the
16b94c09baSDehao Chen // loop's body
17b94c09baSDehao Chen // * It uses alias set tracker to get more accurate alias info
18b94c09baSDehao Chen // * It uses block frequency info to find the optimal sinking locations
19b94c09baSDehao Chen //
20b94c09baSDehao Chen // Overall algorithm:
21b94c09baSDehao Chen //
22b94c09baSDehao Chen // For I in Preheader:
23b94c09baSDehao Chen // InsertBBs = BBs that uses I
24b94c09baSDehao Chen // For BB in sorted(LoopBBs):
25b94c09baSDehao Chen // DomBBs = BBs in InsertBBs that are dominated by BB
26b94c09baSDehao Chen // if freq(DomBBs) > freq(BB)
27b94c09baSDehao Chen // InsertBBs = UseBBs - DomBBs + BB
28b94c09baSDehao Chen // For BB in InsertBBs:
29b94c09baSDehao Chen // Insert I at BB's beginning
301725c8c3SChandler Carruth //
31b94c09baSDehao Chen //===----------------------------------------------------------------------===//
32b94c09baSDehao Chen
33e9b18e3dSChandler Carruth #include "llvm/Transforms/Scalar/LoopSink.h"
34d6391209SKazu Hirata #include "llvm/ADT/SetOperations.h"
35b94c09baSDehao Chen #include "llvm/ADT/Statistic.h"
36b94c09baSDehao Chen #include "llvm/Analysis/AliasAnalysis.h"
37b94c09baSDehao Chen #include "llvm/Analysis/BlockFrequencyInfo.h"
38b94c09baSDehao Chen #include "llvm/Analysis/LoopInfo.h"
39b94c09baSDehao Chen #include "llvm/Analysis/LoopPass.h"
407f6360cdSJamie Schmeiser #include "llvm/Analysis/MemorySSA.h"
417f6360cdSJamie Schmeiser #include "llvm/Analysis/MemorySSAUpdater.h"
42b94c09baSDehao Chen #include "llvm/Analysis/ScalarEvolution.h"
43b94c09baSDehao Chen #include "llvm/IR/Dominators.h"
44b94c09baSDehao Chen #include "llvm/IR/Instructions.h"
4505da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
46a494ae43Sserge-sans-paille #include "llvm/Support/BranchProbability.h"
47b94c09baSDehao Chen #include "llvm/Support/CommandLine.h"
48b94c09baSDehao Chen #include "llvm/Transforms/Scalar.h"
4905da2fe5SReid Kleckner #include "llvm/Transforms/Utils/Local.h"
50b94c09baSDehao Chen #include "llvm/Transforms/Utils/LoopUtils.h"
51b94c09baSDehao Chen using namespace llvm;
52b94c09baSDehao Chen
53b94c09baSDehao Chen #define DEBUG_TYPE "loopsink"
54b94c09baSDehao Chen
55b94c09baSDehao Chen STATISTIC(NumLoopSunk, "Number of instructions sunk into loop");
56b94c09baSDehao Chen STATISTIC(NumLoopSunkCloned, "Number of cloned instructions sunk into loop");
57b94c09baSDehao Chen
58b94c09baSDehao Chen static cl::opt<unsigned> SinkFrequencyPercentThreshold(
59b94c09baSDehao Chen "sink-freq-percent-threshold", cl::Hidden, cl::init(90),
60b94c09baSDehao Chen cl::desc("Do not sink instructions that require cloning unless they "
61b94c09baSDehao Chen "execute less than this percent of the time."));
62b94c09baSDehao Chen
63b94c09baSDehao Chen static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
64b94c09baSDehao Chen "max-uses-for-sinking", cl::Hidden, cl::init(30),
65b94c09baSDehao Chen cl::desc("Do not sink instructions that have too many uses."));
66b94c09baSDehao Chen
67b94c09baSDehao Chen /// Return adjusted total frequency of \p BBs.
68b94c09baSDehao Chen ///
69b94c09baSDehao Chen /// * If there is only one BB, sinking instruction will not introduce code
70b94c09baSDehao Chen /// size increase. Thus there is no need to adjust the frequency.
71b94c09baSDehao Chen /// * If there are more than one BB, sinking would lead to code size increase.
72b94c09baSDehao Chen /// In this case, we add some "tax" to the total frequency to make it harder
73b94c09baSDehao Chen /// to sink. E.g.
74b94c09baSDehao Chen /// Freq(Preheader) = 100
75b94c09baSDehao Chen /// Freq(BBs) = sum(50, 49) = 99
76b94c09baSDehao Chen /// Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheade to
77b94c09baSDehao Chen /// BBs as the difference is too small to justify the code size increase.
78b94c09baSDehao Chen /// To model this, The adjusted Freq(BBs) will be:
79b94c09baSDehao Chen /// AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
adjustedSumFreq(SmallPtrSetImpl<BasicBlock * > & BBs,BlockFrequencyInfo & BFI)80b94c09baSDehao Chen static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
81b94c09baSDehao Chen BlockFrequencyInfo &BFI) {
82b94c09baSDehao Chen BlockFrequency T = 0;
83b94c09baSDehao Chen for (BasicBlock *B : BBs)
84b94c09baSDehao Chen T += BFI.getBlockFreq(B);
85b94c09baSDehao Chen if (BBs.size() > 1)
86b94c09baSDehao Chen T /= BranchProbability(SinkFrequencyPercentThreshold, 100);
87b94c09baSDehao Chen return T;
88b94c09baSDehao Chen }
89b94c09baSDehao Chen
90b94c09baSDehao Chen /// Return a set of basic blocks to insert sinked instructions.
91b94c09baSDehao Chen ///
92b94c09baSDehao Chen /// The returned set of basic blocks (BBsToSinkInto) should satisfy:
93b94c09baSDehao Chen ///
94b94c09baSDehao Chen /// * Inside the loop \p L
95b94c09baSDehao Chen /// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
96b94c09baSDehao Chen /// that domintates the UseBB
97b94c09baSDehao Chen /// * Has minimum total frequency that is no greater than preheader frequency
98b94c09baSDehao Chen ///
99b94c09baSDehao Chen /// The purpose of the function is to find the optimal sinking points to
100b94c09baSDehao Chen /// minimize execution cost, which is defined as "sum of frequency of
101b94c09baSDehao Chen /// BBsToSinkInto".
102b94c09baSDehao Chen /// As a result, the returned BBsToSinkInto needs to have minimum total
103b94c09baSDehao Chen /// frequency.
104b94c09baSDehao Chen /// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
105b94c09baSDehao Chen /// frequency, the optimal solution is not sinking (return empty set).
106b94c09baSDehao Chen ///
107b94c09baSDehao Chen /// \p ColdLoopBBs is used to help find the optimal sinking locations.
108b94c09baSDehao Chen /// It stores a list of BBs that is:
109b94c09baSDehao Chen ///
110b94c09baSDehao Chen /// * Inside the loop \p L
111b94c09baSDehao Chen /// * Has a frequency no larger than the loop's preheader
112b94c09baSDehao Chen /// * Sorted by BB frequency
113b94c09baSDehao Chen ///
114b94c09baSDehao Chen /// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
115b94c09baSDehao Chen /// To avoid expensive computation, we cap the maximum UseBBs.size() in its
116b94c09baSDehao Chen /// caller.
117b94c09baSDehao Chen static SmallPtrSet<BasicBlock *, 2>
findBBsToSinkInto(const Loop & L,const SmallPtrSetImpl<BasicBlock * > & UseBBs,const SmallVectorImpl<BasicBlock * > & ColdLoopBBs,DominatorTree & DT,BlockFrequencyInfo & BFI)118b94c09baSDehao Chen findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
119b94c09baSDehao Chen const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
120b94c09baSDehao Chen DominatorTree &DT, BlockFrequencyInfo &BFI) {
121b94c09baSDehao Chen SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
122b94c09baSDehao Chen if (UseBBs.size() == 0)
123b94c09baSDehao Chen return BBsToSinkInto;
124b94c09baSDehao Chen
125b94c09baSDehao Chen BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
126b94c09baSDehao Chen SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;
127b94c09baSDehao Chen
128b94c09baSDehao Chen // For every iteration:
129b94c09baSDehao Chen // * Pick the ColdestBB from ColdLoopBBs
130b94c09baSDehao Chen // * Find the set BBsDominatedByColdestBB that satisfy:
131b94c09baSDehao Chen // - BBsDominatedByColdestBB is a subset of BBsToSinkInto
132b94c09baSDehao Chen // - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
133b94c09baSDehao Chen // * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
134b94c09baSDehao Chen // BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
135b94c09baSDehao Chen // BBsToSinkInto
136b94c09baSDehao Chen for (BasicBlock *ColdestBB : ColdLoopBBs) {
137b94c09baSDehao Chen BBsDominatedByColdestBB.clear();
138b94c09baSDehao Chen for (BasicBlock *SinkedBB : BBsToSinkInto)
139b94c09baSDehao Chen if (DT.dominates(ColdestBB, SinkedBB))
140b94c09baSDehao Chen BBsDominatedByColdestBB.insert(SinkedBB);
141b94c09baSDehao Chen if (BBsDominatedByColdestBB.size() == 0)
142b94c09baSDehao Chen continue;
143b94c09baSDehao Chen if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
144b94c09baSDehao Chen BFI.getBlockFreq(ColdestBB)) {
145b94c09baSDehao Chen for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
146b94c09baSDehao Chen BBsToSinkInto.erase(DominatedBB);
147b94c09baSDehao Chen }
148b94c09baSDehao Chen BBsToSinkInto.insert(ColdestBB);
149b94c09baSDehao Chen }
150b94c09baSDehao Chen }
151b94c09baSDehao Chen
152e0f3e928SHans Wennborg // Can't sink into blocks that have no valid insertion point.
153e0f3e928SHans Wennborg for (BasicBlock *BB : BBsToSinkInto) {
154e0f3e928SHans Wennborg if (BB->getFirstInsertionPt() == BB->end()) {
155e0f3e928SHans Wennborg BBsToSinkInto.clear();
156e0f3e928SHans Wennborg break;
157e0f3e928SHans Wennborg }
158e0f3e928SHans Wennborg }
159e0f3e928SHans Wennborg
160b94c09baSDehao Chen // If the total frequency of BBsToSinkInto is larger than preheader frequency,
161b94c09baSDehao Chen // do not sink.
162b94c09baSDehao Chen if (adjustedSumFreq(BBsToSinkInto, BFI) >
163b94c09baSDehao Chen BFI.getBlockFreq(L.getLoopPreheader()))
164b94c09baSDehao Chen BBsToSinkInto.clear();
165b94c09baSDehao Chen return BBsToSinkInto;
166b94c09baSDehao Chen }
167b94c09baSDehao Chen
168b94c09baSDehao Chen // Sinks \p I from the loop \p L's preheader to its uses. Returns true if
169b94c09baSDehao Chen // sinking is successful.
170b94c09baSDehao Chen // \p LoopBlockNumber is used to sort the insertion blocks to ensure
171b94c09baSDehao Chen // determinism.
sinkInstruction(Loop & L,Instruction & I,const SmallVectorImpl<BasicBlock * > & ColdLoopBBs,const SmallDenseMap<BasicBlock *,int,16> & LoopBlockNumber,LoopInfo & LI,DominatorTree & DT,BlockFrequencyInfo & BFI,MemorySSAUpdater * MSSAU)1727f6360cdSJamie Schmeiser static bool sinkInstruction(
1737f6360cdSJamie Schmeiser Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
1747f6360cdSJamie Schmeiser const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
1757f6360cdSJamie Schmeiser DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
176b94c09baSDehao Chen // Compute the set of blocks in loop L which contain a use of I.
177b94c09baSDehao Chen SmallPtrSet<BasicBlock *, 2> BBs;
178b94c09baSDehao Chen for (auto &U : I.uses()) {
179b94c09baSDehao Chen Instruction *UI = cast<Instruction>(U.getUser());
180b94c09baSDehao Chen // We cannot sink I to PHI-uses.
1818ed16361SKazu Hirata if (isa<PHINode>(UI))
182b94c09baSDehao Chen return false;
183b94c09baSDehao Chen // We cannot sink I if it has uses outside of the loop.
184b94c09baSDehao Chen if (!L.contains(LI.getLoopFor(UI->getParent())))
185b94c09baSDehao Chen return false;
186b94c09baSDehao Chen BBs.insert(UI->getParent());
187b94c09baSDehao Chen }
188b94c09baSDehao Chen
189b94c09baSDehao Chen // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
190b94c09baSDehao Chen // BBs.size() to avoid expensive computation.
191b94c09baSDehao Chen // FIXME: Handle code size growth for min_size and opt_size.
192b94c09baSDehao Chen if (BBs.size() > MaxNumberOfUseBBsForSinking)
193b94c09baSDehao Chen return false;
194b94c09baSDehao Chen
195b94c09baSDehao Chen // Find the set of BBs that we should insert a copy of I.
196b94c09baSDehao Chen SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
197b94c09baSDehao Chen findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
198b94c09baSDehao Chen if (BBsToSinkInto.empty())
199b94c09baSDehao Chen return false;
200b94c09baSDehao Chen
201d47d188bSMandeep Singh Grang // Return if any of the candidate blocks to sink into is non-cold.
202d6391209SKazu Hirata if (BBsToSinkInto.size() > 1 &&
203d6391209SKazu Hirata !llvm::set_is_subset(BBsToSinkInto, LoopBlockNumber))
204d47d188bSMandeep Singh Grang return false;
205d47d188bSMandeep Singh Grang
206b94c09baSDehao Chen // Copy the final BBs into a vector and sort them using the total ordering
207b94c09baSDehao Chen // of the loop block numbers as iterating the set doesn't give a useful
208b94c09baSDehao Chen // order. No need to stable sort as the block numbers are a total ordering.
209b94c09baSDehao Chen SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
2108299fb8fSKazu Hirata llvm::append_range(SortedBBsToSinkInto, BBsToSinkInto);
2110cac726aSFangrui Song llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
2120cac726aSFangrui Song return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
213b94c09baSDehao Chen });
214b94c09baSDehao Chen
215b94c09baSDehao Chen BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
216b94c09baSDehao Chen // FIXME: Optimize the efficiency for cloned value replacement. The current
217b94c09baSDehao Chen // implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
2183687ac52SBenjamin Kramer for (BasicBlock *N : makeArrayRef(SortedBBsToSinkInto).drop_front(1)) {
2193687ac52SBenjamin Kramer assert(LoopBlockNumber.find(N)->second >
2203687ac52SBenjamin Kramer LoopBlockNumber.find(MoveBB)->second &&
2213687ac52SBenjamin Kramer "BBs not sorted!");
222b94c09baSDehao Chen // Clone I and replace its uses.
223b94c09baSDehao Chen Instruction *IC = I.clone();
224b94c09baSDehao Chen IC->setName(I.getName());
225b94c09baSDehao Chen IC->insertBefore(&*N->getFirstInsertionPt());
2267f6360cdSJamie Schmeiser
2277f6360cdSJamie Schmeiser if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
2287f6360cdSJamie Schmeiser // Create a new MemoryAccess and let MemorySSA set its defining access.
2297f6360cdSJamie Schmeiser MemoryAccess *NewMemAcc =
2307f6360cdSJamie Schmeiser MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
2317f6360cdSJamie Schmeiser if (NewMemAcc) {
2327f6360cdSJamie Schmeiser if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
2337f6360cdSJamie Schmeiser MSSAU->insertDef(MemDef, /*RenameUses=*/true);
2347f6360cdSJamie Schmeiser else {
2357f6360cdSJamie Schmeiser auto *MemUse = cast<MemoryUse>(NewMemAcc);
2367f6360cdSJamie Schmeiser MSSAU->insertUse(MemUse, /*RenameUses=*/true);
2377f6360cdSJamie Schmeiser }
2387f6360cdSJamie Schmeiser }
2397f6360cdSJamie Schmeiser }
2407f6360cdSJamie Schmeiser
241b94c09baSDehao Chen // Replaces uses of I with IC in N
242081e990dSRoman Lebedev I.replaceUsesWithIf(IC, [N](Use &U) {
243081e990dSRoman Lebedev return cast<Instruction>(U.getUser())->getParent() == N;
244081e990dSRoman Lebedev });
245b94c09baSDehao Chen // Replaces uses of I with IC in blocks dominated by N
246b94c09baSDehao Chen replaceDominatedUsesWith(&I, IC, DT, N);
247d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
248b94c09baSDehao Chen << '\n');
249b94c09baSDehao Chen NumLoopSunkCloned++;
250b94c09baSDehao Chen }
251d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
252b94c09baSDehao Chen NumLoopSunk++;
253b94c09baSDehao Chen I.moveBefore(&*MoveBB->getFirstInsertionPt());
254b94c09baSDehao Chen
2557f6360cdSJamie Schmeiser if (MSSAU)
2567f6360cdSJamie Schmeiser if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
2577f6360cdSJamie Schmeiser MSSAU->getMemorySSA()->getMemoryAccess(&I)))
2587f6360cdSJamie Schmeiser MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
2597f6360cdSJamie Schmeiser
260b94c09baSDehao Chen return true;
261b94c09baSDehao Chen }
262b94c09baSDehao Chen
263b94c09baSDehao Chen /// Sinks instructions from loop's preheader to the loop body if the
264b94c09baSDehao Chen /// sum frequency of inserted copy is smaller than preheader's frequency.
sinkLoopInvariantInstructions(Loop & L,AAResults & AA,LoopInfo & LI,DominatorTree & DT,BlockFrequencyInfo & BFI,MemorySSA & MSSA,ScalarEvolution * SE)265b94c09baSDehao Chen static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
266b94c09baSDehao Chen DominatorTree &DT,
267b94c09baSDehao Chen BlockFrequencyInfo &BFI,
2685cefe7d9SNikita Popov MemorySSA &MSSA,
2695cefe7d9SNikita Popov ScalarEvolution *SE) {
270b94c09baSDehao Chen BasicBlock *Preheader = L.getLoopPreheader();
2717f6360cdSJamie Schmeiser assert(Preheader && "Expected loop to have preheader");
272b94c09baSDehao Chen
2737f6360cdSJamie Schmeiser assert(Preheader->getParent()->hasProfileData() &&
2747f6360cdSJamie Schmeiser "Unexpected call when profile data unavailable.");
275947dbe12SDehao Chen
276b94c09baSDehao Chen const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
277b94c09baSDehao Chen // If there are no basic blocks with lower frequency than the preheader then
278b94c09baSDehao Chen // we can avoid the detailed analysis as we will never find profitable sinking
279b94c09baSDehao Chen // opportunities.
280b94c09baSDehao Chen if (all_of(L.blocks(), [&](const BasicBlock *BB) {
281b94c09baSDehao Chen return BFI.getBlockFreq(BB) > PreheaderFreq;
282b94c09baSDehao Chen }))
283b94c09baSDehao Chen return false;
284b94c09baSDehao Chen
2855cefe7d9SNikita Popov MemorySSAUpdater MSSAU(&MSSA);
2865cefe7d9SNikita Popov SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA);
287cff479b1SJamie Schmeiser
2887f6360cdSJamie Schmeiser bool Changed = false;
289b94c09baSDehao Chen
290b94c09baSDehao Chen // Sort loop's basic blocks by frequency
291b94c09baSDehao Chen SmallVector<BasicBlock *, 10> ColdLoopBBs;
292b94c09baSDehao Chen SmallDenseMap<BasicBlock *, int, 16> LoopBlockNumber;
293b94c09baSDehao Chen int i = 0;
294b94c09baSDehao Chen for (BasicBlock *B : L.blocks())
295b94c09baSDehao Chen if (BFI.getBlockFreq(B) < BFI.getBlockFreq(L.getLoopPreheader())) {
296b94c09baSDehao Chen ColdLoopBBs.push_back(B);
297b94c09baSDehao Chen LoopBlockNumber[B] = ++i;
298b94c09baSDehao Chen }
299efd94c56SFangrui Song llvm::stable_sort(ColdLoopBBs, [&](BasicBlock *A, BasicBlock *B) {
300b94c09baSDehao Chen return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
301b94c09baSDehao Chen });
302b94c09baSDehao Chen
303b94c09baSDehao Chen // Traverse preheader's instructions in reverse order becaue if A depends
304b94c09baSDehao Chen // on B (A appears after B), A needs to be sinked first before B can be
305b94c09baSDehao Chen // sinked.
3061b108ab9SKazu Hirata for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
307bc00f47cSFlorian Hahn if (isa<PHINode>(&I))
308bc00f47cSFlorian Hahn continue;
30912c8cb37SXin Tong // No need to check for instruction's operands are loop invariant.
3101b108ab9SKazu Hirata assert(L.hasLoopInvariantOperands(&I) &&
31112c8cb37SXin Tong "Insts in a loop's preheader should have loop invariant operands!");
312*c8c63625SNikita Popov if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags))
313b94c09baSDehao Chen continue;
3141b108ab9SKazu Hirata if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
3155cefe7d9SNikita Popov &MSSAU))
316b94c09baSDehao Chen Changed = true;
317b94c09baSDehao Chen }
318b94c09baSDehao Chen
319b94c09baSDehao Chen if (Changed && SE)
320b94c09baSDehao Chen SE->forgetLoopDispositions(&L);
321b94c09baSDehao Chen return Changed;
322b94c09baSDehao Chen }
323b94c09baSDehao Chen
run(Function & F,FunctionAnalysisManager & FAM)324e9b18e3dSChandler Carruth PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
325e9b18e3dSChandler Carruth LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
326e9b18e3dSChandler Carruth // Nothing to do if there are no loops.
327e9b18e3dSChandler Carruth if (LI.empty())
328e9b18e3dSChandler Carruth return PreservedAnalyses::all();
329e9b18e3dSChandler Carruth
330e9b18e3dSChandler Carruth AAResults &AA = FAM.getResult<AAManager>(F);
331e9b18e3dSChandler Carruth DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
332e9b18e3dSChandler Carruth BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
3335cefe7d9SNikita Popov MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
3347f6360cdSJamie Schmeiser
335e9b18e3dSChandler Carruth // We want to do a postorder walk over the loops. Since loops are a tree this
336e9b18e3dSChandler Carruth // is equivalent to a reversed preorder walk and preorder is easy to compute
337e9b18e3dSChandler Carruth // without recursion. Since we reverse the preorder, we will visit siblings
338e9b18e3dSChandler Carruth // in reverse program order. This isn't expected to matter at all but is more
339e9b18e3dSChandler Carruth // consistent with sinking algorithms which generally work bottom-up.
340e9b18e3dSChandler Carruth SmallVector<Loop *, 4> PreorderLoops = LI.getLoopsInPreorder();
341e9b18e3dSChandler Carruth
342e9b18e3dSChandler Carruth bool Changed = false;
343e9b18e3dSChandler Carruth do {
344e9b18e3dSChandler Carruth Loop &L = *PreorderLoops.pop_back_val();
345e9b18e3dSChandler Carruth
3467f6360cdSJamie Schmeiser BasicBlock *Preheader = L.getLoopPreheader();
3477f6360cdSJamie Schmeiser if (!Preheader)
3487f6360cdSJamie Schmeiser continue;
3497f6360cdSJamie Schmeiser
3507f6360cdSJamie Schmeiser // Enable LoopSink only when runtime profile is available.
3517f6360cdSJamie Schmeiser // With static profile, the sinking decision may be sub-optimal.
3527f6360cdSJamie Schmeiser if (!Preheader->getParent()->hasProfileData())
3537f6360cdSJamie Schmeiser continue;
3547f6360cdSJamie Schmeiser
355e9b18e3dSChandler Carruth // Note that we don't pass SCEV here because it is only used to invalidate
356e9b18e3dSChandler Carruth // loops in SCEV and we don't preserve (or request) SCEV at all making that
357e9b18e3dSChandler Carruth // unnecessary.
3585cefe7d9SNikita Popov Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA,
3595cefe7d9SNikita Popov /*ScalarEvolution*/ nullptr);
360e9b18e3dSChandler Carruth } while (!PreorderLoops.empty());
361e9b18e3dSChandler Carruth
362e9b18e3dSChandler Carruth if (!Changed)
363e9b18e3dSChandler Carruth return PreservedAnalyses::all();
364e9b18e3dSChandler Carruth
365e9b18e3dSChandler Carruth PreservedAnalyses PA;
366e9b18e3dSChandler Carruth PA.preserveSet<CFGAnalyses>();
3677f6360cdSJamie Schmeiser PA.preserve<MemorySSAAnalysis>();
3687f6360cdSJamie Schmeiser
3697f6360cdSJamie Schmeiser if (VerifyMemorySSA)
3705cefe7d9SNikita Popov MSSA.verifyMemorySSA();
3717f6360cdSJamie Schmeiser
372e9b18e3dSChandler Carruth return PA;
373e9b18e3dSChandler Carruth }
374e9b18e3dSChandler Carruth
375b94c09baSDehao Chen namespace {
376b94c09baSDehao Chen struct LegacyLoopSinkPass : public LoopPass {
377b94c09baSDehao Chen static char ID;
LegacyLoopSinkPass__anon614bca720511::LegacyLoopSinkPass378b94c09baSDehao Chen LegacyLoopSinkPass() : LoopPass(ID) {
379b94c09baSDehao Chen initializeLegacyLoopSinkPassPass(*PassRegistry::getPassRegistry());
380b94c09baSDehao Chen }
381b94c09baSDehao Chen
runOnLoop__anon614bca720511::LegacyLoopSinkPass382b94c09baSDehao Chen bool runOnLoop(Loop *L, LPPassManager &LPM) override {
383b94c09baSDehao Chen if (skipLoop(L))
384b94c09baSDehao Chen return false;
385b94c09baSDehao Chen
3867f6360cdSJamie Schmeiser BasicBlock *Preheader = L->getLoopPreheader();
3877f6360cdSJamie Schmeiser if (!Preheader)
3887f6360cdSJamie Schmeiser return false;
3897f6360cdSJamie Schmeiser
3907f6360cdSJamie Schmeiser // Enable LoopSink only when runtime profile is available.
3917f6360cdSJamie Schmeiser // With static profile, the sinking decision may be sub-optimal.
3927f6360cdSJamie Schmeiser if (!Preheader->getParent()->hasProfileData())
3937f6360cdSJamie Schmeiser return false;
3947f6360cdSJamie Schmeiser
3957f6360cdSJamie Schmeiser AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3965cefe7d9SNikita Popov MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
397b94c09baSDehao Chen auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3987f6360cdSJamie Schmeiser bool Changed = sinkLoopInvariantInstructions(
3997f6360cdSJamie Schmeiser *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
400b94c09baSDehao Chen getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
401b94c09baSDehao Chen getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
4025cefe7d9SNikita Popov MSSA, SE ? &SE->getSE() : nullptr);
4037f6360cdSJamie Schmeiser
4045cefe7d9SNikita Popov if (VerifyMemorySSA)
4055cefe7d9SNikita Popov MSSA.verifyMemorySSA();
4067f6360cdSJamie Schmeiser
4077f6360cdSJamie Schmeiser return Changed;
408b94c09baSDehao Chen }
409b94c09baSDehao Chen
getAnalysisUsage__anon614bca720511::LegacyLoopSinkPass410b94c09baSDehao Chen void getAnalysisUsage(AnalysisUsage &AU) const override {
411b94c09baSDehao Chen AU.setPreservesCFG();
412b94c09baSDehao Chen AU.addRequired<BlockFrequencyInfoWrapperPass>();
413b94c09baSDehao Chen getLoopAnalysisUsage(AU);
4147f6360cdSJamie Schmeiser AU.addRequired<MemorySSAWrapperPass>();
4157f6360cdSJamie Schmeiser AU.addPreserved<MemorySSAWrapperPass>();
4167f6360cdSJamie Schmeiser }
417b94c09baSDehao Chen };
418b94c09baSDehao Chen }
419b94c09baSDehao Chen
420b94c09baSDehao Chen char LegacyLoopSinkPass::ID = 0;
421b94c09baSDehao Chen INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
422b94c09baSDehao Chen false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)423b94c09baSDehao Chen INITIALIZE_PASS_DEPENDENCY(LoopPass)
424b94c09baSDehao Chen INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
4257f6360cdSJamie Schmeiser INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
426b94c09baSDehao Chen INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
427b94c09baSDehao Chen
428b94c09baSDehao Chen Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
429