1b7cfa6caSSidharth Baveja //===- LoopPeel.cpp -------------------------------------------------------===//
2b7cfa6caSSidharth Baveja //
3b7cfa6caSSidharth Baveja // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b7cfa6caSSidharth Baveja // See https://llvm.org/LICENSE.txt for license information.
5b7cfa6caSSidharth Baveja // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b7cfa6caSSidharth Baveja //
7b7cfa6caSSidharth Baveja //===----------------------------------------------------------------------===//
8b7cfa6caSSidharth Baveja //
9b7cfa6caSSidharth Baveja // Loop Peeling Utilities.
10b7cfa6caSSidharth Baveja //===----------------------------------------------------------------------===//
11b7cfa6caSSidharth Baveja
12b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/LoopPeel.h"
13b7cfa6caSSidharth Baveja #include "llvm/ADT/DenseMap.h"
14b7cfa6caSSidharth Baveja #include "llvm/ADT/Optional.h"
15b7cfa6caSSidharth Baveja #include "llvm/ADT/SmallVector.h"
16b7cfa6caSSidharth Baveja #include "llvm/ADT/Statistic.h"
17cd0ba9dcSFlorian Hahn #include "llvm/Analysis/Loads.h"
18b7cfa6caSSidharth Baveja #include "llvm/Analysis/LoopInfo.h"
19b7cfa6caSSidharth Baveja #include "llvm/Analysis/LoopIterator.h"
20b7cfa6caSSidharth Baveja #include "llvm/Analysis/ScalarEvolution.h"
21b7cfa6caSSidharth Baveja #include "llvm/Analysis/ScalarEvolutionExpressions.h"
22b7cfa6caSSidharth Baveja #include "llvm/Analysis/TargetTransformInfo.h"
23b7cfa6caSSidharth Baveja #include "llvm/IR/BasicBlock.h"
24b7cfa6caSSidharth Baveja #include "llvm/IR/Dominators.h"
25b7cfa6caSSidharth Baveja #include "llvm/IR/Function.h"
26b7cfa6caSSidharth Baveja #include "llvm/IR/InstrTypes.h"
27b7cfa6caSSidharth Baveja #include "llvm/IR/Instruction.h"
28b7cfa6caSSidharth Baveja #include "llvm/IR/Instructions.h"
29b7cfa6caSSidharth Baveja #include "llvm/IR/LLVMContext.h"
30b7cfa6caSSidharth Baveja #include "llvm/IR/MDBuilder.h"
31b7cfa6caSSidharth Baveja #include "llvm/IR/PatternMatch.h"
32b7cfa6caSSidharth Baveja #include "llvm/Support/Casting.h"
33b7cfa6caSSidharth Baveja #include "llvm/Support/CommandLine.h"
34b7cfa6caSSidharth Baveja #include "llvm/Support/Debug.h"
35b7cfa6caSSidharth Baveja #include "llvm/Support/raw_ostream.h"
36b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/Cloning.h"
38b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/LoopSimplify.h"
39b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/LoopUtils.h"
40b7cfa6caSSidharth Baveja #include "llvm/Transforms/Utils/ValueMapper.h"
41b7cfa6caSSidharth Baveja #include <algorithm>
42b7cfa6caSSidharth Baveja #include <cassert>
43b7cfa6caSSidharth Baveja #include <cstdint>
44b7cfa6caSSidharth Baveja
45b7cfa6caSSidharth Baveja using namespace llvm;
46b7cfa6caSSidharth Baveja using namespace llvm::PatternMatch;
47b7cfa6caSSidharth Baveja
48b7cfa6caSSidharth Baveja #define DEBUG_TYPE "loop-peel"
49b7cfa6caSSidharth Baveja
50b7cfa6caSSidharth Baveja STATISTIC(NumPeeled, "Number of loops peeled");
51b7cfa6caSSidharth Baveja
52b7cfa6caSSidharth Baveja static cl::opt<unsigned> UnrollPeelCount(
53b7cfa6caSSidharth Baveja "unroll-peel-count", cl::Hidden,
54b7cfa6caSSidharth Baveja cl::desc("Set the unroll peeling count, for testing purposes"));
55b7cfa6caSSidharth Baveja
56b7cfa6caSSidharth Baveja static cl::opt<bool>
57b7cfa6caSSidharth Baveja UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
58b7cfa6caSSidharth Baveja cl::desc("Allows loops to be peeled when the dynamic "
59b7cfa6caSSidharth Baveja "trip count is known to be low."));
60b7cfa6caSSidharth Baveja
61b7cfa6caSSidharth Baveja static cl::opt<bool>
62b7cfa6caSSidharth Baveja UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
63b7cfa6caSSidharth Baveja cl::init(false), cl::Hidden,
64b7cfa6caSSidharth Baveja cl::desc("Allows loop nests to be peeled."));
65b7cfa6caSSidharth Baveja
66b7cfa6caSSidharth Baveja static cl::opt<unsigned> UnrollPeelMaxCount(
67b7cfa6caSSidharth Baveja "unroll-peel-max-count", cl::init(7), cl::Hidden,
68b7cfa6caSSidharth Baveja cl::desc("Max average trip count which will cause loop peeling."));
69b7cfa6caSSidharth Baveja
70b7cfa6caSSidharth Baveja static cl::opt<unsigned> UnrollForcePeelCount(
71b7cfa6caSSidharth Baveja "unroll-force-peel-count", cl::init(0), cl::Hidden,
72b7cfa6caSSidharth Baveja cl::desc("Force a peel count regardless of profiling information."));
73b7cfa6caSSidharth Baveja
74b7cfa6caSSidharth Baveja static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
75b7cfa6caSSidharth Baveja
76b7cfa6caSSidharth Baveja // Check whether we are capable of peeling this loop.
canPeel(Loop * L)77b7cfa6caSSidharth Baveja bool llvm::canPeel(Loop *L) {
78b7cfa6caSSidharth Baveja // Make sure the loop is in simplified form
79b7cfa6caSSidharth Baveja if (!L->isLoopSimplifyForm())
80b7cfa6caSSidharth Baveja return false;
81b7cfa6caSSidharth Baveja
82b7cfa6caSSidharth Baveja // Don't try to peel loops where the latch is not the exiting block.
83b7cfa6caSSidharth Baveja // This can be an indication of two different things:
84b7cfa6caSSidharth Baveja // 1) The loop is not rotated.
85b7cfa6caSSidharth Baveja // 2) The loop contains irreducible control flow that involves the latch.
8640cd262cSJoseph Tremoulet const BasicBlock *Latch = L->getLoopLatch();
8790d09eb3SFlorian Hahn if (!L->isLoopExiting(Latch))
8840cd262cSJoseph Tremoulet return false;
8940cd262cSJoseph Tremoulet
9040cd262cSJoseph Tremoulet // Peeling is only supported if the latch is a branch.
9140cd262cSJoseph Tremoulet if (!isa<BranchInst>(Latch->getTerminator()))
92b7cfa6caSSidharth Baveja return false;
93b7cfa6caSSidharth Baveja
9490d09eb3SFlorian Hahn SmallVector<BasicBlock *, 4> Exits;
9590d09eb3SFlorian Hahn L->getUniqueNonLatchExitBlocks(Exits);
9690d09eb3SFlorian Hahn // The latch must either be the only exiting block or all non-latch exit
97e09958d5SDmitry Makogon // blocks have either a deopt or unreachable terminator or compose a chain of
98e09958d5SDmitry Makogon // blocks where the last one is either deopt or unreachable terminated. Both
99e09958d5SDmitry Makogon // deopt and unreachable terminators are a strong indication they are not
100e09958d5SDmitry Makogon // taken. Note that this is a profitability check, not a legality check. Also
101e09958d5SDmitry Makogon // note that LoopPeeling currently can only update the branch weights of latch
102e09958d5SDmitry Makogon // blocks and branch weights to blocks with deopt or unreachable do not need
10390d09eb3SFlorian Hahn // updating.
1042b7be47bSKazu Hirata return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
105b7cfa6caSSidharth Baveja }
106b7cfa6caSSidharth Baveja
107b7cfa6caSSidharth Baveja // This function calculates the number of iterations after which the given Phi
108b7cfa6caSSidharth Baveja // becomes an invariant. The pre-calculated values are memorized in the map. The
109b7cfa6caSSidharth Baveja // function (shortcut is I) is calculated according to the following definition:
110b7cfa6caSSidharth Baveja // Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
111b7cfa6caSSidharth Baveja // If %y is a loop invariant, then I(%x) = 1.
112b7cfa6caSSidharth Baveja // If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
113b7cfa6caSSidharth Baveja // Otherwise, I(%x) is infinite.
114b7cfa6caSSidharth Baveja // TODO: Actually if %y is an expression that depends only on Phi %z and some
115b7cfa6caSSidharth Baveja // loop invariants, we can estimate I(%x) = I(%z) + 1. The example
116b7cfa6caSSidharth Baveja // looks like:
117b7cfa6caSSidharth Baveja // %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
118b7cfa6caSSidharth Baveja // %y = phi(0, 5),
119b7cfa6caSSidharth Baveja // %a = %y + 1.
calculateIterationsToInvariance(PHINode * Phi,Loop * L,BasicBlock * BackEdge,SmallDenseMap<PHINode *,Optional<unsigned>> & IterationsToInvariance)120cb728cb8SMax Kazantsev static Optional<unsigned> calculateIterationsToInvariance(
121b7cfa6caSSidharth Baveja PHINode *Phi, Loop *L, BasicBlock *BackEdge,
122cb728cb8SMax Kazantsev SmallDenseMap<PHINode *, Optional<unsigned> > &IterationsToInvariance) {
123b7cfa6caSSidharth Baveja assert(Phi->getParent() == L->getHeader() &&
124b7cfa6caSSidharth Baveja "Non-loop Phi should not be checked for turning into invariant.");
125b7cfa6caSSidharth Baveja assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
126b7cfa6caSSidharth Baveja // If we already know the answer, take it from the map.
127b7cfa6caSSidharth Baveja auto I = IterationsToInvariance.find(Phi);
128b7cfa6caSSidharth Baveja if (I != IterationsToInvariance.end())
129b7cfa6caSSidharth Baveja return I->second;
130b7cfa6caSSidharth Baveja
131b7cfa6caSSidharth Baveja // Otherwise we need to analyze the input from the back edge.
132b7cfa6caSSidharth Baveja Value *Input = Phi->getIncomingValueForBlock(BackEdge);
133b7cfa6caSSidharth Baveja // Place infinity to map to avoid infinite recursion for cycled Phis. Such
134b7cfa6caSSidharth Baveja // cycles can never stop on an invariant.
135cb728cb8SMax Kazantsev IterationsToInvariance[Phi] = None;
136cb728cb8SMax Kazantsev Optional<unsigned> ToInvariance = None;
137b7cfa6caSSidharth Baveja
138b7cfa6caSSidharth Baveja if (L->isLoopInvariant(Input))
139b7cfa6caSSidharth Baveja ToInvariance = 1u;
140b7cfa6caSSidharth Baveja else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
141b7cfa6caSSidharth Baveja // Only consider Phis in header block.
142b7cfa6caSSidharth Baveja if (IncPhi->getParent() != L->getHeader())
143cb728cb8SMax Kazantsev return None;
144b7cfa6caSSidharth Baveja // If the input becomes an invariant after X iterations, then our Phi
145b7cfa6caSSidharth Baveja // becomes an invariant after X + 1 iterations.
146cb728cb8SMax Kazantsev auto InputToInvariance = calculateIterationsToInvariance(
147b7cfa6caSSidharth Baveja IncPhi, L, BackEdge, IterationsToInvariance);
148cb728cb8SMax Kazantsev if (InputToInvariance)
149cb728cb8SMax Kazantsev ToInvariance = *InputToInvariance + 1u;
150b7cfa6caSSidharth Baveja }
151b7cfa6caSSidharth Baveja
152b7cfa6caSSidharth Baveja // If we found that this Phi lies in an invariant chain, update the map.
153cb728cb8SMax Kazantsev if (ToInvariance)
154b7cfa6caSSidharth Baveja IterationsToInvariance[Phi] = ToInvariance;
155b7cfa6caSSidharth Baveja return ToInvariance;
156b7cfa6caSSidharth Baveja }
157b7cfa6caSSidharth Baveja
158cd0ba9dcSFlorian Hahn // Try to find any invariant memory reads that will become dereferenceable in
159cd0ba9dcSFlorian Hahn // the remainder loop after peeling. The load must also be used (transitively)
160cd0ba9dcSFlorian Hahn // by an exit condition. Returns the number of iterations to peel off (at the
161cd0ba9dcSFlorian Hahn // moment either 0 or 1).
peelToTurnInvariantLoadsDerefencebale(Loop & L,DominatorTree & DT)162cd0ba9dcSFlorian Hahn static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
163cd0ba9dcSFlorian Hahn DominatorTree &DT) {
164cd0ba9dcSFlorian Hahn // Skip loops with a single exiting block, because there should be no benefit
165cd0ba9dcSFlorian Hahn // for the heuristic below.
166cd0ba9dcSFlorian Hahn if (L.getExitingBlock())
167cd0ba9dcSFlorian Hahn return 0;
168cd0ba9dcSFlorian Hahn
169cd0ba9dcSFlorian Hahn // All non-latch exit blocks must have an UnreachableInst terminator.
170cd0ba9dcSFlorian Hahn // Otherwise the heuristic below may not be profitable.
171cd0ba9dcSFlorian Hahn SmallVector<BasicBlock *, 4> Exits;
172cd0ba9dcSFlorian Hahn L.getUniqueNonLatchExitBlocks(Exits);
173cd0ba9dcSFlorian Hahn if (any_of(Exits, [](const BasicBlock *BB) {
174cd0ba9dcSFlorian Hahn return !isa<UnreachableInst>(BB->getTerminator());
175cd0ba9dcSFlorian Hahn }))
176cd0ba9dcSFlorian Hahn return 0;
177cd0ba9dcSFlorian Hahn
178cd0ba9dcSFlorian Hahn // Now look for invariant loads that dominate the latch and are not known to
179cd0ba9dcSFlorian Hahn // be dereferenceable. If there are such loads and no writes, they will become
180cd0ba9dcSFlorian Hahn // dereferenceable in the loop if the first iteration is peeled off. Also
181cd0ba9dcSFlorian Hahn // collect the set of instructions controlled by such loads. Only peel if an
182cd0ba9dcSFlorian Hahn // exit condition uses (transitively) such a load.
183cd0ba9dcSFlorian Hahn BasicBlock *Header = L.getHeader();
184cd0ba9dcSFlorian Hahn BasicBlock *Latch = L.getLoopLatch();
185cd0ba9dcSFlorian Hahn SmallPtrSet<Value *, 8> LoadUsers;
186cd0ba9dcSFlorian Hahn const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
187cd0ba9dcSFlorian Hahn for (BasicBlock *BB : L.blocks()) {
188cd0ba9dcSFlorian Hahn for (Instruction &I : *BB) {
189cd0ba9dcSFlorian Hahn if (I.mayWriteToMemory())
190cd0ba9dcSFlorian Hahn return 0;
191cd0ba9dcSFlorian Hahn
192cd0ba9dcSFlorian Hahn auto Iter = LoadUsers.find(&I);
193cd0ba9dcSFlorian Hahn if (Iter != LoadUsers.end()) {
194cd0ba9dcSFlorian Hahn for (Value *U : I.users())
195cd0ba9dcSFlorian Hahn LoadUsers.insert(U);
196cd0ba9dcSFlorian Hahn }
197cd0ba9dcSFlorian Hahn // Do not look for reads in the header; they can already be hoisted
198cd0ba9dcSFlorian Hahn // without peeling.
199cd0ba9dcSFlorian Hahn if (BB == Header)
200cd0ba9dcSFlorian Hahn continue;
201cd0ba9dcSFlorian Hahn if (auto *LI = dyn_cast<LoadInst>(&I)) {
202cd0ba9dcSFlorian Hahn Value *Ptr = LI->getPointerOperand();
203cd0ba9dcSFlorian Hahn if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
204cd0ba9dcSFlorian Hahn !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
205cd0ba9dcSFlorian Hahn for (Value *U : I.users())
206cd0ba9dcSFlorian Hahn LoadUsers.insert(U);
207cd0ba9dcSFlorian Hahn }
208cd0ba9dcSFlorian Hahn }
209cd0ba9dcSFlorian Hahn }
210cd0ba9dcSFlorian Hahn SmallVector<BasicBlock *> ExitingBlocks;
211cd0ba9dcSFlorian Hahn L.getExitingBlocks(ExitingBlocks);
21240d85f16SFlorian Hahn if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
21340d85f16SFlorian Hahn return LoadUsers.contains(Exiting->getTerminator());
21440d85f16SFlorian Hahn }))
215cd0ba9dcSFlorian Hahn return 1;
216cd0ba9dcSFlorian Hahn return 0;
217cd0ba9dcSFlorian Hahn }
218cd0ba9dcSFlorian Hahn
219b7cfa6caSSidharth Baveja // Return the number of iterations to peel off that make conditions in the
220b7cfa6caSSidharth Baveja // body true/false. For example, if we peel 2 iterations off the loop below,
221b7cfa6caSSidharth Baveja // the condition i < 2 can be evaluated at compile time.
222b7cfa6caSSidharth Baveja // for (i = 0; i < n; i++)
223b7cfa6caSSidharth Baveja // if (i < 2)
224b7cfa6caSSidharth Baveja // ..
225b7cfa6caSSidharth Baveja // else
226b7cfa6caSSidharth Baveja // ..
227b7cfa6caSSidharth Baveja // }
countToEliminateCompares(Loop & L,unsigned MaxPeelCount,ScalarEvolution & SE)228b7cfa6caSSidharth Baveja static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
229b7cfa6caSSidharth Baveja ScalarEvolution &SE) {
230b7cfa6caSSidharth Baveja assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
231b7cfa6caSSidharth Baveja unsigned DesiredPeelCount = 0;
232b7cfa6caSSidharth Baveja
233b7cfa6caSSidharth Baveja for (auto *BB : L.blocks()) {
234b7cfa6caSSidharth Baveja auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
235b7cfa6caSSidharth Baveja if (!BI || BI->isUnconditional())
236b7cfa6caSSidharth Baveja continue;
237b7cfa6caSSidharth Baveja
238b7cfa6caSSidharth Baveja // Ignore loop exit condition.
239b7cfa6caSSidharth Baveja if (L.getLoopLatch() == BB)
240b7cfa6caSSidharth Baveja continue;
241b7cfa6caSSidharth Baveja
242b7cfa6caSSidharth Baveja Value *Condition = BI->getCondition();
243b7cfa6caSSidharth Baveja Value *LeftVal, *RightVal;
244b7cfa6caSSidharth Baveja CmpInst::Predicate Pred;
245b7cfa6caSSidharth Baveja if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
246b7cfa6caSSidharth Baveja continue;
247b7cfa6caSSidharth Baveja
248b7cfa6caSSidharth Baveja const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
249b7cfa6caSSidharth Baveja const SCEV *RightSCEV = SE.getSCEV(RightVal);
250b7cfa6caSSidharth Baveja
251b7cfa6caSSidharth Baveja // Do not consider predicates that are known to be true or false
252b7cfa6caSSidharth Baveja // independently of the loop iteration.
25326ec76adSMax Kazantsev if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
254b7cfa6caSSidharth Baveja continue;
255b7cfa6caSSidharth Baveja
256b7cfa6caSSidharth Baveja // Check if we have a condition with one AddRec and one non AddRec
257b7cfa6caSSidharth Baveja // expression. Normalize LeftSCEV to be the AddRec.
258b7cfa6caSSidharth Baveja if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
259b7cfa6caSSidharth Baveja if (isa<SCEVAddRecExpr>(RightSCEV)) {
260b7cfa6caSSidharth Baveja std::swap(LeftSCEV, RightSCEV);
261b7cfa6caSSidharth Baveja Pred = ICmpInst::getSwappedPredicate(Pred);
262b7cfa6caSSidharth Baveja } else
263b7cfa6caSSidharth Baveja continue;
264b7cfa6caSSidharth Baveja }
265b7cfa6caSSidharth Baveja
266b7cfa6caSSidharth Baveja const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
267b7cfa6caSSidharth Baveja
268b7cfa6caSSidharth Baveja // Avoid huge SCEV computations in the loop below, make sure we only
269b7cfa6caSSidharth Baveja // consider AddRecs of the loop we are trying to peel.
270b7cfa6caSSidharth Baveja if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
271b7cfa6caSSidharth Baveja continue;
272b7cfa6caSSidharth Baveja if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
273a5b2e795SMax Kazantsev !SE.getMonotonicPredicateType(LeftAR, Pred))
274b7cfa6caSSidharth Baveja continue;
275b7cfa6caSSidharth Baveja
276b7cfa6caSSidharth Baveja // Check if extending the current DesiredPeelCount lets us evaluate Pred
277b7cfa6caSSidharth Baveja // or !Pred in the loop body statically.
278b7cfa6caSSidharth Baveja unsigned NewPeelCount = DesiredPeelCount;
279b7cfa6caSSidharth Baveja
280b7cfa6caSSidharth Baveja const SCEV *IterVal = LeftAR->evaluateAtIteration(
281b7cfa6caSSidharth Baveja SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
282b7cfa6caSSidharth Baveja
283b7cfa6caSSidharth Baveja // If the original condition is not known, get the negated predicate
284b7cfa6caSSidharth Baveja // (which holds on the else branch) and check if it is known. This allows
285b7cfa6caSSidharth Baveja // us to peel of iterations that make the original condition false.
286b7cfa6caSSidharth Baveja if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
287b7cfa6caSSidharth Baveja Pred = ICmpInst::getInversePredicate(Pred);
288b7cfa6caSSidharth Baveja
289b7cfa6caSSidharth Baveja const SCEV *Step = LeftAR->getStepRecurrence(SE);
290b7cfa6caSSidharth Baveja const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
291b7cfa6caSSidharth Baveja auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
292b7cfa6caSSidharth Baveja &NewPeelCount]() {
293b7cfa6caSSidharth Baveja IterVal = NextIterVal;
294b7cfa6caSSidharth Baveja NextIterVal = SE.getAddExpr(IterVal, Step);
295b7cfa6caSSidharth Baveja NewPeelCount++;
296b7cfa6caSSidharth Baveja };
297b7cfa6caSSidharth Baveja
298b7cfa6caSSidharth Baveja auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
299b7cfa6caSSidharth Baveja return NewPeelCount < MaxPeelCount;
300b7cfa6caSSidharth Baveja };
301b7cfa6caSSidharth Baveja
302b7cfa6caSSidharth Baveja while (CanPeelOneMoreIteration() &&
303b7cfa6caSSidharth Baveja SE.isKnownPredicate(Pred, IterVal, RightSCEV))
304b7cfa6caSSidharth Baveja PeelOneMoreIteration();
305b7cfa6caSSidharth Baveja
306b7cfa6caSSidharth Baveja // With *that* peel count, does the predicate !Pred become known in the
307b7cfa6caSSidharth Baveja // first iteration of the loop body after peeling?
308b7cfa6caSSidharth Baveja if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
309b7cfa6caSSidharth Baveja RightSCEV))
310b7cfa6caSSidharth Baveja continue; // If not, give up.
311b7cfa6caSSidharth Baveja
312b7cfa6caSSidharth Baveja // However, for equality comparisons, that isn't always sufficient to
313b7cfa6caSSidharth Baveja // eliminate the comparsion in loop body, we may need to peel one more
314b7cfa6caSSidharth Baveja // iteration. See if that makes !Pred become unknown again.
315b7cfa6caSSidharth Baveja if (ICmpInst::isEquality(Pred) &&
316b7cfa6caSSidharth Baveja !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
317b7cfa6caSSidharth Baveja RightSCEV) &&
318b7cfa6caSSidharth Baveja !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
319b7cfa6caSSidharth Baveja SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
320b7cfa6caSSidharth Baveja if (!CanPeelOneMoreIteration())
321b7cfa6caSSidharth Baveja continue; // Need to peel one more iteration, but can't. Give up.
322b7cfa6caSSidharth Baveja PeelOneMoreIteration(); // Great!
323b7cfa6caSSidharth Baveja }
324b7cfa6caSSidharth Baveja
325b7cfa6caSSidharth Baveja DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
326b7cfa6caSSidharth Baveja }
327b7cfa6caSSidharth Baveja
328b7cfa6caSSidharth Baveja return DesiredPeelCount;
329b7cfa6caSSidharth Baveja }
330b7cfa6caSSidharth Baveja
3312d31b025SPhilip Reames /// This "heuristic" exactly matches implicit behavior which used to exist
3322d31b025SPhilip Reames /// inside getLoopEstimatedTripCount. It was added here to keep an
3332d31b025SPhilip Reames /// improvement inside that API from causing peeling to become more agressive.
3342d31b025SPhilip Reames /// This should probably be removed.
violatesLegacyMultiExitLoopCheck(Loop * L)3352d31b025SPhilip Reames static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
3362d31b025SPhilip Reames BasicBlock *Latch = L->getLoopLatch();
3372d31b025SPhilip Reames if (!Latch)
3382d31b025SPhilip Reames return true;
3392d31b025SPhilip Reames
3402d31b025SPhilip Reames BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
3412d31b025SPhilip Reames if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
3422d31b025SPhilip Reames return true;
3432d31b025SPhilip Reames
3442d31b025SPhilip Reames assert((LatchBR->getSuccessor(0) == L->getHeader() ||
3452d31b025SPhilip Reames LatchBR->getSuccessor(1) == L->getHeader()) &&
3462d31b025SPhilip Reames "At least one edge out of the latch must go to the header");
3472d31b025SPhilip Reames
3482d31b025SPhilip Reames SmallVector<BasicBlock *, 4> ExitBlocks;
3492d31b025SPhilip Reames L->getUniqueNonLatchExitBlocks(ExitBlocks);
3502d31b025SPhilip Reames return any_of(ExitBlocks, [](const BasicBlock *EB) {
3512d31b025SPhilip Reames return !EB->getTerminatingDeoptimizeCall();
3522d31b025SPhilip Reames });
3532d31b025SPhilip Reames }
3542d31b025SPhilip Reames
3552d31b025SPhilip Reames
356b7cfa6caSSidharth Baveja // Return the number of iterations we want to peel off.
computePeelCount(Loop * L,unsigned LoopSize,TargetTransformInfo::PeelingPreferences & PP,unsigned TripCount,DominatorTree & DT,ScalarEvolution & SE,unsigned Threshold)357b7cfa6caSSidharth Baveja void llvm::computePeelCount(Loop *L, unsigned LoopSize,
358b7cfa6caSSidharth Baveja TargetTransformInfo::PeelingPreferences &PP,
35902d9a4d5SCraig Topper unsigned TripCount, DominatorTree &DT,
360cd0ba9dcSFlorian Hahn ScalarEvolution &SE, unsigned Threshold) {
361b7cfa6caSSidharth Baveja assert(LoopSize > 0 && "Zero loop size is not allowed!");
362b7cfa6caSSidharth Baveja // Save the PP.PeelCount value set by the target in
363b7cfa6caSSidharth Baveja // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
364b7cfa6caSSidharth Baveja unsigned TargetPeelCount = PP.PeelCount;
365b7cfa6caSSidharth Baveja PP.PeelCount = 0;
366b7cfa6caSSidharth Baveja if (!canPeel(L))
367b7cfa6caSSidharth Baveja return;
368b7cfa6caSSidharth Baveja
369b7cfa6caSSidharth Baveja // Only try to peel innermost loops by default.
3701507786cSCraig Topper // The constraint can be relaxed by the target in TTI.getPeelingPreferences
371b7cfa6caSSidharth Baveja // or by the flag -unroll-allow-loop-nests-peeling.
37289c1e35fSStefanos Baziotis if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
373b7cfa6caSSidharth Baveja return;
374b7cfa6caSSidharth Baveja
375b7cfa6caSSidharth Baveja // If the user provided a peel count, use that.
376b7cfa6caSSidharth Baveja bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
377b7cfa6caSSidharth Baveja if (UserPeelCount) {
378b7cfa6caSSidharth Baveja LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
379b7cfa6caSSidharth Baveja << " iterations.\n");
380b7cfa6caSSidharth Baveja PP.PeelCount = UnrollForcePeelCount;
381b7cfa6caSSidharth Baveja PP.PeelProfiledIterations = true;
382b7cfa6caSSidharth Baveja return;
383b7cfa6caSSidharth Baveja }
384b7cfa6caSSidharth Baveja
385b7cfa6caSSidharth Baveja // Skip peeling if it's disabled.
386b7cfa6caSSidharth Baveja if (!PP.AllowPeeling)
387b7cfa6caSSidharth Baveja return;
388b7cfa6caSSidharth Baveja
389c71890e1SIgor Kudrin // Check that we can peel at least one iteration.
390c71890e1SIgor Kudrin if (2 * LoopSize > Threshold)
391c71890e1SIgor Kudrin return;
392c71890e1SIgor Kudrin
393b7cfa6caSSidharth Baveja unsigned AlreadyPeeled = 0;
394b7cfa6caSSidharth Baveja if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
395b7cfa6caSSidharth Baveja AlreadyPeeled = *Peeled;
396b7cfa6caSSidharth Baveja // Stop if we already peeled off the maximum number of iterations.
397b7cfa6caSSidharth Baveja if (AlreadyPeeled >= UnrollPeelMaxCount)
398b7cfa6caSSidharth Baveja return;
399b7cfa6caSSidharth Baveja
400b7cfa6caSSidharth Baveja // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
401b7cfa6caSSidharth Baveja // iterations of the loop. For this we compute the number for iterations after
402b7cfa6caSSidharth Baveja // which every Phi is guaranteed to become an invariant, and try to peel the
403b7cfa6caSSidharth Baveja // maximum number of iterations among these values, thus turning all those
404b7cfa6caSSidharth Baveja // Phis into invariants.
405c71890e1SIgor Kudrin
406b7cfa6caSSidharth Baveja // Store the pre-calculated values here.
407cb728cb8SMax Kazantsev SmallDenseMap<PHINode *, Optional<unsigned>> IterationsToInvariance;
408b7cfa6caSSidharth Baveja // Now go through all Phis to calculate their the number of iterations they
409b7cfa6caSSidharth Baveja // need to become invariants.
4101507786cSCraig Topper // Start the max computation with the PP.PeelCount value set by the target
4111507786cSCraig Topper // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
412b7cfa6caSSidharth Baveja unsigned DesiredPeelCount = TargetPeelCount;
413b7cfa6caSSidharth Baveja BasicBlock *BackEdge = L->getLoopLatch();
414b7cfa6caSSidharth Baveja assert(BackEdge && "Loop is not in simplified form?");
415b7cfa6caSSidharth Baveja for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
416b7cfa6caSSidharth Baveja PHINode *Phi = cast<PHINode>(&*BI);
417c71890e1SIgor Kudrin auto ToInvariance = calculateIterationsToInvariance(Phi, L, BackEdge,
418c71890e1SIgor Kudrin IterationsToInvariance);
419cb728cb8SMax Kazantsev if (ToInvariance)
420cb728cb8SMax Kazantsev DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
421b7cfa6caSSidharth Baveja }
422b7cfa6caSSidharth Baveja
423b7cfa6caSSidharth Baveja // Pay respect to limitations implied by loop size and the max peel count.
424b7cfa6caSSidharth Baveja unsigned MaxPeelCount = UnrollPeelMaxCount;
425b7cfa6caSSidharth Baveja MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
426b7cfa6caSSidharth Baveja
427b7cfa6caSSidharth Baveja DesiredPeelCount = std::max(DesiredPeelCount,
428b7cfa6caSSidharth Baveja countToEliminateCompares(*L, MaxPeelCount, SE));
429b7cfa6caSSidharth Baveja
430cd0ba9dcSFlorian Hahn if (DesiredPeelCount == 0)
431cd0ba9dcSFlorian Hahn DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
432cd0ba9dcSFlorian Hahn
433b7cfa6caSSidharth Baveja if (DesiredPeelCount > 0) {
434b7cfa6caSSidharth Baveja DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
435b7cfa6caSSidharth Baveja // Consider max peel count limitation.
436b7cfa6caSSidharth Baveja assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
437b7cfa6caSSidharth Baveja if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
438b7cfa6caSSidharth Baveja LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
439b7cfa6caSSidharth Baveja << " iteration(s) to turn"
440b7cfa6caSSidharth Baveja << " some Phis into invariants.\n");
441b7cfa6caSSidharth Baveja PP.PeelCount = DesiredPeelCount;
442b7cfa6caSSidharth Baveja PP.PeelProfiledIterations = false;
443b7cfa6caSSidharth Baveja return;
444b7cfa6caSSidharth Baveja }
445b7cfa6caSSidharth Baveja }
446b7cfa6caSSidharth Baveja
447b7cfa6caSSidharth Baveja // Bail if we know the statically calculated trip count.
448b7cfa6caSSidharth Baveja // In this case we rather prefer partial unrolling.
449b7cfa6caSSidharth Baveja if (TripCount)
450b7cfa6caSSidharth Baveja return;
451b7cfa6caSSidharth Baveja
452b7cfa6caSSidharth Baveja // Do not apply profile base peeling if it is disabled.
453b7cfa6caSSidharth Baveja if (!PP.PeelProfiledIterations)
454b7cfa6caSSidharth Baveja return;
455b7cfa6caSSidharth Baveja // If we don't know the trip count, but have reason to believe the average
456b7cfa6caSSidharth Baveja // trip count is low, peeling should be beneficial, since we will usually
457b7cfa6caSSidharth Baveja // hit the peeled section.
458b7cfa6caSSidharth Baveja // We only do this in the presence of profile information, since otherwise
459b7cfa6caSSidharth Baveja // our estimates of the trip count are not reliable enough.
460b7cfa6caSSidharth Baveja if (L->getHeader()->getParent()->hasProfileData()) {
4612d31b025SPhilip Reames if (violatesLegacyMultiExitLoopCheck(L))
4622d31b025SPhilip Reames return;
46339ce6888SIgor Kudrin Optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
46439ce6888SIgor Kudrin if (!EstimatedTripCount)
465b7cfa6caSSidharth Baveja return;
466b7cfa6caSSidharth Baveja
46739ce6888SIgor Kudrin LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
46839ce6888SIgor Kudrin << *EstimatedTripCount << "\n");
469b7cfa6caSSidharth Baveja
47039ce6888SIgor Kudrin if (*EstimatedTripCount) {
47139ce6888SIgor Kudrin if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
47239ce6888SIgor Kudrin unsigned PeelCount = *EstimatedTripCount;
47339ce6888SIgor Kudrin LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
47439ce6888SIgor Kudrin PP.PeelCount = PeelCount;
475b7cfa6caSSidharth Baveja return;
476b7cfa6caSSidharth Baveja }
477b7cfa6caSSidharth Baveja LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
478b7cfa6caSSidharth Baveja LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
47939ce6888SIgor Kudrin LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
480b7cfa6caSSidharth Baveja LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
48139ce6888SIgor Kudrin LLVM_DEBUG(dbgs() << "Max peel count by cost: "
48239ce6888SIgor Kudrin << (Threshold / LoopSize - 1) << "\n");
483b7cfa6caSSidharth Baveja }
484b7cfa6caSSidharth Baveja }
485b7cfa6caSSidharth Baveja }
486b7cfa6caSSidharth Baveja
487b7cfa6caSSidharth Baveja /// Update the branch weights of the latch of a peeled-off loop
488b7cfa6caSSidharth Baveja /// iteration.
489b7cfa6caSSidharth Baveja /// This sets the branch weights for the latch of the recently peeled off loop
490b7cfa6caSSidharth Baveja /// iteration correctly.
491b7cfa6caSSidharth Baveja /// Let F is a weight of the edge from latch to header.
492b7cfa6caSSidharth Baveja /// Let E is a weight of the edge from latch to exit.
493b7cfa6caSSidharth Baveja /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
494b7cfa6caSSidharth Baveja /// go to exit.
495b7cfa6caSSidharth Baveja /// Then, Estimated TripCount = F / E.
496b7cfa6caSSidharth Baveja /// For I-th (counting from 0) peeled off iteration we set the the weights for
497b7cfa6caSSidharth Baveja /// the peeled latch as (TC - I, 1). It gives us reasonable distribution,
498b7cfa6caSSidharth Baveja /// The probability to go to exit 1/(TC-I) increases. At the same time
499b7cfa6caSSidharth Baveja /// the estimated trip count of remaining loop reduces by I.
500b7cfa6caSSidharth Baveja /// To avoid dealing with division rounding we can just multiple both part
501b7cfa6caSSidharth Baveja /// of weights to E and use weight as (F - I * E, E).
502b7cfa6caSSidharth Baveja ///
503b7cfa6caSSidharth Baveja /// \param Header The copy of the header block that belongs to next iteration.
504b7cfa6caSSidharth Baveja /// \param LatchBR The copy of the latch branch that belongs to this iteration.
505b7cfa6caSSidharth Baveja /// \param[in,out] FallThroughWeight The weight of the edge from latch to
506b7cfa6caSSidharth Baveja /// header before peeling (in) and after peeled off one iteration (out).
updateBranchWeights(BasicBlock * Header,BranchInst * LatchBR,uint64_t ExitWeight,uint64_t & FallThroughWeight)507b7cfa6caSSidharth Baveja static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
508b7cfa6caSSidharth Baveja uint64_t ExitWeight,
509b7cfa6caSSidharth Baveja uint64_t &FallThroughWeight) {
510b7cfa6caSSidharth Baveja // FallThroughWeight is 0 means that there is no branch weights on original
511b7cfa6caSSidharth Baveja // latch block or estimated trip count is zero.
512b7cfa6caSSidharth Baveja if (!FallThroughWeight)
513b7cfa6caSSidharth Baveja return;
514b7cfa6caSSidharth Baveja
515b7cfa6caSSidharth Baveja unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
516b7cfa6caSSidharth Baveja MDBuilder MDB(LatchBR->getContext());
517b7cfa6caSSidharth Baveja MDNode *WeightNode =
518b7cfa6caSSidharth Baveja HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
519b7cfa6caSSidharth Baveja : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
520b7cfa6caSSidharth Baveja LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
521b7cfa6caSSidharth Baveja FallThroughWeight =
522b7cfa6caSSidharth Baveja FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
523b7cfa6caSSidharth Baveja }
524b7cfa6caSSidharth Baveja
525b7cfa6caSSidharth Baveja /// Initialize the weights.
526b7cfa6caSSidharth Baveja ///
527b7cfa6caSSidharth Baveja /// \param Header The header block.
528b7cfa6caSSidharth Baveja /// \param LatchBR The latch branch.
529b7cfa6caSSidharth Baveja /// \param[out] ExitWeight The weight of the edge from Latch to Exit.
530b7cfa6caSSidharth Baveja /// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
initBranchWeights(BasicBlock * Header,BranchInst * LatchBR,uint64_t & ExitWeight,uint64_t & FallThroughWeight)531b7cfa6caSSidharth Baveja static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
532b7cfa6caSSidharth Baveja uint64_t &ExitWeight,
533b7cfa6caSSidharth Baveja uint64_t &FallThroughWeight) {
534b7cfa6caSSidharth Baveja uint64_t TrueWeight, FalseWeight;
535b7cfa6caSSidharth Baveja if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
536b7cfa6caSSidharth Baveja return;
537b7cfa6caSSidharth Baveja unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
538b7cfa6caSSidharth Baveja ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
539b7cfa6caSSidharth Baveja FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
540b7cfa6caSSidharth Baveja }
541b7cfa6caSSidharth Baveja
542b7cfa6caSSidharth Baveja /// Update the weights of original Latch block after peeling off all iterations.
543b7cfa6caSSidharth Baveja ///
544b7cfa6caSSidharth Baveja /// \param Header The header block.
545b7cfa6caSSidharth Baveja /// \param LatchBR The latch branch.
546b7cfa6caSSidharth Baveja /// \param ExitWeight The weight of the edge from Latch to Exit.
547b7cfa6caSSidharth Baveja /// \param FallThroughWeight The weight of the edge from Latch to Header.
fixupBranchWeights(BasicBlock * Header,BranchInst * LatchBR,uint64_t ExitWeight,uint64_t FallThroughWeight)548b7cfa6caSSidharth Baveja static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
549b7cfa6caSSidharth Baveja uint64_t ExitWeight,
550b7cfa6caSSidharth Baveja uint64_t FallThroughWeight) {
551b7cfa6caSSidharth Baveja // FallThroughWeight is 0 means that there is no branch weights on original
552b7cfa6caSSidharth Baveja // latch block or estimated trip count is zero.
553b7cfa6caSSidharth Baveja if (!FallThroughWeight)
554b7cfa6caSSidharth Baveja return;
555b7cfa6caSSidharth Baveja
556b7cfa6caSSidharth Baveja // Sets the branch weights on the loop exit.
557b7cfa6caSSidharth Baveja MDBuilder MDB(LatchBR->getContext());
558b7cfa6caSSidharth Baveja unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
559b7cfa6caSSidharth Baveja MDNode *WeightNode =
560b7cfa6caSSidharth Baveja HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
561b7cfa6caSSidharth Baveja : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
562b7cfa6caSSidharth Baveja LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
563b7cfa6caSSidharth Baveja }
564b7cfa6caSSidharth Baveja
565b7cfa6caSSidharth Baveja /// Clones the body of the loop L, putting it between \p InsertTop and \p
566b7cfa6caSSidharth Baveja /// InsertBot.
567b7cfa6caSSidharth Baveja /// \param IterNumber The serial number of the iteration currently being
568b7cfa6caSSidharth Baveja /// peeled off.
569b7cfa6caSSidharth Baveja /// \param ExitEdges The exit edges of the original loop.
570b7cfa6caSSidharth Baveja /// \param[out] NewBlocks A list of the blocks in the newly created clone
571b7cfa6caSSidharth Baveja /// \param[out] VMap The value map between the loop and the new clone.
572b7cfa6caSSidharth Baveja /// \param LoopBlocks A helper for DFS-traversal of the loop.
573b7cfa6caSSidharth Baveja /// \param LVMap A value-map that maps instructions from the original loop to
574b7cfa6caSSidharth Baveja /// instructions in the last peeled-off iteration.
cloneLoopBlocks(Loop * L,unsigned IterNumber,BasicBlock * InsertTop,BasicBlock * InsertBot,SmallVectorImpl<std::pair<BasicBlock *,BasicBlock * >> & ExitEdges,SmallVectorImpl<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,ValueToValueMapTy & LVMap,DominatorTree * DT,LoopInfo * LI,ArrayRef<MDNode * > LoopLocalNoAliasDeclScopes,ScalarEvolution & SE)575b7cfa6caSSidharth Baveja static void cloneLoopBlocks(
576b7cfa6caSSidharth Baveja Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
577b7cfa6caSSidharth Baveja SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
578b7cfa6caSSidharth Baveja SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
579baad10c0SMax Kazantsev ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
580cfc741bcSFlorian Hahn LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
581cfc741bcSFlorian Hahn ScalarEvolution &SE) {
582b7cfa6caSSidharth Baveja BasicBlock *Header = L->getHeader();
583b7cfa6caSSidharth Baveja BasicBlock *Latch = L->getLoopLatch();
584b7cfa6caSSidharth Baveja BasicBlock *PreHeader = L->getLoopPreheader();
585b7cfa6caSSidharth Baveja
586b7cfa6caSSidharth Baveja Function *F = Header->getParent();
587b7cfa6caSSidharth Baveja LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
588b7cfa6caSSidharth Baveja LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
589b7cfa6caSSidharth Baveja Loop *ParentLoop = L->getParentLoop();
590b7cfa6caSSidharth Baveja
591b7cfa6caSSidharth Baveja // For each block in the original loop, create a new copy,
592b7cfa6caSSidharth Baveja // and update the value map with the newly created values.
593b7cfa6caSSidharth Baveja for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
594b7cfa6caSSidharth Baveja BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
595b7cfa6caSSidharth Baveja NewBlocks.push_back(NewBB);
596b7cfa6caSSidharth Baveja
597b7cfa6caSSidharth Baveja // If an original block is an immediate child of the loop L, its copy
598b7cfa6caSSidharth Baveja // is a child of a ParentLoop after peeling. If a block is a child of
599b7cfa6caSSidharth Baveja // a nested loop, it is handled in the cloneLoop() call below.
600b7cfa6caSSidharth Baveja if (ParentLoop && LI->getLoopFor(*BB) == L)
601b7cfa6caSSidharth Baveja ParentLoop->addBasicBlockToLoop(NewBB, *LI);
602b7cfa6caSSidharth Baveja
603b7cfa6caSSidharth Baveja VMap[*BB] = NewBB;
604b7cfa6caSSidharth Baveja
605b7cfa6caSSidharth Baveja // If dominator tree is available, insert nodes to represent cloned blocks.
606baad10c0SMax Kazantsev if (DT) {
607b7cfa6caSSidharth Baveja if (Header == *BB)
608baad10c0SMax Kazantsev DT->addNewBlock(NewBB, InsertTop);
609b7cfa6caSSidharth Baveja else {
610baad10c0SMax Kazantsev DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
611b7cfa6caSSidharth Baveja // VMap must contain entry for IDom, as the iteration order is RPO.
612baad10c0SMax Kazantsev DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
613baad10c0SMax Kazantsev }
614b7cfa6caSSidharth Baveja }
615b7cfa6caSSidharth Baveja }
616b7cfa6caSSidharth Baveja
61780cdd30eSJeroen Dobbelaere {
61880cdd30eSJeroen Dobbelaere // Identify what other metadata depends on the cloned version. After
61980cdd30eSJeroen Dobbelaere // cloning, replace the metadata with the corrected version for both
62080cdd30eSJeroen Dobbelaere // memory instructions and noalias intrinsics.
62180cdd30eSJeroen Dobbelaere std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
62280cdd30eSJeroen Dobbelaere cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
62380cdd30eSJeroen Dobbelaere Header->getContext(), Ext);
62480cdd30eSJeroen Dobbelaere }
62580cdd30eSJeroen Dobbelaere
626b7cfa6caSSidharth Baveja // Recursively create the new Loop objects for nested loops, if any,
627b7cfa6caSSidharth Baveja // to preserve LoopInfo.
628b7cfa6caSSidharth Baveja for (Loop *ChildLoop : *L) {
629b7cfa6caSSidharth Baveja cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
630b7cfa6caSSidharth Baveja }
631b7cfa6caSSidharth Baveja
632b7cfa6caSSidharth Baveja // Hook-up the control flow for the newly inserted blocks.
633b7cfa6caSSidharth Baveja // The new header is hooked up directly to the "top", which is either
634b7cfa6caSSidharth Baveja // the original loop preheader (for the first iteration) or the previous
635b7cfa6caSSidharth Baveja // iteration's exiting block (for every other iteration)
636b7cfa6caSSidharth Baveja InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
637b7cfa6caSSidharth Baveja
638b7cfa6caSSidharth Baveja // Similarly, for the latch:
639b7cfa6caSSidharth Baveja // The original exiting edge is still hooked up to the loop exit.
640b7cfa6caSSidharth Baveja // The backedge now goes to the "bottom", which is either the loop's real
641b7cfa6caSSidharth Baveja // header (for the last peeled iteration) or the copied header of the next
642b7cfa6caSSidharth Baveja // iteration (for every other iteration)
643b7cfa6caSSidharth Baveja BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
644b7cfa6caSSidharth Baveja BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
645b7cfa6caSSidharth Baveja for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
646b7cfa6caSSidharth Baveja if (LatchBR->getSuccessor(idx) == Header) {
647b7cfa6caSSidharth Baveja LatchBR->setSuccessor(idx, InsertBot);
648b7cfa6caSSidharth Baveja break;
649b7cfa6caSSidharth Baveja }
650baad10c0SMax Kazantsev if (DT)
651baad10c0SMax Kazantsev DT->changeImmediateDominator(InsertBot, NewLatch);
652b7cfa6caSSidharth Baveja
653b7cfa6caSSidharth Baveja // The new copy of the loop body starts with a bunch of PHI nodes
654b7cfa6caSSidharth Baveja // that pick an incoming value from either the preheader, or the previous
655b7cfa6caSSidharth Baveja // loop iteration. Since this copy is no longer part of the loop, we
656b7cfa6caSSidharth Baveja // resolve this statically:
657b7cfa6caSSidharth Baveja // For the first iteration, we use the value from the preheader directly.
658b7cfa6caSSidharth Baveja // For any other iteration, we replace the phi with the value generated by
659b7cfa6caSSidharth Baveja // the immediately preceding clone of the loop body (which represents
660b7cfa6caSSidharth Baveja // the previous iteration).
661b7cfa6caSSidharth Baveja for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
662b7cfa6caSSidharth Baveja PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
663b7cfa6caSSidharth Baveja if (IterNumber == 0) {
664b7cfa6caSSidharth Baveja VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
665b7cfa6caSSidharth Baveja } else {
666b7cfa6caSSidharth Baveja Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
667b7cfa6caSSidharth Baveja Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
668b7cfa6caSSidharth Baveja if (LatchInst && L->contains(LatchInst))
669b7cfa6caSSidharth Baveja VMap[&*I] = LVMap[LatchInst];
670b7cfa6caSSidharth Baveja else
671b7cfa6caSSidharth Baveja VMap[&*I] = LatchVal;
672b7cfa6caSSidharth Baveja }
673b7cfa6caSSidharth Baveja cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
674b7cfa6caSSidharth Baveja }
675b7cfa6caSSidharth Baveja
676b7cfa6caSSidharth Baveja // Fix up the outgoing values - we need to add a value for the iteration
677b7cfa6caSSidharth Baveja // we've just created. Note that this must happen *after* the incoming
678b7cfa6caSSidharth Baveja // values are adjusted, since the value going out of the latch may also be
679b7cfa6caSSidharth Baveja // a value coming into the header.
680b7cfa6caSSidharth Baveja for (auto Edge : ExitEdges)
681b7cfa6caSSidharth Baveja for (PHINode &PHI : Edge.second->phis()) {
682b7cfa6caSSidharth Baveja Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
683b7cfa6caSSidharth Baveja Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
684b7cfa6caSSidharth Baveja if (LatchInst && L->contains(LatchInst))
685b7cfa6caSSidharth Baveja LatchVal = VMap[LatchVal];
686b7cfa6caSSidharth Baveja PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
687cfc741bcSFlorian Hahn SE.forgetValue(&PHI);
688b7cfa6caSSidharth Baveja }
689b7cfa6caSSidharth Baveja
690b7cfa6caSSidharth Baveja // LastValueMap is updated with the values for the current loop
691b7cfa6caSSidharth Baveja // which are used the next time this function is called.
692b7cfa6caSSidharth Baveja for (auto KV : VMap)
693b7cfa6caSSidharth Baveja LVMap[KV.first] = KV.second;
694b7cfa6caSSidharth Baveja }
695b7cfa6caSSidharth Baveja
gatherPeelingPreferences(Loop * L,ScalarEvolution & SE,const TargetTransformInfo & TTI,Optional<bool> UserAllowPeeling,Optional<bool> UserAllowProfileBasedPeeling,bool UnrollingSpecficValues)696b7cfa6caSSidharth Baveja TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
697b7cfa6caSSidharth Baveja Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
698b7cfa6caSSidharth Baveja Optional<bool> UserAllowPeeling,
699b7cfa6caSSidharth Baveja Optional<bool> UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) {
700b7cfa6caSSidharth Baveja TargetTransformInfo::PeelingPreferences PP;
701b7cfa6caSSidharth Baveja
702b7cfa6caSSidharth Baveja // Set the default values.
703b7cfa6caSSidharth Baveja PP.PeelCount = 0;
704b7cfa6caSSidharth Baveja PP.AllowPeeling = true;
705b7cfa6caSSidharth Baveja PP.AllowLoopNestsPeeling = false;
706b7cfa6caSSidharth Baveja PP.PeelProfiledIterations = true;
707b7cfa6caSSidharth Baveja
708b7cfa6caSSidharth Baveja // Get the target specifc values.
709b7cfa6caSSidharth Baveja TTI.getPeelingPreferences(L, SE, PP);
710b7cfa6caSSidharth Baveja
711b7cfa6caSSidharth Baveja // User specified values using cl::opt.
712b7cfa6caSSidharth Baveja if (UnrollingSpecficValues) {
713b7cfa6caSSidharth Baveja if (UnrollPeelCount.getNumOccurrences() > 0)
714b7cfa6caSSidharth Baveja PP.PeelCount = UnrollPeelCount;
715b7cfa6caSSidharth Baveja if (UnrollAllowPeeling.getNumOccurrences() > 0)
716b7cfa6caSSidharth Baveja PP.AllowPeeling = UnrollAllowPeeling;
717b7cfa6caSSidharth Baveja if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
718b7cfa6caSSidharth Baveja PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
719b7cfa6caSSidharth Baveja }
720b7cfa6caSSidharth Baveja
721b7cfa6caSSidharth Baveja // User specifed values provided by argument.
722*a7938c74SKazu Hirata if (UserAllowPeeling)
723b7cfa6caSSidharth Baveja PP.AllowPeeling = *UserAllowPeeling;
724*a7938c74SKazu Hirata if (UserAllowProfileBasedPeeling)
725b7cfa6caSSidharth Baveja PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
726b7cfa6caSSidharth Baveja
727b7cfa6caSSidharth Baveja return PP;
728b7cfa6caSSidharth Baveja }
729b7cfa6caSSidharth Baveja
730b7cfa6caSSidharth Baveja /// Peel off the first \p PeelCount iterations of loop \p L.
731b7cfa6caSSidharth Baveja ///
732b7cfa6caSSidharth Baveja /// Note that this does not peel them off as a single straight-line block.
733b7cfa6caSSidharth Baveja /// Rather, each iteration is peeled off separately, and needs to check the
734b7cfa6caSSidharth Baveja /// exit condition.
735b7cfa6caSSidharth Baveja /// For loops that dynamically execute \p PeelCount iterations or less
736b7cfa6caSSidharth Baveja /// this provides a benefit, since the peeled off iterations, which account
737b7cfa6caSSidharth Baveja /// for the bulk of dynamic execution, can be further simplified by scalar
738b7cfa6caSSidharth Baveja /// optimizations.
peelLoop(Loop * L,unsigned PeelCount,LoopInfo * LI,ScalarEvolution * SE,DominatorTree & DT,AssumptionCache * AC,bool PreserveLCSSA)739b7cfa6caSSidharth Baveja bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
740bc48a266SAnna Thomas ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
741b7cfa6caSSidharth Baveja bool PreserveLCSSA) {
742b7cfa6caSSidharth Baveja assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
743b7cfa6caSSidharth Baveja assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
744b7cfa6caSSidharth Baveja
745b7cfa6caSSidharth Baveja LoopBlocksDFS LoopBlocks(L);
746b7cfa6caSSidharth Baveja LoopBlocks.perform(LI);
747b7cfa6caSSidharth Baveja
748b7cfa6caSSidharth Baveja BasicBlock *Header = L->getHeader();
749b7cfa6caSSidharth Baveja BasicBlock *PreHeader = L->getLoopPreheader();
750b7cfa6caSSidharth Baveja BasicBlock *Latch = L->getLoopLatch();
751b7cfa6caSSidharth Baveja SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
752b7cfa6caSSidharth Baveja L->getExitEdges(ExitEdges);
753b7cfa6caSSidharth Baveja
754d4c74cd4SMax Kazantsev // Remember dominators of blocks we might reach through exits to change them
755d4c74cd4SMax Kazantsev // later. Immediate dominator of such block might change, because we add more
756d4c74cd4SMax Kazantsev // routes which can lead to the exit: we can reach it from the peeled
757d4c74cd4SMax Kazantsev // iterations too.
758d4c74cd4SMax Kazantsev DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
759d4c74cd4SMax Kazantsev for (auto *BB : L->blocks()) {
760bc48a266SAnna Thomas auto *BBDomNode = DT.getNode(BB);
761d4c74cd4SMax Kazantsev SmallVector<BasicBlock *, 16> ChildrenToUpdate;
762d4c74cd4SMax Kazantsev for (auto *ChildDomNode : BBDomNode->children()) {
763d4c74cd4SMax Kazantsev auto *ChildBB = ChildDomNode->getBlock();
764d4c74cd4SMax Kazantsev if (!L->contains(ChildBB))
765d4c74cd4SMax Kazantsev ChildrenToUpdate.push_back(ChildBB);
766d4c74cd4SMax Kazantsev }
767d4c74cd4SMax Kazantsev // The new idom of the block will be the nearest common dominator
768d4c74cd4SMax Kazantsev // of all copies of the previous idom. This is equivalent to the
769d4c74cd4SMax Kazantsev // nearest common dominator of the previous idom and the first latch,
770d4c74cd4SMax Kazantsev // which dominates all copies of the previous idom.
771bc48a266SAnna Thomas BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
772d4c74cd4SMax Kazantsev for (auto *ChildBB : ChildrenToUpdate)
773d4c74cd4SMax Kazantsev NonLoopBlocksIDom[ChildBB] = NewIDom;
774baad10c0SMax Kazantsev }
77594052179SArthur Eubanks
776b7cfa6caSSidharth Baveja Function *F = Header->getParent();
777b7cfa6caSSidharth Baveja
778b7cfa6caSSidharth Baveja // Set up all the necessary basic blocks. It is convenient to split the
779b7cfa6caSSidharth Baveja // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
780b7cfa6caSSidharth Baveja // body, and a new preheader for the "real" loop.
781b7cfa6caSSidharth Baveja
782b7cfa6caSSidharth Baveja // Peeling the first iteration transforms.
783b7cfa6caSSidharth Baveja //
784b7cfa6caSSidharth Baveja // PreHeader:
785b7cfa6caSSidharth Baveja // ...
786b7cfa6caSSidharth Baveja // Header:
787b7cfa6caSSidharth Baveja // LoopBody
788b7cfa6caSSidharth Baveja // If (cond) goto Header
789b7cfa6caSSidharth Baveja // Exit:
790b7cfa6caSSidharth Baveja //
791b7cfa6caSSidharth Baveja // into
792b7cfa6caSSidharth Baveja //
793b7cfa6caSSidharth Baveja // InsertTop:
794b7cfa6caSSidharth Baveja // LoopBody
795b7cfa6caSSidharth Baveja // If (!cond) goto Exit
796b7cfa6caSSidharth Baveja // InsertBot:
797b7cfa6caSSidharth Baveja // NewPreHeader:
798b7cfa6caSSidharth Baveja // ...
799b7cfa6caSSidharth Baveja // Header:
800b7cfa6caSSidharth Baveja // LoopBody
801b7cfa6caSSidharth Baveja // If (cond) goto Header
802b7cfa6caSSidharth Baveja // Exit:
803b7cfa6caSSidharth Baveja //
804b7cfa6caSSidharth Baveja // Each following iteration will split the current bottom anchor in two,
805b7cfa6caSSidharth Baveja // and put the new copy of the loop body between these two blocks. That is,
806b7cfa6caSSidharth Baveja // after peeling another iteration from the example above, we'll split
807b7cfa6caSSidharth Baveja // InsertBot, and get:
808b7cfa6caSSidharth Baveja //
809b7cfa6caSSidharth Baveja // InsertTop:
810b7cfa6caSSidharth Baveja // LoopBody
811b7cfa6caSSidharth Baveja // If (!cond) goto Exit
812b7cfa6caSSidharth Baveja // InsertBot:
813b7cfa6caSSidharth Baveja // LoopBody
814b7cfa6caSSidharth Baveja // If (!cond) goto Exit
815b7cfa6caSSidharth Baveja // InsertBot.next:
816b7cfa6caSSidharth Baveja // NewPreHeader:
817b7cfa6caSSidharth Baveja // ...
818b7cfa6caSSidharth Baveja // Header:
819b7cfa6caSSidharth Baveja // LoopBody
820b7cfa6caSSidharth Baveja // If (cond) goto Header
821b7cfa6caSSidharth Baveja // Exit:
822b7cfa6caSSidharth Baveja
823bc48a266SAnna Thomas BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
824b7cfa6caSSidharth Baveja BasicBlock *InsertBot =
825bc48a266SAnna Thomas SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
826b7cfa6caSSidharth Baveja BasicBlock *NewPreHeader =
827bc48a266SAnna Thomas SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
828b7cfa6caSSidharth Baveja
829b7cfa6caSSidharth Baveja InsertTop->setName(Header->getName() + ".peel.begin");
830b7cfa6caSSidharth Baveja InsertBot->setName(Header->getName() + ".peel.next");
831b7cfa6caSSidharth Baveja NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
832b7cfa6caSSidharth Baveja
833b7cfa6caSSidharth Baveja ValueToValueMapTy LVMap;
834b7cfa6caSSidharth Baveja
835b7cfa6caSSidharth Baveja // If we have branch weight information, we'll want to update it for the
836b7cfa6caSSidharth Baveja // newly created branches.
837b7cfa6caSSidharth Baveja BranchInst *LatchBR =
838b7cfa6caSSidharth Baveja cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
839b7cfa6caSSidharth Baveja uint64_t ExitWeight = 0, FallThroughWeight = 0;
840b7cfa6caSSidharth Baveja initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
841b7cfa6caSSidharth Baveja
84280cdd30eSJeroen Dobbelaere // Identify what noalias metadata is inside the loop: if it is inside the
84380cdd30eSJeroen Dobbelaere // loop, the associated metadata must be cloned for each iteration.
84480cdd30eSJeroen Dobbelaere SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
84580cdd30eSJeroen Dobbelaere identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
84680cdd30eSJeroen Dobbelaere
847b7cfa6caSSidharth Baveja // For each peeled-off iteration, make a copy of the loop.
848b7cfa6caSSidharth Baveja for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
849b7cfa6caSSidharth Baveja SmallVector<BasicBlock *, 8> NewBlocks;
850b7cfa6caSSidharth Baveja ValueToValueMapTy VMap;
851b7cfa6caSSidharth Baveja
852b7cfa6caSSidharth Baveja cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
853bc48a266SAnna Thomas LoopBlocks, VMap, LVMap, &DT, LI,
854cfc741bcSFlorian Hahn LoopLocalNoAliasDeclScopes, *SE);
855b7cfa6caSSidharth Baveja
856b7cfa6caSSidharth Baveja // Remap to use values from the current iteration instead of the
857b7cfa6caSSidharth Baveja // previous one.
858b7cfa6caSSidharth Baveja remapInstructionsInBlocks(NewBlocks, VMap);
859b7cfa6caSSidharth Baveja
860d4c74cd4SMax Kazantsev // Update IDoms of the blocks reachable through exits.
861baad10c0SMax Kazantsev if (Iter == 0)
862d4c74cd4SMax Kazantsev for (auto BBIDom : NonLoopBlocksIDom)
863bc48a266SAnna Thomas DT.changeImmediateDominator(BBIDom.first,
864d4c74cd4SMax Kazantsev cast<BasicBlock>(LVMap[BBIDom.second]));
865baad10c0SMax Kazantsev #ifdef EXPENSIVE_CHECKS
866bc48a266SAnna Thomas assert(DT.verify(DominatorTree::VerificationLevel::Fast));
867baad10c0SMax Kazantsev #endif
868b7cfa6caSSidharth Baveja
869b7cfa6caSSidharth Baveja auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
870b7cfa6caSSidharth Baveja updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
871b7cfa6caSSidharth Baveja // Remove Loop metadata from the latch branch instruction
872b7cfa6caSSidharth Baveja // because it is not the Loop's latch branch anymore.
873b7cfa6caSSidharth Baveja LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
874b7cfa6caSSidharth Baveja
875b7cfa6caSSidharth Baveja InsertTop = InsertBot;
876bc48a266SAnna Thomas InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
877b7cfa6caSSidharth Baveja InsertBot->setName(Header->getName() + ".peel.next");
878b7cfa6caSSidharth Baveja
879b7cfa6caSSidharth Baveja F->getBasicBlockList().splice(InsertTop->getIterator(),
880b7cfa6caSSidharth Baveja F->getBasicBlockList(),
881b7cfa6caSSidharth Baveja NewBlocks[0]->getIterator(), F->end());
882b7cfa6caSSidharth Baveja }
883b7cfa6caSSidharth Baveja
884b7cfa6caSSidharth Baveja // Now adjust the phi nodes in the loop header to get their initial values
885b7cfa6caSSidharth Baveja // from the last peeled-off iteration instead of the preheader.
886b7cfa6caSSidharth Baveja for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
887b7cfa6caSSidharth Baveja PHINode *PHI = cast<PHINode>(I);
888b7cfa6caSSidharth Baveja Value *NewVal = PHI->getIncomingValueForBlock(Latch);
889b7cfa6caSSidharth Baveja Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
890b7cfa6caSSidharth Baveja if (LatchInst && L->contains(LatchInst))
891b7cfa6caSSidharth Baveja NewVal = LVMap[LatchInst];
892b7cfa6caSSidharth Baveja
893b7cfa6caSSidharth Baveja PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
894b7cfa6caSSidharth Baveja }
895b7cfa6caSSidharth Baveja
896b7cfa6caSSidharth Baveja fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
897b7cfa6caSSidharth Baveja
898b7cfa6caSSidharth Baveja // Update Metadata for count of peeled off iterations.
899b7cfa6caSSidharth Baveja unsigned AlreadyPeeled = 0;
900b7cfa6caSSidharth Baveja if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
901b7cfa6caSSidharth Baveja AlreadyPeeled = *Peeled;
902b7cfa6caSSidharth Baveja addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
903b7cfa6caSSidharth Baveja
904b7cfa6caSSidharth Baveja if (Loop *ParentLoop = L->getParentLoop())
905b7cfa6caSSidharth Baveja L = ParentLoop;
906b7cfa6caSSidharth Baveja
907b7cfa6caSSidharth Baveja // We modified the loop, update SE.
908b7cfa6caSSidharth Baveja SE->forgetTopmostLoop(L);
909b7cfa6caSSidharth Baveja
9102f6c1481SStephen Long #ifdef EXPENSIVE_CHECKS
911b7cfa6caSSidharth Baveja // Finally DomtTree must be correct.
912bc48a266SAnna Thomas assert(DT.verify(DominatorTree::VerificationLevel::Fast));
9132f6c1481SStephen Long #endif
914b7cfa6caSSidharth Baveja
915b7cfa6caSSidharth Baveja // FIXME: Incrementally update loop-simplify
916bc48a266SAnna Thomas simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
917b7cfa6caSSidharth Baveja
918b7cfa6caSSidharth Baveja NumPeeled++;
919b7cfa6caSSidharth Baveja
920b7cfa6caSSidharth Baveja return true;
921b7cfa6caSSidharth Baveja }
922