1963401d2SDavid Green //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
2963401d2SDavid Green //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6963401d2SDavid Green //
7963401d2SDavid Green //===----------------------------------------------------------------------===//
8963401d2SDavid Green //
9963401d2SDavid Green // This file implements loop unroll and jam as a routine, much like
10963401d2SDavid Green // LoopUnroll.cpp implements loop unroll.
11963401d2SDavid Green //
12963401d2SDavid Green //===----------------------------------------------------------------------===//
13963401d2SDavid Green
14a5b6480dSAnh Tuyen Tran #include "llvm/ADT/ArrayRef.h"
15a5b6480dSAnh Tuyen Tran #include "llvm/ADT/DenseMap.h"
16a5b6480dSAnh Tuyen Tran #include "llvm/ADT/Optional.h"
17a5b6480dSAnh Tuyen Tran #include "llvm/ADT/STLExtras.h"
18963401d2SDavid Green #include "llvm/ADT/SmallPtrSet.h"
19a5b6480dSAnh Tuyen Tran #include "llvm/ADT/SmallVector.h"
20963401d2SDavid Green #include "llvm/ADT/Statistic.h"
21a5b6480dSAnh Tuyen Tran #include "llvm/ADT/StringRef.h"
22a5b6480dSAnh Tuyen Tran #include "llvm/ADT/Twine.h"
23a5b6480dSAnh Tuyen Tran #include "llvm/ADT/iterator_range.h"
24963401d2SDavid Green #include "llvm/Analysis/AssumptionCache.h"
25963401d2SDavid Green #include "llvm/Analysis/DependenceAnalysis.h"
26a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/DomTreeUpdater.h"
27a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/LoopInfo.h"
28963401d2SDavid Green #include "llvm/Analysis/LoopIterator.h"
29a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/MustExecute.h"
30963401d2SDavid Green #include "llvm/Analysis/OptimizationRemarkEmitter.h"
31963401d2SDavid Green #include "llvm/Analysis/ScalarEvolution.h"
32963401d2SDavid Green #include "llvm/IR/BasicBlock.h"
33963401d2SDavid Green #include "llvm/IR/DebugInfoMetadata.h"
34a5b6480dSAnh Tuyen Tran #include "llvm/IR/DebugLoc.h"
35a5b6480dSAnh Tuyen Tran #include "llvm/IR/DiagnosticInfo.h"
36963401d2SDavid Green #include "llvm/IR/Dominators.h"
37a5b6480dSAnh Tuyen Tran #include "llvm/IR/Function.h"
38a5b6480dSAnh Tuyen Tran #include "llvm/IR/Instruction.h"
39a5b6480dSAnh Tuyen Tran #include "llvm/IR/Instructions.h"
40963401d2SDavid Green #include "llvm/IR/IntrinsicInst.h"
41a5b6480dSAnh Tuyen Tran #include "llvm/IR/User.h"
42a5b6480dSAnh Tuyen Tran #include "llvm/IR/Value.h"
43a5b6480dSAnh Tuyen Tran #include "llvm/IR/ValueHandle.h"
44a5b6480dSAnh Tuyen Tran #include "llvm/IR/ValueMap.h"
45a5b6480dSAnh Tuyen Tran #include "llvm/Support/Casting.h"
46963401d2SDavid Green #include "llvm/Support/Debug.h"
47a5b6480dSAnh Tuyen Tran #include "llvm/Support/ErrorHandling.h"
48a5b6480dSAnh Tuyen Tran #include "llvm/Support/GenericDomTree.h"
49963401d2SDavid Green #include "llvm/Support/raw_ostream.h"
50963401d2SDavid Green #include "llvm/Transforms/Utils/BasicBlockUtils.h"
51963401d2SDavid Green #include "llvm/Transforms/Utils/Cloning.h"
52963401d2SDavid Green #include "llvm/Transforms/Utils/LoopUtils.h"
53963401d2SDavid Green #include "llvm/Transforms/Utils/UnrollLoop.h"
54a5b6480dSAnh Tuyen Tran #include "llvm/Transforms/Utils/ValueMapper.h"
55a5b6480dSAnh Tuyen Tran #include <assert.h>
56a5b6480dSAnh Tuyen Tran #include <memory>
57a5b6480dSAnh Tuyen Tran #include <type_traits>
58a5b6480dSAnh Tuyen Tran #include <vector>
59a5b6480dSAnh Tuyen Tran
60963401d2SDavid Green using namespace llvm;
61963401d2SDavid Green
62963401d2SDavid Green #define DEBUG_TYPE "loop-unroll-and-jam"
63963401d2SDavid Green
64963401d2SDavid Green STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
65963401d2SDavid Green STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
66963401d2SDavid Green
672557e437SDavid Green typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
68963401d2SDavid Green
69963401d2SDavid Green // Partition blocks in an outer/inner loop pair into blocks before and after
70963401d2SDavid Green // the loop
partitionLoopBlocks(Loop & L,BasicBlockSet & ForeBlocks,BasicBlockSet & AftBlocks,DominatorTree & DT)710a52401aSWhitney Tsang static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks,
720a52401aSWhitney Tsang BasicBlockSet &AftBlocks, DominatorTree &DT) {
730a52401aSWhitney Tsang Loop *SubLoop = L.getSubLoops()[0];
74963401d2SDavid Green BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
75963401d2SDavid Green
760a52401aSWhitney Tsang for (BasicBlock *BB : L.blocks()) {
77963401d2SDavid Green if (!SubLoop->contains(BB)) {
780a52401aSWhitney Tsang if (DT.dominates(SubLoopLatch, BB))
792557e437SDavid Green AftBlocks.insert(BB);
80963401d2SDavid Green else
812557e437SDavid Green ForeBlocks.insert(BB);
82963401d2SDavid Green }
83963401d2SDavid Green }
84963401d2SDavid Green
85963401d2SDavid Green // Check that all blocks in ForeBlocks together dominate the subloop
86963401d2SDavid Green // TODO: This might ideally be done better with a dominator/postdominators.
87963401d2SDavid Green BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
88963401d2SDavid Green for (BasicBlock *BB : ForeBlocks) {
89963401d2SDavid Green if (BB == SubLoopPreHeader)
90963401d2SDavid Green continue;
91edb12a83SChandler Carruth Instruction *TI = BB->getTerminator();
920a52401aSWhitney Tsang for (BasicBlock *Succ : successors(TI))
930a52401aSWhitney Tsang if (!ForeBlocks.count(Succ))
94963401d2SDavid Green return false;
95963401d2SDavid Green }
96963401d2SDavid Green
97963401d2SDavid Green return true;
98963401d2SDavid Green }
99963401d2SDavid Green
1000a52401aSWhitney Tsang /// Partition blocks in a loop nest into blocks before and after each inner
1010a52401aSWhitney Tsang /// loop.
partitionOuterLoopBlocks(Loop & Root,Loop & JamLoop,BasicBlockSet & JamLoopBlocks,DenseMap<Loop *,BasicBlockSet> & ForeBlocksMap,DenseMap<Loop *,BasicBlockSet> & AftBlocksMap,DominatorTree & DT)1020a52401aSWhitney Tsang static bool partitionOuterLoopBlocks(
1030a52401aSWhitney Tsang Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks,
1040a52401aSWhitney Tsang DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
1050a52401aSWhitney Tsang DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) {
1060a52401aSWhitney Tsang JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end());
1070a52401aSWhitney Tsang
1080a52401aSWhitney Tsang for (Loop *L : Root.getLoopsInPreorder()) {
1090a52401aSWhitney Tsang if (L == &JamLoop)
1100a52401aSWhitney Tsang break;
1110a52401aSWhitney Tsang
1120a52401aSWhitney Tsang if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT))
1130a52401aSWhitney Tsang return false;
1140a52401aSWhitney Tsang }
1150a52401aSWhitney Tsang
1160a52401aSWhitney Tsang return true;
1170a52401aSWhitney Tsang }
1180a52401aSWhitney Tsang
1190a52401aSWhitney Tsang // TODO Remove when UnrollAndJamLoop changed to support unroll and jamming more
1200a52401aSWhitney Tsang // than 2 levels loop.
partitionOuterLoopBlocks(Loop * L,Loop * SubLoop,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DominatorTree * DT)1210a52401aSWhitney Tsang static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
1220a52401aSWhitney Tsang BasicBlockSet &ForeBlocks,
1230a52401aSWhitney Tsang BasicBlockSet &SubLoopBlocks,
1240a52401aSWhitney Tsang BasicBlockSet &AftBlocks,
1250a52401aSWhitney Tsang DominatorTree *DT) {
1260a52401aSWhitney Tsang SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
1270a52401aSWhitney Tsang return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
1280a52401aSWhitney Tsang }
1290a52401aSWhitney Tsang
130eda3c9efSDavid Green // Looks at the phi nodes in Header for values coming from Latch. For these
131eda3c9efSDavid Green // instructions and all their operands calls Visit on them, keeping going for
132eda3c9efSDavid Green // all the operands in AftBlocks. Returns false if Visit returns false,
133eda3c9efSDavid Green // otherwise returns true. This is used to process the instructions in the
134eda3c9efSDavid Green // Aft blocks that need to be moved before the subloop. It is used in two
135eda3c9efSDavid Green // places. One to check that the required set of instructions can be moved
136eda3c9efSDavid Green // before the loop. Then to collect the instructions to actually move in
137eda3c9efSDavid Green // moveHeaderPhiOperandsToForeBlocks.
138eda3c9efSDavid Green template <typename T>
processHeaderPhiOperands(BasicBlock * Header,BasicBlock * Latch,BasicBlockSet & AftBlocks,T Visit)139eda3c9efSDavid Green static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
140eda3c9efSDavid Green BasicBlockSet &AftBlocks, T Visit) {
141eda3c9efSDavid Green SmallVector<Instruction *, 8> Worklist;
14255487079SDanilo C. Grael SmallPtrSet<Instruction *, 8> VisitedInstr;
143963401d2SDavid Green for (auto &Phi : Header->phis()) {
144963401d2SDavid Green Value *V = Phi.getIncomingValueForBlock(Latch);
145963401d2SDavid Green if (Instruction *I = dyn_cast<Instruction>(V))
146963401d2SDavid Green Worklist.push_back(I);
147963401d2SDavid Green }
148963401d2SDavid Green
149963401d2SDavid Green while (!Worklist.empty()) {
1501238378fSKazu Hirata Instruction *I = Worklist.pop_back_val();
151eda3c9efSDavid Green if (!Visit(I))
152eda3c9efSDavid Green return false;
15355487079SDanilo C. Grael VisitedInstr.insert(I);
154963401d2SDavid Green
155eda3c9efSDavid Green if (AftBlocks.count(I->getParent()))
156963401d2SDavid Green for (auto &U : I->operands())
157963401d2SDavid Green if (Instruction *II = dyn_cast<Instruction>(U))
15855487079SDanilo C. Grael if (!VisitedInstr.count(II))
159963401d2SDavid Green Worklist.push_back(II);
160963401d2SDavid Green }
161963401d2SDavid Green
162eda3c9efSDavid Green return true;
163eda3c9efSDavid Green }
164eda3c9efSDavid Green
165eda3c9efSDavid Green // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
moveHeaderPhiOperandsToForeBlocks(BasicBlock * Header,BasicBlock * Latch,Instruction * InsertLoc,BasicBlockSet & AftBlocks)166eda3c9efSDavid Green static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
167eda3c9efSDavid Green BasicBlock *Latch,
168eda3c9efSDavid Green Instruction *InsertLoc,
169eda3c9efSDavid Green BasicBlockSet &AftBlocks) {
170eda3c9efSDavid Green // We need to ensure we move the instructions in the correct order,
171eda3c9efSDavid Green // starting with the earliest required instruction and moving forward.
172eda3c9efSDavid Green std::vector<Instruction *> Visited;
173eda3c9efSDavid Green processHeaderPhiOperands(Header, Latch, AftBlocks,
174eda3c9efSDavid Green [&Visited, &AftBlocks](Instruction *I) {
175eda3c9efSDavid Green if (AftBlocks.count(I->getParent()))
176eda3c9efSDavid Green Visited.push_back(I);
177eda3c9efSDavid Green return true;
178eda3c9efSDavid Green });
179eda3c9efSDavid Green
180963401d2SDavid Green // Move all instructions in program order to before the InsertLoc
181963401d2SDavid Green BasicBlock *InsertLocBB = InsertLoc->getParent();
182963401d2SDavid Green for (Instruction *I : reverse(Visited)) {
183963401d2SDavid Green if (I->getParent() != InsertLocBB)
184963401d2SDavid Green I->moveBefore(InsertLoc);
185963401d2SDavid Green }
186963401d2SDavid Green }
187963401d2SDavid Green
188963401d2SDavid Green /*
189963401d2SDavid Green This method performs Unroll and Jam. For a simple loop like:
190963401d2SDavid Green for (i = ..)
191963401d2SDavid Green Fore(i)
192963401d2SDavid Green for (j = ..)
193963401d2SDavid Green SubLoop(i, j)
194963401d2SDavid Green Aft(i)
195963401d2SDavid Green
196963401d2SDavid Green Instead of doing normal inner or outer unrolling, we do:
197963401d2SDavid Green for (i = .., i+=2)
198963401d2SDavid Green Fore(i)
199963401d2SDavid Green Fore(i+1)
200963401d2SDavid Green for (j = ..)
201963401d2SDavid Green SubLoop(i, j)
202963401d2SDavid Green SubLoop(i+1, j)
203963401d2SDavid Green Aft(i)
204963401d2SDavid Green Aft(i+1)
205963401d2SDavid Green
206963401d2SDavid Green So the outer loop is essetially unrolled and then the inner loops are fused
207963401d2SDavid Green ("jammed") together into a single loop. This can increase speed when there
208963401d2SDavid Green are loads in SubLoop that are invariant to i, as they become shared between
209963401d2SDavid Green the now jammed inner loops.
210963401d2SDavid Green
211963401d2SDavid Green We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
212963401d2SDavid Green Fore blocks are those before the inner loop, Aft are those after. Normal
213963401d2SDavid Green Unroll code is used to copy each of these sets of blocks and the results are
214963401d2SDavid Green combined together into the final form above.
215963401d2SDavid Green
216963401d2SDavid Green isSafeToUnrollAndJam should be used prior to calling this to make sure the
217963401d2SDavid Green unrolling will be valid. Checking profitablility is also advisable.
21872448525SMichael Kruse
21972448525SMichael Kruse If EpilogueLoop is non-null, it receives the epilogue loop (if it was
22072448525SMichael Kruse necessary to create one and not fully unrolled).
221963401d2SDavid Green */
222e77d24f7Smaekawatoshiki LoopUnrollResult
UnrollAndJamLoop(Loop * L,unsigned Count,unsigned TripCount,unsigned TripMultiple,bool UnrollRemainder,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE,Loop ** EpilogueLoop)223e77d24f7Smaekawatoshiki llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
224e77d24f7Smaekawatoshiki unsigned TripMultiple, bool UnrollRemainder,
225e77d24f7Smaekawatoshiki LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
2260789f280SRoman Lebedev AssumptionCache *AC, const TargetTransformInfo *TTI,
227e77d24f7Smaekawatoshiki OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
228963401d2SDavid Green
229963401d2SDavid Green // When we enter here we should have already checked that it is safe
230963401d2SDavid Green BasicBlock *Header = L->getHeader();
23160cb193aSDávid Bolvanský assert(Header && "No header.");
232963401d2SDavid Green assert(L->getSubLoops().size() == 1);
233963401d2SDavid Green Loop *SubLoop = *L->begin();
234963401d2SDavid Green
235963401d2SDavid Green // Don't enter the unroll code if there is nothing to do.
236963401d2SDavid Green if (TripCount == 0 && Count < 2) {
237bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
238963401d2SDavid Green return LoopUnrollResult::Unmodified;
239963401d2SDavid Green }
240963401d2SDavid Green
241963401d2SDavid Green assert(Count > 0);
242963401d2SDavid Green assert(TripMultiple > 0);
243963401d2SDavid Green assert(TripCount == 0 || TripCount % TripMultiple == 0);
244963401d2SDavid Green
245963401d2SDavid Green // Are we eliminating the loop control altogether?
246963401d2SDavid Green bool CompletelyUnroll = (Count == TripCount);
247963401d2SDavid Green
248963401d2SDavid Green // We use the runtime remainder in cases where we don't know trip multiple
2491bd4085eSNikita Popov if (TripMultiple % Count != 0) {
250963401d2SDavid Green if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
251963401d2SDavid Green /*UseEpilogRemainder*/ true,
2522312a06cSAlina Sbirlea UnrollRemainder, /*ForgetAllSCEV*/ false,
2530789f280SRoman Lebedev LI, SE, DT, AC, TTI, true, EpilogueLoop)) {
254963401d2SDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
255963401d2SDavid Green "generated when assuming runtime trip count\n");
256963401d2SDavid Green return LoopUnrollResult::Unmodified;
257963401d2SDavid Green }
258963401d2SDavid Green }
259963401d2SDavid Green
260963401d2SDavid Green // Notify ScalarEvolution that the loop will be substantially changed,
261963401d2SDavid Green // if not outright eliminated.
262963401d2SDavid Green if (SE) {
263963401d2SDavid Green SE->forgetLoop(L);
264963401d2SDavid Green SE->forgetLoop(SubLoop);
265963401d2SDavid Green }
266963401d2SDavid Green
267963401d2SDavid Green using namespace ore;
268963401d2SDavid Green // Report the unrolling decision.
269963401d2SDavid Green if (CompletelyUnroll) {
270963401d2SDavid Green LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
271963401d2SDavid Green << Header->getName() << " with trip count " << TripCount
272963401d2SDavid Green << "!\n");
273963401d2SDavid Green ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
274963401d2SDavid Green L->getHeader())
275963401d2SDavid Green << "completely unroll and jammed loop with "
276963401d2SDavid Green << NV("UnrollCount", TripCount) << " iterations");
277963401d2SDavid Green } else {
278963401d2SDavid Green auto DiagBuilder = [&]() {
279963401d2SDavid Green OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
280963401d2SDavid Green L->getHeader());
281963401d2SDavid Green return Diag << "unroll and jammed loop by a factor of "
282963401d2SDavid Green << NV("UnrollCount", Count);
283963401d2SDavid Green };
284963401d2SDavid Green
285963401d2SDavid Green LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
286963401d2SDavid Green << " by " << Count);
287963401d2SDavid Green if (TripMultiple != 1) {
288963401d2SDavid Green LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
289963401d2SDavid Green ORE->emit([&]() {
290963401d2SDavid Green return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
291963401d2SDavid Green << " trips per branch";
292963401d2SDavid Green });
293963401d2SDavid Green } else {
294963401d2SDavid Green LLVM_DEBUG(dbgs() << " with run-time trip count");
295963401d2SDavid Green ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
296963401d2SDavid Green }
297963401d2SDavid Green LLVM_DEBUG(dbgs() << "!\n");
298963401d2SDavid Green }
299963401d2SDavid Green
300963401d2SDavid Green BasicBlock *Preheader = L->getLoopPreheader();
301963401d2SDavid Green BasicBlock *LatchBlock = L->getLoopLatch();
30260cb193aSDávid Bolvanský assert(Preheader && "No preheader");
30360cb193aSDávid Bolvanský assert(LatchBlock && "No latch block");
304963401d2SDavid Green BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
305963401d2SDavid Green assert(BI && !BI->isUnconditional());
306963401d2SDavid Green bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
307963401d2SDavid Green BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
308963401d2SDavid Green bool SubLoopContinueOnTrue = SubLoop->contains(
309963401d2SDavid Green SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
310963401d2SDavid Green
311963401d2SDavid Green // Partition blocks in an outer/inner loop pair into blocks before and after
312963401d2SDavid Green // the loop
3132557e437SDavid Green BasicBlockSet SubLoopBlocks;
3142557e437SDavid Green BasicBlockSet ForeBlocks;
3152557e437SDavid Green BasicBlockSet AftBlocks;
316963401d2SDavid Green partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
317963401d2SDavid Green DT);
318963401d2SDavid Green
319963401d2SDavid Green // We keep track of the entering/first and exiting/last block of each of
320963401d2SDavid Green // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
321963401d2SDavid Green // blocks easier.
322963401d2SDavid Green std::vector<BasicBlock *> ForeBlocksFirst;
323963401d2SDavid Green std::vector<BasicBlock *> ForeBlocksLast;
324963401d2SDavid Green std::vector<BasicBlock *> SubLoopBlocksFirst;
325963401d2SDavid Green std::vector<BasicBlock *> SubLoopBlocksLast;
326963401d2SDavid Green std::vector<BasicBlock *> AftBlocksFirst;
327963401d2SDavid Green std::vector<BasicBlock *> AftBlocksLast;
328963401d2SDavid Green ForeBlocksFirst.push_back(Header);
329963401d2SDavid Green ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
330963401d2SDavid Green SubLoopBlocksFirst.push_back(SubLoop->getHeader());
331963401d2SDavid Green SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
332963401d2SDavid Green AftBlocksFirst.push_back(SubLoop->getExitBlock());
333963401d2SDavid Green AftBlocksLast.push_back(L->getExitingBlock());
334963401d2SDavid Green // Maps Blocks[0] -> Blocks[It]
335963401d2SDavid Green ValueToValueMapTy LastValueMap;
336963401d2SDavid Green
337963401d2SDavid Green // Move any instructions from fore phi operands from AftBlocks into Fore.
338963401d2SDavid Green moveHeaderPhiOperandsToForeBlocks(
339cd0cff43SWhitney Tsang Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
340963401d2SDavid Green
341963401d2SDavid Green // The current on-the-fly SSA update requires blocks to be processed in
342963401d2SDavid Green // reverse postorder so that LastValueMap contains the correct value at each
343963401d2SDavid Green // exit.
344963401d2SDavid Green LoopBlocksDFS DFS(L);
345963401d2SDavid Green DFS.perform(LI);
346963401d2SDavid Green // Stash the DFS iterators before adding blocks to the loop.
347963401d2SDavid Green LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
348963401d2SDavid Green LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
349963401d2SDavid Green
350886629a8SRong Xu // When a FSDiscriminator is enabled, we don't need to add the multiply
351886629a8SRong Xu // factors to the discriminators.
352886629a8SRong Xu if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
353963401d2SDavid Green for (BasicBlock *BB : L->getBlocks())
354963401d2SDavid Green for (Instruction &I : *BB)
355963401d2SDavid Green if (!isa<DbgInfoIntrinsic>(&I))
356b53eeb6fSMircea Trofin if (const DILocation *DIL = I.getDebugLoc()) {
357ec026302SMircea Trofin auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
358b53eeb6fSMircea Trofin if (NewDIL)
359*7a47ee51SKazu Hirata I.setDebugLoc(*NewDIL);
360b53eeb6fSMircea Trofin else
361b53eeb6fSMircea Trofin LLVM_DEBUG(dbgs()
362b53eeb6fSMircea Trofin << "Failed to create new discriminator: "
363b53eeb6fSMircea Trofin << DIL->getFilename() << " Line: " << DIL->getLine());
364b53eeb6fSMircea Trofin }
365963401d2SDavid Green
366963401d2SDavid Green // Copy all blocks
367963401d2SDavid Green for (unsigned It = 1; It != Count; ++It) {
3682b335e9aSWhitney Tsang SmallVector<BasicBlock *, 8> NewBlocks;
369963401d2SDavid Green // Maps Blocks[It] -> Blocks[It-1]
370963401d2SDavid Green DenseMap<Value *, Value *> PrevItValueMap;
37170d4a202SDavid Green SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
37270d4a202SDavid Green NewLoops[L] = L;
37370d4a202SDavid Green NewLoops[SubLoop] = SubLoop;
374963401d2SDavid Green
375963401d2SDavid Green for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
376963401d2SDavid Green ValueToValueMapTy VMap;
377963401d2SDavid Green BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
378963401d2SDavid Green Header->getParent()->getBasicBlockList().push_back(New);
379963401d2SDavid Green
38070d4a202SDavid Green // Tell LI about New.
38170d4a202SDavid Green addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
382963401d2SDavid Green
38370d4a202SDavid Green if (ForeBlocks.count(*BB)) {
384963401d2SDavid Green if (*BB == ForeBlocksFirst[0])
385963401d2SDavid Green ForeBlocksFirst.push_back(New);
386963401d2SDavid Green if (*BB == ForeBlocksLast[0])
387963401d2SDavid Green ForeBlocksLast.push_back(New);
3882557e437SDavid Green } else if (SubLoopBlocks.count(*BB)) {
389963401d2SDavid Green if (*BB == SubLoopBlocksFirst[0])
390963401d2SDavid Green SubLoopBlocksFirst.push_back(New);
391963401d2SDavid Green if (*BB == SubLoopBlocksLast[0])
392963401d2SDavid Green SubLoopBlocksLast.push_back(New);
3932557e437SDavid Green } else if (AftBlocks.count(*BB)) {
394963401d2SDavid Green if (*BB == AftBlocksFirst[0])
395963401d2SDavid Green AftBlocksFirst.push_back(New);
396963401d2SDavid Green if (*BB == AftBlocksLast[0])
397963401d2SDavid Green AftBlocksLast.push_back(New);
398963401d2SDavid Green } else {
399963401d2SDavid Green llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
400963401d2SDavid Green }
401963401d2SDavid Green
402963401d2SDavid Green // Update our running maps of newest clones
403963401d2SDavid Green PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
404963401d2SDavid Green LastValueMap[*BB] = New;
405963401d2SDavid Green for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
406963401d2SDavid Green VI != VE; ++VI) {
407963401d2SDavid Green PrevItValueMap[VI->second] =
408963401d2SDavid Green const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
409963401d2SDavid Green LastValueMap[VI->first] = VI->second;
410963401d2SDavid Green }
411963401d2SDavid Green
412963401d2SDavid Green NewBlocks.push_back(New);
413963401d2SDavid Green
414963401d2SDavid Green // Update DomTree:
415963401d2SDavid Green if (*BB == ForeBlocksFirst[0])
416963401d2SDavid Green DT->addNewBlock(New, ForeBlocksLast[It - 1]);
417963401d2SDavid Green else if (*BB == SubLoopBlocksFirst[0])
418963401d2SDavid Green DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
419963401d2SDavid Green else if (*BB == AftBlocksFirst[0])
420963401d2SDavid Green DT->addNewBlock(New, AftBlocksLast[It - 1]);
421963401d2SDavid Green else {
422963401d2SDavid Green // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
423963401d2SDavid Green // structure.
424963401d2SDavid Green auto BBDomNode = DT->getNode(*BB);
425963401d2SDavid Green auto BBIDom = BBDomNode->getIDom();
426963401d2SDavid Green BasicBlock *OriginalBBIDom = BBIDom->getBlock();
427963401d2SDavid Green assert(OriginalBBIDom);
428963401d2SDavid Green assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
429963401d2SDavid Green DT->addNewBlock(
430963401d2SDavid Green New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
431963401d2SDavid Green }
432963401d2SDavid Green }
433963401d2SDavid Green
434963401d2SDavid Green // Remap all instructions in the most recent iteration
4352b335e9aSWhitney Tsang remapInstructionsInBlocks(NewBlocks, LastValueMap);
436963401d2SDavid Green for (BasicBlock *NewBlock : NewBlocks) {
437963401d2SDavid Green for (Instruction &I : *NewBlock) {
438a6d2a8d6SPhilip Reames if (auto *II = dyn_cast<AssumeInst>(&I))
439963401d2SDavid Green AC->registerAssumption(II);
440963401d2SDavid Green }
441963401d2SDavid Green }
442963401d2SDavid Green
443963401d2SDavid Green // Alter the ForeBlocks phi's, pointing them at the latest version of the
444963401d2SDavid Green // value from the previous iteration's phis
445963401d2SDavid Green for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
446963401d2SDavid Green Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
447963401d2SDavid Green assert(OldValue && "should have incoming edge from Aft[It]");
448963401d2SDavid Green Value *NewValue = OldValue;
449963401d2SDavid Green if (Value *PrevValue = PrevItValueMap[OldValue])
450963401d2SDavid Green NewValue = PrevValue;
451963401d2SDavid Green
452963401d2SDavid Green assert(Phi.getNumOperands() == 2);
453963401d2SDavid Green Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
454963401d2SDavid Green Phi.setIncomingValue(0, NewValue);
455963401d2SDavid Green Phi.removeIncomingValue(1);
456963401d2SDavid Green }
457963401d2SDavid Green }
458963401d2SDavid Green
459963401d2SDavid Green // Now that all the basic blocks for the unrolled iterations are in place,
460963401d2SDavid Green // finish up connecting the blocks and phi nodes. At this point LastValueMap
461963401d2SDavid Green // is the last unrolled iterations values.
462963401d2SDavid Green
463963401d2SDavid Green // Update Phis in BB from OldBB to point to NewBB and use the latest value
464963401d2SDavid Green // from LastValueMap
465963401d2SDavid Green auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
466963401d2SDavid Green BasicBlock *NewBB,
467963401d2SDavid Green ValueToValueMapTy &LastValueMap) {
468963401d2SDavid Green for (PHINode &Phi : BB->phis()) {
469963401d2SDavid Green for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
470963401d2SDavid Green if (Phi.getIncomingBlock(b) == OldBB) {
471963401d2SDavid Green Value *OldValue = Phi.getIncomingValue(b);
472963401d2SDavid Green if (Value *LastValue = LastValueMap[OldValue])
473963401d2SDavid Green Phi.setIncomingValue(b, LastValue);
474963401d2SDavid Green Phi.setIncomingBlock(b, NewBB);
475963401d2SDavid Green break;
476963401d2SDavid Green }
477963401d2SDavid Green }
478963401d2SDavid Green }
479963401d2SDavid Green };
480963401d2SDavid Green // Move all the phis from Src into Dest
481963401d2SDavid Green auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
482963401d2SDavid Green Instruction *insertPoint = Dest->getFirstNonPHI();
483963401d2SDavid Green while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
484963401d2SDavid Green Phi->moveBefore(insertPoint);
485963401d2SDavid Green };
486963401d2SDavid Green
487963401d2SDavid Green // Update the PHI values outside the loop to point to the last block
488963401d2SDavid Green updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
489963401d2SDavid Green LastValueMap);
490963401d2SDavid Green
491963401d2SDavid Green // Update ForeBlocks successors and phi nodes
492963401d2SDavid Green BranchInst *ForeTerm =
493963401d2SDavid Green cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
494cd0cff43SWhitney Tsang assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
495cd0cff43SWhitney Tsang ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]);
496963401d2SDavid Green
497963401d2SDavid Green if (CompletelyUnroll) {
498963401d2SDavid Green while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
499963401d2SDavid Green Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
500963401d2SDavid Green Phi->getParent()->getInstList().erase(Phi);
501963401d2SDavid Green }
502963401d2SDavid Green } else {
503963401d2SDavid Green // Update the PHI values to point to the last aft block
504963401d2SDavid Green updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
505963401d2SDavid Green AftBlocksLast.back(), LastValueMap);
506963401d2SDavid Green }
507963401d2SDavid Green
508963401d2SDavid Green for (unsigned It = 1; It != Count; It++) {
509963401d2SDavid Green // Remap ForeBlock successors from previous iteration to this
510963401d2SDavid Green BranchInst *ForeTerm =
511963401d2SDavid Green cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
512cd0cff43SWhitney Tsang assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
513cd0cff43SWhitney Tsang ForeTerm->setSuccessor(0, ForeBlocksFirst[It]);
514963401d2SDavid Green }
515963401d2SDavid Green
516963401d2SDavid Green // Subloop successors and phis
517963401d2SDavid Green BranchInst *SubTerm =
518963401d2SDavid Green cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
519963401d2SDavid Green SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
520963401d2SDavid Green SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
521aa994d98SWhitney Tsang SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
522963401d2SDavid Green ForeBlocksLast.back());
523aa994d98SWhitney Tsang SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
524963401d2SDavid Green SubLoopBlocksLast.back());
525963401d2SDavid Green
526963401d2SDavid Green for (unsigned It = 1; It != Count; It++) {
527963401d2SDavid Green // Replace the conditional branch of the previous iteration subloop with an
528963401d2SDavid Green // unconditional one to this one
529963401d2SDavid Green BranchInst *SubTerm =
530963401d2SDavid Green cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
531963401d2SDavid Green BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
532963401d2SDavid Green SubTerm->eraseFromParent();
533963401d2SDavid Green
534aa994d98SWhitney Tsang SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
535963401d2SDavid Green ForeBlocksLast.back());
536aa994d98SWhitney Tsang SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
537963401d2SDavid Green SubLoopBlocksLast.back());
538963401d2SDavid Green movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
539963401d2SDavid Green }
540963401d2SDavid Green
541963401d2SDavid Green // Aft blocks successors and phis
542cd0cff43SWhitney Tsang BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
543963401d2SDavid Green if (CompletelyUnroll) {
544cd0cff43SWhitney Tsang BranchInst::Create(LoopExit, AftTerm);
545cd0cff43SWhitney Tsang AftTerm->eraseFromParent();
546963401d2SDavid Green } else {
547cd0cff43SWhitney Tsang AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
548cd0cff43SWhitney Tsang assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
549cd0cff43SWhitney Tsang "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
550963401d2SDavid Green }
551aa994d98SWhitney Tsang AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
552963401d2SDavid Green SubLoopBlocksLast.back());
553963401d2SDavid Green
554963401d2SDavid Green for (unsigned It = 1; It != Count; It++) {
555963401d2SDavid Green // Replace the conditional branch of the previous iteration subloop with an
556963401d2SDavid Green // unconditional one to this one
557963401d2SDavid Green BranchInst *AftTerm =
558963401d2SDavid Green cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
559963401d2SDavid Green BranchInst::Create(AftBlocksFirst[It], AftTerm);
560963401d2SDavid Green AftTerm->eraseFromParent();
561963401d2SDavid Green
562aa994d98SWhitney Tsang AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
563963401d2SDavid Green SubLoopBlocksLast.back());
564963401d2SDavid Green movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
565963401d2SDavid Green }
566963401d2SDavid Green
567f9cdb98fSFlorian Hahn DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
568963401d2SDavid Green // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
569963401d2SDavid Green // new ones required.
570963401d2SDavid Green if (Count != 1) {
571963401d2SDavid Green SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
572963401d2SDavid Green DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
573963401d2SDavid Green SubLoopBlocksFirst[0]);
574963401d2SDavid Green DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
575963401d2SDavid Green SubLoopBlocksLast[0], AftBlocksFirst[0]);
576963401d2SDavid Green
577963401d2SDavid Green DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
578963401d2SDavid Green ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
579963401d2SDavid Green DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
580963401d2SDavid Green SubLoopBlocksLast.back(), AftBlocksFirst[0]);
581f9cdb98fSFlorian Hahn DTU.applyUpdatesPermissive(DTUpdates);
582963401d2SDavid Green }
583963401d2SDavid Green
584963401d2SDavid Green // Merge adjacent basic blocks, if possible.
585963401d2SDavid Green SmallPtrSet<BasicBlock *, 16> MergeBlocks;
586963401d2SDavid Green MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
587963401d2SDavid Green MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
588963401d2SDavid Green MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
58915b6730fSSidharth Baveja
59015b6730fSSidharth Baveja MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI);
59115b6730fSSidharth Baveja
592f9cdb98fSFlorian Hahn // Apply updates to the DomTree.
593f9cdb98fSFlorian Hahn DT = &DTU.getDomTree();
594963401d2SDavid Green
595963401d2SDavid Green // At this point, the code is well formed. We now do a quick sweep over the
596963401d2SDavid Green // inserted code, doing constant propagation and dead code elimination as we
597963401d2SDavid Green // go.
5980789f280SRoman Lebedev simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI);
5990789f280SRoman Lebedev simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC,
6000789f280SRoman Lebedev TTI);
601963401d2SDavid Green
602963401d2SDavid Green NumCompletelyUnrolledAndJammed += CompletelyUnroll;
603963401d2SDavid Green ++NumUnrolledAndJammed;
604963401d2SDavid Green
605cd0cff43SWhitney Tsang // Update LoopInfo if the loop is completely removed.
606e77d24f7Smaekawatoshiki if (CompletelyUnroll)
607cd0cff43SWhitney Tsang LI->erase(L);
608cd0cff43SWhitney Tsang
609963401d2SDavid Green #ifndef NDEBUG
610963401d2SDavid Green // We shouldn't have done anything to break loop simplify form or LCSSA.
611cd0cff43SWhitney Tsang Loop *OutestLoop = SubLoop->getParentLoop()
612cd0cff43SWhitney Tsang ? SubLoop->getParentLoop()->getParentLoop()
613cd0cff43SWhitney Tsang ? SubLoop->getParentLoop()->getParentLoop()
614cd0cff43SWhitney Tsang : SubLoop->getParentLoop()
615cd0cff43SWhitney Tsang : SubLoop;
616cd0cff43SWhitney Tsang assert(DT->verify());
617cd0cff43SWhitney Tsang LI->verify(*DT);
618963401d2SDavid Green assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
619963401d2SDavid Green if (!CompletelyUnroll)
620963401d2SDavid Green assert(L->isLoopSimplifyForm());
621963401d2SDavid Green assert(SubLoop->isLoopSimplifyForm());
622cd0cff43SWhitney Tsang SE->verify();
623963401d2SDavid Green #endif
624963401d2SDavid Green
625963401d2SDavid Green return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
626963401d2SDavid Green : LoopUnrollResult::PartiallyUnrolled;
627963401d2SDavid Green }
628963401d2SDavid Green
getLoadsAndStores(BasicBlockSet & Blocks,SmallVector<Instruction *,4> & MemInstr)6292557e437SDavid Green static bool getLoadsAndStores(BasicBlockSet &Blocks,
6300a52401aSWhitney Tsang SmallVector<Instruction *, 4> &MemInstr) {
631963401d2SDavid Green // Scan the BBs and collect legal loads and stores.
632963401d2SDavid Green // Returns false if non-simple loads/stores are found.
633963401d2SDavid Green for (BasicBlock *BB : Blocks) {
634963401d2SDavid Green for (Instruction &I : *BB) {
635963401d2SDavid Green if (auto *Ld = dyn_cast<LoadInst>(&I)) {
636963401d2SDavid Green if (!Ld->isSimple())
637963401d2SDavid Green return false;
638963401d2SDavid Green MemInstr.push_back(&I);
639963401d2SDavid Green } else if (auto *St = dyn_cast<StoreInst>(&I)) {
640963401d2SDavid Green if (!St->isSimple())
641963401d2SDavid Green return false;
642963401d2SDavid Green MemInstr.push_back(&I);
643963401d2SDavid Green } else if (I.mayReadOrWriteMemory()) {
644963401d2SDavid Green return false;
645963401d2SDavid Green }
646963401d2SDavid Green }
647963401d2SDavid Green }
648963401d2SDavid Green return true;
649963401d2SDavid Green }
650963401d2SDavid Green
preservesForwardDependence(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,Dependence * D)6510a52401aSWhitney Tsang static bool preservesForwardDependence(Instruction *Src, Instruction *Dst,
6520a52401aSWhitney Tsang unsigned UnrollLevel, unsigned JamLevel,
6530a52401aSWhitney Tsang bool Sequentialized, Dependence *D) {
6540a52401aSWhitney Tsang // UnrollLevel might carry the dependency Src --> Dst
6550a52401aSWhitney Tsang // Does a different loop after unrolling?
6560a52401aSWhitney Tsang for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6570a52401aSWhitney Tsang ++CurLoopDepth) {
6580a52401aSWhitney Tsang auto JammedDir = D->getDirection(CurLoopDepth);
6590a52401aSWhitney Tsang if (JammedDir == Dependence::DVEntry::LT)
6600a52401aSWhitney Tsang return true;
6610a52401aSWhitney Tsang
6620a52401aSWhitney Tsang if (JammedDir & Dependence::DVEntry::GT)
6630a52401aSWhitney Tsang return false;
6640a52401aSWhitney Tsang }
6650a52401aSWhitney Tsang
6660a52401aSWhitney Tsang return true;
6670a52401aSWhitney Tsang }
6680a52401aSWhitney Tsang
preservesBackwardDependence(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,Dependence * D)6690a52401aSWhitney Tsang static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst,
6700a52401aSWhitney Tsang unsigned UnrollLevel, unsigned JamLevel,
6710a52401aSWhitney Tsang bool Sequentialized, Dependence *D) {
6720a52401aSWhitney Tsang // UnrollLevel might carry the dependency Dst --> Src
6730a52401aSWhitney Tsang for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6740a52401aSWhitney Tsang ++CurLoopDepth) {
6750a52401aSWhitney Tsang auto JammedDir = D->getDirection(CurLoopDepth);
6760a52401aSWhitney Tsang if (JammedDir == Dependence::DVEntry::GT)
6770a52401aSWhitney Tsang return true;
6780a52401aSWhitney Tsang
6790a52401aSWhitney Tsang if (JammedDir & Dependence::DVEntry::LT)
6800a52401aSWhitney Tsang return false;
6810a52401aSWhitney Tsang }
6820a52401aSWhitney Tsang
6830a52401aSWhitney Tsang // Backward dependencies are only preserved if not interleaved.
6840a52401aSWhitney Tsang return Sequentialized;
6850a52401aSWhitney Tsang }
6860a52401aSWhitney Tsang
6870a52401aSWhitney Tsang // Check whether it is semantically safe Src and Dst considering any potential
6880a52401aSWhitney Tsang // dependency between them.
6890a52401aSWhitney Tsang //
6900a52401aSWhitney Tsang // @param UnrollLevel The level of the loop being unrolled
6910a52401aSWhitney Tsang // @param JamLevel The level of the loop being jammed; if Src and Dst are on
6920a52401aSWhitney Tsang // different levels, the outermost common loop counts as jammed level
6930a52401aSWhitney Tsang //
6940a52401aSWhitney Tsang // @return true if is safe and false if there is a dependency violation.
checkDependency(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,DependenceInfo & DI)6950a52401aSWhitney Tsang static bool checkDependency(Instruction *Src, Instruction *Dst,
6960a52401aSWhitney Tsang unsigned UnrollLevel, unsigned JamLevel,
6970a52401aSWhitney Tsang bool Sequentialized, DependenceInfo &DI) {
6980a52401aSWhitney Tsang assert(UnrollLevel <= JamLevel &&
6990a52401aSWhitney Tsang "Expecting JamLevel to be at least UnrollLevel");
7000a52401aSWhitney Tsang
701963401d2SDavid Green if (Src == Dst)
7020a52401aSWhitney Tsang return true;
703963401d2SDavid Green // Ignore Input dependencies.
704963401d2SDavid Green if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
7050a52401aSWhitney Tsang return true;
706963401d2SDavid Green
7070a52401aSWhitney Tsang // Check whether unroll-and-jam may violate a dependency.
7080a52401aSWhitney Tsang // By construction, every dependency will be lexicographically non-negative
7090a52401aSWhitney Tsang // (if it was, it would violate the current execution order), such as
7100a52401aSWhitney Tsang // (0,0,>,*,*)
7110a52401aSWhitney Tsang // Unroll-and-jam changes the GT execution of two executions to the same
7120a52401aSWhitney Tsang // iteration of the chosen unroll level. That is, a GT dependence becomes a GE
7130a52401aSWhitney Tsang // dependence (or EQ, if we fully unrolled the loop) at the loop's position:
7140a52401aSWhitney Tsang // (0,0,>=,*,*)
7150a52401aSWhitney Tsang // Now, the dependency is not necessarily non-negative anymore, i.e.
7160a52401aSWhitney Tsang // unroll-and-jam may violate correctness.
7170a52401aSWhitney Tsang std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true);
7180a52401aSWhitney Tsang if (!D)
7190a52401aSWhitney Tsang return true;
720963401d2SDavid Green assert(D->isOrdered() && "Expected an output, flow or anti dep.");
721963401d2SDavid Green
722bc2e1c3aSDavid Green if (D->isConfused()) {
723bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
724bc2e1c3aSDavid Green << " " << *Src << "\n"
725bc2e1c3aSDavid Green << " " << *Dst << "\n");
726963401d2SDavid Green return false;
727bc2e1c3aSDavid Green }
7280a52401aSWhitney Tsang
7290a52401aSWhitney Tsang // If outer levels (levels enclosing the loop being unroll-and-jammed) have a
7300a52401aSWhitney Tsang // non-equal direction, then the locations accessed in the inner levels cannot
7310a52401aSWhitney Tsang // overlap in memory. We assumes the indexes never overlap into neighboring
7320a52401aSWhitney Tsang // dimensions.
7330a52401aSWhitney Tsang for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
7340a52401aSWhitney Tsang if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ))
7350a52401aSWhitney Tsang return true;
7360a52401aSWhitney Tsang
7370a52401aSWhitney Tsang auto UnrollDirection = D->getDirection(UnrollLevel);
7380a52401aSWhitney Tsang
7390a52401aSWhitney Tsang // If the distance carried by the unrolled loop is 0, then after unrolling
7400a52401aSWhitney Tsang // that distance will become non-zero resulting in non-overlapping accesses in
7410a52401aSWhitney Tsang // the inner loops.
7420a52401aSWhitney Tsang if (UnrollDirection == Dependence::DVEntry::EQ)
7430a52401aSWhitney Tsang return true;
7440a52401aSWhitney Tsang
7450a52401aSWhitney Tsang if (UnrollDirection & Dependence::DVEntry::LT &&
7460a52401aSWhitney Tsang !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel,
7470a52401aSWhitney Tsang Sequentialized, D.get()))
748963401d2SDavid Green return false;
7490a52401aSWhitney Tsang
7500a52401aSWhitney Tsang if (UnrollDirection & Dependence::DVEntry::GT &&
7510a52401aSWhitney Tsang !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel,
7520a52401aSWhitney Tsang Sequentialized, D.get()))
7530a52401aSWhitney Tsang return false;
7540a52401aSWhitney Tsang
7550a52401aSWhitney Tsang return true;
756bc2e1c3aSDavid Green }
7570a52401aSWhitney Tsang
7580a52401aSWhitney Tsang static bool
checkDependencies(Loop & Root,const BasicBlockSet & SubLoopBlocks,const DenseMap<Loop *,BasicBlockSet> & ForeBlocksMap,const DenseMap<Loop *,BasicBlockSet> & AftBlocksMap,DependenceInfo & DI,LoopInfo & LI)7590a52401aSWhitney Tsang checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
7600a52401aSWhitney Tsang const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
7610a52401aSWhitney Tsang const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap,
7620a52401aSWhitney Tsang DependenceInfo &DI, LoopInfo &LI) {
7630a52401aSWhitney Tsang SmallVector<BasicBlockSet, 8> AllBlocks;
7640a52401aSWhitney Tsang for (Loop *L : Root.getLoopsInPreorder())
7650a52401aSWhitney Tsang if (ForeBlocksMap.find(L) != ForeBlocksMap.end())
7660a52401aSWhitney Tsang AllBlocks.push_back(ForeBlocksMap.lookup(L));
7670a52401aSWhitney Tsang AllBlocks.push_back(SubLoopBlocks);
7680a52401aSWhitney Tsang for (Loop *L : Root.getLoopsInPreorder())
7690a52401aSWhitney Tsang if (AftBlocksMap.find(L) != AftBlocksMap.end())
7700a52401aSWhitney Tsang AllBlocks.push_back(AftBlocksMap.lookup(L));
7710a52401aSWhitney Tsang
7720a52401aSWhitney Tsang unsigned LoopDepth = Root.getLoopDepth();
7730a52401aSWhitney Tsang SmallVector<Instruction *, 4> EarlierLoadsAndStores;
7740a52401aSWhitney Tsang SmallVector<Instruction *, 4> CurrentLoadsAndStores;
7750a52401aSWhitney Tsang for (BasicBlockSet &Blocks : AllBlocks) {
7760a52401aSWhitney Tsang CurrentLoadsAndStores.clear();
7770a52401aSWhitney Tsang if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores))
7780a52401aSWhitney Tsang return false;
7790a52401aSWhitney Tsang
7800a52401aSWhitney Tsang Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent());
7810a52401aSWhitney Tsang unsigned CurLoopDepth = CurLoop->getLoopDepth();
7820a52401aSWhitney Tsang
7830a52401aSWhitney Tsang for (auto *Earlier : EarlierLoadsAndStores) {
7840a52401aSWhitney Tsang Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent());
7850a52401aSWhitney Tsang unsigned EarlierDepth = EarlierLoop->getLoopDepth();
7860a52401aSWhitney Tsang unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
7870a52401aSWhitney Tsang for (auto *Later : CurrentLoadsAndStores) {
7880a52401aSWhitney Tsang if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false,
7890a52401aSWhitney Tsang DI))
790963401d2SDavid Green return false;
791963401d2SDavid Green }
792963401d2SDavid Green }
7930a52401aSWhitney Tsang
7940a52401aSWhitney Tsang size_t NumInsts = CurrentLoadsAndStores.size();
7950a52401aSWhitney Tsang for (size_t I = 0; I < NumInsts; ++I) {
7960a52401aSWhitney Tsang for (size_t J = I; J < NumInsts; ++J) {
7970a52401aSWhitney Tsang if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J],
7980a52401aSWhitney Tsang LoopDepth, CurLoopDepth, true, DI))
7990a52401aSWhitney Tsang return false;
800963401d2SDavid Green }
801963401d2SDavid Green }
8020a52401aSWhitney Tsang
8030a52401aSWhitney Tsang EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
8040a52401aSWhitney Tsang CurrentLoadsAndStores.end());
805bc2e1c3aSDavid Green }
806963401d2SDavid Green return true;
807963401d2SDavid Green }
808963401d2SDavid Green
isEligibleLoopForm(const Loop & Root)8090a52401aSWhitney Tsang static bool isEligibleLoopForm(const Loop &Root) {
8100a52401aSWhitney Tsang // Root must have a child.
8110a52401aSWhitney Tsang if (Root.getSubLoops().size() != 1)
812963401d2SDavid Green return false;
813963401d2SDavid Green
8140a52401aSWhitney Tsang const Loop *L = &Root;
8150a52401aSWhitney Tsang do {
8160a52401aSWhitney Tsang // All loops in Root need to be in simplify and rotated form.
8170a52401aSWhitney Tsang if (!L->isLoopSimplifyForm())
8180a52401aSWhitney Tsang return false;
8190a52401aSWhitney Tsang
8200a52401aSWhitney Tsang if (!L->isRotatedForm())
8210a52401aSWhitney Tsang return false;
8220a52401aSWhitney Tsang
8230a52401aSWhitney Tsang if (L->getHeader()->hasAddressTaken()) {
8240a52401aSWhitney Tsang LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
8250a52401aSWhitney Tsang return false;
8260a52401aSWhitney Tsang }
8270a52401aSWhitney Tsang
8280a52401aSWhitney Tsang unsigned SubLoopsSize = L->getSubLoops().size();
8290a52401aSWhitney Tsang if (SubLoopsSize == 0)
8300a52401aSWhitney Tsang return true;
8310a52401aSWhitney Tsang
8320a52401aSWhitney Tsang // Only one child is allowed.
8330a52401aSWhitney Tsang if (SubLoopsSize != 1)
8340a52401aSWhitney Tsang return false;
8350a52401aSWhitney Tsang
83622ebbc47SSidharth Baveja // Only loops with a single exit block can be unrolled and jammed.
83722ebbc47SSidharth Baveja // The function getExitBlock() is used for this check, rather than
83822ebbc47SSidharth Baveja // getUniqueExitBlock() to ensure loops with mulitple exit edges are
83922ebbc47SSidharth Baveja // disallowed.
84022ebbc47SSidharth Baveja if (!L->getExitBlock()) {
84122ebbc47SSidharth Baveja LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
84222ebbc47SSidharth Baveja "blocks can be unrolled and jammed.\n");
84322ebbc47SSidharth Baveja return false;
84422ebbc47SSidharth Baveja }
84522ebbc47SSidharth Baveja
84622ebbc47SSidharth Baveja // Only loops with a single exiting block can be unrolled and jammed.
84722ebbc47SSidharth Baveja if (!L->getExitingBlock()) {
84822ebbc47SSidharth Baveja LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single "
84922ebbc47SSidharth Baveja "exiting blocks can be unrolled and jammed.\n");
85022ebbc47SSidharth Baveja return false;
85122ebbc47SSidharth Baveja }
85222ebbc47SSidharth Baveja
8530a52401aSWhitney Tsang L = L->getSubLoops()[0];
8540a52401aSWhitney Tsang } while (L);
8550a52401aSWhitney Tsang
8560a52401aSWhitney Tsang return true;
8570a52401aSWhitney Tsang }
8580a52401aSWhitney Tsang
getInnerMostLoop(Loop * L)8590a52401aSWhitney Tsang static Loop *getInnerMostLoop(Loop *L) {
8600a52401aSWhitney Tsang while (!L->getSubLoops().empty())
8610a52401aSWhitney Tsang L = L->getSubLoops()[0];
8620a52401aSWhitney Tsang return L;
863963401d2SDavid Green }
864963401d2SDavid Green
isSafeToUnrollAndJam(Loop * L,ScalarEvolution & SE,DominatorTree & DT,DependenceInfo & DI,LoopInfo & LI)865963401d2SDavid Green bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
8660a52401aSWhitney Tsang DependenceInfo &DI, LoopInfo &LI) {
8670a52401aSWhitney Tsang if (!isEligibleLoopForm(*L)) {
8680a52401aSWhitney Tsang LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n");
8690a52401aSWhitney Tsang return false;
8700a52401aSWhitney Tsang }
8710a52401aSWhitney Tsang
872963401d2SDavid Green /* We currently handle outer loops like this:
873963401d2SDavid Green |
8740a52401aSWhitney Tsang ForeFirst <------\ }
8750a52401aSWhitney Tsang Blocks | } ForeBlocks of L
876963401d2SDavid Green ForeLast | }
877963401d2SDavid Green | |
8780a52401aSWhitney Tsang ... |
8790a52401aSWhitney Tsang | |
8800a52401aSWhitney Tsang ForeFirst <----\ | }
8810a52401aSWhitney Tsang Blocks | | } ForeBlocks of a inner loop of L
8820a52401aSWhitney Tsang ForeLast | | }
8830a52401aSWhitney Tsang | | |
8840a52401aSWhitney Tsang JamLoopFirst <\ | | }
8850a52401aSWhitney Tsang Blocks | | | } JamLoopBlocks of the innermost loop
8860a52401aSWhitney Tsang JamLoopLast -/ | | }
8870a52401aSWhitney Tsang | | |
8880a52401aSWhitney Tsang AftFirst | | }
8890a52401aSWhitney Tsang Blocks | | } AftBlocks of a inner loop of L
8900a52401aSWhitney Tsang AftLast ------/ | }
8910a52401aSWhitney Tsang | |
8920a52401aSWhitney Tsang ... |
893963401d2SDavid Green | |
894963401d2SDavid Green AftFirst | }
8950a52401aSWhitney Tsang Blocks | } AftBlocks of L
8960a52401aSWhitney Tsang AftLast --------/ }
897963401d2SDavid Green |
898963401d2SDavid Green
899963401d2SDavid Green There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
900963401d2SDavid Green and AftBlocks, providing that there is one edge from Fores to SubLoops,
901963401d2SDavid Green one edge from SubLoops to Afts and a single outer loop exit (from Afts).
902963401d2SDavid Green In practice we currently limit Aft blocks to a single block, and limit
903963401d2SDavid Green things further in the profitablility checks of the unroll and jam pass.
904963401d2SDavid Green
905963401d2SDavid Green Because of the way we rearrange basic blocks, we also require that
9060a52401aSWhitney Tsang the Fore blocks of L on all unrolled iterations are safe to move before the
9070a52401aSWhitney Tsang blocks of the direct child of L of all iterations. So we require that the
9080a52401aSWhitney Tsang phi node looping operands of ForeHeader can be moved to at least the end of
9090a52401aSWhitney Tsang ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and
9100a52401aSWhitney Tsang match up Phi's correctly.
911963401d2SDavid Green
9120a52401aSWhitney Tsang i.e. The old order of blocks used to be
9130a52401aSWhitney Tsang (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2.
9140a52401aSWhitney Tsang It needs to be safe to transform this to
9150a52401aSWhitney Tsang (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2.
916963401d2SDavid Green
917963401d2SDavid Green There are then a number of checks along the lines of no calls, no
918963401d2SDavid Green exceptions, inner loop IV is consistent, etc. Note that for loops requiring
919963401d2SDavid Green runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
920963401d2SDavid Green UnrollAndJamLoop if the trip count cannot be easily calculated.
921963401d2SDavid Green */
922963401d2SDavid Green
923963401d2SDavid Green // Split blocks into Fore/SubLoop/Aft based on dominators
9240a52401aSWhitney Tsang Loop *JamLoop = getInnerMostLoop(L);
9252557e437SDavid Green BasicBlockSet SubLoopBlocks;
9260a52401aSWhitney Tsang DenseMap<Loop *, BasicBlockSet> ForeBlocksMap;
9270a52401aSWhitney Tsang DenseMap<Loop *, BasicBlockSet> AftBlocksMap;
9280a52401aSWhitney Tsang if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap,
9290a52401aSWhitney Tsang AftBlocksMap, DT)) {
930bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
931963401d2SDavid Green return false;
932bc2e1c3aSDavid Green }
933963401d2SDavid Green
934963401d2SDavid Green // Aft blocks may need to move instructions to fore blocks, which becomes more
935963401d2SDavid Green // difficult if there are multiple (potentially conditionally executed)
936963401d2SDavid Green // blocks. For now we just exclude loops with multiple aft blocks.
9370a52401aSWhitney Tsang if (AftBlocksMap[L].size() != 1) {
938bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
939bc2e1c3aSDavid Green "multiple blocks after the loop\n");
940963401d2SDavid Green return false;
941bc2e1c3aSDavid Green }
942963401d2SDavid Green
943bc2e1c3aSDavid Green // Check inner loop backedge count is consistent on all iterations of the
944bc2e1c3aSDavid Green // outer loop
9450a52401aSWhitney Tsang if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) {
9460a52401aSWhitney Tsang return !hasIterationCountInvariantInParent(SubLoop, SE);
9470a52401aSWhitney Tsang })) {
948bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
949bc2e1c3aSDavid Green "not consistent on each iteration\n");
950bc2e1c3aSDavid Green return false;
951bc2e1c3aSDavid Green }
952963401d2SDavid Green
953963401d2SDavid Green // Check the loop safety info for exceptions.
9549c90ec2fSMax Kazantsev SimpleLoopSafetyInfo LSI;
955530b8d1cSMax Kazantsev LSI.computeLoopSafetyInfo(L);
956530b8d1cSMax Kazantsev if (LSI.anyBlockMayThrow()) {
957bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
958963401d2SDavid Green return false;
959bc2e1c3aSDavid Green }
960963401d2SDavid Green
961963401d2SDavid Green // We've ruled out the easy stuff and now need to check that there are no
962963401d2SDavid Green // interdependencies which may prevent us from moving the:
963963401d2SDavid Green // ForeBlocks before Subloop and AftBlocks.
964963401d2SDavid Green // Subloop before AftBlocks.
965963401d2SDavid Green // ForeBlock phi operands before the subloop
966963401d2SDavid Green
967963401d2SDavid Green // Make sure we can move all instructions we need to before the subloop
9680a52401aSWhitney Tsang BasicBlock *Header = L->getHeader();
9690a52401aSWhitney Tsang BasicBlock *Latch = L->getLoopLatch();
9700a52401aSWhitney Tsang BasicBlockSet AftBlocks = AftBlocksMap[L];
9710a52401aSWhitney Tsang Loop *SubLoop = L->getSubLoops()[0];
972eda3c9efSDavid Green if (!processHeaderPhiOperands(
973eda3c9efSDavid Green Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
974963401d2SDavid Green if (SubLoop->contains(I->getParent()))
975963401d2SDavid Green return false;
9762557e437SDavid Green if (AftBlocks.count(I->getParent())) {
977eda3c9efSDavid Green // If we hit a phi node in afts we know we are done (probably
978eda3c9efSDavid Green // LCSSA)
979963401d2SDavid Green if (isa<PHINode>(I))
980963401d2SDavid Green return false;
981eda3c9efSDavid Green // Can't move instructions with side effects or memory
982eda3c9efSDavid Green // reads/writes
983963401d2SDavid Green if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
984963401d2SDavid Green return false;
985963401d2SDavid Green }
986eda3c9efSDavid Green // Keep going
987eda3c9efSDavid Green return true;
988bc2e1c3aSDavid Green })) {
989bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
990bc2e1c3aSDavid Green "instructions after subloop to before it\n");
991eda3c9efSDavid Green return false;
992bc2e1c3aSDavid Green }
993963401d2SDavid Green
994963401d2SDavid Green // Check for memory dependencies which prohibit the unrolling we are doing.
995963401d2SDavid Green // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
996963401d2SDavid Green // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
9970a52401aSWhitney Tsang if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI,
9980a52401aSWhitney Tsang LI)) {
999bc2e1c3aSDavid Green LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
1000963401d2SDavid Green return false;
1001bc2e1c3aSDavid Green }
1002963401d2SDavid Green
1003963401d2SDavid Green return true;
1004963401d2SDavid Green }
1005