1963401d2SDavid Green //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
2963401d2SDavid Green //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6963401d2SDavid Green //
7963401d2SDavid Green //===----------------------------------------------------------------------===//
8963401d2SDavid Green //
9963401d2SDavid Green // This file implements loop unroll and jam as a routine, much like
10963401d2SDavid Green // LoopUnroll.cpp implements loop unroll.
11963401d2SDavid Green //
12963401d2SDavid Green //===----------------------------------------------------------------------===//
13963401d2SDavid Green 
14a5b6480dSAnh Tuyen Tran #include "llvm/ADT/ArrayRef.h"
15a5b6480dSAnh Tuyen Tran #include "llvm/ADT/DenseMap.h"
16a5b6480dSAnh Tuyen Tran #include "llvm/ADT/Optional.h"
17a5b6480dSAnh Tuyen Tran #include "llvm/ADT/STLExtras.h"
18963401d2SDavid Green #include "llvm/ADT/SmallPtrSet.h"
19a5b6480dSAnh Tuyen Tran #include "llvm/ADT/SmallVector.h"
20963401d2SDavid Green #include "llvm/ADT/Statistic.h"
21a5b6480dSAnh Tuyen Tran #include "llvm/ADT/StringRef.h"
22a5b6480dSAnh Tuyen Tran #include "llvm/ADT/Twine.h"
23a5b6480dSAnh Tuyen Tran #include "llvm/ADT/iterator_range.h"
24963401d2SDavid Green #include "llvm/Analysis/AssumptionCache.h"
25963401d2SDavid Green #include "llvm/Analysis/DependenceAnalysis.h"
26a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/DomTreeUpdater.h"
27a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/LoopInfo.h"
28963401d2SDavid Green #include "llvm/Analysis/LoopIterator.h"
29a5b6480dSAnh Tuyen Tran #include "llvm/Analysis/MustExecute.h"
30963401d2SDavid Green #include "llvm/Analysis/OptimizationRemarkEmitter.h"
31963401d2SDavid Green #include "llvm/Analysis/ScalarEvolution.h"
32963401d2SDavid Green #include "llvm/IR/BasicBlock.h"
33963401d2SDavid Green #include "llvm/IR/DebugInfoMetadata.h"
34a5b6480dSAnh Tuyen Tran #include "llvm/IR/DebugLoc.h"
35a5b6480dSAnh Tuyen Tran #include "llvm/IR/DiagnosticInfo.h"
36963401d2SDavid Green #include "llvm/IR/Dominators.h"
37a5b6480dSAnh Tuyen Tran #include "llvm/IR/Function.h"
38a5b6480dSAnh Tuyen Tran #include "llvm/IR/Instruction.h"
39a5b6480dSAnh Tuyen Tran #include "llvm/IR/Instructions.h"
40963401d2SDavid Green #include "llvm/IR/IntrinsicInst.h"
41a5b6480dSAnh Tuyen Tran #include "llvm/IR/User.h"
42a5b6480dSAnh Tuyen Tran #include "llvm/IR/Value.h"
43a5b6480dSAnh Tuyen Tran #include "llvm/IR/ValueHandle.h"
44a5b6480dSAnh Tuyen Tran #include "llvm/IR/ValueMap.h"
45a5b6480dSAnh Tuyen Tran #include "llvm/Support/Casting.h"
46963401d2SDavid Green #include "llvm/Support/Debug.h"
47a5b6480dSAnh Tuyen Tran #include "llvm/Support/ErrorHandling.h"
48a5b6480dSAnh Tuyen Tran #include "llvm/Support/GenericDomTree.h"
49963401d2SDavid Green #include "llvm/Support/raw_ostream.h"
50963401d2SDavid Green #include "llvm/Transforms/Utils/BasicBlockUtils.h"
51963401d2SDavid Green #include "llvm/Transforms/Utils/Cloning.h"
52963401d2SDavid Green #include "llvm/Transforms/Utils/LoopUtils.h"
53963401d2SDavid Green #include "llvm/Transforms/Utils/UnrollLoop.h"
54a5b6480dSAnh Tuyen Tran #include "llvm/Transforms/Utils/ValueMapper.h"
55a5b6480dSAnh Tuyen Tran #include <assert.h>
56a5b6480dSAnh Tuyen Tran #include <memory>
57a5b6480dSAnh Tuyen Tran #include <type_traits>
58a5b6480dSAnh Tuyen Tran #include <vector>
59a5b6480dSAnh Tuyen Tran 
60963401d2SDavid Green using namespace llvm;
61963401d2SDavid Green 
62963401d2SDavid Green #define DEBUG_TYPE "loop-unroll-and-jam"
63963401d2SDavid Green 
64963401d2SDavid Green STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
65963401d2SDavid Green STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
66963401d2SDavid Green 
672557e437SDavid Green typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
68963401d2SDavid Green 
69963401d2SDavid Green // Partition blocks in an outer/inner loop pair into blocks before and after
70963401d2SDavid Green // the loop
partitionLoopBlocks(Loop & L,BasicBlockSet & ForeBlocks,BasicBlockSet & AftBlocks,DominatorTree & DT)710a52401aSWhitney Tsang static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks,
720a52401aSWhitney Tsang                                 BasicBlockSet &AftBlocks, DominatorTree &DT) {
730a52401aSWhitney Tsang   Loop *SubLoop = L.getSubLoops()[0];
74963401d2SDavid Green   BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
75963401d2SDavid Green 
760a52401aSWhitney Tsang   for (BasicBlock *BB : L.blocks()) {
77963401d2SDavid Green     if (!SubLoop->contains(BB)) {
780a52401aSWhitney Tsang       if (DT.dominates(SubLoopLatch, BB))
792557e437SDavid Green         AftBlocks.insert(BB);
80963401d2SDavid Green       else
812557e437SDavid Green         ForeBlocks.insert(BB);
82963401d2SDavid Green     }
83963401d2SDavid Green   }
84963401d2SDavid Green 
85963401d2SDavid Green   // Check that all blocks in ForeBlocks together dominate the subloop
86963401d2SDavid Green   // TODO: This might ideally be done better with a dominator/postdominators.
87963401d2SDavid Green   BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
88963401d2SDavid Green   for (BasicBlock *BB : ForeBlocks) {
89963401d2SDavid Green     if (BB == SubLoopPreHeader)
90963401d2SDavid Green       continue;
91edb12a83SChandler Carruth     Instruction *TI = BB->getTerminator();
920a52401aSWhitney Tsang     for (BasicBlock *Succ : successors(TI))
930a52401aSWhitney Tsang       if (!ForeBlocks.count(Succ))
94963401d2SDavid Green         return false;
95963401d2SDavid Green   }
96963401d2SDavid Green 
97963401d2SDavid Green   return true;
98963401d2SDavid Green }
99963401d2SDavid Green 
1000a52401aSWhitney Tsang /// Partition blocks in a loop nest into blocks before and after each inner
1010a52401aSWhitney Tsang /// loop.
partitionOuterLoopBlocks(Loop & Root,Loop & JamLoop,BasicBlockSet & JamLoopBlocks,DenseMap<Loop *,BasicBlockSet> & ForeBlocksMap,DenseMap<Loop *,BasicBlockSet> & AftBlocksMap,DominatorTree & DT)1020a52401aSWhitney Tsang static bool partitionOuterLoopBlocks(
1030a52401aSWhitney Tsang     Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks,
1040a52401aSWhitney Tsang     DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
1050a52401aSWhitney Tsang     DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) {
1060a52401aSWhitney Tsang   JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end());
1070a52401aSWhitney Tsang 
1080a52401aSWhitney Tsang   for (Loop *L : Root.getLoopsInPreorder()) {
1090a52401aSWhitney Tsang     if (L == &JamLoop)
1100a52401aSWhitney Tsang       break;
1110a52401aSWhitney Tsang 
1120a52401aSWhitney Tsang     if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT))
1130a52401aSWhitney Tsang       return false;
1140a52401aSWhitney Tsang   }
1150a52401aSWhitney Tsang 
1160a52401aSWhitney Tsang   return true;
1170a52401aSWhitney Tsang }
1180a52401aSWhitney Tsang 
1190a52401aSWhitney Tsang // TODO Remove when UnrollAndJamLoop changed to support unroll and jamming more
1200a52401aSWhitney Tsang // than 2 levels loop.
partitionOuterLoopBlocks(Loop * L,Loop * SubLoop,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DominatorTree * DT)1210a52401aSWhitney Tsang static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
1220a52401aSWhitney Tsang                                      BasicBlockSet &ForeBlocks,
1230a52401aSWhitney Tsang                                      BasicBlockSet &SubLoopBlocks,
1240a52401aSWhitney Tsang                                      BasicBlockSet &AftBlocks,
1250a52401aSWhitney Tsang                                      DominatorTree *DT) {
1260a52401aSWhitney Tsang   SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
1270a52401aSWhitney Tsang   return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
1280a52401aSWhitney Tsang }
1290a52401aSWhitney Tsang 
130eda3c9efSDavid Green // Looks at the phi nodes in Header for values coming from Latch. For these
131eda3c9efSDavid Green // instructions and all their operands calls Visit on them, keeping going for
132eda3c9efSDavid Green // all the operands in AftBlocks. Returns false if Visit returns false,
133eda3c9efSDavid Green // otherwise returns true. This is used to process the instructions in the
134eda3c9efSDavid Green // Aft blocks that need to be moved before the subloop. It is used in two
135eda3c9efSDavid Green // places. One to check that the required set of instructions can be moved
136eda3c9efSDavid Green // before the loop. Then to collect the instructions to actually move in
137eda3c9efSDavid Green // moveHeaderPhiOperandsToForeBlocks.
138eda3c9efSDavid Green template <typename T>
processHeaderPhiOperands(BasicBlock * Header,BasicBlock * Latch,BasicBlockSet & AftBlocks,T Visit)139eda3c9efSDavid Green static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
140eda3c9efSDavid Green                                      BasicBlockSet &AftBlocks, T Visit) {
141eda3c9efSDavid Green   SmallVector<Instruction *, 8> Worklist;
14255487079SDanilo C. Grael   SmallPtrSet<Instruction *, 8> VisitedInstr;
143963401d2SDavid Green   for (auto &Phi : Header->phis()) {
144963401d2SDavid Green     Value *V = Phi.getIncomingValueForBlock(Latch);
145963401d2SDavid Green     if (Instruction *I = dyn_cast<Instruction>(V))
146963401d2SDavid Green       Worklist.push_back(I);
147963401d2SDavid Green   }
148963401d2SDavid Green 
149963401d2SDavid Green   while (!Worklist.empty()) {
1501238378fSKazu Hirata     Instruction *I = Worklist.pop_back_val();
151eda3c9efSDavid Green     if (!Visit(I))
152eda3c9efSDavid Green       return false;
15355487079SDanilo C. Grael     VisitedInstr.insert(I);
154963401d2SDavid Green 
155eda3c9efSDavid Green     if (AftBlocks.count(I->getParent()))
156963401d2SDavid Green       for (auto &U : I->operands())
157963401d2SDavid Green         if (Instruction *II = dyn_cast<Instruction>(U))
15855487079SDanilo C. Grael           if (!VisitedInstr.count(II))
159963401d2SDavid Green             Worklist.push_back(II);
160963401d2SDavid Green   }
161963401d2SDavid Green 
162eda3c9efSDavid Green   return true;
163eda3c9efSDavid Green }
164eda3c9efSDavid Green 
165eda3c9efSDavid Green // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
moveHeaderPhiOperandsToForeBlocks(BasicBlock * Header,BasicBlock * Latch,Instruction * InsertLoc,BasicBlockSet & AftBlocks)166eda3c9efSDavid Green static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
167eda3c9efSDavid Green                                               BasicBlock *Latch,
168eda3c9efSDavid Green                                               Instruction *InsertLoc,
169eda3c9efSDavid Green                                               BasicBlockSet &AftBlocks) {
170eda3c9efSDavid Green   // We need to ensure we move the instructions in the correct order,
171eda3c9efSDavid Green   // starting with the earliest required instruction and moving forward.
172eda3c9efSDavid Green   std::vector<Instruction *> Visited;
173eda3c9efSDavid Green   processHeaderPhiOperands(Header, Latch, AftBlocks,
174eda3c9efSDavid Green                            [&Visited, &AftBlocks](Instruction *I) {
175eda3c9efSDavid Green                              if (AftBlocks.count(I->getParent()))
176eda3c9efSDavid Green                                Visited.push_back(I);
177eda3c9efSDavid Green                              return true;
178eda3c9efSDavid Green                            });
179eda3c9efSDavid Green 
180963401d2SDavid Green   // Move all instructions in program order to before the InsertLoc
181963401d2SDavid Green   BasicBlock *InsertLocBB = InsertLoc->getParent();
182963401d2SDavid Green   for (Instruction *I : reverse(Visited)) {
183963401d2SDavid Green     if (I->getParent() != InsertLocBB)
184963401d2SDavid Green       I->moveBefore(InsertLoc);
185963401d2SDavid Green   }
186963401d2SDavid Green }
187963401d2SDavid Green 
188963401d2SDavid Green /*
189963401d2SDavid Green   This method performs Unroll and Jam. For a simple loop like:
190963401d2SDavid Green   for (i = ..)
191963401d2SDavid Green     Fore(i)
192963401d2SDavid Green     for (j = ..)
193963401d2SDavid Green       SubLoop(i, j)
194963401d2SDavid Green     Aft(i)
195963401d2SDavid Green 
196963401d2SDavid Green   Instead of doing normal inner or outer unrolling, we do:
197963401d2SDavid Green   for (i = .., i+=2)
198963401d2SDavid Green     Fore(i)
199963401d2SDavid Green     Fore(i+1)
200963401d2SDavid Green     for (j = ..)
201963401d2SDavid Green       SubLoop(i, j)
202963401d2SDavid Green       SubLoop(i+1, j)
203963401d2SDavid Green     Aft(i)
204963401d2SDavid Green     Aft(i+1)
205963401d2SDavid Green 
206963401d2SDavid Green   So the outer loop is essetially unrolled and then the inner loops are fused
207963401d2SDavid Green   ("jammed") together into a single loop. This can increase speed when there
208963401d2SDavid Green   are loads in SubLoop that are invariant to i, as they become shared between
209963401d2SDavid Green   the now jammed inner loops.
210963401d2SDavid Green 
211963401d2SDavid Green   We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
212963401d2SDavid Green   Fore blocks are those before the inner loop, Aft are those after. Normal
213963401d2SDavid Green   Unroll code is used to copy each of these sets of blocks and the results are
214963401d2SDavid Green   combined together into the final form above.
215963401d2SDavid Green 
216963401d2SDavid Green   isSafeToUnrollAndJam should be used prior to calling this to make sure the
217963401d2SDavid Green   unrolling will be valid. Checking profitablility is also advisable.
21872448525SMichael Kruse 
21972448525SMichael Kruse   If EpilogueLoop is non-null, it receives the epilogue loop (if it was
22072448525SMichael Kruse   necessary to create one and not fully unrolled).
221963401d2SDavid Green */
222e77d24f7Smaekawatoshiki LoopUnrollResult
UnrollAndJamLoop(Loop * L,unsigned Count,unsigned TripCount,unsigned TripMultiple,bool UnrollRemainder,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE,Loop ** EpilogueLoop)223e77d24f7Smaekawatoshiki llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
224e77d24f7Smaekawatoshiki                        unsigned TripMultiple, bool UnrollRemainder,
225e77d24f7Smaekawatoshiki                        LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
2260789f280SRoman Lebedev                        AssumptionCache *AC, const TargetTransformInfo *TTI,
227e77d24f7Smaekawatoshiki                        OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
228963401d2SDavid Green 
229963401d2SDavid Green   // When we enter here we should have already checked that it is safe
230963401d2SDavid Green   BasicBlock *Header = L->getHeader();
23160cb193aSDávid Bolvanský   assert(Header && "No header.");
232963401d2SDavid Green   assert(L->getSubLoops().size() == 1);
233963401d2SDavid Green   Loop *SubLoop = *L->begin();
234963401d2SDavid Green 
235963401d2SDavid Green   // Don't enter the unroll code if there is nothing to do.
236963401d2SDavid Green   if (TripCount == 0 && Count < 2) {
237bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
238963401d2SDavid Green     return LoopUnrollResult::Unmodified;
239963401d2SDavid Green   }
240963401d2SDavid Green 
241963401d2SDavid Green   assert(Count > 0);
242963401d2SDavid Green   assert(TripMultiple > 0);
243963401d2SDavid Green   assert(TripCount == 0 || TripCount % TripMultiple == 0);
244963401d2SDavid Green 
245963401d2SDavid Green   // Are we eliminating the loop control altogether?
246963401d2SDavid Green   bool CompletelyUnroll = (Count == TripCount);
247963401d2SDavid Green 
248963401d2SDavid Green   // We use the runtime remainder in cases where we don't know trip multiple
2491bd4085eSNikita Popov   if (TripMultiple % Count != 0) {
250963401d2SDavid Green     if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
251963401d2SDavid Green                                     /*UseEpilogRemainder*/ true,
2522312a06cSAlina Sbirlea                                     UnrollRemainder, /*ForgetAllSCEV*/ false,
2530789f280SRoman Lebedev                                     LI, SE, DT, AC, TTI, true, EpilogueLoop)) {
254963401d2SDavid Green       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
255963401d2SDavid Green                            "generated when assuming runtime trip count\n");
256963401d2SDavid Green       return LoopUnrollResult::Unmodified;
257963401d2SDavid Green     }
258963401d2SDavid Green   }
259963401d2SDavid Green 
260963401d2SDavid Green   // Notify ScalarEvolution that the loop will be substantially changed,
261963401d2SDavid Green   // if not outright eliminated.
262963401d2SDavid Green   if (SE) {
263963401d2SDavid Green     SE->forgetLoop(L);
264963401d2SDavid Green     SE->forgetLoop(SubLoop);
265963401d2SDavid Green   }
266963401d2SDavid Green 
267963401d2SDavid Green   using namespace ore;
268963401d2SDavid Green   // Report the unrolling decision.
269963401d2SDavid Green   if (CompletelyUnroll) {
270963401d2SDavid Green     LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
271963401d2SDavid Green                       << Header->getName() << " with trip count " << TripCount
272963401d2SDavid Green                       << "!\n");
273963401d2SDavid Green     ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
274963401d2SDavid Green                                  L->getHeader())
275963401d2SDavid Green               << "completely unroll and jammed loop with "
276963401d2SDavid Green               << NV("UnrollCount", TripCount) << " iterations");
277963401d2SDavid Green   } else {
278963401d2SDavid Green     auto DiagBuilder = [&]() {
279963401d2SDavid Green       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
280963401d2SDavid Green                               L->getHeader());
281963401d2SDavid Green       return Diag << "unroll and jammed loop by a factor of "
282963401d2SDavid Green                   << NV("UnrollCount", Count);
283963401d2SDavid Green     };
284963401d2SDavid Green 
285963401d2SDavid Green     LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
286963401d2SDavid Green                       << " by " << Count);
287963401d2SDavid Green     if (TripMultiple != 1) {
288963401d2SDavid Green       LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
289963401d2SDavid Green       ORE->emit([&]() {
290963401d2SDavid Green         return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
291963401d2SDavid Green                              << " trips per branch";
292963401d2SDavid Green       });
293963401d2SDavid Green     } else {
294963401d2SDavid Green       LLVM_DEBUG(dbgs() << " with run-time trip count");
295963401d2SDavid Green       ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
296963401d2SDavid Green     }
297963401d2SDavid Green     LLVM_DEBUG(dbgs() << "!\n");
298963401d2SDavid Green   }
299963401d2SDavid Green 
300963401d2SDavid Green   BasicBlock *Preheader = L->getLoopPreheader();
301963401d2SDavid Green   BasicBlock *LatchBlock = L->getLoopLatch();
30260cb193aSDávid Bolvanský   assert(Preheader && "No preheader");
30360cb193aSDávid Bolvanský   assert(LatchBlock && "No latch block");
304963401d2SDavid Green   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
305963401d2SDavid Green   assert(BI && !BI->isUnconditional());
306963401d2SDavid Green   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
307963401d2SDavid Green   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
308963401d2SDavid Green   bool SubLoopContinueOnTrue = SubLoop->contains(
309963401d2SDavid Green       SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
310963401d2SDavid Green 
311963401d2SDavid Green   // Partition blocks in an outer/inner loop pair into blocks before and after
312963401d2SDavid Green   // the loop
3132557e437SDavid Green   BasicBlockSet SubLoopBlocks;
3142557e437SDavid Green   BasicBlockSet ForeBlocks;
3152557e437SDavid Green   BasicBlockSet AftBlocks;
316963401d2SDavid Green   partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
317963401d2SDavid Green                            DT);
318963401d2SDavid Green 
319963401d2SDavid Green   // We keep track of the entering/first and exiting/last block of each of
320963401d2SDavid Green   // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
321963401d2SDavid Green   // blocks easier.
322963401d2SDavid Green   std::vector<BasicBlock *> ForeBlocksFirst;
323963401d2SDavid Green   std::vector<BasicBlock *> ForeBlocksLast;
324963401d2SDavid Green   std::vector<BasicBlock *> SubLoopBlocksFirst;
325963401d2SDavid Green   std::vector<BasicBlock *> SubLoopBlocksLast;
326963401d2SDavid Green   std::vector<BasicBlock *> AftBlocksFirst;
327963401d2SDavid Green   std::vector<BasicBlock *> AftBlocksLast;
328963401d2SDavid Green   ForeBlocksFirst.push_back(Header);
329963401d2SDavid Green   ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
330963401d2SDavid Green   SubLoopBlocksFirst.push_back(SubLoop->getHeader());
331963401d2SDavid Green   SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
332963401d2SDavid Green   AftBlocksFirst.push_back(SubLoop->getExitBlock());
333963401d2SDavid Green   AftBlocksLast.push_back(L->getExitingBlock());
334963401d2SDavid Green   // Maps Blocks[0] -> Blocks[It]
335963401d2SDavid Green   ValueToValueMapTy LastValueMap;
336963401d2SDavid Green 
337963401d2SDavid Green   // Move any instructions from fore phi operands from AftBlocks into Fore.
338963401d2SDavid Green   moveHeaderPhiOperandsToForeBlocks(
339cd0cff43SWhitney Tsang       Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
340963401d2SDavid Green 
341963401d2SDavid Green   // The current on-the-fly SSA update requires blocks to be processed in
342963401d2SDavid Green   // reverse postorder so that LastValueMap contains the correct value at each
343963401d2SDavid Green   // exit.
344963401d2SDavid Green   LoopBlocksDFS DFS(L);
345963401d2SDavid Green   DFS.perform(LI);
346963401d2SDavid Green   // Stash the DFS iterators before adding blocks to the loop.
347963401d2SDavid Green   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
348963401d2SDavid Green   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
349963401d2SDavid Green 
350886629a8SRong Xu   // When a FSDiscriminator is enabled, we don't need to add the multiply
351886629a8SRong Xu   // factors to the discriminators.
352886629a8SRong Xu   if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
353963401d2SDavid Green     for (BasicBlock *BB : L->getBlocks())
354963401d2SDavid Green       for (Instruction &I : *BB)
355963401d2SDavid Green         if (!isa<DbgInfoIntrinsic>(&I))
356b53eeb6fSMircea Trofin           if (const DILocation *DIL = I.getDebugLoc()) {
357ec026302SMircea Trofin             auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
358b53eeb6fSMircea Trofin             if (NewDIL)
359*7a47ee51SKazu Hirata               I.setDebugLoc(*NewDIL);
360b53eeb6fSMircea Trofin             else
361b53eeb6fSMircea Trofin               LLVM_DEBUG(dbgs()
362b53eeb6fSMircea Trofin                          << "Failed to create new discriminator: "
363b53eeb6fSMircea Trofin                          << DIL->getFilename() << " Line: " << DIL->getLine());
364b53eeb6fSMircea Trofin           }
365963401d2SDavid Green 
366963401d2SDavid Green   // Copy all blocks
367963401d2SDavid Green   for (unsigned It = 1; It != Count; ++It) {
3682b335e9aSWhitney Tsang     SmallVector<BasicBlock *, 8> NewBlocks;
369963401d2SDavid Green     // Maps Blocks[It] -> Blocks[It-1]
370963401d2SDavid Green     DenseMap<Value *, Value *> PrevItValueMap;
37170d4a202SDavid Green     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
37270d4a202SDavid Green     NewLoops[L] = L;
37370d4a202SDavid Green     NewLoops[SubLoop] = SubLoop;
374963401d2SDavid Green 
375963401d2SDavid Green     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
376963401d2SDavid Green       ValueToValueMapTy VMap;
377963401d2SDavid Green       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
378963401d2SDavid Green       Header->getParent()->getBasicBlockList().push_back(New);
379963401d2SDavid Green 
38070d4a202SDavid Green       // Tell LI about New.
38170d4a202SDavid Green       addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
382963401d2SDavid Green 
38370d4a202SDavid Green       if (ForeBlocks.count(*BB)) {
384963401d2SDavid Green         if (*BB == ForeBlocksFirst[0])
385963401d2SDavid Green           ForeBlocksFirst.push_back(New);
386963401d2SDavid Green         if (*BB == ForeBlocksLast[0])
387963401d2SDavid Green           ForeBlocksLast.push_back(New);
3882557e437SDavid Green       } else if (SubLoopBlocks.count(*BB)) {
389963401d2SDavid Green         if (*BB == SubLoopBlocksFirst[0])
390963401d2SDavid Green           SubLoopBlocksFirst.push_back(New);
391963401d2SDavid Green         if (*BB == SubLoopBlocksLast[0])
392963401d2SDavid Green           SubLoopBlocksLast.push_back(New);
3932557e437SDavid Green       } else if (AftBlocks.count(*BB)) {
394963401d2SDavid Green         if (*BB == AftBlocksFirst[0])
395963401d2SDavid Green           AftBlocksFirst.push_back(New);
396963401d2SDavid Green         if (*BB == AftBlocksLast[0])
397963401d2SDavid Green           AftBlocksLast.push_back(New);
398963401d2SDavid Green       } else {
399963401d2SDavid Green         llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
400963401d2SDavid Green       }
401963401d2SDavid Green 
402963401d2SDavid Green       // Update our running maps of newest clones
403963401d2SDavid Green       PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
404963401d2SDavid Green       LastValueMap[*BB] = New;
405963401d2SDavid Green       for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
406963401d2SDavid Green            VI != VE; ++VI) {
407963401d2SDavid Green         PrevItValueMap[VI->second] =
408963401d2SDavid Green             const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
409963401d2SDavid Green         LastValueMap[VI->first] = VI->second;
410963401d2SDavid Green       }
411963401d2SDavid Green 
412963401d2SDavid Green       NewBlocks.push_back(New);
413963401d2SDavid Green 
414963401d2SDavid Green       // Update DomTree:
415963401d2SDavid Green       if (*BB == ForeBlocksFirst[0])
416963401d2SDavid Green         DT->addNewBlock(New, ForeBlocksLast[It - 1]);
417963401d2SDavid Green       else if (*BB == SubLoopBlocksFirst[0])
418963401d2SDavid Green         DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
419963401d2SDavid Green       else if (*BB == AftBlocksFirst[0])
420963401d2SDavid Green         DT->addNewBlock(New, AftBlocksLast[It - 1]);
421963401d2SDavid Green       else {
422963401d2SDavid Green         // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
423963401d2SDavid Green         // structure.
424963401d2SDavid Green         auto BBDomNode = DT->getNode(*BB);
425963401d2SDavid Green         auto BBIDom = BBDomNode->getIDom();
426963401d2SDavid Green         BasicBlock *OriginalBBIDom = BBIDom->getBlock();
427963401d2SDavid Green         assert(OriginalBBIDom);
428963401d2SDavid Green         assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
429963401d2SDavid Green         DT->addNewBlock(
430963401d2SDavid Green             New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
431963401d2SDavid Green       }
432963401d2SDavid Green     }
433963401d2SDavid Green 
434963401d2SDavid Green     // Remap all instructions in the most recent iteration
4352b335e9aSWhitney Tsang     remapInstructionsInBlocks(NewBlocks, LastValueMap);
436963401d2SDavid Green     for (BasicBlock *NewBlock : NewBlocks) {
437963401d2SDavid Green       for (Instruction &I : *NewBlock) {
438a6d2a8d6SPhilip Reames         if (auto *II = dyn_cast<AssumeInst>(&I))
439963401d2SDavid Green           AC->registerAssumption(II);
440963401d2SDavid Green       }
441963401d2SDavid Green     }
442963401d2SDavid Green 
443963401d2SDavid Green     // Alter the ForeBlocks phi's, pointing them at the latest version of the
444963401d2SDavid Green     // value from the previous iteration's phis
445963401d2SDavid Green     for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
446963401d2SDavid Green       Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
447963401d2SDavid Green       assert(OldValue && "should have incoming edge from Aft[It]");
448963401d2SDavid Green       Value *NewValue = OldValue;
449963401d2SDavid Green       if (Value *PrevValue = PrevItValueMap[OldValue])
450963401d2SDavid Green         NewValue = PrevValue;
451963401d2SDavid Green 
452963401d2SDavid Green       assert(Phi.getNumOperands() == 2);
453963401d2SDavid Green       Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
454963401d2SDavid Green       Phi.setIncomingValue(0, NewValue);
455963401d2SDavid Green       Phi.removeIncomingValue(1);
456963401d2SDavid Green     }
457963401d2SDavid Green   }
458963401d2SDavid Green 
459963401d2SDavid Green   // Now that all the basic blocks for the unrolled iterations are in place,
460963401d2SDavid Green   // finish up connecting the blocks and phi nodes. At this point LastValueMap
461963401d2SDavid Green   // is the last unrolled iterations values.
462963401d2SDavid Green 
463963401d2SDavid Green   // Update Phis in BB from OldBB to point to NewBB and use the latest value
464963401d2SDavid Green   // from LastValueMap
465963401d2SDavid Green   auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
466963401d2SDavid Green                                      BasicBlock *NewBB,
467963401d2SDavid Green                                      ValueToValueMapTy &LastValueMap) {
468963401d2SDavid Green     for (PHINode &Phi : BB->phis()) {
469963401d2SDavid Green       for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
470963401d2SDavid Green         if (Phi.getIncomingBlock(b) == OldBB) {
471963401d2SDavid Green           Value *OldValue = Phi.getIncomingValue(b);
472963401d2SDavid Green           if (Value *LastValue = LastValueMap[OldValue])
473963401d2SDavid Green             Phi.setIncomingValue(b, LastValue);
474963401d2SDavid Green           Phi.setIncomingBlock(b, NewBB);
475963401d2SDavid Green           break;
476963401d2SDavid Green         }
477963401d2SDavid Green       }
478963401d2SDavid Green     }
479963401d2SDavid Green   };
480963401d2SDavid Green   // Move all the phis from Src into Dest
481963401d2SDavid Green   auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
482963401d2SDavid Green     Instruction *insertPoint = Dest->getFirstNonPHI();
483963401d2SDavid Green     while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
484963401d2SDavid Green       Phi->moveBefore(insertPoint);
485963401d2SDavid Green   };
486963401d2SDavid Green 
487963401d2SDavid Green   // Update the PHI values outside the loop to point to the last block
488963401d2SDavid Green   updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
489963401d2SDavid Green                            LastValueMap);
490963401d2SDavid Green 
491963401d2SDavid Green   // Update ForeBlocks successors and phi nodes
492963401d2SDavid Green   BranchInst *ForeTerm =
493963401d2SDavid Green       cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
494cd0cff43SWhitney Tsang   assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
495cd0cff43SWhitney Tsang   ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]);
496963401d2SDavid Green 
497963401d2SDavid Green   if (CompletelyUnroll) {
498963401d2SDavid Green     while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
499963401d2SDavid Green       Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
500963401d2SDavid Green       Phi->getParent()->getInstList().erase(Phi);
501963401d2SDavid Green     }
502963401d2SDavid Green   } else {
503963401d2SDavid Green     // Update the PHI values to point to the last aft block
504963401d2SDavid Green     updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
505963401d2SDavid Green                              AftBlocksLast.back(), LastValueMap);
506963401d2SDavid Green   }
507963401d2SDavid Green 
508963401d2SDavid Green   for (unsigned It = 1; It != Count; It++) {
509963401d2SDavid Green     // Remap ForeBlock successors from previous iteration to this
510963401d2SDavid Green     BranchInst *ForeTerm =
511963401d2SDavid Green         cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
512cd0cff43SWhitney Tsang     assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
513cd0cff43SWhitney Tsang     ForeTerm->setSuccessor(0, ForeBlocksFirst[It]);
514963401d2SDavid Green   }
515963401d2SDavid Green 
516963401d2SDavid Green   // Subloop successors and phis
517963401d2SDavid Green   BranchInst *SubTerm =
518963401d2SDavid Green       cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
519963401d2SDavid Green   SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
520963401d2SDavid Green   SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
521aa994d98SWhitney Tsang   SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
522963401d2SDavid Green                                             ForeBlocksLast.back());
523aa994d98SWhitney Tsang   SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
524963401d2SDavid Green                                             SubLoopBlocksLast.back());
525963401d2SDavid Green 
526963401d2SDavid Green   for (unsigned It = 1; It != Count; It++) {
527963401d2SDavid Green     // Replace the conditional branch of the previous iteration subloop with an
528963401d2SDavid Green     // unconditional one to this one
529963401d2SDavid Green     BranchInst *SubTerm =
530963401d2SDavid Green         cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
531963401d2SDavid Green     BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
532963401d2SDavid Green     SubTerm->eraseFromParent();
533963401d2SDavid Green 
534aa994d98SWhitney Tsang     SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
535963401d2SDavid Green                                                ForeBlocksLast.back());
536aa994d98SWhitney Tsang     SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
537963401d2SDavid Green                                                SubLoopBlocksLast.back());
538963401d2SDavid Green     movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
539963401d2SDavid Green   }
540963401d2SDavid Green 
541963401d2SDavid Green   // Aft blocks successors and phis
542cd0cff43SWhitney Tsang   BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
543963401d2SDavid Green   if (CompletelyUnroll) {
544cd0cff43SWhitney Tsang     BranchInst::Create(LoopExit, AftTerm);
545cd0cff43SWhitney Tsang     AftTerm->eraseFromParent();
546963401d2SDavid Green   } else {
547cd0cff43SWhitney Tsang     AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
548cd0cff43SWhitney Tsang     assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
549cd0cff43SWhitney Tsang            "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
550963401d2SDavid Green   }
551aa994d98SWhitney Tsang   AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
552963401d2SDavid Green                                         SubLoopBlocksLast.back());
553963401d2SDavid Green 
554963401d2SDavid Green   for (unsigned It = 1; It != Count; It++) {
555963401d2SDavid Green     // Replace the conditional branch of the previous iteration subloop with an
556963401d2SDavid Green     // unconditional one to this one
557963401d2SDavid Green     BranchInst *AftTerm =
558963401d2SDavid Green         cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
559963401d2SDavid Green     BranchInst::Create(AftBlocksFirst[It], AftTerm);
560963401d2SDavid Green     AftTerm->eraseFromParent();
561963401d2SDavid Green 
562aa994d98SWhitney Tsang     AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
563963401d2SDavid Green                                            SubLoopBlocksLast.back());
564963401d2SDavid Green     movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
565963401d2SDavid Green   }
566963401d2SDavid Green 
567f9cdb98fSFlorian Hahn   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
568963401d2SDavid Green   // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
569963401d2SDavid Green   // new ones required.
570963401d2SDavid Green   if (Count != 1) {
571963401d2SDavid Green     SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
572963401d2SDavid Green     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
573963401d2SDavid Green                            SubLoopBlocksFirst[0]);
574963401d2SDavid Green     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
575963401d2SDavid Green                            SubLoopBlocksLast[0], AftBlocksFirst[0]);
576963401d2SDavid Green 
577963401d2SDavid Green     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
578963401d2SDavid Green                            ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
579963401d2SDavid Green     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
580963401d2SDavid Green                            SubLoopBlocksLast.back(), AftBlocksFirst[0]);
581f9cdb98fSFlorian Hahn     DTU.applyUpdatesPermissive(DTUpdates);
582963401d2SDavid Green   }
583963401d2SDavid Green 
584963401d2SDavid Green   // Merge adjacent basic blocks, if possible.
585963401d2SDavid Green   SmallPtrSet<BasicBlock *, 16> MergeBlocks;
586963401d2SDavid Green   MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
587963401d2SDavid Green   MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
588963401d2SDavid Green   MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
58915b6730fSSidharth Baveja 
59015b6730fSSidharth Baveja   MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI);
59115b6730fSSidharth Baveja 
592f9cdb98fSFlorian Hahn   // Apply updates to the DomTree.
593f9cdb98fSFlorian Hahn   DT = &DTU.getDomTree();
594963401d2SDavid Green 
595963401d2SDavid Green   // At this point, the code is well formed.  We now do a quick sweep over the
596963401d2SDavid Green   // inserted code, doing constant propagation and dead code elimination as we
597963401d2SDavid Green   // go.
5980789f280SRoman Lebedev   simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI);
5990789f280SRoman Lebedev   simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC,
6000789f280SRoman Lebedev                           TTI);
601963401d2SDavid Green 
602963401d2SDavid Green   NumCompletelyUnrolledAndJammed += CompletelyUnroll;
603963401d2SDavid Green   ++NumUnrolledAndJammed;
604963401d2SDavid Green 
605cd0cff43SWhitney Tsang   // Update LoopInfo if the loop is completely removed.
606e77d24f7Smaekawatoshiki   if (CompletelyUnroll)
607cd0cff43SWhitney Tsang     LI->erase(L);
608cd0cff43SWhitney Tsang 
609963401d2SDavid Green #ifndef NDEBUG
610963401d2SDavid Green   // We shouldn't have done anything to break loop simplify form or LCSSA.
611cd0cff43SWhitney Tsang   Loop *OutestLoop = SubLoop->getParentLoop()
612cd0cff43SWhitney Tsang                          ? SubLoop->getParentLoop()->getParentLoop()
613cd0cff43SWhitney Tsang                                ? SubLoop->getParentLoop()->getParentLoop()
614cd0cff43SWhitney Tsang                                : SubLoop->getParentLoop()
615cd0cff43SWhitney Tsang                          : SubLoop;
616cd0cff43SWhitney Tsang   assert(DT->verify());
617cd0cff43SWhitney Tsang   LI->verify(*DT);
618963401d2SDavid Green   assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
619963401d2SDavid Green   if (!CompletelyUnroll)
620963401d2SDavid Green     assert(L->isLoopSimplifyForm());
621963401d2SDavid Green   assert(SubLoop->isLoopSimplifyForm());
622cd0cff43SWhitney Tsang   SE->verify();
623963401d2SDavid Green #endif
624963401d2SDavid Green 
625963401d2SDavid Green   return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
626963401d2SDavid Green                           : LoopUnrollResult::PartiallyUnrolled;
627963401d2SDavid Green }
628963401d2SDavid Green 
getLoadsAndStores(BasicBlockSet & Blocks,SmallVector<Instruction *,4> & MemInstr)6292557e437SDavid Green static bool getLoadsAndStores(BasicBlockSet &Blocks,
6300a52401aSWhitney Tsang                               SmallVector<Instruction *, 4> &MemInstr) {
631963401d2SDavid Green   // Scan the BBs and collect legal loads and stores.
632963401d2SDavid Green   // Returns false if non-simple loads/stores are found.
633963401d2SDavid Green   for (BasicBlock *BB : Blocks) {
634963401d2SDavid Green     for (Instruction &I : *BB) {
635963401d2SDavid Green       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
636963401d2SDavid Green         if (!Ld->isSimple())
637963401d2SDavid Green           return false;
638963401d2SDavid Green         MemInstr.push_back(&I);
639963401d2SDavid Green       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
640963401d2SDavid Green         if (!St->isSimple())
641963401d2SDavid Green           return false;
642963401d2SDavid Green         MemInstr.push_back(&I);
643963401d2SDavid Green       } else if (I.mayReadOrWriteMemory()) {
644963401d2SDavid Green         return false;
645963401d2SDavid Green       }
646963401d2SDavid Green     }
647963401d2SDavid Green   }
648963401d2SDavid Green   return true;
649963401d2SDavid Green }
650963401d2SDavid Green 
preservesForwardDependence(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,Dependence * D)6510a52401aSWhitney Tsang static bool preservesForwardDependence(Instruction *Src, Instruction *Dst,
6520a52401aSWhitney Tsang                                        unsigned UnrollLevel, unsigned JamLevel,
6530a52401aSWhitney Tsang                                        bool Sequentialized, Dependence *D) {
6540a52401aSWhitney Tsang   // UnrollLevel might carry the dependency Src --> Dst
6550a52401aSWhitney Tsang   // Does a different loop after unrolling?
6560a52401aSWhitney Tsang   for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6570a52401aSWhitney Tsang        ++CurLoopDepth) {
6580a52401aSWhitney Tsang     auto JammedDir = D->getDirection(CurLoopDepth);
6590a52401aSWhitney Tsang     if (JammedDir == Dependence::DVEntry::LT)
6600a52401aSWhitney Tsang       return true;
6610a52401aSWhitney Tsang 
6620a52401aSWhitney Tsang     if (JammedDir & Dependence::DVEntry::GT)
6630a52401aSWhitney Tsang       return false;
6640a52401aSWhitney Tsang   }
6650a52401aSWhitney Tsang 
6660a52401aSWhitney Tsang   return true;
6670a52401aSWhitney Tsang }
6680a52401aSWhitney Tsang 
preservesBackwardDependence(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,Dependence * D)6690a52401aSWhitney Tsang static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst,
6700a52401aSWhitney Tsang                                         unsigned UnrollLevel, unsigned JamLevel,
6710a52401aSWhitney Tsang                                         bool Sequentialized, Dependence *D) {
6720a52401aSWhitney Tsang   // UnrollLevel might carry the dependency Dst --> Src
6730a52401aSWhitney Tsang   for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6740a52401aSWhitney Tsang        ++CurLoopDepth) {
6750a52401aSWhitney Tsang     auto JammedDir = D->getDirection(CurLoopDepth);
6760a52401aSWhitney Tsang     if (JammedDir == Dependence::DVEntry::GT)
6770a52401aSWhitney Tsang       return true;
6780a52401aSWhitney Tsang 
6790a52401aSWhitney Tsang     if (JammedDir & Dependence::DVEntry::LT)
6800a52401aSWhitney Tsang       return false;
6810a52401aSWhitney Tsang   }
6820a52401aSWhitney Tsang 
6830a52401aSWhitney Tsang   // Backward dependencies are only preserved if not interleaved.
6840a52401aSWhitney Tsang   return Sequentialized;
6850a52401aSWhitney Tsang }
6860a52401aSWhitney Tsang 
6870a52401aSWhitney Tsang // Check whether it is semantically safe Src and Dst considering any potential
6880a52401aSWhitney Tsang // dependency between them.
6890a52401aSWhitney Tsang //
6900a52401aSWhitney Tsang // @param UnrollLevel The level of the loop being unrolled
6910a52401aSWhitney Tsang // @param JamLevel    The level of the loop being jammed; if Src and Dst are on
6920a52401aSWhitney Tsang // different levels, the outermost common loop counts as jammed level
6930a52401aSWhitney Tsang //
6940a52401aSWhitney Tsang // @return true if is safe and false if there is a dependency violation.
checkDependency(Instruction * Src,Instruction * Dst,unsigned UnrollLevel,unsigned JamLevel,bool Sequentialized,DependenceInfo & DI)6950a52401aSWhitney Tsang static bool checkDependency(Instruction *Src, Instruction *Dst,
6960a52401aSWhitney Tsang                             unsigned UnrollLevel, unsigned JamLevel,
6970a52401aSWhitney Tsang                             bool Sequentialized, DependenceInfo &DI) {
6980a52401aSWhitney Tsang   assert(UnrollLevel <= JamLevel &&
6990a52401aSWhitney Tsang          "Expecting JamLevel to be at least UnrollLevel");
7000a52401aSWhitney Tsang 
701963401d2SDavid Green   if (Src == Dst)
7020a52401aSWhitney Tsang     return true;
703963401d2SDavid Green   // Ignore Input dependencies.
704963401d2SDavid Green   if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
7050a52401aSWhitney Tsang     return true;
706963401d2SDavid Green 
7070a52401aSWhitney Tsang   // Check whether unroll-and-jam may violate a dependency.
7080a52401aSWhitney Tsang   // By construction, every dependency will be lexicographically non-negative
7090a52401aSWhitney Tsang   // (if it was, it would violate the current execution order), such as
7100a52401aSWhitney Tsang   //   (0,0,>,*,*)
7110a52401aSWhitney Tsang   // Unroll-and-jam changes the GT execution of two executions to the same
7120a52401aSWhitney Tsang   // iteration of the chosen unroll level. That is, a GT dependence becomes a GE
7130a52401aSWhitney Tsang   // dependence (or EQ, if we fully unrolled the loop) at the loop's position:
7140a52401aSWhitney Tsang   //   (0,0,>=,*,*)
7150a52401aSWhitney Tsang   // Now, the dependency is not necessarily non-negative anymore, i.e.
7160a52401aSWhitney Tsang   // unroll-and-jam may violate correctness.
7170a52401aSWhitney Tsang   std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true);
7180a52401aSWhitney Tsang   if (!D)
7190a52401aSWhitney Tsang     return true;
720963401d2SDavid Green   assert(D->isOrdered() && "Expected an output, flow or anti dep.");
721963401d2SDavid Green 
722bc2e1c3aSDavid Green   if (D->isConfused()) {
723bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "  Confused dependency between:\n"
724bc2e1c3aSDavid Green                       << "  " << *Src << "\n"
725bc2e1c3aSDavid Green                       << "  " << *Dst << "\n");
726963401d2SDavid Green     return false;
727bc2e1c3aSDavid Green   }
7280a52401aSWhitney Tsang 
7290a52401aSWhitney Tsang   // If outer levels (levels enclosing the loop being unroll-and-jammed) have a
7300a52401aSWhitney Tsang   // non-equal direction, then the locations accessed in the inner levels cannot
7310a52401aSWhitney Tsang   // overlap in memory. We assumes the indexes never overlap into neighboring
7320a52401aSWhitney Tsang   // dimensions.
7330a52401aSWhitney Tsang   for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
7340a52401aSWhitney Tsang     if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ))
7350a52401aSWhitney Tsang       return true;
7360a52401aSWhitney Tsang 
7370a52401aSWhitney Tsang   auto UnrollDirection = D->getDirection(UnrollLevel);
7380a52401aSWhitney Tsang 
7390a52401aSWhitney Tsang   // If the distance carried by the unrolled loop is 0, then after unrolling
7400a52401aSWhitney Tsang   // that distance will become non-zero resulting in non-overlapping accesses in
7410a52401aSWhitney Tsang   // the inner loops.
7420a52401aSWhitney Tsang   if (UnrollDirection == Dependence::DVEntry::EQ)
7430a52401aSWhitney Tsang     return true;
7440a52401aSWhitney Tsang 
7450a52401aSWhitney Tsang   if (UnrollDirection & Dependence::DVEntry::LT &&
7460a52401aSWhitney Tsang       !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel,
7470a52401aSWhitney Tsang                                   Sequentialized, D.get()))
748963401d2SDavid Green     return false;
7490a52401aSWhitney Tsang 
7500a52401aSWhitney Tsang   if (UnrollDirection & Dependence::DVEntry::GT &&
7510a52401aSWhitney Tsang       !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel,
7520a52401aSWhitney Tsang                                    Sequentialized, D.get()))
7530a52401aSWhitney Tsang     return false;
7540a52401aSWhitney Tsang 
7550a52401aSWhitney Tsang   return true;
756bc2e1c3aSDavid Green }
7570a52401aSWhitney Tsang 
7580a52401aSWhitney Tsang static bool
checkDependencies(Loop & Root,const BasicBlockSet & SubLoopBlocks,const DenseMap<Loop *,BasicBlockSet> & ForeBlocksMap,const DenseMap<Loop *,BasicBlockSet> & AftBlocksMap,DependenceInfo & DI,LoopInfo & LI)7590a52401aSWhitney Tsang checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
7600a52401aSWhitney Tsang                   const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
7610a52401aSWhitney Tsang                   const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap,
7620a52401aSWhitney Tsang                   DependenceInfo &DI, LoopInfo &LI) {
7630a52401aSWhitney Tsang   SmallVector<BasicBlockSet, 8> AllBlocks;
7640a52401aSWhitney Tsang   for (Loop *L : Root.getLoopsInPreorder())
7650a52401aSWhitney Tsang     if (ForeBlocksMap.find(L) != ForeBlocksMap.end())
7660a52401aSWhitney Tsang       AllBlocks.push_back(ForeBlocksMap.lookup(L));
7670a52401aSWhitney Tsang   AllBlocks.push_back(SubLoopBlocks);
7680a52401aSWhitney Tsang   for (Loop *L : Root.getLoopsInPreorder())
7690a52401aSWhitney Tsang     if (AftBlocksMap.find(L) != AftBlocksMap.end())
7700a52401aSWhitney Tsang       AllBlocks.push_back(AftBlocksMap.lookup(L));
7710a52401aSWhitney Tsang 
7720a52401aSWhitney Tsang   unsigned LoopDepth = Root.getLoopDepth();
7730a52401aSWhitney Tsang   SmallVector<Instruction *, 4> EarlierLoadsAndStores;
7740a52401aSWhitney Tsang   SmallVector<Instruction *, 4> CurrentLoadsAndStores;
7750a52401aSWhitney Tsang   for (BasicBlockSet &Blocks : AllBlocks) {
7760a52401aSWhitney Tsang     CurrentLoadsAndStores.clear();
7770a52401aSWhitney Tsang     if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores))
7780a52401aSWhitney Tsang       return false;
7790a52401aSWhitney Tsang 
7800a52401aSWhitney Tsang     Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent());
7810a52401aSWhitney Tsang     unsigned CurLoopDepth = CurLoop->getLoopDepth();
7820a52401aSWhitney Tsang 
7830a52401aSWhitney Tsang     for (auto *Earlier : EarlierLoadsAndStores) {
7840a52401aSWhitney Tsang       Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent());
7850a52401aSWhitney Tsang       unsigned EarlierDepth = EarlierLoop->getLoopDepth();
7860a52401aSWhitney Tsang       unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
7870a52401aSWhitney Tsang       for (auto *Later : CurrentLoadsAndStores) {
7880a52401aSWhitney Tsang         if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false,
7890a52401aSWhitney Tsang                              DI))
790963401d2SDavid Green           return false;
791963401d2SDavid Green       }
792963401d2SDavid Green     }
7930a52401aSWhitney Tsang 
7940a52401aSWhitney Tsang     size_t NumInsts = CurrentLoadsAndStores.size();
7950a52401aSWhitney Tsang     for (size_t I = 0; I < NumInsts; ++I) {
7960a52401aSWhitney Tsang       for (size_t J = I; J < NumInsts; ++J) {
7970a52401aSWhitney Tsang         if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J],
7980a52401aSWhitney Tsang                              LoopDepth, CurLoopDepth, true, DI))
7990a52401aSWhitney Tsang           return false;
800963401d2SDavid Green       }
801963401d2SDavid Green     }
8020a52401aSWhitney Tsang 
8030a52401aSWhitney Tsang     EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
8040a52401aSWhitney Tsang                                  CurrentLoadsAndStores.end());
805bc2e1c3aSDavid Green   }
806963401d2SDavid Green   return true;
807963401d2SDavid Green }
808963401d2SDavid Green 
isEligibleLoopForm(const Loop & Root)8090a52401aSWhitney Tsang static bool isEligibleLoopForm(const Loop &Root) {
8100a52401aSWhitney Tsang   // Root must have a child.
8110a52401aSWhitney Tsang   if (Root.getSubLoops().size() != 1)
812963401d2SDavid Green     return false;
813963401d2SDavid Green 
8140a52401aSWhitney Tsang   const Loop *L = &Root;
8150a52401aSWhitney Tsang   do {
8160a52401aSWhitney Tsang     // All loops in Root need to be in simplify and rotated form.
8170a52401aSWhitney Tsang     if (!L->isLoopSimplifyForm())
8180a52401aSWhitney Tsang       return false;
8190a52401aSWhitney Tsang 
8200a52401aSWhitney Tsang     if (!L->isRotatedForm())
8210a52401aSWhitney Tsang       return false;
8220a52401aSWhitney Tsang 
8230a52401aSWhitney Tsang     if (L->getHeader()->hasAddressTaken()) {
8240a52401aSWhitney Tsang       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
8250a52401aSWhitney Tsang       return false;
8260a52401aSWhitney Tsang     }
8270a52401aSWhitney Tsang 
8280a52401aSWhitney Tsang     unsigned SubLoopsSize = L->getSubLoops().size();
8290a52401aSWhitney Tsang     if (SubLoopsSize == 0)
8300a52401aSWhitney Tsang       return true;
8310a52401aSWhitney Tsang 
8320a52401aSWhitney Tsang     // Only one child is allowed.
8330a52401aSWhitney Tsang     if (SubLoopsSize != 1)
8340a52401aSWhitney Tsang       return false;
8350a52401aSWhitney Tsang 
83622ebbc47SSidharth Baveja     // Only loops with a single exit block can be unrolled and jammed.
83722ebbc47SSidharth Baveja     // The function getExitBlock() is used for this check, rather than
83822ebbc47SSidharth Baveja     // getUniqueExitBlock() to ensure loops with mulitple exit edges are
83922ebbc47SSidharth Baveja     // disallowed.
84022ebbc47SSidharth Baveja     if (!L->getExitBlock()) {
84122ebbc47SSidharth Baveja       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
84222ebbc47SSidharth Baveja                            "blocks can be unrolled and jammed.\n");
84322ebbc47SSidharth Baveja       return false;
84422ebbc47SSidharth Baveja     }
84522ebbc47SSidharth Baveja 
84622ebbc47SSidharth Baveja     // Only loops with a single exiting block can be unrolled and jammed.
84722ebbc47SSidharth Baveja     if (!L->getExitingBlock()) {
84822ebbc47SSidharth Baveja       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single "
84922ebbc47SSidharth Baveja                            "exiting blocks can be unrolled and jammed.\n");
85022ebbc47SSidharth Baveja       return false;
85122ebbc47SSidharth Baveja     }
85222ebbc47SSidharth Baveja 
8530a52401aSWhitney Tsang     L = L->getSubLoops()[0];
8540a52401aSWhitney Tsang   } while (L);
8550a52401aSWhitney Tsang 
8560a52401aSWhitney Tsang   return true;
8570a52401aSWhitney Tsang }
8580a52401aSWhitney Tsang 
getInnerMostLoop(Loop * L)8590a52401aSWhitney Tsang static Loop *getInnerMostLoop(Loop *L) {
8600a52401aSWhitney Tsang   while (!L->getSubLoops().empty())
8610a52401aSWhitney Tsang     L = L->getSubLoops()[0];
8620a52401aSWhitney Tsang   return L;
863963401d2SDavid Green }
864963401d2SDavid Green 
isSafeToUnrollAndJam(Loop * L,ScalarEvolution & SE,DominatorTree & DT,DependenceInfo & DI,LoopInfo & LI)865963401d2SDavid Green bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
8660a52401aSWhitney Tsang                                 DependenceInfo &DI, LoopInfo &LI) {
8670a52401aSWhitney Tsang   if (!isEligibleLoopForm(*L)) {
8680a52401aSWhitney Tsang     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n");
8690a52401aSWhitney Tsang     return false;
8700a52401aSWhitney Tsang   }
8710a52401aSWhitney Tsang 
872963401d2SDavid Green   /* We currently handle outer loops like this:
873963401d2SDavid Green         |
8740a52401aSWhitney Tsang     ForeFirst    <------\   }
8750a52401aSWhitney Tsang      Blocks             |   } ForeBlocks of L
876963401d2SDavid Green     ForeLast            |   }
877963401d2SDavid Green         |               |
8780a52401aSWhitney Tsang        ...              |
8790a52401aSWhitney Tsang         |               |
8800a52401aSWhitney Tsang     ForeFirst    <----\ |   }
8810a52401aSWhitney Tsang      Blocks           | |   } ForeBlocks of a inner loop of L
8820a52401aSWhitney Tsang     ForeLast          | |   }
8830a52401aSWhitney Tsang         |             | |
8840a52401aSWhitney Tsang     JamLoopFirst  <\  | |   }
8850a52401aSWhitney Tsang      Blocks        |  | |   } JamLoopBlocks of the innermost loop
8860a52401aSWhitney Tsang     JamLoopLast   -/  | |   }
8870a52401aSWhitney Tsang         |             | |
8880a52401aSWhitney Tsang     AftFirst          | |   }
8890a52401aSWhitney Tsang      Blocks           | |   } AftBlocks of a inner loop of L
8900a52401aSWhitney Tsang     AftLast     ------/ |   }
8910a52401aSWhitney Tsang         |               |
8920a52401aSWhitney Tsang        ...              |
893963401d2SDavid Green         |               |
894963401d2SDavid Green     AftFirst            |   }
8950a52401aSWhitney Tsang      Blocks             |   } AftBlocks of L
8960a52401aSWhitney Tsang     AftLast     --------/   }
897963401d2SDavid Green         |
898963401d2SDavid Green 
899963401d2SDavid Green     There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
900963401d2SDavid Green     and AftBlocks, providing that there is one edge from Fores to SubLoops,
901963401d2SDavid Green     one edge from SubLoops to Afts and a single outer loop exit (from Afts).
902963401d2SDavid Green     In practice we currently limit Aft blocks to a single block, and limit
903963401d2SDavid Green     things further in the profitablility checks of the unroll and jam pass.
904963401d2SDavid Green 
905963401d2SDavid Green     Because of the way we rearrange basic blocks, we also require that
9060a52401aSWhitney Tsang     the Fore blocks of L on all unrolled iterations are safe to move before the
9070a52401aSWhitney Tsang     blocks of the direct child of L of all iterations. So we require that the
9080a52401aSWhitney Tsang     phi node looping operands of ForeHeader can be moved to at least the end of
9090a52401aSWhitney Tsang     ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and
9100a52401aSWhitney Tsang     match up Phi's correctly.
911963401d2SDavid Green 
9120a52401aSWhitney Tsang     i.e. The old order of blocks used to be
9130a52401aSWhitney Tsang            (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2.
9140a52401aSWhitney Tsang          It needs to be safe to transform this to
9150a52401aSWhitney Tsang            (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2.
916963401d2SDavid Green 
917963401d2SDavid Green     There are then a number of checks along the lines of no calls, no
918963401d2SDavid Green     exceptions, inner loop IV is consistent, etc. Note that for loops requiring
919963401d2SDavid Green     runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
920963401d2SDavid Green     UnrollAndJamLoop if the trip count cannot be easily calculated.
921963401d2SDavid Green   */
922963401d2SDavid Green 
923963401d2SDavid Green   // Split blocks into Fore/SubLoop/Aft based on dominators
9240a52401aSWhitney Tsang   Loop *JamLoop = getInnerMostLoop(L);
9252557e437SDavid Green   BasicBlockSet SubLoopBlocks;
9260a52401aSWhitney Tsang   DenseMap<Loop *, BasicBlockSet> ForeBlocksMap;
9270a52401aSWhitney Tsang   DenseMap<Loop *, BasicBlockSet> AftBlocksMap;
9280a52401aSWhitney Tsang   if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap,
9290a52401aSWhitney Tsang                                 AftBlocksMap, DT)) {
930bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
931963401d2SDavid Green     return false;
932bc2e1c3aSDavid Green   }
933963401d2SDavid Green 
934963401d2SDavid Green   // Aft blocks may need to move instructions to fore blocks, which becomes more
935963401d2SDavid Green   // difficult if there are multiple (potentially conditionally executed)
936963401d2SDavid Green   // blocks. For now we just exclude loops with multiple aft blocks.
9370a52401aSWhitney Tsang   if (AftBlocksMap[L].size() != 1) {
938bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
939bc2e1c3aSDavid Green                          "multiple blocks after the loop\n");
940963401d2SDavid Green     return false;
941bc2e1c3aSDavid Green   }
942963401d2SDavid Green 
943bc2e1c3aSDavid Green   // Check inner loop backedge count is consistent on all iterations of the
944bc2e1c3aSDavid Green   // outer loop
9450a52401aSWhitney Tsang   if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) {
9460a52401aSWhitney Tsang         return !hasIterationCountInvariantInParent(SubLoop, SE);
9470a52401aSWhitney Tsang       })) {
948bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
949bc2e1c3aSDavid Green                          "not consistent on each iteration\n");
950bc2e1c3aSDavid Green     return false;
951bc2e1c3aSDavid Green   }
952963401d2SDavid Green 
953963401d2SDavid Green   // Check the loop safety info for exceptions.
9549c90ec2fSMax Kazantsev   SimpleLoopSafetyInfo LSI;
955530b8d1cSMax Kazantsev   LSI.computeLoopSafetyInfo(L);
956530b8d1cSMax Kazantsev   if (LSI.anyBlockMayThrow()) {
957bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
958963401d2SDavid Green     return false;
959bc2e1c3aSDavid Green   }
960963401d2SDavid Green 
961963401d2SDavid Green   // We've ruled out the easy stuff and now need to check that there are no
962963401d2SDavid Green   // interdependencies which may prevent us from moving the:
963963401d2SDavid Green   //  ForeBlocks before Subloop and AftBlocks.
964963401d2SDavid Green   //  Subloop before AftBlocks.
965963401d2SDavid Green   //  ForeBlock phi operands before the subloop
966963401d2SDavid Green 
967963401d2SDavid Green   // Make sure we can move all instructions we need to before the subloop
9680a52401aSWhitney Tsang   BasicBlock *Header = L->getHeader();
9690a52401aSWhitney Tsang   BasicBlock *Latch = L->getLoopLatch();
9700a52401aSWhitney Tsang   BasicBlockSet AftBlocks = AftBlocksMap[L];
9710a52401aSWhitney Tsang   Loop *SubLoop = L->getSubLoops()[0];
972eda3c9efSDavid Green   if (!processHeaderPhiOperands(
973eda3c9efSDavid Green           Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
974963401d2SDavid Green             if (SubLoop->contains(I->getParent()))
975963401d2SDavid Green               return false;
9762557e437SDavid Green             if (AftBlocks.count(I->getParent())) {
977eda3c9efSDavid Green               // If we hit a phi node in afts we know we are done (probably
978eda3c9efSDavid Green               // LCSSA)
979963401d2SDavid Green               if (isa<PHINode>(I))
980963401d2SDavid Green                 return false;
981eda3c9efSDavid Green               // Can't move instructions with side effects or memory
982eda3c9efSDavid Green               // reads/writes
983963401d2SDavid Green               if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
984963401d2SDavid Green                 return false;
985963401d2SDavid Green             }
986eda3c9efSDavid Green             // Keep going
987eda3c9efSDavid Green             return true;
988bc2e1c3aSDavid Green           })) {
989bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
990bc2e1c3aSDavid Green                          "instructions after subloop to before it\n");
991eda3c9efSDavid Green     return false;
992bc2e1c3aSDavid Green   }
993963401d2SDavid Green 
994963401d2SDavid Green   // Check for memory dependencies which prohibit the unrolling we are doing.
995963401d2SDavid Green   // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
996963401d2SDavid Green   // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
9970a52401aSWhitney Tsang   if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI,
9980a52401aSWhitney Tsang                          LI)) {
999bc2e1c3aSDavid Green     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
1000963401d2SDavid Green     return false;
1001bc2e1c3aSDavid Green   }
1002963401d2SDavid Green 
1003963401d2SDavid Green   return true;
1004963401d2SDavid Green }
1005