14ba319b5SDimitry Andric //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
24ba319b5SDimitry Andric //
34ba319b5SDimitry Andric //                     The LLVM Compiler Infrastructure
44ba319b5SDimitry Andric //
54ba319b5SDimitry Andric // This file is distributed under the University of Illinois Open Source
64ba319b5SDimitry Andric // License. See LICENSE.TXT for details.
74ba319b5SDimitry Andric //
84ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
94ba319b5SDimitry Andric //
104ba319b5SDimitry Andric // This file implements loop unroll and jam as a routine, much like
114ba319b5SDimitry Andric // LoopUnroll.cpp implements loop unroll.
124ba319b5SDimitry Andric //
134ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
144ba319b5SDimitry Andric 
154ba319b5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
164ba319b5SDimitry Andric #include "llvm/ADT/Statistic.h"
174ba319b5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
184ba319b5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
194ba319b5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
204ba319b5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
214ba319b5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
224ba319b5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
234ba319b5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
244ba319b5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
254ba319b5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpander.h"
264ba319b5SDimitry Andric #include "llvm/Analysis/Utils/Local.h"
274ba319b5SDimitry Andric #include "llvm/IR/BasicBlock.h"
284ba319b5SDimitry Andric #include "llvm/IR/DataLayout.h"
294ba319b5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
304ba319b5SDimitry Andric #include "llvm/IR/Dominators.h"
314ba319b5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
324ba319b5SDimitry Andric #include "llvm/IR/LLVMContext.h"
334ba319b5SDimitry Andric #include "llvm/Support/Debug.h"
344ba319b5SDimitry Andric #include "llvm/Support/raw_ostream.h"
354ba319b5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
364ba319b5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
374ba319b5SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h"
384ba319b5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
394ba319b5SDimitry Andric #include "llvm/Transforms/Utils/SimplifyIndVar.h"
404ba319b5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
414ba319b5SDimitry Andric using namespace llvm;
424ba319b5SDimitry Andric 
434ba319b5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam"
444ba319b5SDimitry Andric 
454ba319b5SDimitry Andric STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
464ba319b5SDimitry Andric STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
474ba319b5SDimitry Andric 
484ba319b5SDimitry Andric typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
494ba319b5SDimitry Andric 
504ba319b5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after
514ba319b5SDimitry Andric // the loop
partitionOuterLoopBlocks(Loop * L,Loop * SubLoop,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DominatorTree * DT)524ba319b5SDimitry Andric static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
534ba319b5SDimitry Andric                                      BasicBlockSet &ForeBlocks,
544ba319b5SDimitry Andric                                      BasicBlockSet &SubLoopBlocks,
554ba319b5SDimitry Andric                                      BasicBlockSet &AftBlocks,
564ba319b5SDimitry Andric                                      DominatorTree *DT) {
574ba319b5SDimitry Andric   BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
584ba319b5SDimitry Andric   SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
594ba319b5SDimitry Andric 
604ba319b5SDimitry Andric   for (BasicBlock *BB : L->blocks()) {
614ba319b5SDimitry Andric     if (!SubLoop->contains(BB)) {
624ba319b5SDimitry Andric       if (DT->dominates(SubLoopLatch, BB))
634ba319b5SDimitry Andric         AftBlocks.insert(BB);
644ba319b5SDimitry Andric       else
654ba319b5SDimitry Andric         ForeBlocks.insert(BB);
664ba319b5SDimitry Andric     }
674ba319b5SDimitry Andric   }
684ba319b5SDimitry Andric 
694ba319b5SDimitry Andric   // Check that all blocks in ForeBlocks together dominate the subloop
704ba319b5SDimitry Andric   // TODO: This might ideally be done better with a dominator/postdominators.
714ba319b5SDimitry Andric   BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
724ba319b5SDimitry Andric   for (BasicBlock *BB : ForeBlocks) {
734ba319b5SDimitry Andric     if (BB == SubLoopPreHeader)
744ba319b5SDimitry Andric       continue;
75*b5893f02SDimitry Andric     Instruction *TI = BB->getTerminator();
764ba319b5SDimitry Andric     for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
774ba319b5SDimitry Andric       if (!ForeBlocks.count(TI->getSuccessor(i)))
784ba319b5SDimitry Andric         return false;
794ba319b5SDimitry Andric   }
804ba319b5SDimitry Andric 
814ba319b5SDimitry Andric   return true;
824ba319b5SDimitry Andric }
834ba319b5SDimitry Andric 
844ba319b5SDimitry Andric // Looks at the phi nodes in Header for values coming from Latch. For these
854ba319b5SDimitry Andric // instructions and all their operands calls Visit on them, keeping going for
864ba319b5SDimitry Andric // all the operands in AftBlocks. Returns false if Visit returns false,
874ba319b5SDimitry Andric // otherwise returns true. This is used to process the instructions in the
884ba319b5SDimitry Andric // Aft blocks that need to be moved before the subloop. It is used in two
894ba319b5SDimitry Andric // places. One to check that the required set of instructions can be moved
904ba319b5SDimitry Andric // before the loop. Then to collect the instructions to actually move in
914ba319b5SDimitry Andric // moveHeaderPhiOperandsToForeBlocks.
924ba319b5SDimitry Andric template <typename T>
processHeaderPhiOperands(BasicBlock * Header,BasicBlock * Latch,BasicBlockSet & AftBlocks,T Visit)934ba319b5SDimitry Andric static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
944ba319b5SDimitry Andric                                      BasicBlockSet &AftBlocks, T Visit) {
954ba319b5SDimitry Andric   SmallVector<Instruction *, 8> Worklist;
964ba319b5SDimitry Andric   for (auto &Phi : Header->phis()) {
974ba319b5SDimitry Andric     Value *V = Phi.getIncomingValueForBlock(Latch);
984ba319b5SDimitry Andric     if (Instruction *I = dyn_cast<Instruction>(V))
994ba319b5SDimitry Andric       Worklist.push_back(I);
1004ba319b5SDimitry Andric   }
1014ba319b5SDimitry Andric 
1024ba319b5SDimitry Andric   while (!Worklist.empty()) {
1034ba319b5SDimitry Andric     Instruction *I = Worklist.back();
1044ba319b5SDimitry Andric     Worklist.pop_back();
1054ba319b5SDimitry Andric     if (!Visit(I))
1064ba319b5SDimitry Andric       return false;
1074ba319b5SDimitry Andric 
1084ba319b5SDimitry Andric     if (AftBlocks.count(I->getParent()))
1094ba319b5SDimitry Andric       for (auto &U : I->operands())
1104ba319b5SDimitry Andric         if (Instruction *II = dyn_cast<Instruction>(U))
1114ba319b5SDimitry Andric           Worklist.push_back(II);
1124ba319b5SDimitry Andric   }
1134ba319b5SDimitry Andric 
1144ba319b5SDimitry Andric   return true;
1154ba319b5SDimitry Andric }
1164ba319b5SDimitry Andric 
1174ba319b5SDimitry Andric // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
moveHeaderPhiOperandsToForeBlocks(BasicBlock * Header,BasicBlock * Latch,Instruction * InsertLoc,BasicBlockSet & AftBlocks)1184ba319b5SDimitry Andric static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
1194ba319b5SDimitry Andric                                               BasicBlock *Latch,
1204ba319b5SDimitry Andric                                               Instruction *InsertLoc,
1214ba319b5SDimitry Andric                                               BasicBlockSet &AftBlocks) {
1224ba319b5SDimitry Andric   // We need to ensure we move the instructions in the correct order,
1234ba319b5SDimitry Andric   // starting with the earliest required instruction and moving forward.
1244ba319b5SDimitry Andric   std::vector<Instruction *> Visited;
1254ba319b5SDimitry Andric   processHeaderPhiOperands(Header, Latch, AftBlocks,
1264ba319b5SDimitry Andric                            [&Visited, &AftBlocks](Instruction *I) {
1274ba319b5SDimitry Andric                              if (AftBlocks.count(I->getParent()))
1284ba319b5SDimitry Andric                                Visited.push_back(I);
1294ba319b5SDimitry Andric                              return true;
1304ba319b5SDimitry Andric                            });
1314ba319b5SDimitry Andric 
1324ba319b5SDimitry Andric   // Move all instructions in program order to before the InsertLoc
1334ba319b5SDimitry Andric   BasicBlock *InsertLocBB = InsertLoc->getParent();
1344ba319b5SDimitry Andric   for (Instruction *I : reverse(Visited)) {
1354ba319b5SDimitry Andric     if (I->getParent() != InsertLocBB)
1364ba319b5SDimitry Andric       I->moveBefore(InsertLoc);
1374ba319b5SDimitry Andric   }
1384ba319b5SDimitry Andric }
1394ba319b5SDimitry Andric 
1404ba319b5SDimitry Andric /*
1414ba319b5SDimitry Andric   This method performs Unroll and Jam. For a simple loop like:
1424ba319b5SDimitry Andric   for (i = ..)
1434ba319b5SDimitry Andric     Fore(i)
1444ba319b5SDimitry Andric     for (j = ..)
1454ba319b5SDimitry Andric       SubLoop(i, j)
1464ba319b5SDimitry Andric     Aft(i)
1474ba319b5SDimitry Andric 
1484ba319b5SDimitry Andric   Instead of doing normal inner or outer unrolling, we do:
1494ba319b5SDimitry Andric   for (i = .., i+=2)
1504ba319b5SDimitry Andric     Fore(i)
1514ba319b5SDimitry Andric     Fore(i+1)
1524ba319b5SDimitry Andric     for (j = ..)
1534ba319b5SDimitry Andric       SubLoop(i, j)
1544ba319b5SDimitry Andric       SubLoop(i+1, j)
1554ba319b5SDimitry Andric     Aft(i)
1564ba319b5SDimitry Andric     Aft(i+1)
1574ba319b5SDimitry Andric 
1584ba319b5SDimitry Andric   So the outer loop is essetially unrolled and then the inner loops are fused
1594ba319b5SDimitry Andric   ("jammed") together into a single loop. This can increase speed when there
1604ba319b5SDimitry Andric   are loads in SubLoop that are invariant to i, as they become shared between
1614ba319b5SDimitry Andric   the now jammed inner loops.
1624ba319b5SDimitry Andric 
1634ba319b5SDimitry Andric   We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
1644ba319b5SDimitry Andric   Fore blocks are those before the inner loop, Aft are those after. Normal
1654ba319b5SDimitry Andric   Unroll code is used to copy each of these sets of blocks and the results are
1664ba319b5SDimitry Andric   combined together into the final form above.
1674ba319b5SDimitry Andric 
1684ba319b5SDimitry Andric   isSafeToUnrollAndJam should be used prior to calling this to make sure the
1694ba319b5SDimitry Andric   unrolling will be valid. Checking profitablility is also advisable.
170*b5893f02SDimitry Andric 
171*b5893f02SDimitry Andric   If EpilogueLoop is non-null, it receives the epilogue loop (if it was
172*b5893f02SDimitry Andric   necessary to create one and not fully unrolled).
1734ba319b5SDimitry Andric */
UnrollAndJamLoop(Loop * L,unsigned Count,unsigned TripCount,unsigned TripMultiple,bool UnrollRemainder,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,OptimizationRemarkEmitter * ORE,Loop ** EpilogueLoop)174*b5893f02SDimitry Andric LoopUnrollResult llvm::UnrollAndJamLoop(
175*b5893f02SDimitry Andric     Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
176*b5893f02SDimitry Andric     bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
177*b5893f02SDimitry Andric     AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
1784ba319b5SDimitry Andric 
1794ba319b5SDimitry Andric   // When we enter here we should have already checked that it is safe
1804ba319b5SDimitry Andric   BasicBlock *Header = L->getHeader();
1814ba319b5SDimitry Andric   assert(L->getSubLoops().size() == 1);
1824ba319b5SDimitry Andric   Loop *SubLoop = *L->begin();
1834ba319b5SDimitry Andric 
1844ba319b5SDimitry Andric   // Don't enter the unroll code if there is nothing to do.
1854ba319b5SDimitry Andric   if (TripCount == 0 && Count < 2) {
186*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
1874ba319b5SDimitry Andric     return LoopUnrollResult::Unmodified;
1884ba319b5SDimitry Andric   }
1894ba319b5SDimitry Andric 
1904ba319b5SDimitry Andric   assert(Count > 0);
1914ba319b5SDimitry Andric   assert(TripMultiple > 0);
1924ba319b5SDimitry Andric   assert(TripCount == 0 || TripCount % TripMultiple == 0);
1934ba319b5SDimitry Andric 
1944ba319b5SDimitry Andric   // Are we eliminating the loop control altogether?
1954ba319b5SDimitry Andric   bool CompletelyUnroll = (Count == TripCount);
1964ba319b5SDimitry Andric 
1974ba319b5SDimitry Andric   // We use the runtime remainder in cases where we don't know trip multiple
1984ba319b5SDimitry Andric   if (TripMultiple == 1 || TripMultiple % Count != 0) {
1994ba319b5SDimitry Andric     if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
2004ba319b5SDimitry Andric                                     /*UseEpilogRemainder*/ true,
201*b5893f02SDimitry Andric                                     UnrollRemainder, LI, SE, DT, AC, true,
202*b5893f02SDimitry Andric                                     EpilogueLoop)) {
2034ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
2044ba319b5SDimitry Andric                            "generated when assuming runtime trip count\n");
2054ba319b5SDimitry Andric       return LoopUnrollResult::Unmodified;
2064ba319b5SDimitry Andric     }
2074ba319b5SDimitry Andric   }
2084ba319b5SDimitry Andric 
2094ba319b5SDimitry Andric   // Notify ScalarEvolution that the loop will be substantially changed,
2104ba319b5SDimitry Andric   // if not outright eliminated.
2114ba319b5SDimitry Andric   if (SE) {
2124ba319b5SDimitry Andric     SE->forgetLoop(L);
2134ba319b5SDimitry Andric     SE->forgetLoop(SubLoop);
2144ba319b5SDimitry Andric   }
2154ba319b5SDimitry Andric 
2164ba319b5SDimitry Andric   using namespace ore;
2174ba319b5SDimitry Andric   // Report the unrolling decision.
2184ba319b5SDimitry Andric   if (CompletelyUnroll) {
2194ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
2204ba319b5SDimitry Andric                       << Header->getName() << " with trip count " << TripCount
2214ba319b5SDimitry Andric                       << "!\n");
2224ba319b5SDimitry Andric     ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
2234ba319b5SDimitry Andric                                  L->getHeader())
2244ba319b5SDimitry Andric               << "completely unroll and jammed loop with "
2254ba319b5SDimitry Andric               << NV("UnrollCount", TripCount) << " iterations");
2264ba319b5SDimitry Andric   } else {
2274ba319b5SDimitry Andric     auto DiagBuilder = [&]() {
2284ba319b5SDimitry Andric       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
2294ba319b5SDimitry Andric                               L->getHeader());
2304ba319b5SDimitry Andric       return Diag << "unroll and jammed loop by a factor of "
2314ba319b5SDimitry Andric                   << NV("UnrollCount", Count);
2324ba319b5SDimitry Andric     };
2334ba319b5SDimitry Andric 
2344ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
2354ba319b5SDimitry Andric                       << " by " << Count);
2364ba319b5SDimitry Andric     if (TripMultiple != 1) {
2374ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
2384ba319b5SDimitry Andric       ORE->emit([&]() {
2394ba319b5SDimitry Andric         return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
2404ba319b5SDimitry Andric                              << " trips per branch";
2414ba319b5SDimitry Andric       });
2424ba319b5SDimitry Andric     } else {
2434ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << " with run-time trip count");
2444ba319b5SDimitry Andric       ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
2454ba319b5SDimitry Andric     }
2464ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "!\n");
2474ba319b5SDimitry Andric   }
2484ba319b5SDimitry Andric 
2494ba319b5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
2504ba319b5SDimitry Andric   BasicBlock *LatchBlock = L->getLoopLatch();
2514ba319b5SDimitry Andric   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
2524ba319b5SDimitry Andric   assert(Preheader && LatchBlock && Header);
2534ba319b5SDimitry Andric   assert(BI && !BI->isUnconditional());
2544ba319b5SDimitry Andric   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
2554ba319b5SDimitry Andric   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
2564ba319b5SDimitry Andric   bool SubLoopContinueOnTrue = SubLoop->contains(
2574ba319b5SDimitry Andric       SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
2584ba319b5SDimitry Andric 
2594ba319b5SDimitry Andric   // Partition blocks in an outer/inner loop pair into blocks before and after
2604ba319b5SDimitry Andric   // the loop
2614ba319b5SDimitry Andric   BasicBlockSet SubLoopBlocks;
2624ba319b5SDimitry Andric   BasicBlockSet ForeBlocks;
2634ba319b5SDimitry Andric   BasicBlockSet AftBlocks;
2644ba319b5SDimitry Andric   partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
2654ba319b5SDimitry Andric                            DT);
2664ba319b5SDimitry Andric 
2674ba319b5SDimitry Andric   // We keep track of the entering/first and exiting/last block of each of
2684ba319b5SDimitry Andric   // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
2694ba319b5SDimitry Andric   // blocks easier.
2704ba319b5SDimitry Andric   std::vector<BasicBlock *> ForeBlocksFirst;
2714ba319b5SDimitry Andric   std::vector<BasicBlock *> ForeBlocksLast;
2724ba319b5SDimitry Andric   std::vector<BasicBlock *> SubLoopBlocksFirst;
2734ba319b5SDimitry Andric   std::vector<BasicBlock *> SubLoopBlocksLast;
2744ba319b5SDimitry Andric   std::vector<BasicBlock *> AftBlocksFirst;
2754ba319b5SDimitry Andric   std::vector<BasicBlock *> AftBlocksLast;
2764ba319b5SDimitry Andric   ForeBlocksFirst.push_back(Header);
2774ba319b5SDimitry Andric   ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
2784ba319b5SDimitry Andric   SubLoopBlocksFirst.push_back(SubLoop->getHeader());
2794ba319b5SDimitry Andric   SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
2804ba319b5SDimitry Andric   AftBlocksFirst.push_back(SubLoop->getExitBlock());
2814ba319b5SDimitry Andric   AftBlocksLast.push_back(L->getExitingBlock());
2824ba319b5SDimitry Andric   // Maps Blocks[0] -> Blocks[It]
2834ba319b5SDimitry Andric   ValueToValueMapTy LastValueMap;
2844ba319b5SDimitry Andric 
2854ba319b5SDimitry Andric   // Move any instructions from fore phi operands from AftBlocks into Fore.
2864ba319b5SDimitry Andric   moveHeaderPhiOperandsToForeBlocks(
2874ba319b5SDimitry Andric       Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
2884ba319b5SDimitry Andric       AftBlocks);
2894ba319b5SDimitry Andric 
2904ba319b5SDimitry Andric   // The current on-the-fly SSA update requires blocks to be processed in
2914ba319b5SDimitry Andric   // reverse postorder so that LastValueMap contains the correct value at each
2924ba319b5SDimitry Andric   // exit.
2934ba319b5SDimitry Andric   LoopBlocksDFS DFS(L);
2944ba319b5SDimitry Andric   DFS.perform(LI);
2954ba319b5SDimitry Andric   // Stash the DFS iterators before adding blocks to the loop.
2964ba319b5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
2974ba319b5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
2984ba319b5SDimitry Andric 
2994ba319b5SDimitry Andric   if (Header->getParent()->isDebugInfoForProfiling())
3004ba319b5SDimitry Andric     for (BasicBlock *BB : L->getBlocks())
3014ba319b5SDimitry Andric       for (Instruction &I : *BB)
3024ba319b5SDimitry Andric         if (!isa<DbgInfoIntrinsic>(&I))
303*b5893f02SDimitry Andric           if (const DILocation *DIL = I.getDebugLoc()) {
304*b5893f02SDimitry Andric             auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
305*b5893f02SDimitry Andric             if (NewDIL)
306*b5893f02SDimitry Andric               I.setDebugLoc(NewDIL.getValue());
307*b5893f02SDimitry Andric             else
308*b5893f02SDimitry Andric               LLVM_DEBUG(dbgs()
309*b5893f02SDimitry Andric                          << "Failed to create new discriminator: "
310*b5893f02SDimitry Andric                          << DIL->getFilename() << " Line: " << DIL->getLine());
311*b5893f02SDimitry Andric           }
3124ba319b5SDimitry Andric 
3134ba319b5SDimitry Andric   // Copy all blocks
3144ba319b5SDimitry Andric   for (unsigned It = 1; It != Count; ++It) {
3154ba319b5SDimitry Andric     std::vector<BasicBlock *> NewBlocks;
3164ba319b5SDimitry Andric     // Maps Blocks[It] -> Blocks[It-1]
3174ba319b5SDimitry Andric     DenseMap<Value *, Value *> PrevItValueMap;
3184ba319b5SDimitry Andric 
3194ba319b5SDimitry Andric     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3204ba319b5SDimitry Andric       ValueToValueMapTy VMap;
3214ba319b5SDimitry Andric       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
3224ba319b5SDimitry Andric       Header->getParent()->getBasicBlockList().push_back(New);
3234ba319b5SDimitry Andric 
3244ba319b5SDimitry Andric       if (ForeBlocks.count(*BB)) {
3254ba319b5SDimitry Andric         L->addBasicBlockToLoop(New, *LI);
3264ba319b5SDimitry Andric 
3274ba319b5SDimitry Andric         if (*BB == ForeBlocksFirst[0])
3284ba319b5SDimitry Andric           ForeBlocksFirst.push_back(New);
3294ba319b5SDimitry Andric         if (*BB == ForeBlocksLast[0])
3304ba319b5SDimitry Andric           ForeBlocksLast.push_back(New);
3314ba319b5SDimitry Andric       } else if (SubLoopBlocks.count(*BB)) {
3324ba319b5SDimitry Andric         SubLoop->addBasicBlockToLoop(New, *LI);
3334ba319b5SDimitry Andric 
3344ba319b5SDimitry Andric         if (*BB == SubLoopBlocksFirst[0])
3354ba319b5SDimitry Andric           SubLoopBlocksFirst.push_back(New);
3364ba319b5SDimitry Andric         if (*BB == SubLoopBlocksLast[0])
3374ba319b5SDimitry Andric           SubLoopBlocksLast.push_back(New);
3384ba319b5SDimitry Andric       } else if (AftBlocks.count(*BB)) {
3394ba319b5SDimitry Andric         L->addBasicBlockToLoop(New, *LI);
3404ba319b5SDimitry Andric 
3414ba319b5SDimitry Andric         if (*BB == AftBlocksFirst[0])
3424ba319b5SDimitry Andric           AftBlocksFirst.push_back(New);
3434ba319b5SDimitry Andric         if (*BB == AftBlocksLast[0])
3444ba319b5SDimitry Andric           AftBlocksLast.push_back(New);
3454ba319b5SDimitry Andric       } else {
3464ba319b5SDimitry Andric         llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
3474ba319b5SDimitry Andric       }
3484ba319b5SDimitry Andric 
3494ba319b5SDimitry Andric       // Update our running maps of newest clones
3504ba319b5SDimitry Andric       PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
3514ba319b5SDimitry Andric       LastValueMap[*BB] = New;
3524ba319b5SDimitry Andric       for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
3534ba319b5SDimitry Andric            VI != VE; ++VI) {
3544ba319b5SDimitry Andric         PrevItValueMap[VI->second] =
3554ba319b5SDimitry Andric             const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
3564ba319b5SDimitry Andric         LastValueMap[VI->first] = VI->second;
3574ba319b5SDimitry Andric       }
3584ba319b5SDimitry Andric 
3594ba319b5SDimitry Andric       NewBlocks.push_back(New);
3604ba319b5SDimitry Andric 
3614ba319b5SDimitry Andric       // Update DomTree:
3624ba319b5SDimitry Andric       if (*BB == ForeBlocksFirst[0])
3634ba319b5SDimitry Andric         DT->addNewBlock(New, ForeBlocksLast[It - 1]);
3644ba319b5SDimitry Andric       else if (*BB == SubLoopBlocksFirst[0])
3654ba319b5SDimitry Andric         DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
3664ba319b5SDimitry Andric       else if (*BB == AftBlocksFirst[0])
3674ba319b5SDimitry Andric         DT->addNewBlock(New, AftBlocksLast[It - 1]);
3684ba319b5SDimitry Andric       else {
3694ba319b5SDimitry Andric         // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
3704ba319b5SDimitry Andric         // structure.
3714ba319b5SDimitry Andric         auto BBDomNode = DT->getNode(*BB);
3724ba319b5SDimitry Andric         auto BBIDom = BBDomNode->getIDom();
3734ba319b5SDimitry Andric         BasicBlock *OriginalBBIDom = BBIDom->getBlock();
3744ba319b5SDimitry Andric         assert(OriginalBBIDom);
3754ba319b5SDimitry Andric         assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
3764ba319b5SDimitry Andric         DT->addNewBlock(
3774ba319b5SDimitry Andric             New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
3784ba319b5SDimitry Andric       }
3794ba319b5SDimitry Andric     }
3804ba319b5SDimitry Andric 
3814ba319b5SDimitry Andric     // Remap all instructions in the most recent iteration
3824ba319b5SDimitry Andric     for (BasicBlock *NewBlock : NewBlocks) {
3834ba319b5SDimitry Andric       for (Instruction &I : *NewBlock) {
3844ba319b5SDimitry Andric         ::remapInstruction(&I, LastValueMap);
3854ba319b5SDimitry Andric         if (auto *II = dyn_cast<IntrinsicInst>(&I))
3864ba319b5SDimitry Andric           if (II->getIntrinsicID() == Intrinsic::assume)
3874ba319b5SDimitry Andric             AC->registerAssumption(II);
3884ba319b5SDimitry Andric       }
3894ba319b5SDimitry Andric     }
3904ba319b5SDimitry Andric 
3914ba319b5SDimitry Andric     // Alter the ForeBlocks phi's, pointing them at the latest version of the
3924ba319b5SDimitry Andric     // value from the previous iteration's phis
3934ba319b5SDimitry Andric     for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
3944ba319b5SDimitry Andric       Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
3954ba319b5SDimitry Andric       assert(OldValue && "should have incoming edge from Aft[It]");
3964ba319b5SDimitry Andric       Value *NewValue = OldValue;
3974ba319b5SDimitry Andric       if (Value *PrevValue = PrevItValueMap[OldValue])
3984ba319b5SDimitry Andric         NewValue = PrevValue;
3994ba319b5SDimitry Andric 
4004ba319b5SDimitry Andric       assert(Phi.getNumOperands() == 2);
4014ba319b5SDimitry Andric       Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
4024ba319b5SDimitry Andric       Phi.setIncomingValue(0, NewValue);
4034ba319b5SDimitry Andric       Phi.removeIncomingValue(1);
4044ba319b5SDimitry Andric     }
4054ba319b5SDimitry Andric   }
4064ba319b5SDimitry Andric 
4074ba319b5SDimitry Andric   // Now that all the basic blocks for the unrolled iterations are in place,
4084ba319b5SDimitry Andric   // finish up connecting the blocks and phi nodes. At this point LastValueMap
4094ba319b5SDimitry Andric   // is the last unrolled iterations values.
4104ba319b5SDimitry Andric 
4114ba319b5SDimitry Andric   // Update Phis in BB from OldBB to point to NewBB
4124ba319b5SDimitry Andric   auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
4134ba319b5SDimitry Andric                             BasicBlock *NewBB) {
4144ba319b5SDimitry Andric     for (PHINode &Phi : BB->phis()) {
4154ba319b5SDimitry Andric       int I = Phi.getBasicBlockIndex(OldBB);
4164ba319b5SDimitry Andric       Phi.setIncomingBlock(I, NewBB);
4174ba319b5SDimitry Andric     }
4184ba319b5SDimitry Andric   };
4194ba319b5SDimitry Andric   // Update Phis in BB from OldBB to point to NewBB and use the latest value
4204ba319b5SDimitry Andric   // from LastValueMap
4214ba319b5SDimitry Andric   auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
4224ba319b5SDimitry Andric                                      BasicBlock *NewBB,
4234ba319b5SDimitry Andric                                      ValueToValueMapTy &LastValueMap) {
4244ba319b5SDimitry Andric     for (PHINode &Phi : BB->phis()) {
4254ba319b5SDimitry Andric       for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
4264ba319b5SDimitry Andric         if (Phi.getIncomingBlock(b) == OldBB) {
4274ba319b5SDimitry Andric           Value *OldValue = Phi.getIncomingValue(b);
4284ba319b5SDimitry Andric           if (Value *LastValue = LastValueMap[OldValue])
4294ba319b5SDimitry Andric             Phi.setIncomingValue(b, LastValue);
4304ba319b5SDimitry Andric           Phi.setIncomingBlock(b, NewBB);
4314ba319b5SDimitry Andric           break;
4324ba319b5SDimitry Andric         }
4334ba319b5SDimitry Andric       }
4344ba319b5SDimitry Andric     }
4354ba319b5SDimitry Andric   };
4364ba319b5SDimitry Andric   // Move all the phis from Src into Dest
4374ba319b5SDimitry Andric   auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
4384ba319b5SDimitry Andric     Instruction *insertPoint = Dest->getFirstNonPHI();
4394ba319b5SDimitry Andric     while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
4404ba319b5SDimitry Andric       Phi->moveBefore(insertPoint);
4414ba319b5SDimitry Andric   };
4424ba319b5SDimitry Andric 
4434ba319b5SDimitry Andric   // Update the PHI values outside the loop to point to the last block
4444ba319b5SDimitry Andric   updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
4454ba319b5SDimitry Andric                            LastValueMap);
4464ba319b5SDimitry Andric 
4474ba319b5SDimitry Andric   // Update ForeBlocks successors and phi nodes
4484ba319b5SDimitry Andric   BranchInst *ForeTerm =
4494ba319b5SDimitry Andric       cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
4504ba319b5SDimitry Andric   BasicBlock *Dest = SubLoopBlocksFirst[0];
4514ba319b5SDimitry Andric   ForeTerm->setSuccessor(0, Dest);
4524ba319b5SDimitry Andric 
4534ba319b5SDimitry Andric   if (CompletelyUnroll) {
4544ba319b5SDimitry Andric     while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
4554ba319b5SDimitry Andric       Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
4564ba319b5SDimitry Andric       Phi->getParent()->getInstList().erase(Phi);
4574ba319b5SDimitry Andric     }
4584ba319b5SDimitry Andric   } else {
4594ba319b5SDimitry Andric     // Update the PHI values to point to the last aft block
4604ba319b5SDimitry Andric     updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
4614ba319b5SDimitry Andric                              AftBlocksLast.back(), LastValueMap);
4624ba319b5SDimitry Andric   }
4634ba319b5SDimitry Andric 
4644ba319b5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
4654ba319b5SDimitry Andric     // Remap ForeBlock successors from previous iteration to this
4664ba319b5SDimitry Andric     BranchInst *ForeTerm =
4674ba319b5SDimitry Andric         cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
4684ba319b5SDimitry Andric     BasicBlock *Dest = ForeBlocksFirst[It];
4694ba319b5SDimitry Andric     ForeTerm->setSuccessor(0, Dest);
4704ba319b5SDimitry Andric   }
4714ba319b5SDimitry Andric 
4724ba319b5SDimitry Andric   // Subloop successors and phis
4734ba319b5SDimitry Andric   BranchInst *SubTerm =
4744ba319b5SDimitry Andric       cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
4754ba319b5SDimitry Andric   SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
4764ba319b5SDimitry Andric   SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
4774ba319b5SDimitry Andric   updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
4784ba319b5SDimitry Andric                   ForeBlocksLast.back());
4794ba319b5SDimitry Andric   updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
4804ba319b5SDimitry Andric                   SubLoopBlocksLast.back());
4814ba319b5SDimitry Andric 
4824ba319b5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
4834ba319b5SDimitry Andric     // Replace the conditional branch of the previous iteration subloop with an
4844ba319b5SDimitry Andric     // unconditional one to this one
4854ba319b5SDimitry Andric     BranchInst *SubTerm =
4864ba319b5SDimitry Andric         cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
4874ba319b5SDimitry Andric     BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
4884ba319b5SDimitry Andric     SubTerm->eraseFromParent();
4894ba319b5SDimitry Andric 
4904ba319b5SDimitry Andric     updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
4914ba319b5SDimitry Andric                     ForeBlocksLast.back());
4924ba319b5SDimitry Andric     updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
4934ba319b5SDimitry Andric                     SubLoopBlocksLast.back());
4944ba319b5SDimitry Andric     movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
4954ba319b5SDimitry Andric   }
4964ba319b5SDimitry Andric 
4974ba319b5SDimitry Andric   // Aft blocks successors and phis
4984ba319b5SDimitry Andric   BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
4994ba319b5SDimitry Andric   if (CompletelyUnroll) {
5004ba319b5SDimitry Andric     BranchInst::Create(LoopExit, Term);
5014ba319b5SDimitry Andric     Term->eraseFromParent();
5024ba319b5SDimitry Andric   } else {
5034ba319b5SDimitry Andric     Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
5044ba319b5SDimitry Andric   }
5054ba319b5SDimitry Andric   updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
5064ba319b5SDimitry Andric                   SubLoopBlocksLast.back());
5074ba319b5SDimitry Andric 
5084ba319b5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
5094ba319b5SDimitry Andric     // Replace the conditional branch of the previous iteration subloop with an
5104ba319b5SDimitry Andric     // unconditional one to this one
5114ba319b5SDimitry Andric     BranchInst *AftTerm =
5124ba319b5SDimitry Andric         cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
5134ba319b5SDimitry Andric     BranchInst::Create(AftBlocksFirst[It], AftTerm);
5144ba319b5SDimitry Andric     AftTerm->eraseFromParent();
5154ba319b5SDimitry Andric 
5164ba319b5SDimitry Andric     updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
5174ba319b5SDimitry Andric                     SubLoopBlocksLast.back());
5184ba319b5SDimitry Andric     movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
5194ba319b5SDimitry Andric   }
5204ba319b5SDimitry Andric 
5214ba319b5SDimitry Andric   // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
5224ba319b5SDimitry Andric   // new ones required.
5234ba319b5SDimitry Andric   if (Count != 1) {
5244ba319b5SDimitry Andric     SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
5254ba319b5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
5264ba319b5SDimitry Andric                            SubLoopBlocksFirst[0]);
5274ba319b5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
5284ba319b5SDimitry Andric                            SubLoopBlocksLast[0], AftBlocksFirst[0]);
5294ba319b5SDimitry Andric 
5304ba319b5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5314ba319b5SDimitry Andric                            ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
5324ba319b5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5334ba319b5SDimitry Andric                            SubLoopBlocksLast.back(), AftBlocksFirst[0]);
5344ba319b5SDimitry Andric     DT->applyUpdates(DTUpdates);
5354ba319b5SDimitry Andric   }
5364ba319b5SDimitry Andric 
5374ba319b5SDimitry Andric   // Merge adjacent basic blocks, if possible.
5384ba319b5SDimitry Andric   SmallPtrSet<BasicBlock *, 16> MergeBlocks;
5394ba319b5SDimitry Andric   MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
5404ba319b5SDimitry Andric   MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
5414ba319b5SDimitry Andric   MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
5424ba319b5SDimitry Andric   while (!MergeBlocks.empty()) {
5434ba319b5SDimitry Andric     BasicBlock *BB = *MergeBlocks.begin();
5444ba319b5SDimitry Andric     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
5454ba319b5SDimitry Andric     if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
5464ba319b5SDimitry Andric       BasicBlock *Dest = Term->getSuccessor(0);
5474ba319b5SDimitry Andric       if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
5484ba319b5SDimitry Andric         // Don't remove BB and add Fold as they are the same BB
5494ba319b5SDimitry Andric         assert(Fold == BB);
5504ba319b5SDimitry Andric         (void)Fold;
5514ba319b5SDimitry Andric         MergeBlocks.erase(Dest);
5524ba319b5SDimitry Andric       } else
5534ba319b5SDimitry Andric         MergeBlocks.erase(BB);
5544ba319b5SDimitry Andric     } else
5554ba319b5SDimitry Andric       MergeBlocks.erase(BB);
5564ba319b5SDimitry Andric   }
5574ba319b5SDimitry Andric 
5584ba319b5SDimitry Andric   // At this point, the code is well formed.  We now do a quick sweep over the
5594ba319b5SDimitry Andric   // inserted code, doing constant propagation and dead code elimination as we
5604ba319b5SDimitry Andric   // go.
5614ba319b5SDimitry Andric   simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
5624ba319b5SDimitry Andric   simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
5634ba319b5SDimitry Andric 
5644ba319b5SDimitry Andric   NumCompletelyUnrolledAndJammed += CompletelyUnroll;
5654ba319b5SDimitry Andric   ++NumUnrolledAndJammed;
5664ba319b5SDimitry Andric 
5674ba319b5SDimitry Andric #ifndef NDEBUG
5684ba319b5SDimitry Andric   // We shouldn't have done anything to break loop simplify form or LCSSA.
5694ba319b5SDimitry Andric   Loop *OuterL = L->getParentLoop();
5704ba319b5SDimitry Andric   Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
5714ba319b5SDimitry Andric   assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
5724ba319b5SDimitry Andric   if (!CompletelyUnroll)
5734ba319b5SDimitry Andric     assert(L->isLoopSimplifyForm());
5744ba319b5SDimitry Andric   assert(SubLoop->isLoopSimplifyForm());
5754ba319b5SDimitry Andric   assert(DT->verify());
5764ba319b5SDimitry Andric #endif
5774ba319b5SDimitry Andric 
5784ba319b5SDimitry Andric   // Update LoopInfo if the loop is completely removed.
5794ba319b5SDimitry Andric   if (CompletelyUnroll)
5804ba319b5SDimitry Andric     LI->erase(L);
5814ba319b5SDimitry Andric 
5824ba319b5SDimitry Andric   return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
5834ba319b5SDimitry Andric                           : LoopUnrollResult::PartiallyUnrolled;
5844ba319b5SDimitry Andric }
5854ba319b5SDimitry Andric 
getLoadsAndStores(BasicBlockSet & Blocks,SmallVector<Value *,4> & MemInstr)5864ba319b5SDimitry Andric static bool getLoadsAndStores(BasicBlockSet &Blocks,
5874ba319b5SDimitry Andric                               SmallVector<Value *, 4> &MemInstr) {
5884ba319b5SDimitry Andric   // Scan the BBs and collect legal loads and stores.
5894ba319b5SDimitry Andric   // Returns false if non-simple loads/stores are found.
5904ba319b5SDimitry Andric   for (BasicBlock *BB : Blocks) {
5914ba319b5SDimitry Andric     for (Instruction &I : *BB) {
5924ba319b5SDimitry Andric       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
5934ba319b5SDimitry Andric         if (!Ld->isSimple())
5944ba319b5SDimitry Andric           return false;
5954ba319b5SDimitry Andric         MemInstr.push_back(&I);
5964ba319b5SDimitry Andric       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
5974ba319b5SDimitry Andric         if (!St->isSimple())
5984ba319b5SDimitry Andric           return false;
5994ba319b5SDimitry Andric         MemInstr.push_back(&I);
6004ba319b5SDimitry Andric       } else if (I.mayReadOrWriteMemory()) {
6014ba319b5SDimitry Andric         return false;
6024ba319b5SDimitry Andric       }
6034ba319b5SDimitry Andric     }
6044ba319b5SDimitry Andric   }
6054ba319b5SDimitry Andric   return true;
6064ba319b5SDimitry Andric }
6074ba319b5SDimitry Andric 
checkDependencies(SmallVector<Value *,4> & Earlier,SmallVector<Value *,4> & Later,unsigned LoopDepth,bool InnerLoop,DependenceInfo & DI)6084ba319b5SDimitry Andric static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
6094ba319b5SDimitry Andric                               SmallVector<Value *, 4> &Later,
6104ba319b5SDimitry Andric                               unsigned LoopDepth, bool InnerLoop,
6114ba319b5SDimitry Andric                               DependenceInfo &DI) {
6124ba319b5SDimitry Andric   // Use DA to check for dependencies between loads and stores that make unroll
6134ba319b5SDimitry Andric   // and jam invalid
6144ba319b5SDimitry Andric   for (Value *I : Earlier) {
6154ba319b5SDimitry Andric     for (Value *J : Later) {
6164ba319b5SDimitry Andric       Instruction *Src = cast<Instruction>(I);
6174ba319b5SDimitry Andric       Instruction *Dst = cast<Instruction>(J);
6184ba319b5SDimitry Andric       if (Src == Dst)
6194ba319b5SDimitry Andric         continue;
6204ba319b5SDimitry Andric       // Ignore Input dependencies.
6214ba319b5SDimitry Andric       if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
6224ba319b5SDimitry Andric         continue;
6234ba319b5SDimitry Andric 
6244ba319b5SDimitry Andric       // Track dependencies, and if we find them take a conservative approach
6254ba319b5SDimitry Andric       // by allowing only = or < (not >), altough some > would be safe
6264ba319b5SDimitry Andric       // (depending upon unroll width).
6274ba319b5SDimitry Andric       // For the inner loop, we need to disallow any (> <) dependencies
6284ba319b5SDimitry Andric       // FIXME: Allow > so long as distance is less than unroll width
6294ba319b5SDimitry Andric       if (auto D = DI.depends(Src, Dst, true)) {
6304ba319b5SDimitry Andric         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
6314ba319b5SDimitry Andric 
632*b5893f02SDimitry Andric         if (D->isConfused()) {
633*b5893f02SDimitry Andric           LLVM_DEBUG(dbgs() << "  Confused dependency between:\n"
634*b5893f02SDimitry Andric                             << "  " << *Src << "\n"
635*b5893f02SDimitry Andric                             << "  " << *Dst << "\n");
6364ba319b5SDimitry Andric           return false;
637*b5893f02SDimitry Andric         }
6384ba319b5SDimitry Andric         if (!InnerLoop) {
639*b5893f02SDimitry Andric           if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
640*b5893f02SDimitry Andric             LLVM_DEBUG(dbgs() << "  > dependency between:\n"
641*b5893f02SDimitry Andric                               << "  " << *Src << "\n"
642*b5893f02SDimitry Andric                               << "  " << *Dst << "\n");
6434ba319b5SDimitry Andric             return false;
644*b5893f02SDimitry Andric           }
6454ba319b5SDimitry Andric         } else {
6464ba319b5SDimitry Andric           assert(LoopDepth + 1 <= D->getLevels());
6474ba319b5SDimitry Andric           if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
648*b5893f02SDimitry Andric               D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
649*b5893f02SDimitry Andric             LLVM_DEBUG(dbgs() << "  < > dependency between:\n"
650*b5893f02SDimitry Andric                               << "  " << *Src << "\n"
651*b5893f02SDimitry Andric                               << "  " << *Dst << "\n");
6524ba319b5SDimitry Andric             return false;
6534ba319b5SDimitry Andric           }
6544ba319b5SDimitry Andric         }
6554ba319b5SDimitry Andric       }
6564ba319b5SDimitry Andric     }
657*b5893f02SDimitry Andric   }
6584ba319b5SDimitry Andric   return true;
6594ba319b5SDimitry Andric }
6604ba319b5SDimitry Andric 
checkDependencies(Loop * L,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DependenceInfo & DI)6614ba319b5SDimitry Andric static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
6624ba319b5SDimitry Andric                               BasicBlockSet &SubLoopBlocks,
6634ba319b5SDimitry Andric                               BasicBlockSet &AftBlocks, DependenceInfo &DI) {
6644ba319b5SDimitry Andric   // Get all loads/store pairs for each blocks
6654ba319b5SDimitry Andric   SmallVector<Value *, 4> ForeMemInstr;
6664ba319b5SDimitry Andric   SmallVector<Value *, 4> SubLoopMemInstr;
6674ba319b5SDimitry Andric   SmallVector<Value *, 4> AftMemInstr;
6684ba319b5SDimitry Andric   if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
6694ba319b5SDimitry Andric       !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
6704ba319b5SDimitry Andric       !getLoadsAndStores(AftBlocks, AftMemInstr))
6714ba319b5SDimitry Andric     return false;
6724ba319b5SDimitry Andric 
6734ba319b5SDimitry Andric   // Check for dependencies between any blocks that may change order
6744ba319b5SDimitry Andric   unsigned LoopDepth = L->getLoopDepth();
6754ba319b5SDimitry Andric   return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
6764ba319b5SDimitry Andric                            DI) &&
6774ba319b5SDimitry Andric          checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
6784ba319b5SDimitry Andric          checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
6794ba319b5SDimitry Andric                            DI) &&
6804ba319b5SDimitry Andric          checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
6814ba319b5SDimitry Andric                            DI);
6824ba319b5SDimitry Andric }
6834ba319b5SDimitry Andric 
isSafeToUnrollAndJam(Loop * L,ScalarEvolution & SE,DominatorTree & DT,DependenceInfo & DI)6844ba319b5SDimitry Andric bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
6854ba319b5SDimitry Andric                                 DependenceInfo &DI) {
6864ba319b5SDimitry Andric   /* We currently handle outer loops like this:
6874ba319b5SDimitry Andric         |
6884ba319b5SDimitry Andric     ForeFirst    <----\    }
6894ba319b5SDimitry Andric      Blocks           |    } ForeBlocks
6904ba319b5SDimitry Andric     ForeLast          |    }
6914ba319b5SDimitry Andric         |             |
6924ba319b5SDimitry Andric     SubLoopFirst  <\  |    }
6934ba319b5SDimitry Andric      Blocks        |  |    } SubLoopBlocks
6944ba319b5SDimitry Andric     SubLoopLast   -/  |    }
6954ba319b5SDimitry Andric         |             |
6964ba319b5SDimitry Andric     AftFirst          |    }
6974ba319b5SDimitry Andric      Blocks           |    } AftBlocks
6984ba319b5SDimitry Andric     AftLast     ------/    }
6994ba319b5SDimitry Andric         |
7004ba319b5SDimitry Andric 
7014ba319b5SDimitry Andric     There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
7024ba319b5SDimitry Andric     and AftBlocks, providing that there is one edge from Fores to SubLoops,
7034ba319b5SDimitry Andric     one edge from SubLoops to Afts and a single outer loop exit (from Afts).
7044ba319b5SDimitry Andric     In practice we currently limit Aft blocks to a single block, and limit
7054ba319b5SDimitry Andric     things further in the profitablility checks of the unroll and jam pass.
7064ba319b5SDimitry Andric 
7074ba319b5SDimitry Andric     Because of the way we rearrange basic blocks, we also require that
7084ba319b5SDimitry Andric     the Fore blocks on all unrolled iterations are safe to move before the
7094ba319b5SDimitry Andric     SubLoop blocks of all iterations. So we require that the phi node looping
7104ba319b5SDimitry Andric     operands of ForeHeader can be moved to at least the end of ForeEnd, so that
7114ba319b5SDimitry Andric     we can arrange cloned Fore Blocks before the subloop and match up Phi's
7124ba319b5SDimitry Andric     correctly.
7134ba319b5SDimitry Andric 
7144ba319b5SDimitry Andric     i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
7154ba319b5SDimitry Andric     It needs to be safe to tranform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
7164ba319b5SDimitry Andric 
7174ba319b5SDimitry Andric     There are then a number of checks along the lines of no calls, no
7184ba319b5SDimitry Andric     exceptions, inner loop IV is consistent, etc. Note that for loops requiring
7194ba319b5SDimitry Andric     runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
7204ba319b5SDimitry Andric     UnrollAndJamLoop if the trip count cannot be easily calculated.
7214ba319b5SDimitry Andric   */
7224ba319b5SDimitry Andric 
7234ba319b5SDimitry Andric   if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
7244ba319b5SDimitry Andric     return false;
7254ba319b5SDimitry Andric   Loop *SubLoop = L->getSubLoops()[0];
7264ba319b5SDimitry Andric   if (!SubLoop->isLoopSimplifyForm())
7274ba319b5SDimitry Andric     return false;
7284ba319b5SDimitry Andric 
7294ba319b5SDimitry Andric   BasicBlock *Header = L->getHeader();
7304ba319b5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
7314ba319b5SDimitry Andric   BasicBlock *Exit = L->getExitingBlock();
7324ba319b5SDimitry Andric   BasicBlock *SubLoopHeader = SubLoop->getHeader();
7334ba319b5SDimitry Andric   BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
7344ba319b5SDimitry Andric   BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
7354ba319b5SDimitry Andric 
7364ba319b5SDimitry Andric   if (Latch != Exit)
7374ba319b5SDimitry Andric     return false;
7384ba319b5SDimitry Andric   if (SubLoopLatch != SubLoopExit)
7394ba319b5SDimitry Andric     return false;
7404ba319b5SDimitry Andric 
741*b5893f02SDimitry Andric   if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
742*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
7434ba319b5SDimitry Andric     return false;
744*b5893f02SDimitry Andric   }
7454ba319b5SDimitry Andric 
7464ba319b5SDimitry Andric   // Split blocks into Fore/SubLoop/Aft based on dominators
7474ba319b5SDimitry Andric   BasicBlockSet SubLoopBlocks;
7484ba319b5SDimitry Andric   BasicBlockSet ForeBlocks;
7494ba319b5SDimitry Andric   BasicBlockSet AftBlocks;
7504ba319b5SDimitry Andric   if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
751*b5893f02SDimitry Andric                                 AftBlocks, &DT)) {
752*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
7534ba319b5SDimitry Andric     return false;
754*b5893f02SDimitry Andric   }
7554ba319b5SDimitry Andric 
7564ba319b5SDimitry Andric   // Aft blocks may need to move instructions to fore blocks, which becomes more
7574ba319b5SDimitry Andric   // difficult if there are multiple (potentially conditionally executed)
7584ba319b5SDimitry Andric   // blocks. For now we just exclude loops with multiple aft blocks.
759*b5893f02SDimitry Andric   if (AftBlocks.size() != 1) {
760*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
761*b5893f02SDimitry Andric                          "multiple blocks after the loop\n");
7624ba319b5SDimitry Andric     return false;
763*b5893f02SDimitry Andric   }
7644ba319b5SDimitry Andric 
765*b5893f02SDimitry Andric   // Check inner loop backedge count is consistent on all iterations of the
766*b5893f02SDimitry Andric   // outer loop
767*b5893f02SDimitry Andric   if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
768*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
769*b5893f02SDimitry Andric                          "not consistent on each iteration\n");
7704ba319b5SDimitry Andric     return false;
771*b5893f02SDimitry Andric   }
7724ba319b5SDimitry Andric 
7734ba319b5SDimitry Andric   // Check the loop safety info for exceptions.
774*b5893f02SDimitry Andric   SimpleLoopSafetyInfo LSI;
775*b5893f02SDimitry Andric   LSI.computeLoopSafetyInfo(L);
776*b5893f02SDimitry Andric   if (LSI.anyBlockMayThrow()) {
777*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
7784ba319b5SDimitry Andric     return false;
779*b5893f02SDimitry Andric   }
7804ba319b5SDimitry Andric 
7814ba319b5SDimitry Andric   // We've ruled out the easy stuff and now need to check that there are no
7824ba319b5SDimitry Andric   // interdependencies which may prevent us from moving the:
7834ba319b5SDimitry Andric   //  ForeBlocks before Subloop and AftBlocks.
7844ba319b5SDimitry Andric   //  Subloop before AftBlocks.
7854ba319b5SDimitry Andric   //  ForeBlock phi operands before the subloop
7864ba319b5SDimitry Andric 
7874ba319b5SDimitry Andric   // Make sure we can move all instructions we need to before the subloop
7884ba319b5SDimitry Andric   if (!processHeaderPhiOperands(
7894ba319b5SDimitry Andric           Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
7904ba319b5SDimitry Andric             if (SubLoop->contains(I->getParent()))
7914ba319b5SDimitry Andric               return false;
7924ba319b5SDimitry Andric             if (AftBlocks.count(I->getParent())) {
7934ba319b5SDimitry Andric               // If we hit a phi node in afts we know we are done (probably
7944ba319b5SDimitry Andric               // LCSSA)
7954ba319b5SDimitry Andric               if (isa<PHINode>(I))
7964ba319b5SDimitry Andric                 return false;
7974ba319b5SDimitry Andric               // Can't move instructions with side effects or memory
7984ba319b5SDimitry Andric               // reads/writes
7994ba319b5SDimitry Andric               if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
8004ba319b5SDimitry Andric                 return false;
8014ba319b5SDimitry Andric             }
8024ba319b5SDimitry Andric             // Keep going
8034ba319b5SDimitry Andric             return true;
804*b5893f02SDimitry Andric           })) {
805*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
806*b5893f02SDimitry Andric                          "instructions after subloop to before it\n");
8074ba319b5SDimitry Andric     return false;
808*b5893f02SDimitry Andric   }
8094ba319b5SDimitry Andric 
8104ba319b5SDimitry Andric   // Check for memory dependencies which prohibit the unrolling we are doing.
8114ba319b5SDimitry Andric   // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
8124ba319b5SDimitry Andric   // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
813*b5893f02SDimitry Andric   if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
814*b5893f02SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
8154ba319b5SDimitry Andric     return false;
816*b5893f02SDimitry Andric   }
8174ba319b5SDimitry Andric 
8184ba319b5SDimitry Andric   return true;
8194ba319b5SDimitry Andric }
820