14ba319b5SDimitry Andric //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
24ba319b5SDimitry Andric //
34ba319b5SDimitry Andric // The LLVM Compiler Infrastructure
44ba319b5SDimitry Andric //
54ba319b5SDimitry Andric // This file is distributed under the University of Illinois Open Source
64ba319b5SDimitry Andric // License. See LICENSE.TXT for details.
74ba319b5SDimitry Andric //
84ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
94ba319b5SDimitry Andric //
104ba319b5SDimitry Andric // This file implements loop unroll and jam as a routine, much like
114ba319b5SDimitry Andric // LoopUnroll.cpp implements loop unroll.
124ba319b5SDimitry Andric //
134ba319b5SDimitry Andric //===----------------------------------------------------------------------===//
144ba319b5SDimitry Andric
154ba319b5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
164ba319b5SDimitry Andric #include "llvm/ADT/Statistic.h"
174ba319b5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
184ba319b5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
194ba319b5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
204ba319b5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
214ba319b5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
224ba319b5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
234ba319b5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
244ba319b5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
254ba319b5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpander.h"
264ba319b5SDimitry Andric #include "llvm/Analysis/Utils/Local.h"
274ba319b5SDimitry Andric #include "llvm/IR/BasicBlock.h"
284ba319b5SDimitry Andric #include "llvm/IR/DataLayout.h"
294ba319b5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
304ba319b5SDimitry Andric #include "llvm/IR/Dominators.h"
314ba319b5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
324ba319b5SDimitry Andric #include "llvm/IR/LLVMContext.h"
334ba319b5SDimitry Andric #include "llvm/Support/Debug.h"
344ba319b5SDimitry Andric #include "llvm/Support/raw_ostream.h"
354ba319b5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
364ba319b5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
374ba319b5SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h"
384ba319b5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
394ba319b5SDimitry Andric #include "llvm/Transforms/Utils/SimplifyIndVar.h"
404ba319b5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
414ba319b5SDimitry Andric using namespace llvm;
424ba319b5SDimitry Andric
434ba319b5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam"
444ba319b5SDimitry Andric
454ba319b5SDimitry Andric STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
464ba319b5SDimitry Andric STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
474ba319b5SDimitry Andric
484ba319b5SDimitry Andric typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
494ba319b5SDimitry Andric
504ba319b5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after
514ba319b5SDimitry Andric // the loop
partitionOuterLoopBlocks(Loop * L,Loop * SubLoop,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DominatorTree * DT)524ba319b5SDimitry Andric static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
534ba319b5SDimitry Andric BasicBlockSet &ForeBlocks,
544ba319b5SDimitry Andric BasicBlockSet &SubLoopBlocks,
554ba319b5SDimitry Andric BasicBlockSet &AftBlocks,
564ba319b5SDimitry Andric DominatorTree *DT) {
574ba319b5SDimitry Andric BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
584ba319b5SDimitry Andric SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
594ba319b5SDimitry Andric
604ba319b5SDimitry Andric for (BasicBlock *BB : L->blocks()) {
614ba319b5SDimitry Andric if (!SubLoop->contains(BB)) {
624ba319b5SDimitry Andric if (DT->dominates(SubLoopLatch, BB))
634ba319b5SDimitry Andric AftBlocks.insert(BB);
644ba319b5SDimitry Andric else
654ba319b5SDimitry Andric ForeBlocks.insert(BB);
664ba319b5SDimitry Andric }
674ba319b5SDimitry Andric }
684ba319b5SDimitry Andric
694ba319b5SDimitry Andric // Check that all blocks in ForeBlocks together dominate the subloop
704ba319b5SDimitry Andric // TODO: This might ideally be done better with a dominator/postdominators.
714ba319b5SDimitry Andric BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
724ba319b5SDimitry Andric for (BasicBlock *BB : ForeBlocks) {
734ba319b5SDimitry Andric if (BB == SubLoopPreHeader)
744ba319b5SDimitry Andric continue;
75*b5893f02SDimitry Andric Instruction *TI = BB->getTerminator();
764ba319b5SDimitry Andric for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
774ba319b5SDimitry Andric if (!ForeBlocks.count(TI->getSuccessor(i)))
784ba319b5SDimitry Andric return false;
794ba319b5SDimitry Andric }
804ba319b5SDimitry Andric
814ba319b5SDimitry Andric return true;
824ba319b5SDimitry Andric }
834ba319b5SDimitry Andric
844ba319b5SDimitry Andric // Looks at the phi nodes in Header for values coming from Latch. For these
854ba319b5SDimitry Andric // instructions and all their operands calls Visit on them, keeping going for
864ba319b5SDimitry Andric // all the operands in AftBlocks. Returns false if Visit returns false,
874ba319b5SDimitry Andric // otherwise returns true. This is used to process the instructions in the
884ba319b5SDimitry Andric // Aft blocks that need to be moved before the subloop. It is used in two
894ba319b5SDimitry Andric // places. One to check that the required set of instructions can be moved
904ba319b5SDimitry Andric // before the loop. Then to collect the instructions to actually move in
914ba319b5SDimitry Andric // moveHeaderPhiOperandsToForeBlocks.
924ba319b5SDimitry Andric template <typename T>
processHeaderPhiOperands(BasicBlock * Header,BasicBlock * Latch,BasicBlockSet & AftBlocks,T Visit)934ba319b5SDimitry Andric static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
944ba319b5SDimitry Andric BasicBlockSet &AftBlocks, T Visit) {
954ba319b5SDimitry Andric SmallVector<Instruction *, 8> Worklist;
964ba319b5SDimitry Andric for (auto &Phi : Header->phis()) {
974ba319b5SDimitry Andric Value *V = Phi.getIncomingValueForBlock(Latch);
984ba319b5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V))
994ba319b5SDimitry Andric Worklist.push_back(I);
1004ba319b5SDimitry Andric }
1014ba319b5SDimitry Andric
1024ba319b5SDimitry Andric while (!Worklist.empty()) {
1034ba319b5SDimitry Andric Instruction *I = Worklist.back();
1044ba319b5SDimitry Andric Worklist.pop_back();
1054ba319b5SDimitry Andric if (!Visit(I))
1064ba319b5SDimitry Andric return false;
1074ba319b5SDimitry Andric
1084ba319b5SDimitry Andric if (AftBlocks.count(I->getParent()))
1094ba319b5SDimitry Andric for (auto &U : I->operands())
1104ba319b5SDimitry Andric if (Instruction *II = dyn_cast<Instruction>(U))
1114ba319b5SDimitry Andric Worklist.push_back(II);
1124ba319b5SDimitry Andric }
1134ba319b5SDimitry Andric
1144ba319b5SDimitry Andric return true;
1154ba319b5SDimitry Andric }
1164ba319b5SDimitry Andric
1174ba319b5SDimitry Andric // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
moveHeaderPhiOperandsToForeBlocks(BasicBlock * Header,BasicBlock * Latch,Instruction * InsertLoc,BasicBlockSet & AftBlocks)1184ba319b5SDimitry Andric static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
1194ba319b5SDimitry Andric BasicBlock *Latch,
1204ba319b5SDimitry Andric Instruction *InsertLoc,
1214ba319b5SDimitry Andric BasicBlockSet &AftBlocks) {
1224ba319b5SDimitry Andric // We need to ensure we move the instructions in the correct order,
1234ba319b5SDimitry Andric // starting with the earliest required instruction and moving forward.
1244ba319b5SDimitry Andric std::vector<Instruction *> Visited;
1254ba319b5SDimitry Andric processHeaderPhiOperands(Header, Latch, AftBlocks,
1264ba319b5SDimitry Andric [&Visited, &AftBlocks](Instruction *I) {
1274ba319b5SDimitry Andric if (AftBlocks.count(I->getParent()))
1284ba319b5SDimitry Andric Visited.push_back(I);
1294ba319b5SDimitry Andric return true;
1304ba319b5SDimitry Andric });
1314ba319b5SDimitry Andric
1324ba319b5SDimitry Andric // Move all instructions in program order to before the InsertLoc
1334ba319b5SDimitry Andric BasicBlock *InsertLocBB = InsertLoc->getParent();
1344ba319b5SDimitry Andric for (Instruction *I : reverse(Visited)) {
1354ba319b5SDimitry Andric if (I->getParent() != InsertLocBB)
1364ba319b5SDimitry Andric I->moveBefore(InsertLoc);
1374ba319b5SDimitry Andric }
1384ba319b5SDimitry Andric }
1394ba319b5SDimitry Andric
1404ba319b5SDimitry Andric /*
1414ba319b5SDimitry Andric This method performs Unroll and Jam. For a simple loop like:
1424ba319b5SDimitry Andric for (i = ..)
1434ba319b5SDimitry Andric Fore(i)
1444ba319b5SDimitry Andric for (j = ..)
1454ba319b5SDimitry Andric SubLoop(i, j)
1464ba319b5SDimitry Andric Aft(i)
1474ba319b5SDimitry Andric
1484ba319b5SDimitry Andric Instead of doing normal inner or outer unrolling, we do:
1494ba319b5SDimitry Andric for (i = .., i+=2)
1504ba319b5SDimitry Andric Fore(i)
1514ba319b5SDimitry Andric Fore(i+1)
1524ba319b5SDimitry Andric for (j = ..)
1534ba319b5SDimitry Andric SubLoop(i, j)
1544ba319b5SDimitry Andric SubLoop(i+1, j)
1554ba319b5SDimitry Andric Aft(i)
1564ba319b5SDimitry Andric Aft(i+1)
1574ba319b5SDimitry Andric
1584ba319b5SDimitry Andric So the outer loop is essetially unrolled and then the inner loops are fused
1594ba319b5SDimitry Andric ("jammed") together into a single loop. This can increase speed when there
1604ba319b5SDimitry Andric are loads in SubLoop that are invariant to i, as they become shared between
1614ba319b5SDimitry Andric the now jammed inner loops.
1624ba319b5SDimitry Andric
1634ba319b5SDimitry Andric We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
1644ba319b5SDimitry Andric Fore blocks are those before the inner loop, Aft are those after. Normal
1654ba319b5SDimitry Andric Unroll code is used to copy each of these sets of blocks and the results are
1664ba319b5SDimitry Andric combined together into the final form above.
1674ba319b5SDimitry Andric
1684ba319b5SDimitry Andric isSafeToUnrollAndJam should be used prior to calling this to make sure the
1694ba319b5SDimitry Andric unrolling will be valid. Checking profitablility is also advisable.
170*b5893f02SDimitry Andric
171*b5893f02SDimitry Andric If EpilogueLoop is non-null, it receives the epilogue loop (if it was
172*b5893f02SDimitry Andric necessary to create one and not fully unrolled).
1734ba319b5SDimitry Andric */
UnrollAndJamLoop(Loop * L,unsigned Count,unsigned TripCount,unsigned TripMultiple,bool UnrollRemainder,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,OptimizationRemarkEmitter * ORE,Loop ** EpilogueLoop)174*b5893f02SDimitry Andric LoopUnrollResult llvm::UnrollAndJamLoop(
175*b5893f02SDimitry Andric Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
176*b5893f02SDimitry Andric bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
177*b5893f02SDimitry Andric AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
1784ba319b5SDimitry Andric
1794ba319b5SDimitry Andric // When we enter here we should have already checked that it is safe
1804ba319b5SDimitry Andric BasicBlock *Header = L->getHeader();
1814ba319b5SDimitry Andric assert(L->getSubLoops().size() == 1);
1824ba319b5SDimitry Andric Loop *SubLoop = *L->begin();
1834ba319b5SDimitry Andric
1844ba319b5SDimitry Andric // Don't enter the unroll code if there is nothing to do.
1854ba319b5SDimitry Andric if (TripCount == 0 && Count < 2) {
186*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
1874ba319b5SDimitry Andric return LoopUnrollResult::Unmodified;
1884ba319b5SDimitry Andric }
1894ba319b5SDimitry Andric
1904ba319b5SDimitry Andric assert(Count > 0);
1914ba319b5SDimitry Andric assert(TripMultiple > 0);
1924ba319b5SDimitry Andric assert(TripCount == 0 || TripCount % TripMultiple == 0);
1934ba319b5SDimitry Andric
1944ba319b5SDimitry Andric // Are we eliminating the loop control altogether?
1954ba319b5SDimitry Andric bool CompletelyUnroll = (Count == TripCount);
1964ba319b5SDimitry Andric
1974ba319b5SDimitry Andric // We use the runtime remainder in cases where we don't know trip multiple
1984ba319b5SDimitry Andric if (TripMultiple == 1 || TripMultiple % Count != 0) {
1994ba319b5SDimitry Andric if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
2004ba319b5SDimitry Andric /*UseEpilogRemainder*/ true,
201*b5893f02SDimitry Andric UnrollRemainder, LI, SE, DT, AC, true,
202*b5893f02SDimitry Andric EpilogueLoop)) {
2034ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
2044ba319b5SDimitry Andric "generated when assuming runtime trip count\n");
2054ba319b5SDimitry Andric return LoopUnrollResult::Unmodified;
2064ba319b5SDimitry Andric }
2074ba319b5SDimitry Andric }
2084ba319b5SDimitry Andric
2094ba319b5SDimitry Andric // Notify ScalarEvolution that the loop will be substantially changed,
2104ba319b5SDimitry Andric // if not outright eliminated.
2114ba319b5SDimitry Andric if (SE) {
2124ba319b5SDimitry Andric SE->forgetLoop(L);
2134ba319b5SDimitry Andric SE->forgetLoop(SubLoop);
2144ba319b5SDimitry Andric }
2154ba319b5SDimitry Andric
2164ba319b5SDimitry Andric using namespace ore;
2174ba319b5SDimitry Andric // Report the unrolling decision.
2184ba319b5SDimitry Andric if (CompletelyUnroll) {
2194ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
2204ba319b5SDimitry Andric << Header->getName() << " with trip count " << TripCount
2214ba319b5SDimitry Andric << "!\n");
2224ba319b5SDimitry Andric ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
2234ba319b5SDimitry Andric L->getHeader())
2244ba319b5SDimitry Andric << "completely unroll and jammed loop with "
2254ba319b5SDimitry Andric << NV("UnrollCount", TripCount) << " iterations");
2264ba319b5SDimitry Andric } else {
2274ba319b5SDimitry Andric auto DiagBuilder = [&]() {
2284ba319b5SDimitry Andric OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
2294ba319b5SDimitry Andric L->getHeader());
2304ba319b5SDimitry Andric return Diag << "unroll and jammed loop by a factor of "
2314ba319b5SDimitry Andric << NV("UnrollCount", Count);
2324ba319b5SDimitry Andric };
2334ba319b5SDimitry Andric
2344ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
2354ba319b5SDimitry Andric << " by " << Count);
2364ba319b5SDimitry Andric if (TripMultiple != 1) {
2374ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
2384ba319b5SDimitry Andric ORE->emit([&]() {
2394ba319b5SDimitry Andric return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
2404ba319b5SDimitry Andric << " trips per branch";
2414ba319b5SDimitry Andric });
2424ba319b5SDimitry Andric } else {
2434ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " with run-time trip count");
2444ba319b5SDimitry Andric ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
2454ba319b5SDimitry Andric }
2464ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "!\n");
2474ba319b5SDimitry Andric }
2484ba319b5SDimitry Andric
2494ba319b5SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
2504ba319b5SDimitry Andric BasicBlock *LatchBlock = L->getLoopLatch();
2514ba319b5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
2524ba319b5SDimitry Andric assert(Preheader && LatchBlock && Header);
2534ba319b5SDimitry Andric assert(BI && !BI->isUnconditional());
2544ba319b5SDimitry Andric bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
2554ba319b5SDimitry Andric BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
2564ba319b5SDimitry Andric bool SubLoopContinueOnTrue = SubLoop->contains(
2574ba319b5SDimitry Andric SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
2584ba319b5SDimitry Andric
2594ba319b5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after
2604ba319b5SDimitry Andric // the loop
2614ba319b5SDimitry Andric BasicBlockSet SubLoopBlocks;
2624ba319b5SDimitry Andric BasicBlockSet ForeBlocks;
2634ba319b5SDimitry Andric BasicBlockSet AftBlocks;
2644ba319b5SDimitry Andric partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
2654ba319b5SDimitry Andric DT);
2664ba319b5SDimitry Andric
2674ba319b5SDimitry Andric // We keep track of the entering/first and exiting/last block of each of
2684ba319b5SDimitry Andric // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
2694ba319b5SDimitry Andric // blocks easier.
2704ba319b5SDimitry Andric std::vector<BasicBlock *> ForeBlocksFirst;
2714ba319b5SDimitry Andric std::vector<BasicBlock *> ForeBlocksLast;
2724ba319b5SDimitry Andric std::vector<BasicBlock *> SubLoopBlocksFirst;
2734ba319b5SDimitry Andric std::vector<BasicBlock *> SubLoopBlocksLast;
2744ba319b5SDimitry Andric std::vector<BasicBlock *> AftBlocksFirst;
2754ba319b5SDimitry Andric std::vector<BasicBlock *> AftBlocksLast;
2764ba319b5SDimitry Andric ForeBlocksFirst.push_back(Header);
2774ba319b5SDimitry Andric ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
2784ba319b5SDimitry Andric SubLoopBlocksFirst.push_back(SubLoop->getHeader());
2794ba319b5SDimitry Andric SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
2804ba319b5SDimitry Andric AftBlocksFirst.push_back(SubLoop->getExitBlock());
2814ba319b5SDimitry Andric AftBlocksLast.push_back(L->getExitingBlock());
2824ba319b5SDimitry Andric // Maps Blocks[0] -> Blocks[It]
2834ba319b5SDimitry Andric ValueToValueMapTy LastValueMap;
2844ba319b5SDimitry Andric
2854ba319b5SDimitry Andric // Move any instructions from fore phi operands from AftBlocks into Fore.
2864ba319b5SDimitry Andric moveHeaderPhiOperandsToForeBlocks(
2874ba319b5SDimitry Andric Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
2884ba319b5SDimitry Andric AftBlocks);
2894ba319b5SDimitry Andric
2904ba319b5SDimitry Andric // The current on-the-fly SSA update requires blocks to be processed in
2914ba319b5SDimitry Andric // reverse postorder so that LastValueMap contains the correct value at each
2924ba319b5SDimitry Andric // exit.
2934ba319b5SDimitry Andric LoopBlocksDFS DFS(L);
2944ba319b5SDimitry Andric DFS.perform(LI);
2954ba319b5SDimitry Andric // Stash the DFS iterators before adding blocks to the loop.
2964ba319b5SDimitry Andric LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
2974ba319b5SDimitry Andric LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
2984ba319b5SDimitry Andric
2994ba319b5SDimitry Andric if (Header->getParent()->isDebugInfoForProfiling())
3004ba319b5SDimitry Andric for (BasicBlock *BB : L->getBlocks())
3014ba319b5SDimitry Andric for (Instruction &I : *BB)
3024ba319b5SDimitry Andric if (!isa<DbgInfoIntrinsic>(&I))
303*b5893f02SDimitry Andric if (const DILocation *DIL = I.getDebugLoc()) {
304*b5893f02SDimitry Andric auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
305*b5893f02SDimitry Andric if (NewDIL)
306*b5893f02SDimitry Andric I.setDebugLoc(NewDIL.getValue());
307*b5893f02SDimitry Andric else
308*b5893f02SDimitry Andric LLVM_DEBUG(dbgs()
309*b5893f02SDimitry Andric << "Failed to create new discriminator: "
310*b5893f02SDimitry Andric << DIL->getFilename() << " Line: " << DIL->getLine());
311*b5893f02SDimitry Andric }
3124ba319b5SDimitry Andric
3134ba319b5SDimitry Andric // Copy all blocks
3144ba319b5SDimitry Andric for (unsigned It = 1; It != Count; ++It) {
3154ba319b5SDimitry Andric std::vector<BasicBlock *> NewBlocks;
3164ba319b5SDimitry Andric // Maps Blocks[It] -> Blocks[It-1]
3174ba319b5SDimitry Andric DenseMap<Value *, Value *> PrevItValueMap;
3184ba319b5SDimitry Andric
3194ba319b5SDimitry Andric for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3204ba319b5SDimitry Andric ValueToValueMapTy VMap;
3214ba319b5SDimitry Andric BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
3224ba319b5SDimitry Andric Header->getParent()->getBasicBlockList().push_back(New);
3234ba319b5SDimitry Andric
3244ba319b5SDimitry Andric if (ForeBlocks.count(*BB)) {
3254ba319b5SDimitry Andric L->addBasicBlockToLoop(New, *LI);
3264ba319b5SDimitry Andric
3274ba319b5SDimitry Andric if (*BB == ForeBlocksFirst[0])
3284ba319b5SDimitry Andric ForeBlocksFirst.push_back(New);
3294ba319b5SDimitry Andric if (*BB == ForeBlocksLast[0])
3304ba319b5SDimitry Andric ForeBlocksLast.push_back(New);
3314ba319b5SDimitry Andric } else if (SubLoopBlocks.count(*BB)) {
3324ba319b5SDimitry Andric SubLoop->addBasicBlockToLoop(New, *LI);
3334ba319b5SDimitry Andric
3344ba319b5SDimitry Andric if (*BB == SubLoopBlocksFirst[0])
3354ba319b5SDimitry Andric SubLoopBlocksFirst.push_back(New);
3364ba319b5SDimitry Andric if (*BB == SubLoopBlocksLast[0])
3374ba319b5SDimitry Andric SubLoopBlocksLast.push_back(New);
3384ba319b5SDimitry Andric } else if (AftBlocks.count(*BB)) {
3394ba319b5SDimitry Andric L->addBasicBlockToLoop(New, *LI);
3404ba319b5SDimitry Andric
3414ba319b5SDimitry Andric if (*BB == AftBlocksFirst[0])
3424ba319b5SDimitry Andric AftBlocksFirst.push_back(New);
3434ba319b5SDimitry Andric if (*BB == AftBlocksLast[0])
3444ba319b5SDimitry Andric AftBlocksLast.push_back(New);
3454ba319b5SDimitry Andric } else {
3464ba319b5SDimitry Andric llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
3474ba319b5SDimitry Andric }
3484ba319b5SDimitry Andric
3494ba319b5SDimitry Andric // Update our running maps of newest clones
3504ba319b5SDimitry Andric PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
3514ba319b5SDimitry Andric LastValueMap[*BB] = New;
3524ba319b5SDimitry Andric for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
3534ba319b5SDimitry Andric VI != VE; ++VI) {
3544ba319b5SDimitry Andric PrevItValueMap[VI->second] =
3554ba319b5SDimitry Andric const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
3564ba319b5SDimitry Andric LastValueMap[VI->first] = VI->second;
3574ba319b5SDimitry Andric }
3584ba319b5SDimitry Andric
3594ba319b5SDimitry Andric NewBlocks.push_back(New);
3604ba319b5SDimitry Andric
3614ba319b5SDimitry Andric // Update DomTree:
3624ba319b5SDimitry Andric if (*BB == ForeBlocksFirst[0])
3634ba319b5SDimitry Andric DT->addNewBlock(New, ForeBlocksLast[It - 1]);
3644ba319b5SDimitry Andric else if (*BB == SubLoopBlocksFirst[0])
3654ba319b5SDimitry Andric DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
3664ba319b5SDimitry Andric else if (*BB == AftBlocksFirst[0])
3674ba319b5SDimitry Andric DT->addNewBlock(New, AftBlocksLast[It - 1]);
3684ba319b5SDimitry Andric else {
3694ba319b5SDimitry Andric // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
3704ba319b5SDimitry Andric // structure.
3714ba319b5SDimitry Andric auto BBDomNode = DT->getNode(*BB);
3724ba319b5SDimitry Andric auto BBIDom = BBDomNode->getIDom();
3734ba319b5SDimitry Andric BasicBlock *OriginalBBIDom = BBIDom->getBlock();
3744ba319b5SDimitry Andric assert(OriginalBBIDom);
3754ba319b5SDimitry Andric assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
3764ba319b5SDimitry Andric DT->addNewBlock(
3774ba319b5SDimitry Andric New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
3784ba319b5SDimitry Andric }
3794ba319b5SDimitry Andric }
3804ba319b5SDimitry Andric
3814ba319b5SDimitry Andric // Remap all instructions in the most recent iteration
3824ba319b5SDimitry Andric for (BasicBlock *NewBlock : NewBlocks) {
3834ba319b5SDimitry Andric for (Instruction &I : *NewBlock) {
3844ba319b5SDimitry Andric ::remapInstruction(&I, LastValueMap);
3854ba319b5SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I))
3864ba319b5SDimitry Andric if (II->getIntrinsicID() == Intrinsic::assume)
3874ba319b5SDimitry Andric AC->registerAssumption(II);
3884ba319b5SDimitry Andric }
3894ba319b5SDimitry Andric }
3904ba319b5SDimitry Andric
3914ba319b5SDimitry Andric // Alter the ForeBlocks phi's, pointing them at the latest version of the
3924ba319b5SDimitry Andric // value from the previous iteration's phis
3934ba319b5SDimitry Andric for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
3944ba319b5SDimitry Andric Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
3954ba319b5SDimitry Andric assert(OldValue && "should have incoming edge from Aft[It]");
3964ba319b5SDimitry Andric Value *NewValue = OldValue;
3974ba319b5SDimitry Andric if (Value *PrevValue = PrevItValueMap[OldValue])
3984ba319b5SDimitry Andric NewValue = PrevValue;
3994ba319b5SDimitry Andric
4004ba319b5SDimitry Andric assert(Phi.getNumOperands() == 2);
4014ba319b5SDimitry Andric Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
4024ba319b5SDimitry Andric Phi.setIncomingValue(0, NewValue);
4034ba319b5SDimitry Andric Phi.removeIncomingValue(1);
4044ba319b5SDimitry Andric }
4054ba319b5SDimitry Andric }
4064ba319b5SDimitry Andric
4074ba319b5SDimitry Andric // Now that all the basic blocks for the unrolled iterations are in place,
4084ba319b5SDimitry Andric // finish up connecting the blocks and phi nodes. At this point LastValueMap
4094ba319b5SDimitry Andric // is the last unrolled iterations values.
4104ba319b5SDimitry Andric
4114ba319b5SDimitry Andric // Update Phis in BB from OldBB to point to NewBB
4124ba319b5SDimitry Andric auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
4134ba319b5SDimitry Andric BasicBlock *NewBB) {
4144ba319b5SDimitry Andric for (PHINode &Phi : BB->phis()) {
4154ba319b5SDimitry Andric int I = Phi.getBasicBlockIndex(OldBB);
4164ba319b5SDimitry Andric Phi.setIncomingBlock(I, NewBB);
4174ba319b5SDimitry Andric }
4184ba319b5SDimitry Andric };
4194ba319b5SDimitry Andric // Update Phis in BB from OldBB to point to NewBB and use the latest value
4204ba319b5SDimitry Andric // from LastValueMap
4214ba319b5SDimitry Andric auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
4224ba319b5SDimitry Andric BasicBlock *NewBB,
4234ba319b5SDimitry Andric ValueToValueMapTy &LastValueMap) {
4244ba319b5SDimitry Andric for (PHINode &Phi : BB->phis()) {
4254ba319b5SDimitry Andric for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
4264ba319b5SDimitry Andric if (Phi.getIncomingBlock(b) == OldBB) {
4274ba319b5SDimitry Andric Value *OldValue = Phi.getIncomingValue(b);
4284ba319b5SDimitry Andric if (Value *LastValue = LastValueMap[OldValue])
4294ba319b5SDimitry Andric Phi.setIncomingValue(b, LastValue);
4304ba319b5SDimitry Andric Phi.setIncomingBlock(b, NewBB);
4314ba319b5SDimitry Andric break;
4324ba319b5SDimitry Andric }
4334ba319b5SDimitry Andric }
4344ba319b5SDimitry Andric }
4354ba319b5SDimitry Andric };
4364ba319b5SDimitry Andric // Move all the phis from Src into Dest
4374ba319b5SDimitry Andric auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
4384ba319b5SDimitry Andric Instruction *insertPoint = Dest->getFirstNonPHI();
4394ba319b5SDimitry Andric while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
4404ba319b5SDimitry Andric Phi->moveBefore(insertPoint);
4414ba319b5SDimitry Andric };
4424ba319b5SDimitry Andric
4434ba319b5SDimitry Andric // Update the PHI values outside the loop to point to the last block
4444ba319b5SDimitry Andric updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
4454ba319b5SDimitry Andric LastValueMap);
4464ba319b5SDimitry Andric
4474ba319b5SDimitry Andric // Update ForeBlocks successors and phi nodes
4484ba319b5SDimitry Andric BranchInst *ForeTerm =
4494ba319b5SDimitry Andric cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
4504ba319b5SDimitry Andric BasicBlock *Dest = SubLoopBlocksFirst[0];
4514ba319b5SDimitry Andric ForeTerm->setSuccessor(0, Dest);
4524ba319b5SDimitry Andric
4534ba319b5SDimitry Andric if (CompletelyUnroll) {
4544ba319b5SDimitry Andric while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
4554ba319b5SDimitry Andric Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
4564ba319b5SDimitry Andric Phi->getParent()->getInstList().erase(Phi);
4574ba319b5SDimitry Andric }
4584ba319b5SDimitry Andric } else {
4594ba319b5SDimitry Andric // Update the PHI values to point to the last aft block
4604ba319b5SDimitry Andric updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
4614ba319b5SDimitry Andric AftBlocksLast.back(), LastValueMap);
4624ba319b5SDimitry Andric }
4634ba319b5SDimitry Andric
4644ba319b5SDimitry Andric for (unsigned It = 1; It != Count; It++) {
4654ba319b5SDimitry Andric // Remap ForeBlock successors from previous iteration to this
4664ba319b5SDimitry Andric BranchInst *ForeTerm =
4674ba319b5SDimitry Andric cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
4684ba319b5SDimitry Andric BasicBlock *Dest = ForeBlocksFirst[It];
4694ba319b5SDimitry Andric ForeTerm->setSuccessor(0, Dest);
4704ba319b5SDimitry Andric }
4714ba319b5SDimitry Andric
4724ba319b5SDimitry Andric // Subloop successors and phis
4734ba319b5SDimitry Andric BranchInst *SubTerm =
4744ba319b5SDimitry Andric cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
4754ba319b5SDimitry Andric SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
4764ba319b5SDimitry Andric SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
4774ba319b5SDimitry Andric updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
4784ba319b5SDimitry Andric ForeBlocksLast.back());
4794ba319b5SDimitry Andric updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
4804ba319b5SDimitry Andric SubLoopBlocksLast.back());
4814ba319b5SDimitry Andric
4824ba319b5SDimitry Andric for (unsigned It = 1; It != Count; It++) {
4834ba319b5SDimitry Andric // Replace the conditional branch of the previous iteration subloop with an
4844ba319b5SDimitry Andric // unconditional one to this one
4854ba319b5SDimitry Andric BranchInst *SubTerm =
4864ba319b5SDimitry Andric cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
4874ba319b5SDimitry Andric BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
4884ba319b5SDimitry Andric SubTerm->eraseFromParent();
4894ba319b5SDimitry Andric
4904ba319b5SDimitry Andric updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
4914ba319b5SDimitry Andric ForeBlocksLast.back());
4924ba319b5SDimitry Andric updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
4934ba319b5SDimitry Andric SubLoopBlocksLast.back());
4944ba319b5SDimitry Andric movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
4954ba319b5SDimitry Andric }
4964ba319b5SDimitry Andric
4974ba319b5SDimitry Andric // Aft blocks successors and phis
4984ba319b5SDimitry Andric BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
4994ba319b5SDimitry Andric if (CompletelyUnroll) {
5004ba319b5SDimitry Andric BranchInst::Create(LoopExit, Term);
5014ba319b5SDimitry Andric Term->eraseFromParent();
5024ba319b5SDimitry Andric } else {
5034ba319b5SDimitry Andric Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
5044ba319b5SDimitry Andric }
5054ba319b5SDimitry Andric updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
5064ba319b5SDimitry Andric SubLoopBlocksLast.back());
5074ba319b5SDimitry Andric
5084ba319b5SDimitry Andric for (unsigned It = 1; It != Count; It++) {
5094ba319b5SDimitry Andric // Replace the conditional branch of the previous iteration subloop with an
5104ba319b5SDimitry Andric // unconditional one to this one
5114ba319b5SDimitry Andric BranchInst *AftTerm =
5124ba319b5SDimitry Andric cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
5134ba319b5SDimitry Andric BranchInst::Create(AftBlocksFirst[It], AftTerm);
5144ba319b5SDimitry Andric AftTerm->eraseFromParent();
5154ba319b5SDimitry Andric
5164ba319b5SDimitry Andric updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
5174ba319b5SDimitry Andric SubLoopBlocksLast.back());
5184ba319b5SDimitry Andric movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
5194ba319b5SDimitry Andric }
5204ba319b5SDimitry Andric
5214ba319b5SDimitry Andric // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
5224ba319b5SDimitry Andric // new ones required.
5234ba319b5SDimitry Andric if (Count != 1) {
5244ba319b5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
5254ba319b5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
5264ba319b5SDimitry Andric SubLoopBlocksFirst[0]);
5274ba319b5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
5284ba319b5SDimitry Andric SubLoopBlocksLast[0], AftBlocksFirst[0]);
5294ba319b5SDimitry Andric
5304ba319b5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5314ba319b5SDimitry Andric ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
5324ba319b5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5334ba319b5SDimitry Andric SubLoopBlocksLast.back(), AftBlocksFirst[0]);
5344ba319b5SDimitry Andric DT->applyUpdates(DTUpdates);
5354ba319b5SDimitry Andric }
5364ba319b5SDimitry Andric
5374ba319b5SDimitry Andric // Merge adjacent basic blocks, if possible.
5384ba319b5SDimitry Andric SmallPtrSet<BasicBlock *, 16> MergeBlocks;
5394ba319b5SDimitry Andric MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
5404ba319b5SDimitry Andric MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
5414ba319b5SDimitry Andric MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
5424ba319b5SDimitry Andric while (!MergeBlocks.empty()) {
5434ba319b5SDimitry Andric BasicBlock *BB = *MergeBlocks.begin();
5444ba319b5SDimitry Andric BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
5454ba319b5SDimitry Andric if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
5464ba319b5SDimitry Andric BasicBlock *Dest = Term->getSuccessor(0);
5474ba319b5SDimitry Andric if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
5484ba319b5SDimitry Andric // Don't remove BB and add Fold as they are the same BB
5494ba319b5SDimitry Andric assert(Fold == BB);
5504ba319b5SDimitry Andric (void)Fold;
5514ba319b5SDimitry Andric MergeBlocks.erase(Dest);
5524ba319b5SDimitry Andric } else
5534ba319b5SDimitry Andric MergeBlocks.erase(BB);
5544ba319b5SDimitry Andric } else
5554ba319b5SDimitry Andric MergeBlocks.erase(BB);
5564ba319b5SDimitry Andric }
5574ba319b5SDimitry Andric
5584ba319b5SDimitry Andric // At this point, the code is well formed. We now do a quick sweep over the
5594ba319b5SDimitry Andric // inserted code, doing constant propagation and dead code elimination as we
5604ba319b5SDimitry Andric // go.
5614ba319b5SDimitry Andric simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
5624ba319b5SDimitry Andric simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
5634ba319b5SDimitry Andric
5644ba319b5SDimitry Andric NumCompletelyUnrolledAndJammed += CompletelyUnroll;
5654ba319b5SDimitry Andric ++NumUnrolledAndJammed;
5664ba319b5SDimitry Andric
5674ba319b5SDimitry Andric #ifndef NDEBUG
5684ba319b5SDimitry Andric // We shouldn't have done anything to break loop simplify form or LCSSA.
5694ba319b5SDimitry Andric Loop *OuterL = L->getParentLoop();
5704ba319b5SDimitry Andric Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
5714ba319b5SDimitry Andric assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
5724ba319b5SDimitry Andric if (!CompletelyUnroll)
5734ba319b5SDimitry Andric assert(L->isLoopSimplifyForm());
5744ba319b5SDimitry Andric assert(SubLoop->isLoopSimplifyForm());
5754ba319b5SDimitry Andric assert(DT->verify());
5764ba319b5SDimitry Andric #endif
5774ba319b5SDimitry Andric
5784ba319b5SDimitry Andric // Update LoopInfo if the loop is completely removed.
5794ba319b5SDimitry Andric if (CompletelyUnroll)
5804ba319b5SDimitry Andric LI->erase(L);
5814ba319b5SDimitry Andric
5824ba319b5SDimitry Andric return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
5834ba319b5SDimitry Andric : LoopUnrollResult::PartiallyUnrolled;
5844ba319b5SDimitry Andric }
5854ba319b5SDimitry Andric
getLoadsAndStores(BasicBlockSet & Blocks,SmallVector<Value *,4> & MemInstr)5864ba319b5SDimitry Andric static bool getLoadsAndStores(BasicBlockSet &Blocks,
5874ba319b5SDimitry Andric SmallVector<Value *, 4> &MemInstr) {
5884ba319b5SDimitry Andric // Scan the BBs and collect legal loads and stores.
5894ba319b5SDimitry Andric // Returns false if non-simple loads/stores are found.
5904ba319b5SDimitry Andric for (BasicBlock *BB : Blocks) {
5914ba319b5SDimitry Andric for (Instruction &I : *BB) {
5924ba319b5SDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) {
5934ba319b5SDimitry Andric if (!Ld->isSimple())
5944ba319b5SDimitry Andric return false;
5954ba319b5SDimitry Andric MemInstr.push_back(&I);
5964ba319b5SDimitry Andric } else if (auto *St = dyn_cast<StoreInst>(&I)) {
5974ba319b5SDimitry Andric if (!St->isSimple())
5984ba319b5SDimitry Andric return false;
5994ba319b5SDimitry Andric MemInstr.push_back(&I);
6004ba319b5SDimitry Andric } else if (I.mayReadOrWriteMemory()) {
6014ba319b5SDimitry Andric return false;
6024ba319b5SDimitry Andric }
6034ba319b5SDimitry Andric }
6044ba319b5SDimitry Andric }
6054ba319b5SDimitry Andric return true;
6064ba319b5SDimitry Andric }
6074ba319b5SDimitry Andric
checkDependencies(SmallVector<Value *,4> & Earlier,SmallVector<Value *,4> & Later,unsigned LoopDepth,bool InnerLoop,DependenceInfo & DI)6084ba319b5SDimitry Andric static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
6094ba319b5SDimitry Andric SmallVector<Value *, 4> &Later,
6104ba319b5SDimitry Andric unsigned LoopDepth, bool InnerLoop,
6114ba319b5SDimitry Andric DependenceInfo &DI) {
6124ba319b5SDimitry Andric // Use DA to check for dependencies between loads and stores that make unroll
6134ba319b5SDimitry Andric // and jam invalid
6144ba319b5SDimitry Andric for (Value *I : Earlier) {
6154ba319b5SDimitry Andric for (Value *J : Later) {
6164ba319b5SDimitry Andric Instruction *Src = cast<Instruction>(I);
6174ba319b5SDimitry Andric Instruction *Dst = cast<Instruction>(J);
6184ba319b5SDimitry Andric if (Src == Dst)
6194ba319b5SDimitry Andric continue;
6204ba319b5SDimitry Andric // Ignore Input dependencies.
6214ba319b5SDimitry Andric if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
6224ba319b5SDimitry Andric continue;
6234ba319b5SDimitry Andric
6244ba319b5SDimitry Andric // Track dependencies, and if we find them take a conservative approach
6254ba319b5SDimitry Andric // by allowing only = or < (not >), altough some > would be safe
6264ba319b5SDimitry Andric // (depending upon unroll width).
6274ba319b5SDimitry Andric // For the inner loop, we need to disallow any (> <) dependencies
6284ba319b5SDimitry Andric // FIXME: Allow > so long as distance is less than unroll width
6294ba319b5SDimitry Andric if (auto D = DI.depends(Src, Dst, true)) {
6304ba319b5SDimitry Andric assert(D->isOrdered() && "Expected an output, flow or anti dep.");
6314ba319b5SDimitry Andric
632*b5893f02SDimitry Andric if (D->isConfused()) {
633*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
634*b5893f02SDimitry Andric << " " << *Src << "\n"
635*b5893f02SDimitry Andric << " " << *Dst << "\n");
6364ba319b5SDimitry Andric return false;
637*b5893f02SDimitry Andric }
6384ba319b5SDimitry Andric if (!InnerLoop) {
639*b5893f02SDimitry Andric if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
640*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " > dependency between:\n"
641*b5893f02SDimitry Andric << " " << *Src << "\n"
642*b5893f02SDimitry Andric << " " << *Dst << "\n");
6434ba319b5SDimitry Andric return false;
644*b5893f02SDimitry Andric }
6454ba319b5SDimitry Andric } else {
6464ba319b5SDimitry Andric assert(LoopDepth + 1 <= D->getLevels());
6474ba319b5SDimitry Andric if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
648*b5893f02SDimitry Andric D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
649*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " < > dependency between:\n"
650*b5893f02SDimitry Andric << " " << *Src << "\n"
651*b5893f02SDimitry Andric << " " << *Dst << "\n");
6524ba319b5SDimitry Andric return false;
6534ba319b5SDimitry Andric }
6544ba319b5SDimitry Andric }
6554ba319b5SDimitry Andric }
6564ba319b5SDimitry Andric }
657*b5893f02SDimitry Andric }
6584ba319b5SDimitry Andric return true;
6594ba319b5SDimitry Andric }
6604ba319b5SDimitry Andric
checkDependencies(Loop * L,BasicBlockSet & ForeBlocks,BasicBlockSet & SubLoopBlocks,BasicBlockSet & AftBlocks,DependenceInfo & DI)6614ba319b5SDimitry Andric static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
6624ba319b5SDimitry Andric BasicBlockSet &SubLoopBlocks,
6634ba319b5SDimitry Andric BasicBlockSet &AftBlocks, DependenceInfo &DI) {
6644ba319b5SDimitry Andric // Get all loads/store pairs for each blocks
6654ba319b5SDimitry Andric SmallVector<Value *, 4> ForeMemInstr;
6664ba319b5SDimitry Andric SmallVector<Value *, 4> SubLoopMemInstr;
6674ba319b5SDimitry Andric SmallVector<Value *, 4> AftMemInstr;
6684ba319b5SDimitry Andric if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
6694ba319b5SDimitry Andric !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
6704ba319b5SDimitry Andric !getLoadsAndStores(AftBlocks, AftMemInstr))
6714ba319b5SDimitry Andric return false;
6724ba319b5SDimitry Andric
6734ba319b5SDimitry Andric // Check for dependencies between any blocks that may change order
6744ba319b5SDimitry Andric unsigned LoopDepth = L->getLoopDepth();
6754ba319b5SDimitry Andric return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
6764ba319b5SDimitry Andric DI) &&
6774ba319b5SDimitry Andric checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
6784ba319b5SDimitry Andric checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
6794ba319b5SDimitry Andric DI) &&
6804ba319b5SDimitry Andric checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
6814ba319b5SDimitry Andric DI);
6824ba319b5SDimitry Andric }
6834ba319b5SDimitry Andric
isSafeToUnrollAndJam(Loop * L,ScalarEvolution & SE,DominatorTree & DT,DependenceInfo & DI)6844ba319b5SDimitry Andric bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
6854ba319b5SDimitry Andric DependenceInfo &DI) {
6864ba319b5SDimitry Andric /* We currently handle outer loops like this:
6874ba319b5SDimitry Andric |
6884ba319b5SDimitry Andric ForeFirst <----\ }
6894ba319b5SDimitry Andric Blocks | } ForeBlocks
6904ba319b5SDimitry Andric ForeLast | }
6914ba319b5SDimitry Andric | |
6924ba319b5SDimitry Andric SubLoopFirst <\ | }
6934ba319b5SDimitry Andric Blocks | | } SubLoopBlocks
6944ba319b5SDimitry Andric SubLoopLast -/ | }
6954ba319b5SDimitry Andric | |
6964ba319b5SDimitry Andric AftFirst | }
6974ba319b5SDimitry Andric Blocks | } AftBlocks
6984ba319b5SDimitry Andric AftLast ------/ }
6994ba319b5SDimitry Andric |
7004ba319b5SDimitry Andric
7014ba319b5SDimitry Andric There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
7024ba319b5SDimitry Andric and AftBlocks, providing that there is one edge from Fores to SubLoops,
7034ba319b5SDimitry Andric one edge from SubLoops to Afts and a single outer loop exit (from Afts).
7044ba319b5SDimitry Andric In practice we currently limit Aft blocks to a single block, and limit
7054ba319b5SDimitry Andric things further in the profitablility checks of the unroll and jam pass.
7064ba319b5SDimitry Andric
7074ba319b5SDimitry Andric Because of the way we rearrange basic blocks, we also require that
7084ba319b5SDimitry Andric the Fore blocks on all unrolled iterations are safe to move before the
7094ba319b5SDimitry Andric SubLoop blocks of all iterations. So we require that the phi node looping
7104ba319b5SDimitry Andric operands of ForeHeader can be moved to at least the end of ForeEnd, so that
7114ba319b5SDimitry Andric we can arrange cloned Fore Blocks before the subloop and match up Phi's
7124ba319b5SDimitry Andric correctly.
7134ba319b5SDimitry Andric
7144ba319b5SDimitry Andric i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
7154ba319b5SDimitry Andric It needs to be safe to tranform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
7164ba319b5SDimitry Andric
7174ba319b5SDimitry Andric There are then a number of checks along the lines of no calls, no
7184ba319b5SDimitry Andric exceptions, inner loop IV is consistent, etc. Note that for loops requiring
7194ba319b5SDimitry Andric runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
7204ba319b5SDimitry Andric UnrollAndJamLoop if the trip count cannot be easily calculated.
7214ba319b5SDimitry Andric */
7224ba319b5SDimitry Andric
7234ba319b5SDimitry Andric if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
7244ba319b5SDimitry Andric return false;
7254ba319b5SDimitry Andric Loop *SubLoop = L->getSubLoops()[0];
7264ba319b5SDimitry Andric if (!SubLoop->isLoopSimplifyForm())
7274ba319b5SDimitry Andric return false;
7284ba319b5SDimitry Andric
7294ba319b5SDimitry Andric BasicBlock *Header = L->getHeader();
7304ba319b5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
7314ba319b5SDimitry Andric BasicBlock *Exit = L->getExitingBlock();
7324ba319b5SDimitry Andric BasicBlock *SubLoopHeader = SubLoop->getHeader();
7334ba319b5SDimitry Andric BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
7344ba319b5SDimitry Andric BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
7354ba319b5SDimitry Andric
7364ba319b5SDimitry Andric if (Latch != Exit)
7374ba319b5SDimitry Andric return false;
7384ba319b5SDimitry Andric if (SubLoopLatch != SubLoopExit)
7394ba319b5SDimitry Andric return false;
7404ba319b5SDimitry Andric
741*b5893f02SDimitry Andric if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
742*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
7434ba319b5SDimitry Andric return false;
744*b5893f02SDimitry Andric }
7454ba319b5SDimitry Andric
7464ba319b5SDimitry Andric // Split blocks into Fore/SubLoop/Aft based on dominators
7474ba319b5SDimitry Andric BasicBlockSet SubLoopBlocks;
7484ba319b5SDimitry Andric BasicBlockSet ForeBlocks;
7494ba319b5SDimitry Andric BasicBlockSet AftBlocks;
7504ba319b5SDimitry Andric if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
751*b5893f02SDimitry Andric AftBlocks, &DT)) {
752*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
7534ba319b5SDimitry Andric return false;
754*b5893f02SDimitry Andric }
7554ba319b5SDimitry Andric
7564ba319b5SDimitry Andric // Aft blocks may need to move instructions to fore blocks, which becomes more
7574ba319b5SDimitry Andric // difficult if there are multiple (potentially conditionally executed)
7584ba319b5SDimitry Andric // blocks. For now we just exclude loops with multiple aft blocks.
759*b5893f02SDimitry Andric if (AftBlocks.size() != 1) {
760*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
761*b5893f02SDimitry Andric "multiple blocks after the loop\n");
7624ba319b5SDimitry Andric return false;
763*b5893f02SDimitry Andric }
7644ba319b5SDimitry Andric
765*b5893f02SDimitry Andric // Check inner loop backedge count is consistent on all iterations of the
766*b5893f02SDimitry Andric // outer loop
767*b5893f02SDimitry Andric if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
768*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
769*b5893f02SDimitry Andric "not consistent on each iteration\n");
7704ba319b5SDimitry Andric return false;
771*b5893f02SDimitry Andric }
7724ba319b5SDimitry Andric
7734ba319b5SDimitry Andric // Check the loop safety info for exceptions.
774*b5893f02SDimitry Andric SimpleLoopSafetyInfo LSI;
775*b5893f02SDimitry Andric LSI.computeLoopSafetyInfo(L);
776*b5893f02SDimitry Andric if (LSI.anyBlockMayThrow()) {
777*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
7784ba319b5SDimitry Andric return false;
779*b5893f02SDimitry Andric }
7804ba319b5SDimitry Andric
7814ba319b5SDimitry Andric // We've ruled out the easy stuff and now need to check that there are no
7824ba319b5SDimitry Andric // interdependencies which may prevent us from moving the:
7834ba319b5SDimitry Andric // ForeBlocks before Subloop and AftBlocks.
7844ba319b5SDimitry Andric // Subloop before AftBlocks.
7854ba319b5SDimitry Andric // ForeBlock phi operands before the subloop
7864ba319b5SDimitry Andric
7874ba319b5SDimitry Andric // Make sure we can move all instructions we need to before the subloop
7884ba319b5SDimitry Andric if (!processHeaderPhiOperands(
7894ba319b5SDimitry Andric Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
7904ba319b5SDimitry Andric if (SubLoop->contains(I->getParent()))
7914ba319b5SDimitry Andric return false;
7924ba319b5SDimitry Andric if (AftBlocks.count(I->getParent())) {
7934ba319b5SDimitry Andric // If we hit a phi node in afts we know we are done (probably
7944ba319b5SDimitry Andric // LCSSA)
7954ba319b5SDimitry Andric if (isa<PHINode>(I))
7964ba319b5SDimitry Andric return false;
7974ba319b5SDimitry Andric // Can't move instructions with side effects or memory
7984ba319b5SDimitry Andric // reads/writes
7994ba319b5SDimitry Andric if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
8004ba319b5SDimitry Andric return false;
8014ba319b5SDimitry Andric }
8024ba319b5SDimitry Andric // Keep going
8034ba319b5SDimitry Andric return true;
804*b5893f02SDimitry Andric })) {
805*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
806*b5893f02SDimitry Andric "instructions after subloop to before it\n");
8074ba319b5SDimitry Andric return false;
808*b5893f02SDimitry Andric }
8094ba319b5SDimitry Andric
8104ba319b5SDimitry Andric // Check for memory dependencies which prohibit the unrolling we are doing.
8114ba319b5SDimitry Andric // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
8124ba319b5SDimitry Andric // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
813*b5893f02SDimitry Andric if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
814*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
8154ba319b5SDimitry Andric return false;
816*b5893f02SDimitry Andric }
8174ba319b5SDimitry Andric
8184ba319b5SDimitry Andric return true;
8194ba319b5SDimitry Andric }
820