10b57cec5SDimitry Andric //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements some loop unrolling utilities for loops with run-time
100b57cec5SDimitry Andric // trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time
110b57cec5SDimitry Andric // trip counts.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // The functions in this file are used to generate extra code when the
140b57cec5SDimitry Andric // run-time trip count modulo the unroll factor is not 0.  When this is the
150b57cec5SDimitry Andric // case, we need to generate code to execute these 'left over' iterations.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // The current strategy generates an if-then-else sequence prior to the
180b57cec5SDimitry Andric // unrolled loop to execute the 'left over' iterations before or after the
190b57cec5SDimitry Andric // unrolled loop.
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
240b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
270b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
280b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
29af732203SDimitry Andric #include "llvm/IR/MDBuilder.h"
300b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
310b57cec5SDimitry Andric #include "llvm/IR/Module.h"
32480093f4SDimitry Andric #include "llvm/Support/CommandLine.h"
330b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
340b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
350b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
360b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
370b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
380b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
395ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
410b57cec5SDimitry Andric #include <algorithm>
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll"
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric STATISTIC(NumRuntimeUnrolled,
480b57cec5SDimitry Andric           "Number of loops unrolled with run-time trip counts");
490b57cec5SDimitry Andric static cl::opt<bool> UnrollRuntimeMultiExit(
500b57cec5SDimitry Andric     "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
510b57cec5SDimitry Andric     cl::desc("Allow runtime unrolling for loops with multiple exits, when "
520b57cec5SDimitry Andric              "epilog is generated"));
53*5f7ddb14SDimitry Andric static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
54*5f7ddb14SDimitry Andric     "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
55*5f7ddb14SDimitry Andric     cl::desc("Assume the non latch exit block to be predictable"));
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric /// Connect the unrolling prolog code to the original loop.
580b57cec5SDimitry Andric /// The unrolling prolog code contains code to execute the
590b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
600b57cec5SDimitry Andric /// unroll count is non-zero.
610b57cec5SDimitry Andric ///
620b57cec5SDimitry Andric /// This function performs the following:
630b57cec5SDimitry Andric /// - Create PHI nodes at prolog end block to combine values
640b57cec5SDimitry Andric ///   that exit the prolog code and jump around the prolog.
650b57cec5SDimitry Andric /// - Add a PHI operand to a PHI node at the loop exit block
660b57cec5SDimitry Andric ///   for values that exit the prolog and go around the loop.
670b57cec5SDimitry Andric /// - Branch around the original loop if the trip count is less
680b57cec5SDimitry Andric ///   than the unroll factor.
690b57cec5SDimitry Andric ///
ConnectProlog(Loop * L,Value * BECount,unsigned Count,BasicBlock * PrologExit,BasicBlock * OriginalLoopLatchExit,BasicBlock * PreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)700b57cec5SDimitry Andric static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
710b57cec5SDimitry Andric                           BasicBlock *PrologExit,
720b57cec5SDimitry Andric                           BasicBlock *OriginalLoopLatchExit,
730b57cec5SDimitry Andric                           BasicBlock *PreHeader, BasicBlock *NewPreHeader,
740b57cec5SDimitry Andric                           ValueToValueMapTy &VMap, DominatorTree *DT,
750b57cec5SDimitry Andric                           LoopInfo *LI, bool PreserveLCSSA) {
760b57cec5SDimitry Andric   // Loop structure should be the following:
770b57cec5SDimitry Andric   // Preheader
780b57cec5SDimitry Andric   //  PrologHeader
790b57cec5SDimitry Andric   //  ...
800b57cec5SDimitry Andric   //  PrologLatch
810b57cec5SDimitry Andric   //  PrologExit
820b57cec5SDimitry Andric   //   NewPreheader
830b57cec5SDimitry Andric   //    Header
840b57cec5SDimitry Andric   //    ...
850b57cec5SDimitry Andric   //    Latch
860b57cec5SDimitry Andric   //      LatchExit
870b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
880b57cec5SDimitry Andric   assert(Latch && "Loop must have a latch");
890b57cec5SDimitry Andric   BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   // Create a PHI node for each outgoing value from the original loop
920b57cec5SDimitry Andric   // (which means it is an outgoing value from the prolog code too).
930b57cec5SDimitry Andric   // The new PHI node is inserted in the prolog end basic block.
940b57cec5SDimitry Andric   // The new PHI node value is added as an operand of a PHI node in either
950b57cec5SDimitry Andric   // the loop header or the loop exit block.
960b57cec5SDimitry Andric   for (BasicBlock *Succ : successors(Latch)) {
970b57cec5SDimitry Andric     for (PHINode &PN : Succ->phis()) {
980b57cec5SDimitry Andric       // Add a new PHI node to the prolog end block and add the
990b57cec5SDimitry Andric       // appropriate incoming values.
1000b57cec5SDimitry Andric       // TODO: This code assumes that the PrologExit (or the LatchExit block for
1010b57cec5SDimitry Andric       // prolog loop) contains only one predecessor from the loop, i.e. the
1020b57cec5SDimitry Andric       // PrologLatch. When supporting multiple-exiting block loops, we can have
1030b57cec5SDimitry Andric       // two or more blocks that have the LatchExit as the target in the
1040b57cec5SDimitry Andric       // original loop.
1050b57cec5SDimitry Andric       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
1060b57cec5SDimitry Andric                                        PrologExit->getFirstNonPHI());
1070b57cec5SDimitry Andric       // Adding a value to the new PHI node from the original loop preheader.
1080b57cec5SDimitry Andric       // This is the value that skips all the prolog code.
1090b57cec5SDimitry Andric       if (L->contains(&PN)) {
1100b57cec5SDimitry Andric         // Succ is loop header.
1110b57cec5SDimitry Andric         NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
1120b57cec5SDimitry Andric                            PreHeader);
1130b57cec5SDimitry Andric       } else {
1140b57cec5SDimitry Andric         // Succ is LatchExit.
1150b57cec5SDimitry Andric         NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
1160b57cec5SDimitry Andric       }
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric       Value *V = PN.getIncomingValueForBlock(Latch);
1190b57cec5SDimitry Andric       if (Instruction *I = dyn_cast<Instruction>(V)) {
1200b57cec5SDimitry Andric         if (L->contains(I)) {
1210b57cec5SDimitry Andric           V = VMap.lookup(I);
1220b57cec5SDimitry Andric         }
1230b57cec5SDimitry Andric       }
1240b57cec5SDimitry Andric       // Adding a value to the new PHI node from the last prolog block
1250b57cec5SDimitry Andric       // that was created.
1260b57cec5SDimitry Andric       NewPN->addIncoming(V, PrologLatch);
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric       // Update the existing PHI node operand with the value from the
1290b57cec5SDimitry Andric       // new PHI node.  How this is done depends on if the existing
1300b57cec5SDimitry Andric       // PHI node is in the original loop block, or the exit block.
1310b57cec5SDimitry Andric       if (L->contains(&PN))
1320b57cec5SDimitry Andric         PN.setIncomingValueForBlock(NewPreHeader, NewPN);
1330b57cec5SDimitry Andric       else
1340b57cec5SDimitry Andric         PN.addIncoming(NewPN, PrologExit);
1350b57cec5SDimitry Andric     }
1360b57cec5SDimitry Andric   }
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   // Make sure that created prolog loop is in simplified form
1390b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> PrologExitPreds;
1400b57cec5SDimitry Andric   Loop *PrologLoop = LI->getLoopFor(PrologLatch);
1410b57cec5SDimitry Andric   if (PrologLoop) {
1420b57cec5SDimitry Andric     for (BasicBlock *PredBB : predecessors(PrologExit))
1430b57cec5SDimitry Andric       if (PrologLoop->contains(PredBB))
1440b57cec5SDimitry Andric         PrologExitPreds.push_back(PredBB);
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric     SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
1470b57cec5SDimitry Andric                            nullptr, PreserveLCSSA);
1480b57cec5SDimitry Andric   }
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric   // Create a branch around the original loop, which is taken if there are no
1510b57cec5SDimitry Andric   // iterations remaining to be executed after running the prologue.
1520b57cec5SDimitry Andric   Instruction *InsertPt = PrologExit->getTerminator();
1530b57cec5SDimitry Andric   IRBuilder<> B(InsertPt);
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric   assert(Count != 0 && "nonsensical Count!");
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric   // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
1580b57cec5SDimitry Andric   // This means %xtraiter is (BECount + 1) and all of the iterations of this
1590b57cec5SDimitry Andric   // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
1600b57cec5SDimitry Andric   // then (BECount + 1) cannot unsigned-overflow.
1610b57cec5SDimitry Andric   Value *BrLoopExit =
1620b57cec5SDimitry Andric       B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
1630b57cec5SDimitry Andric   // Split the exit to maintain loop canonicalization guarantees
1640b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
1650b57cec5SDimitry Andric   SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
1660b57cec5SDimitry Andric                          nullptr, PreserveLCSSA);
1670b57cec5SDimitry Andric   // Add the branch to the exit block (around the unrolled loop)
1680b57cec5SDimitry Andric   B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
1690b57cec5SDimitry Andric   InsertPt->eraseFromParent();
1700b57cec5SDimitry Andric   if (DT)
1710b57cec5SDimitry Andric     DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric /// Connect the unrolling epilog code to the original loop.
1750b57cec5SDimitry Andric /// The unrolling epilog code contains code to execute the
1760b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
1770b57cec5SDimitry Andric /// unroll count is non-zero.
1780b57cec5SDimitry Andric ///
1790b57cec5SDimitry Andric /// This function performs the following:
1800b57cec5SDimitry Andric /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
1810b57cec5SDimitry Andric /// - Create PHI nodes at the unrolling loop exit to combine
1820b57cec5SDimitry Andric ///   values that exit the unrolling loop code and jump around it.
1830b57cec5SDimitry Andric /// - Update PHI operands in the epilog loop by the new PHI nodes
1840b57cec5SDimitry Andric /// - Branch around the epilog loop if extra iters (ModVal) is zero.
1850b57cec5SDimitry Andric ///
ConnectEpilog(Loop * L,Value * ModVal,BasicBlock * NewExit,BasicBlock * Exit,BasicBlock * PreHeader,BasicBlock * EpilogPreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)1860b57cec5SDimitry Andric static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
1870b57cec5SDimitry Andric                           BasicBlock *Exit, BasicBlock *PreHeader,
1880b57cec5SDimitry Andric                           BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
1890b57cec5SDimitry Andric                           ValueToValueMapTy &VMap, DominatorTree *DT,
1900b57cec5SDimitry Andric                           LoopInfo *LI, bool PreserveLCSSA)  {
1910b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
1920b57cec5SDimitry Andric   assert(Latch && "Loop must have a latch");
1930b57cec5SDimitry Andric   BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   // Loop structure should be the following:
1960b57cec5SDimitry Andric   //
1970b57cec5SDimitry Andric   // PreHeader
1980b57cec5SDimitry Andric   // NewPreHeader
1990b57cec5SDimitry Andric   //   Header
2000b57cec5SDimitry Andric   //   ...
2010b57cec5SDimitry Andric   //   Latch
2020b57cec5SDimitry Andric   // NewExit (PN)
2030b57cec5SDimitry Andric   // EpilogPreHeader
2040b57cec5SDimitry Andric   //   EpilogHeader
2050b57cec5SDimitry Andric   //   ...
2060b57cec5SDimitry Andric   //   EpilogLatch
2070b57cec5SDimitry Andric   // Exit (EpilogPN)
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   // Update PHI nodes at NewExit and Exit.
2100b57cec5SDimitry Andric   for (PHINode &PN : NewExit->phis()) {
2110b57cec5SDimitry Andric     // PN should be used in another PHI located in Exit block as
2120b57cec5SDimitry Andric     // Exit was split by SplitBlockPredecessors into Exit and NewExit
2130b57cec5SDimitry Andric     // Basicaly it should look like:
2140b57cec5SDimitry Andric     // NewExit:
2150b57cec5SDimitry Andric     //   PN = PHI [I, Latch]
2160b57cec5SDimitry Andric     // ...
2170b57cec5SDimitry Andric     // Exit:
2180b57cec5SDimitry Andric     //   EpilogPN = PHI [PN, EpilogPreHeader]
2190b57cec5SDimitry Andric     //
2200b57cec5SDimitry Andric     // There is EpilogPreHeader incoming block instead of NewExit as
2210b57cec5SDimitry Andric     // NewExit was spilt 1 more time to get EpilogPreHeader.
2220b57cec5SDimitry Andric     assert(PN.hasOneUse() && "The phi should have 1 use");
2230b57cec5SDimitry Andric     PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
2240b57cec5SDimitry Andric     assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric     // Add incoming PreHeader from branch around the Loop
2270b57cec5SDimitry Andric     PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric     Value *V = PN.getIncomingValueForBlock(Latch);
2300b57cec5SDimitry Andric     Instruction *I = dyn_cast<Instruction>(V);
2310b57cec5SDimitry Andric     if (I && L->contains(I))
2320b57cec5SDimitry Andric       // If value comes from an instruction in the loop add VMap value.
2330b57cec5SDimitry Andric       V = VMap.lookup(I);
2340b57cec5SDimitry Andric     // For the instruction out of the loop, constant or undefined value
2350b57cec5SDimitry Andric     // insert value itself.
2360b57cec5SDimitry Andric     EpilogPN->addIncoming(V, EpilogLatch);
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric     assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
2390b57cec5SDimitry Andric           "EpilogPN should have EpilogPreHeader incoming block");
2400b57cec5SDimitry Andric     // Change EpilogPreHeader incoming block to NewExit.
2410b57cec5SDimitry Andric     EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
2420b57cec5SDimitry Andric                                NewExit);
2430b57cec5SDimitry Andric     // Now PHIs should look like:
2440b57cec5SDimitry Andric     // NewExit:
2450b57cec5SDimitry Andric     //   PN = PHI [I, Latch], [undef, PreHeader]
2460b57cec5SDimitry Andric     // ...
2470b57cec5SDimitry Andric     // Exit:
2480b57cec5SDimitry Andric     //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
2490b57cec5SDimitry Andric   }
2500b57cec5SDimitry Andric 
2510b57cec5SDimitry Andric   // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
2520b57cec5SDimitry Andric   // Update corresponding PHI nodes in epilog loop.
2530b57cec5SDimitry Andric   for (BasicBlock *Succ : successors(Latch)) {
2540b57cec5SDimitry Andric     // Skip this as we already updated phis in exit blocks.
2550b57cec5SDimitry Andric     if (!L->contains(Succ))
2560b57cec5SDimitry Andric       continue;
2570b57cec5SDimitry Andric     for (PHINode &PN : Succ->phis()) {
2580b57cec5SDimitry Andric       // Add new PHI nodes to the loop exit block and update epilog
2590b57cec5SDimitry Andric       // PHIs with the new PHI values.
2600b57cec5SDimitry Andric       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
2610b57cec5SDimitry Andric                                        NewExit->getFirstNonPHI());
2620b57cec5SDimitry Andric       // Adding a value to the new PHI node from the unrolling loop preheader.
2630b57cec5SDimitry Andric       NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
2640b57cec5SDimitry Andric       // Adding a value to the new PHI node from the unrolling loop latch.
2650b57cec5SDimitry Andric       NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric       // Update the existing PHI node operand with the value from the new PHI
2680b57cec5SDimitry Andric       // node.  Corresponding instruction in epilog loop should be PHI.
2690b57cec5SDimitry Andric       PHINode *VPN = cast<PHINode>(VMap[&PN]);
2700b57cec5SDimitry Andric       VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
2710b57cec5SDimitry Andric     }
2720b57cec5SDimitry Andric   }
2730b57cec5SDimitry Andric 
2740b57cec5SDimitry Andric   Instruction *InsertPt = NewExit->getTerminator();
2750b57cec5SDimitry Andric   IRBuilder<> B(InsertPt);
2760b57cec5SDimitry Andric   Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
2770b57cec5SDimitry Andric   assert(Exit && "Loop must have a single exit block only");
2780b57cec5SDimitry Andric   // Split the epilogue exit to maintain loop canonicalization guarantees
2790b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
2800b57cec5SDimitry Andric   SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
2810b57cec5SDimitry Andric                          PreserveLCSSA);
2820b57cec5SDimitry Andric   // Add the branch to the exit block (around the unrolling loop)
2830b57cec5SDimitry Andric   B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
2840b57cec5SDimitry Andric   InsertPt->eraseFromParent();
2850b57cec5SDimitry Andric   if (DT)
2860b57cec5SDimitry Andric     DT->changeImmediateDominator(Exit, NewExit);
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric   // Split the main loop exit to maintain canonicalization guarantees.
2890b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
2900b57cec5SDimitry Andric   SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
2910b57cec5SDimitry Andric                          PreserveLCSSA);
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric /// Create a clone of the blocks in a loop and connect them together.
2950b57cec5SDimitry Andric /// If CreateRemainderLoop is false, loop structure will not be cloned,
2960b57cec5SDimitry Andric /// otherwise a new loop will be created including all cloned blocks, and the
2970b57cec5SDimitry Andric /// iterator of it switches to count NewIter down to 0.
2980b57cec5SDimitry Andric /// The cloned blocks should be inserted between InsertTop and InsertBot.
2990b57cec5SDimitry Andric /// If loop structure is cloned InsertTop should be new preheader, InsertBot
3000b57cec5SDimitry Andric /// new loop exit.
3010b57cec5SDimitry Andric /// Return the new cloned loop that is created when CreateRemainderLoop is true.
3020b57cec5SDimitry Andric static Loop *
CloneLoopBlocks(Loop * L,Value * NewIter,const bool CreateRemainderLoop,const bool UseEpilogRemainder,const bool UnrollRemainder,BasicBlock * InsertTop,BasicBlock * InsertBot,BasicBlock * Preheader,std::vector<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI)3030b57cec5SDimitry Andric CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
3040b57cec5SDimitry Andric                 const bool UseEpilogRemainder, const bool UnrollRemainder,
3050b57cec5SDimitry Andric                 BasicBlock *InsertTop,
3060b57cec5SDimitry Andric                 BasicBlock *InsertBot, BasicBlock *Preheader,
3070b57cec5SDimitry Andric                 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
3080b57cec5SDimitry Andric                 ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
3090b57cec5SDimitry Andric   StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
3100b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
3110b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
3120b57cec5SDimitry Andric   Function *F = Header->getParent();
3130b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
3140b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
3150b57cec5SDimitry Andric   Loop *ParentLoop = L->getParentLoop();
3160b57cec5SDimitry Andric   NewLoopsMap NewLoops;
3170b57cec5SDimitry Andric   NewLoops[ParentLoop] = ParentLoop;
3180b57cec5SDimitry Andric   if (!CreateRemainderLoop)
3190b57cec5SDimitry Andric     NewLoops[L] = ParentLoop;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   // For each block in the original loop, create a new copy,
3220b57cec5SDimitry Andric   // and update the value map with the newly created values.
3230b57cec5SDimitry Andric   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3240b57cec5SDimitry Andric     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
3250b57cec5SDimitry Andric     NewBlocks.push_back(NewBB);
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric     // If we're unrolling the outermost loop, there's no remainder loop,
3280b57cec5SDimitry Andric     // and this block isn't in a nested loop, then the new block is not
3290b57cec5SDimitry Andric     // in any loop. Otherwise, add it to loopinfo.
3300b57cec5SDimitry Andric     if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
3310b57cec5SDimitry Andric       addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
3320b57cec5SDimitry Andric 
3330b57cec5SDimitry Andric     VMap[*BB] = NewBB;
3340b57cec5SDimitry Andric     if (Header == *BB) {
3350b57cec5SDimitry Andric       // For the first block, add a CFG connection to this newly
3360b57cec5SDimitry Andric       // created block.
3370b57cec5SDimitry Andric       InsertTop->getTerminator()->setSuccessor(0, NewBB);
3380b57cec5SDimitry Andric     }
3390b57cec5SDimitry Andric 
3400b57cec5SDimitry Andric     if (DT) {
3410b57cec5SDimitry Andric       if (Header == *BB) {
3420b57cec5SDimitry Andric         // The header is dominated by the preheader.
3430b57cec5SDimitry Andric         DT->addNewBlock(NewBB, InsertTop);
3440b57cec5SDimitry Andric       } else {
3450b57cec5SDimitry Andric         // Copy information from original loop to unrolled loop.
3460b57cec5SDimitry Andric         BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
3470b57cec5SDimitry Andric         DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
3480b57cec5SDimitry Andric       }
3490b57cec5SDimitry Andric     }
3500b57cec5SDimitry Andric 
3510b57cec5SDimitry Andric     if (Latch == *BB) {
3520b57cec5SDimitry Andric       // For the last block, if CreateRemainderLoop is false, create a direct
3530b57cec5SDimitry Andric       // jump to InsertBot. If not, create a loop back to cloned head.
3540b57cec5SDimitry Andric       VMap.erase((*BB)->getTerminator());
3550b57cec5SDimitry Andric       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
3560b57cec5SDimitry Andric       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
3570b57cec5SDimitry Andric       IRBuilder<> Builder(LatchBR);
3580b57cec5SDimitry Andric       if (!CreateRemainderLoop) {
3590b57cec5SDimitry Andric         Builder.CreateBr(InsertBot);
3600b57cec5SDimitry Andric       } else {
3610b57cec5SDimitry Andric         PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
3620b57cec5SDimitry Andric                                           suffix + ".iter",
3630b57cec5SDimitry Andric                                           FirstLoopBB->getFirstNonPHI());
3640b57cec5SDimitry Andric         Value *IdxSub =
3650b57cec5SDimitry Andric             Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
3660b57cec5SDimitry Andric                               NewIdx->getName() + ".sub");
3670b57cec5SDimitry Andric         Value *IdxCmp =
3680b57cec5SDimitry Andric             Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
3690b57cec5SDimitry Andric         Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
3700b57cec5SDimitry Andric         NewIdx->addIncoming(NewIter, InsertTop);
3710b57cec5SDimitry Andric         NewIdx->addIncoming(IdxSub, NewBB);
3720b57cec5SDimitry Andric       }
3730b57cec5SDimitry Andric       LatchBR->eraseFromParent();
3740b57cec5SDimitry Andric     }
3750b57cec5SDimitry Andric   }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric   // Change the incoming values to the ones defined in the preheader or
3780b57cec5SDimitry Andric   // cloned loop.
3790b57cec5SDimitry Andric   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
3800b57cec5SDimitry Andric     PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
3810b57cec5SDimitry Andric     if (!CreateRemainderLoop) {
3820b57cec5SDimitry Andric       if (UseEpilogRemainder) {
3830b57cec5SDimitry Andric         unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
3840b57cec5SDimitry Andric         NewPHI->setIncomingBlock(idx, InsertTop);
3850b57cec5SDimitry Andric         NewPHI->removeIncomingValue(Latch, false);
3860b57cec5SDimitry Andric       } else {
3870b57cec5SDimitry Andric         VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
3880b57cec5SDimitry Andric         cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
3890b57cec5SDimitry Andric       }
3900b57cec5SDimitry Andric     } else {
3910b57cec5SDimitry Andric       unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
3920b57cec5SDimitry Andric       NewPHI->setIncomingBlock(idx, InsertTop);
3930b57cec5SDimitry Andric       BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
3940b57cec5SDimitry Andric       idx = NewPHI->getBasicBlockIndex(Latch);
3950b57cec5SDimitry Andric       Value *InVal = NewPHI->getIncomingValue(idx);
3960b57cec5SDimitry Andric       NewPHI->setIncomingBlock(idx, NewLatch);
3970b57cec5SDimitry Andric       if (Value *V = VMap.lookup(InVal))
3980b57cec5SDimitry Andric         NewPHI->setIncomingValue(idx, V);
3990b57cec5SDimitry Andric     }
4000b57cec5SDimitry Andric   }
4010b57cec5SDimitry Andric   if (CreateRemainderLoop) {
4020b57cec5SDimitry Andric     Loop *NewLoop = NewLoops[L];
4030b57cec5SDimitry Andric     assert(NewLoop && "L should have been cloned");
404480093f4SDimitry Andric     MDNode *LoopID = NewLoop->getLoopID();
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric     // Only add loop metadata if the loop is not going to be completely
4070b57cec5SDimitry Andric     // unrolled.
4080b57cec5SDimitry Andric     if (UnrollRemainder)
4090b57cec5SDimitry Andric       return NewLoop;
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric     Optional<MDNode *> NewLoopID = makeFollowupLoopID(
4120b57cec5SDimitry Andric         LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
4130b57cec5SDimitry Andric     if (NewLoopID.hasValue()) {
4140b57cec5SDimitry Andric       NewLoop->setLoopID(NewLoopID.getValue());
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric       // Do not setLoopAlreadyUnrolled if loop attributes have been defined
4170b57cec5SDimitry Andric       // explicitly.
4180b57cec5SDimitry Andric       return NewLoop;
4190b57cec5SDimitry Andric     }
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric     // Add unroll disable metadata to disable future unrolling for this loop.
4220b57cec5SDimitry Andric     NewLoop->setLoopAlreadyUnrolled();
4230b57cec5SDimitry Andric     return NewLoop;
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric   else
4260b57cec5SDimitry Andric     return nullptr;
4270b57cec5SDimitry Andric }
4280b57cec5SDimitry Andric 
4290b57cec5SDimitry Andric /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
4300b57cec5SDimitry Andric /// is populated with all the loop exit blocks other than the LatchExit block.
canSafelyUnrollMultiExitLoop(Loop * L,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)4310b57cec5SDimitry Andric static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
4320b57cec5SDimitry Andric                                          bool PreserveLCSSA,
4330b57cec5SDimitry Andric                                          bool UseEpilogRemainder) {
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric   // We currently have some correctness constrains in unrolling a multi-exit
4360b57cec5SDimitry Andric   // loop. Check for these below.
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric   // We rely on LCSSA form being preserved when the exit blocks are transformed.
4390b57cec5SDimitry Andric   if (!PreserveLCSSA)
4400b57cec5SDimitry Andric     return false;
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
4430b57cec5SDimitry Andric   // UnrollRuntimeMultiExit is true. This will need updating the logic in
4440b57cec5SDimitry Andric   // connectEpilog/connectProlog.
4450b57cec5SDimitry Andric   if (!LatchExit->getSinglePredecessor()) {
4460b57cec5SDimitry Andric     LLVM_DEBUG(
4470b57cec5SDimitry Andric         dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
4480b57cec5SDimitry Andric                   "predecessor.\n");
4490b57cec5SDimitry Andric     return false;
4500b57cec5SDimitry Andric   }
4510b57cec5SDimitry Andric   // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
4520b57cec5SDimitry Andric   // and L is an inner loop. This is because in presence of multiple exits, the
4530b57cec5SDimitry Andric   // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
4540b57cec5SDimitry Andric   // outer loop. This is automatically handled in the prolog case, so we do not
4550b57cec5SDimitry Andric   // have that bug in prolog generation.
4560b57cec5SDimitry Andric   if (UseEpilogRemainder && L->getParentLoop())
4570b57cec5SDimitry Andric     return false;
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric   // All constraints have been satisfied.
4600b57cec5SDimitry Andric   return true;
4610b57cec5SDimitry Andric }
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
4640b57cec5SDimitry Andric /// we return true only if UnrollRuntimeMultiExit is set to true.
canProfitablyUnrollMultiExitLoop(Loop * L,SmallVectorImpl<BasicBlock * > & OtherExits,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)4650b57cec5SDimitry Andric static bool canProfitablyUnrollMultiExitLoop(
4660b57cec5SDimitry Andric     Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
4670b57cec5SDimitry Andric     bool PreserveLCSSA, bool UseEpilogRemainder) {
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric #if !defined(NDEBUG)
4700b57cec5SDimitry Andric   assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
4710b57cec5SDimitry Andric                                       UseEpilogRemainder) &&
4720b57cec5SDimitry Andric          "Should be safe to unroll before checking profitability!");
4730b57cec5SDimitry Andric #endif
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric   // Priority goes to UnrollRuntimeMultiExit if it's supplied.
4760b57cec5SDimitry Andric   if (UnrollRuntimeMultiExit.getNumOccurrences())
4770b57cec5SDimitry Andric     return UnrollRuntimeMultiExit;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   // The main pain point with multi-exit loop unrolling is that once unrolled,
4800b57cec5SDimitry Andric   // we will not be able to merge all blocks into a straight line code.
4810b57cec5SDimitry Andric   // There are branches within the unrolled loop that go to the OtherExits.
4820b57cec5SDimitry Andric   // The second point is the increase in code size, but this is true
4830b57cec5SDimitry Andric   // irrespective of multiple exits.
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric   // Note: Both the heuristics below are coarse grained. We are essentially
4860b57cec5SDimitry Andric   // enabling unrolling of loops that have a single side exit other than the
4870b57cec5SDimitry Andric   // normal LatchExit (i.e. exiting into a deoptimize block).
4880b57cec5SDimitry Andric   // The heuristics considered are:
4890b57cec5SDimitry Andric   // 1. low number of branches in the unrolled version.
4900b57cec5SDimitry Andric   // 2. high predictability of these extra branches.
4910b57cec5SDimitry Andric   // We avoid unrolling loops that have more than two exiting blocks. This
4920b57cec5SDimitry Andric   // limits the total number of branches in the unrolled loop to be atmost
4930b57cec5SDimitry Andric   // the unroll factor (since one of the exiting blocks is the latch block).
4940b57cec5SDimitry Andric   SmallVector<BasicBlock*, 4> ExitingBlocks;
4950b57cec5SDimitry Andric   L->getExitingBlocks(ExitingBlocks);
4960b57cec5SDimitry Andric   if (ExitingBlocks.size() > 2)
4970b57cec5SDimitry Andric     return false;
4980b57cec5SDimitry Andric 
499*5f7ddb14SDimitry Andric   // Allow unrolling of loops with no non latch exit blocks.
500*5f7ddb14SDimitry Andric   if (OtherExits.size() == 0)
501*5f7ddb14SDimitry Andric     return true;
502*5f7ddb14SDimitry Andric 
5030b57cec5SDimitry Andric   // The second heuristic is that L has one exit other than the latchexit and
5040b57cec5SDimitry Andric   // that exit is a deoptimize block. We know that deoptimize blocks are rarely
5050b57cec5SDimitry Andric   // taken, which also implies the branch leading to the deoptimize block is
506*5f7ddb14SDimitry Andric   // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
507*5f7ddb14SDimitry Andric   // assume the other exit branch is predictable even if it has no deoptimize
508*5f7ddb14SDimitry Andric   // call.
5090b57cec5SDimitry Andric   return (OtherExits.size() == 1 &&
510*5f7ddb14SDimitry Andric           (UnrollRuntimeOtherExitPredictable ||
511*5f7ddb14SDimitry Andric            OtherExits[0]->getTerminatingDeoptimizeCall()));
5120b57cec5SDimitry Andric   // TODO: These can be fine-tuned further to consider code size or deopt states
5130b57cec5SDimitry Andric   // that are captured by the deoptimize exit block.
5140b57cec5SDimitry Andric   // Also, we can extend this to support more cases, if we actually
5150b57cec5SDimitry Andric   // know of kinds of multiexit loops that would benefit from unrolling.
5160b57cec5SDimitry Andric }
5170b57cec5SDimitry Andric 
518af732203SDimitry Andric // Assign the maximum possible trip count as the back edge weight for the
519af732203SDimitry Andric // remainder loop if the original loop comes with a branch weight.
updateLatchBranchWeightsForRemainderLoop(Loop * OrigLoop,Loop * RemainderLoop,uint64_t UnrollFactor)520af732203SDimitry Andric static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
521af732203SDimitry Andric                                                      Loop *RemainderLoop,
522af732203SDimitry Andric                                                      uint64_t UnrollFactor) {
523af732203SDimitry Andric   uint64_t TrueWeight, FalseWeight;
524af732203SDimitry Andric   BranchInst *LatchBR =
525af732203SDimitry Andric       cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
526af732203SDimitry Andric   if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
527af732203SDimitry Andric     uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
528af732203SDimitry Andric                               ? FalseWeight
529af732203SDimitry Andric                               : TrueWeight;
530af732203SDimitry Andric     assert(UnrollFactor > 1);
531af732203SDimitry Andric     uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
532af732203SDimitry Andric     BasicBlock *Header = RemainderLoop->getHeader();
533af732203SDimitry Andric     BasicBlock *Latch = RemainderLoop->getLoopLatch();
534af732203SDimitry Andric     auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
535af732203SDimitry Andric     unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
536af732203SDimitry Andric     MDBuilder MDB(RemainderLatchBR->getContext());
537af732203SDimitry Andric     MDNode *WeightNode =
538af732203SDimitry Andric         HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
539af732203SDimitry Andric                   : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
540af732203SDimitry Andric     RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
541af732203SDimitry Andric   }
542af732203SDimitry Andric }
543af732203SDimitry Andric 
5440b57cec5SDimitry Andric /// Insert code in the prolog/epilog code when unrolling a loop with a
5450b57cec5SDimitry Andric /// run-time trip-count.
5460b57cec5SDimitry Andric ///
5470b57cec5SDimitry Andric /// This method assumes that the loop unroll factor is total number
5480b57cec5SDimitry Andric /// of loop bodies in the loop after unrolling. (Some folks refer
5490b57cec5SDimitry Andric /// to the unroll factor as the number of *extra* copies added).
5500b57cec5SDimitry Andric /// We assume also that the loop unroll factor is a power-of-two. So, after
5510b57cec5SDimitry Andric /// unrolling the loop, the number of loop bodies executed is 2,
5520b57cec5SDimitry Andric /// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
5530b57cec5SDimitry Andric /// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
5540b57cec5SDimitry Andric /// the switch instruction is generated.
5550b57cec5SDimitry Andric ///
5560b57cec5SDimitry Andric /// ***Prolog case***
5570b57cec5SDimitry Andric ///        extraiters = tripcount % loopfactor
5580b57cec5SDimitry Andric ///        if (extraiters == 0) jump Loop:
5590b57cec5SDimitry Andric ///        else jump Prol:
5600b57cec5SDimitry Andric /// Prol:  LoopBody;
5610b57cec5SDimitry Andric ///        extraiters -= 1                 // Omitted if unroll factor is 2.
5620b57cec5SDimitry Andric ///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
5630b57cec5SDimitry Andric ///        if (tripcount < loopfactor) jump End:
5640b57cec5SDimitry Andric /// Loop:
5650b57cec5SDimitry Andric /// ...
5660b57cec5SDimitry Andric /// End:
5670b57cec5SDimitry Andric ///
5680b57cec5SDimitry Andric /// ***Epilog case***
5690b57cec5SDimitry Andric ///        extraiters = tripcount % loopfactor
5700b57cec5SDimitry Andric ///        if (tripcount < loopfactor) jump LoopExit:
5710b57cec5SDimitry Andric ///        unroll_iters = tripcount - extraiters
5720b57cec5SDimitry Andric /// Loop:  LoopBody; (executes unroll_iter times);
5730b57cec5SDimitry Andric ///        unroll_iter -= 1
5740b57cec5SDimitry Andric ///        if (unroll_iter != 0) jump Loop:
5750b57cec5SDimitry Andric /// LoopExit:
5760b57cec5SDimitry Andric ///        if (extraiters == 0) jump EpilExit:
5770b57cec5SDimitry Andric /// Epil:  LoopBody; (executes extraiters times)
5780b57cec5SDimitry Andric ///        extraiters -= 1                 // Omitted if unroll factor is 2.
5790b57cec5SDimitry Andric ///        if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
5800b57cec5SDimitry Andric /// EpilExit:
5810b57cec5SDimitry Andric 
UnrollRuntimeLoopRemainder(Loop * L,unsigned Count,bool AllowExpensiveTripCount,bool UseEpilogRemainder,bool UnrollRemainder,bool ForgetAllSCEV,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,bool PreserveLCSSA,Loop ** ResultLoop)5825ffd83dbSDimitry Andric bool llvm::UnrollRuntimeLoopRemainder(
5835ffd83dbSDimitry Andric     Loop *L, unsigned Count, bool AllowExpensiveTripCount,
5845ffd83dbSDimitry Andric     bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
5855ffd83dbSDimitry Andric     LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
5865ffd83dbSDimitry Andric     const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
5870b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
5880b57cec5SDimitry Andric   LLVM_DEBUG(L->dump());
5890b57cec5SDimitry Andric   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
5900b57cec5SDimitry Andric                                 : dbgs() << "Using prolog remainder.\n");
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric   // Make sure the loop is in canonical form.
5930b57cec5SDimitry Andric   if (!L->isLoopSimplifyForm()) {
5940b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
5950b57cec5SDimitry Andric     return false;
5960b57cec5SDimitry Andric   }
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric   // Guaranteed by LoopSimplifyForm.
5990b57cec5SDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
6000b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
6010b57cec5SDimitry Andric 
6020b57cec5SDimitry Andric   BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
6030b57cec5SDimitry Andric 
6040b57cec5SDimitry Andric   if (!LatchBR || LatchBR->isUnconditional()) {
6050b57cec5SDimitry Andric     // The loop-rotate pass can be helpful to avoid this in many cases.
6060b57cec5SDimitry Andric     LLVM_DEBUG(
6070b57cec5SDimitry Andric         dbgs()
6080b57cec5SDimitry Andric         << "Loop latch not terminated by a conditional branch.\n");
6090b57cec5SDimitry Andric     return false;
6100b57cec5SDimitry Andric   }
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric   unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
6130b57cec5SDimitry Andric   BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
6140b57cec5SDimitry Andric 
6150b57cec5SDimitry Andric   if (L->contains(LatchExit)) {
6160b57cec5SDimitry Andric     // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
6170b57cec5SDimitry Andric     // targets of the Latch be an exit block out of the loop.
6180b57cec5SDimitry Andric     LLVM_DEBUG(
6190b57cec5SDimitry Andric         dbgs()
6200b57cec5SDimitry Andric         << "One of the loop latch successors must be the exit block.\n");
6210b57cec5SDimitry Andric     return false;
6220b57cec5SDimitry Andric   }
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric   // These are exit blocks other than the target of the latch exiting block.
6250b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> OtherExits;
6260b57cec5SDimitry Andric   L->getUniqueNonLatchExitBlocks(OtherExits);
6270b57cec5SDimitry Andric   bool isMultiExitUnrollingEnabled =
6280b57cec5SDimitry Andric       canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
6290b57cec5SDimitry Andric                                    UseEpilogRemainder) &&
6300b57cec5SDimitry Andric       canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
6310b57cec5SDimitry Andric                                        UseEpilogRemainder);
6320b57cec5SDimitry Andric   // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
6330b57cec5SDimitry Andric   if (!isMultiExitUnrollingEnabled &&
6340b57cec5SDimitry Andric       (!L->getExitingBlock() || OtherExits.size())) {
6350b57cec5SDimitry Andric     LLVM_DEBUG(
6360b57cec5SDimitry Andric         dbgs()
6370b57cec5SDimitry Andric         << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
6380b57cec5SDimitry Andric            "enabled!\n");
6390b57cec5SDimitry Andric     return false;
6400b57cec5SDimitry Andric   }
6410b57cec5SDimitry Andric   // Use Scalar Evolution to compute the trip count. This allows more loops to
6420b57cec5SDimitry Andric   // be unrolled than relying on induction var simplification.
6430b57cec5SDimitry Andric   if (!SE)
6440b57cec5SDimitry Andric     return false;
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric   // Only unroll loops with a computable trip count, and the trip count needs
6470b57cec5SDimitry Andric   // to be an int value (allowing a pointer type is a TODO item).
6480b57cec5SDimitry Andric   // We calculate the backedge count by using getExitCount on the Latch block,
6490b57cec5SDimitry Andric   // which is proven to be the only exiting block in this loop. This is same as
6500b57cec5SDimitry Andric   // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
6510b57cec5SDimitry Andric   // exiting blocks).
6520b57cec5SDimitry Andric   const SCEV *BECountSC = SE->getExitCount(L, Latch);
6530b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(BECountSC) ||
6540b57cec5SDimitry Andric       !BECountSC->getType()->isIntegerTy()) {
6550b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
6560b57cec5SDimitry Andric     return false;
6570b57cec5SDimitry Andric   }
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
6600b57cec5SDimitry Andric 
6610b57cec5SDimitry Andric   // Add 1 since the backedge count doesn't include the first loop iteration.
6620b57cec5SDimitry Andric   const SCEV *TripCountSC =
6630b57cec5SDimitry Andric       SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
6640b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(TripCountSC)) {
6650b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
6660b57cec5SDimitry Andric     return false;
6670b57cec5SDimitry Andric   }
6680b57cec5SDimitry Andric 
6690b57cec5SDimitry Andric   BasicBlock *PreHeader = L->getLoopPreheader();
6700b57cec5SDimitry Andric   BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
6710b57cec5SDimitry Andric   const DataLayout &DL = Header->getModule()->getDataLayout();
6720b57cec5SDimitry Andric   SCEVExpander Expander(*SE, DL, "loop-unroll");
6730b57cec5SDimitry Andric   if (!AllowExpensiveTripCount &&
6745ffd83dbSDimitry Andric       Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
6755ffd83dbSDimitry Andric                                    TTI, PreHeaderBR)) {
6760b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
6770b57cec5SDimitry Andric     return false;
6780b57cec5SDimitry Andric   }
6790b57cec5SDimitry Andric 
6800b57cec5SDimitry Andric   // This constraint lets us deal with an overflowing trip count easily; see the
6810b57cec5SDimitry Andric   // comment on ModVal below.
6820b57cec5SDimitry Andric   if (Log2_32(Count) > BEWidth) {
6830b57cec5SDimitry Andric     LLVM_DEBUG(
6840b57cec5SDimitry Andric         dbgs()
6850b57cec5SDimitry Andric         << "Count failed constraint on overflow trip count calculation.\n");
6860b57cec5SDimitry Andric     return false;
6870b57cec5SDimitry Andric   }
6880b57cec5SDimitry Andric 
6890b57cec5SDimitry Andric   // Loop structure is the following:
6900b57cec5SDimitry Andric   //
6910b57cec5SDimitry Andric   // PreHeader
6920b57cec5SDimitry Andric   //   Header
6930b57cec5SDimitry Andric   //   ...
6940b57cec5SDimitry Andric   //   Latch
6950b57cec5SDimitry Andric   // LatchExit
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   BasicBlock *NewPreHeader;
6980b57cec5SDimitry Andric   BasicBlock *NewExit = nullptr;
6990b57cec5SDimitry Andric   BasicBlock *PrologExit = nullptr;
7000b57cec5SDimitry Andric   BasicBlock *EpilogPreHeader = nullptr;
7010b57cec5SDimitry Andric   BasicBlock *PrologPreHeader = nullptr;
7020b57cec5SDimitry Andric 
7030b57cec5SDimitry Andric   if (UseEpilogRemainder) {
7040b57cec5SDimitry Andric     // If epilog remainder
7050b57cec5SDimitry Andric     // Split PreHeader to insert a branch around loop for unrolling.
7060b57cec5SDimitry Andric     NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
7070b57cec5SDimitry Andric     NewPreHeader->setName(PreHeader->getName() + ".new");
7080b57cec5SDimitry Andric     // Split LatchExit to create phi nodes from branch above.
7090b57cec5SDimitry Andric     SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
7100b57cec5SDimitry Andric     NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
7110b57cec5SDimitry Andric                                      nullptr, PreserveLCSSA);
7120b57cec5SDimitry Andric     // NewExit gets its DebugLoc from LatchExit, which is not part of the
7130b57cec5SDimitry Andric     // original Loop.
7140b57cec5SDimitry Andric     // Fix this by setting Loop's DebugLoc to NewExit.
7150b57cec5SDimitry Andric     auto *NewExitTerminator = NewExit->getTerminator();
7160b57cec5SDimitry Andric     NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
7170b57cec5SDimitry Andric     // Split NewExit to insert epilog remainder loop.
7180b57cec5SDimitry Andric     EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
7190b57cec5SDimitry Andric     EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
7200b57cec5SDimitry Andric   } else {
7210b57cec5SDimitry Andric     // If prolog remainder
7220b57cec5SDimitry Andric     // Split the original preheader twice to insert prolog remainder loop
7230b57cec5SDimitry Andric     PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
7240b57cec5SDimitry Andric     PrologPreHeader->setName(Header->getName() + ".prol.preheader");
7250b57cec5SDimitry Andric     PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
7260b57cec5SDimitry Andric                             DT, LI);
7270b57cec5SDimitry Andric     PrologExit->setName(Header->getName() + ".prol.loopexit");
7280b57cec5SDimitry Andric     // Split PrologExit to get NewPreHeader.
7290b57cec5SDimitry Andric     NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
7300b57cec5SDimitry Andric     NewPreHeader->setName(PreHeader->getName() + ".new");
7310b57cec5SDimitry Andric   }
7320b57cec5SDimitry Andric   // Loop structure should be the following:
7330b57cec5SDimitry Andric   //  Epilog             Prolog
7340b57cec5SDimitry Andric   //
7350b57cec5SDimitry Andric   // PreHeader         PreHeader
7360b57cec5SDimitry Andric   // *NewPreHeader     *PrologPreHeader
7370b57cec5SDimitry Andric   //   Header          *PrologExit
7380b57cec5SDimitry Andric   //   ...             *NewPreHeader
7390b57cec5SDimitry Andric   //   Latch             Header
7400b57cec5SDimitry Andric   // *NewExit            ...
7410b57cec5SDimitry Andric   // *EpilogPreHeader    Latch
7420b57cec5SDimitry Andric   // LatchExit              LatchExit
7430b57cec5SDimitry Andric 
7440b57cec5SDimitry Andric   // Calculate conditions for branch around loop for unrolling
7450b57cec5SDimitry Andric   // in epilog case and around prolog remainder loop in prolog case.
7460b57cec5SDimitry Andric   // Compute the number of extra iterations required, which is:
7470b57cec5SDimitry Andric   //  extra iterations = run-time trip count % loop unroll factor
7480b57cec5SDimitry Andric   PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
7490b57cec5SDimitry Andric   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
7500b57cec5SDimitry Andric                                             PreHeaderBR);
7510b57cec5SDimitry Andric   Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
7520b57cec5SDimitry Andric                                           PreHeaderBR);
7530b57cec5SDimitry Andric   IRBuilder<> B(PreHeaderBR);
7540b57cec5SDimitry Andric   Value *ModVal;
7550b57cec5SDimitry Andric   // Calculate ModVal = (BECount + 1) % Count.
7560b57cec5SDimitry Andric   // Note that TripCount is BECount + 1.
7570b57cec5SDimitry Andric   if (isPowerOf2_32(Count)) {
7580b57cec5SDimitry Andric     // When Count is power of 2 we don't BECount for epilog case, however we'll
7590b57cec5SDimitry Andric     // need it for a branch around unrolling loop for prolog case.
7600b57cec5SDimitry Andric     ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
7610b57cec5SDimitry Andric     //  1. There are no iterations to be run in the prolog/epilog loop.
7620b57cec5SDimitry Andric     // OR
7630b57cec5SDimitry Andric     //  2. The addition computing TripCount overflowed.
7640b57cec5SDimitry Andric     //
7650b57cec5SDimitry Andric     // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
7660b57cec5SDimitry Andric     // the number of iterations that remain to be run in the original loop is a
7670b57cec5SDimitry Andric     // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
7680b57cec5SDimitry Andric     // explicitly check this above).
7690b57cec5SDimitry Andric   } else {
7700b57cec5SDimitry Andric     // As (BECount + 1) can potentially unsigned overflow we count
7710b57cec5SDimitry Andric     // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
7720b57cec5SDimitry Andric     Value *ModValTmp = B.CreateURem(BECount,
7730b57cec5SDimitry Andric                                     ConstantInt::get(BECount->getType(),
7740b57cec5SDimitry Andric                                                      Count));
7750b57cec5SDimitry Andric     Value *ModValAdd = B.CreateAdd(ModValTmp,
7760b57cec5SDimitry Andric                                    ConstantInt::get(ModValTmp->getType(), 1));
7770b57cec5SDimitry Andric     // At that point (BECount % Count) + 1 could be equal to Count.
7780b57cec5SDimitry Andric     // To handle this case we need to take mod by Count one more time.
7790b57cec5SDimitry Andric     ModVal = B.CreateURem(ModValAdd,
7800b57cec5SDimitry Andric                           ConstantInt::get(BECount->getType(), Count),
7810b57cec5SDimitry Andric                           "xtraiter");
7820b57cec5SDimitry Andric   }
7830b57cec5SDimitry Andric   Value *BranchVal =
7840b57cec5SDimitry Andric       UseEpilogRemainder ? B.CreateICmpULT(BECount,
7850b57cec5SDimitry Andric                                            ConstantInt::get(BECount->getType(),
7860b57cec5SDimitry Andric                                                             Count - 1)) :
7870b57cec5SDimitry Andric                            B.CreateIsNotNull(ModVal, "lcmp.mod");
7880b57cec5SDimitry Andric   BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
7890b57cec5SDimitry Andric   BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
7900b57cec5SDimitry Andric   // Branch to either remainder (extra iterations) loop or unrolling loop.
7910b57cec5SDimitry Andric   B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
7920b57cec5SDimitry Andric   PreHeaderBR->eraseFromParent();
7930b57cec5SDimitry Andric   if (DT) {
7940b57cec5SDimitry Andric     if (UseEpilogRemainder)
7950b57cec5SDimitry Andric       DT->changeImmediateDominator(NewExit, PreHeader);
7960b57cec5SDimitry Andric     else
7970b57cec5SDimitry Andric       DT->changeImmediateDominator(PrologExit, PreHeader);
7980b57cec5SDimitry Andric   }
7990b57cec5SDimitry Andric   Function *F = Header->getParent();
8000b57cec5SDimitry Andric   // Get an ordered list of blocks in the loop to help with the ordering of the
8010b57cec5SDimitry Andric   // cloned blocks in the prolog/epilog code
8020b57cec5SDimitry Andric   LoopBlocksDFS LoopBlocks(L);
8030b57cec5SDimitry Andric   LoopBlocks.perform(LI);
8040b57cec5SDimitry Andric 
8050b57cec5SDimitry Andric   //
8060b57cec5SDimitry Andric   // For each extra loop iteration, create a copy of the loop's basic blocks
8070b57cec5SDimitry Andric   // and generate a condition that branches to the copy depending on the
8080b57cec5SDimitry Andric   // number of 'left over' iterations.
8090b57cec5SDimitry Andric   //
8100b57cec5SDimitry Andric   std::vector<BasicBlock *> NewBlocks;
8110b57cec5SDimitry Andric   ValueToValueMapTy VMap;
8120b57cec5SDimitry Andric 
8130b57cec5SDimitry Andric   // For unroll factor 2 remainder loop will have 1 iterations.
8140b57cec5SDimitry Andric   // Do not create 1 iteration loop.
8150b57cec5SDimitry Andric   bool CreateRemainderLoop = (Count != 2);
8160b57cec5SDimitry Andric 
8170b57cec5SDimitry Andric   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
8180b57cec5SDimitry Andric   // the loop, otherwise we create a cloned loop to execute the extra
8190b57cec5SDimitry Andric   // iterations. This function adds the appropriate CFG connections.
8200b57cec5SDimitry Andric   BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
8210b57cec5SDimitry Andric   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
8220b57cec5SDimitry Andric   Loop *remainderLoop = CloneLoopBlocks(
8230b57cec5SDimitry Andric       L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
8240b57cec5SDimitry Andric       InsertTop, InsertBot,
8250b57cec5SDimitry Andric       NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
8260b57cec5SDimitry Andric 
827af732203SDimitry Andric   // Assign the maximum possible trip count as the back edge weight for the
828af732203SDimitry Andric   // remainder loop if the original loop comes with a branch weight.
829af732203SDimitry Andric   if (remainderLoop && !UnrollRemainder)
830af732203SDimitry Andric     updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
831af732203SDimitry Andric 
8320b57cec5SDimitry Andric   // Insert the cloned blocks into the function.
8330b57cec5SDimitry Andric   F->getBasicBlockList().splice(InsertBot->getIterator(),
8340b57cec5SDimitry Andric                                 F->getBasicBlockList(),
8350b57cec5SDimitry Andric                                 NewBlocks[0]->getIterator(),
8360b57cec5SDimitry Andric                                 F->end());
8370b57cec5SDimitry Andric 
8380b57cec5SDimitry Andric   // Now the loop blocks are cloned and the other exiting blocks from the
8390b57cec5SDimitry Andric   // remainder are connected to the original Loop's exit blocks. The remaining
8400b57cec5SDimitry Andric   // work is to update the phi nodes in the original loop, and take in the
8410b57cec5SDimitry Andric   // values from the cloned region.
8420b57cec5SDimitry Andric   for (auto *BB : OtherExits) {
8430b57cec5SDimitry Andric    for (auto &II : *BB) {
8440b57cec5SDimitry Andric 
8450b57cec5SDimitry Andric      // Given we preserve LCSSA form, we know that the values used outside the
8460b57cec5SDimitry Andric      // loop will be used through these phi nodes at the exit blocks that are
8470b57cec5SDimitry Andric      // transformed below.
8480b57cec5SDimitry Andric      if (!isa<PHINode>(II))
8490b57cec5SDimitry Andric        break;
8500b57cec5SDimitry Andric      PHINode *Phi = cast<PHINode>(&II);
8510b57cec5SDimitry Andric      unsigned oldNumOperands = Phi->getNumIncomingValues();
8520b57cec5SDimitry Andric      // Add the incoming values from the remainder code to the end of the phi
8530b57cec5SDimitry Andric      // node.
8540b57cec5SDimitry Andric      for (unsigned i =0; i < oldNumOperands; i++){
8550b57cec5SDimitry Andric        Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
8560b57cec5SDimitry Andric        // newVal can be a constant or derived from values outside the loop, and
8570b57cec5SDimitry Andric        // hence need not have a VMap value. Also, since lookup already generated
8580b57cec5SDimitry Andric        // a default "null" VMap entry for this value, we need to populate that
8590b57cec5SDimitry Andric        // VMap entry correctly, with the mapped entry being itself.
8600b57cec5SDimitry Andric        if (!newVal) {
8610b57cec5SDimitry Andric          newVal = Phi->getIncomingValue(i);
8620b57cec5SDimitry Andric          VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
8630b57cec5SDimitry Andric        }
8640b57cec5SDimitry Andric        Phi->addIncoming(newVal,
8650b57cec5SDimitry Andric                            cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
8660b57cec5SDimitry Andric      }
8670b57cec5SDimitry Andric    }
8680b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
8690b57cec5SDimitry Andric     for (BasicBlock *SuccBB : successors(BB)) {
8700b57cec5SDimitry Andric       assert(!(any_of(OtherExits,
8710b57cec5SDimitry Andric                       [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
8720b57cec5SDimitry Andric                SuccBB == LatchExit) &&
8730b57cec5SDimitry Andric              "Breaks the definition of dedicated exits!");
8740b57cec5SDimitry Andric     }
8750b57cec5SDimitry Andric #endif
8760b57cec5SDimitry Andric   }
8770b57cec5SDimitry Andric 
8780b57cec5SDimitry Andric   // Update the immediate dominator of the exit blocks and blocks that are
8790b57cec5SDimitry Andric   // reachable from the exit blocks. This is needed because we now have paths
8800b57cec5SDimitry Andric   // from both the original loop and the remainder code reaching the exit
8810b57cec5SDimitry Andric   // blocks. While the IDom of these exit blocks were from the original loop,
8820b57cec5SDimitry Andric   // now the IDom is the preheader (which decides whether the original loop or
8830b57cec5SDimitry Andric   // remainder code should run).
8840b57cec5SDimitry Andric   if (DT && !L->getExitingBlock()) {
8850b57cec5SDimitry Andric     SmallVector<BasicBlock *, 16> ChildrenToUpdate;
8860b57cec5SDimitry Andric     // NB! We have to examine the dom children of all loop blocks, not just
8870b57cec5SDimitry Andric     // those which are the IDom of the exit blocks. This is because blocks
8880b57cec5SDimitry Andric     // reachable from the exit blocks can have their IDom as the nearest common
8890b57cec5SDimitry Andric     // dominator of the exit blocks.
8900b57cec5SDimitry Andric     for (auto *BB : L->blocks()) {
8910b57cec5SDimitry Andric       auto *DomNodeBB = DT->getNode(BB);
8925ffd83dbSDimitry Andric       for (auto *DomChild : DomNodeBB->children()) {
8930b57cec5SDimitry Andric         auto *DomChildBB = DomChild->getBlock();
8940b57cec5SDimitry Andric         if (!L->contains(LI->getLoopFor(DomChildBB)))
8950b57cec5SDimitry Andric           ChildrenToUpdate.push_back(DomChildBB);
8960b57cec5SDimitry Andric       }
8970b57cec5SDimitry Andric     }
8980b57cec5SDimitry Andric     for (auto *BB : ChildrenToUpdate)
8990b57cec5SDimitry Andric       DT->changeImmediateDominator(BB, PreHeader);
9000b57cec5SDimitry Andric   }
9010b57cec5SDimitry Andric 
9020b57cec5SDimitry Andric   // Loop structure should be the following:
9030b57cec5SDimitry Andric   //  Epilog             Prolog
9040b57cec5SDimitry Andric   //
9050b57cec5SDimitry Andric   // PreHeader         PreHeader
9060b57cec5SDimitry Andric   // NewPreHeader      PrologPreHeader
9070b57cec5SDimitry Andric   //   Header            PrologHeader
9080b57cec5SDimitry Andric   //   ...               ...
9090b57cec5SDimitry Andric   //   Latch             PrologLatch
9100b57cec5SDimitry Andric   // NewExit           PrologExit
9110b57cec5SDimitry Andric   // EpilogPreHeader   NewPreHeader
9120b57cec5SDimitry Andric   //   EpilogHeader      Header
9130b57cec5SDimitry Andric   //   ...               ...
9140b57cec5SDimitry Andric   //   EpilogLatch       Latch
9150b57cec5SDimitry Andric   // LatchExit              LatchExit
9160b57cec5SDimitry Andric 
9170b57cec5SDimitry Andric   // Rewrite the cloned instruction operands to use the values created when the
9180b57cec5SDimitry Andric   // clone is created.
9190b57cec5SDimitry Andric   for (BasicBlock *BB : NewBlocks) {
9200b57cec5SDimitry Andric     for (Instruction &I : *BB) {
9210b57cec5SDimitry Andric       RemapInstruction(&I, VMap,
9220b57cec5SDimitry Andric                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
9230b57cec5SDimitry Andric     }
9240b57cec5SDimitry Andric   }
9250b57cec5SDimitry Andric 
9260b57cec5SDimitry Andric   if (UseEpilogRemainder) {
9270b57cec5SDimitry Andric     // Connect the epilog code to the original loop and update the
9280b57cec5SDimitry Andric     // PHI functions.
9290b57cec5SDimitry Andric     ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
9300b57cec5SDimitry Andric                   EpilogPreHeader, NewPreHeader, VMap, DT, LI,
9310b57cec5SDimitry Andric                   PreserveLCSSA);
9320b57cec5SDimitry Andric 
9330b57cec5SDimitry Andric     // Update counter in loop for unrolling.
9340b57cec5SDimitry Andric     // I should be multiply of Count.
9350b57cec5SDimitry Andric     IRBuilder<> B2(NewPreHeader->getTerminator());
9360b57cec5SDimitry Andric     Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
9370b57cec5SDimitry Andric     BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
9380b57cec5SDimitry Andric     B2.SetInsertPoint(LatchBR);
9390b57cec5SDimitry Andric     PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
9400b57cec5SDimitry Andric                                       Header->getFirstNonPHI());
9410b57cec5SDimitry Andric     Value *IdxSub =
9420b57cec5SDimitry Andric         B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
9430b57cec5SDimitry Andric                      NewIdx->getName() + ".nsub");
9440b57cec5SDimitry Andric     Value *IdxCmp;
9450b57cec5SDimitry Andric     if (LatchBR->getSuccessor(0) == Header)
9460b57cec5SDimitry Andric       IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
9470b57cec5SDimitry Andric     else
9480b57cec5SDimitry Andric       IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
9490b57cec5SDimitry Andric     NewIdx->addIncoming(TestVal, NewPreHeader);
9500b57cec5SDimitry Andric     NewIdx->addIncoming(IdxSub, Latch);
9510b57cec5SDimitry Andric     LatchBR->setCondition(IdxCmp);
9520b57cec5SDimitry Andric   } else {
9530b57cec5SDimitry Andric     // Connect the prolog code to the original loop and update the
9540b57cec5SDimitry Andric     // PHI functions.
9550b57cec5SDimitry Andric     ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
9560b57cec5SDimitry Andric                   NewPreHeader, VMap, DT, LI, PreserveLCSSA);
9570b57cec5SDimitry Andric   }
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric   // If this loop is nested, then the loop unroller changes the code in the any
9600b57cec5SDimitry Andric   // of its parent loops, so the Scalar Evolution pass needs to be run again.
9610b57cec5SDimitry Andric   SE->forgetTopmostLoop(L);
9620b57cec5SDimitry Andric 
9630b57cec5SDimitry Andric   // Verify that the Dom Tree is correct.
9640b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
9650b57cec5SDimitry Andric   if (DT)
9660b57cec5SDimitry Andric     assert(DT->verify(DominatorTree::VerificationLevel::Full));
9670b57cec5SDimitry Andric #endif
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric   // Canonicalize to LoopSimplifyForm both original and remainder loops. We
9700b57cec5SDimitry Andric   // cannot rely on the LoopUnrollPass to do this because it only does
9710b57cec5SDimitry Andric   // canonicalization for parent/subloops and not the sibling loops.
9720b57cec5SDimitry Andric   if (OtherExits.size() > 0) {
9730b57cec5SDimitry Andric     // Generate dedicated exit blocks for the original loop, to preserve
9740b57cec5SDimitry Andric     // LoopSimplifyForm.
9750b57cec5SDimitry Andric     formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
9760b57cec5SDimitry Andric     // Generate dedicated exit blocks for the remainder loop if one exists, to
9770b57cec5SDimitry Andric     // preserve LoopSimplifyForm.
9780b57cec5SDimitry Andric     if (remainderLoop)
9790b57cec5SDimitry Andric       formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
9800b57cec5SDimitry Andric   }
9810b57cec5SDimitry Andric 
9820b57cec5SDimitry Andric   auto UnrollResult = LoopUnrollResult::Unmodified;
9830b57cec5SDimitry Andric   if (remainderLoop && UnrollRemainder) {
9840b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
9850b57cec5SDimitry Andric     UnrollResult =
9860b57cec5SDimitry Andric         UnrollLoop(remainderLoop,
987*5f7ddb14SDimitry Andric                    {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
988*5f7ddb14SDimitry Andric                     /*AllowExpensiveTripCount*/ false,
989*5f7ddb14SDimitry Andric                     /*UnrollRemainder*/ false, ForgetAllSCEV},
9905ffd83dbSDimitry Andric                    LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
9910b57cec5SDimitry Andric   }
9920b57cec5SDimitry Andric 
9930b57cec5SDimitry Andric   if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
9940b57cec5SDimitry Andric     *ResultLoop = remainderLoop;
9950b57cec5SDimitry Andric   NumRuntimeUnrolled++;
9960b57cec5SDimitry Andric   return true;
9970b57cec5SDimitry Andric }
998