10b57cec5SDimitry Andric //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements some loop unrolling utilities for loops with run-time
100b57cec5SDimitry Andric // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
110b57cec5SDimitry Andric // trip counts.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // The functions in this file are used to generate extra code when the
140b57cec5SDimitry Andric // run-time trip count modulo the unroll factor is not 0. When this is the
150b57cec5SDimitry Andric // case, we need to generate code to execute these 'left over' iterations.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // The current strategy generates an if-then-else sequence prior to the
180b57cec5SDimitry Andric // unrolled loop to execute the 'left over' iterations before or after the
190b57cec5SDimitry Andric // unrolled loop.
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
240b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
270b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
280b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
29af732203SDimitry Andric #include "llvm/IR/MDBuilder.h"
300b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
310b57cec5SDimitry Andric #include "llvm/IR/Module.h"
32480093f4SDimitry Andric #include "llvm/Support/CommandLine.h"
330b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
340b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
350b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
360b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
370b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
380b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
395ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
410b57cec5SDimitry Andric #include <algorithm>
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric using namespace llvm;
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll"
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric STATISTIC(NumRuntimeUnrolled,
480b57cec5SDimitry Andric "Number of loops unrolled with run-time trip counts");
490b57cec5SDimitry Andric static cl::opt<bool> UnrollRuntimeMultiExit(
500b57cec5SDimitry Andric "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
510b57cec5SDimitry Andric cl::desc("Allow runtime unrolling for loops with multiple exits, when "
520b57cec5SDimitry Andric "epilog is generated"));
53*5f7ddb14SDimitry Andric static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
54*5f7ddb14SDimitry Andric "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
55*5f7ddb14SDimitry Andric cl::desc("Assume the non latch exit block to be predictable"));
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric /// Connect the unrolling prolog code to the original loop.
580b57cec5SDimitry Andric /// The unrolling prolog code contains code to execute the
590b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
600b57cec5SDimitry Andric /// unroll count is non-zero.
610b57cec5SDimitry Andric ///
620b57cec5SDimitry Andric /// This function performs the following:
630b57cec5SDimitry Andric /// - Create PHI nodes at prolog end block to combine values
640b57cec5SDimitry Andric /// that exit the prolog code and jump around the prolog.
650b57cec5SDimitry Andric /// - Add a PHI operand to a PHI node at the loop exit block
660b57cec5SDimitry Andric /// for values that exit the prolog and go around the loop.
670b57cec5SDimitry Andric /// - Branch around the original loop if the trip count is less
680b57cec5SDimitry Andric /// than the unroll factor.
690b57cec5SDimitry Andric ///
ConnectProlog(Loop * L,Value * BECount,unsigned Count,BasicBlock * PrologExit,BasicBlock * OriginalLoopLatchExit,BasicBlock * PreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)700b57cec5SDimitry Andric static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
710b57cec5SDimitry Andric BasicBlock *PrologExit,
720b57cec5SDimitry Andric BasicBlock *OriginalLoopLatchExit,
730b57cec5SDimitry Andric BasicBlock *PreHeader, BasicBlock *NewPreHeader,
740b57cec5SDimitry Andric ValueToValueMapTy &VMap, DominatorTree *DT,
750b57cec5SDimitry Andric LoopInfo *LI, bool PreserveLCSSA) {
760b57cec5SDimitry Andric // Loop structure should be the following:
770b57cec5SDimitry Andric // Preheader
780b57cec5SDimitry Andric // PrologHeader
790b57cec5SDimitry Andric // ...
800b57cec5SDimitry Andric // PrologLatch
810b57cec5SDimitry Andric // PrologExit
820b57cec5SDimitry Andric // NewPreheader
830b57cec5SDimitry Andric // Header
840b57cec5SDimitry Andric // ...
850b57cec5SDimitry Andric // Latch
860b57cec5SDimitry Andric // LatchExit
870b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
880b57cec5SDimitry Andric assert(Latch && "Loop must have a latch");
890b57cec5SDimitry Andric BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric // Create a PHI node for each outgoing value from the original loop
920b57cec5SDimitry Andric // (which means it is an outgoing value from the prolog code too).
930b57cec5SDimitry Andric // The new PHI node is inserted in the prolog end basic block.
940b57cec5SDimitry Andric // The new PHI node value is added as an operand of a PHI node in either
950b57cec5SDimitry Andric // the loop header or the loop exit block.
960b57cec5SDimitry Andric for (BasicBlock *Succ : successors(Latch)) {
970b57cec5SDimitry Andric for (PHINode &PN : Succ->phis()) {
980b57cec5SDimitry Andric // Add a new PHI node to the prolog end block and add the
990b57cec5SDimitry Andric // appropriate incoming values.
1000b57cec5SDimitry Andric // TODO: This code assumes that the PrologExit (or the LatchExit block for
1010b57cec5SDimitry Andric // prolog loop) contains only one predecessor from the loop, i.e. the
1020b57cec5SDimitry Andric // PrologLatch. When supporting multiple-exiting block loops, we can have
1030b57cec5SDimitry Andric // two or more blocks that have the LatchExit as the target in the
1040b57cec5SDimitry Andric // original loop.
1050b57cec5SDimitry Andric PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
1060b57cec5SDimitry Andric PrologExit->getFirstNonPHI());
1070b57cec5SDimitry Andric // Adding a value to the new PHI node from the original loop preheader.
1080b57cec5SDimitry Andric // This is the value that skips all the prolog code.
1090b57cec5SDimitry Andric if (L->contains(&PN)) {
1100b57cec5SDimitry Andric // Succ is loop header.
1110b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
1120b57cec5SDimitry Andric PreHeader);
1130b57cec5SDimitry Andric } else {
1140b57cec5SDimitry Andric // Succ is LatchExit.
1150b57cec5SDimitry Andric NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
1160b57cec5SDimitry Andric }
1170b57cec5SDimitry Andric
1180b57cec5SDimitry Andric Value *V = PN.getIncomingValueForBlock(Latch);
1190b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V)) {
1200b57cec5SDimitry Andric if (L->contains(I)) {
1210b57cec5SDimitry Andric V = VMap.lookup(I);
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric // Adding a value to the new PHI node from the last prolog block
1250b57cec5SDimitry Andric // that was created.
1260b57cec5SDimitry Andric NewPN->addIncoming(V, PrologLatch);
1270b57cec5SDimitry Andric
1280b57cec5SDimitry Andric // Update the existing PHI node operand with the value from the
1290b57cec5SDimitry Andric // new PHI node. How this is done depends on if the existing
1300b57cec5SDimitry Andric // PHI node is in the original loop block, or the exit block.
1310b57cec5SDimitry Andric if (L->contains(&PN))
1320b57cec5SDimitry Andric PN.setIncomingValueForBlock(NewPreHeader, NewPN);
1330b57cec5SDimitry Andric else
1340b57cec5SDimitry Andric PN.addIncoming(NewPN, PrologExit);
1350b57cec5SDimitry Andric }
1360b57cec5SDimitry Andric }
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric // Make sure that created prolog loop is in simplified form
1390b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> PrologExitPreds;
1400b57cec5SDimitry Andric Loop *PrologLoop = LI->getLoopFor(PrologLatch);
1410b57cec5SDimitry Andric if (PrologLoop) {
1420b57cec5SDimitry Andric for (BasicBlock *PredBB : predecessors(PrologExit))
1430b57cec5SDimitry Andric if (PrologLoop->contains(PredBB))
1440b57cec5SDimitry Andric PrologExitPreds.push_back(PredBB);
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
1470b57cec5SDimitry Andric nullptr, PreserveLCSSA);
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric // Create a branch around the original loop, which is taken if there are no
1510b57cec5SDimitry Andric // iterations remaining to be executed after running the prologue.
1520b57cec5SDimitry Andric Instruction *InsertPt = PrologExit->getTerminator();
1530b57cec5SDimitry Andric IRBuilder<> B(InsertPt);
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric assert(Count != 0 && "nonsensical Count!");
1560b57cec5SDimitry Andric
1570b57cec5SDimitry Andric // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
1580b57cec5SDimitry Andric // This means %xtraiter is (BECount + 1) and all of the iterations of this
1590b57cec5SDimitry Andric // loop were executed by the prologue. Note that if BECount <u (Count - 1)
1600b57cec5SDimitry Andric // then (BECount + 1) cannot unsigned-overflow.
1610b57cec5SDimitry Andric Value *BrLoopExit =
1620b57cec5SDimitry Andric B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
1630b57cec5SDimitry Andric // Split the exit to maintain loop canonicalization guarantees
1640b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
1650b57cec5SDimitry Andric SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
1660b57cec5SDimitry Andric nullptr, PreserveLCSSA);
1670b57cec5SDimitry Andric // Add the branch to the exit block (around the unrolled loop)
1680b57cec5SDimitry Andric B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
1690b57cec5SDimitry Andric InsertPt->eraseFromParent();
1700b57cec5SDimitry Andric if (DT)
1710b57cec5SDimitry Andric DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andric /// Connect the unrolling epilog code to the original loop.
1750b57cec5SDimitry Andric /// The unrolling epilog code contains code to execute the
1760b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the
1770b57cec5SDimitry Andric /// unroll count is non-zero.
1780b57cec5SDimitry Andric ///
1790b57cec5SDimitry Andric /// This function performs the following:
1800b57cec5SDimitry Andric /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
1810b57cec5SDimitry Andric /// - Create PHI nodes at the unrolling loop exit to combine
1820b57cec5SDimitry Andric /// values that exit the unrolling loop code and jump around it.
1830b57cec5SDimitry Andric /// - Update PHI operands in the epilog loop by the new PHI nodes
1840b57cec5SDimitry Andric /// - Branch around the epilog loop if extra iters (ModVal) is zero.
1850b57cec5SDimitry Andric ///
ConnectEpilog(Loop * L,Value * ModVal,BasicBlock * NewExit,BasicBlock * Exit,BasicBlock * PreHeader,BasicBlock * EpilogPreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)1860b57cec5SDimitry Andric static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
1870b57cec5SDimitry Andric BasicBlock *Exit, BasicBlock *PreHeader,
1880b57cec5SDimitry Andric BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
1890b57cec5SDimitry Andric ValueToValueMapTy &VMap, DominatorTree *DT,
1900b57cec5SDimitry Andric LoopInfo *LI, bool PreserveLCSSA) {
1910b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
1920b57cec5SDimitry Andric assert(Latch && "Loop must have a latch");
1930b57cec5SDimitry Andric BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
1940b57cec5SDimitry Andric
1950b57cec5SDimitry Andric // Loop structure should be the following:
1960b57cec5SDimitry Andric //
1970b57cec5SDimitry Andric // PreHeader
1980b57cec5SDimitry Andric // NewPreHeader
1990b57cec5SDimitry Andric // Header
2000b57cec5SDimitry Andric // ...
2010b57cec5SDimitry Andric // Latch
2020b57cec5SDimitry Andric // NewExit (PN)
2030b57cec5SDimitry Andric // EpilogPreHeader
2040b57cec5SDimitry Andric // EpilogHeader
2050b57cec5SDimitry Andric // ...
2060b57cec5SDimitry Andric // EpilogLatch
2070b57cec5SDimitry Andric // Exit (EpilogPN)
2080b57cec5SDimitry Andric
2090b57cec5SDimitry Andric // Update PHI nodes at NewExit and Exit.
2100b57cec5SDimitry Andric for (PHINode &PN : NewExit->phis()) {
2110b57cec5SDimitry Andric // PN should be used in another PHI located in Exit block as
2120b57cec5SDimitry Andric // Exit was split by SplitBlockPredecessors into Exit and NewExit
2130b57cec5SDimitry Andric // Basicaly it should look like:
2140b57cec5SDimitry Andric // NewExit:
2150b57cec5SDimitry Andric // PN = PHI [I, Latch]
2160b57cec5SDimitry Andric // ...
2170b57cec5SDimitry Andric // Exit:
2180b57cec5SDimitry Andric // EpilogPN = PHI [PN, EpilogPreHeader]
2190b57cec5SDimitry Andric //
2200b57cec5SDimitry Andric // There is EpilogPreHeader incoming block instead of NewExit as
2210b57cec5SDimitry Andric // NewExit was spilt 1 more time to get EpilogPreHeader.
2220b57cec5SDimitry Andric assert(PN.hasOneUse() && "The phi should have 1 use");
2230b57cec5SDimitry Andric PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
2240b57cec5SDimitry Andric assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
2250b57cec5SDimitry Andric
2260b57cec5SDimitry Andric // Add incoming PreHeader from branch around the Loop
2270b57cec5SDimitry Andric PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
2280b57cec5SDimitry Andric
2290b57cec5SDimitry Andric Value *V = PN.getIncomingValueForBlock(Latch);
2300b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(V);
2310b57cec5SDimitry Andric if (I && L->contains(I))
2320b57cec5SDimitry Andric // If value comes from an instruction in the loop add VMap value.
2330b57cec5SDimitry Andric V = VMap.lookup(I);
2340b57cec5SDimitry Andric // For the instruction out of the loop, constant or undefined value
2350b57cec5SDimitry Andric // insert value itself.
2360b57cec5SDimitry Andric EpilogPN->addIncoming(V, EpilogLatch);
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
2390b57cec5SDimitry Andric "EpilogPN should have EpilogPreHeader incoming block");
2400b57cec5SDimitry Andric // Change EpilogPreHeader incoming block to NewExit.
2410b57cec5SDimitry Andric EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
2420b57cec5SDimitry Andric NewExit);
2430b57cec5SDimitry Andric // Now PHIs should look like:
2440b57cec5SDimitry Andric // NewExit:
2450b57cec5SDimitry Andric // PN = PHI [I, Latch], [undef, PreHeader]
2460b57cec5SDimitry Andric // ...
2470b57cec5SDimitry Andric // Exit:
2480b57cec5SDimitry Andric // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric
2510b57cec5SDimitry Andric // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
2520b57cec5SDimitry Andric // Update corresponding PHI nodes in epilog loop.
2530b57cec5SDimitry Andric for (BasicBlock *Succ : successors(Latch)) {
2540b57cec5SDimitry Andric // Skip this as we already updated phis in exit blocks.
2550b57cec5SDimitry Andric if (!L->contains(Succ))
2560b57cec5SDimitry Andric continue;
2570b57cec5SDimitry Andric for (PHINode &PN : Succ->phis()) {
2580b57cec5SDimitry Andric // Add new PHI nodes to the loop exit block and update epilog
2590b57cec5SDimitry Andric // PHIs with the new PHI values.
2600b57cec5SDimitry Andric PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
2610b57cec5SDimitry Andric NewExit->getFirstNonPHI());
2620b57cec5SDimitry Andric // Adding a value to the new PHI node from the unrolling loop preheader.
2630b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
2640b57cec5SDimitry Andric // Adding a value to the new PHI node from the unrolling loop latch.
2650b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
2660b57cec5SDimitry Andric
2670b57cec5SDimitry Andric // Update the existing PHI node operand with the value from the new PHI
2680b57cec5SDimitry Andric // node. Corresponding instruction in epilog loop should be PHI.
2690b57cec5SDimitry Andric PHINode *VPN = cast<PHINode>(VMap[&PN]);
2700b57cec5SDimitry Andric VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric
2740b57cec5SDimitry Andric Instruction *InsertPt = NewExit->getTerminator();
2750b57cec5SDimitry Andric IRBuilder<> B(InsertPt);
2760b57cec5SDimitry Andric Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
2770b57cec5SDimitry Andric assert(Exit && "Loop must have a single exit block only");
2780b57cec5SDimitry Andric // Split the epilogue exit to maintain loop canonicalization guarantees
2790b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
2800b57cec5SDimitry Andric SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
2810b57cec5SDimitry Andric PreserveLCSSA);
2820b57cec5SDimitry Andric // Add the branch to the exit block (around the unrolling loop)
2830b57cec5SDimitry Andric B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
2840b57cec5SDimitry Andric InsertPt->eraseFromParent();
2850b57cec5SDimitry Andric if (DT)
2860b57cec5SDimitry Andric DT->changeImmediateDominator(Exit, NewExit);
2870b57cec5SDimitry Andric
2880b57cec5SDimitry Andric // Split the main loop exit to maintain canonicalization guarantees.
2890b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
2900b57cec5SDimitry Andric SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
2910b57cec5SDimitry Andric PreserveLCSSA);
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric
2940b57cec5SDimitry Andric /// Create a clone of the blocks in a loop and connect them together.
2950b57cec5SDimitry Andric /// If CreateRemainderLoop is false, loop structure will not be cloned,
2960b57cec5SDimitry Andric /// otherwise a new loop will be created including all cloned blocks, and the
2970b57cec5SDimitry Andric /// iterator of it switches to count NewIter down to 0.
2980b57cec5SDimitry Andric /// The cloned blocks should be inserted between InsertTop and InsertBot.
2990b57cec5SDimitry Andric /// If loop structure is cloned InsertTop should be new preheader, InsertBot
3000b57cec5SDimitry Andric /// new loop exit.
3010b57cec5SDimitry Andric /// Return the new cloned loop that is created when CreateRemainderLoop is true.
3020b57cec5SDimitry Andric static Loop *
CloneLoopBlocks(Loop * L,Value * NewIter,const bool CreateRemainderLoop,const bool UseEpilogRemainder,const bool UnrollRemainder,BasicBlock * InsertTop,BasicBlock * InsertBot,BasicBlock * Preheader,std::vector<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI)3030b57cec5SDimitry Andric CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
3040b57cec5SDimitry Andric const bool UseEpilogRemainder, const bool UnrollRemainder,
3050b57cec5SDimitry Andric BasicBlock *InsertTop,
3060b57cec5SDimitry Andric BasicBlock *InsertBot, BasicBlock *Preheader,
3070b57cec5SDimitry Andric std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
3080b57cec5SDimitry Andric ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
3090b57cec5SDimitry Andric StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
3100b57cec5SDimitry Andric BasicBlock *Header = L->getHeader();
3110b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
3120b57cec5SDimitry Andric Function *F = Header->getParent();
3130b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
3140b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
3150b57cec5SDimitry Andric Loop *ParentLoop = L->getParentLoop();
3160b57cec5SDimitry Andric NewLoopsMap NewLoops;
3170b57cec5SDimitry Andric NewLoops[ParentLoop] = ParentLoop;
3180b57cec5SDimitry Andric if (!CreateRemainderLoop)
3190b57cec5SDimitry Andric NewLoops[L] = ParentLoop;
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric // For each block in the original loop, create a new copy,
3220b57cec5SDimitry Andric // and update the value map with the newly created values.
3230b57cec5SDimitry Andric for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3240b57cec5SDimitry Andric BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
3250b57cec5SDimitry Andric NewBlocks.push_back(NewBB);
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andric // If we're unrolling the outermost loop, there's no remainder loop,
3280b57cec5SDimitry Andric // and this block isn't in a nested loop, then the new block is not
3290b57cec5SDimitry Andric // in any loop. Otherwise, add it to loopinfo.
3300b57cec5SDimitry Andric if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
3310b57cec5SDimitry Andric addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
3320b57cec5SDimitry Andric
3330b57cec5SDimitry Andric VMap[*BB] = NewBB;
3340b57cec5SDimitry Andric if (Header == *BB) {
3350b57cec5SDimitry Andric // For the first block, add a CFG connection to this newly
3360b57cec5SDimitry Andric // created block.
3370b57cec5SDimitry Andric InsertTop->getTerminator()->setSuccessor(0, NewBB);
3380b57cec5SDimitry Andric }
3390b57cec5SDimitry Andric
3400b57cec5SDimitry Andric if (DT) {
3410b57cec5SDimitry Andric if (Header == *BB) {
3420b57cec5SDimitry Andric // The header is dominated by the preheader.
3430b57cec5SDimitry Andric DT->addNewBlock(NewBB, InsertTop);
3440b57cec5SDimitry Andric } else {
3450b57cec5SDimitry Andric // Copy information from original loop to unrolled loop.
3460b57cec5SDimitry Andric BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
3470b57cec5SDimitry Andric DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
3480b57cec5SDimitry Andric }
3490b57cec5SDimitry Andric }
3500b57cec5SDimitry Andric
3510b57cec5SDimitry Andric if (Latch == *BB) {
3520b57cec5SDimitry Andric // For the last block, if CreateRemainderLoop is false, create a direct
3530b57cec5SDimitry Andric // jump to InsertBot. If not, create a loop back to cloned head.
3540b57cec5SDimitry Andric VMap.erase((*BB)->getTerminator());
3550b57cec5SDimitry Andric BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
3560b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
3570b57cec5SDimitry Andric IRBuilder<> Builder(LatchBR);
3580b57cec5SDimitry Andric if (!CreateRemainderLoop) {
3590b57cec5SDimitry Andric Builder.CreateBr(InsertBot);
3600b57cec5SDimitry Andric } else {
3610b57cec5SDimitry Andric PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
3620b57cec5SDimitry Andric suffix + ".iter",
3630b57cec5SDimitry Andric FirstLoopBB->getFirstNonPHI());
3640b57cec5SDimitry Andric Value *IdxSub =
3650b57cec5SDimitry Andric Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
3660b57cec5SDimitry Andric NewIdx->getName() + ".sub");
3670b57cec5SDimitry Andric Value *IdxCmp =
3680b57cec5SDimitry Andric Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
3690b57cec5SDimitry Andric Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
3700b57cec5SDimitry Andric NewIdx->addIncoming(NewIter, InsertTop);
3710b57cec5SDimitry Andric NewIdx->addIncoming(IdxSub, NewBB);
3720b57cec5SDimitry Andric }
3730b57cec5SDimitry Andric LatchBR->eraseFromParent();
3740b57cec5SDimitry Andric }
3750b57cec5SDimitry Andric }
3760b57cec5SDimitry Andric
3770b57cec5SDimitry Andric // Change the incoming values to the ones defined in the preheader or
3780b57cec5SDimitry Andric // cloned loop.
3790b57cec5SDimitry Andric for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
3800b57cec5SDimitry Andric PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
3810b57cec5SDimitry Andric if (!CreateRemainderLoop) {
3820b57cec5SDimitry Andric if (UseEpilogRemainder) {
3830b57cec5SDimitry Andric unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
3840b57cec5SDimitry Andric NewPHI->setIncomingBlock(idx, InsertTop);
3850b57cec5SDimitry Andric NewPHI->removeIncomingValue(Latch, false);
3860b57cec5SDimitry Andric } else {
3870b57cec5SDimitry Andric VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
3880b57cec5SDimitry Andric cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric } else {
3910b57cec5SDimitry Andric unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
3920b57cec5SDimitry Andric NewPHI->setIncomingBlock(idx, InsertTop);
3930b57cec5SDimitry Andric BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
3940b57cec5SDimitry Andric idx = NewPHI->getBasicBlockIndex(Latch);
3950b57cec5SDimitry Andric Value *InVal = NewPHI->getIncomingValue(idx);
3960b57cec5SDimitry Andric NewPHI->setIncomingBlock(idx, NewLatch);
3970b57cec5SDimitry Andric if (Value *V = VMap.lookup(InVal))
3980b57cec5SDimitry Andric NewPHI->setIncomingValue(idx, V);
3990b57cec5SDimitry Andric }
4000b57cec5SDimitry Andric }
4010b57cec5SDimitry Andric if (CreateRemainderLoop) {
4020b57cec5SDimitry Andric Loop *NewLoop = NewLoops[L];
4030b57cec5SDimitry Andric assert(NewLoop && "L should have been cloned");
404480093f4SDimitry Andric MDNode *LoopID = NewLoop->getLoopID();
4050b57cec5SDimitry Andric
4060b57cec5SDimitry Andric // Only add loop metadata if the loop is not going to be completely
4070b57cec5SDimitry Andric // unrolled.
4080b57cec5SDimitry Andric if (UnrollRemainder)
4090b57cec5SDimitry Andric return NewLoop;
4100b57cec5SDimitry Andric
4110b57cec5SDimitry Andric Optional<MDNode *> NewLoopID = makeFollowupLoopID(
4120b57cec5SDimitry Andric LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
4130b57cec5SDimitry Andric if (NewLoopID.hasValue()) {
4140b57cec5SDimitry Andric NewLoop->setLoopID(NewLoopID.getValue());
4150b57cec5SDimitry Andric
4160b57cec5SDimitry Andric // Do not setLoopAlreadyUnrolled if loop attributes have been defined
4170b57cec5SDimitry Andric // explicitly.
4180b57cec5SDimitry Andric return NewLoop;
4190b57cec5SDimitry Andric }
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andric // Add unroll disable metadata to disable future unrolling for this loop.
4220b57cec5SDimitry Andric NewLoop->setLoopAlreadyUnrolled();
4230b57cec5SDimitry Andric return NewLoop;
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric else
4260b57cec5SDimitry Andric return nullptr;
4270b57cec5SDimitry Andric }
4280b57cec5SDimitry Andric
4290b57cec5SDimitry Andric /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
4300b57cec5SDimitry Andric /// is populated with all the loop exit blocks other than the LatchExit block.
canSafelyUnrollMultiExitLoop(Loop * L,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)4310b57cec5SDimitry Andric static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
4320b57cec5SDimitry Andric bool PreserveLCSSA,
4330b57cec5SDimitry Andric bool UseEpilogRemainder) {
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric // We currently have some correctness constrains in unrolling a multi-exit
4360b57cec5SDimitry Andric // loop. Check for these below.
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric // We rely on LCSSA form being preserved when the exit blocks are transformed.
4390b57cec5SDimitry Andric if (!PreserveLCSSA)
4400b57cec5SDimitry Andric return false;
4410b57cec5SDimitry Andric
4420b57cec5SDimitry Andric // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
4430b57cec5SDimitry Andric // UnrollRuntimeMultiExit is true. This will need updating the logic in
4440b57cec5SDimitry Andric // connectEpilog/connectProlog.
4450b57cec5SDimitry Andric if (!LatchExit->getSinglePredecessor()) {
4460b57cec5SDimitry Andric LLVM_DEBUG(
4470b57cec5SDimitry Andric dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
4480b57cec5SDimitry Andric "predecessor.\n");
4490b57cec5SDimitry Andric return false;
4500b57cec5SDimitry Andric }
4510b57cec5SDimitry Andric // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
4520b57cec5SDimitry Andric // and L is an inner loop. This is because in presence of multiple exits, the
4530b57cec5SDimitry Andric // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
4540b57cec5SDimitry Andric // outer loop. This is automatically handled in the prolog case, so we do not
4550b57cec5SDimitry Andric // have that bug in prolog generation.
4560b57cec5SDimitry Andric if (UseEpilogRemainder && L->getParentLoop())
4570b57cec5SDimitry Andric return false;
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric // All constraints have been satisfied.
4600b57cec5SDimitry Andric return true;
4610b57cec5SDimitry Andric }
4620b57cec5SDimitry Andric
4630b57cec5SDimitry Andric /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
4640b57cec5SDimitry Andric /// we return true only if UnrollRuntimeMultiExit is set to true.
canProfitablyUnrollMultiExitLoop(Loop * L,SmallVectorImpl<BasicBlock * > & OtherExits,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)4650b57cec5SDimitry Andric static bool canProfitablyUnrollMultiExitLoop(
4660b57cec5SDimitry Andric Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
4670b57cec5SDimitry Andric bool PreserveLCSSA, bool UseEpilogRemainder) {
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric #if !defined(NDEBUG)
4700b57cec5SDimitry Andric assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
4710b57cec5SDimitry Andric UseEpilogRemainder) &&
4720b57cec5SDimitry Andric "Should be safe to unroll before checking profitability!");
4730b57cec5SDimitry Andric #endif
4740b57cec5SDimitry Andric
4750b57cec5SDimitry Andric // Priority goes to UnrollRuntimeMultiExit if it's supplied.
4760b57cec5SDimitry Andric if (UnrollRuntimeMultiExit.getNumOccurrences())
4770b57cec5SDimitry Andric return UnrollRuntimeMultiExit;
4780b57cec5SDimitry Andric
4790b57cec5SDimitry Andric // The main pain point with multi-exit loop unrolling is that once unrolled,
4800b57cec5SDimitry Andric // we will not be able to merge all blocks into a straight line code.
4810b57cec5SDimitry Andric // There are branches within the unrolled loop that go to the OtherExits.
4820b57cec5SDimitry Andric // The second point is the increase in code size, but this is true
4830b57cec5SDimitry Andric // irrespective of multiple exits.
4840b57cec5SDimitry Andric
4850b57cec5SDimitry Andric // Note: Both the heuristics below are coarse grained. We are essentially
4860b57cec5SDimitry Andric // enabling unrolling of loops that have a single side exit other than the
4870b57cec5SDimitry Andric // normal LatchExit (i.e. exiting into a deoptimize block).
4880b57cec5SDimitry Andric // The heuristics considered are:
4890b57cec5SDimitry Andric // 1. low number of branches in the unrolled version.
4900b57cec5SDimitry Andric // 2. high predictability of these extra branches.
4910b57cec5SDimitry Andric // We avoid unrolling loops that have more than two exiting blocks. This
4920b57cec5SDimitry Andric // limits the total number of branches in the unrolled loop to be atmost
4930b57cec5SDimitry Andric // the unroll factor (since one of the exiting blocks is the latch block).
4940b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> ExitingBlocks;
4950b57cec5SDimitry Andric L->getExitingBlocks(ExitingBlocks);
4960b57cec5SDimitry Andric if (ExitingBlocks.size() > 2)
4970b57cec5SDimitry Andric return false;
4980b57cec5SDimitry Andric
499*5f7ddb14SDimitry Andric // Allow unrolling of loops with no non latch exit blocks.
500*5f7ddb14SDimitry Andric if (OtherExits.size() == 0)
501*5f7ddb14SDimitry Andric return true;
502*5f7ddb14SDimitry Andric
5030b57cec5SDimitry Andric // The second heuristic is that L has one exit other than the latchexit and
5040b57cec5SDimitry Andric // that exit is a deoptimize block. We know that deoptimize blocks are rarely
5050b57cec5SDimitry Andric // taken, which also implies the branch leading to the deoptimize block is
506*5f7ddb14SDimitry Andric // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
507*5f7ddb14SDimitry Andric // assume the other exit branch is predictable even if it has no deoptimize
508*5f7ddb14SDimitry Andric // call.
5090b57cec5SDimitry Andric return (OtherExits.size() == 1 &&
510*5f7ddb14SDimitry Andric (UnrollRuntimeOtherExitPredictable ||
511*5f7ddb14SDimitry Andric OtherExits[0]->getTerminatingDeoptimizeCall()));
5120b57cec5SDimitry Andric // TODO: These can be fine-tuned further to consider code size or deopt states
5130b57cec5SDimitry Andric // that are captured by the deoptimize exit block.
5140b57cec5SDimitry Andric // Also, we can extend this to support more cases, if we actually
5150b57cec5SDimitry Andric // know of kinds of multiexit loops that would benefit from unrolling.
5160b57cec5SDimitry Andric }
5170b57cec5SDimitry Andric
518af732203SDimitry Andric // Assign the maximum possible trip count as the back edge weight for the
519af732203SDimitry Andric // remainder loop if the original loop comes with a branch weight.
updateLatchBranchWeightsForRemainderLoop(Loop * OrigLoop,Loop * RemainderLoop,uint64_t UnrollFactor)520af732203SDimitry Andric static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
521af732203SDimitry Andric Loop *RemainderLoop,
522af732203SDimitry Andric uint64_t UnrollFactor) {
523af732203SDimitry Andric uint64_t TrueWeight, FalseWeight;
524af732203SDimitry Andric BranchInst *LatchBR =
525af732203SDimitry Andric cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
526af732203SDimitry Andric if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
527af732203SDimitry Andric uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
528af732203SDimitry Andric ? FalseWeight
529af732203SDimitry Andric : TrueWeight;
530af732203SDimitry Andric assert(UnrollFactor > 1);
531af732203SDimitry Andric uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
532af732203SDimitry Andric BasicBlock *Header = RemainderLoop->getHeader();
533af732203SDimitry Andric BasicBlock *Latch = RemainderLoop->getLoopLatch();
534af732203SDimitry Andric auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
535af732203SDimitry Andric unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
536af732203SDimitry Andric MDBuilder MDB(RemainderLatchBR->getContext());
537af732203SDimitry Andric MDNode *WeightNode =
538af732203SDimitry Andric HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
539af732203SDimitry Andric : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
540af732203SDimitry Andric RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
541af732203SDimitry Andric }
542af732203SDimitry Andric }
543af732203SDimitry Andric
5440b57cec5SDimitry Andric /// Insert code in the prolog/epilog code when unrolling a loop with a
5450b57cec5SDimitry Andric /// run-time trip-count.
5460b57cec5SDimitry Andric ///
5470b57cec5SDimitry Andric /// This method assumes that the loop unroll factor is total number
5480b57cec5SDimitry Andric /// of loop bodies in the loop after unrolling. (Some folks refer
5490b57cec5SDimitry Andric /// to the unroll factor as the number of *extra* copies added).
5500b57cec5SDimitry Andric /// We assume also that the loop unroll factor is a power-of-two. So, after
5510b57cec5SDimitry Andric /// unrolling the loop, the number of loop bodies executed is 2,
5520b57cec5SDimitry Andric /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
5530b57cec5SDimitry Andric /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
5540b57cec5SDimitry Andric /// the switch instruction is generated.
5550b57cec5SDimitry Andric ///
5560b57cec5SDimitry Andric /// ***Prolog case***
5570b57cec5SDimitry Andric /// extraiters = tripcount % loopfactor
5580b57cec5SDimitry Andric /// if (extraiters == 0) jump Loop:
5590b57cec5SDimitry Andric /// else jump Prol:
5600b57cec5SDimitry Andric /// Prol: LoopBody;
5610b57cec5SDimitry Andric /// extraiters -= 1 // Omitted if unroll factor is 2.
5620b57cec5SDimitry Andric /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
5630b57cec5SDimitry Andric /// if (tripcount < loopfactor) jump End:
5640b57cec5SDimitry Andric /// Loop:
5650b57cec5SDimitry Andric /// ...
5660b57cec5SDimitry Andric /// End:
5670b57cec5SDimitry Andric ///
5680b57cec5SDimitry Andric /// ***Epilog case***
5690b57cec5SDimitry Andric /// extraiters = tripcount % loopfactor
5700b57cec5SDimitry Andric /// if (tripcount < loopfactor) jump LoopExit:
5710b57cec5SDimitry Andric /// unroll_iters = tripcount - extraiters
5720b57cec5SDimitry Andric /// Loop: LoopBody; (executes unroll_iter times);
5730b57cec5SDimitry Andric /// unroll_iter -= 1
5740b57cec5SDimitry Andric /// if (unroll_iter != 0) jump Loop:
5750b57cec5SDimitry Andric /// LoopExit:
5760b57cec5SDimitry Andric /// if (extraiters == 0) jump EpilExit:
5770b57cec5SDimitry Andric /// Epil: LoopBody; (executes extraiters times)
5780b57cec5SDimitry Andric /// extraiters -= 1 // Omitted if unroll factor is 2.
5790b57cec5SDimitry Andric /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
5800b57cec5SDimitry Andric /// EpilExit:
5810b57cec5SDimitry Andric
UnrollRuntimeLoopRemainder(Loop * L,unsigned Count,bool AllowExpensiveTripCount,bool UseEpilogRemainder,bool UnrollRemainder,bool ForgetAllSCEV,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,bool PreserveLCSSA,Loop ** ResultLoop)5825ffd83dbSDimitry Andric bool llvm::UnrollRuntimeLoopRemainder(
5835ffd83dbSDimitry Andric Loop *L, unsigned Count, bool AllowExpensiveTripCount,
5845ffd83dbSDimitry Andric bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
5855ffd83dbSDimitry Andric LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
5865ffd83dbSDimitry Andric const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
5870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
5880b57cec5SDimitry Andric LLVM_DEBUG(L->dump());
5890b57cec5SDimitry Andric LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
5900b57cec5SDimitry Andric : dbgs() << "Using prolog remainder.\n");
5910b57cec5SDimitry Andric
5920b57cec5SDimitry Andric // Make sure the loop is in canonical form.
5930b57cec5SDimitry Andric if (!L->isLoopSimplifyForm()) {
5940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
5950b57cec5SDimitry Andric return false;
5960b57cec5SDimitry Andric }
5970b57cec5SDimitry Andric
5980b57cec5SDimitry Andric // Guaranteed by LoopSimplifyForm.
5990b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch();
6000b57cec5SDimitry Andric BasicBlock *Header = L->getHeader();
6010b57cec5SDimitry Andric
6020b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
6030b57cec5SDimitry Andric
6040b57cec5SDimitry Andric if (!LatchBR || LatchBR->isUnconditional()) {
6050b57cec5SDimitry Andric // The loop-rotate pass can be helpful to avoid this in many cases.
6060b57cec5SDimitry Andric LLVM_DEBUG(
6070b57cec5SDimitry Andric dbgs()
6080b57cec5SDimitry Andric << "Loop latch not terminated by a conditional branch.\n");
6090b57cec5SDimitry Andric return false;
6100b57cec5SDimitry Andric }
6110b57cec5SDimitry Andric
6120b57cec5SDimitry Andric unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
6130b57cec5SDimitry Andric BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
6140b57cec5SDimitry Andric
6150b57cec5SDimitry Andric if (L->contains(LatchExit)) {
6160b57cec5SDimitry Andric // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
6170b57cec5SDimitry Andric // targets of the Latch be an exit block out of the loop.
6180b57cec5SDimitry Andric LLVM_DEBUG(
6190b57cec5SDimitry Andric dbgs()
6200b57cec5SDimitry Andric << "One of the loop latch successors must be the exit block.\n");
6210b57cec5SDimitry Andric return false;
6220b57cec5SDimitry Andric }
6230b57cec5SDimitry Andric
6240b57cec5SDimitry Andric // These are exit blocks other than the target of the latch exiting block.
6250b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> OtherExits;
6260b57cec5SDimitry Andric L->getUniqueNonLatchExitBlocks(OtherExits);
6270b57cec5SDimitry Andric bool isMultiExitUnrollingEnabled =
6280b57cec5SDimitry Andric canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
6290b57cec5SDimitry Andric UseEpilogRemainder) &&
6300b57cec5SDimitry Andric canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
6310b57cec5SDimitry Andric UseEpilogRemainder);
6320b57cec5SDimitry Andric // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
6330b57cec5SDimitry Andric if (!isMultiExitUnrollingEnabled &&
6340b57cec5SDimitry Andric (!L->getExitingBlock() || OtherExits.size())) {
6350b57cec5SDimitry Andric LLVM_DEBUG(
6360b57cec5SDimitry Andric dbgs()
6370b57cec5SDimitry Andric << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
6380b57cec5SDimitry Andric "enabled!\n");
6390b57cec5SDimitry Andric return false;
6400b57cec5SDimitry Andric }
6410b57cec5SDimitry Andric // Use Scalar Evolution to compute the trip count. This allows more loops to
6420b57cec5SDimitry Andric // be unrolled than relying on induction var simplification.
6430b57cec5SDimitry Andric if (!SE)
6440b57cec5SDimitry Andric return false;
6450b57cec5SDimitry Andric
6460b57cec5SDimitry Andric // Only unroll loops with a computable trip count, and the trip count needs
6470b57cec5SDimitry Andric // to be an int value (allowing a pointer type is a TODO item).
6480b57cec5SDimitry Andric // We calculate the backedge count by using getExitCount on the Latch block,
6490b57cec5SDimitry Andric // which is proven to be the only exiting block in this loop. This is same as
6500b57cec5SDimitry Andric // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
6510b57cec5SDimitry Andric // exiting blocks).
6520b57cec5SDimitry Andric const SCEV *BECountSC = SE->getExitCount(L, Latch);
6530b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(BECountSC) ||
6540b57cec5SDimitry Andric !BECountSC->getType()->isIntegerTy()) {
6550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
6560b57cec5SDimitry Andric return false;
6570b57cec5SDimitry Andric }
6580b57cec5SDimitry Andric
6590b57cec5SDimitry Andric unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
6600b57cec5SDimitry Andric
6610b57cec5SDimitry Andric // Add 1 since the backedge count doesn't include the first loop iteration.
6620b57cec5SDimitry Andric const SCEV *TripCountSC =
6630b57cec5SDimitry Andric SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
6640b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(TripCountSC)) {
6650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
6660b57cec5SDimitry Andric return false;
6670b57cec5SDimitry Andric }
6680b57cec5SDimitry Andric
6690b57cec5SDimitry Andric BasicBlock *PreHeader = L->getLoopPreheader();
6700b57cec5SDimitry Andric BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
6710b57cec5SDimitry Andric const DataLayout &DL = Header->getModule()->getDataLayout();
6720b57cec5SDimitry Andric SCEVExpander Expander(*SE, DL, "loop-unroll");
6730b57cec5SDimitry Andric if (!AllowExpensiveTripCount &&
6745ffd83dbSDimitry Andric Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
6755ffd83dbSDimitry Andric TTI, PreHeaderBR)) {
6760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
6770b57cec5SDimitry Andric return false;
6780b57cec5SDimitry Andric }
6790b57cec5SDimitry Andric
6800b57cec5SDimitry Andric // This constraint lets us deal with an overflowing trip count easily; see the
6810b57cec5SDimitry Andric // comment on ModVal below.
6820b57cec5SDimitry Andric if (Log2_32(Count) > BEWidth) {
6830b57cec5SDimitry Andric LLVM_DEBUG(
6840b57cec5SDimitry Andric dbgs()
6850b57cec5SDimitry Andric << "Count failed constraint on overflow trip count calculation.\n");
6860b57cec5SDimitry Andric return false;
6870b57cec5SDimitry Andric }
6880b57cec5SDimitry Andric
6890b57cec5SDimitry Andric // Loop structure is the following:
6900b57cec5SDimitry Andric //
6910b57cec5SDimitry Andric // PreHeader
6920b57cec5SDimitry Andric // Header
6930b57cec5SDimitry Andric // ...
6940b57cec5SDimitry Andric // Latch
6950b57cec5SDimitry Andric // LatchExit
6960b57cec5SDimitry Andric
6970b57cec5SDimitry Andric BasicBlock *NewPreHeader;
6980b57cec5SDimitry Andric BasicBlock *NewExit = nullptr;
6990b57cec5SDimitry Andric BasicBlock *PrologExit = nullptr;
7000b57cec5SDimitry Andric BasicBlock *EpilogPreHeader = nullptr;
7010b57cec5SDimitry Andric BasicBlock *PrologPreHeader = nullptr;
7020b57cec5SDimitry Andric
7030b57cec5SDimitry Andric if (UseEpilogRemainder) {
7040b57cec5SDimitry Andric // If epilog remainder
7050b57cec5SDimitry Andric // Split PreHeader to insert a branch around loop for unrolling.
7060b57cec5SDimitry Andric NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
7070b57cec5SDimitry Andric NewPreHeader->setName(PreHeader->getName() + ".new");
7080b57cec5SDimitry Andric // Split LatchExit to create phi nodes from branch above.
7090b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
7100b57cec5SDimitry Andric NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
7110b57cec5SDimitry Andric nullptr, PreserveLCSSA);
7120b57cec5SDimitry Andric // NewExit gets its DebugLoc from LatchExit, which is not part of the
7130b57cec5SDimitry Andric // original Loop.
7140b57cec5SDimitry Andric // Fix this by setting Loop's DebugLoc to NewExit.
7150b57cec5SDimitry Andric auto *NewExitTerminator = NewExit->getTerminator();
7160b57cec5SDimitry Andric NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
7170b57cec5SDimitry Andric // Split NewExit to insert epilog remainder loop.
7180b57cec5SDimitry Andric EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
7190b57cec5SDimitry Andric EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
7200b57cec5SDimitry Andric } else {
7210b57cec5SDimitry Andric // If prolog remainder
7220b57cec5SDimitry Andric // Split the original preheader twice to insert prolog remainder loop
7230b57cec5SDimitry Andric PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
7240b57cec5SDimitry Andric PrologPreHeader->setName(Header->getName() + ".prol.preheader");
7250b57cec5SDimitry Andric PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
7260b57cec5SDimitry Andric DT, LI);
7270b57cec5SDimitry Andric PrologExit->setName(Header->getName() + ".prol.loopexit");
7280b57cec5SDimitry Andric // Split PrologExit to get NewPreHeader.
7290b57cec5SDimitry Andric NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
7300b57cec5SDimitry Andric NewPreHeader->setName(PreHeader->getName() + ".new");
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric // Loop structure should be the following:
7330b57cec5SDimitry Andric // Epilog Prolog
7340b57cec5SDimitry Andric //
7350b57cec5SDimitry Andric // PreHeader PreHeader
7360b57cec5SDimitry Andric // *NewPreHeader *PrologPreHeader
7370b57cec5SDimitry Andric // Header *PrologExit
7380b57cec5SDimitry Andric // ... *NewPreHeader
7390b57cec5SDimitry Andric // Latch Header
7400b57cec5SDimitry Andric // *NewExit ...
7410b57cec5SDimitry Andric // *EpilogPreHeader Latch
7420b57cec5SDimitry Andric // LatchExit LatchExit
7430b57cec5SDimitry Andric
7440b57cec5SDimitry Andric // Calculate conditions for branch around loop for unrolling
7450b57cec5SDimitry Andric // in epilog case and around prolog remainder loop in prolog case.
7460b57cec5SDimitry Andric // Compute the number of extra iterations required, which is:
7470b57cec5SDimitry Andric // extra iterations = run-time trip count % loop unroll factor
7480b57cec5SDimitry Andric PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
7490b57cec5SDimitry Andric Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
7500b57cec5SDimitry Andric PreHeaderBR);
7510b57cec5SDimitry Andric Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
7520b57cec5SDimitry Andric PreHeaderBR);
7530b57cec5SDimitry Andric IRBuilder<> B(PreHeaderBR);
7540b57cec5SDimitry Andric Value *ModVal;
7550b57cec5SDimitry Andric // Calculate ModVal = (BECount + 1) % Count.
7560b57cec5SDimitry Andric // Note that TripCount is BECount + 1.
7570b57cec5SDimitry Andric if (isPowerOf2_32(Count)) {
7580b57cec5SDimitry Andric // When Count is power of 2 we don't BECount for epilog case, however we'll
7590b57cec5SDimitry Andric // need it for a branch around unrolling loop for prolog case.
7600b57cec5SDimitry Andric ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
7610b57cec5SDimitry Andric // 1. There are no iterations to be run in the prolog/epilog loop.
7620b57cec5SDimitry Andric // OR
7630b57cec5SDimitry Andric // 2. The addition computing TripCount overflowed.
7640b57cec5SDimitry Andric //
7650b57cec5SDimitry Andric // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
7660b57cec5SDimitry Andric // the number of iterations that remain to be run in the original loop is a
7670b57cec5SDimitry Andric // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
7680b57cec5SDimitry Andric // explicitly check this above).
7690b57cec5SDimitry Andric } else {
7700b57cec5SDimitry Andric // As (BECount + 1) can potentially unsigned overflow we count
7710b57cec5SDimitry Andric // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
7720b57cec5SDimitry Andric Value *ModValTmp = B.CreateURem(BECount,
7730b57cec5SDimitry Andric ConstantInt::get(BECount->getType(),
7740b57cec5SDimitry Andric Count));
7750b57cec5SDimitry Andric Value *ModValAdd = B.CreateAdd(ModValTmp,
7760b57cec5SDimitry Andric ConstantInt::get(ModValTmp->getType(), 1));
7770b57cec5SDimitry Andric // At that point (BECount % Count) + 1 could be equal to Count.
7780b57cec5SDimitry Andric // To handle this case we need to take mod by Count one more time.
7790b57cec5SDimitry Andric ModVal = B.CreateURem(ModValAdd,
7800b57cec5SDimitry Andric ConstantInt::get(BECount->getType(), Count),
7810b57cec5SDimitry Andric "xtraiter");
7820b57cec5SDimitry Andric }
7830b57cec5SDimitry Andric Value *BranchVal =
7840b57cec5SDimitry Andric UseEpilogRemainder ? B.CreateICmpULT(BECount,
7850b57cec5SDimitry Andric ConstantInt::get(BECount->getType(),
7860b57cec5SDimitry Andric Count - 1)) :
7870b57cec5SDimitry Andric B.CreateIsNotNull(ModVal, "lcmp.mod");
7880b57cec5SDimitry Andric BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
7890b57cec5SDimitry Andric BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
7900b57cec5SDimitry Andric // Branch to either remainder (extra iterations) loop or unrolling loop.
7910b57cec5SDimitry Andric B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
7920b57cec5SDimitry Andric PreHeaderBR->eraseFromParent();
7930b57cec5SDimitry Andric if (DT) {
7940b57cec5SDimitry Andric if (UseEpilogRemainder)
7950b57cec5SDimitry Andric DT->changeImmediateDominator(NewExit, PreHeader);
7960b57cec5SDimitry Andric else
7970b57cec5SDimitry Andric DT->changeImmediateDominator(PrologExit, PreHeader);
7980b57cec5SDimitry Andric }
7990b57cec5SDimitry Andric Function *F = Header->getParent();
8000b57cec5SDimitry Andric // Get an ordered list of blocks in the loop to help with the ordering of the
8010b57cec5SDimitry Andric // cloned blocks in the prolog/epilog code
8020b57cec5SDimitry Andric LoopBlocksDFS LoopBlocks(L);
8030b57cec5SDimitry Andric LoopBlocks.perform(LI);
8040b57cec5SDimitry Andric
8050b57cec5SDimitry Andric //
8060b57cec5SDimitry Andric // For each extra loop iteration, create a copy of the loop's basic blocks
8070b57cec5SDimitry Andric // and generate a condition that branches to the copy depending on the
8080b57cec5SDimitry Andric // number of 'left over' iterations.
8090b57cec5SDimitry Andric //
8100b57cec5SDimitry Andric std::vector<BasicBlock *> NewBlocks;
8110b57cec5SDimitry Andric ValueToValueMapTy VMap;
8120b57cec5SDimitry Andric
8130b57cec5SDimitry Andric // For unroll factor 2 remainder loop will have 1 iterations.
8140b57cec5SDimitry Andric // Do not create 1 iteration loop.
8150b57cec5SDimitry Andric bool CreateRemainderLoop = (Count != 2);
8160b57cec5SDimitry Andric
8170b57cec5SDimitry Andric // Clone all the basic blocks in the loop. If Count is 2, we don't clone
8180b57cec5SDimitry Andric // the loop, otherwise we create a cloned loop to execute the extra
8190b57cec5SDimitry Andric // iterations. This function adds the appropriate CFG connections.
8200b57cec5SDimitry Andric BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
8210b57cec5SDimitry Andric BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
8220b57cec5SDimitry Andric Loop *remainderLoop = CloneLoopBlocks(
8230b57cec5SDimitry Andric L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
8240b57cec5SDimitry Andric InsertTop, InsertBot,
8250b57cec5SDimitry Andric NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
8260b57cec5SDimitry Andric
827af732203SDimitry Andric // Assign the maximum possible trip count as the back edge weight for the
828af732203SDimitry Andric // remainder loop if the original loop comes with a branch weight.
829af732203SDimitry Andric if (remainderLoop && !UnrollRemainder)
830af732203SDimitry Andric updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
831af732203SDimitry Andric
8320b57cec5SDimitry Andric // Insert the cloned blocks into the function.
8330b57cec5SDimitry Andric F->getBasicBlockList().splice(InsertBot->getIterator(),
8340b57cec5SDimitry Andric F->getBasicBlockList(),
8350b57cec5SDimitry Andric NewBlocks[0]->getIterator(),
8360b57cec5SDimitry Andric F->end());
8370b57cec5SDimitry Andric
8380b57cec5SDimitry Andric // Now the loop blocks are cloned and the other exiting blocks from the
8390b57cec5SDimitry Andric // remainder are connected to the original Loop's exit blocks. The remaining
8400b57cec5SDimitry Andric // work is to update the phi nodes in the original loop, and take in the
8410b57cec5SDimitry Andric // values from the cloned region.
8420b57cec5SDimitry Andric for (auto *BB : OtherExits) {
8430b57cec5SDimitry Andric for (auto &II : *BB) {
8440b57cec5SDimitry Andric
8450b57cec5SDimitry Andric // Given we preserve LCSSA form, we know that the values used outside the
8460b57cec5SDimitry Andric // loop will be used through these phi nodes at the exit blocks that are
8470b57cec5SDimitry Andric // transformed below.
8480b57cec5SDimitry Andric if (!isa<PHINode>(II))
8490b57cec5SDimitry Andric break;
8500b57cec5SDimitry Andric PHINode *Phi = cast<PHINode>(&II);
8510b57cec5SDimitry Andric unsigned oldNumOperands = Phi->getNumIncomingValues();
8520b57cec5SDimitry Andric // Add the incoming values from the remainder code to the end of the phi
8530b57cec5SDimitry Andric // node.
8540b57cec5SDimitry Andric for (unsigned i =0; i < oldNumOperands; i++){
8550b57cec5SDimitry Andric Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
8560b57cec5SDimitry Andric // newVal can be a constant or derived from values outside the loop, and
8570b57cec5SDimitry Andric // hence need not have a VMap value. Also, since lookup already generated
8580b57cec5SDimitry Andric // a default "null" VMap entry for this value, we need to populate that
8590b57cec5SDimitry Andric // VMap entry correctly, with the mapped entry being itself.
8600b57cec5SDimitry Andric if (!newVal) {
8610b57cec5SDimitry Andric newVal = Phi->getIncomingValue(i);
8620b57cec5SDimitry Andric VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
8630b57cec5SDimitry Andric }
8640b57cec5SDimitry Andric Phi->addIncoming(newVal,
8650b57cec5SDimitry Andric cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
8660b57cec5SDimitry Andric }
8670b57cec5SDimitry Andric }
8680b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
8690b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(BB)) {
8700b57cec5SDimitry Andric assert(!(any_of(OtherExits,
8710b57cec5SDimitry Andric [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
8720b57cec5SDimitry Andric SuccBB == LatchExit) &&
8730b57cec5SDimitry Andric "Breaks the definition of dedicated exits!");
8740b57cec5SDimitry Andric }
8750b57cec5SDimitry Andric #endif
8760b57cec5SDimitry Andric }
8770b57cec5SDimitry Andric
8780b57cec5SDimitry Andric // Update the immediate dominator of the exit blocks and blocks that are
8790b57cec5SDimitry Andric // reachable from the exit blocks. This is needed because we now have paths
8800b57cec5SDimitry Andric // from both the original loop and the remainder code reaching the exit
8810b57cec5SDimitry Andric // blocks. While the IDom of these exit blocks were from the original loop,
8820b57cec5SDimitry Andric // now the IDom is the preheader (which decides whether the original loop or
8830b57cec5SDimitry Andric // remainder code should run).
8840b57cec5SDimitry Andric if (DT && !L->getExitingBlock()) {
8850b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> ChildrenToUpdate;
8860b57cec5SDimitry Andric // NB! We have to examine the dom children of all loop blocks, not just
8870b57cec5SDimitry Andric // those which are the IDom of the exit blocks. This is because blocks
8880b57cec5SDimitry Andric // reachable from the exit blocks can have their IDom as the nearest common
8890b57cec5SDimitry Andric // dominator of the exit blocks.
8900b57cec5SDimitry Andric for (auto *BB : L->blocks()) {
8910b57cec5SDimitry Andric auto *DomNodeBB = DT->getNode(BB);
8925ffd83dbSDimitry Andric for (auto *DomChild : DomNodeBB->children()) {
8930b57cec5SDimitry Andric auto *DomChildBB = DomChild->getBlock();
8940b57cec5SDimitry Andric if (!L->contains(LI->getLoopFor(DomChildBB)))
8950b57cec5SDimitry Andric ChildrenToUpdate.push_back(DomChildBB);
8960b57cec5SDimitry Andric }
8970b57cec5SDimitry Andric }
8980b57cec5SDimitry Andric for (auto *BB : ChildrenToUpdate)
8990b57cec5SDimitry Andric DT->changeImmediateDominator(BB, PreHeader);
9000b57cec5SDimitry Andric }
9010b57cec5SDimitry Andric
9020b57cec5SDimitry Andric // Loop structure should be the following:
9030b57cec5SDimitry Andric // Epilog Prolog
9040b57cec5SDimitry Andric //
9050b57cec5SDimitry Andric // PreHeader PreHeader
9060b57cec5SDimitry Andric // NewPreHeader PrologPreHeader
9070b57cec5SDimitry Andric // Header PrologHeader
9080b57cec5SDimitry Andric // ... ...
9090b57cec5SDimitry Andric // Latch PrologLatch
9100b57cec5SDimitry Andric // NewExit PrologExit
9110b57cec5SDimitry Andric // EpilogPreHeader NewPreHeader
9120b57cec5SDimitry Andric // EpilogHeader Header
9130b57cec5SDimitry Andric // ... ...
9140b57cec5SDimitry Andric // EpilogLatch Latch
9150b57cec5SDimitry Andric // LatchExit LatchExit
9160b57cec5SDimitry Andric
9170b57cec5SDimitry Andric // Rewrite the cloned instruction operands to use the values created when the
9180b57cec5SDimitry Andric // clone is created.
9190b57cec5SDimitry Andric for (BasicBlock *BB : NewBlocks) {
9200b57cec5SDimitry Andric for (Instruction &I : *BB) {
9210b57cec5SDimitry Andric RemapInstruction(&I, VMap,
9220b57cec5SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
9230b57cec5SDimitry Andric }
9240b57cec5SDimitry Andric }
9250b57cec5SDimitry Andric
9260b57cec5SDimitry Andric if (UseEpilogRemainder) {
9270b57cec5SDimitry Andric // Connect the epilog code to the original loop and update the
9280b57cec5SDimitry Andric // PHI functions.
9290b57cec5SDimitry Andric ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
9300b57cec5SDimitry Andric EpilogPreHeader, NewPreHeader, VMap, DT, LI,
9310b57cec5SDimitry Andric PreserveLCSSA);
9320b57cec5SDimitry Andric
9330b57cec5SDimitry Andric // Update counter in loop for unrolling.
9340b57cec5SDimitry Andric // I should be multiply of Count.
9350b57cec5SDimitry Andric IRBuilder<> B2(NewPreHeader->getTerminator());
9360b57cec5SDimitry Andric Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
9370b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
9380b57cec5SDimitry Andric B2.SetInsertPoint(LatchBR);
9390b57cec5SDimitry Andric PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
9400b57cec5SDimitry Andric Header->getFirstNonPHI());
9410b57cec5SDimitry Andric Value *IdxSub =
9420b57cec5SDimitry Andric B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
9430b57cec5SDimitry Andric NewIdx->getName() + ".nsub");
9440b57cec5SDimitry Andric Value *IdxCmp;
9450b57cec5SDimitry Andric if (LatchBR->getSuccessor(0) == Header)
9460b57cec5SDimitry Andric IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
9470b57cec5SDimitry Andric else
9480b57cec5SDimitry Andric IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
9490b57cec5SDimitry Andric NewIdx->addIncoming(TestVal, NewPreHeader);
9500b57cec5SDimitry Andric NewIdx->addIncoming(IdxSub, Latch);
9510b57cec5SDimitry Andric LatchBR->setCondition(IdxCmp);
9520b57cec5SDimitry Andric } else {
9530b57cec5SDimitry Andric // Connect the prolog code to the original loop and update the
9540b57cec5SDimitry Andric // PHI functions.
9550b57cec5SDimitry Andric ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
9560b57cec5SDimitry Andric NewPreHeader, VMap, DT, LI, PreserveLCSSA);
9570b57cec5SDimitry Andric }
9580b57cec5SDimitry Andric
9590b57cec5SDimitry Andric // If this loop is nested, then the loop unroller changes the code in the any
9600b57cec5SDimitry Andric // of its parent loops, so the Scalar Evolution pass needs to be run again.
9610b57cec5SDimitry Andric SE->forgetTopmostLoop(L);
9620b57cec5SDimitry Andric
9630b57cec5SDimitry Andric // Verify that the Dom Tree is correct.
9640b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
9650b57cec5SDimitry Andric if (DT)
9660b57cec5SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Full));
9670b57cec5SDimitry Andric #endif
9680b57cec5SDimitry Andric
9690b57cec5SDimitry Andric // Canonicalize to LoopSimplifyForm both original and remainder loops. We
9700b57cec5SDimitry Andric // cannot rely on the LoopUnrollPass to do this because it only does
9710b57cec5SDimitry Andric // canonicalization for parent/subloops and not the sibling loops.
9720b57cec5SDimitry Andric if (OtherExits.size() > 0) {
9730b57cec5SDimitry Andric // Generate dedicated exit blocks for the original loop, to preserve
9740b57cec5SDimitry Andric // LoopSimplifyForm.
9750b57cec5SDimitry Andric formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
9760b57cec5SDimitry Andric // Generate dedicated exit blocks for the remainder loop if one exists, to
9770b57cec5SDimitry Andric // preserve LoopSimplifyForm.
9780b57cec5SDimitry Andric if (remainderLoop)
9790b57cec5SDimitry Andric formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
9800b57cec5SDimitry Andric }
9810b57cec5SDimitry Andric
9820b57cec5SDimitry Andric auto UnrollResult = LoopUnrollResult::Unmodified;
9830b57cec5SDimitry Andric if (remainderLoop && UnrollRemainder) {
9840b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
9850b57cec5SDimitry Andric UnrollResult =
9860b57cec5SDimitry Andric UnrollLoop(remainderLoop,
987*5f7ddb14SDimitry Andric {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
988*5f7ddb14SDimitry Andric /*AllowExpensiveTripCount*/ false,
989*5f7ddb14SDimitry Andric /*UnrollRemainder*/ false, ForgetAllSCEV},
9905ffd83dbSDimitry Andric LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
9910b57cec5SDimitry Andric }
9920b57cec5SDimitry Andric
9930b57cec5SDimitry Andric if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
9940b57cec5SDimitry Andric *ResultLoop = remainderLoop;
9950b57cec5SDimitry Andric NumRuntimeUnrolled++;
9960b57cec5SDimitry Andric return true;
9970b57cec5SDimitry Andric }
998