14ba319b5SDimitry Andric ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2f37b6182SDimitry Andric //
3f37b6182SDimitry Andric // The LLVM Compiler Infrastructure
4f37b6182SDimitry Andric //
5f37b6182SDimitry Andric // This file is distributed under the University of Illinois Open Source
6f37b6182SDimitry Andric // License. See LICENSE.TXT for details.
7f37b6182SDimitry Andric //
8f37b6182SDimitry Andric //===----------------------------------------------------------------------===//
9f37b6182SDimitry Andric
10db17bf38SDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
1160ff8e32SDimitry Andric #include "llvm/ADT/DenseMap.h"
12db17bf38SDimitry Andric #include "llvm/ADT/STLExtras.h"
1360ff8e32SDimitry Andric #include "llvm/ADT/Sequence.h"
1460ff8e32SDimitry Andric #include "llvm/ADT/SetVector.h"
15f37b6182SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
1660ff8e32SDimitry Andric #include "llvm/ADT/SmallVector.h"
17f37b6182SDimitry Andric #include "llvm/ADT/Statistic.h"
1860ff8e32SDimitry Andric #include "llvm/ADT/Twine.h"
19f37b6182SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
204ba319b5SDimitry Andric #include "llvm/Analysis/CFG.h"
212cab237bSDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
22*b5893f02SDimitry Andric #include "llvm/Analysis/GuardUtils.h"
234ba319b5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
2460ff8e32SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
25f37b6182SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
264ba319b5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
27f37b6182SDimitry Andric #include "llvm/Analysis/LoopPass.h"
28*b5893f02SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
29*b5893f02SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
304ba319b5SDimitry Andric #include "llvm/Analysis/Utils/Local.h"
3160ff8e32SDimitry Andric #include "llvm/IR/BasicBlock.h"
3260ff8e32SDimitry Andric #include "llvm/IR/Constant.h"
33f37b6182SDimitry Andric #include "llvm/IR/Constants.h"
34f37b6182SDimitry Andric #include "llvm/IR/Dominators.h"
35f37b6182SDimitry Andric #include "llvm/IR/Function.h"
3660ff8e32SDimitry Andric #include "llvm/IR/InstrTypes.h"
3760ff8e32SDimitry Andric #include "llvm/IR/Instruction.h"
38f37b6182SDimitry Andric #include "llvm/IR/Instructions.h"
392cab237bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
4060ff8e32SDimitry Andric #include "llvm/IR/Use.h"
4160ff8e32SDimitry Andric #include "llvm/IR/Value.h"
4260ff8e32SDimitry Andric #include "llvm/Pass.h"
4360ff8e32SDimitry Andric #include "llvm/Support/Casting.h"
44f37b6182SDimitry Andric #include "llvm/Support/Debug.h"
4560ff8e32SDimitry Andric #include "llvm/Support/ErrorHandling.h"
4660ff8e32SDimitry Andric #include "llvm/Support/GenericDomTree.h"
47f37b6182SDimitry Andric #include "llvm/Support/raw_ostream.h"
482cab237bSDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
49f37b6182SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
502cab237bSDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
51f37b6182SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
522cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
5360ff8e32SDimitry Andric #include <algorithm>
5460ff8e32SDimitry Andric #include <cassert>
5560ff8e32SDimitry Andric #include <iterator>
562cab237bSDimitry Andric #include <numeric>
5760ff8e32SDimitry Andric #include <utility>
58f37b6182SDimitry Andric
59f37b6182SDimitry Andric #define DEBUG_TYPE "simple-loop-unswitch"
60f37b6182SDimitry Andric
61f37b6182SDimitry Andric using namespace llvm;
62f37b6182SDimitry Andric
63f37b6182SDimitry Andric STATISTIC(NumBranches, "Number of branches unswitched");
64f37b6182SDimitry Andric STATISTIC(NumSwitches, "Number of switches unswitched");
65*b5893f02SDimitry Andric STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
66f37b6182SDimitry Andric STATISTIC(NumTrivial, "Number of unswitches that are trivial");
67*b5893f02SDimitry Andric STATISTIC(
68*b5893f02SDimitry Andric NumCostMultiplierSkipped,
69*b5893f02SDimitry Andric "Number of unswitch candidates that had their cost multiplier skipped");
70f37b6182SDimitry Andric
712cab237bSDimitry Andric static cl::opt<bool> EnableNonTrivialUnswitch(
722cab237bSDimitry Andric "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
732cab237bSDimitry Andric cl::desc("Forcibly enables non-trivial loop unswitching rather than "
742cab237bSDimitry Andric "following the configuration passed into the pass."));
752cab237bSDimitry Andric
762cab237bSDimitry Andric static cl::opt<int>
772cab237bSDimitry Andric UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
782cab237bSDimitry Andric cl::desc("The cost threshold for unswitching a loop."));
792cab237bSDimitry Andric
80*b5893f02SDimitry Andric static cl::opt<bool> EnableUnswitchCostMultiplier(
81*b5893f02SDimitry Andric "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
82*b5893f02SDimitry Andric cl::desc("Enable unswitch cost multiplier that prohibits exponential "
83*b5893f02SDimitry Andric "explosion in nontrivial unswitch."));
84*b5893f02SDimitry Andric static cl::opt<int> UnswitchSiblingsToplevelDiv(
85*b5893f02SDimitry Andric "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
86*b5893f02SDimitry Andric cl::desc("Toplevel siblings divisor for cost multiplier."));
87*b5893f02SDimitry Andric static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
88*b5893f02SDimitry Andric "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
89*b5893f02SDimitry Andric cl::desc("Number of unswitch candidates that are ignored when calculating "
90*b5893f02SDimitry Andric "cost multiplier."));
91*b5893f02SDimitry Andric static cl::opt<bool> UnswitchGuards(
92*b5893f02SDimitry Andric "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
93*b5893f02SDimitry Andric cl::desc("If enabled, simple loop unswitching will also consider "
94*b5893f02SDimitry Andric "llvm.experimental.guard intrinsics as unswitch candidates."));
95*b5893f02SDimitry Andric
964ba319b5SDimitry Andric /// Collect all of the loop invariant input values transitively used by the
974ba319b5SDimitry Andric /// homogeneous instruction graph from a given root.
984ba319b5SDimitry Andric ///
994ba319b5SDimitry Andric /// This essentially walks from a root recursively through loop variant operands
1004ba319b5SDimitry Andric /// which have the exact same opcode and finds all inputs which are loop
1014ba319b5SDimitry Andric /// invariant. For some operations these can be re-associated and unswitched out
1024ba319b5SDimitry Andric /// of the loop entirely.
1034ba319b5SDimitry Andric static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop & L,Instruction & Root,LoopInfo & LI)1044ba319b5SDimitry Andric collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
1054ba319b5SDimitry Andric LoopInfo &LI) {
1064ba319b5SDimitry Andric assert(!L.isLoopInvariant(&Root) &&
1074ba319b5SDimitry Andric "Only need to walk the graph if root itself is not invariant.");
1084ba319b5SDimitry Andric TinyPtrVector<Value *> Invariants;
1094ba319b5SDimitry Andric
1104ba319b5SDimitry Andric // Build a worklist and recurse through operators collecting invariants.
1114ba319b5SDimitry Andric SmallVector<Instruction *, 4> Worklist;
1124ba319b5SDimitry Andric SmallPtrSet<Instruction *, 8> Visited;
1134ba319b5SDimitry Andric Worklist.push_back(&Root);
1144ba319b5SDimitry Andric Visited.insert(&Root);
1154ba319b5SDimitry Andric do {
1164ba319b5SDimitry Andric Instruction &I = *Worklist.pop_back_val();
1174ba319b5SDimitry Andric for (Value *OpV : I.operand_values()) {
1184ba319b5SDimitry Andric // Skip constants as unswitching isn't interesting for them.
1194ba319b5SDimitry Andric if (isa<Constant>(OpV))
1204ba319b5SDimitry Andric continue;
1214ba319b5SDimitry Andric
1224ba319b5SDimitry Andric // Add it to our result if loop invariant.
1234ba319b5SDimitry Andric if (L.isLoopInvariant(OpV)) {
1244ba319b5SDimitry Andric Invariants.push_back(OpV);
1254ba319b5SDimitry Andric continue;
1264ba319b5SDimitry Andric }
1274ba319b5SDimitry Andric
1284ba319b5SDimitry Andric // If not an instruction with the same opcode, nothing we can do.
1294ba319b5SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(OpV);
1304ba319b5SDimitry Andric if (!OpI || OpI->getOpcode() != Root.getOpcode())
1314ba319b5SDimitry Andric continue;
1324ba319b5SDimitry Andric
1334ba319b5SDimitry Andric // Visit this operand.
1344ba319b5SDimitry Andric if (Visited.insert(OpI).second)
1354ba319b5SDimitry Andric Worklist.push_back(OpI);
1364ba319b5SDimitry Andric }
1374ba319b5SDimitry Andric } while (!Worklist.empty());
1384ba319b5SDimitry Andric
1394ba319b5SDimitry Andric return Invariants;
1404ba319b5SDimitry Andric }
1414ba319b5SDimitry Andric
replaceLoopInvariantUses(Loop & L,Value * Invariant,Constant & Replacement)1424ba319b5SDimitry Andric static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
143f37b6182SDimitry Andric Constant &Replacement) {
1444ba319b5SDimitry Andric assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
145f37b6182SDimitry Andric
146f37b6182SDimitry Andric // Replace uses of LIC in the loop with the given constant.
1474ba319b5SDimitry Andric for (auto UI = Invariant->use_begin(), UE = Invariant->use_end(); UI != UE;) {
148f37b6182SDimitry Andric // Grab the use and walk past it so we can clobber it in the use list.
149f37b6182SDimitry Andric Use *U = &*UI++;
150f37b6182SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U->getUser());
151f37b6182SDimitry Andric
152f37b6182SDimitry Andric // Replace this use within the loop body.
1534ba319b5SDimitry Andric if (UserI && L.contains(UserI))
1544ba319b5SDimitry Andric U->set(&Replacement);
155f37b6182SDimitry Andric }
156f37b6182SDimitry Andric }
157f37b6182SDimitry Andric
1585517e702SDimitry Andric /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
1595517e702SDimitry Andric /// incoming values along this edge.
areLoopExitPHIsLoopInvariant(Loop & L,BasicBlock & ExitingBB,BasicBlock & ExitBB)1605517e702SDimitry Andric static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
1615517e702SDimitry Andric BasicBlock &ExitBB) {
1625517e702SDimitry Andric for (Instruction &I : ExitBB) {
1635517e702SDimitry Andric auto *PN = dyn_cast<PHINode>(&I);
1645517e702SDimitry Andric if (!PN)
1655517e702SDimitry Andric // No more PHIs to check.
1665517e702SDimitry Andric return true;
1675517e702SDimitry Andric
1685517e702SDimitry Andric // If the incoming value for this edge isn't loop invariant the unswitch
1695517e702SDimitry Andric // won't be trivial.
1705517e702SDimitry Andric if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
1715517e702SDimitry Andric return false;
1725517e702SDimitry Andric }
1735517e702SDimitry Andric llvm_unreachable("Basic blocks should never be empty!");
1745517e702SDimitry Andric }
1755517e702SDimitry Andric
1764ba319b5SDimitry Andric /// Insert code to test a set of loop invariant values, and conditionally branch
1774ba319b5SDimitry Andric /// on them.
buildPartialUnswitchConditionalBranch(BasicBlock & BB,ArrayRef<Value * > Invariants,bool Direction,BasicBlock & UnswitchedSucc,BasicBlock & NormalSucc)1784ba319b5SDimitry Andric static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
1794ba319b5SDimitry Andric ArrayRef<Value *> Invariants,
1804ba319b5SDimitry Andric bool Direction,
1814ba319b5SDimitry Andric BasicBlock &UnswitchedSucc,
1824ba319b5SDimitry Andric BasicBlock &NormalSucc) {
1834ba319b5SDimitry Andric IRBuilder<> IRB(&BB);
1844ba319b5SDimitry Andric Value *Cond = Invariants.front();
1854ba319b5SDimitry Andric for (Value *Invariant :
1864ba319b5SDimitry Andric make_range(std::next(Invariants.begin()), Invariants.end()))
1874ba319b5SDimitry Andric if (Direction)
1884ba319b5SDimitry Andric Cond = IRB.CreateOr(Cond, Invariant);
1894ba319b5SDimitry Andric else
1904ba319b5SDimitry Andric Cond = IRB.CreateAnd(Cond, Invariant);
1914ba319b5SDimitry Andric
1924ba319b5SDimitry Andric IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
1934ba319b5SDimitry Andric Direction ? &NormalSucc : &UnswitchedSucc);
1944ba319b5SDimitry Andric }
1954ba319b5SDimitry Andric
1965517e702SDimitry Andric /// Rewrite the PHI nodes in an unswitched loop exit basic block.
1975517e702SDimitry Andric ///
1985517e702SDimitry Andric /// Requires that the loop exit and unswitched basic block are the same, and
1995517e702SDimitry Andric /// that the exiting block was a unique predecessor of that block. Rewrites the
2005517e702SDimitry Andric /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
2015517e702SDimitry Andric /// PHI nodes from the old preheader that now contains the unswitched
2025517e702SDimitry Andric /// terminator.
rewritePHINodesForUnswitchedExitBlock(BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH)2035517e702SDimitry Andric static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
2045517e702SDimitry Andric BasicBlock &OldExitingBB,
2055517e702SDimitry Andric BasicBlock &OldPH) {
20630785c0eSDimitry Andric for (PHINode &PN : UnswitchedBB.phis()) {
2075517e702SDimitry Andric // When the loop exit is directly unswitched we just need to update the
2085517e702SDimitry Andric // incoming basic block. We loop to handle weird cases with repeated
2095517e702SDimitry Andric // incoming blocks, but expect to typically only have one operand here.
21030785c0eSDimitry Andric for (auto i : seq<int>(0, PN.getNumOperands())) {
21130785c0eSDimitry Andric assert(PN.getIncomingBlock(i) == &OldExitingBB &&
2125517e702SDimitry Andric "Found incoming block different from unique predecessor!");
21330785c0eSDimitry Andric PN.setIncomingBlock(i, &OldPH);
2145517e702SDimitry Andric }
2155517e702SDimitry Andric }
2165517e702SDimitry Andric }
2175517e702SDimitry Andric
2185517e702SDimitry Andric /// Rewrite the PHI nodes in the loop exit basic block and the split off
2195517e702SDimitry Andric /// unswitched block.
2205517e702SDimitry Andric ///
2215517e702SDimitry Andric /// Because the exit block remains an exit from the loop, this rewrites the
2225517e702SDimitry Andric /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
2235517e702SDimitry Andric /// nodes into the unswitched basic block to select between the value in the
2245517e702SDimitry Andric /// old preheader and the loop exit.
rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock & ExitBB,BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH,bool FullUnswitch)2255517e702SDimitry Andric static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
2265517e702SDimitry Andric BasicBlock &UnswitchedBB,
2275517e702SDimitry Andric BasicBlock &OldExitingBB,
2284ba319b5SDimitry Andric BasicBlock &OldPH,
2294ba319b5SDimitry Andric bool FullUnswitch) {
2305517e702SDimitry Andric assert(&ExitBB != &UnswitchedBB &&
2315517e702SDimitry Andric "Must have different loop exit and unswitched blocks!");
2325517e702SDimitry Andric Instruction *InsertPt = &*UnswitchedBB.begin();
23330785c0eSDimitry Andric for (PHINode &PN : ExitBB.phis()) {
23430785c0eSDimitry Andric auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
23530785c0eSDimitry Andric PN.getName() + ".split", InsertPt);
2365517e702SDimitry Andric
2375517e702SDimitry Andric // Walk backwards over the old PHI node's inputs to minimize the cost of
2385517e702SDimitry Andric // removing each one. We have to do this weird loop manually so that we
2395517e702SDimitry Andric // create the same number of new incoming edges in the new PHI as we expect
2405517e702SDimitry Andric // each case-based edge to be included in the unswitched switch in some
2415517e702SDimitry Andric // cases.
2425517e702SDimitry Andric // FIXME: This is really, really gross. It would be much cleaner if LLVM
2435517e702SDimitry Andric // allowed us to create a single entry for a predecessor block without
2445517e702SDimitry Andric // having separate entries for each "edge" even though these edges are
2455517e702SDimitry Andric // required to produce identical results.
24630785c0eSDimitry Andric for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
24730785c0eSDimitry Andric if (PN.getIncomingBlock(i) != &OldExitingBB)
2485517e702SDimitry Andric continue;
2495517e702SDimitry Andric
2504ba319b5SDimitry Andric Value *Incoming = PN.getIncomingValue(i);
2514ba319b5SDimitry Andric if (FullUnswitch)
2524ba319b5SDimitry Andric // No more edge from the old exiting block to the exit block.
2534ba319b5SDimitry Andric PN.removeIncomingValue(i);
2544ba319b5SDimitry Andric
2555517e702SDimitry Andric NewPN->addIncoming(Incoming, &OldPH);
2565517e702SDimitry Andric }
2575517e702SDimitry Andric
2585517e702SDimitry Andric // Now replace the old PHI with the new one and wire the old one in as an
2595517e702SDimitry Andric // input to the new one.
26030785c0eSDimitry Andric PN.replaceAllUsesWith(NewPN);
26130785c0eSDimitry Andric NewPN->addIncoming(&PN, &ExitBB);
2625517e702SDimitry Andric }
2635517e702SDimitry Andric }
2645517e702SDimitry Andric
2654ba319b5SDimitry Andric /// Hoist the current loop up to the innermost loop containing a remaining exit.
2664ba319b5SDimitry Andric ///
2674ba319b5SDimitry Andric /// Because we've removed an exit from the loop, we may have changed the set of
2684ba319b5SDimitry Andric /// loops reachable and need to move the current loop up the loop nest or even
2694ba319b5SDimitry Andric /// to an entirely separate nest.
hoistLoopToNewParent(Loop & L,BasicBlock & Preheader,DominatorTree & DT,LoopInfo & LI)2704ba319b5SDimitry Andric static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
2714ba319b5SDimitry Andric DominatorTree &DT, LoopInfo &LI) {
2724ba319b5SDimitry Andric // If the loop is already at the top level, we can't hoist it anywhere.
2734ba319b5SDimitry Andric Loop *OldParentL = L.getParentLoop();
2744ba319b5SDimitry Andric if (!OldParentL)
2754ba319b5SDimitry Andric return;
2764ba319b5SDimitry Andric
2774ba319b5SDimitry Andric SmallVector<BasicBlock *, 4> Exits;
2784ba319b5SDimitry Andric L.getExitBlocks(Exits);
2794ba319b5SDimitry Andric Loop *NewParentL = nullptr;
2804ba319b5SDimitry Andric for (auto *ExitBB : Exits)
2814ba319b5SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB))
2824ba319b5SDimitry Andric if (!NewParentL || NewParentL->contains(ExitL))
2834ba319b5SDimitry Andric NewParentL = ExitL;
2844ba319b5SDimitry Andric
2854ba319b5SDimitry Andric if (NewParentL == OldParentL)
2864ba319b5SDimitry Andric return;
2874ba319b5SDimitry Andric
2884ba319b5SDimitry Andric // The new parent loop (if different) should always contain the old one.
2894ba319b5SDimitry Andric if (NewParentL)
2904ba319b5SDimitry Andric assert(NewParentL->contains(OldParentL) &&
2914ba319b5SDimitry Andric "Can only hoist this loop up the nest!");
2924ba319b5SDimitry Andric
2934ba319b5SDimitry Andric // The preheader will need to move with the body of this loop. However,
2944ba319b5SDimitry Andric // because it isn't in this loop we also need to update the primary loop map.
2954ba319b5SDimitry Andric assert(OldParentL == LI.getLoopFor(&Preheader) &&
2964ba319b5SDimitry Andric "Parent loop of this loop should contain this loop's preheader!");
2974ba319b5SDimitry Andric LI.changeLoopFor(&Preheader, NewParentL);
2984ba319b5SDimitry Andric
2994ba319b5SDimitry Andric // Remove this loop from its old parent.
3004ba319b5SDimitry Andric OldParentL->removeChildLoop(&L);
3014ba319b5SDimitry Andric
3024ba319b5SDimitry Andric // Add the loop either to the new parent or as a top-level loop.
3034ba319b5SDimitry Andric if (NewParentL)
3044ba319b5SDimitry Andric NewParentL->addChildLoop(&L);
3054ba319b5SDimitry Andric else
3064ba319b5SDimitry Andric LI.addTopLevelLoop(&L);
3074ba319b5SDimitry Andric
3084ba319b5SDimitry Andric // Remove this loops blocks from the old parent and every other loop up the
3094ba319b5SDimitry Andric // nest until reaching the new parent. Also update all of these
3104ba319b5SDimitry Andric // no-longer-containing loops to reflect the nesting change.
3114ba319b5SDimitry Andric for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
3124ba319b5SDimitry Andric OldContainingL = OldContainingL->getParentLoop()) {
3134ba319b5SDimitry Andric llvm::erase_if(OldContainingL->getBlocksVector(),
3144ba319b5SDimitry Andric [&](const BasicBlock *BB) {
3154ba319b5SDimitry Andric return BB == &Preheader || L.contains(BB);
3164ba319b5SDimitry Andric });
3174ba319b5SDimitry Andric
3184ba319b5SDimitry Andric OldContainingL->getBlocksSet().erase(&Preheader);
3194ba319b5SDimitry Andric for (BasicBlock *BB : L.blocks())
3204ba319b5SDimitry Andric OldContainingL->getBlocksSet().erase(BB);
3214ba319b5SDimitry Andric
3224ba319b5SDimitry Andric // Because we just hoisted a loop out of this one, we have essentially
3234ba319b5SDimitry Andric // created new exit paths from it. That means we need to form LCSSA PHI
3244ba319b5SDimitry Andric // nodes for values used in the no-longer-nested loop.
3254ba319b5SDimitry Andric formLCSSA(*OldContainingL, DT, &LI, nullptr);
3264ba319b5SDimitry Andric
3274ba319b5SDimitry Andric // We shouldn't need to form dedicated exits because the exit introduced
328*b5893f02SDimitry Andric // here is the (just split by unswitching) preheader. However, after trivial
329*b5893f02SDimitry Andric // unswitching it is possible to get new non-dedicated exits out of parent
330*b5893f02SDimitry Andric // loop so let's conservatively form dedicated exit blocks and figure out
331*b5893f02SDimitry Andric // if we can optimize later.
332*b5893f02SDimitry Andric formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
3334ba319b5SDimitry Andric }
3344ba319b5SDimitry Andric }
3354ba319b5SDimitry Andric
336f37b6182SDimitry Andric /// Unswitch a trivial branch if the condition is loop invariant.
337f37b6182SDimitry Andric ///
338f37b6182SDimitry Andric /// This routine should only be called when loop code leading to the branch has
339f37b6182SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
340f37b6182SDimitry Andric /// condition is invariant and one of the successors is a loop exit. This
341f37b6182SDimitry Andric /// allows us to unswitch without duplicating the loop, making it trivial.
342f37b6182SDimitry Andric ///
343f37b6182SDimitry Andric /// If this routine fails to unswitch the branch it returns false.
344f37b6182SDimitry Andric ///
345f37b6182SDimitry Andric /// If the branch can be unswitched, this routine splits the preheader and
346f37b6182SDimitry Andric /// hoists the branch above that split. Preserves loop simplified form
347f37b6182SDimitry Andric /// (splitting the exit block as necessary). It simplifies the branch within
348f37b6182SDimitry Andric /// the loop to an unconditional branch but doesn't remove it entirely. Further
349f37b6182SDimitry Andric /// cleanup can be done with some simplify-cfg like pass.
3504ba319b5SDimitry Andric ///
3514ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
3524ba319b5SDimitry Andric /// invalidated by this.
unswitchTrivialBranch(Loop & L,BranchInst & BI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)353f37b6182SDimitry Andric static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
354*b5893f02SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
355*b5893f02SDimitry Andric MemorySSAUpdater *MSSAU) {
356f37b6182SDimitry Andric assert(BI.isConditional() && "Can only unswitch a conditional branch!");
3574ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
358f37b6182SDimitry Andric
3594ba319b5SDimitry Andric // The loop invariant values that we want to unswitch.
3604ba319b5SDimitry Andric TinyPtrVector<Value *> Invariants;
361f37b6182SDimitry Andric
3624ba319b5SDimitry Andric // When true, we're fully unswitching the branch rather than just unswitching
3634ba319b5SDimitry Andric // some input conditions to the branch.
3644ba319b5SDimitry Andric bool FullUnswitch = false;
3654ba319b5SDimitry Andric
3664ba319b5SDimitry Andric if (L.isLoopInvariant(BI.getCondition())) {
3674ba319b5SDimitry Andric Invariants.push_back(BI.getCondition());
3684ba319b5SDimitry Andric FullUnswitch = true;
3694ba319b5SDimitry Andric } else {
3704ba319b5SDimitry Andric if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
3714ba319b5SDimitry Andric Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
3724ba319b5SDimitry Andric if (Invariants.empty())
3734ba319b5SDimitry Andric // Couldn't find invariant inputs!
374f37b6182SDimitry Andric return false;
3754ba319b5SDimitry Andric }
376f37b6182SDimitry Andric
3774ba319b5SDimitry Andric // Check that one of the branch's successors exits, and which one.
3784ba319b5SDimitry Andric bool ExitDirection = true;
379f37b6182SDimitry Andric int LoopExitSuccIdx = 0;
380f37b6182SDimitry Andric auto *LoopExitBB = BI.getSuccessor(0);
3814ba319b5SDimitry Andric if (L.contains(LoopExitBB)) {
3824ba319b5SDimitry Andric ExitDirection = false;
383f37b6182SDimitry Andric LoopExitSuccIdx = 1;
384f37b6182SDimitry Andric LoopExitBB = BI.getSuccessor(1);
3854ba319b5SDimitry Andric if (L.contains(LoopExitBB))
386f37b6182SDimitry Andric return false;
387f37b6182SDimitry Andric }
388f37b6182SDimitry Andric auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
3895517e702SDimitry Andric auto *ParentBB = BI.getParent();
3905517e702SDimitry Andric if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
391f37b6182SDimitry Andric return false;
392f37b6182SDimitry Andric
3934ba319b5SDimitry Andric // When unswitching only part of the branch's condition, we need the exit
3944ba319b5SDimitry Andric // block to be reached directly from the partially unswitched input. This can
3954ba319b5SDimitry Andric // be done when the exit block is along the true edge and the branch condition
3964ba319b5SDimitry Andric // is a graph of `or` operations, or the exit block is along the false edge
3974ba319b5SDimitry Andric // and the condition is a graph of `and` operations.
3984ba319b5SDimitry Andric if (!FullUnswitch) {
3994ba319b5SDimitry Andric if (ExitDirection) {
4004ba319b5SDimitry Andric if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or)
4014ba319b5SDimitry Andric return false;
4024ba319b5SDimitry Andric } else {
4034ba319b5SDimitry Andric if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::And)
4044ba319b5SDimitry Andric return false;
4054ba319b5SDimitry Andric }
4064ba319b5SDimitry Andric }
4074ba319b5SDimitry Andric
4084ba319b5SDimitry Andric LLVM_DEBUG({
4094ba319b5SDimitry Andric dbgs() << " unswitching trivial invariant conditions for: " << BI
4104ba319b5SDimitry Andric << "\n";
4114ba319b5SDimitry Andric for (Value *Invariant : Invariants) {
4124ba319b5SDimitry Andric dbgs() << " " << *Invariant << " == true";
4134ba319b5SDimitry Andric if (Invariant != Invariants.back())
4144ba319b5SDimitry Andric dbgs() << " ||";
4154ba319b5SDimitry Andric dbgs() << "\n";
4164ba319b5SDimitry Andric }
4174ba319b5SDimitry Andric });
4184ba319b5SDimitry Andric
4194ba319b5SDimitry Andric // If we have scalar evolutions, we need to invalidate them including this
4204ba319b5SDimitry Andric // loop and the loop containing the exit block.
4214ba319b5SDimitry Andric if (SE) {
4224ba319b5SDimitry Andric if (Loop *ExitL = LI.getLoopFor(LoopExitBB))
4234ba319b5SDimitry Andric SE->forgetLoop(ExitL);
4244ba319b5SDimitry Andric else
4254ba319b5SDimitry Andric // Forget the entire nest as this exits the entire nest.
4264ba319b5SDimitry Andric SE->forgetTopmostLoop(&L);
4274ba319b5SDimitry Andric }
428f37b6182SDimitry Andric
429*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
430*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
431*b5893f02SDimitry Andric
432f37b6182SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
433f37b6182SDimitry Andric // the conditional branch. We will change the preheader to have a conditional
434f37b6182SDimitry Andric // branch on LoopCond.
435f37b6182SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader();
436*b5893f02SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
437f37b6182SDimitry Andric
438f37b6182SDimitry Andric // Now that we have a place to insert the conditional branch, create a place
439f37b6182SDimitry Andric // to branch to: this is the exit block out of the loop that we are
440f37b6182SDimitry Andric // unswitching. We need to split this if there are other loop predecessors.
441f37b6182SDimitry Andric // Because the loop is in simplified form, *any* other predecessor is enough.
442f37b6182SDimitry Andric BasicBlock *UnswitchedBB;
4434ba319b5SDimitry Andric if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
4444ba319b5SDimitry Andric assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
4455517e702SDimitry Andric "A branch's parent isn't a predecessor!");
446f37b6182SDimitry Andric UnswitchedBB = LoopExitBB;
447f37b6182SDimitry Andric } else {
448*b5893f02SDimitry Andric UnswitchedBB =
449*b5893f02SDimitry Andric SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
450f37b6182SDimitry Andric }
451f37b6182SDimitry Andric
452*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
453*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
454*b5893f02SDimitry Andric
4554ba319b5SDimitry Andric // Actually move the invariant uses into the unswitched position. If possible,
4564ba319b5SDimitry Andric // we do this by moving the instructions, but when doing partial unswitching
4574ba319b5SDimitry Andric // we do it by building a new merge of the values in the unswitched position.
458f37b6182SDimitry Andric OldPH->getTerminator()->eraseFromParent();
4594ba319b5SDimitry Andric if (FullUnswitch) {
4604ba319b5SDimitry Andric // If fully unswitching, we can use the existing branch instruction.
4614ba319b5SDimitry Andric // Splice it into the old PH to gate reaching the new preheader and re-point
4624ba319b5SDimitry Andric // its successors.
4634ba319b5SDimitry Andric OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
4644ba319b5SDimitry Andric BI);
465*b5893f02SDimitry Andric if (MSSAU) {
466*b5893f02SDimitry Andric // Temporarily clone the terminator, to make MSSA update cheaper by
467*b5893f02SDimitry Andric // separating "insert edge" updates from "remove edge" ones.
468*b5893f02SDimitry Andric ParentBB->getInstList().push_back(BI.clone());
469*b5893f02SDimitry Andric } else {
470f37b6182SDimitry Andric // Create a new unconditional branch that will continue the loop as a new
471f37b6182SDimitry Andric // terminator.
472f37b6182SDimitry Andric BranchInst::Create(ContinueBB, ParentBB);
473*b5893f02SDimitry Andric }
474*b5893f02SDimitry Andric BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
475*b5893f02SDimitry Andric BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
4764ba319b5SDimitry Andric } else {
4774ba319b5SDimitry Andric // Only unswitching a subset of inputs to the condition, so we will need to
4784ba319b5SDimitry Andric // build a new branch that merges the invariant inputs.
4794ba319b5SDimitry Andric if (ExitDirection)
4804ba319b5SDimitry Andric assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
4814ba319b5SDimitry Andric Instruction::Or &&
4824ba319b5SDimitry Andric "Must have an `or` of `i1`s for the condition!");
4834ba319b5SDimitry Andric else
4844ba319b5SDimitry Andric assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
4854ba319b5SDimitry Andric Instruction::And &&
4864ba319b5SDimitry Andric "Must have an `and` of `i1`s for the condition!");
4874ba319b5SDimitry Andric buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
4884ba319b5SDimitry Andric *UnswitchedBB, *NewPH);
4894ba319b5SDimitry Andric }
490f37b6182SDimitry Andric
491*b5893f02SDimitry Andric // Update the dominator tree with the added edge.
492*b5893f02SDimitry Andric DT.insertEdge(OldPH, UnswitchedBB);
493*b5893f02SDimitry Andric
494*b5893f02SDimitry Andric // After the dominator tree was updated with the added edge, update MemorySSA
495*b5893f02SDimitry Andric // if available.
496*b5893f02SDimitry Andric if (MSSAU) {
497*b5893f02SDimitry Andric SmallVector<CFGUpdate, 1> Updates;
498*b5893f02SDimitry Andric Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
499*b5893f02SDimitry Andric MSSAU->applyInsertUpdates(Updates, DT);
500*b5893f02SDimitry Andric }
501*b5893f02SDimitry Andric
502*b5893f02SDimitry Andric // Finish updating dominator tree and memory ssa for full unswitch.
503*b5893f02SDimitry Andric if (FullUnswitch) {
504*b5893f02SDimitry Andric if (MSSAU) {
505*b5893f02SDimitry Andric // Remove the cloned branch instruction.
506*b5893f02SDimitry Andric ParentBB->getTerminator()->eraseFromParent();
507*b5893f02SDimitry Andric // Create unconditional branch now.
508*b5893f02SDimitry Andric BranchInst::Create(ContinueBB, ParentBB);
509*b5893f02SDimitry Andric MSSAU->removeEdge(ParentBB, LoopExitBB);
510*b5893f02SDimitry Andric }
511*b5893f02SDimitry Andric DT.deleteEdge(ParentBB, LoopExitBB);
512*b5893f02SDimitry Andric }
513*b5893f02SDimitry Andric
514*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
515*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
516*b5893f02SDimitry Andric
5175517e702SDimitry Andric // Rewrite the relevant PHI nodes.
5185517e702SDimitry Andric if (UnswitchedBB == LoopExitBB)
5195517e702SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
5205517e702SDimitry Andric else
5215517e702SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
5224ba319b5SDimitry Andric *ParentBB, *OldPH, FullUnswitch);
5235517e702SDimitry Andric
5244ba319b5SDimitry Andric // The constant we can replace all of our invariants with inside the loop
5254ba319b5SDimitry Andric // body. If any of the invariants have a value other than this the loop won't
5264ba319b5SDimitry Andric // be entered.
5274ba319b5SDimitry Andric ConstantInt *Replacement = ExitDirection
5284ba319b5SDimitry Andric ? ConstantInt::getFalse(BI.getContext())
5294ba319b5SDimitry Andric : ConstantInt::getTrue(BI.getContext());
530f37b6182SDimitry Andric
531f37b6182SDimitry Andric // Since this is an i1 condition we can also trivially replace uses of it
532f37b6182SDimitry Andric // within the loop with a constant.
5334ba319b5SDimitry Andric for (Value *Invariant : Invariants)
5344ba319b5SDimitry Andric replaceLoopInvariantUses(L, Invariant, *Replacement);
5354ba319b5SDimitry Andric
5364ba319b5SDimitry Andric // If this was full unswitching, we may have changed the nesting relationship
5374ba319b5SDimitry Andric // for this loop so hoist it to its correct parent if needed.
5384ba319b5SDimitry Andric if (FullUnswitch)
5394ba319b5SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI);
540f37b6182SDimitry Andric
541*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
542f37b6182SDimitry Andric ++NumTrivial;
543f37b6182SDimitry Andric ++NumBranches;
544f37b6182SDimitry Andric return true;
545f37b6182SDimitry Andric }
546f37b6182SDimitry Andric
547f37b6182SDimitry Andric /// Unswitch a trivial switch if the condition is loop invariant.
548f37b6182SDimitry Andric ///
549f37b6182SDimitry Andric /// This routine should only be called when loop code leading to the switch has
550f37b6182SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
551f37b6182SDimitry Andric /// condition is invariant and that at least one of the successors is a loop
552f37b6182SDimitry Andric /// exit. This allows us to unswitch without duplicating the loop, making it
553f37b6182SDimitry Andric /// trivial.
554f37b6182SDimitry Andric ///
555f37b6182SDimitry Andric /// If this routine fails to unswitch the switch it returns false.
556f37b6182SDimitry Andric ///
557f37b6182SDimitry Andric /// If the switch can be unswitched, this routine splits the preheader and
558f37b6182SDimitry Andric /// copies the switch above that split. If the default case is one of the
559f37b6182SDimitry Andric /// exiting cases, it copies the non-exiting cases and points them at the new
560f37b6182SDimitry Andric /// preheader. If the default case is not exiting, it copies the exiting cases
561f37b6182SDimitry Andric /// and points the default at the preheader. It preserves loop simplified form
562f37b6182SDimitry Andric /// (splitting the exit blocks as necessary). It simplifies the switch within
563f37b6182SDimitry Andric /// the loop by removing now-dead cases. If the default case is one of those
564f37b6182SDimitry Andric /// unswitched, it replaces its destination with a new basic block containing
565f37b6182SDimitry Andric /// only unreachable. Such basic blocks, while technically loop exits, are not
566f37b6182SDimitry Andric /// considered for unswitching so this is a stable transform and the same
567f37b6182SDimitry Andric /// switch will not be revisited. If after unswitching there is only a single
568f37b6182SDimitry Andric /// in-loop successor, the switch is further simplified to an unconditional
569f37b6182SDimitry Andric /// branch. Still more cleanup can be done with some simplify-cfg like pass.
5704ba319b5SDimitry Andric ///
5714ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
5724ba319b5SDimitry Andric /// invalidated by this.
unswitchTrivialSwitch(Loop & L,SwitchInst & SI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)573f37b6182SDimitry Andric static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
574*b5893f02SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
575*b5893f02SDimitry Andric MemorySSAUpdater *MSSAU) {
5764ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
577f37b6182SDimitry Andric Value *LoopCond = SI.getCondition();
578f37b6182SDimitry Andric
579f37b6182SDimitry Andric // If this isn't switching on an invariant condition, we can't unswitch it.
580f37b6182SDimitry Andric if (!L.isLoopInvariant(LoopCond))
581f37b6182SDimitry Andric return false;
582f37b6182SDimitry Andric
5835517e702SDimitry Andric auto *ParentBB = SI.getParent();
5845517e702SDimitry Andric
585f37b6182SDimitry Andric SmallVector<int, 4> ExitCaseIndices;
586f37b6182SDimitry Andric for (auto Case : SI.cases()) {
587f37b6182SDimitry Andric auto *SuccBB = Case.getCaseSuccessor();
5884ba319b5SDimitry Andric if (!L.contains(SuccBB) &&
5895517e702SDimitry Andric areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
590f37b6182SDimitry Andric ExitCaseIndices.push_back(Case.getCaseIndex());
591f37b6182SDimitry Andric }
592f37b6182SDimitry Andric BasicBlock *DefaultExitBB = nullptr;
5934ba319b5SDimitry Andric if (!L.contains(SI.getDefaultDest()) &&
5945517e702SDimitry Andric areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
595f37b6182SDimitry Andric !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
596f37b6182SDimitry Andric DefaultExitBB = SI.getDefaultDest();
597f37b6182SDimitry Andric else if (ExitCaseIndices.empty())
598f37b6182SDimitry Andric return false;
599f37b6182SDimitry Andric
600*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
601*b5893f02SDimitry Andric
602*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
603*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
604f37b6182SDimitry Andric
6054ba319b5SDimitry Andric // We may need to invalidate SCEVs for the outermost loop reached by any of
6064ba319b5SDimitry Andric // the exits.
6074ba319b5SDimitry Andric Loop *OuterL = &L;
6084ba319b5SDimitry Andric
6094ba319b5SDimitry Andric if (DefaultExitBB) {
6104ba319b5SDimitry Andric // Clear out the default destination temporarily to allow accurate
6114ba319b5SDimitry Andric // predecessor lists to be examined below.
6124ba319b5SDimitry Andric SI.setDefaultDest(nullptr);
6134ba319b5SDimitry Andric // Check the loop containing this exit.
6144ba319b5SDimitry Andric Loop *ExitL = LI.getLoopFor(DefaultExitBB);
6154ba319b5SDimitry Andric if (!ExitL || ExitL->contains(OuterL))
6164ba319b5SDimitry Andric OuterL = ExitL;
6174ba319b5SDimitry Andric }
6184ba319b5SDimitry Andric
6194ba319b5SDimitry Andric // Store the exit cases into a separate data structure and remove them from
6204ba319b5SDimitry Andric // the switch.
621f37b6182SDimitry Andric SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
622f37b6182SDimitry Andric ExitCases.reserve(ExitCaseIndices.size());
623f37b6182SDimitry Andric // We walk the case indices backwards so that we remove the last case first
624f37b6182SDimitry Andric // and don't disrupt the earlier indices.
625f37b6182SDimitry Andric for (unsigned Index : reverse(ExitCaseIndices)) {
626f37b6182SDimitry Andric auto CaseI = SI.case_begin() + Index;
6274ba319b5SDimitry Andric // Compute the outer loop from this exit.
6284ba319b5SDimitry Andric Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
6294ba319b5SDimitry Andric if (!ExitL || ExitL->contains(OuterL))
6304ba319b5SDimitry Andric OuterL = ExitL;
631f37b6182SDimitry Andric // Save the value of this case.
632f37b6182SDimitry Andric ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
633f37b6182SDimitry Andric // Delete the unswitched cases.
634f37b6182SDimitry Andric SI.removeCase(CaseI);
635f37b6182SDimitry Andric }
636f37b6182SDimitry Andric
6374ba319b5SDimitry Andric if (SE) {
6384ba319b5SDimitry Andric if (OuterL)
6394ba319b5SDimitry Andric SE->forgetLoop(OuterL);
6404ba319b5SDimitry Andric else
6414ba319b5SDimitry Andric SE->forgetTopmostLoop(&L);
6424ba319b5SDimitry Andric }
6434ba319b5SDimitry Andric
644f37b6182SDimitry Andric // Check if after this all of the remaining cases point at the same
645f37b6182SDimitry Andric // successor.
646f37b6182SDimitry Andric BasicBlock *CommonSuccBB = nullptr;
647f37b6182SDimitry Andric if (SI.getNumCases() > 0 &&
648f37b6182SDimitry Andric std::all_of(std::next(SI.case_begin()), SI.case_end(),
649f37b6182SDimitry Andric [&SI](const SwitchInst::CaseHandle &Case) {
650f37b6182SDimitry Andric return Case.getCaseSuccessor() ==
651f37b6182SDimitry Andric SI.case_begin()->getCaseSuccessor();
652f37b6182SDimitry Andric }))
653f37b6182SDimitry Andric CommonSuccBB = SI.case_begin()->getCaseSuccessor();
6544ba319b5SDimitry Andric if (!DefaultExitBB) {
655f37b6182SDimitry Andric // If we're not unswitching the default, we need it to match any cases to
656f37b6182SDimitry Andric // have a common successor or if we have no cases it is the common
657f37b6182SDimitry Andric // successor.
658f37b6182SDimitry Andric if (SI.getNumCases() == 0)
659f37b6182SDimitry Andric CommonSuccBB = SI.getDefaultDest();
660f37b6182SDimitry Andric else if (SI.getDefaultDest() != CommonSuccBB)
661f37b6182SDimitry Andric CommonSuccBB = nullptr;
662f37b6182SDimitry Andric }
663f37b6182SDimitry Andric
664f37b6182SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
665f37b6182SDimitry Andric // the switch.
666f37b6182SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader();
667*b5893f02SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
668f37b6182SDimitry Andric OldPH->getTerminator()->eraseFromParent();
669f37b6182SDimitry Andric
670f37b6182SDimitry Andric // Now add the unswitched switch.
671f37b6182SDimitry Andric auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
672f37b6182SDimitry Andric
6735517e702SDimitry Andric // Rewrite the IR for the unswitched basic blocks. This requires two steps.
6745517e702SDimitry Andric // First, we split any exit blocks with remaining in-loop predecessors. Then
6755517e702SDimitry Andric // we update the PHIs in one of two ways depending on if there was a split.
6765517e702SDimitry Andric // We walk in reverse so that we split in the same order as the cases
6775517e702SDimitry Andric // appeared. This is purely for convenience of reading the resulting IR, but
6785517e702SDimitry Andric // it doesn't cost anything really.
6795517e702SDimitry Andric SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
680f37b6182SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
681f37b6182SDimitry Andric // Handle the default exit if necessary.
682f37b6182SDimitry Andric // FIXME: It'd be great if we could merge this with the loop below but LLVM's
683f37b6182SDimitry Andric // ranges aren't quite powerful enough yet.
6845517e702SDimitry Andric if (DefaultExitBB) {
6855517e702SDimitry Andric if (pred_empty(DefaultExitBB)) {
6865517e702SDimitry Andric UnswitchedExitBBs.insert(DefaultExitBB);
6875517e702SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
6885517e702SDimitry Andric } else {
689f37b6182SDimitry Andric auto *SplitBB =
690*b5893f02SDimitry Andric SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
691*b5893f02SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
692*b5893f02SDimitry Andric *ParentBB, *OldPH,
693*b5893f02SDimitry Andric /*FullUnswitch*/ true);
694f37b6182SDimitry Andric DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
695f37b6182SDimitry Andric }
6965517e702SDimitry Andric }
697f37b6182SDimitry Andric // Note that we must use a reference in the for loop so that we update the
698f37b6182SDimitry Andric // container.
699f37b6182SDimitry Andric for (auto &CasePair : reverse(ExitCases)) {
700f37b6182SDimitry Andric // Grab a reference to the exit block in the pair so that we can update it.
7015517e702SDimitry Andric BasicBlock *ExitBB = CasePair.second;
702f37b6182SDimitry Andric
703f37b6182SDimitry Andric // If this case is the last edge into the exit block, we can simply reuse it
704f37b6182SDimitry Andric // as it will no longer be a loop exit. No mapping necessary.
7055517e702SDimitry Andric if (pred_empty(ExitBB)) {
7065517e702SDimitry Andric // Only rewrite once.
7075517e702SDimitry Andric if (UnswitchedExitBBs.insert(ExitBB).second)
7085517e702SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
709f37b6182SDimitry Andric continue;
7105517e702SDimitry Andric }
711f37b6182SDimitry Andric
712f37b6182SDimitry Andric // Otherwise we need to split the exit block so that we retain an exit
713f37b6182SDimitry Andric // block from the loop and a target for the unswitched condition.
714f37b6182SDimitry Andric BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
715f37b6182SDimitry Andric if (!SplitExitBB) {
716f37b6182SDimitry Andric // If this is the first time we see this, do the split and remember it.
717*b5893f02SDimitry Andric SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
718*b5893f02SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
719*b5893f02SDimitry Andric *ParentBB, *OldPH,
720*b5893f02SDimitry Andric /*FullUnswitch*/ true);
721f37b6182SDimitry Andric }
7225517e702SDimitry Andric // Update the case pair to point to the split block.
7235517e702SDimitry Andric CasePair.second = SplitExitBB;
724f37b6182SDimitry Andric }
725f37b6182SDimitry Andric
726f37b6182SDimitry Andric // Now add the unswitched cases. We do this in reverse order as we built them
727f37b6182SDimitry Andric // in reverse order.
728f37b6182SDimitry Andric for (auto CasePair : reverse(ExitCases)) {
729f37b6182SDimitry Andric ConstantInt *CaseVal = CasePair.first;
730f37b6182SDimitry Andric BasicBlock *UnswitchedBB = CasePair.second;
731f37b6182SDimitry Andric
732f37b6182SDimitry Andric NewSI->addCase(CaseVal, UnswitchedBB);
733f37b6182SDimitry Andric }
734f37b6182SDimitry Andric
735f37b6182SDimitry Andric // If the default was unswitched, re-point it and add explicit cases for
736f37b6182SDimitry Andric // entering the loop.
737f37b6182SDimitry Andric if (DefaultExitBB) {
738f37b6182SDimitry Andric NewSI->setDefaultDest(DefaultExitBB);
739f37b6182SDimitry Andric
740f37b6182SDimitry Andric // We removed all the exit cases, so we just copy the cases to the
741f37b6182SDimitry Andric // unswitched switch.
742f37b6182SDimitry Andric for (auto Case : SI.cases())
743f37b6182SDimitry Andric NewSI->addCase(Case.getCaseValue(), NewPH);
744f37b6182SDimitry Andric }
745f37b6182SDimitry Andric
746f37b6182SDimitry Andric // If we ended up with a common successor for every path through the switch
747f37b6182SDimitry Andric // after unswitching, rewrite it to an unconditional branch to make it easy
748f37b6182SDimitry Andric // to recognize. Otherwise we potentially have to recognize the default case
749f37b6182SDimitry Andric // pointing at unreachable and other complexity.
750f37b6182SDimitry Andric if (CommonSuccBB) {
751f37b6182SDimitry Andric BasicBlock *BB = SI.getParent();
7524ba319b5SDimitry Andric // We may have had multiple edges to this common successor block, so remove
7534ba319b5SDimitry Andric // them as predecessors. We skip the first one, either the default or the
7544ba319b5SDimitry Andric // actual first case.
7554ba319b5SDimitry Andric bool SkippedFirst = DefaultExitBB == nullptr;
7564ba319b5SDimitry Andric for (auto Case : SI.cases()) {
7574ba319b5SDimitry Andric assert(Case.getCaseSuccessor() == CommonSuccBB &&
7584ba319b5SDimitry Andric "Non-common successor!");
7594ba319b5SDimitry Andric (void)Case;
7604ba319b5SDimitry Andric if (!SkippedFirst) {
7614ba319b5SDimitry Andric SkippedFirst = true;
7624ba319b5SDimitry Andric continue;
7634ba319b5SDimitry Andric }
7644ba319b5SDimitry Andric CommonSuccBB->removePredecessor(BB,
7654ba319b5SDimitry Andric /*DontDeleteUselessPHIs*/ true);
7664ba319b5SDimitry Andric }
7674ba319b5SDimitry Andric // Now nuke the switch and replace it with a direct branch.
768f37b6182SDimitry Andric SI.eraseFromParent();
769f37b6182SDimitry Andric BranchInst::Create(CommonSuccBB, BB);
7704ba319b5SDimitry Andric } else if (DefaultExitBB) {
7714ba319b5SDimitry Andric assert(SI.getNumCases() > 0 &&
7724ba319b5SDimitry Andric "If we had no cases we'd have a common successor!");
7734ba319b5SDimitry Andric // Move the last case to the default successor. This is valid as if the
7744ba319b5SDimitry Andric // default got unswitched it cannot be reached. This has the advantage of
7754ba319b5SDimitry Andric // being simple and keeping the number of edges from this switch to
7764ba319b5SDimitry Andric // successors the same, and avoiding any PHI update complexity.
7774ba319b5SDimitry Andric auto LastCaseI = std::prev(SI.case_end());
7784ba319b5SDimitry Andric SI.setDefaultDest(LastCaseI->getCaseSuccessor());
7794ba319b5SDimitry Andric SI.removeCase(LastCaseI);
780f37b6182SDimitry Andric }
781f37b6182SDimitry Andric
7824ba319b5SDimitry Andric // Walk the unswitched exit blocks and the unswitched split blocks and update
7834ba319b5SDimitry Andric // the dominator tree based on the CFG edits. While we are walking unordered
7844ba319b5SDimitry Andric // containers here, the API for applyUpdates takes an unordered list of
7854ba319b5SDimitry Andric // updates and requires them to not contain duplicates.
7864ba319b5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
7874ba319b5SDimitry Andric for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
7884ba319b5SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
7894ba319b5SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
7904ba319b5SDimitry Andric }
7914ba319b5SDimitry Andric for (auto SplitUnswitchedPair : SplitExitBBMap) {
7924ba319b5SDimitry Andric auto *UnswitchedBB = SplitUnswitchedPair.second;
7934ba319b5SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
7944ba319b5SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
7954ba319b5SDimitry Andric }
7964ba319b5SDimitry Andric DT.applyUpdates(DTUpdates);
797*b5893f02SDimitry Andric
798*b5893f02SDimitry Andric if (MSSAU) {
799*b5893f02SDimitry Andric MSSAU->applyUpdates(DTUpdates, DT);
800*b5893f02SDimitry Andric if (VerifyMemorySSA)
801*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
802*b5893f02SDimitry Andric }
803*b5893f02SDimitry Andric
8044ba319b5SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast));
8054ba319b5SDimitry Andric
8064ba319b5SDimitry Andric // We may have changed the nesting relationship for this loop so hoist it to
8074ba319b5SDimitry Andric // its correct parent if needed.
8084ba319b5SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI);
8094ba319b5SDimitry Andric
810f37b6182SDimitry Andric ++NumTrivial;
811f37b6182SDimitry Andric ++NumSwitches;
812*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
813f37b6182SDimitry Andric return true;
814f37b6182SDimitry Andric }
815f37b6182SDimitry Andric
816f37b6182SDimitry Andric /// This routine scans the loop to find a branch or switch which occurs before
817f37b6182SDimitry Andric /// any side effects occur. These can potentially be unswitched without
818f37b6182SDimitry Andric /// duplicating the loop. If a branch or switch is successfully unswitched the
819f37b6182SDimitry Andric /// scanning continues to see if subsequent branches or switches have become
820f37b6182SDimitry Andric /// trivial. Once all trivial candidates have been unswitched, this routine
821f37b6182SDimitry Andric /// returns.
822f37b6182SDimitry Andric ///
823f37b6182SDimitry Andric /// The return value indicates whether anything was unswitched (and therefore
824f37b6182SDimitry Andric /// changed).
8254ba319b5SDimitry Andric ///
8264ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
8274ba319b5SDimitry Andric /// invalidated by this.
unswitchAllTrivialConditions(Loop & L,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)828f37b6182SDimitry Andric static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
829*b5893f02SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
830*b5893f02SDimitry Andric MemorySSAUpdater *MSSAU) {
831f37b6182SDimitry Andric bool Changed = false;
832f37b6182SDimitry Andric
833f37b6182SDimitry Andric // If loop header has only one reachable successor we should keep looking for
834f37b6182SDimitry Andric // trivial condition candidates in the successor as well. An alternative is
835f37b6182SDimitry Andric // to constant fold conditions and merge successors into loop header (then we
836f37b6182SDimitry Andric // only need to check header's terminator). The reason for not doing this in
837f37b6182SDimitry Andric // LoopUnswitch pass is that it could potentially break LoopPassManager's
838f37b6182SDimitry Andric // invariants. Folding dead branches could either eliminate the current loop
839f37b6182SDimitry Andric // or make other loops unreachable. LCSSA form might also not be preserved
840f37b6182SDimitry Andric // after deleting branches. The following code keeps traversing loop header's
841f37b6182SDimitry Andric // successors until it finds the trivial condition candidate (condition that
842f37b6182SDimitry Andric // is not a constant). Since unswitching generates branches with constant
843f37b6182SDimitry Andric // conditions, this scenario could be very common in practice.
844f37b6182SDimitry Andric BasicBlock *CurrentBB = L.getHeader();
845f37b6182SDimitry Andric SmallPtrSet<BasicBlock *, 8> Visited;
846f37b6182SDimitry Andric Visited.insert(CurrentBB);
847f37b6182SDimitry Andric do {
848f37b6182SDimitry Andric // Check if there are any side-effecting instructions (e.g. stores, calls,
849f37b6182SDimitry Andric // volatile loads) in the part of the loop that the code *would* execute
850f37b6182SDimitry Andric // without unswitching.
851f37b6182SDimitry Andric if (llvm::any_of(*CurrentBB,
852f37b6182SDimitry Andric [](Instruction &I) { return I.mayHaveSideEffects(); }))
853f37b6182SDimitry Andric return Changed;
854f37b6182SDimitry Andric
855*b5893f02SDimitry Andric Instruction *CurrentTerm = CurrentBB->getTerminator();
856f37b6182SDimitry Andric
857f37b6182SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
858f37b6182SDimitry Andric // Don't bother trying to unswitch past a switch with a constant
859f37b6182SDimitry Andric // condition. This should be removed prior to running this pass by
860f37b6182SDimitry Andric // simplify-cfg.
861f37b6182SDimitry Andric if (isa<Constant>(SI->getCondition()))
862f37b6182SDimitry Andric return Changed;
863f37b6182SDimitry Andric
864*b5893f02SDimitry Andric if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
8654ba319b5SDimitry Andric // Couldn't unswitch this one so we're done.
866f37b6182SDimitry Andric return Changed;
867f37b6182SDimitry Andric
868f37b6182SDimitry Andric // Mark that we managed to unswitch something.
869f37b6182SDimitry Andric Changed = true;
870f37b6182SDimitry Andric
871f37b6182SDimitry Andric // If unswitching turned the terminator into an unconditional branch then
872f37b6182SDimitry Andric // we can continue. The unswitching logic specifically works to fold any
873f37b6182SDimitry Andric // cases it can into an unconditional branch to make it easier to
874f37b6182SDimitry Andric // recognize here.
875f37b6182SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
876f37b6182SDimitry Andric if (!BI || BI->isConditional())
877f37b6182SDimitry Andric return Changed;
878f37b6182SDimitry Andric
879f37b6182SDimitry Andric CurrentBB = BI->getSuccessor(0);
880f37b6182SDimitry Andric continue;
881f37b6182SDimitry Andric }
882f37b6182SDimitry Andric
883f37b6182SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentTerm);
884f37b6182SDimitry Andric if (!BI)
885f37b6182SDimitry Andric // We do not understand other terminator instructions.
886f37b6182SDimitry Andric return Changed;
887f37b6182SDimitry Andric
888f37b6182SDimitry Andric // Don't bother trying to unswitch past an unconditional branch or a branch
889f37b6182SDimitry Andric // with a constant value. These should be removed by simplify-cfg prior to
890f37b6182SDimitry Andric // running this pass.
891f37b6182SDimitry Andric if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
892f37b6182SDimitry Andric return Changed;
893f37b6182SDimitry Andric
894f37b6182SDimitry Andric // Found a trivial condition candidate: non-foldable conditional branch. If
895f37b6182SDimitry Andric // we fail to unswitch this, we can't do anything else that is trivial.
896*b5893f02SDimitry Andric if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
897f37b6182SDimitry Andric return Changed;
898f37b6182SDimitry Andric
899f37b6182SDimitry Andric // Mark that we managed to unswitch something.
900f37b6182SDimitry Andric Changed = true;
901f37b6182SDimitry Andric
9024ba319b5SDimitry Andric // If we only unswitched some of the conditions feeding the branch, we won't
9034ba319b5SDimitry Andric // have collapsed it to a single successor.
904f37b6182SDimitry Andric BI = cast<BranchInst>(CurrentBB->getTerminator());
9054ba319b5SDimitry Andric if (BI->isConditional())
9064ba319b5SDimitry Andric return Changed;
9074ba319b5SDimitry Andric
9084ba319b5SDimitry Andric // Follow the newly unconditional branch into its successor.
909f37b6182SDimitry Andric CurrentBB = BI->getSuccessor(0);
910f37b6182SDimitry Andric
911f37b6182SDimitry Andric // When continuing, if we exit the loop or reach a previous visited block,
912f37b6182SDimitry Andric // then we can not reach any trivial condition candidates (unfoldable
913f37b6182SDimitry Andric // branch instructions or switch instructions) and no unswitch can happen.
914f37b6182SDimitry Andric } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
915f37b6182SDimitry Andric
916f37b6182SDimitry Andric return Changed;
917f37b6182SDimitry Andric }
918f37b6182SDimitry Andric
9192cab237bSDimitry Andric /// Build the cloned blocks for an unswitched copy of the given loop.
9202cab237bSDimitry Andric ///
9212cab237bSDimitry Andric /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
9222cab237bSDimitry Andric /// after the split block (`SplitBB`) that will be used to select between the
9232cab237bSDimitry Andric /// cloned and original loop.
9242cab237bSDimitry Andric ///
9252cab237bSDimitry Andric /// This routine handles cloning all of the necessary loop blocks and exit
9262cab237bSDimitry Andric /// blocks including rewriting their instructions and the relevant PHI nodes.
9274ba319b5SDimitry Andric /// Any loop blocks or exit blocks which are dominated by a different successor
9284ba319b5SDimitry Andric /// than the one for this clone of the loop blocks can be trivially skipped. We
9294ba319b5SDimitry Andric /// use the `DominatingSucc` map to determine whether a block satisfies that
9304ba319b5SDimitry Andric /// property with a simple map lookup.
9314ba319b5SDimitry Andric ///
9324ba319b5SDimitry Andric /// It also correctly creates the unconditional branch in the cloned
9332cab237bSDimitry Andric /// unswitched parent block to only point at the unswitched successor.
9342cab237bSDimitry Andric ///
9352cab237bSDimitry Andric /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
9362cab237bSDimitry Andric /// block splitting is correctly reflected in `LoopInfo`, essentially all of
9372cab237bSDimitry Andric /// the cloned blocks (and their loops) are left without full `LoopInfo`
9382cab237bSDimitry Andric /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
9392cab237bSDimitry Andric /// blocks to them but doesn't create the cloned `DominatorTree` structure and
9402cab237bSDimitry Andric /// instead the caller must recompute an accurate DT. It *does* correctly
9412cab237bSDimitry Andric /// update the `AssumptionCache` provided in `AC`.
buildClonedLoopBlocks(Loop & L,BasicBlock * LoopPH,BasicBlock * SplitBB,ArrayRef<BasicBlock * > ExitBlocks,BasicBlock * ParentBB,BasicBlock * UnswitchedSuccBB,BasicBlock * ContinueSuccBB,const SmallDenseMap<BasicBlock *,BasicBlock *,16> & DominatingSucc,ValueToValueMapTy & VMap,SmallVectorImpl<DominatorTree::UpdateType> & DTUpdates,AssumptionCache & AC,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)9422cab237bSDimitry Andric static BasicBlock *buildClonedLoopBlocks(
9432cab237bSDimitry Andric Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
9442cab237bSDimitry Andric ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
9452cab237bSDimitry Andric BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
9464ba319b5SDimitry Andric const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
9474ba319b5SDimitry Andric ValueToValueMapTy &VMap,
9484ba319b5SDimitry Andric SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
949*b5893f02SDimitry Andric DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
9502cab237bSDimitry Andric SmallVector<BasicBlock *, 4> NewBlocks;
9512cab237bSDimitry Andric NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
9522cab237bSDimitry Andric
9532cab237bSDimitry Andric // We will need to clone a bunch of blocks, wrap up the clone operation in
9542cab237bSDimitry Andric // a helper.
9552cab237bSDimitry Andric auto CloneBlock = [&](BasicBlock *OldBB) {
9562cab237bSDimitry Andric // Clone the basic block and insert it before the new preheader.
9572cab237bSDimitry Andric BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
9582cab237bSDimitry Andric NewBB->moveBefore(LoopPH);
9592cab237bSDimitry Andric
9602cab237bSDimitry Andric // Record this block and the mapping.
9612cab237bSDimitry Andric NewBlocks.push_back(NewBB);
9622cab237bSDimitry Andric VMap[OldBB] = NewBB;
9632cab237bSDimitry Andric
9642cab237bSDimitry Andric return NewBB;
9652cab237bSDimitry Andric };
9662cab237bSDimitry Andric
9674ba319b5SDimitry Andric // We skip cloning blocks when they have a dominating succ that is not the
9684ba319b5SDimitry Andric // succ we are cloning for.
9694ba319b5SDimitry Andric auto SkipBlock = [&](BasicBlock *BB) {
9704ba319b5SDimitry Andric auto It = DominatingSucc.find(BB);
9714ba319b5SDimitry Andric return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
9724ba319b5SDimitry Andric };
9734ba319b5SDimitry Andric
9742cab237bSDimitry Andric // First, clone the preheader.
9752cab237bSDimitry Andric auto *ClonedPH = CloneBlock(LoopPH);
9762cab237bSDimitry Andric
9772cab237bSDimitry Andric // Then clone all the loop blocks, skipping the ones that aren't necessary.
9782cab237bSDimitry Andric for (auto *LoopBB : L.blocks())
9794ba319b5SDimitry Andric if (!SkipBlock(LoopBB))
9802cab237bSDimitry Andric CloneBlock(LoopBB);
9812cab237bSDimitry Andric
9822cab237bSDimitry Andric // Split all the loop exit edges so that when we clone the exit blocks, if
9832cab237bSDimitry Andric // any of the exit blocks are *also* a preheader for some other loop, we
9842cab237bSDimitry Andric // don't create multiple predecessors entering the loop header.
9852cab237bSDimitry Andric for (auto *ExitBB : ExitBlocks) {
9864ba319b5SDimitry Andric if (SkipBlock(ExitBB))
9872cab237bSDimitry Andric continue;
9882cab237bSDimitry Andric
9892cab237bSDimitry Andric // When we are going to clone an exit, we don't need to clone all the
9902cab237bSDimitry Andric // instructions in the exit block and we want to ensure we have an easy
9912cab237bSDimitry Andric // place to merge the CFG, so split the exit first. This is always safe to
9922cab237bSDimitry Andric // do because there cannot be any non-loop predecessors of a loop exit in
9932cab237bSDimitry Andric // loop simplified form.
994*b5893f02SDimitry Andric auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
9952cab237bSDimitry Andric
9962cab237bSDimitry Andric // Rearrange the names to make it easier to write test cases by having the
9972cab237bSDimitry Andric // exit block carry the suffix rather than the merge block carrying the
9982cab237bSDimitry Andric // suffix.
9992cab237bSDimitry Andric MergeBB->takeName(ExitBB);
10002cab237bSDimitry Andric ExitBB->setName(Twine(MergeBB->getName()) + ".split");
10012cab237bSDimitry Andric
10022cab237bSDimitry Andric // Now clone the original exit block.
10032cab237bSDimitry Andric auto *ClonedExitBB = CloneBlock(ExitBB);
10042cab237bSDimitry Andric assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
10052cab237bSDimitry Andric "Exit block should have been split to have one successor!");
10062cab237bSDimitry Andric assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
10072cab237bSDimitry Andric "Cloned exit block has the wrong successor!");
10082cab237bSDimitry Andric
10092cab237bSDimitry Andric // Remap any cloned instructions and create a merge phi node for them.
10102cab237bSDimitry Andric for (auto ZippedInsts : llvm::zip_first(
10112cab237bSDimitry Andric llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
10122cab237bSDimitry Andric llvm::make_range(ClonedExitBB->begin(),
10132cab237bSDimitry Andric std::prev(ClonedExitBB->end())))) {
10142cab237bSDimitry Andric Instruction &I = std::get<0>(ZippedInsts);
10152cab237bSDimitry Andric Instruction &ClonedI = std::get<1>(ZippedInsts);
10162cab237bSDimitry Andric
10172cab237bSDimitry Andric // The only instructions in the exit block should be PHI nodes and
10182cab237bSDimitry Andric // potentially a landing pad.
10192cab237bSDimitry Andric assert(
10202cab237bSDimitry Andric (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
10212cab237bSDimitry Andric "Bad instruction in exit block!");
10222cab237bSDimitry Andric // We should have a value map between the instruction and its clone.
10232cab237bSDimitry Andric assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
10242cab237bSDimitry Andric
10252cab237bSDimitry Andric auto *MergePN =
10262cab237bSDimitry Andric PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
10272cab237bSDimitry Andric &*MergeBB->getFirstInsertionPt());
10282cab237bSDimitry Andric I.replaceAllUsesWith(MergePN);
10292cab237bSDimitry Andric MergePN->addIncoming(&I, ExitBB);
10302cab237bSDimitry Andric MergePN->addIncoming(&ClonedI, ClonedExitBB);
10312cab237bSDimitry Andric }
10322cab237bSDimitry Andric }
10332cab237bSDimitry Andric
10342cab237bSDimitry Andric // Rewrite the instructions in the cloned blocks to refer to the instructions
10352cab237bSDimitry Andric // in the cloned blocks. We have to do this as a second pass so that we have
10362cab237bSDimitry Andric // everything available. Also, we have inserted new instructions which may
10372cab237bSDimitry Andric // include assume intrinsics, so we update the assumption cache while
10382cab237bSDimitry Andric // processing this.
10392cab237bSDimitry Andric for (auto *ClonedBB : NewBlocks)
10402cab237bSDimitry Andric for (Instruction &I : *ClonedBB) {
10412cab237bSDimitry Andric RemapInstruction(&I, VMap,
10422cab237bSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
10432cab237bSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I))
10442cab237bSDimitry Andric if (II->getIntrinsicID() == Intrinsic::assume)
10452cab237bSDimitry Andric AC.registerAssumption(II);
10462cab237bSDimitry Andric }
10472cab237bSDimitry Andric
10484ba319b5SDimitry Andric // Update any PHI nodes in the cloned successors of the skipped blocks to not
10494ba319b5SDimitry Andric // have spurious incoming values.
10504ba319b5SDimitry Andric for (auto *LoopBB : L.blocks())
10514ba319b5SDimitry Andric if (SkipBlock(LoopBB))
10524ba319b5SDimitry Andric for (auto *SuccBB : successors(LoopBB))
10534ba319b5SDimitry Andric if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
10544ba319b5SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis())
10554ba319b5SDimitry Andric PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
10564ba319b5SDimitry Andric
10574ba319b5SDimitry Andric // Remove the cloned parent as a predecessor of any successor we ended up
10584ba319b5SDimitry Andric // cloning other than the unswitched one.
10592cab237bSDimitry Andric auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
10604ba319b5SDimitry Andric for (auto *SuccBB : successors(ParentBB)) {
10614ba319b5SDimitry Andric if (SuccBB == UnswitchedSuccBB)
10624ba319b5SDimitry Andric continue;
10634ba319b5SDimitry Andric
10644ba319b5SDimitry Andric auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
10654ba319b5SDimitry Andric if (!ClonedSuccBB)
10664ba319b5SDimitry Andric continue;
10674ba319b5SDimitry Andric
10684ba319b5SDimitry Andric ClonedSuccBB->removePredecessor(ClonedParentBB,
10692cab237bSDimitry Andric /*DontDeleteUselessPHIs*/ true);
10704ba319b5SDimitry Andric }
10714ba319b5SDimitry Andric
10724ba319b5SDimitry Andric // Replace the cloned branch with an unconditional branch to the cloned
10732cab237bSDimitry Andric // unswitched successor.
10742cab237bSDimitry Andric auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
10752cab237bSDimitry Andric ClonedParentBB->getTerminator()->eraseFromParent();
10762cab237bSDimitry Andric BranchInst::Create(ClonedSuccBB, ClonedParentBB);
10772cab237bSDimitry Andric
10784ba319b5SDimitry Andric // If there are duplicate entries in the PHI nodes because of multiple edges
10794ba319b5SDimitry Andric // to the unswitched successor, we need to nuke all but one as we replaced it
10804ba319b5SDimitry Andric // with a direct branch.
10814ba319b5SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis()) {
10824ba319b5SDimitry Andric bool Found = false;
10834ba319b5SDimitry Andric // Loop over the incoming operands backwards so we can easily delete as we
10844ba319b5SDimitry Andric // go without invalidating the index.
10854ba319b5SDimitry Andric for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
10864ba319b5SDimitry Andric if (PN.getIncomingBlock(i) != ClonedParentBB)
10874ba319b5SDimitry Andric continue;
10884ba319b5SDimitry Andric if (!Found) {
10894ba319b5SDimitry Andric Found = true;
10904ba319b5SDimitry Andric continue;
10914ba319b5SDimitry Andric }
10924ba319b5SDimitry Andric PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
10934ba319b5SDimitry Andric }
10944ba319b5SDimitry Andric }
10954ba319b5SDimitry Andric
10964ba319b5SDimitry Andric // Record the domtree updates for the new blocks.
10974ba319b5SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet;
10984ba319b5SDimitry Andric for (auto *ClonedBB : NewBlocks) {
10994ba319b5SDimitry Andric for (auto *SuccBB : successors(ClonedBB))
11004ba319b5SDimitry Andric if (SuccSet.insert(SuccBB).second)
11014ba319b5SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
11024ba319b5SDimitry Andric SuccSet.clear();
11034ba319b5SDimitry Andric }
11042cab237bSDimitry Andric
11052cab237bSDimitry Andric return ClonedPH;
11062cab237bSDimitry Andric }
11072cab237bSDimitry Andric
11082cab237bSDimitry Andric /// Recursively clone the specified loop and all of its children.
11092cab237bSDimitry Andric ///
11102cab237bSDimitry Andric /// The target parent loop for the clone should be provided, or can be null if
11112cab237bSDimitry Andric /// the clone is a top-level loop. While cloning, all the blocks are mapped
11122cab237bSDimitry Andric /// with the provided value map. The entire original loop must be present in
11132cab237bSDimitry Andric /// the value map. The cloned loop is returned.
cloneLoopNest(Loop & OrigRootL,Loop * RootParentL,const ValueToValueMapTy & VMap,LoopInfo & LI)11142cab237bSDimitry Andric static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
11152cab237bSDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI) {
11162cab237bSDimitry Andric auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
11172cab237bSDimitry Andric assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
11182cab237bSDimitry Andric ClonedL.reserveBlocks(OrigL.getNumBlocks());
11192cab237bSDimitry Andric for (auto *BB : OrigL.blocks()) {
11202cab237bSDimitry Andric auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
11212cab237bSDimitry Andric ClonedL.addBlockEntry(ClonedBB);
11224ba319b5SDimitry Andric if (LI.getLoopFor(BB) == &OrigL)
11232cab237bSDimitry Andric LI.changeLoopFor(ClonedBB, &ClonedL);
11242cab237bSDimitry Andric }
11252cab237bSDimitry Andric };
11262cab237bSDimitry Andric
11272cab237bSDimitry Andric // We specially handle the first loop because it may get cloned into
11282cab237bSDimitry Andric // a different parent and because we most commonly are cloning leaf loops.
11292cab237bSDimitry Andric Loop *ClonedRootL = LI.AllocateLoop();
11302cab237bSDimitry Andric if (RootParentL)
11312cab237bSDimitry Andric RootParentL->addChildLoop(ClonedRootL);
11322cab237bSDimitry Andric else
11332cab237bSDimitry Andric LI.addTopLevelLoop(ClonedRootL);
11342cab237bSDimitry Andric AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
11352cab237bSDimitry Andric
11362cab237bSDimitry Andric if (OrigRootL.empty())
11372cab237bSDimitry Andric return ClonedRootL;
11382cab237bSDimitry Andric
11392cab237bSDimitry Andric // If we have a nest, we can quickly clone the entire loop nest using an
11402cab237bSDimitry Andric // iterative approach because it is a tree. We keep the cloned parent in the
11412cab237bSDimitry Andric // data structure to avoid repeatedly querying through a map to find it.
11422cab237bSDimitry Andric SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
11432cab237bSDimitry Andric // Build up the loops to clone in reverse order as we'll clone them from the
11442cab237bSDimitry Andric // back.
11452cab237bSDimitry Andric for (Loop *ChildL : llvm::reverse(OrigRootL))
11462cab237bSDimitry Andric LoopsToClone.push_back({ClonedRootL, ChildL});
11472cab237bSDimitry Andric do {
11482cab237bSDimitry Andric Loop *ClonedParentL, *L;
11492cab237bSDimitry Andric std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
11502cab237bSDimitry Andric Loop *ClonedL = LI.AllocateLoop();
11512cab237bSDimitry Andric ClonedParentL->addChildLoop(ClonedL);
11522cab237bSDimitry Andric AddClonedBlocksToLoop(*L, *ClonedL);
11532cab237bSDimitry Andric for (Loop *ChildL : llvm::reverse(*L))
11542cab237bSDimitry Andric LoopsToClone.push_back({ClonedL, ChildL});
11552cab237bSDimitry Andric } while (!LoopsToClone.empty());
11562cab237bSDimitry Andric
11572cab237bSDimitry Andric return ClonedRootL;
11582cab237bSDimitry Andric }
11592cab237bSDimitry Andric
11602cab237bSDimitry Andric /// Build the cloned loops of an original loop from unswitching.
11612cab237bSDimitry Andric ///
11622cab237bSDimitry Andric /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
11632cab237bSDimitry Andric /// operation. We need to re-verify that there even is a loop (as the backedge
11642cab237bSDimitry Andric /// may not have been cloned), and even if there are remaining backedges the
11652cab237bSDimitry Andric /// backedge set may be different. However, we know that each child loop is
11662cab237bSDimitry Andric /// undisturbed, we only need to find where to place each child loop within
11672cab237bSDimitry Andric /// either any parent loop or within a cloned version of the original loop.
11682cab237bSDimitry Andric ///
11692cab237bSDimitry Andric /// Because child loops may end up cloned outside of any cloned version of the
11702cab237bSDimitry Andric /// original loop, multiple cloned sibling loops may be created. All of them
11712cab237bSDimitry Andric /// are returned so that the newly introduced loop nest roots can be
11722cab237bSDimitry Andric /// identified.
buildClonedLoops(Loop & OrigL,ArrayRef<BasicBlock * > ExitBlocks,const ValueToValueMapTy & VMap,LoopInfo & LI,SmallVectorImpl<Loop * > & NonChildClonedLoops)11734ba319b5SDimitry Andric static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
11742cab237bSDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI,
11752cab237bSDimitry Andric SmallVectorImpl<Loop *> &NonChildClonedLoops) {
11762cab237bSDimitry Andric Loop *ClonedL = nullptr;
11772cab237bSDimitry Andric
11782cab237bSDimitry Andric auto *OrigPH = OrigL.getLoopPreheader();
11792cab237bSDimitry Andric auto *OrigHeader = OrigL.getHeader();
11802cab237bSDimitry Andric
11812cab237bSDimitry Andric auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
11822cab237bSDimitry Andric auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
11832cab237bSDimitry Andric
11842cab237bSDimitry Andric // We need to know the loops of the cloned exit blocks to even compute the
11852cab237bSDimitry Andric // accurate parent loop. If we only clone exits to some parent of the
11862cab237bSDimitry Andric // original parent, we want to clone into that outer loop. We also keep track
11872cab237bSDimitry Andric // of the loops that our cloned exit blocks participate in.
11882cab237bSDimitry Andric Loop *ParentL = nullptr;
11892cab237bSDimitry Andric SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
11902cab237bSDimitry Andric SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
11912cab237bSDimitry Andric ClonedExitsInLoops.reserve(ExitBlocks.size());
11922cab237bSDimitry Andric for (auto *ExitBB : ExitBlocks)
11932cab237bSDimitry Andric if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
11942cab237bSDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
11952cab237bSDimitry Andric ExitLoopMap[ClonedExitBB] = ExitL;
11962cab237bSDimitry Andric ClonedExitsInLoops.push_back(ClonedExitBB);
11972cab237bSDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
11982cab237bSDimitry Andric ParentL = ExitL;
11992cab237bSDimitry Andric }
12002cab237bSDimitry Andric assert((!ParentL || ParentL == OrigL.getParentLoop() ||
12012cab237bSDimitry Andric ParentL->contains(OrigL.getParentLoop())) &&
12022cab237bSDimitry Andric "The computed parent loop should always contain (or be) the parent of "
12032cab237bSDimitry Andric "the original loop.");
12042cab237bSDimitry Andric
12052cab237bSDimitry Andric // We build the set of blocks dominated by the cloned header from the set of
12062cab237bSDimitry Andric // cloned blocks out of the original loop. While not all of these will
12072cab237bSDimitry Andric // necessarily be in the cloned loop, it is enough to establish that they
12082cab237bSDimitry Andric // aren't in unreachable cycles, etc.
12092cab237bSDimitry Andric SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
12102cab237bSDimitry Andric for (auto *BB : OrigL.blocks())
12112cab237bSDimitry Andric if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
12122cab237bSDimitry Andric ClonedLoopBlocks.insert(ClonedBB);
12132cab237bSDimitry Andric
12142cab237bSDimitry Andric // Rebuild the set of blocks that will end up in the cloned loop. We may have
12152cab237bSDimitry Andric // skipped cloning some region of this loop which can in turn skip some of
12162cab237bSDimitry Andric // the backedges so we have to rebuild the blocks in the loop based on the
12172cab237bSDimitry Andric // backedges that remain after cloning.
12182cab237bSDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
12192cab237bSDimitry Andric SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
12202cab237bSDimitry Andric for (auto *Pred : predecessors(ClonedHeader)) {
12212cab237bSDimitry Andric // The only possible non-loop header predecessor is the preheader because
12222cab237bSDimitry Andric // we know we cloned the loop in simplified form.
12232cab237bSDimitry Andric if (Pred == ClonedPH)
12242cab237bSDimitry Andric continue;
12252cab237bSDimitry Andric
12262cab237bSDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor
12272cab237bSDimitry Andric // should be the preheader.
12282cab237bSDimitry Andric assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
12292cab237bSDimitry Andric "header other than the preheader "
12302cab237bSDimitry Andric "that is not part of the loop!");
12312cab237bSDimitry Andric
12322cab237bSDimitry Andric // Insert this block into the loop set and on the first visit (and if it
12332cab237bSDimitry Andric // isn't the header we're currently walking) put it into the worklist to
12342cab237bSDimitry Andric // recurse through.
12352cab237bSDimitry Andric if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
12362cab237bSDimitry Andric Worklist.push_back(Pred);
12372cab237bSDimitry Andric }
12382cab237bSDimitry Andric
12392cab237bSDimitry Andric // If we had any backedges then there *is* a cloned loop. Put the header into
12402cab237bSDimitry Andric // the loop set and then walk the worklist backwards to find all the blocks
12412cab237bSDimitry Andric // that remain within the loop after cloning.
12422cab237bSDimitry Andric if (!BlocksInClonedLoop.empty()) {
12432cab237bSDimitry Andric BlocksInClonedLoop.insert(ClonedHeader);
12442cab237bSDimitry Andric
12452cab237bSDimitry Andric while (!Worklist.empty()) {
12462cab237bSDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
12472cab237bSDimitry Andric assert(BlocksInClonedLoop.count(BB) &&
12482cab237bSDimitry Andric "Didn't put block into the loop set!");
12492cab237bSDimitry Andric
12502cab237bSDimitry Andric // Insert any predecessors that are in the possible set into the cloned
12512cab237bSDimitry Andric // set, and if the insert is successful, add them to the worklist. Note
12522cab237bSDimitry Andric // that we filter on the blocks that are definitely reachable via the
12532cab237bSDimitry Andric // backedge to the loop header so we may prune out dead code within the
12542cab237bSDimitry Andric // cloned loop.
12552cab237bSDimitry Andric for (auto *Pred : predecessors(BB))
12562cab237bSDimitry Andric if (ClonedLoopBlocks.count(Pred) &&
12572cab237bSDimitry Andric BlocksInClonedLoop.insert(Pred).second)
12582cab237bSDimitry Andric Worklist.push_back(Pred);
12592cab237bSDimitry Andric }
12602cab237bSDimitry Andric
12612cab237bSDimitry Andric ClonedL = LI.AllocateLoop();
12622cab237bSDimitry Andric if (ParentL) {
12632cab237bSDimitry Andric ParentL->addBasicBlockToLoop(ClonedPH, LI);
12642cab237bSDimitry Andric ParentL->addChildLoop(ClonedL);
12652cab237bSDimitry Andric } else {
12662cab237bSDimitry Andric LI.addTopLevelLoop(ClonedL);
12672cab237bSDimitry Andric }
12684ba319b5SDimitry Andric NonChildClonedLoops.push_back(ClonedL);
12692cab237bSDimitry Andric
12702cab237bSDimitry Andric ClonedL->reserveBlocks(BlocksInClonedLoop.size());
12712cab237bSDimitry Andric // We don't want to just add the cloned loop blocks based on how we
12722cab237bSDimitry Andric // discovered them. The original order of blocks was carefully built in
12732cab237bSDimitry Andric // a way that doesn't rely on predecessor ordering. Rather than re-invent
12742cab237bSDimitry Andric // that logic, we just re-walk the original blocks (and those of the child
12752cab237bSDimitry Andric // loops) and filter them as we add them into the cloned loop.
12762cab237bSDimitry Andric for (auto *BB : OrigL.blocks()) {
12772cab237bSDimitry Andric auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
12782cab237bSDimitry Andric if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
12792cab237bSDimitry Andric continue;
12802cab237bSDimitry Andric
12812cab237bSDimitry Andric // Directly add the blocks that are only in this loop.
12822cab237bSDimitry Andric if (LI.getLoopFor(BB) == &OrigL) {
12832cab237bSDimitry Andric ClonedL->addBasicBlockToLoop(ClonedBB, LI);
12842cab237bSDimitry Andric continue;
12852cab237bSDimitry Andric }
12862cab237bSDimitry Andric
12872cab237bSDimitry Andric // We want to manually add it to this loop and parents.
12882cab237bSDimitry Andric // Registering it with LoopInfo will happen when we clone the top
12892cab237bSDimitry Andric // loop for this block.
12902cab237bSDimitry Andric for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
12912cab237bSDimitry Andric PL->addBlockEntry(ClonedBB);
12922cab237bSDimitry Andric }
12932cab237bSDimitry Andric
12942cab237bSDimitry Andric // Now add each child loop whose header remains within the cloned loop. All
12952cab237bSDimitry Andric // of the blocks within the loop must satisfy the same constraints as the
12962cab237bSDimitry Andric // header so once we pass the header checks we can just clone the entire
12972cab237bSDimitry Andric // child loop nest.
12982cab237bSDimitry Andric for (Loop *ChildL : OrigL) {
12992cab237bSDimitry Andric auto *ClonedChildHeader =
13002cab237bSDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
13012cab237bSDimitry Andric if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
13022cab237bSDimitry Andric continue;
13032cab237bSDimitry Andric
13042cab237bSDimitry Andric #ifndef NDEBUG
13052cab237bSDimitry Andric // We should never have a cloned child loop header but fail to have
13062cab237bSDimitry Andric // all of the blocks for that child loop.
13072cab237bSDimitry Andric for (auto *ChildLoopBB : ChildL->blocks())
13082cab237bSDimitry Andric assert(BlocksInClonedLoop.count(
13092cab237bSDimitry Andric cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
13102cab237bSDimitry Andric "Child cloned loop has a header within the cloned outer "
13112cab237bSDimitry Andric "loop but not all of its blocks!");
13122cab237bSDimitry Andric #endif
13132cab237bSDimitry Andric
13142cab237bSDimitry Andric cloneLoopNest(*ChildL, ClonedL, VMap, LI);
13152cab237bSDimitry Andric }
13162cab237bSDimitry Andric }
13172cab237bSDimitry Andric
13182cab237bSDimitry Andric // Now that we've handled all the components of the original loop that were
13192cab237bSDimitry Andric // cloned into a new loop, we still need to handle anything from the original
13202cab237bSDimitry Andric // loop that wasn't in a cloned loop.
13212cab237bSDimitry Andric
13222cab237bSDimitry Andric // Figure out what blocks are left to place within any loop nest containing
13232cab237bSDimitry Andric // the unswitched loop. If we never formed a loop, the cloned PH is one of
13242cab237bSDimitry Andric // them.
13252cab237bSDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
13262cab237bSDimitry Andric if (BlocksInClonedLoop.empty())
13272cab237bSDimitry Andric UnloopedBlockSet.insert(ClonedPH);
13282cab237bSDimitry Andric for (auto *ClonedBB : ClonedLoopBlocks)
13292cab237bSDimitry Andric if (!BlocksInClonedLoop.count(ClonedBB))
13302cab237bSDimitry Andric UnloopedBlockSet.insert(ClonedBB);
13312cab237bSDimitry Andric
13322cab237bSDimitry Andric // Copy the cloned exits and sort them in ascending loop depth, we'll work
13332cab237bSDimitry Andric // backwards across these to process them inside out. The order shouldn't
13342cab237bSDimitry Andric // matter as we're just trying to build up the map from inside-out; we use
13352cab237bSDimitry Andric // the map in a more stably ordered way below.
13362cab237bSDimitry Andric auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1337*b5893f02SDimitry Andric llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
13382cab237bSDimitry Andric return ExitLoopMap.lookup(LHS)->getLoopDepth() <
13392cab237bSDimitry Andric ExitLoopMap.lookup(RHS)->getLoopDepth();
13402cab237bSDimitry Andric });
13412cab237bSDimitry Andric
13422cab237bSDimitry Andric // Populate the existing ExitLoopMap with everything reachable from each
13432cab237bSDimitry Andric // exit, starting from the inner most exit.
13442cab237bSDimitry Andric while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
13452cab237bSDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!");
13462cab237bSDimitry Andric
13472cab237bSDimitry Andric BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
13482cab237bSDimitry Andric Loop *ExitL = ExitLoopMap.lookup(ExitBB);
13492cab237bSDimitry Andric
13502cab237bSDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable
13512cab237bSDimitry Andric // and in the unlooped set to this exit block's loop.
13522cab237bSDimitry Andric Worklist.push_back(ExitBB);
13532cab237bSDimitry Andric do {
13542cab237bSDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
13552cab237bSDimitry Andric // We can stop recursing at the cloned preheader (if we get there).
13562cab237bSDimitry Andric if (BB == ClonedPH)
13572cab237bSDimitry Andric continue;
13582cab237bSDimitry Andric
13592cab237bSDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) {
13602cab237bSDimitry Andric // If this pred has already been moved to our set or is part of some
13612cab237bSDimitry Andric // (inner) loop, no update needed.
13622cab237bSDimitry Andric if (!UnloopedBlockSet.erase(PredBB)) {
13632cab237bSDimitry Andric assert(
13642cab237bSDimitry Andric (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
13652cab237bSDimitry Andric "Predecessor not mapped to a loop!");
13662cab237bSDimitry Andric continue;
13672cab237bSDimitry Andric }
13682cab237bSDimitry Andric
13692cab237bSDimitry Andric // We just insert into the loop set here. We'll add these blocks to the
13702cab237bSDimitry Andric // exit loop after we build up the set in an order that doesn't rely on
13712cab237bSDimitry Andric // predecessor order (which in turn relies on use list order).
13722cab237bSDimitry Andric bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
13732cab237bSDimitry Andric (void)Inserted;
13742cab237bSDimitry Andric assert(Inserted && "Should only visit an unlooped block once!");
13752cab237bSDimitry Andric
13762cab237bSDimitry Andric // And recurse through to its predecessors.
13772cab237bSDimitry Andric Worklist.push_back(PredBB);
13782cab237bSDimitry Andric }
13792cab237bSDimitry Andric } while (!Worklist.empty());
13802cab237bSDimitry Andric }
13812cab237bSDimitry Andric
13822cab237bSDimitry Andric // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
13832cab237bSDimitry Andric // blocks to their outer loops, walk the cloned blocks and the cloned exits
13842cab237bSDimitry Andric // in their original order adding them to the correct loop.
13852cab237bSDimitry Andric
13862cab237bSDimitry Andric // We need a stable insertion order. We use the order of the original loop
13872cab237bSDimitry Andric // order and map into the correct parent loop.
13882cab237bSDimitry Andric for (auto *BB : llvm::concat<BasicBlock *const>(
13892cab237bSDimitry Andric makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
13902cab237bSDimitry Andric if (Loop *OuterL = ExitLoopMap.lookup(BB))
13912cab237bSDimitry Andric OuterL->addBasicBlockToLoop(BB, LI);
13922cab237bSDimitry Andric
13932cab237bSDimitry Andric #ifndef NDEBUG
13942cab237bSDimitry Andric for (auto &BBAndL : ExitLoopMap) {
13952cab237bSDimitry Andric auto *BB = BBAndL.first;
13962cab237bSDimitry Andric auto *OuterL = BBAndL.second;
13972cab237bSDimitry Andric assert(LI.getLoopFor(BB) == OuterL &&
13982cab237bSDimitry Andric "Failed to put all blocks into outer loops!");
13992cab237bSDimitry Andric }
14002cab237bSDimitry Andric #endif
14012cab237bSDimitry Andric
14022cab237bSDimitry Andric // Now that all the blocks are placed into the correct containing loop in the
14032cab237bSDimitry Andric // absence of child loops, find all the potentially cloned child loops and
14042cab237bSDimitry Andric // clone them into whatever outer loop we placed their header into.
14052cab237bSDimitry Andric for (Loop *ChildL : OrigL) {
14062cab237bSDimitry Andric auto *ClonedChildHeader =
14072cab237bSDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
14082cab237bSDimitry Andric if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
14092cab237bSDimitry Andric continue;
14102cab237bSDimitry Andric
14112cab237bSDimitry Andric #ifndef NDEBUG
14122cab237bSDimitry Andric for (auto *ChildLoopBB : ChildL->blocks())
14132cab237bSDimitry Andric assert(VMap.count(ChildLoopBB) &&
14142cab237bSDimitry Andric "Cloned a child loop header but not all of that loops blocks!");
14152cab237bSDimitry Andric #endif
14162cab237bSDimitry Andric
14172cab237bSDimitry Andric NonChildClonedLoops.push_back(cloneLoopNest(
14182cab237bSDimitry Andric *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
14192cab237bSDimitry Andric }
14202cab237bSDimitry Andric }
14212cab237bSDimitry Andric
14224ba319b5SDimitry Andric static void
deleteDeadClonedBlocks(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,DominatorTree & DT,MemorySSAUpdater * MSSAU)14234ba319b5SDimitry Andric deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
14244ba319b5SDimitry Andric ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1425*b5893f02SDimitry Andric DominatorTree &DT, MemorySSAUpdater *MSSAU) {
14264ba319b5SDimitry Andric // Find all the dead clones, and remove them from their successors.
14274ba319b5SDimitry Andric SmallVector<BasicBlock *, 16> DeadBlocks;
14284ba319b5SDimitry Andric for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
14294ba319b5SDimitry Andric for (auto &VMap : VMaps)
14304ba319b5SDimitry Andric if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
14314ba319b5SDimitry Andric if (!DT.isReachableFromEntry(ClonedBB)) {
14324ba319b5SDimitry Andric for (BasicBlock *SuccBB : successors(ClonedBB))
14334ba319b5SDimitry Andric SuccBB->removePredecessor(ClonedBB);
14344ba319b5SDimitry Andric DeadBlocks.push_back(ClonedBB);
14354ba319b5SDimitry Andric }
14364ba319b5SDimitry Andric
1437*b5893f02SDimitry Andric // Remove all MemorySSA in the dead blocks
1438*b5893f02SDimitry Andric if (MSSAU) {
1439*b5893f02SDimitry Andric SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
1440*b5893f02SDimitry Andric DeadBlocks.end());
1441*b5893f02SDimitry Andric MSSAU->removeBlocks(DeadBlockSet);
1442*b5893f02SDimitry Andric }
1443*b5893f02SDimitry Andric
14444ba319b5SDimitry Andric // Drop any remaining references to break cycles.
14454ba319b5SDimitry Andric for (BasicBlock *BB : DeadBlocks)
14464ba319b5SDimitry Andric BB->dropAllReferences();
14474ba319b5SDimitry Andric // Erase them from the IR.
14484ba319b5SDimitry Andric for (BasicBlock *BB : DeadBlocks)
14494ba319b5SDimitry Andric BB->eraseFromParent();
14504ba319b5SDimitry Andric }
14514ba319b5SDimitry Andric
deleteDeadBlocksFromLoop(Loop & L,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)1452*b5893f02SDimitry Andric static void deleteDeadBlocksFromLoop(Loop &L,
14532cab237bSDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks,
1454*b5893f02SDimitry Andric DominatorTree &DT, LoopInfo &LI,
1455*b5893f02SDimitry Andric MemorySSAUpdater *MSSAU) {
1456*b5893f02SDimitry Andric // Find all the dead blocks tied to this loop, and remove them from their
1457*b5893f02SDimitry Andric // successors.
1458*b5893f02SDimitry Andric SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
1459*b5893f02SDimitry Andric
1460*b5893f02SDimitry Andric // Start with loop/exit blocks and get a transitive closure of reachable dead
1461*b5893f02SDimitry Andric // blocks.
1462*b5893f02SDimitry Andric SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1463*b5893f02SDimitry Andric ExitBlocks.end());
1464*b5893f02SDimitry Andric DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1465*b5893f02SDimitry Andric while (!DeathCandidates.empty()) {
1466*b5893f02SDimitry Andric auto *BB = DeathCandidates.pop_back_val();
1467*b5893f02SDimitry Andric if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1468*b5893f02SDimitry Andric for (BasicBlock *SuccBB : successors(BB)) {
14694ba319b5SDimitry Andric SuccBB->removePredecessor(BB);
1470*b5893f02SDimitry Andric DeathCandidates.push_back(SuccBB);
1471*b5893f02SDimitry Andric }
1472*b5893f02SDimitry Andric DeadBlockSet.insert(BB);
1473*b5893f02SDimitry Andric }
14742cab237bSDimitry Andric }
14752cab237bSDimitry Andric
1476*b5893f02SDimitry Andric // Remove all MemorySSA in the dead blocks
1477*b5893f02SDimitry Andric if (MSSAU)
1478*b5893f02SDimitry Andric MSSAU->removeBlocks(DeadBlockSet);
14794ba319b5SDimitry Andric
14802cab237bSDimitry Andric // Filter out the dead blocks from the exit blocks list so that it can be
14812cab237bSDimitry Andric // used in the caller.
14822cab237bSDimitry Andric llvm::erase_if(ExitBlocks,
14834ba319b5SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
14842cab237bSDimitry Andric
14852cab237bSDimitry Andric // Walk from this loop up through its parents removing all of the dead blocks.
14862cab237bSDimitry Andric for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1487*b5893f02SDimitry Andric for (auto *BB : DeadBlockSet)
14882cab237bSDimitry Andric ParentL->getBlocksSet().erase(BB);
14892cab237bSDimitry Andric llvm::erase_if(ParentL->getBlocksVector(),
14904ba319b5SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
14912cab237bSDimitry Andric }
14922cab237bSDimitry Andric
14932cab237bSDimitry Andric // Now delete the dead child loops. This raw delete will clear them
14942cab237bSDimitry Andric // recursively.
14952cab237bSDimitry Andric llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
14964ba319b5SDimitry Andric if (!DeadBlockSet.count(ChildL->getHeader()))
14972cab237bSDimitry Andric return false;
14982cab237bSDimitry Andric
14992cab237bSDimitry Andric assert(llvm::all_of(ChildL->blocks(),
15002cab237bSDimitry Andric [&](BasicBlock *ChildBB) {
15014ba319b5SDimitry Andric return DeadBlockSet.count(ChildBB);
15022cab237bSDimitry Andric }) &&
15032cab237bSDimitry Andric "If the child loop header is dead all blocks in the child loop must "
15042cab237bSDimitry Andric "be dead as well!");
15052cab237bSDimitry Andric LI.destroy(ChildL);
15062cab237bSDimitry Andric return true;
15072cab237bSDimitry Andric });
15082cab237bSDimitry Andric
15094ba319b5SDimitry Andric // Remove the loop mappings for the dead blocks and drop all the references
15104ba319b5SDimitry Andric // from these blocks to others to handle cyclic references as we start
15114ba319b5SDimitry Andric // deleting the blocks themselves.
1512*b5893f02SDimitry Andric for (auto *BB : DeadBlockSet) {
15134ba319b5SDimitry Andric // Check that the dominator tree has already been updated.
15144ba319b5SDimitry Andric assert(!DT.getNode(BB) && "Should already have cleared domtree!");
15152cab237bSDimitry Andric LI.changeLoopFor(BB, nullptr);
15162cab237bSDimitry Andric BB->dropAllReferences();
15172cab237bSDimitry Andric }
15184ba319b5SDimitry Andric
15194ba319b5SDimitry Andric // Actually delete the blocks now that they've been fully unhooked from the
15204ba319b5SDimitry Andric // IR.
1521*b5893f02SDimitry Andric for (auto *BB : DeadBlockSet)
15224ba319b5SDimitry Andric BB->eraseFromParent();
15232cab237bSDimitry Andric }
15242cab237bSDimitry Andric
15252cab237bSDimitry Andric /// Recompute the set of blocks in a loop after unswitching.
15262cab237bSDimitry Andric ///
15272cab237bSDimitry Andric /// This walks from the original headers predecessors to rebuild the loop. We
15282cab237bSDimitry Andric /// take advantage of the fact that new blocks can't have been added, and so we
15292cab237bSDimitry Andric /// filter by the original loop's blocks. This also handles potentially
15302cab237bSDimitry Andric /// unreachable code that we don't want to explore but might be found examining
15312cab237bSDimitry Andric /// the predecessors of the header.
15322cab237bSDimitry Andric ///
15332cab237bSDimitry Andric /// If the original loop is no longer a loop, this will return an empty set. If
15342cab237bSDimitry Andric /// it remains a loop, all the blocks within it will be added to the set
15352cab237bSDimitry Andric /// (including those blocks in inner loops).
recomputeLoopBlockSet(Loop & L,LoopInfo & LI)15362cab237bSDimitry Andric static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
15372cab237bSDimitry Andric LoopInfo &LI) {
15382cab237bSDimitry Andric SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
15392cab237bSDimitry Andric
15402cab237bSDimitry Andric auto *PH = L.getLoopPreheader();
15412cab237bSDimitry Andric auto *Header = L.getHeader();
15422cab237bSDimitry Andric
15432cab237bSDimitry Andric // A worklist to use while walking backwards from the header.
15442cab237bSDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
15452cab237bSDimitry Andric
15462cab237bSDimitry Andric // First walk the predecessors of the header to find the backedges. This will
15472cab237bSDimitry Andric // form the basis of our walk.
15482cab237bSDimitry Andric for (auto *Pred : predecessors(Header)) {
15492cab237bSDimitry Andric // Skip the preheader.
15502cab237bSDimitry Andric if (Pred == PH)
15512cab237bSDimitry Andric continue;
15522cab237bSDimitry Andric
15532cab237bSDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor
15542cab237bSDimitry Andric // is the preheader.
15552cab237bSDimitry Andric assert(L.contains(Pred) && "Found a predecessor of the loop header other "
15562cab237bSDimitry Andric "than the preheader that is not part of the "
15572cab237bSDimitry Andric "loop!");
15582cab237bSDimitry Andric
15592cab237bSDimitry Andric // Insert this block into the loop set and on the first visit and, if it
15602cab237bSDimitry Andric // isn't the header we're currently walking, put it into the worklist to
15612cab237bSDimitry Andric // recurse through.
15622cab237bSDimitry Andric if (LoopBlockSet.insert(Pred).second && Pred != Header)
15632cab237bSDimitry Andric Worklist.push_back(Pred);
15642cab237bSDimitry Andric }
15652cab237bSDimitry Andric
15662cab237bSDimitry Andric // If no backedges were found, we're done.
15672cab237bSDimitry Andric if (LoopBlockSet.empty())
15682cab237bSDimitry Andric return LoopBlockSet;
15692cab237bSDimitry Andric
15702cab237bSDimitry Andric // We found backedges, recurse through them to identify the loop blocks.
15712cab237bSDimitry Andric while (!Worklist.empty()) {
15722cab237bSDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
15732cab237bSDimitry Andric assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
15742cab237bSDimitry Andric
15754ba319b5SDimitry Andric // No need to walk past the header.
15764ba319b5SDimitry Andric if (BB == Header)
15774ba319b5SDimitry Andric continue;
15784ba319b5SDimitry Andric
15792cab237bSDimitry Andric // Because we know the inner loop structure remains valid we can use the
15802cab237bSDimitry Andric // loop structure to jump immediately across the entire nested loop.
15812cab237bSDimitry Andric // Further, because it is in loop simplified form, we can directly jump
15822cab237bSDimitry Andric // to its preheader afterward.
15832cab237bSDimitry Andric if (Loop *InnerL = LI.getLoopFor(BB))
15842cab237bSDimitry Andric if (InnerL != &L) {
15852cab237bSDimitry Andric assert(L.contains(InnerL) &&
15862cab237bSDimitry Andric "Should not reach a loop *outside* this loop!");
15872cab237bSDimitry Andric // The preheader is the only possible predecessor of the loop so
15882cab237bSDimitry Andric // insert it into the set and check whether it was already handled.
15892cab237bSDimitry Andric auto *InnerPH = InnerL->getLoopPreheader();
15902cab237bSDimitry Andric assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
15912cab237bSDimitry Andric "but not contain the inner loop "
15922cab237bSDimitry Andric "preheader!");
15932cab237bSDimitry Andric if (!LoopBlockSet.insert(InnerPH).second)
15942cab237bSDimitry Andric // The only way to reach the preheader is through the loop body
15952cab237bSDimitry Andric // itself so if it has been visited the loop is already handled.
15962cab237bSDimitry Andric continue;
15972cab237bSDimitry Andric
15982cab237bSDimitry Andric // Insert all of the blocks (other than those already present) into
15994ba319b5SDimitry Andric // the loop set. We expect at least the block that led us to find the
16004ba319b5SDimitry Andric // inner loop to be in the block set, but we may also have other loop
16014ba319b5SDimitry Andric // blocks if they were already enqueued as predecessors of some other
16024ba319b5SDimitry Andric // outer loop block.
16032cab237bSDimitry Andric for (auto *InnerBB : InnerL->blocks()) {
16042cab237bSDimitry Andric if (InnerBB == BB) {
16052cab237bSDimitry Andric assert(LoopBlockSet.count(InnerBB) &&
16062cab237bSDimitry Andric "Block should already be in the set!");
16072cab237bSDimitry Andric continue;
16082cab237bSDimitry Andric }
16092cab237bSDimitry Andric
16104ba319b5SDimitry Andric LoopBlockSet.insert(InnerBB);
16112cab237bSDimitry Andric }
16122cab237bSDimitry Andric
16132cab237bSDimitry Andric // Add the preheader to the worklist so we will continue past the
16142cab237bSDimitry Andric // loop body.
16152cab237bSDimitry Andric Worklist.push_back(InnerPH);
16162cab237bSDimitry Andric continue;
16172cab237bSDimitry Andric }
16182cab237bSDimitry Andric
16192cab237bSDimitry Andric // Insert any predecessors that were in the original loop into the new
16202cab237bSDimitry Andric // set, and if the insert is successful, add them to the worklist.
16212cab237bSDimitry Andric for (auto *Pred : predecessors(BB))
16222cab237bSDimitry Andric if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
16232cab237bSDimitry Andric Worklist.push_back(Pred);
16242cab237bSDimitry Andric }
16252cab237bSDimitry Andric
16264ba319b5SDimitry Andric assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
16274ba319b5SDimitry Andric
16282cab237bSDimitry Andric // We've found all the blocks participating in the loop, return our completed
16292cab237bSDimitry Andric // set.
16302cab237bSDimitry Andric return LoopBlockSet;
16312cab237bSDimitry Andric }
16322cab237bSDimitry Andric
16332cab237bSDimitry Andric /// Rebuild a loop after unswitching removes some subset of blocks and edges.
16342cab237bSDimitry Andric ///
16352cab237bSDimitry Andric /// The removal may have removed some child loops entirely but cannot have
16362cab237bSDimitry Andric /// disturbed any remaining child loops. However, they may need to be hoisted
16372cab237bSDimitry Andric /// to the parent loop (or to be top-level loops). The original loop may be
16382cab237bSDimitry Andric /// completely removed.
16392cab237bSDimitry Andric ///
16402cab237bSDimitry Andric /// The sibling loops resulting from this update are returned. If the original
16412cab237bSDimitry Andric /// loop remains a valid loop, it will be the first entry in this list with all
16422cab237bSDimitry Andric /// of the newly sibling loops following it.
16432cab237bSDimitry Andric ///
16442cab237bSDimitry Andric /// Returns true if the loop remains a loop after unswitching, and false if it
16452cab237bSDimitry Andric /// is no longer a loop after unswitching (and should not continue to be
16462cab237bSDimitry Andric /// referenced).
rebuildLoopAfterUnswitch(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,LoopInfo & LI,SmallVectorImpl<Loop * > & HoistedLoops)16472cab237bSDimitry Andric static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
16482cab237bSDimitry Andric LoopInfo &LI,
16492cab237bSDimitry Andric SmallVectorImpl<Loop *> &HoistedLoops) {
16502cab237bSDimitry Andric auto *PH = L.getLoopPreheader();
16512cab237bSDimitry Andric
16522cab237bSDimitry Andric // Compute the actual parent loop from the exit blocks. Because we may have
16532cab237bSDimitry Andric // pruned some exits the loop may be different from the original parent.
16542cab237bSDimitry Andric Loop *ParentL = nullptr;
16552cab237bSDimitry Andric SmallVector<Loop *, 4> ExitLoops;
16562cab237bSDimitry Andric SmallVector<BasicBlock *, 4> ExitsInLoops;
16572cab237bSDimitry Andric ExitsInLoops.reserve(ExitBlocks.size());
16582cab237bSDimitry Andric for (auto *ExitBB : ExitBlocks)
16592cab237bSDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
16602cab237bSDimitry Andric ExitLoops.push_back(ExitL);
16612cab237bSDimitry Andric ExitsInLoops.push_back(ExitBB);
16622cab237bSDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
16632cab237bSDimitry Andric ParentL = ExitL;
16642cab237bSDimitry Andric }
16652cab237bSDimitry Andric
16662cab237bSDimitry Andric // Recompute the blocks participating in this loop. This may be empty if it
16672cab237bSDimitry Andric // is no longer a loop.
16682cab237bSDimitry Andric auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
16692cab237bSDimitry Andric
16702cab237bSDimitry Andric // If we still have a loop, we need to re-set the loop's parent as the exit
16712cab237bSDimitry Andric // block set changing may have moved it within the loop nest. Note that this
16722cab237bSDimitry Andric // can only happen when this loop has a parent as it can only hoist the loop
16732cab237bSDimitry Andric // *up* the nest.
16742cab237bSDimitry Andric if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
16752cab237bSDimitry Andric // Remove this loop's (original) blocks from all of the intervening loops.
16762cab237bSDimitry Andric for (Loop *IL = L.getParentLoop(); IL != ParentL;
16772cab237bSDimitry Andric IL = IL->getParentLoop()) {
16782cab237bSDimitry Andric IL->getBlocksSet().erase(PH);
16792cab237bSDimitry Andric for (auto *BB : L.blocks())
16802cab237bSDimitry Andric IL->getBlocksSet().erase(BB);
16812cab237bSDimitry Andric llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
16822cab237bSDimitry Andric return BB == PH || L.contains(BB);
16832cab237bSDimitry Andric });
16842cab237bSDimitry Andric }
16852cab237bSDimitry Andric
16862cab237bSDimitry Andric LI.changeLoopFor(PH, ParentL);
16872cab237bSDimitry Andric L.getParentLoop()->removeChildLoop(&L);
16882cab237bSDimitry Andric if (ParentL)
16892cab237bSDimitry Andric ParentL->addChildLoop(&L);
16902cab237bSDimitry Andric else
16912cab237bSDimitry Andric LI.addTopLevelLoop(&L);
16922cab237bSDimitry Andric }
16932cab237bSDimitry Andric
16942cab237bSDimitry Andric // Now we update all the blocks which are no longer within the loop.
16952cab237bSDimitry Andric auto &Blocks = L.getBlocksVector();
16962cab237bSDimitry Andric auto BlocksSplitI =
16972cab237bSDimitry Andric LoopBlockSet.empty()
16982cab237bSDimitry Andric ? Blocks.begin()
16992cab237bSDimitry Andric : std::stable_partition(
17002cab237bSDimitry Andric Blocks.begin(), Blocks.end(),
17012cab237bSDimitry Andric [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
17022cab237bSDimitry Andric
17032cab237bSDimitry Andric // Before we erase the list of unlooped blocks, build a set of them.
17042cab237bSDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
17052cab237bSDimitry Andric if (LoopBlockSet.empty())
17062cab237bSDimitry Andric UnloopedBlocks.insert(PH);
17072cab237bSDimitry Andric
17082cab237bSDimitry Andric // Now erase these blocks from the loop.
17092cab237bSDimitry Andric for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
17102cab237bSDimitry Andric L.getBlocksSet().erase(BB);
17112cab237bSDimitry Andric Blocks.erase(BlocksSplitI, Blocks.end());
17122cab237bSDimitry Andric
17132cab237bSDimitry Andric // Sort the exits in ascending loop depth, we'll work backwards across these
17142cab237bSDimitry Andric // to process them inside out.
17152cab237bSDimitry Andric std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
17162cab237bSDimitry Andric [&](BasicBlock *LHS, BasicBlock *RHS) {
17172cab237bSDimitry Andric return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
17182cab237bSDimitry Andric });
17192cab237bSDimitry Andric
17202cab237bSDimitry Andric // We'll build up a set for each exit loop.
17212cab237bSDimitry Andric SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
17222cab237bSDimitry Andric Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
17232cab237bSDimitry Andric
17242cab237bSDimitry Andric auto RemoveUnloopedBlocksFromLoop =
17252cab237bSDimitry Andric [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
17262cab237bSDimitry Andric for (auto *BB : UnloopedBlocks)
17272cab237bSDimitry Andric L.getBlocksSet().erase(BB);
17282cab237bSDimitry Andric llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
17292cab237bSDimitry Andric return UnloopedBlocks.count(BB);
17302cab237bSDimitry Andric });
17312cab237bSDimitry Andric };
17322cab237bSDimitry Andric
17332cab237bSDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
17342cab237bSDimitry Andric while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
17352cab237bSDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!");
17362cab237bSDimitry Andric assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
17372cab237bSDimitry Andric
17382cab237bSDimitry Andric // Grab the next exit block, in decreasing loop depth order.
17392cab237bSDimitry Andric BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
17402cab237bSDimitry Andric Loop &ExitL = *LI.getLoopFor(ExitBB);
17412cab237bSDimitry Andric assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
17422cab237bSDimitry Andric
17432cab237bSDimitry Andric // Erase all of the unlooped blocks from the loops between the previous
17442cab237bSDimitry Andric // exit loop and this exit loop. This works because the ExitInLoops list is
17452cab237bSDimitry Andric // sorted in increasing order of loop depth and thus we visit loops in
17462cab237bSDimitry Andric // decreasing order of loop depth.
17472cab237bSDimitry Andric for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
17482cab237bSDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
17492cab237bSDimitry Andric
17502cab237bSDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable
17512cab237bSDimitry Andric // and in the unlooped set to this exit block's loop.
17522cab237bSDimitry Andric Worklist.push_back(ExitBB);
17532cab237bSDimitry Andric do {
17542cab237bSDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
17552cab237bSDimitry Andric // We can stop recursing at the cloned preheader (if we get there).
17562cab237bSDimitry Andric if (BB == PH)
17572cab237bSDimitry Andric continue;
17582cab237bSDimitry Andric
17592cab237bSDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) {
17602cab237bSDimitry Andric // If this pred has already been moved to our set or is part of some
17612cab237bSDimitry Andric // (inner) loop, no update needed.
17622cab237bSDimitry Andric if (!UnloopedBlocks.erase(PredBB)) {
17632cab237bSDimitry Andric assert((NewExitLoopBlocks.count(PredBB) ||
17642cab237bSDimitry Andric ExitL.contains(LI.getLoopFor(PredBB))) &&
17652cab237bSDimitry Andric "Predecessor not in a nested loop (or already visited)!");
17662cab237bSDimitry Andric continue;
17672cab237bSDimitry Andric }
17682cab237bSDimitry Andric
17692cab237bSDimitry Andric // We just insert into the loop set here. We'll add these blocks to the
17702cab237bSDimitry Andric // exit loop after we build up the set in a deterministic order rather
17712cab237bSDimitry Andric // than the predecessor-influenced visit order.
17722cab237bSDimitry Andric bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
17732cab237bSDimitry Andric (void)Inserted;
17742cab237bSDimitry Andric assert(Inserted && "Should only visit an unlooped block once!");
17752cab237bSDimitry Andric
17762cab237bSDimitry Andric // And recurse through to its predecessors.
17772cab237bSDimitry Andric Worklist.push_back(PredBB);
17782cab237bSDimitry Andric }
17792cab237bSDimitry Andric } while (!Worklist.empty());
17802cab237bSDimitry Andric
17812cab237bSDimitry Andric // If blocks in this exit loop were directly part of the original loop (as
17822cab237bSDimitry Andric // opposed to a child loop) update the map to point to this exit loop. This
17832cab237bSDimitry Andric // just updates a map and so the fact that the order is unstable is fine.
17842cab237bSDimitry Andric for (auto *BB : NewExitLoopBlocks)
17852cab237bSDimitry Andric if (Loop *BBL = LI.getLoopFor(BB))
17862cab237bSDimitry Andric if (BBL == &L || !L.contains(BBL))
17872cab237bSDimitry Andric LI.changeLoopFor(BB, &ExitL);
17882cab237bSDimitry Andric
17892cab237bSDimitry Andric // We will remove the remaining unlooped blocks from this loop in the next
17902cab237bSDimitry Andric // iteration or below.
17912cab237bSDimitry Andric NewExitLoopBlocks.clear();
17922cab237bSDimitry Andric }
17932cab237bSDimitry Andric
17942cab237bSDimitry Andric // Any remaining unlooped blocks are no longer part of any loop unless they
17952cab237bSDimitry Andric // are part of some child loop.
17962cab237bSDimitry Andric for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
17972cab237bSDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
17982cab237bSDimitry Andric for (auto *BB : UnloopedBlocks)
17992cab237bSDimitry Andric if (Loop *BBL = LI.getLoopFor(BB))
18002cab237bSDimitry Andric if (BBL == &L || !L.contains(BBL))
18012cab237bSDimitry Andric LI.changeLoopFor(BB, nullptr);
18022cab237bSDimitry Andric
18032cab237bSDimitry Andric // Sink all the child loops whose headers are no longer in the loop set to
18042cab237bSDimitry Andric // the parent (or to be top level loops). We reach into the loop and directly
18052cab237bSDimitry Andric // update its subloop vector to make this batch update efficient.
18062cab237bSDimitry Andric auto &SubLoops = L.getSubLoopsVector();
18072cab237bSDimitry Andric auto SubLoopsSplitI =
18082cab237bSDimitry Andric LoopBlockSet.empty()
18092cab237bSDimitry Andric ? SubLoops.begin()
18102cab237bSDimitry Andric : std::stable_partition(
18112cab237bSDimitry Andric SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
18122cab237bSDimitry Andric return LoopBlockSet.count(SubL->getHeader());
18132cab237bSDimitry Andric });
18142cab237bSDimitry Andric for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
18152cab237bSDimitry Andric HoistedLoops.push_back(HoistedL);
18162cab237bSDimitry Andric HoistedL->setParentLoop(nullptr);
18172cab237bSDimitry Andric
18182cab237bSDimitry Andric // To compute the new parent of this hoisted loop we look at where we
18192cab237bSDimitry Andric // placed the preheader above. We can't lookup the header itself because we
18202cab237bSDimitry Andric // retained the mapping from the header to the hoisted loop. But the
18212cab237bSDimitry Andric // preheader and header should have the exact same new parent computed
18222cab237bSDimitry Andric // based on the set of exit blocks from the original loop as the preheader
18232cab237bSDimitry Andric // is a predecessor of the header and so reached in the reverse walk. And
18242cab237bSDimitry Andric // because the loops were all in simplified form the preheader of the
18252cab237bSDimitry Andric // hoisted loop can't be part of some *other* loop.
18262cab237bSDimitry Andric if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
18272cab237bSDimitry Andric NewParentL->addChildLoop(HoistedL);
18282cab237bSDimitry Andric else
18292cab237bSDimitry Andric LI.addTopLevelLoop(HoistedL);
18302cab237bSDimitry Andric }
18312cab237bSDimitry Andric SubLoops.erase(SubLoopsSplitI, SubLoops.end());
18322cab237bSDimitry Andric
18332cab237bSDimitry Andric // Actually delete the loop if nothing remained within it.
18342cab237bSDimitry Andric if (Blocks.empty()) {
18352cab237bSDimitry Andric assert(SubLoops.empty() &&
18362cab237bSDimitry Andric "Failed to remove all subloops from the original loop!");
18372cab237bSDimitry Andric if (Loop *ParentL = L.getParentLoop())
18382cab237bSDimitry Andric ParentL->removeChildLoop(llvm::find(*ParentL, &L));
18392cab237bSDimitry Andric else
18402cab237bSDimitry Andric LI.removeLoop(llvm::find(LI, &L));
18412cab237bSDimitry Andric LI.destroy(&L);
18422cab237bSDimitry Andric return false;
18432cab237bSDimitry Andric }
18442cab237bSDimitry Andric
18452cab237bSDimitry Andric return true;
18462cab237bSDimitry Andric }
18472cab237bSDimitry Andric
18482cab237bSDimitry Andric /// Helper to visit a dominator subtree, invoking a callable on each node.
18492cab237bSDimitry Andric ///
18502cab237bSDimitry Andric /// Returning false at any point will stop walking past that node of the tree.
18512cab237bSDimitry Andric template <typename CallableT>
visitDomSubTree(DominatorTree & DT,BasicBlock * BB,CallableT Callable)18522cab237bSDimitry Andric void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
18532cab237bSDimitry Andric SmallVector<DomTreeNode *, 4> DomWorklist;
18542cab237bSDimitry Andric DomWorklist.push_back(DT[BB]);
18552cab237bSDimitry Andric #ifndef NDEBUG
18562cab237bSDimitry Andric SmallPtrSet<DomTreeNode *, 4> Visited;
18572cab237bSDimitry Andric Visited.insert(DT[BB]);
18582cab237bSDimitry Andric #endif
18592cab237bSDimitry Andric do {
18602cab237bSDimitry Andric DomTreeNode *N = DomWorklist.pop_back_val();
18612cab237bSDimitry Andric
18622cab237bSDimitry Andric // Visit this node.
18632cab237bSDimitry Andric if (!Callable(N->getBlock()))
18642cab237bSDimitry Andric continue;
18652cab237bSDimitry Andric
18662cab237bSDimitry Andric // Accumulate the child nodes.
18672cab237bSDimitry Andric for (DomTreeNode *ChildN : *N) {
18682cab237bSDimitry Andric assert(Visited.insert(ChildN).second &&
18692cab237bSDimitry Andric "Cannot visit a node twice when walking a tree!");
18702cab237bSDimitry Andric DomWorklist.push_back(ChildN);
18712cab237bSDimitry Andric }
18722cab237bSDimitry Andric } while (!DomWorklist.empty());
18732cab237bSDimitry Andric }
18742cab237bSDimitry Andric
unswitchNontrivialInvariants(Loop & L,Instruction & TI,ArrayRef<Value * > Invariants,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)1875*b5893f02SDimitry Andric static void unswitchNontrivialInvariants(
1876*b5893f02SDimitry Andric Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
1877*b5893f02SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
1878*b5893f02SDimitry Andric AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
1879*b5893f02SDimitry Andric ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
18804ba319b5SDimitry Andric auto *ParentBB = TI.getParent();
18814ba319b5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI);
18824ba319b5SDimitry Andric SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
18832cab237bSDimitry Andric
18844ba319b5SDimitry Andric // We can only unswitch switches, conditional branches with an invariant
18854ba319b5SDimitry Andric // condition, or combining invariant conditions with an instruction.
18864ba319b5SDimitry Andric assert((SI || BI->isConditional()) &&
18874ba319b5SDimitry Andric "Can only unswitch switches and conditional branch!");
18884ba319b5SDimitry Andric bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
18894ba319b5SDimitry Andric if (FullUnswitch)
18904ba319b5SDimitry Andric assert(Invariants.size() == 1 &&
18914ba319b5SDimitry Andric "Cannot have other invariants with full unswitching!");
18924ba319b5SDimitry Andric else
18934ba319b5SDimitry Andric assert(isa<Instruction>(BI->getCondition()) &&
18944ba319b5SDimitry Andric "Partial unswitching requires an instruction as the condition!");
18952cab237bSDimitry Andric
1896*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
1897*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
1898*b5893f02SDimitry Andric
18994ba319b5SDimitry Andric // Constant and BBs tracking the cloned and continuing successor. When we are
19004ba319b5SDimitry Andric // unswitching the entire condition, this can just be trivially chosen to
19014ba319b5SDimitry Andric // unswitch towards `true`. However, when we are unswitching a set of
19024ba319b5SDimitry Andric // invariants combined with `and` or `or`, the combining operation determines
19034ba319b5SDimitry Andric // the best direction to unswitch: we want to unswitch the direction that will
19044ba319b5SDimitry Andric // collapse the branch.
19054ba319b5SDimitry Andric bool Direction = true;
19064ba319b5SDimitry Andric int ClonedSucc = 0;
19074ba319b5SDimitry Andric if (!FullUnswitch) {
19084ba319b5SDimitry Andric if (cast<Instruction>(BI->getCondition())->getOpcode() != Instruction::Or) {
19094ba319b5SDimitry Andric assert(cast<Instruction>(BI->getCondition())->getOpcode() ==
19104ba319b5SDimitry Andric Instruction::And &&
19114ba319b5SDimitry Andric "Only `or` and `and` instructions can combine invariants being "
19124ba319b5SDimitry Andric "unswitched.");
19134ba319b5SDimitry Andric Direction = false;
19144ba319b5SDimitry Andric ClonedSucc = 1;
19154ba319b5SDimitry Andric }
19164ba319b5SDimitry Andric }
19174ba319b5SDimitry Andric
19184ba319b5SDimitry Andric BasicBlock *RetainedSuccBB =
19194ba319b5SDimitry Andric BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
19204ba319b5SDimitry Andric SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
19214ba319b5SDimitry Andric if (BI)
19224ba319b5SDimitry Andric UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
19234ba319b5SDimitry Andric else
19244ba319b5SDimitry Andric for (auto Case : SI->cases())
19254ba319b5SDimitry Andric if (Case.getCaseSuccessor() != RetainedSuccBB)
19264ba319b5SDimitry Andric UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
19274ba319b5SDimitry Andric
19284ba319b5SDimitry Andric assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
19294ba319b5SDimitry Andric "Should not unswitch the same successor we are retaining!");
19302cab237bSDimitry Andric
19312cab237bSDimitry Andric // The branch should be in this exact loop. Any inner loop's invariant branch
19322cab237bSDimitry Andric // should be handled by unswitching that inner loop. The caller of this
19332cab237bSDimitry Andric // routine should filter out any candidates that remain (but were skipped for
19342cab237bSDimitry Andric // whatever reason).
19352cab237bSDimitry Andric assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
19362cab237bSDimitry Andric
19372cab237bSDimitry Andric // Compute the parent loop now before we start hacking on things.
19382cab237bSDimitry Andric Loop *ParentL = L.getParentLoop();
1939*b5893f02SDimitry Andric // Get blocks in RPO order for MSSA update, before changing the CFG.
1940*b5893f02SDimitry Andric LoopBlocksRPO LBRPO(&L);
1941*b5893f02SDimitry Andric if (MSSAU)
1942*b5893f02SDimitry Andric LBRPO.perform(&LI);
19432cab237bSDimitry Andric
19442cab237bSDimitry Andric // Compute the outer-most loop containing one of our exit blocks. This is the
19452cab237bSDimitry Andric // furthest up our loopnest which can be mutated, which we will use below to
19462cab237bSDimitry Andric // update things.
19472cab237bSDimitry Andric Loop *OuterExitL = &L;
19482cab237bSDimitry Andric for (auto *ExitBB : ExitBlocks) {
19492cab237bSDimitry Andric Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
19502cab237bSDimitry Andric if (!NewOuterExitL) {
19512cab237bSDimitry Andric // We exited the entire nest with this block, so we're done.
19522cab237bSDimitry Andric OuterExitL = nullptr;
19532cab237bSDimitry Andric break;
19542cab237bSDimitry Andric }
19552cab237bSDimitry Andric if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
19562cab237bSDimitry Andric OuterExitL = NewOuterExitL;
19572cab237bSDimitry Andric }
19582cab237bSDimitry Andric
19594ba319b5SDimitry Andric // At this point, we're definitely going to unswitch something so invalidate
19604ba319b5SDimitry Andric // any cached information in ScalarEvolution for the outer most loop
19614ba319b5SDimitry Andric // containing an exit block and all nested loops.
19624ba319b5SDimitry Andric if (SE) {
19634ba319b5SDimitry Andric if (OuterExitL)
19644ba319b5SDimitry Andric SE->forgetLoop(OuterExitL);
19654ba319b5SDimitry Andric else
19664ba319b5SDimitry Andric SE->forgetTopmostLoop(&L);
19672cab237bSDimitry Andric }
19684ba319b5SDimitry Andric
19694ba319b5SDimitry Andric // If the edge from this terminator to a successor dominates that successor,
19704ba319b5SDimitry Andric // store a map from each block in its dominator subtree to it. This lets us
19714ba319b5SDimitry Andric // tell when cloning for a particular successor if a block is dominated by
19724ba319b5SDimitry Andric // some *other* successor with a single data structure. We use this to
19734ba319b5SDimitry Andric // significantly reduce cloning.
19744ba319b5SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
19754ba319b5SDimitry Andric for (auto *SuccBB : llvm::concat<BasicBlock *const>(
19764ba319b5SDimitry Andric makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
19774ba319b5SDimitry Andric if (SuccBB->getUniquePredecessor() ||
19784ba319b5SDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
19794ba319b5SDimitry Andric return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
19804ba319b5SDimitry Andric }))
19814ba319b5SDimitry Andric visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
19824ba319b5SDimitry Andric DominatingSucc[BB] = SuccBB;
19834ba319b5SDimitry Andric return true;
19842cab237bSDimitry Andric });
19852cab237bSDimitry Andric
19862cab237bSDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
19872cab237bSDimitry Andric // the conditional branch. We will change the preheader to have a conditional
19882cab237bSDimitry Andric // branch on LoopCond. The original preheader will become the split point
19892cab237bSDimitry Andric // between the unswitched versions, and we will have a new preheader for the
19902cab237bSDimitry Andric // original loop.
19912cab237bSDimitry Andric BasicBlock *SplitBB = L.getLoopPreheader();
1992*b5893f02SDimitry Andric BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
19932cab237bSDimitry Andric
19944ba319b5SDimitry Andric // Keep track of the dominator tree updates needed.
19954ba319b5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
19962cab237bSDimitry Andric
19974ba319b5SDimitry Andric // Clone the loop for each unswitched successor.
19984ba319b5SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
19994ba319b5SDimitry Andric VMaps.reserve(UnswitchedSuccBBs.size());
20004ba319b5SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
20014ba319b5SDimitry Andric for (auto *SuccBB : UnswitchedSuccBBs) {
20024ba319b5SDimitry Andric VMaps.emplace_back(new ValueToValueMapTy());
20034ba319b5SDimitry Andric ClonedPHs[SuccBB] = buildClonedLoopBlocks(
20044ba319b5SDimitry Andric L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2005*b5893f02SDimitry Andric DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
20064ba319b5SDimitry Andric }
20074ba319b5SDimitry Andric
20084ba319b5SDimitry Andric // The stitching of the branched code back together depends on whether we're
20094ba319b5SDimitry Andric // doing full unswitching or not with the exception that we always want to
20104ba319b5SDimitry Andric // nuke the initial terminator placed in the split block.
20114ba319b5SDimitry Andric SplitBB->getTerminator()->eraseFromParent();
20124ba319b5SDimitry Andric if (FullUnswitch) {
2013*b5893f02SDimitry Andric // Splice the terminator from the original loop and rewrite its
2014*b5893f02SDimitry Andric // successors.
2015*b5893f02SDimitry Andric SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2016*b5893f02SDimitry Andric
2017*b5893f02SDimitry Andric // Keep a clone of the terminator for MSSA updates.
2018*b5893f02SDimitry Andric Instruction *NewTI = TI.clone();
2019*b5893f02SDimitry Andric ParentBB->getInstList().push_back(NewTI);
2020*b5893f02SDimitry Andric
2021*b5893f02SDimitry Andric // First wire up the moved terminator to the preheaders.
2022*b5893f02SDimitry Andric if (BI) {
2023*b5893f02SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2024*b5893f02SDimitry Andric BI->setSuccessor(ClonedSucc, ClonedPH);
2025*b5893f02SDimitry Andric BI->setSuccessor(1 - ClonedSucc, LoopPH);
2026*b5893f02SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2027*b5893f02SDimitry Andric } else {
2028*b5893f02SDimitry Andric assert(SI && "Must either be a branch or switch!");
2029*b5893f02SDimitry Andric
2030*b5893f02SDimitry Andric // Walk the cases and directly update their successors.
2031*b5893f02SDimitry Andric assert(SI->getDefaultDest() == RetainedSuccBB &&
2032*b5893f02SDimitry Andric "Not retaining default successor!");
2033*b5893f02SDimitry Andric SI->setDefaultDest(LoopPH);
2034*b5893f02SDimitry Andric for (auto &Case : SI->cases())
2035*b5893f02SDimitry Andric if (Case.getCaseSuccessor() == RetainedSuccBB)
2036*b5893f02SDimitry Andric Case.setSuccessor(LoopPH);
2037*b5893f02SDimitry Andric else
2038*b5893f02SDimitry Andric Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2039*b5893f02SDimitry Andric
2040*b5893f02SDimitry Andric // We need to use the set to populate domtree updates as even when there
2041*b5893f02SDimitry Andric // are multiple cases pointing at the same successor we only want to
2042*b5893f02SDimitry Andric // remove and insert one edge in the domtree.
2043*b5893f02SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2044*b5893f02SDimitry Andric DTUpdates.push_back(
2045*b5893f02SDimitry Andric {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2046*b5893f02SDimitry Andric }
2047*b5893f02SDimitry Andric
2048*b5893f02SDimitry Andric if (MSSAU) {
2049*b5893f02SDimitry Andric DT.applyUpdates(DTUpdates);
2050*b5893f02SDimitry Andric DTUpdates.clear();
2051*b5893f02SDimitry Andric
2052*b5893f02SDimitry Andric // Remove all but one edge to the retained block and all unswitched
2053*b5893f02SDimitry Andric // blocks. This is to avoid having duplicate entries in the cloned Phis,
2054*b5893f02SDimitry Andric // when we know we only keep a single edge for each case.
2055*b5893f02SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2056*b5893f02SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2057*b5893f02SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2058*b5893f02SDimitry Andric
2059*b5893f02SDimitry Andric for (auto &VMap : VMaps)
2060*b5893f02SDimitry Andric MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2061*b5893f02SDimitry Andric /*IgnoreIncomingWithNoClones=*/true);
2062*b5893f02SDimitry Andric MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2063*b5893f02SDimitry Andric
2064*b5893f02SDimitry Andric // Remove all edges to unswitched blocks.
2065*b5893f02SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2066*b5893f02SDimitry Andric MSSAU->removeEdge(ParentBB, SuccBB);
2067*b5893f02SDimitry Andric }
2068*b5893f02SDimitry Andric
2069*b5893f02SDimitry Andric // Now unhook the successor relationship as we'll be replacing
20704ba319b5SDimitry Andric // the terminator with a direct branch. This is much simpler for branches
20714ba319b5SDimitry Andric // than switches so we handle those first.
20724ba319b5SDimitry Andric if (BI) {
20734ba319b5SDimitry Andric // Remove the parent as a predecessor of the unswitched successor.
20744ba319b5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
20754ba319b5SDimitry Andric "Only one possible unswitched block for a branch!");
20764ba319b5SDimitry Andric BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
20774ba319b5SDimitry Andric UnswitchedSuccBB->removePredecessor(ParentBB,
20784ba319b5SDimitry Andric /*DontDeleteUselessPHIs*/ true);
20794ba319b5SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
20804ba319b5SDimitry Andric } else {
20814ba319b5SDimitry Andric // Note that we actually want to remove the parent block as a predecessor
20824ba319b5SDimitry Andric // of *every* case successor. The case successor is either unswitched,
20834ba319b5SDimitry Andric // completely eliminating an edge from the parent to that successor, or it
20844ba319b5SDimitry Andric // is a duplicate edge to the retained successor as the retained successor
20854ba319b5SDimitry Andric // is always the default successor and as we'll replace this with a direct
20864ba319b5SDimitry Andric // branch we no longer need the duplicate entries in the PHI nodes.
2087*b5893f02SDimitry Andric SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2088*b5893f02SDimitry Andric assert(NewSI->getDefaultDest() == RetainedSuccBB &&
20894ba319b5SDimitry Andric "Not retaining default successor!");
2090*b5893f02SDimitry Andric for (auto &Case : NewSI->cases())
20914ba319b5SDimitry Andric Case.getCaseSuccessor()->removePredecessor(
20924ba319b5SDimitry Andric ParentBB,
20934ba319b5SDimitry Andric /*DontDeleteUselessPHIs*/ true);
20944ba319b5SDimitry Andric
20954ba319b5SDimitry Andric // We need to use the set to populate domtree updates as even when there
20964ba319b5SDimitry Andric // are multiple cases pointing at the same successor we only want to
20974ba319b5SDimitry Andric // remove and insert one edge in the domtree.
20984ba319b5SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
20994ba319b5SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
21004ba319b5SDimitry Andric }
21014ba319b5SDimitry Andric
2102*b5893f02SDimitry Andric // After MSSAU update, remove the cloned terminator instruction NewTI.
2103*b5893f02SDimitry Andric ParentBB->getTerminator()->eraseFromParent();
21044ba319b5SDimitry Andric
21054ba319b5SDimitry Andric // Create a new unconditional branch to the continuing block (as opposed to
21064ba319b5SDimitry Andric // the one cloned).
21074ba319b5SDimitry Andric BranchInst::Create(RetainedSuccBB, ParentBB);
21084ba319b5SDimitry Andric } else {
21094ba319b5SDimitry Andric assert(BI && "Only branches have partial unswitching.");
21104ba319b5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
21114ba319b5SDimitry Andric "Only one possible unswitched block for a branch!");
21124ba319b5SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
21134ba319b5SDimitry Andric // When doing a partial unswitch, we have to do a bit more work to build up
21144ba319b5SDimitry Andric // the branch in the split block.
21154ba319b5SDimitry Andric buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
21164ba319b5SDimitry Andric *ClonedPH, *LoopPH);
21174ba319b5SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
21184ba319b5SDimitry Andric }
21194ba319b5SDimitry Andric
21204ba319b5SDimitry Andric // Apply the updates accumulated above to get an up-to-date dominator tree.
21214ba319b5SDimitry Andric DT.applyUpdates(DTUpdates);
2122*b5893f02SDimitry Andric if (!FullUnswitch && MSSAU) {
2123*b5893f02SDimitry Andric // Update MSSA for partial unswitch, after DT update.
2124*b5893f02SDimitry Andric SmallVector<CFGUpdate, 1> Updates;
2125*b5893f02SDimitry Andric Updates.push_back(
2126*b5893f02SDimitry Andric {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second});
2127*b5893f02SDimitry Andric MSSAU->applyInsertUpdates(Updates, DT);
2128*b5893f02SDimitry Andric }
21294ba319b5SDimitry Andric
21304ba319b5SDimitry Andric // Now that we have an accurate dominator tree, first delete the dead cloned
21314ba319b5SDimitry Andric // blocks so that we can accurately build any cloned loops. It is important to
21324ba319b5SDimitry Andric // not delete the blocks from the original loop yet because we still want to
21334ba319b5SDimitry Andric // reference the original loop to understand the cloned loop's structure.
2134*b5893f02SDimitry Andric deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
21352cab237bSDimitry Andric
21362cab237bSDimitry Andric // Build the cloned loop structure itself. This may be substantially
21372cab237bSDimitry Andric // different from the original structure due to the simplified CFG. This also
21382cab237bSDimitry Andric // handles inserting all the cloned blocks into the correct loops.
21392cab237bSDimitry Andric SmallVector<Loop *, 4> NonChildClonedLoops;
21404ba319b5SDimitry Andric for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
21414ba319b5SDimitry Andric buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
21422cab237bSDimitry Andric
21434ba319b5SDimitry Andric // Now that our cloned loops have been built, we can update the original loop.
21444ba319b5SDimitry Andric // First we delete the dead blocks from it and then we rebuild the loop
21454ba319b5SDimitry Andric // structure taking these deletions into account.
2146*b5893f02SDimitry Andric deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2147*b5893f02SDimitry Andric
2148*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
2149*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2150*b5893f02SDimitry Andric
21512cab237bSDimitry Andric SmallVector<Loop *, 4> HoistedLoops;
21522cab237bSDimitry Andric bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
21532cab237bSDimitry Andric
2154*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
2155*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2156*b5893f02SDimitry Andric
21574ba319b5SDimitry Andric // This transformation has a high risk of corrupting the dominator tree, and
21584ba319b5SDimitry Andric // the below steps to rebuild loop structures will result in hard to debug
21594ba319b5SDimitry Andric // errors in that case so verify that the dominator tree is sane first.
21604ba319b5SDimitry Andric // FIXME: Remove this when the bugs stop showing up and rely on existing
21614ba319b5SDimitry Andric // verification steps.
21624ba319b5SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast));
21634ba319b5SDimitry Andric
21644ba319b5SDimitry Andric if (BI) {
21654ba319b5SDimitry Andric // If we unswitched a branch which collapses the condition to a known
21664ba319b5SDimitry Andric // constant we want to replace all the uses of the invariants within both
21674ba319b5SDimitry Andric // the original and cloned blocks. We do this here so that we can use the
21684ba319b5SDimitry Andric // now updated dominator tree to identify which side the users are on.
21694ba319b5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
21704ba319b5SDimitry Andric "Only one possible unswitched block for a branch!");
21714ba319b5SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2172*b5893f02SDimitry Andric
2173*b5893f02SDimitry Andric // When considering multiple partially-unswitched invariants
2174*b5893f02SDimitry Andric // we cant just go replace them with constants in both branches.
2175*b5893f02SDimitry Andric //
2176*b5893f02SDimitry Andric // For 'AND' we infer that true branch ("continue") means true
2177*b5893f02SDimitry Andric // for each invariant operand.
2178*b5893f02SDimitry Andric // For 'OR' we can infer that false branch ("continue") means false
2179*b5893f02SDimitry Andric // for each invariant operand.
2180*b5893f02SDimitry Andric // So it happens that for multiple-partial case we dont replace
2181*b5893f02SDimitry Andric // in the unswitched branch.
2182*b5893f02SDimitry Andric bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
2183*b5893f02SDimitry Andric
21844ba319b5SDimitry Andric ConstantInt *UnswitchedReplacement =
21854ba319b5SDimitry Andric Direction ? ConstantInt::getTrue(BI->getContext())
21864ba319b5SDimitry Andric : ConstantInt::getFalse(BI->getContext());
21874ba319b5SDimitry Andric ConstantInt *ContinueReplacement =
21884ba319b5SDimitry Andric Direction ? ConstantInt::getFalse(BI->getContext())
21894ba319b5SDimitry Andric : ConstantInt::getTrue(BI->getContext());
21904ba319b5SDimitry Andric for (Value *Invariant : Invariants)
21914ba319b5SDimitry Andric for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
21924ba319b5SDimitry Andric UI != UE;) {
21934ba319b5SDimitry Andric // Grab the use and walk past it so we can clobber it in the use list.
21944ba319b5SDimitry Andric Use *U = &*UI++;
21954ba319b5SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U->getUser());
21964ba319b5SDimitry Andric if (!UserI)
21974ba319b5SDimitry Andric continue;
21984ba319b5SDimitry Andric
21994ba319b5SDimitry Andric // Replace it with the 'continue' side if in the main loop body, and the
22004ba319b5SDimitry Andric // unswitched if in the cloned blocks.
22014ba319b5SDimitry Andric if (DT.dominates(LoopPH, UserI->getParent()))
22024ba319b5SDimitry Andric U->set(ContinueReplacement);
2203*b5893f02SDimitry Andric else if (ReplaceUnswitched &&
2204*b5893f02SDimitry Andric DT.dominates(ClonedPH, UserI->getParent()))
22054ba319b5SDimitry Andric U->set(UnswitchedReplacement);
22064ba319b5SDimitry Andric }
22074ba319b5SDimitry Andric }
22082cab237bSDimitry Andric
22092cab237bSDimitry Andric // We can change which blocks are exit blocks of all the cloned sibling
22102cab237bSDimitry Andric // loops, the current loop, and any parent loops which shared exit blocks
22112cab237bSDimitry Andric // with the current loop. As a consequence, we need to re-form LCSSA for
22122cab237bSDimitry Andric // them. But we shouldn't need to re-form LCSSA for any child loops.
22132cab237bSDimitry Andric // FIXME: This could be made more efficient by tracking which exit blocks are
22142cab237bSDimitry Andric // new, and focusing on them, but that isn't likely to be necessary.
22152cab237bSDimitry Andric //
22162cab237bSDimitry Andric // In order to reasonably rebuild LCSSA we need to walk inside-out across the
22172cab237bSDimitry Andric // loop nest and update every loop that could have had its exits changed. We
22182cab237bSDimitry Andric // also need to cover any intervening loops. We add all of these loops to
22192cab237bSDimitry Andric // a list and sort them by loop depth to achieve this without updating
22202cab237bSDimitry Andric // unnecessary loops.
22214ba319b5SDimitry Andric auto UpdateLoop = [&](Loop &UpdateL) {
22222cab237bSDimitry Andric #ifndef NDEBUG
22234ba319b5SDimitry Andric UpdateL.verifyLoop();
22244ba319b5SDimitry Andric for (Loop *ChildL : UpdateL) {
22254ba319b5SDimitry Andric ChildL->verifyLoop();
22262cab237bSDimitry Andric assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
22272cab237bSDimitry Andric "Perturbed a child loop's LCSSA form!");
22284ba319b5SDimitry Andric }
22292cab237bSDimitry Andric #endif
22302cab237bSDimitry Andric // First build LCSSA for this loop so that we can preserve it when
22312cab237bSDimitry Andric // forming dedicated exits. We don't want to perturb some other loop's
22322cab237bSDimitry Andric // LCSSA while doing that CFG edit.
22334ba319b5SDimitry Andric formLCSSA(UpdateL, DT, &LI, nullptr);
22342cab237bSDimitry Andric
22352cab237bSDimitry Andric // For loops reached by this loop's original exit blocks we may
22362cab237bSDimitry Andric // introduced new, non-dedicated exits. At least try to re-form dedicated
22372cab237bSDimitry Andric // exits for these loops. This may fail if they couldn't have dedicated
22382cab237bSDimitry Andric // exits to start with.
22394ba319b5SDimitry Andric formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
22404ba319b5SDimitry Andric };
22414ba319b5SDimitry Andric
22424ba319b5SDimitry Andric // For non-child cloned loops and hoisted loops, we just need to update LCSSA
22434ba319b5SDimitry Andric // and we can do it in any order as they don't nest relative to each other.
22444ba319b5SDimitry Andric //
22454ba319b5SDimitry Andric // Also check if any of the loops we have updated have become top-level loops
22464ba319b5SDimitry Andric // as that will necessitate widening the outer loop scope.
22474ba319b5SDimitry Andric for (Loop *UpdatedL :
22484ba319b5SDimitry Andric llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
22494ba319b5SDimitry Andric UpdateLoop(*UpdatedL);
22504ba319b5SDimitry Andric if (!UpdatedL->getParentLoop())
22514ba319b5SDimitry Andric OuterExitL = nullptr;
22522cab237bSDimitry Andric }
22534ba319b5SDimitry Andric if (IsStillLoop) {
22544ba319b5SDimitry Andric UpdateLoop(L);
22554ba319b5SDimitry Andric if (!L.getParentLoop())
22564ba319b5SDimitry Andric OuterExitL = nullptr;
22572cab237bSDimitry Andric }
22582cab237bSDimitry Andric
22594ba319b5SDimitry Andric // If the original loop had exit blocks, walk up through the outer most loop
22604ba319b5SDimitry Andric // of those exit blocks to update LCSSA and form updated dedicated exits.
22614ba319b5SDimitry Andric if (OuterExitL != &L)
22624ba319b5SDimitry Andric for (Loop *OuterL = ParentL; OuterL != OuterExitL;
22634ba319b5SDimitry Andric OuterL = OuterL->getParentLoop())
22644ba319b5SDimitry Andric UpdateLoop(*OuterL);
22654ba319b5SDimitry Andric
22662cab237bSDimitry Andric #ifndef NDEBUG
22672cab237bSDimitry Andric // Verify the entire loop structure to catch any incorrect updates before we
22682cab237bSDimitry Andric // progress in the pass pipeline.
22692cab237bSDimitry Andric LI.verify(DT);
22702cab237bSDimitry Andric #endif
22712cab237bSDimitry Andric
22722cab237bSDimitry Andric // Now that we've unswitched something, make callbacks to report the changes.
22732cab237bSDimitry Andric // For that we need to merge together the updated loops and the cloned loops
22742cab237bSDimitry Andric // and check whether the original loop survived.
22752cab237bSDimitry Andric SmallVector<Loop *, 4> SibLoops;
22762cab237bSDimitry Andric for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
22772cab237bSDimitry Andric if (UpdatedL->getParentLoop() == ParentL)
22782cab237bSDimitry Andric SibLoops.push_back(UpdatedL);
22794ba319b5SDimitry Andric UnswitchCB(IsStillLoop, SibLoops);
22802cab237bSDimitry Andric
2281*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
2282*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2283*b5893f02SDimitry Andric
2284*b5893f02SDimitry Andric if (BI)
22852cab237bSDimitry Andric ++NumBranches;
2286*b5893f02SDimitry Andric else
2287*b5893f02SDimitry Andric ++NumSwitches;
22882cab237bSDimitry Andric }
22892cab237bSDimitry Andric
22902cab237bSDimitry Andric /// Recursively compute the cost of a dominator subtree based on the per-block
22912cab237bSDimitry Andric /// cost map provided.
22922cab237bSDimitry Andric ///
22932cab237bSDimitry Andric /// The recursive computation is memozied into the provided DT-indexed cost map
22942cab237bSDimitry Andric /// to allow querying it for most nodes in the domtree without it becoming
22952cab237bSDimitry Andric /// quadratic.
22962cab237bSDimitry Andric static int
computeDomSubtreeCost(DomTreeNode & N,const SmallDenseMap<BasicBlock *,int,4> & BBCostMap,SmallDenseMap<DomTreeNode *,int,4> & DTCostMap)22972cab237bSDimitry Andric computeDomSubtreeCost(DomTreeNode &N,
22982cab237bSDimitry Andric const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
22992cab237bSDimitry Andric SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
23002cab237bSDimitry Andric // Don't accumulate cost (or recurse through) blocks not in our block cost
23012cab237bSDimitry Andric // map and thus not part of the duplication cost being considered.
23022cab237bSDimitry Andric auto BBCostIt = BBCostMap.find(N.getBlock());
23032cab237bSDimitry Andric if (BBCostIt == BBCostMap.end())
23042cab237bSDimitry Andric return 0;
23052cab237bSDimitry Andric
23062cab237bSDimitry Andric // Lookup this node to see if we already computed its cost.
23072cab237bSDimitry Andric auto DTCostIt = DTCostMap.find(&N);
23082cab237bSDimitry Andric if (DTCostIt != DTCostMap.end())
23092cab237bSDimitry Andric return DTCostIt->second;
23102cab237bSDimitry Andric
23112cab237bSDimitry Andric // If not, we have to compute it. We can't use insert above and update
23122cab237bSDimitry Andric // because computing the cost may insert more things into the map.
23132cab237bSDimitry Andric int Cost = std::accumulate(
23142cab237bSDimitry Andric N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
23152cab237bSDimitry Andric return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
23162cab237bSDimitry Andric });
23172cab237bSDimitry Andric bool Inserted = DTCostMap.insert({&N, Cost}).second;
23182cab237bSDimitry Andric (void)Inserted;
23192cab237bSDimitry Andric assert(Inserted && "Should not insert a node while visiting children!");
23202cab237bSDimitry Andric return Cost;
23212cab237bSDimitry Andric }
23222cab237bSDimitry Andric
2323*b5893f02SDimitry Andric /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2324*b5893f02SDimitry Andric /// making the following replacement:
2325*b5893f02SDimitry Andric ///
2326*b5893f02SDimitry Andric /// --code before guard--
2327*b5893f02SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2328*b5893f02SDimitry Andric /// --code after guard--
2329*b5893f02SDimitry Andric ///
2330*b5893f02SDimitry Andric /// into
2331*b5893f02SDimitry Andric ///
2332*b5893f02SDimitry Andric /// --code before guard--
2333*b5893f02SDimitry Andric /// br i1 %cond, label %guarded, label %deopt
2334*b5893f02SDimitry Andric ///
2335*b5893f02SDimitry Andric /// guarded:
2336*b5893f02SDimitry Andric /// --code after guard--
2337*b5893f02SDimitry Andric ///
2338*b5893f02SDimitry Andric /// deopt:
2339*b5893f02SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2340*b5893f02SDimitry Andric /// unreachable
2341*b5893f02SDimitry Andric ///
2342*b5893f02SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in
2343*b5893f02SDimitry Andric /// valid state after this transform.
2344*b5893f02SDimitry Andric static BranchInst *
turnGuardIntoBranch(IntrinsicInst * GI,Loop & L,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)2345*b5893f02SDimitry Andric turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2346*b5893f02SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks,
2347*b5893f02SDimitry Andric DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2348*b5893f02SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2349*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2350*b5893f02SDimitry Andric BasicBlock *CheckBB = GI->getParent();
2351*b5893f02SDimitry Andric
2352*b5893f02SDimitry Andric if (MSSAU && VerifyMemorySSA)
2353*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2354*b5893f02SDimitry Andric
2355*b5893f02SDimitry Andric // Remove all CheckBB's successors from DomTree. A block can be seen among
2356*b5893f02SDimitry Andric // successors more than once, but for DomTree it should be added only once.
2357*b5893f02SDimitry Andric SmallPtrSet<BasicBlock *, 4> Successors;
2358*b5893f02SDimitry Andric for (auto *Succ : successors(CheckBB))
2359*b5893f02SDimitry Andric if (Successors.insert(Succ).second)
2360*b5893f02SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2361*b5893f02SDimitry Andric
2362*b5893f02SDimitry Andric Instruction *DeoptBlockTerm =
2363*b5893f02SDimitry Andric SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2364*b5893f02SDimitry Andric BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2365*b5893f02SDimitry Andric // SplitBlockAndInsertIfThen inserts control flow that branches to
2366*b5893f02SDimitry Andric // DeoptBlockTerm if the condition is true. We want the opposite.
2367*b5893f02SDimitry Andric CheckBI->swapSuccessors();
2368*b5893f02SDimitry Andric
2369*b5893f02SDimitry Andric BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2370*b5893f02SDimitry Andric GuardedBlock->setName("guarded");
2371*b5893f02SDimitry Andric CheckBI->getSuccessor(1)->setName("deopt");
2372*b5893f02SDimitry Andric BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2373*b5893f02SDimitry Andric
2374*b5893f02SDimitry Andric // We now have a new exit block.
2375*b5893f02SDimitry Andric ExitBlocks.push_back(CheckBI->getSuccessor(1));
2376*b5893f02SDimitry Andric
2377*b5893f02SDimitry Andric if (MSSAU)
2378*b5893f02SDimitry Andric MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2379*b5893f02SDimitry Andric
2380*b5893f02SDimitry Andric GI->moveBefore(DeoptBlockTerm);
2381*b5893f02SDimitry Andric GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2382*b5893f02SDimitry Andric
2383*b5893f02SDimitry Andric // Add new successors of CheckBB into DomTree.
2384*b5893f02SDimitry Andric for (auto *Succ : successors(CheckBB))
2385*b5893f02SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2386*b5893f02SDimitry Andric
2387*b5893f02SDimitry Andric // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2388*b5893f02SDimitry Andric // successors.
2389*b5893f02SDimitry Andric for (auto *Succ : Successors)
2390*b5893f02SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2391*b5893f02SDimitry Andric
2392*b5893f02SDimitry Andric // Make proper changes to DT.
2393*b5893f02SDimitry Andric DT.applyUpdates(DTUpdates);
2394*b5893f02SDimitry Andric // Inform LI of a new loop block.
2395*b5893f02SDimitry Andric L.addBasicBlockToLoop(GuardedBlock, LI);
2396*b5893f02SDimitry Andric
2397*b5893f02SDimitry Andric if (MSSAU) {
2398*b5893f02SDimitry Andric MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2399*b5893f02SDimitry Andric MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
2400*b5893f02SDimitry Andric if (VerifyMemorySSA)
2401*b5893f02SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2402*b5893f02SDimitry Andric }
2403*b5893f02SDimitry Andric
2404*b5893f02SDimitry Andric ++NumGuards;
2405*b5893f02SDimitry Andric return CheckBI;
2406*b5893f02SDimitry Andric }
2407*b5893f02SDimitry Andric
2408*b5893f02SDimitry Andric /// Cost multiplier is a way to limit potentially exponential behavior
2409*b5893f02SDimitry Andric /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2410*b5893f02SDimitry Andric /// candidates available. Also accounting for the number of "sibling" loops with
2411*b5893f02SDimitry Andric /// the idea to account for previous unswitches that already happened on this
2412*b5893f02SDimitry Andric /// cluster of loops. There was an attempt to keep this formula simple,
2413*b5893f02SDimitry Andric /// just enough to limit the worst case behavior. Even if it is not that simple
2414*b5893f02SDimitry Andric /// now it is still not an attempt to provide a detailed heuristic size
2415*b5893f02SDimitry Andric /// prediction.
2416*b5893f02SDimitry Andric ///
2417*b5893f02SDimitry Andric /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2418*b5893f02SDimitry Andric /// unswitch candidates, making adequate predictions instead of wild guesses.
2419*b5893f02SDimitry Andric /// That requires knowing not just the number of "remaining" candidates but
2420*b5893f02SDimitry Andric /// also costs of unswitching for each of these candidates.
calculateUnswitchCostMultiplier(Instruction & TI,Loop & L,LoopInfo & LI,DominatorTree & DT,ArrayRef<std::pair<Instruction *,TinyPtrVector<Value * >>> UnswitchCandidates)2421*b5893f02SDimitry Andric static int calculateUnswitchCostMultiplier(
2422*b5893f02SDimitry Andric Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2423*b5893f02SDimitry Andric ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2424*b5893f02SDimitry Andric UnswitchCandidates) {
2425*b5893f02SDimitry Andric
2426*b5893f02SDimitry Andric // Guards and other exiting conditions do not contribute to exponential
2427*b5893f02SDimitry Andric // explosion as soon as they dominate the latch (otherwise there might be
2428*b5893f02SDimitry Andric // another path to the latch remaining that does not allow to eliminate the
2429*b5893f02SDimitry Andric // loop copy on unswitch).
2430*b5893f02SDimitry Andric BasicBlock *Latch = L.getLoopLatch();
2431*b5893f02SDimitry Andric BasicBlock *CondBlock = TI.getParent();
2432*b5893f02SDimitry Andric if (DT.dominates(CondBlock, Latch) &&
2433*b5893f02SDimitry Andric (isGuard(&TI) ||
2434*b5893f02SDimitry Andric llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2435*b5893f02SDimitry Andric return L.contains(SuccBB);
2436*b5893f02SDimitry Andric }) <= 1)) {
2437*b5893f02SDimitry Andric NumCostMultiplierSkipped++;
2438*b5893f02SDimitry Andric return 1;
2439*b5893f02SDimitry Andric }
2440*b5893f02SDimitry Andric
2441*b5893f02SDimitry Andric auto *ParentL = L.getParentLoop();
2442*b5893f02SDimitry Andric int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2443*b5893f02SDimitry Andric : std::distance(LI.begin(), LI.end()));
2444*b5893f02SDimitry Andric // Count amount of clones that all the candidates might cause during
2445*b5893f02SDimitry Andric // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2446*b5893f02SDimitry Andric int UnswitchedClones = 0;
2447*b5893f02SDimitry Andric for (auto Candidate : UnswitchCandidates) {
2448*b5893f02SDimitry Andric Instruction *CI = Candidate.first;
2449*b5893f02SDimitry Andric BasicBlock *CondBlock = CI->getParent();
2450*b5893f02SDimitry Andric bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2451*b5893f02SDimitry Andric if (isGuard(CI)) {
2452*b5893f02SDimitry Andric if (!SkipExitingSuccessors)
2453*b5893f02SDimitry Andric UnswitchedClones++;
2454*b5893f02SDimitry Andric continue;
2455*b5893f02SDimitry Andric }
2456*b5893f02SDimitry Andric int NonExitingSuccessors = llvm::count_if(
2457*b5893f02SDimitry Andric successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2458*b5893f02SDimitry Andric return !SkipExitingSuccessors || L.contains(SuccBB);
2459*b5893f02SDimitry Andric });
2460*b5893f02SDimitry Andric UnswitchedClones += Log2_32(NonExitingSuccessors);
2461*b5893f02SDimitry Andric }
2462*b5893f02SDimitry Andric
2463*b5893f02SDimitry Andric // Ignore up to the "unscaled candidates" number of unswitch candidates
2464*b5893f02SDimitry Andric // when calculating the power-of-two scaling of the cost. The main idea
2465*b5893f02SDimitry Andric // with this control is to allow a small number of unswitches to happen
2466*b5893f02SDimitry Andric // and rely more on siblings multiplier (see below) when the number
2467*b5893f02SDimitry Andric // of candidates is small.
2468*b5893f02SDimitry Andric unsigned ClonesPower =
2469*b5893f02SDimitry Andric std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2470*b5893f02SDimitry Andric
2471*b5893f02SDimitry Andric // Allowing top-level loops to spread a bit more than nested ones.
2472*b5893f02SDimitry Andric int SiblingsMultiplier =
2473*b5893f02SDimitry Andric std::max((ParentL ? SiblingsCount
2474*b5893f02SDimitry Andric : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2475*b5893f02SDimitry Andric 1);
2476*b5893f02SDimitry Andric // Compute the cost multiplier in a way that won't overflow by saturating
2477*b5893f02SDimitry Andric // at an upper bound.
2478*b5893f02SDimitry Andric int CostMultiplier;
2479*b5893f02SDimitry Andric if (ClonesPower > Log2_32(UnswitchThreshold) ||
2480*b5893f02SDimitry Andric SiblingsMultiplier > UnswitchThreshold)
2481*b5893f02SDimitry Andric CostMultiplier = UnswitchThreshold;
2482*b5893f02SDimitry Andric else
2483*b5893f02SDimitry Andric CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2484*b5893f02SDimitry Andric (int)UnswitchThreshold);
2485*b5893f02SDimitry Andric
2486*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2487*b5893f02SDimitry Andric << " (siblings " << SiblingsMultiplier << " * clones "
2488*b5893f02SDimitry Andric << (1 << ClonesPower) << ")"
2489*b5893f02SDimitry Andric << " for unswitch candidate: " << TI << "\n");
2490*b5893f02SDimitry Andric return CostMultiplier;
2491*b5893f02SDimitry Andric }
2492*b5893f02SDimitry Andric
24932cab237bSDimitry Andric static bool
unswitchBestCondition(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,TargetTransformInfo & TTI,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)24944ba319b5SDimitry Andric unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
24954ba319b5SDimitry Andric AssumptionCache &AC, TargetTransformInfo &TTI,
24964ba319b5SDimitry Andric function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2497*b5893f02SDimitry Andric ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
24984ba319b5SDimitry Andric // Collect all invariant conditions within this loop (as opposed to an inner
24994ba319b5SDimitry Andric // loop which would be handled when visiting that inner loop).
2500*b5893f02SDimitry Andric SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
25014ba319b5SDimitry Andric UnswitchCandidates;
2502*b5893f02SDimitry Andric
2503*b5893f02SDimitry Andric // Whether or not we should also collect guards in the loop.
2504*b5893f02SDimitry Andric bool CollectGuards = false;
2505*b5893f02SDimitry Andric if (UnswitchGuards) {
2506*b5893f02SDimitry Andric auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2507*b5893f02SDimitry Andric Intrinsic::getName(Intrinsic::experimental_guard));
2508*b5893f02SDimitry Andric if (GuardDecl && !GuardDecl->use_empty())
2509*b5893f02SDimitry Andric CollectGuards = true;
2510*b5893f02SDimitry Andric }
2511*b5893f02SDimitry Andric
25124ba319b5SDimitry Andric for (auto *BB : L.blocks()) {
25134ba319b5SDimitry Andric if (LI.getLoopFor(BB) != &L)
25144ba319b5SDimitry Andric continue;
2515f37b6182SDimitry Andric
2516*b5893f02SDimitry Andric if (CollectGuards)
2517*b5893f02SDimitry Andric for (auto &I : *BB)
2518*b5893f02SDimitry Andric if (isGuard(&I)) {
2519*b5893f02SDimitry Andric auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2520*b5893f02SDimitry Andric // TODO: Support AND, OR conditions and partial unswitching.
2521*b5893f02SDimitry Andric if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2522*b5893f02SDimitry Andric UnswitchCandidates.push_back({&I, {Cond}});
2523*b5893f02SDimitry Andric }
2524*b5893f02SDimitry Andric
25254ba319b5SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
25264ba319b5SDimitry Andric // We can only consider fully loop-invariant switch conditions as we need
25274ba319b5SDimitry Andric // to completely eliminate the switch after unswitching.
25284ba319b5SDimitry Andric if (!isa<Constant>(SI->getCondition()) &&
25294ba319b5SDimitry Andric L.isLoopInvariant(SI->getCondition()))
25304ba319b5SDimitry Andric UnswitchCandidates.push_back({SI, {SI->getCondition()}});
25314ba319b5SDimitry Andric continue;
25324ba319b5SDimitry Andric }
2533f37b6182SDimitry Andric
25344ba319b5SDimitry Andric auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
25354ba319b5SDimitry Andric if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
25364ba319b5SDimitry Andric BI->getSuccessor(0) == BI->getSuccessor(1))
25374ba319b5SDimitry Andric continue;
2538f37b6182SDimitry Andric
25394ba319b5SDimitry Andric if (L.isLoopInvariant(BI->getCondition())) {
25404ba319b5SDimitry Andric UnswitchCandidates.push_back({BI, {BI->getCondition()}});
25414ba319b5SDimitry Andric continue;
25424ba319b5SDimitry Andric }
25432cab237bSDimitry Andric
25444ba319b5SDimitry Andric Instruction &CondI = *cast<Instruction>(BI->getCondition());
25454ba319b5SDimitry Andric if (CondI.getOpcode() != Instruction::And &&
25464ba319b5SDimitry Andric CondI.getOpcode() != Instruction::Or)
25474ba319b5SDimitry Andric continue;
25484ba319b5SDimitry Andric
25494ba319b5SDimitry Andric TinyPtrVector<Value *> Invariants =
25504ba319b5SDimitry Andric collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
25514ba319b5SDimitry Andric if (Invariants.empty())
25524ba319b5SDimitry Andric continue;
25534ba319b5SDimitry Andric
25544ba319b5SDimitry Andric UnswitchCandidates.push_back({BI, std::move(Invariants)});
25554ba319b5SDimitry Andric }
25562cab237bSDimitry Andric
25572cab237bSDimitry Andric // If we didn't find any candidates, we're done.
25582cab237bSDimitry Andric if (UnswitchCandidates.empty())
25594ba319b5SDimitry Andric return false;
25602cab237bSDimitry Andric
25614ba319b5SDimitry Andric // Check if there are irreducible CFG cycles in this loop. If so, we cannot
25624ba319b5SDimitry Andric // easily unswitch non-trivial edges out of the loop. Doing so might turn the
25634ba319b5SDimitry Andric // irreducible control flow into reducible control flow and introduce new
25644ba319b5SDimitry Andric // loops "out of thin air". If we ever discover important use cases for doing
25654ba319b5SDimitry Andric // this, we can add support to loop unswitch, but it is a lot of complexity
25664ba319b5SDimitry Andric // for what seems little or no real world benefit.
25674ba319b5SDimitry Andric LoopBlocksRPO RPOT(&L);
25684ba319b5SDimitry Andric RPOT.perform(&LI);
25694ba319b5SDimitry Andric if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
25704ba319b5SDimitry Andric return false;
25714ba319b5SDimitry Andric
2572*b5893f02SDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks;
2573*b5893f02SDimitry Andric L.getUniqueExitBlocks(ExitBlocks);
2574*b5893f02SDimitry Andric
2575*b5893f02SDimitry Andric // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2576*b5893f02SDimitry Andric // don't know how to split those exit blocks.
2577*b5893f02SDimitry Andric // FIXME: We should teach SplitBlock to handle this and remove this
2578*b5893f02SDimitry Andric // restriction.
2579*b5893f02SDimitry Andric for (auto *ExitBB : ExitBlocks)
2580*b5893f02SDimitry Andric if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2581*b5893f02SDimitry Andric dbgs() << "Cannot unswitch because of cleanuppad in exit block\n";
2582*b5893f02SDimitry Andric return false;
2583*b5893f02SDimitry Andric }
2584*b5893f02SDimitry Andric
25854ba319b5SDimitry Andric LLVM_DEBUG(
25864ba319b5SDimitry Andric dbgs() << "Considering " << UnswitchCandidates.size()
25872cab237bSDimitry Andric << " non-trivial loop invariant conditions for unswitching.\n");
25882cab237bSDimitry Andric
25892cab237bSDimitry Andric // Given that unswitching these terminators will require duplicating parts of
25902cab237bSDimitry Andric // the loop, so we need to be able to model that cost. Compute the ephemeral
25912cab237bSDimitry Andric // values and set up a data structure to hold per-BB costs. We cache each
25922cab237bSDimitry Andric // block's cost so that we don't recompute this when considering different
25932cab237bSDimitry Andric // subsets of the loop for duplication during unswitching.
25942cab237bSDimitry Andric SmallPtrSet<const Value *, 4> EphValues;
25952cab237bSDimitry Andric CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
25962cab237bSDimitry Andric SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
25972cab237bSDimitry Andric
25982cab237bSDimitry Andric // Compute the cost of each block, as well as the total loop cost. Also, bail
25992cab237bSDimitry Andric // out if we see instructions which are incompatible with loop unswitching
26002cab237bSDimitry Andric // (convergent, noduplicate, or cross-basic-block tokens).
26012cab237bSDimitry Andric // FIXME: We might be able to safely handle some of these in non-duplicated
26022cab237bSDimitry Andric // regions.
26032cab237bSDimitry Andric int LoopCost = 0;
26042cab237bSDimitry Andric for (auto *BB : L.blocks()) {
26052cab237bSDimitry Andric int Cost = 0;
26062cab237bSDimitry Andric for (auto &I : *BB) {
26072cab237bSDimitry Andric if (EphValues.count(&I))
26082cab237bSDimitry Andric continue;
26092cab237bSDimitry Andric
26102cab237bSDimitry Andric if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
26114ba319b5SDimitry Andric return false;
26122cab237bSDimitry Andric if (auto CS = CallSite(&I))
26132cab237bSDimitry Andric if (CS.isConvergent() || CS.cannotDuplicate())
26144ba319b5SDimitry Andric return false;
26152cab237bSDimitry Andric
26162cab237bSDimitry Andric Cost += TTI.getUserCost(&I);
26172cab237bSDimitry Andric }
26182cab237bSDimitry Andric assert(Cost >= 0 && "Must not have negative costs!");
26192cab237bSDimitry Andric LoopCost += Cost;
26202cab237bSDimitry Andric assert(LoopCost >= 0 && "Must not have negative loop costs!");
26212cab237bSDimitry Andric BBCostMap[BB] = Cost;
26222cab237bSDimitry Andric }
26234ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
26242cab237bSDimitry Andric
26252cab237bSDimitry Andric // Now we find the best candidate by searching for the one with the following
26262cab237bSDimitry Andric // properties in order:
26272cab237bSDimitry Andric //
26282cab237bSDimitry Andric // 1) An unswitching cost below the threshold
26292cab237bSDimitry Andric // 2) The smallest number of duplicated unswitch candidates (to avoid
26302cab237bSDimitry Andric // creating redundant subsequent unswitching)
26312cab237bSDimitry Andric // 3) The smallest cost after unswitching.
26322cab237bSDimitry Andric //
26332cab237bSDimitry Andric // We prioritize reducing fanout of unswitch candidates provided the cost
26342cab237bSDimitry Andric // remains below the threshold because this has a multiplicative effect.
26352cab237bSDimitry Andric //
26362cab237bSDimitry Andric // This requires memoizing each dominator subtree to avoid redundant work.
26372cab237bSDimitry Andric //
26382cab237bSDimitry Andric // FIXME: Need to actually do the number of candidates part above.
26392cab237bSDimitry Andric SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
26402cab237bSDimitry Andric // Given a terminator which might be unswitched, computes the non-duplicated
26412cab237bSDimitry Andric // cost for that terminator.
2642*b5893f02SDimitry Andric auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
26434ba319b5SDimitry Andric BasicBlock &BB = *TI.getParent();
26442cab237bSDimitry Andric SmallPtrSet<BasicBlock *, 4> Visited;
26452cab237bSDimitry Andric
26462cab237bSDimitry Andric int Cost = LoopCost;
26472cab237bSDimitry Andric for (BasicBlock *SuccBB : successors(&BB)) {
26482cab237bSDimitry Andric // Don't count successors more than once.
26492cab237bSDimitry Andric if (!Visited.insert(SuccBB).second)
26502cab237bSDimitry Andric continue;
26512cab237bSDimitry Andric
26524ba319b5SDimitry Andric // If this is a partial unswitch candidate, then it must be a conditional
26534ba319b5SDimitry Andric // branch with a condition of either `or` or `and`. In that case, one of
26544ba319b5SDimitry Andric // the successors is necessarily duplicated, so don't even try to remove
26554ba319b5SDimitry Andric // its cost.
26564ba319b5SDimitry Andric if (!FullUnswitch) {
26574ba319b5SDimitry Andric auto &BI = cast<BranchInst>(TI);
26584ba319b5SDimitry Andric if (cast<Instruction>(BI.getCondition())->getOpcode() ==
26594ba319b5SDimitry Andric Instruction::And) {
26604ba319b5SDimitry Andric if (SuccBB == BI.getSuccessor(1))
26614ba319b5SDimitry Andric continue;
26624ba319b5SDimitry Andric } else {
26634ba319b5SDimitry Andric assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
26644ba319b5SDimitry Andric Instruction::Or &&
26654ba319b5SDimitry Andric "Only `and` and `or` conditions can result in a partial "
26664ba319b5SDimitry Andric "unswitch!");
26674ba319b5SDimitry Andric if (SuccBB == BI.getSuccessor(0))
26684ba319b5SDimitry Andric continue;
26694ba319b5SDimitry Andric }
26704ba319b5SDimitry Andric }
26714ba319b5SDimitry Andric
26722cab237bSDimitry Andric // This successor's domtree will not need to be duplicated after
26732cab237bSDimitry Andric // unswitching if the edge to the successor dominates it (and thus the
26742cab237bSDimitry Andric // entire tree). This essentially means there is no other path into this
26752cab237bSDimitry Andric // subtree and so it will end up live in only one clone of the loop.
26762cab237bSDimitry Andric if (SuccBB->getUniquePredecessor() ||
26772cab237bSDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
26782cab237bSDimitry Andric return PredBB == &BB || DT.dominates(SuccBB, PredBB);
26792cab237bSDimitry Andric })) {
26802cab237bSDimitry Andric Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
26812cab237bSDimitry Andric assert(Cost >= 0 &&
26822cab237bSDimitry Andric "Non-duplicated cost should never exceed total loop cost!");
26832cab237bSDimitry Andric }
26842cab237bSDimitry Andric }
26852cab237bSDimitry Andric
26862cab237bSDimitry Andric // Now scale the cost by the number of unique successors minus one. We
26872cab237bSDimitry Andric // subtract one because there is already at least one copy of the entire
26882cab237bSDimitry Andric // loop. This is computing the new cost of unswitching a condition.
2689*b5893f02SDimitry Andric // Note that guards always have 2 unique successors that are implicit and
2690*b5893f02SDimitry Andric // will be materialized if we decide to unswitch it.
2691*b5893f02SDimitry Andric int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2692*b5893f02SDimitry Andric assert(SuccessorsCount > 1 &&
26932cab237bSDimitry Andric "Cannot unswitch a condition without multiple distinct successors!");
2694*b5893f02SDimitry Andric return Cost * (SuccessorsCount - 1);
26952cab237bSDimitry Andric };
2696*b5893f02SDimitry Andric Instruction *BestUnswitchTI = nullptr;
26972cab237bSDimitry Andric int BestUnswitchCost;
26984ba319b5SDimitry Andric ArrayRef<Value *> BestUnswitchInvariants;
26994ba319b5SDimitry Andric for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2700*b5893f02SDimitry Andric Instruction &TI = *TerminatorAndInvariants.first;
27014ba319b5SDimitry Andric ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
27024ba319b5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI);
27034ba319b5SDimitry Andric int CandidateCost = ComputeUnswitchedCost(
27044ba319b5SDimitry Andric TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
27054ba319b5SDimitry Andric Invariants[0] == BI->getCondition()));
2706*b5893f02SDimitry Andric // Calculate cost multiplier which is a tool to limit potentially
2707*b5893f02SDimitry Andric // exponential behavior of loop-unswitch.
2708*b5893f02SDimitry Andric if (EnableUnswitchCostMultiplier) {
2709*b5893f02SDimitry Andric int CostMultiplier =
2710*b5893f02SDimitry Andric calculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2711*b5893f02SDimitry Andric assert(
2712*b5893f02SDimitry Andric (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2713*b5893f02SDimitry Andric "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2714*b5893f02SDimitry Andric CandidateCost *= CostMultiplier;
2715*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
2716*b5893f02SDimitry Andric << " (multiplier: " << CostMultiplier << ")"
2717*b5893f02SDimitry Andric << " for unswitch candidate: " << TI << "\n");
2718*b5893f02SDimitry Andric } else {
27194ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
27204ba319b5SDimitry Andric << " for unswitch candidate: " << TI << "\n");
2721*b5893f02SDimitry Andric }
2722*b5893f02SDimitry Andric
27232cab237bSDimitry Andric if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
27244ba319b5SDimitry Andric BestUnswitchTI = &TI;
27252cab237bSDimitry Andric BestUnswitchCost = CandidateCost;
27264ba319b5SDimitry Andric BestUnswitchInvariants = Invariants;
27272cab237bSDimitry Andric }
27282cab237bSDimitry Andric }
27292cab237bSDimitry Andric
27304ba319b5SDimitry Andric if (BestUnswitchCost >= UnswitchThreshold) {
27314ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
27324ba319b5SDimitry Andric << BestUnswitchCost << "\n");
27334ba319b5SDimitry Andric return false;
27342cab237bSDimitry Andric }
2735f37b6182SDimitry Andric
2736*b5893f02SDimitry Andric // If the best candidate is a guard, turn it into a branch.
2737*b5893f02SDimitry Andric if (isGuard(BestUnswitchTI))
2738*b5893f02SDimitry Andric BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2739*b5893f02SDimitry Andric ExitBlocks, DT, LI, MSSAU);
2740*b5893f02SDimitry Andric
2741*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
27424ba319b5SDimitry Andric << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
27434ba319b5SDimitry Andric << "\n");
2744*b5893f02SDimitry Andric unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2745*b5893f02SDimitry Andric ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
2746*b5893f02SDimitry Andric return true;
27474ba319b5SDimitry Andric }
27484ba319b5SDimitry Andric
27494ba319b5SDimitry Andric /// Unswitch control flow predicated on loop invariant conditions.
27504ba319b5SDimitry Andric ///
27514ba319b5SDimitry Andric /// This first hoists all branches or switches which are trivial (IE, do not
27524ba319b5SDimitry Andric /// require duplicating any part of the loop) out of the loop body. It then
27534ba319b5SDimitry Andric /// looks at other loop invariant control flows and tries to unswitch those as
27544ba319b5SDimitry Andric /// well by cloning the loop if the result is small enough.
27554ba319b5SDimitry Andric ///
27564ba319b5SDimitry Andric /// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
27574ba319b5SDimitry Andric /// updated based on the unswitch.
2758*b5893f02SDimitry Andric /// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
27594ba319b5SDimitry Andric ///
27604ba319b5SDimitry Andric /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
27614ba319b5SDimitry Andric /// true, we will attempt to do non-trivial unswitching as well as trivial
27624ba319b5SDimitry Andric /// unswitching.
27634ba319b5SDimitry Andric ///
27644ba319b5SDimitry Andric /// The `UnswitchCB` callback provided will be run after unswitching is
27654ba319b5SDimitry Andric /// complete, with the first parameter set to `true` if the provided loop
27664ba319b5SDimitry Andric /// remains a loop, and a list of new sibling loops created.
27674ba319b5SDimitry Andric ///
27684ba319b5SDimitry Andric /// If `SE` is non-null, we will update that analysis based on the unswitching
27694ba319b5SDimitry Andric /// done.
unswitchLoop(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,TargetTransformInfo & TTI,bool NonTrivial,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)27704ba319b5SDimitry Andric static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
27714ba319b5SDimitry Andric AssumptionCache &AC, TargetTransformInfo &TTI,
27724ba319b5SDimitry Andric bool NonTrivial,
27734ba319b5SDimitry Andric function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2774*b5893f02SDimitry Andric ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
27754ba319b5SDimitry Andric assert(L.isRecursivelyLCSSAForm(DT, LI) &&
27764ba319b5SDimitry Andric "Loops must be in LCSSA form before unswitching.");
27774ba319b5SDimitry Andric bool Changed = false;
27784ba319b5SDimitry Andric
27794ba319b5SDimitry Andric // Must be in loop simplified form: we need a preheader and dedicated exits.
27804ba319b5SDimitry Andric if (!L.isLoopSimplifyForm())
27814ba319b5SDimitry Andric return false;
27824ba319b5SDimitry Andric
27834ba319b5SDimitry Andric // Try trivial unswitch first before loop over other basic blocks in the loop.
2784*b5893f02SDimitry Andric if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
27854ba319b5SDimitry Andric // If we unswitched successfully we will want to clean up the loop before
27864ba319b5SDimitry Andric // processing it further so just mark it as unswitched and return.
27874ba319b5SDimitry Andric UnswitchCB(/*CurrentLoopValid*/ true, {});
27884ba319b5SDimitry Andric return true;
27894ba319b5SDimitry Andric }
27904ba319b5SDimitry Andric
27914ba319b5SDimitry Andric // If we're not doing non-trivial unswitching, we're done. We both accept
27924ba319b5SDimitry Andric // a parameter but also check a local flag that can be used for testing
27934ba319b5SDimitry Andric // a debugging.
27944ba319b5SDimitry Andric if (!NonTrivial && !EnableNonTrivialUnswitch)
27954ba319b5SDimitry Andric return false;
27964ba319b5SDimitry Andric
27974ba319b5SDimitry Andric // For non-trivial unswitching, because it often creates new loops, we rely on
27984ba319b5SDimitry Andric // the pass manager to iterate on the loops rather than trying to immediately
27994ba319b5SDimitry Andric // reach a fixed point. There is no substantial advantage to iterating
28004ba319b5SDimitry Andric // internally, and if any of the new loops are simplified enough to contain
28014ba319b5SDimitry Andric // trivial unswitching we want to prefer those.
28024ba319b5SDimitry Andric
28034ba319b5SDimitry Andric // Try to unswitch the best invariant condition. We prefer this full unswitch to
28044ba319b5SDimitry Andric // a partial unswitch when possible below the threshold.
2805*b5893f02SDimitry Andric if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
28064ba319b5SDimitry Andric return true;
28074ba319b5SDimitry Andric
28084ba319b5SDimitry Andric // No other opportunities to unswitch.
2809f37b6182SDimitry Andric return Changed;
2810f37b6182SDimitry Andric }
2811f37b6182SDimitry Andric
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)2812f37b6182SDimitry Andric PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
2813f37b6182SDimitry Andric LoopStandardAnalysisResults &AR,
2814f37b6182SDimitry Andric LPMUpdater &U) {
2815f37b6182SDimitry Andric Function &F = *L.getHeader()->getParent();
2816f37b6182SDimitry Andric (void)F;
2817f37b6182SDimitry Andric
28184ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
28194ba319b5SDimitry Andric << "\n");
2820f37b6182SDimitry Andric
28212cab237bSDimitry Andric // Save the current loop name in a variable so that we can report it even
28222cab237bSDimitry Andric // after it has been deleted.
28232cab237bSDimitry Andric std::string LoopName = L.getName();
28242cab237bSDimitry Andric
28254ba319b5SDimitry Andric auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
28262cab237bSDimitry Andric ArrayRef<Loop *> NewLoops) {
28272cab237bSDimitry Andric // If we did a non-trivial unswitch, we have added new (cloned) loops.
28284ba319b5SDimitry Andric if (!NewLoops.empty())
28292cab237bSDimitry Andric U.addSiblingLoops(NewLoops);
28302cab237bSDimitry Andric
28312cab237bSDimitry Andric // If the current loop remains valid, we should revisit it to catch any
28322cab237bSDimitry Andric // other unswitch opportunities. Otherwise, we need to mark it as deleted.
28332cab237bSDimitry Andric if (CurrentLoopValid)
28342cab237bSDimitry Andric U.revisitCurrentLoop();
28352cab237bSDimitry Andric else
28362cab237bSDimitry Andric U.markLoopAsDeleted(L, LoopName);
28372cab237bSDimitry Andric };
28382cab237bSDimitry Andric
2839*b5893f02SDimitry Andric Optional<MemorySSAUpdater> MSSAU;
2840*b5893f02SDimitry Andric if (AR.MSSA) {
2841*b5893f02SDimitry Andric MSSAU = MemorySSAUpdater(AR.MSSA);
2842*b5893f02SDimitry Andric if (VerifyMemorySSA)
2843*b5893f02SDimitry Andric AR.MSSA->verifyMemorySSA();
2844*b5893f02SDimitry Andric }
28454ba319b5SDimitry Andric if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
2846*b5893f02SDimitry Andric &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
2847f37b6182SDimitry Andric return PreservedAnalyses::all();
2848f37b6182SDimitry Andric
2849*b5893f02SDimitry Andric if (AR.MSSA && VerifyMemorySSA)
2850*b5893f02SDimitry Andric AR.MSSA->verifyMemorySSA();
2851*b5893f02SDimitry Andric
2852f37b6182SDimitry Andric // Historically this pass has had issues with the dominator tree so verify it
2853f37b6182SDimitry Andric // in asserts builds.
28544ba319b5SDimitry Andric assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
2855f37b6182SDimitry Andric return getLoopPassPreservedAnalyses();
2856f37b6182SDimitry Andric }
2857f37b6182SDimitry Andric
2858f37b6182SDimitry Andric namespace {
285960ff8e32SDimitry Andric
2860f37b6182SDimitry Andric class SimpleLoopUnswitchLegacyPass : public LoopPass {
28612cab237bSDimitry Andric bool NonTrivial;
28622cab237bSDimitry Andric
2863f37b6182SDimitry Andric public:
2864f37b6182SDimitry Andric static char ID; // Pass ID, replacement for typeid
286560ff8e32SDimitry Andric
SimpleLoopUnswitchLegacyPass(bool NonTrivial=false)28662cab237bSDimitry Andric explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
28672cab237bSDimitry Andric : LoopPass(ID), NonTrivial(NonTrivial) {
2868f37b6182SDimitry Andric initializeSimpleLoopUnswitchLegacyPassPass(
2869f37b6182SDimitry Andric *PassRegistry::getPassRegistry());
2870f37b6182SDimitry Andric }
2871f37b6182SDimitry Andric
2872f37b6182SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override;
2873f37b6182SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const2874f37b6182SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
2875f37b6182SDimitry Andric AU.addRequired<AssumptionCacheTracker>();
28762cab237bSDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
2877*b5893f02SDimitry Andric if (EnableMSSALoopDependency) {
2878*b5893f02SDimitry Andric AU.addRequired<MemorySSAWrapperPass>();
2879*b5893f02SDimitry Andric AU.addPreserved<MemorySSAWrapperPass>();
2880*b5893f02SDimitry Andric }
2881f37b6182SDimitry Andric getLoopAnalysisUsage(AU);
2882f37b6182SDimitry Andric }
2883f37b6182SDimitry Andric };
288460ff8e32SDimitry Andric
288560ff8e32SDimitry Andric } // end anonymous namespace
2886f37b6182SDimitry Andric
runOnLoop(Loop * L,LPPassManager & LPM)2887f37b6182SDimitry Andric bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
2888f37b6182SDimitry Andric if (skipLoop(L))
2889f37b6182SDimitry Andric return false;
2890f37b6182SDimitry Andric
2891f37b6182SDimitry Andric Function &F = *L->getHeader()->getParent();
2892f37b6182SDimitry Andric
28934ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
28944ba319b5SDimitry Andric << "\n");
2895f37b6182SDimitry Andric
2896f37b6182SDimitry Andric auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2897f37b6182SDimitry Andric auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2898f37b6182SDimitry Andric auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
28992cab237bSDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2900*b5893f02SDimitry Andric MemorySSA *MSSA = nullptr;
2901*b5893f02SDimitry Andric Optional<MemorySSAUpdater> MSSAU;
2902*b5893f02SDimitry Andric if (EnableMSSALoopDependency) {
2903*b5893f02SDimitry Andric MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
2904*b5893f02SDimitry Andric MSSAU = MemorySSAUpdater(MSSA);
2905*b5893f02SDimitry Andric }
2906f37b6182SDimitry Andric
29074ba319b5SDimitry Andric auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
29084ba319b5SDimitry Andric auto *SE = SEWP ? &SEWP->getSE() : nullptr;
29094ba319b5SDimitry Andric
29104ba319b5SDimitry Andric auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
29112cab237bSDimitry Andric ArrayRef<Loop *> NewLoops) {
29122cab237bSDimitry Andric // If we did a non-trivial unswitch, we have added new (cloned) loops.
29132cab237bSDimitry Andric for (auto *NewL : NewLoops)
29142cab237bSDimitry Andric LPM.addLoop(*NewL);
29152cab237bSDimitry Andric
29162cab237bSDimitry Andric // If the current loop remains valid, re-add it to the queue. This is
29172cab237bSDimitry Andric // a little wasteful as we'll finish processing the current loop as well,
29182cab237bSDimitry Andric // but it is the best we can do in the old PM.
29192cab237bSDimitry Andric if (CurrentLoopValid)
29202cab237bSDimitry Andric LPM.addLoop(*L);
29212cab237bSDimitry Andric else
29222cab237bSDimitry Andric LPM.markLoopAsDeleted(*L);
29232cab237bSDimitry Andric };
29242cab237bSDimitry Andric
2925*b5893f02SDimitry Andric if (MSSA && VerifyMemorySSA)
2926*b5893f02SDimitry Andric MSSA->verifyMemorySSA();
2927*b5893f02SDimitry Andric
2928*b5893f02SDimitry Andric bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
2929*b5893f02SDimitry Andric MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
2930*b5893f02SDimitry Andric
2931*b5893f02SDimitry Andric if (MSSA && VerifyMemorySSA)
2932*b5893f02SDimitry Andric MSSA->verifyMemorySSA();
29332cab237bSDimitry Andric
29342cab237bSDimitry Andric // If anything was unswitched, also clear any cached information about this
29352cab237bSDimitry Andric // loop.
29362cab237bSDimitry Andric LPM.deleteSimpleAnalysisLoop(L);
2937f37b6182SDimitry Andric
2938f37b6182SDimitry Andric // Historically this pass has had issues with the dominator tree so verify it
2939f37b6182SDimitry Andric // in asserts builds.
29404ba319b5SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast));
29414ba319b5SDimitry Andric
2942f37b6182SDimitry Andric return Changed;
2943f37b6182SDimitry Andric }
2944f37b6182SDimitry Andric
2945f37b6182SDimitry Andric char SimpleLoopUnswitchLegacyPass::ID = 0;
2946f37b6182SDimitry Andric INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2947f37b6182SDimitry Andric "Simple unswitch loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)2948f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
29492cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
29502cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2951f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass)
2952*b5893f02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2953f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2954f37b6182SDimitry Andric INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2955f37b6182SDimitry Andric "Simple unswitch loops", false, false)
2956f37b6182SDimitry Andric
29572cab237bSDimitry Andric Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
29582cab237bSDimitry Andric return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2959f37b6182SDimitry Andric }
2960