14ba319b5SDimitry Andric ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2f37b6182SDimitry Andric //
3f37b6182SDimitry Andric //                     The LLVM Compiler Infrastructure
4f37b6182SDimitry Andric //
5f37b6182SDimitry Andric // This file is distributed under the University of Illinois Open Source
6f37b6182SDimitry Andric // License. See LICENSE.TXT for details.
7f37b6182SDimitry Andric //
8f37b6182SDimitry Andric //===----------------------------------------------------------------------===//
9f37b6182SDimitry Andric 
10db17bf38SDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
1160ff8e32SDimitry Andric #include "llvm/ADT/DenseMap.h"
12db17bf38SDimitry Andric #include "llvm/ADT/STLExtras.h"
1360ff8e32SDimitry Andric #include "llvm/ADT/Sequence.h"
1460ff8e32SDimitry Andric #include "llvm/ADT/SetVector.h"
15f37b6182SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
1660ff8e32SDimitry Andric #include "llvm/ADT/SmallVector.h"
17f37b6182SDimitry Andric #include "llvm/ADT/Statistic.h"
1860ff8e32SDimitry Andric #include "llvm/ADT/Twine.h"
19f37b6182SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
204ba319b5SDimitry Andric #include "llvm/Analysis/CFG.h"
212cab237bSDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
22*b5893f02SDimitry Andric #include "llvm/Analysis/GuardUtils.h"
234ba319b5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
2460ff8e32SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
25f37b6182SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
264ba319b5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
27f37b6182SDimitry Andric #include "llvm/Analysis/LoopPass.h"
28*b5893f02SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
29*b5893f02SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
304ba319b5SDimitry Andric #include "llvm/Analysis/Utils/Local.h"
3160ff8e32SDimitry Andric #include "llvm/IR/BasicBlock.h"
3260ff8e32SDimitry Andric #include "llvm/IR/Constant.h"
33f37b6182SDimitry Andric #include "llvm/IR/Constants.h"
34f37b6182SDimitry Andric #include "llvm/IR/Dominators.h"
35f37b6182SDimitry Andric #include "llvm/IR/Function.h"
3660ff8e32SDimitry Andric #include "llvm/IR/InstrTypes.h"
3760ff8e32SDimitry Andric #include "llvm/IR/Instruction.h"
38f37b6182SDimitry Andric #include "llvm/IR/Instructions.h"
392cab237bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
4060ff8e32SDimitry Andric #include "llvm/IR/Use.h"
4160ff8e32SDimitry Andric #include "llvm/IR/Value.h"
4260ff8e32SDimitry Andric #include "llvm/Pass.h"
4360ff8e32SDimitry Andric #include "llvm/Support/Casting.h"
44f37b6182SDimitry Andric #include "llvm/Support/Debug.h"
4560ff8e32SDimitry Andric #include "llvm/Support/ErrorHandling.h"
4660ff8e32SDimitry Andric #include "llvm/Support/GenericDomTree.h"
47f37b6182SDimitry Andric #include "llvm/Support/raw_ostream.h"
482cab237bSDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
49f37b6182SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
502cab237bSDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
51f37b6182SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
522cab237bSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
5360ff8e32SDimitry Andric #include <algorithm>
5460ff8e32SDimitry Andric #include <cassert>
5560ff8e32SDimitry Andric #include <iterator>
562cab237bSDimitry Andric #include <numeric>
5760ff8e32SDimitry Andric #include <utility>
58f37b6182SDimitry Andric 
59f37b6182SDimitry Andric #define DEBUG_TYPE "simple-loop-unswitch"
60f37b6182SDimitry Andric 
61f37b6182SDimitry Andric using namespace llvm;
62f37b6182SDimitry Andric 
63f37b6182SDimitry Andric STATISTIC(NumBranches, "Number of branches unswitched");
64f37b6182SDimitry Andric STATISTIC(NumSwitches, "Number of switches unswitched");
65*b5893f02SDimitry Andric STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
66f37b6182SDimitry Andric STATISTIC(NumTrivial, "Number of unswitches that are trivial");
67*b5893f02SDimitry Andric STATISTIC(
68*b5893f02SDimitry Andric     NumCostMultiplierSkipped,
69*b5893f02SDimitry Andric     "Number of unswitch candidates that had their cost multiplier skipped");
70f37b6182SDimitry Andric 
712cab237bSDimitry Andric static cl::opt<bool> EnableNonTrivialUnswitch(
722cab237bSDimitry Andric     "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
732cab237bSDimitry Andric     cl::desc("Forcibly enables non-trivial loop unswitching rather than "
742cab237bSDimitry Andric              "following the configuration passed into the pass."));
752cab237bSDimitry Andric 
762cab237bSDimitry Andric static cl::opt<int>
772cab237bSDimitry Andric     UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
782cab237bSDimitry Andric                       cl::desc("The cost threshold for unswitching a loop."));
792cab237bSDimitry Andric 
80*b5893f02SDimitry Andric static cl::opt<bool> EnableUnswitchCostMultiplier(
81*b5893f02SDimitry Andric     "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
82*b5893f02SDimitry Andric     cl::desc("Enable unswitch cost multiplier that prohibits exponential "
83*b5893f02SDimitry Andric              "explosion in nontrivial unswitch."));
84*b5893f02SDimitry Andric static cl::opt<int> UnswitchSiblingsToplevelDiv(
85*b5893f02SDimitry Andric     "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
86*b5893f02SDimitry Andric     cl::desc("Toplevel siblings divisor for cost multiplier."));
87*b5893f02SDimitry Andric static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
88*b5893f02SDimitry Andric     "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
89*b5893f02SDimitry Andric     cl::desc("Number of unswitch candidates that are ignored when calculating "
90*b5893f02SDimitry Andric              "cost multiplier."));
91*b5893f02SDimitry Andric static cl::opt<bool> UnswitchGuards(
92*b5893f02SDimitry Andric     "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
93*b5893f02SDimitry Andric     cl::desc("If enabled, simple loop unswitching will also consider "
94*b5893f02SDimitry Andric              "llvm.experimental.guard intrinsics as unswitch candidates."));
95*b5893f02SDimitry Andric 
964ba319b5SDimitry Andric /// Collect all of the loop invariant input values transitively used by the
974ba319b5SDimitry Andric /// homogeneous instruction graph from a given root.
984ba319b5SDimitry Andric ///
994ba319b5SDimitry Andric /// This essentially walks from a root recursively through loop variant operands
1004ba319b5SDimitry Andric /// which have the exact same opcode and finds all inputs which are loop
1014ba319b5SDimitry Andric /// invariant. For some operations these can be re-associated and unswitched out
1024ba319b5SDimitry Andric /// of the loop entirely.
1034ba319b5SDimitry Andric static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop & L,Instruction & Root,LoopInfo & LI)1044ba319b5SDimitry Andric collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
1054ba319b5SDimitry Andric                                          LoopInfo &LI) {
1064ba319b5SDimitry Andric   assert(!L.isLoopInvariant(&Root) &&
1074ba319b5SDimitry Andric          "Only need to walk the graph if root itself is not invariant.");
1084ba319b5SDimitry Andric   TinyPtrVector<Value *> Invariants;
1094ba319b5SDimitry Andric 
1104ba319b5SDimitry Andric   // Build a worklist and recurse through operators collecting invariants.
1114ba319b5SDimitry Andric   SmallVector<Instruction *, 4> Worklist;
1124ba319b5SDimitry Andric   SmallPtrSet<Instruction *, 8> Visited;
1134ba319b5SDimitry Andric   Worklist.push_back(&Root);
1144ba319b5SDimitry Andric   Visited.insert(&Root);
1154ba319b5SDimitry Andric   do {
1164ba319b5SDimitry Andric     Instruction &I = *Worklist.pop_back_val();
1174ba319b5SDimitry Andric     for (Value *OpV : I.operand_values()) {
1184ba319b5SDimitry Andric       // Skip constants as unswitching isn't interesting for them.
1194ba319b5SDimitry Andric       if (isa<Constant>(OpV))
1204ba319b5SDimitry Andric         continue;
1214ba319b5SDimitry Andric 
1224ba319b5SDimitry Andric       // Add it to our result if loop invariant.
1234ba319b5SDimitry Andric       if (L.isLoopInvariant(OpV)) {
1244ba319b5SDimitry Andric         Invariants.push_back(OpV);
1254ba319b5SDimitry Andric         continue;
1264ba319b5SDimitry Andric       }
1274ba319b5SDimitry Andric 
1284ba319b5SDimitry Andric       // If not an instruction with the same opcode, nothing we can do.
1294ba319b5SDimitry Andric       Instruction *OpI = dyn_cast<Instruction>(OpV);
1304ba319b5SDimitry Andric       if (!OpI || OpI->getOpcode() != Root.getOpcode())
1314ba319b5SDimitry Andric         continue;
1324ba319b5SDimitry Andric 
1334ba319b5SDimitry Andric       // Visit this operand.
1344ba319b5SDimitry Andric       if (Visited.insert(OpI).second)
1354ba319b5SDimitry Andric         Worklist.push_back(OpI);
1364ba319b5SDimitry Andric     }
1374ba319b5SDimitry Andric   } while (!Worklist.empty());
1384ba319b5SDimitry Andric 
1394ba319b5SDimitry Andric   return Invariants;
1404ba319b5SDimitry Andric }
1414ba319b5SDimitry Andric 
replaceLoopInvariantUses(Loop & L,Value * Invariant,Constant & Replacement)1424ba319b5SDimitry Andric static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
143f37b6182SDimitry Andric                                      Constant &Replacement) {
1444ba319b5SDimitry Andric   assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
145f37b6182SDimitry Andric 
146f37b6182SDimitry Andric   // Replace uses of LIC in the loop with the given constant.
1474ba319b5SDimitry Andric   for (auto UI = Invariant->use_begin(), UE = Invariant->use_end(); UI != UE;) {
148f37b6182SDimitry Andric     // Grab the use and walk past it so we can clobber it in the use list.
149f37b6182SDimitry Andric     Use *U = &*UI++;
150f37b6182SDimitry Andric     Instruction *UserI = dyn_cast<Instruction>(U->getUser());
151f37b6182SDimitry Andric 
152f37b6182SDimitry Andric     // Replace this use within the loop body.
1534ba319b5SDimitry Andric     if (UserI && L.contains(UserI))
1544ba319b5SDimitry Andric       U->set(&Replacement);
155f37b6182SDimitry Andric   }
156f37b6182SDimitry Andric }
157f37b6182SDimitry Andric 
1585517e702SDimitry Andric /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
1595517e702SDimitry Andric /// incoming values along this edge.
areLoopExitPHIsLoopInvariant(Loop & L,BasicBlock & ExitingBB,BasicBlock & ExitBB)1605517e702SDimitry Andric static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
1615517e702SDimitry Andric                                          BasicBlock &ExitBB) {
1625517e702SDimitry Andric   for (Instruction &I : ExitBB) {
1635517e702SDimitry Andric     auto *PN = dyn_cast<PHINode>(&I);
1645517e702SDimitry Andric     if (!PN)
1655517e702SDimitry Andric       // No more PHIs to check.
1665517e702SDimitry Andric       return true;
1675517e702SDimitry Andric 
1685517e702SDimitry Andric     // If the incoming value for this edge isn't loop invariant the unswitch
1695517e702SDimitry Andric     // won't be trivial.
1705517e702SDimitry Andric     if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
1715517e702SDimitry Andric       return false;
1725517e702SDimitry Andric   }
1735517e702SDimitry Andric   llvm_unreachable("Basic blocks should never be empty!");
1745517e702SDimitry Andric }
1755517e702SDimitry Andric 
1764ba319b5SDimitry Andric /// Insert code to test a set of loop invariant values, and conditionally branch
1774ba319b5SDimitry Andric /// on them.
buildPartialUnswitchConditionalBranch(BasicBlock & BB,ArrayRef<Value * > Invariants,bool Direction,BasicBlock & UnswitchedSucc,BasicBlock & NormalSucc)1784ba319b5SDimitry Andric static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
1794ba319b5SDimitry Andric                                                   ArrayRef<Value *> Invariants,
1804ba319b5SDimitry Andric                                                   bool Direction,
1814ba319b5SDimitry Andric                                                   BasicBlock &UnswitchedSucc,
1824ba319b5SDimitry Andric                                                   BasicBlock &NormalSucc) {
1834ba319b5SDimitry Andric   IRBuilder<> IRB(&BB);
1844ba319b5SDimitry Andric   Value *Cond = Invariants.front();
1854ba319b5SDimitry Andric   for (Value *Invariant :
1864ba319b5SDimitry Andric        make_range(std::next(Invariants.begin()), Invariants.end()))
1874ba319b5SDimitry Andric     if (Direction)
1884ba319b5SDimitry Andric       Cond = IRB.CreateOr(Cond, Invariant);
1894ba319b5SDimitry Andric     else
1904ba319b5SDimitry Andric       Cond = IRB.CreateAnd(Cond, Invariant);
1914ba319b5SDimitry Andric 
1924ba319b5SDimitry Andric   IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
1934ba319b5SDimitry Andric                    Direction ? &NormalSucc : &UnswitchedSucc);
1944ba319b5SDimitry Andric }
1954ba319b5SDimitry Andric 
1965517e702SDimitry Andric /// Rewrite the PHI nodes in an unswitched loop exit basic block.
1975517e702SDimitry Andric ///
1985517e702SDimitry Andric /// Requires that the loop exit and unswitched basic block are the same, and
1995517e702SDimitry Andric /// that the exiting block was a unique predecessor of that block. Rewrites the
2005517e702SDimitry Andric /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
2015517e702SDimitry Andric /// PHI nodes from the old preheader that now contains the unswitched
2025517e702SDimitry Andric /// terminator.
rewritePHINodesForUnswitchedExitBlock(BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH)2035517e702SDimitry Andric static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
2045517e702SDimitry Andric                                                   BasicBlock &OldExitingBB,
2055517e702SDimitry Andric                                                   BasicBlock &OldPH) {
20630785c0eSDimitry Andric   for (PHINode &PN : UnswitchedBB.phis()) {
2075517e702SDimitry Andric     // When the loop exit is directly unswitched we just need to update the
2085517e702SDimitry Andric     // incoming basic block. We loop to handle weird cases with repeated
2095517e702SDimitry Andric     // incoming blocks, but expect to typically only have one operand here.
21030785c0eSDimitry Andric     for (auto i : seq<int>(0, PN.getNumOperands())) {
21130785c0eSDimitry Andric       assert(PN.getIncomingBlock(i) == &OldExitingBB &&
2125517e702SDimitry Andric              "Found incoming block different from unique predecessor!");
21330785c0eSDimitry Andric       PN.setIncomingBlock(i, &OldPH);
2145517e702SDimitry Andric     }
2155517e702SDimitry Andric   }
2165517e702SDimitry Andric }
2175517e702SDimitry Andric 
2185517e702SDimitry Andric /// Rewrite the PHI nodes in the loop exit basic block and the split off
2195517e702SDimitry Andric /// unswitched block.
2205517e702SDimitry Andric ///
2215517e702SDimitry Andric /// Because the exit block remains an exit from the loop, this rewrites the
2225517e702SDimitry Andric /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
2235517e702SDimitry Andric /// nodes into the unswitched basic block to select between the value in the
2245517e702SDimitry Andric /// old preheader and the loop exit.
rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock & ExitBB,BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH,bool FullUnswitch)2255517e702SDimitry Andric static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
2265517e702SDimitry Andric                                                       BasicBlock &UnswitchedBB,
2275517e702SDimitry Andric                                                       BasicBlock &OldExitingBB,
2284ba319b5SDimitry Andric                                                       BasicBlock &OldPH,
2294ba319b5SDimitry Andric                                                       bool FullUnswitch) {
2305517e702SDimitry Andric   assert(&ExitBB != &UnswitchedBB &&
2315517e702SDimitry Andric          "Must have different loop exit and unswitched blocks!");
2325517e702SDimitry Andric   Instruction *InsertPt = &*UnswitchedBB.begin();
23330785c0eSDimitry Andric   for (PHINode &PN : ExitBB.phis()) {
23430785c0eSDimitry Andric     auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
23530785c0eSDimitry Andric                                   PN.getName() + ".split", InsertPt);
2365517e702SDimitry Andric 
2375517e702SDimitry Andric     // Walk backwards over the old PHI node's inputs to minimize the cost of
2385517e702SDimitry Andric     // removing each one. We have to do this weird loop manually so that we
2395517e702SDimitry Andric     // create the same number of new incoming edges in the new PHI as we expect
2405517e702SDimitry Andric     // each case-based edge to be included in the unswitched switch in some
2415517e702SDimitry Andric     // cases.
2425517e702SDimitry Andric     // FIXME: This is really, really gross. It would be much cleaner if LLVM
2435517e702SDimitry Andric     // allowed us to create a single entry for a predecessor block without
2445517e702SDimitry Andric     // having separate entries for each "edge" even though these edges are
2455517e702SDimitry Andric     // required to produce identical results.
24630785c0eSDimitry Andric     for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
24730785c0eSDimitry Andric       if (PN.getIncomingBlock(i) != &OldExitingBB)
2485517e702SDimitry Andric         continue;
2495517e702SDimitry Andric 
2504ba319b5SDimitry Andric       Value *Incoming = PN.getIncomingValue(i);
2514ba319b5SDimitry Andric       if (FullUnswitch)
2524ba319b5SDimitry Andric         // No more edge from the old exiting block to the exit block.
2534ba319b5SDimitry Andric         PN.removeIncomingValue(i);
2544ba319b5SDimitry Andric 
2555517e702SDimitry Andric       NewPN->addIncoming(Incoming, &OldPH);
2565517e702SDimitry Andric     }
2575517e702SDimitry Andric 
2585517e702SDimitry Andric     // Now replace the old PHI with the new one and wire the old one in as an
2595517e702SDimitry Andric     // input to the new one.
26030785c0eSDimitry Andric     PN.replaceAllUsesWith(NewPN);
26130785c0eSDimitry Andric     NewPN->addIncoming(&PN, &ExitBB);
2625517e702SDimitry Andric   }
2635517e702SDimitry Andric }
2645517e702SDimitry Andric 
2654ba319b5SDimitry Andric /// Hoist the current loop up to the innermost loop containing a remaining exit.
2664ba319b5SDimitry Andric ///
2674ba319b5SDimitry Andric /// Because we've removed an exit from the loop, we may have changed the set of
2684ba319b5SDimitry Andric /// loops reachable and need to move the current loop up the loop nest or even
2694ba319b5SDimitry Andric /// to an entirely separate nest.
hoistLoopToNewParent(Loop & L,BasicBlock & Preheader,DominatorTree & DT,LoopInfo & LI)2704ba319b5SDimitry Andric static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
2714ba319b5SDimitry Andric                                  DominatorTree &DT, LoopInfo &LI) {
2724ba319b5SDimitry Andric   // If the loop is already at the top level, we can't hoist it anywhere.
2734ba319b5SDimitry Andric   Loop *OldParentL = L.getParentLoop();
2744ba319b5SDimitry Andric   if (!OldParentL)
2754ba319b5SDimitry Andric     return;
2764ba319b5SDimitry Andric 
2774ba319b5SDimitry Andric   SmallVector<BasicBlock *, 4> Exits;
2784ba319b5SDimitry Andric   L.getExitBlocks(Exits);
2794ba319b5SDimitry Andric   Loop *NewParentL = nullptr;
2804ba319b5SDimitry Andric   for (auto *ExitBB : Exits)
2814ba319b5SDimitry Andric     if (Loop *ExitL = LI.getLoopFor(ExitBB))
2824ba319b5SDimitry Andric       if (!NewParentL || NewParentL->contains(ExitL))
2834ba319b5SDimitry Andric         NewParentL = ExitL;
2844ba319b5SDimitry Andric 
2854ba319b5SDimitry Andric   if (NewParentL == OldParentL)
2864ba319b5SDimitry Andric     return;
2874ba319b5SDimitry Andric 
2884ba319b5SDimitry Andric   // The new parent loop (if different) should always contain the old one.
2894ba319b5SDimitry Andric   if (NewParentL)
2904ba319b5SDimitry Andric     assert(NewParentL->contains(OldParentL) &&
2914ba319b5SDimitry Andric            "Can only hoist this loop up the nest!");
2924ba319b5SDimitry Andric 
2934ba319b5SDimitry Andric   // The preheader will need to move with the body of this loop. However,
2944ba319b5SDimitry Andric   // because it isn't in this loop we also need to update the primary loop map.
2954ba319b5SDimitry Andric   assert(OldParentL == LI.getLoopFor(&Preheader) &&
2964ba319b5SDimitry Andric          "Parent loop of this loop should contain this loop's preheader!");
2974ba319b5SDimitry Andric   LI.changeLoopFor(&Preheader, NewParentL);
2984ba319b5SDimitry Andric 
2994ba319b5SDimitry Andric   // Remove this loop from its old parent.
3004ba319b5SDimitry Andric   OldParentL->removeChildLoop(&L);
3014ba319b5SDimitry Andric 
3024ba319b5SDimitry Andric   // Add the loop either to the new parent or as a top-level loop.
3034ba319b5SDimitry Andric   if (NewParentL)
3044ba319b5SDimitry Andric     NewParentL->addChildLoop(&L);
3054ba319b5SDimitry Andric   else
3064ba319b5SDimitry Andric     LI.addTopLevelLoop(&L);
3074ba319b5SDimitry Andric 
3084ba319b5SDimitry Andric   // Remove this loops blocks from the old parent and every other loop up the
3094ba319b5SDimitry Andric   // nest until reaching the new parent. Also update all of these
3104ba319b5SDimitry Andric   // no-longer-containing loops to reflect the nesting change.
3114ba319b5SDimitry Andric   for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
3124ba319b5SDimitry Andric        OldContainingL = OldContainingL->getParentLoop()) {
3134ba319b5SDimitry Andric     llvm::erase_if(OldContainingL->getBlocksVector(),
3144ba319b5SDimitry Andric                    [&](const BasicBlock *BB) {
3154ba319b5SDimitry Andric                      return BB == &Preheader || L.contains(BB);
3164ba319b5SDimitry Andric                    });
3174ba319b5SDimitry Andric 
3184ba319b5SDimitry Andric     OldContainingL->getBlocksSet().erase(&Preheader);
3194ba319b5SDimitry Andric     for (BasicBlock *BB : L.blocks())
3204ba319b5SDimitry Andric       OldContainingL->getBlocksSet().erase(BB);
3214ba319b5SDimitry Andric 
3224ba319b5SDimitry Andric     // Because we just hoisted a loop out of this one, we have essentially
3234ba319b5SDimitry Andric     // created new exit paths from it. That means we need to form LCSSA PHI
3244ba319b5SDimitry Andric     // nodes for values used in the no-longer-nested loop.
3254ba319b5SDimitry Andric     formLCSSA(*OldContainingL, DT, &LI, nullptr);
3264ba319b5SDimitry Andric 
3274ba319b5SDimitry Andric     // We shouldn't need to form dedicated exits because the exit introduced
328*b5893f02SDimitry Andric     // here is the (just split by unswitching) preheader. However, after trivial
329*b5893f02SDimitry Andric     // unswitching it is possible to get new non-dedicated exits out of parent
330*b5893f02SDimitry Andric     // loop so let's conservatively form dedicated exit blocks and figure out
331*b5893f02SDimitry Andric     // if we can optimize later.
332*b5893f02SDimitry Andric     formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
3334ba319b5SDimitry Andric   }
3344ba319b5SDimitry Andric }
3354ba319b5SDimitry Andric 
336f37b6182SDimitry Andric /// Unswitch a trivial branch if the condition is loop invariant.
337f37b6182SDimitry Andric ///
338f37b6182SDimitry Andric /// This routine should only be called when loop code leading to the branch has
339f37b6182SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
340f37b6182SDimitry Andric /// condition is invariant and one of the successors is a loop exit. This
341f37b6182SDimitry Andric /// allows us to unswitch without duplicating the loop, making it trivial.
342f37b6182SDimitry Andric ///
343f37b6182SDimitry Andric /// If this routine fails to unswitch the branch it returns false.
344f37b6182SDimitry Andric ///
345f37b6182SDimitry Andric /// If the branch can be unswitched, this routine splits the preheader and
346f37b6182SDimitry Andric /// hoists the branch above that split. Preserves loop simplified form
347f37b6182SDimitry Andric /// (splitting the exit block as necessary). It simplifies the branch within
348f37b6182SDimitry Andric /// the loop to an unconditional branch but doesn't remove it entirely. Further
349f37b6182SDimitry Andric /// cleanup can be done with some simplify-cfg like pass.
3504ba319b5SDimitry Andric ///
3514ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
3524ba319b5SDimitry Andric /// invalidated by this.
unswitchTrivialBranch(Loop & L,BranchInst & BI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)353f37b6182SDimitry Andric static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
354*b5893f02SDimitry Andric                                   LoopInfo &LI, ScalarEvolution *SE,
355*b5893f02SDimitry Andric                                   MemorySSAUpdater *MSSAU) {
356f37b6182SDimitry Andric   assert(BI.isConditional() && "Can only unswitch a conditional branch!");
3574ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");
358f37b6182SDimitry Andric 
3594ba319b5SDimitry Andric   // The loop invariant values that we want to unswitch.
3604ba319b5SDimitry Andric   TinyPtrVector<Value *> Invariants;
361f37b6182SDimitry Andric 
3624ba319b5SDimitry Andric   // When true, we're fully unswitching the branch rather than just unswitching
3634ba319b5SDimitry Andric   // some input conditions to the branch.
3644ba319b5SDimitry Andric   bool FullUnswitch = false;
3654ba319b5SDimitry Andric 
3664ba319b5SDimitry Andric   if (L.isLoopInvariant(BI.getCondition())) {
3674ba319b5SDimitry Andric     Invariants.push_back(BI.getCondition());
3684ba319b5SDimitry Andric     FullUnswitch = true;
3694ba319b5SDimitry Andric   } else {
3704ba319b5SDimitry Andric     if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
3714ba319b5SDimitry Andric       Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
3724ba319b5SDimitry Andric     if (Invariants.empty())
3734ba319b5SDimitry Andric       // Couldn't find invariant inputs!
374f37b6182SDimitry Andric       return false;
3754ba319b5SDimitry Andric   }
376f37b6182SDimitry Andric 
3774ba319b5SDimitry Andric   // Check that one of the branch's successors exits, and which one.
3784ba319b5SDimitry Andric   bool ExitDirection = true;
379f37b6182SDimitry Andric   int LoopExitSuccIdx = 0;
380f37b6182SDimitry Andric   auto *LoopExitBB = BI.getSuccessor(0);
3814ba319b5SDimitry Andric   if (L.contains(LoopExitBB)) {
3824ba319b5SDimitry Andric     ExitDirection = false;
383f37b6182SDimitry Andric     LoopExitSuccIdx = 1;
384f37b6182SDimitry Andric     LoopExitBB = BI.getSuccessor(1);
3854ba319b5SDimitry Andric     if (L.contains(LoopExitBB))
386f37b6182SDimitry Andric       return false;
387f37b6182SDimitry Andric   }
388f37b6182SDimitry Andric   auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
3895517e702SDimitry Andric   auto *ParentBB = BI.getParent();
3905517e702SDimitry Andric   if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
391f37b6182SDimitry Andric     return false;
392f37b6182SDimitry Andric 
3934ba319b5SDimitry Andric   // When unswitching only part of the branch's condition, we need the exit
3944ba319b5SDimitry Andric   // block to be reached directly from the partially unswitched input. This can
3954ba319b5SDimitry Andric   // be done when the exit block is along the true edge and the branch condition
3964ba319b5SDimitry Andric   // is a graph of `or` operations, or the exit block is along the false edge
3974ba319b5SDimitry Andric   // and the condition is a graph of `and` operations.
3984ba319b5SDimitry Andric   if (!FullUnswitch) {
3994ba319b5SDimitry Andric     if (ExitDirection) {
4004ba319b5SDimitry Andric       if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or)
4014ba319b5SDimitry Andric         return false;
4024ba319b5SDimitry Andric     } else {
4034ba319b5SDimitry Andric       if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::And)
4044ba319b5SDimitry Andric         return false;
4054ba319b5SDimitry Andric     }
4064ba319b5SDimitry Andric   }
4074ba319b5SDimitry Andric 
4084ba319b5SDimitry Andric   LLVM_DEBUG({
4094ba319b5SDimitry Andric     dbgs() << "    unswitching trivial invariant conditions for: " << BI
4104ba319b5SDimitry Andric            << "\n";
4114ba319b5SDimitry Andric     for (Value *Invariant : Invariants) {
4124ba319b5SDimitry Andric       dbgs() << "      " << *Invariant << " == true";
4134ba319b5SDimitry Andric       if (Invariant != Invariants.back())
4144ba319b5SDimitry Andric         dbgs() << " ||";
4154ba319b5SDimitry Andric       dbgs() << "\n";
4164ba319b5SDimitry Andric     }
4174ba319b5SDimitry Andric   });
4184ba319b5SDimitry Andric 
4194ba319b5SDimitry Andric   // If we have scalar evolutions, we need to invalidate them including this
4204ba319b5SDimitry Andric   // loop and the loop containing the exit block.
4214ba319b5SDimitry Andric   if (SE) {
4224ba319b5SDimitry Andric     if (Loop *ExitL = LI.getLoopFor(LoopExitBB))
4234ba319b5SDimitry Andric       SE->forgetLoop(ExitL);
4244ba319b5SDimitry Andric     else
4254ba319b5SDimitry Andric       // Forget the entire nest as this exits the entire nest.
4264ba319b5SDimitry Andric       SE->forgetTopmostLoop(&L);
4274ba319b5SDimitry Andric   }
428f37b6182SDimitry Andric 
429*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
430*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
431*b5893f02SDimitry Andric 
432f37b6182SDimitry Andric   // Split the preheader, so that we know that there is a safe place to insert
433f37b6182SDimitry Andric   // the conditional branch. We will change the preheader to have a conditional
434f37b6182SDimitry Andric   // branch on LoopCond.
435f37b6182SDimitry Andric   BasicBlock *OldPH = L.getLoopPreheader();
436*b5893f02SDimitry Andric   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
437f37b6182SDimitry Andric 
438f37b6182SDimitry Andric   // Now that we have a place to insert the conditional branch, create a place
439f37b6182SDimitry Andric   // to branch to: this is the exit block out of the loop that we are
440f37b6182SDimitry Andric   // unswitching. We need to split this if there are other loop predecessors.
441f37b6182SDimitry Andric   // Because the loop is in simplified form, *any* other predecessor is enough.
442f37b6182SDimitry Andric   BasicBlock *UnswitchedBB;
4434ba319b5SDimitry Andric   if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
4444ba319b5SDimitry Andric     assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
4455517e702SDimitry Andric            "A branch's parent isn't a predecessor!");
446f37b6182SDimitry Andric     UnswitchedBB = LoopExitBB;
447f37b6182SDimitry Andric   } else {
448*b5893f02SDimitry Andric     UnswitchedBB =
449*b5893f02SDimitry Andric         SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
450f37b6182SDimitry Andric   }
451f37b6182SDimitry Andric 
452*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
453*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
454*b5893f02SDimitry Andric 
4554ba319b5SDimitry Andric   // Actually move the invariant uses into the unswitched position. If possible,
4564ba319b5SDimitry Andric   // we do this by moving the instructions, but when doing partial unswitching
4574ba319b5SDimitry Andric   // we do it by building a new merge of the values in the unswitched position.
458f37b6182SDimitry Andric   OldPH->getTerminator()->eraseFromParent();
4594ba319b5SDimitry Andric   if (FullUnswitch) {
4604ba319b5SDimitry Andric     // If fully unswitching, we can use the existing branch instruction.
4614ba319b5SDimitry Andric     // Splice it into the old PH to gate reaching the new preheader and re-point
4624ba319b5SDimitry Andric     // its successors.
4634ba319b5SDimitry Andric     OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
4644ba319b5SDimitry Andric                                 BI);
465*b5893f02SDimitry Andric     if (MSSAU) {
466*b5893f02SDimitry Andric       // Temporarily clone the terminator, to make MSSA update cheaper by
467*b5893f02SDimitry Andric       // separating "insert edge" updates from "remove edge" ones.
468*b5893f02SDimitry Andric       ParentBB->getInstList().push_back(BI.clone());
469*b5893f02SDimitry Andric     } else {
470f37b6182SDimitry Andric       // Create a new unconditional branch that will continue the loop as a new
471f37b6182SDimitry Andric       // terminator.
472f37b6182SDimitry Andric       BranchInst::Create(ContinueBB, ParentBB);
473*b5893f02SDimitry Andric     }
474*b5893f02SDimitry Andric     BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
475*b5893f02SDimitry Andric     BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
4764ba319b5SDimitry Andric   } else {
4774ba319b5SDimitry Andric     // Only unswitching a subset of inputs to the condition, so we will need to
4784ba319b5SDimitry Andric     // build a new branch that merges the invariant inputs.
4794ba319b5SDimitry Andric     if (ExitDirection)
4804ba319b5SDimitry Andric       assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
4814ba319b5SDimitry Andric                  Instruction::Or &&
4824ba319b5SDimitry Andric              "Must have an `or` of `i1`s for the condition!");
4834ba319b5SDimitry Andric     else
4844ba319b5SDimitry Andric       assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
4854ba319b5SDimitry Andric                  Instruction::And &&
4864ba319b5SDimitry Andric              "Must have an `and` of `i1`s for the condition!");
4874ba319b5SDimitry Andric     buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
4884ba319b5SDimitry Andric                                           *UnswitchedBB, *NewPH);
4894ba319b5SDimitry Andric   }
490f37b6182SDimitry Andric 
491*b5893f02SDimitry Andric   // Update the dominator tree with the added edge.
492*b5893f02SDimitry Andric   DT.insertEdge(OldPH, UnswitchedBB);
493*b5893f02SDimitry Andric 
494*b5893f02SDimitry Andric   // After the dominator tree was updated with the added edge, update MemorySSA
495*b5893f02SDimitry Andric   // if available.
496*b5893f02SDimitry Andric   if (MSSAU) {
497*b5893f02SDimitry Andric     SmallVector<CFGUpdate, 1> Updates;
498*b5893f02SDimitry Andric     Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
499*b5893f02SDimitry Andric     MSSAU->applyInsertUpdates(Updates, DT);
500*b5893f02SDimitry Andric   }
501*b5893f02SDimitry Andric 
502*b5893f02SDimitry Andric   // Finish updating dominator tree and memory ssa for full unswitch.
503*b5893f02SDimitry Andric   if (FullUnswitch) {
504*b5893f02SDimitry Andric     if (MSSAU) {
505*b5893f02SDimitry Andric       // Remove the cloned branch instruction.
506*b5893f02SDimitry Andric       ParentBB->getTerminator()->eraseFromParent();
507*b5893f02SDimitry Andric       // Create unconditional branch now.
508*b5893f02SDimitry Andric       BranchInst::Create(ContinueBB, ParentBB);
509*b5893f02SDimitry Andric       MSSAU->removeEdge(ParentBB, LoopExitBB);
510*b5893f02SDimitry Andric     }
511*b5893f02SDimitry Andric     DT.deleteEdge(ParentBB, LoopExitBB);
512*b5893f02SDimitry Andric   }
513*b5893f02SDimitry Andric 
514*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
515*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
516*b5893f02SDimitry Andric 
5175517e702SDimitry Andric   // Rewrite the relevant PHI nodes.
5185517e702SDimitry Andric   if (UnswitchedBB == LoopExitBB)
5195517e702SDimitry Andric     rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
5205517e702SDimitry Andric   else
5215517e702SDimitry Andric     rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
5224ba319b5SDimitry Andric                                               *ParentBB, *OldPH, FullUnswitch);
5235517e702SDimitry Andric 
5244ba319b5SDimitry Andric   // The constant we can replace all of our invariants with inside the loop
5254ba319b5SDimitry Andric   // body. If any of the invariants have a value other than this the loop won't
5264ba319b5SDimitry Andric   // be entered.
5274ba319b5SDimitry Andric   ConstantInt *Replacement = ExitDirection
5284ba319b5SDimitry Andric                                  ? ConstantInt::getFalse(BI.getContext())
5294ba319b5SDimitry Andric                                  : ConstantInt::getTrue(BI.getContext());
530f37b6182SDimitry Andric 
531f37b6182SDimitry Andric   // Since this is an i1 condition we can also trivially replace uses of it
532f37b6182SDimitry Andric   // within the loop with a constant.
5334ba319b5SDimitry Andric   for (Value *Invariant : Invariants)
5344ba319b5SDimitry Andric     replaceLoopInvariantUses(L, Invariant, *Replacement);
5354ba319b5SDimitry Andric 
5364ba319b5SDimitry Andric   // If this was full unswitching, we may have changed the nesting relationship
5374ba319b5SDimitry Andric   // for this loop so hoist it to its correct parent if needed.
5384ba319b5SDimitry Andric   if (FullUnswitch)
5394ba319b5SDimitry Andric     hoistLoopToNewParent(L, *NewPH, DT, LI);
540f37b6182SDimitry Andric 
541*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "    done: unswitching trivial branch...\n");
542f37b6182SDimitry Andric   ++NumTrivial;
543f37b6182SDimitry Andric   ++NumBranches;
544f37b6182SDimitry Andric   return true;
545f37b6182SDimitry Andric }
546f37b6182SDimitry Andric 
547f37b6182SDimitry Andric /// Unswitch a trivial switch if the condition is loop invariant.
548f37b6182SDimitry Andric ///
549f37b6182SDimitry Andric /// This routine should only be called when loop code leading to the switch has
550f37b6182SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
551f37b6182SDimitry Andric /// condition is invariant and that at least one of the successors is a loop
552f37b6182SDimitry Andric /// exit. This allows us to unswitch without duplicating the loop, making it
553f37b6182SDimitry Andric /// trivial.
554f37b6182SDimitry Andric ///
555f37b6182SDimitry Andric /// If this routine fails to unswitch the switch it returns false.
556f37b6182SDimitry Andric ///
557f37b6182SDimitry Andric /// If the switch can be unswitched, this routine splits the preheader and
558f37b6182SDimitry Andric /// copies the switch above that split. If the default case is one of the
559f37b6182SDimitry Andric /// exiting cases, it copies the non-exiting cases and points them at the new
560f37b6182SDimitry Andric /// preheader. If the default case is not exiting, it copies the exiting cases
561f37b6182SDimitry Andric /// and points the default at the preheader. It preserves loop simplified form
562f37b6182SDimitry Andric /// (splitting the exit blocks as necessary). It simplifies the switch within
563f37b6182SDimitry Andric /// the loop by removing now-dead cases. If the default case is one of those
564f37b6182SDimitry Andric /// unswitched, it replaces its destination with a new basic block containing
565f37b6182SDimitry Andric /// only unreachable. Such basic blocks, while technically loop exits, are not
566f37b6182SDimitry Andric /// considered for unswitching so this is a stable transform and the same
567f37b6182SDimitry Andric /// switch will not be revisited. If after unswitching there is only a single
568f37b6182SDimitry Andric /// in-loop successor, the switch is further simplified to an unconditional
569f37b6182SDimitry Andric /// branch. Still more cleanup can be done with some simplify-cfg like pass.
5704ba319b5SDimitry Andric ///
5714ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
5724ba319b5SDimitry Andric /// invalidated by this.
unswitchTrivialSwitch(Loop & L,SwitchInst & SI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)573f37b6182SDimitry Andric static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
574*b5893f02SDimitry Andric                                   LoopInfo &LI, ScalarEvolution *SE,
575*b5893f02SDimitry Andric                                   MemorySSAUpdater *MSSAU) {
5764ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
577f37b6182SDimitry Andric   Value *LoopCond = SI.getCondition();
578f37b6182SDimitry Andric 
579f37b6182SDimitry Andric   // If this isn't switching on an invariant condition, we can't unswitch it.
580f37b6182SDimitry Andric   if (!L.isLoopInvariant(LoopCond))
581f37b6182SDimitry Andric     return false;
582f37b6182SDimitry Andric 
5835517e702SDimitry Andric   auto *ParentBB = SI.getParent();
5845517e702SDimitry Andric 
585f37b6182SDimitry Andric   SmallVector<int, 4> ExitCaseIndices;
586f37b6182SDimitry Andric   for (auto Case : SI.cases()) {
587f37b6182SDimitry Andric     auto *SuccBB = Case.getCaseSuccessor();
5884ba319b5SDimitry Andric     if (!L.contains(SuccBB) &&
5895517e702SDimitry Andric         areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
590f37b6182SDimitry Andric       ExitCaseIndices.push_back(Case.getCaseIndex());
591f37b6182SDimitry Andric   }
592f37b6182SDimitry Andric   BasicBlock *DefaultExitBB = nullptr;
5934ba319b5SDimitry Andric   if (!L.contains(SI.getDefaultDest()) &&
5945517e702SDimitry Andric       areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
595f37b6182SDimitry Andric       !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
596f37b6182SDimitry Andric     DefaultExitBB = SI.getDefaultDest();
597f37b6182SDimitry Andric   else if (ExitCaseIndices.empty())
598f37b6182SDimitry Andric     return false;
599f37b6182SDimitry Andric 
600*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n");
601*b5893f02SDimitry Andric 
602*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
603*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
604f37b6182SDimitry Andric 
6054ba319b5SDimitry Andric   // We may need to invalidate SCEVs for the outermost loop reached by any of
6064ba319b5SDimitry Andric   // the exits.
6074ba319b5SDimitry Andric   Loop *OuterL = &L;
6084ba319b5SDimitry Andric 
6094ba319b5SDimitry Andric   if (DefaultExitBB) {
6104ba319b5SDimitry Andric     // Clear out the default destination temporarily to allow accurate
6114ba319b5SDimitry Andric     // predecessor lists to be examined below.
6124ba319b5SDimitry Andric     SI.setDefaultDest(nullptr);
6134ba319b5SDimitry Andric     // Check the loop containing this exit.
6144ba319b5SDimitry Andric     Loop *ExitL = LI.getLoopFor(DefaultExitBB);
6154ba319b5SDimitry Andric     if (!ExitL || ExitL->contains(OuterL))
6164ba319b5SDimitry Andric       OuterL = ExitL;
6174ba319b5SDimitry Andric   }
6184ba319b5SDimitry Andric 
6194ba319b5SDimitry Andric   // Store the exit cases into a separate data structure and remove them from
6204ba319b5SDimitry Andric   // the switch.
621f37b6182SDimitry Andric   SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
622f37b6182SDimitry Andric   ExitCases.reserve(ExitCaseIndices.size());
623f37b6182SDimitry Andric   // We walk the case indices backwards so that we remove the last case first
624f37b6182SDimitry Andric   // and don't disrupt the earlier indices.
625f37b6182SDimitry Andric   for (unsigned Index : reverse(ExitCaseIndices)) {
626f37b6182SDimitry Andric     auto CaseI = SI.case_begin() + Index;
6274ba319b5SDimitry Andric     // Compute the outer loop from this exit.
6284ba319b5SDimitry Andric     Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
6294ba319b5SDimitry Andric     if (!ExitL || ExitL->contains(OuterL))
6304ba319b5SDimitry Andric       OuterL = ExitL;
631f37b6182SDimitry Andric     // Save the value of this case.
632f37b6182SDimitry Andric     ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
633f37b6182SDimitry Andric     // Delete the unswitched cases.
634f37b6182SDimitry Andric     SI.removeCase(CaseI);
635f37b6182SDimitry Andric   }
636f37b6182SDimitry Andric 
6374ba319b5SDimitry Andric   if (SE) {
6384ba319b5SDimitry Andric     if (OuterL)
6394ba319b5SDimitry Andric       SE->forgetLoop(OuterL);
6404ba319b5SDimitry Andric     else
6414ba319b5SDimitry Andric       SE->forgetTopmostLoop(&L);
6424ba319b5SDimitry Andric   }
6434ba319b5SDimitry Andric 
644f37b6182SDimitry Andric   // Check if after this all of the remaining cases point at the same
645f37b6182SDimitry Andric   // successor.
646f37b6182SDimitry Andric   BasicBlock *CommonSuccBB = nullptr;
647f37b6182SDimitry Andric   if (SI.getNumCases() > 0 &&
648f37b6182SDimitry Andric       std::all_of(std::next(SI.case_begin()), SI.case_end(),
649f37b6182SDimitry Andric                   [&SI](const SwitchInst::CaseHandle &Case) {
650f37b6182SDimitry Andric                     return Case.getCaseSuccessor() ==
651f37b6182SDimitry Andric                            SI.case_begin()->getCaseSuccessor();
652f37b6182SDimitry Andric                   }))
653f37b6182SDimitry Andric     CommonSuccBB = SI.case_begin()->getCaseSuccessor();
6544ba319b5SDimitry Andric   if (!DefaultExitBB) {
655f37b6182SDimitry Andric     // If we're not unswitching the default, we need it to match any cases to
656f37b6182SDimitry Andric     // have a common successor or if we have no cases it is the common
657f37b6182SDimitry Andric     // successor.
658f37b6182SDimitry Andric     if (SI.getNumCases() == 0)
659f37b6182SDimitry Andric       CommonSuccBB = SI.getDefaultDest();
660f37b6182SDimitry Andric     else if (SI.getDefaultDest() != CommonSuccBB)
661f37b6182SDimitry Andric       CommonSuccBB = nullptr;
662f37b6182SDimitry Andric   }
663f37b6182SDimitry Andric 
664f37b6182SDimitry Andric   // Split the preheader, so that we know that there is a safe place to insert
665f37b6182SDimitry Andric   // the switch.
666f37b6182SDimitry Andric   BasicBlock *OldPH = L.getLoopPreheader();
667*b5893f02SDimitry Andric   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
668f37b6182SDimitry Andric   OldPH->getTerminator()->eraseFromParent();
669f37b6182SDimitry Andric 
670f37b6182SDimitry Andric   // Now add the unswitched switch.
671f37b6182SDimitry Andric   auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
672f37b6182SDimitry Andric 
6735517e702SDimitry Andric   // Rewrite the IR for the unswitched basic blocks. This requires two steps.
6745517e702SDimitry Andric   // First, we split any exit blocks with remaining in-loop predecessors. Then
6755517e702SDimitry Andric   // we update the PHIs in one of two ways depending on if there was a split.
6765517e702SDimitry Andric   // We walk in reverse so that we split in the same order as the cases
6775517e702SDimitry Andric   // appeared. This is purely for convenience of reading the resulting IR, but
6785517e702SDimitry Andric   // it doesn't cost anything really.
6795517e702SDimitry Andric   SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
680f37b6182SDimitry Andric   SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
681f37b6182SDimitry Andric   // Handle the default exit if necessary.
682f37b6182SDimitry Andric   // FIXME: It'd be great if we could merge this with the loop below but LLVM's
683f37b6182SDimitry Andric   // ranges aren't quite powerful enough yet.
6845517e702SDimitry Andric   if (DefaultExitBB) {
6855517e702SDimitry Andric     if (pred_empty(DefaultExitBB)) {
6865517e702SDimitry Andric       UnswitchedExitBBs.insert(DefaultExitBB);
6875517e702SDimitry Andric       rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
6885517e702SDimitry Andric     } else {
689f37b6182SDimitry Andric       auto *SplitBB =
690*b5893f02SDimitry Andric           SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
691*b5893f02SDimitry Andric       rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
692*b5893f02SDimitry Andric                                                 *ParentBB, *OldPH,
693*b5893f02SDimitry Andric                                                 /*FullUnswitch*/ true);
694f37b6182SDimitry Andric       DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
695f37b6182SDimitry Andric     }
6965517e702SDimitry Andric   }
697f37b6182SDimitry Andric   // Note that we must use a reference in the for loop so that we update the
698f37b6182SDimitry Andric   // container.
699f37b6182SDimitry Andric   for (auto &CasePair : reverse(ExitCases)) {
700f37b6182SDimitry Andric     // Grab a reference to the exit block in the pair so that we can update it.
7015517e702SDimitry Andric     BasicBlock *ExitBB = CasePair.second;
702f37b6182SDimitry Andric 
703f37b6182SDimitry Andric     // If this case is the last edge into the exit block, we can simply reuse it
704f37b6182SDimitry Andric     // as it will no longer be a loop exit. No mapping necessary.
7055517e702SDimitry Andric     if (pred_empty(ExitBB)) {
7065517e702SDimitry Andric       // Only rewrite once.
7075517e702SDimitry Andric       if (UnswitchedExitBBs.insert(ExitBB).second)
7085517e702SDimitry Andric         rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
709f37b6182SDimitry Andric       continue;
7105517e702SDimitry Andric     }
711f37b6182SDimitry Andric 
712f37b6182SDimitry Andric     // Otherwise we need to split the exit block so that we retain an exit
713f37b6182SDimitry Andric     // block from the loop and a target for the unswitched condition.
714f37b6182SDimitry Andric     BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
715f37b6182SDimitry Andric     if (!SplitExitBB) {
716f37b6182SDimitry Andric       // If this is the first time we see this, do the split and remember it.
717*b5893f02SDimitry Andric       SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
718*b5893f02SDimitry Andric       rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
719*b5893f02SDimitry Andric                                                 *ParentBB, *OldPH,
720*b5893f02SDimitry Andric                                                 /*FullUnswitch*/ true);
721f37b6182SDimitry Andric     }
7225517e702SDimitry Andric     // Update the case pair to point to the split block.
7235517e702SDimitry Andric     CasePair.second = SplitExitBB;
724f37b6182SDimitry Andric   }
725f37b6182SDimitry Andric 
726f37b6182SDimitry Andric   // Now add the unswitched cases. We do this in reverse order as we built them
727f37b6182SDimitry Andric   // in reverse order.
728f37b6182SDimitry Andric   for (auto CasePair : reverse(ExitCases)) {
729f37b6182SDimitry Andric     ConstantInt *CaseVal = CasePair.first;
730f37b6182SDimitry Andric     BasicBlock *UnswitchedBB = CasePair.second;
731f37b6182SDimitry Andric 
732f37b6182SDimitry Andric     NewSI->addCase(CaseVal, UnswitchedBB);
733f37b6182SDimitry Andric   }
734f37b6182SDimitry Andric 
735f37b6182SDimitry Andric   // If the default was unswitched, re-point it and add explicit cases for
736f37b6182SDimitry Andric   // entering the loop.
737f37b6182SDimitry Andric   if (DefaultExitBB) {
738f37b6182SDimitry Andric     NewSI->setDefaultDest(DefaultExitBB);
739f37b6182SDimitry Andric 
740f37b6182SDimitry Andric     // We removed all the exit cases, so we just copy the cases to the
741f37b6182SDimitry Andric     // unswitched switch.
742f37b6182SDimitry Andric     for (auto Case : SI.cases())
743f37b6182SDimitry Andric       NewSI->addCase(Case.getCaseValue(), NewPH);
744f37b6182SDimitry Andric   }
745f37b6182SDimitry Andric 
746f37b6182SDimitry Andric   // If we ended up with a common successor for every path through the switch
747f37b6182SDimitry Andric   // after unswitching, rewrite it to an unconditional branch to make it easy
748f37b6182SDimitry Andric   // to recognize. Otherwise we potentially have to recognize the default case
749f37b6182SDimitry Andric   // pointing at unreachable and other complexity.
750f37b6182SDimitry Andric   if (CommonSuccBB) {
751f37b6182SDimitry Andric     BasicBlock *BB = SI.getParent();
7524ba319b5SDimitry Andric     // We may have had multiple edges to this common successor block, so remove
7534ba319b5SDimitry Andric     // them as predecessors. We skip the first one, either the default or the
7544ba319b5SDimitry Andric     // actual first case.
7554ba319b5SDimitry Andric     bool SkippedFirst = DefaultExitBB == nullptr;
7564ba319b5SDimitry Andric     for (auto Case : SI.cases()) {
7574ba319b5SDimitry Andric       assert(Case.getCaseSuccessor() == CommonSuccBB &&
7584ba319b5SDimitry Andric              "Non-common successor!");
7594ba319b5SDimitry Andric       (void)Case;
7604ba319b5SDimitry Andric       if (!SkippedFirst) {
7614ba319b5SDimitry Andric         SkippedFirst = true;
7624ba319b5SDimitry Andric         continue;
7634ba319b5SDimitry Andric       }
7644ba319b5SDimitry Andric       CommonSuccBB->removePredecessor(BB,
7654ba319b5SDimitry Andric                                       /*DontDeleteUselessPHIs*/ true);
7664ba319b5SDimitry Andric     }
7674ba319b5SDimitry Andric     // Now nuke the switch and replace it with a direct branch.
768f37b6182SDimitry Andric     SI.eraseFromParent();
769f37b6182SDimitry Andric     BranchInst::Create(CommonSuccBB, BB);
7704ba319b5SDimitry Andric   } else if (DefaultExitBB) {
7714ba319b5SDimitry Andric     assert(SI.getNumCases() > 0 &&
7724ba319b5SDimitry Andric            "If we had no cases we'd have a common successor!");
7734ba319b5SDimitry Andric     // Move the last case to the default successor. This is valid as if the
7744ba319b5SDimitry Andric     // default got unswitched it cannot be reached. This has the advantage of
7754ba319b5SDimitry Andric     // being simple and keeping the number of edges from this switch to
7764ba319b5SDimitry Andric     // successors the same, and avoiding any PHI update complexity.
7774ba319b5SDimitry Andric     auto LastCaseI = std::prev(SI.case_end());
7784ba319b5SDimitry Andric     SI.setDefaultDest(LastCaseI->getCaseSuccessor());
7794ba319b5SDimitry Andric     SI.removeCase(LastCaseI);
780f37b6182SDimitry Andric   }
781f37b6182SDimitry Andric 
7824ba319b5SDimitry Andric   // Walk the unswitched exit blocks and the unswitched split blocks and update
7834ba319b5SDimitry Andric   // the dominator tree based on the CFG edits. While we are walking unordered
7844ba319b5SDimitry Andric   // containers here, the API for applyUpdates takes an unordered list of
7854ba319b5SDimitry Andric   // updates and requires them to not contain duplicates.
7864ba319b5SDimitry Andric   SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
7874ba319b5SDimitry Andric   for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
7884ba319b5SDimitry Andric     DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
7894ba319b5SDimitry Andric     DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
7904ba319b5SDimitry Andric   }
7914ba319b5SDimitry Andric   for (auto SplitUnswitchedPair : SplitExitBBMap) {
7924ba319b5SDimitry Andric     auto *UnswitchedBB = SplitUnswitchedPair.second;
7934ba319b5SDimitry Andric     DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
7944ba319b5SDimitry Andric     DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
7954ba319b5SDimitry Andric   }
7964ba319b5SDimitry Andric   DT.applyUpdates(DTUpdates);
797*b5893f02SDimitry Andric 
798*b5893f02SDimitry Andric   if (MSSAU) {
799*b5893f02SDimitry Andric     MSSAU->applyUpdates(DTUpdates, DT);
800*b5893f02SDimitry Andric     if (VerifyMemorySSA)
801*b5893f02SDimitry Andric       MSSAU->getMemorySSA()->verifyMemorySSA();
802*b5893f02SDimitry Andric   }
803*b5893f02SDimitry Andric 
8044ba319b5SDimitry Andric   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
8054ba319b5SDimitry Andric 
8064ba319b5SDimitry Andric   // We may have changed the nesting relationship for this loop so hoist it to
8074ba319b5SDimitry Andric   // its correct parent if needed.
8084ba319b5SDimitry Andric   hoistLoopToNewParent(L, *NewPH, DT, LI);
8094ba319b5SDimitry Andric 
810f37b6182SDimitry Andric   ++NumTrivial;
811f37b6182SDimitry Andric   ++NumSwitches;
812*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "    done: unswitching trivial switch...\n");
813f37b6182SDimitry Andric   return true;
814f37b6182SDimitry Andric }
815f37b6182SDimitry Andric 
816f37b6182SDimitry Andric /// This routine scans the loop to find a branch or switch which occurs before
817f37b6182SDimitry Andric /// any side effects occur. These can potentially be unswitched without
818f37b6182SDimitry Andric /// duplicating the loop. If a branch or switch is successfully unswitched the
819f37b6182SDimitry Andric /// scanning continues to see if subsequent branches or switches have become
820f37b6182SDimitry Andric /// trivial. Once all trivial candidates have been unswitched, this routine
821f37b6182SDimitry Andric /// returns.
822f37b6182SDimitry Andric ///
823f37b6182SDimitry Andric /// The return value indicates whether anything was unswitched (and therefore
824f37b6182SDimitry Andric /// changed).
8254ba319b5SDimitry Andric ///
8264ba319b5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
8274ba319b5SDimitry Andric /// invalidated by this.
unswitchAllTrivialConditions(Loop & L,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)828f37b6182SDimitry Andric static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
829*b5893f02SDimitry Andric                                          LoopInfo &LI, ScalarEvolution *SE,
830*b5893f02SDimitry Andric                                          MemorySSAUpdater *MSSAU) {
831f37b6182SDimitry Andric   bool Changed = false;
832f37b6182SDimitry Andric 
833f37b6182SDimitry Andric   // If loop header has only one reachable successor we should keep looking for
834f37b6182SDimitry Andric   // trivial condition candidates in the successor as well. An alternative is
835f37b6182SDimitry Andric   // to constant fold conditions and merge successors into loop header (then we
836f37b6182SDimitry Andric   // only need to check header's terminator). The reason for not doing this in
837f37b6182SDimitry Andric   // LoopUnswitch pass is that it could potentially break LoopPassManager's
838f37b6182SDimitry Andric   // invariants. Folding dead branches could either eliminate the current loop
839f37b6182SDimitry Andric   // or make other loops unreachable. LCSSA form might also not be preserved
840f37b6182SDimitry Andric   // after deleting branches. The following code keeps traversing loop header's
841f37b6182SDimitry Andric   // successors until it finds the trivial condition candidate (condition that
842f37b6182SDimitry Andric   // is not a constant). Since unswitching generates branches with constant
843f37b6182SDimitry Andric   // conditions, this scenario could be very common in practice.
844f37b6182SDimitry Andric   BasicBlock *CurrentBB = L.getHeader();
845f37b6182SDimitry Andric   SmallPtrSet<BasicBlock *, 8> Visited;
846f37b6182SDimitry Andric   Visited.insert(CurrentBB);
847f37b6182SDimitry Andric   do {
848f37b6182SDimitry Andric     // Check if there are any side-effecting instructions (e.g. stores, calls,
849f37b6182SDimitry Andric     // volatile loads) in the part of the loop that the code *would* execute
850f37b6182SDimitry Andric     // without unswitching.
851f37b6182SDimitry Andric     if (llvm::any_of(*CurrentBB,
852f37b6182SDimitry Andric                      [](Instruction &I) { return I.mayHaveSideEffects(); }))
853f37b6182SDimitry Andric       return Changed;
854f37b6182SDimitry Andric 
855*b5893f02SDimitry Andric     Instruction *CurrentTerm = CurrentBB->getTerminator();
856f37b6182SDimitry Andric 
857f37b6182SDimitry Andric     if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
858f37b6182SDimitry Andric       // Don't bother trying to unswitch past a switch with a constant
859f37b6182SDimitry Andric       // condition. This should be removed prior to running this pass by
860f37b6182SDimitry Andric       // simplify-cfg.
861f37b6182SDimitry Andric       if (isa<Constant>(SI->getCondition()))
862f37b6182SDimitry Andric         return Changed;
863f37b6182SDimitry Andric 
864*b5893f02SDimitry Andric       if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
8654ba319b5SDimitry Andric         // Couldn't unswitch this one so we're done.
866f37b6182SDimitry Andric         return Changed;
867f37b6182SDimitry Andric 
868f37b6182SDimitry Andric       // Mark that we managed to unswitch something.
869f37b6182SDimitry Andric       Changed = true;
870f37b6182SDimitry Andric 
871f37b6182SDimitry Andric       // If unswitching turned the terminator into an unconditional branch then
872f37b6182SDimitry Andric       // we can continue. The unswitching logic specifically works to fold any
873f37b6182SDimitry Andric       // cases it can into an unconditional branch to make it easier to
874f37b6182SDimitry Andric       // recognize here.
875f37b6182SDimitry Andric       auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
876f37b6182SDimitry Andric       if (!BI || BI->isConditional())
877f37b6182SDimitry Andric         return Changed;
878f37b6182SDimitry Andric 
879f37b6182SDimitry Andric       CurrentBB = BI->getSuccessor(0);
880f37b6182SDimitry Andric       continue;
881f37b6182SDimitry Andric     }
882f37b6182SDimitry Andric 
883f37b6182SDimitry Andric     auto *BI = dyn_cast<BranchInst>(CurrentTerm);
884f37b6182SDimitry Andric     if (!BI)
885f37b6182SDimitry Andric       // We do not understand other terminator instructions.
886f37b6182SDimitry Andric       return Changed;
887f37b6182SDimitry Andric 
888f37b6182SDimitry Andric     // Don't bother trying to unswitch past an unconditional branch or a branch
889f37b6182SDimitry Andric     // with a constant value. These should be removed by simplify-cfg prior to
890f37b6182SDimitry Andric     // running this pass.
891f37b6182SDimitry Andric     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
892f37b6182SDimitry Andric       return Changed;
893f37b6182SDimitry Andric 
894f37b6182SDimitry Andric     // Found a trivial condition candidate: non-foldable conditional branch. If
895f37b6182SDimitry Andric     // we fail to unswitch this, we can't do anything else that is trivial.
896*b5893f02SDimitry Andric     if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
897f37b6182SDimitry Andric       return Changed;
898f37b6182SDimitry Andric 
899f37b6182SDimitry Andric     // Mark that we managed to unswitch something.
900f37b6182SDimitry Andric     Changed = true;
901f37b6182SDimitry Andric 
9024ba319b5SDimitry Andric     // If we only unswitched some of the conditions feeding the branch, we won't
9034ba319b5SDimitry Andric     // have collapsed it to a single successor.
904f37b6182SDimitry Andric     BI = cast<BranchInst>(CurrentBB->getTerminator());
9054ba319b5SDimitry Andric     if (BI->isConditional())
9064ba319b5SDimitry Andric       return Changed;
9074ba319b5SDimitry Andric 
9084ba319b5SDimitry Andric     // Follow the newly unconditional branch into its successor.
909f37b6182SDimitry Andric     CurrentBB = BI->getSuccessor(0);
910f37b6182SDimitry Andric 
911f37b6182SDimitry Andric     // When continuing, if we exit the loop or reach a previous visited block,
912f37b6182SDimitry Andric     // then we can not reach any trivial condition candidates (unfoldable
913f37b6182SDimitry Andric     // branch instructions or switch instructions) and no unswitch can happen.
914f37b6182SDimitry Andric   } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
915f37b6182SDimitry Andric 
916f37b6182SDimitry Andric   return Changed;
917f37b6182SDimitry Andric }
918f37b6182SDimitry Andric 
9192cab237bSDimitry Andric /// Build the cloned blocks for an unswitched copy of the given loop.
9202cab237bSDimitry Andric ///
9212cab237bSDimitry Andric /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
9222cab237bSDimitry Andric /// after the split block (`SplitBB`) that will be used to select between the
9232cab237bSDimitry Andric /// cloned and original loop.
9242cab237bSDimitry Andric ///
9252cab237bSDimitry Andric /// This routine handles cloning all of the necessary loop blocks and exit
9262cab237bSDimitry Andric /// blocks including rewriting their instructions and the relevant PHI nodes.
9274ba319b5SDimitry Andric /// Any loop blocks or exit blocks which are dominated by a different successor
9284ba319b5SDimitry Andric /// than the one for this clone of the loop blocks can be trivially skipped. We
9294ba319b5SDimitry Andric /// use the `DominatingSucc` map to determine whether a block satisfies that
9304ba319b5SDimitry Andric /// property with a simple map lookup.
9314ba319b5SDimitry Andric ///
9324ba319b5SDimitry Andric /// It also correctly creates the unconditional branch in the cloned
9332cab237bSDimitry Andric /// unswitched parent block to only point at the unswitched successor.
9342cab237bSDimitry Andric ///
9352cab237bSDimitry Andric /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
9362cab237bSDimitry Andric /// block splitting is correctly reflected in `LoopInfo`, essentially all of
9372cab237bSDimitry Andric /// the cloned blocks (and their loops) are left without full `LoopInfo`
9382cab237bSDimitry Andric /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
9392cab237bSDimitry Andric /// blocks to them but doesn't create the cloned `DominatorTree` structure and
9402cab237bSDimitry Andric /// instead the caller must recompute an accurate DT. It *does* correctly
9412cab237bSDimitry Andric /// update the `AssumptionCache` provided in `AC`.
buildClonedLoopBlocks(Loop & L,BasicBlock * LoopPH,BasicBlock * SplitBB,ArrayRef<BasicBlock * > ExitBlocks,BasicBlock * ParentBB,BasicBlock * UnswitchedSuccBB,BasicBlock * ContinueSuccBB,const SmallDenseMap<BasicBlock *,BasicBlock *,16> & DominatingSucc,ValueToValueMapTy & VMap,SmallVectorImpl<DominatorTree::UpdateType> & DTUpdates,AssumptionCache & AC,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)9422cab237bSDimitry Andric static BasicBlock *buildClonedLoopBlocks(
9432cab237bSDimitry Andric     Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
9442cab237bSDimitry Andric     ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
9452cab237bSDimitry Andric     BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
9464ba319b5SDimitry Andric     const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
9474ba319b5SDimitry Andric     ValueToValueMapTy &VMap,
9484ba319b5SDimitry Andric     SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
949*b5893f02SDimitry Andric     DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
9502cab237bSDimitry Andric   SmallVector<BasicBlock *, 4> NewBlocks;
9512cab237bSDimitry Andric   NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
9522cab237bSDimitry Andric 
9532cab237bSDimitry Andric   // We will need to clone a bunch of blocks, wrap up the clone operation in
9542cab237bSDimitry Andric   // a helper.
9552cab237bSDimitry Andric   auto CloneBlock = [&](BasicBlock *OldBB) {
9562cab237bSDimitry Andric     // Clone the basic block and insert it before the new preheader.
9572cab237bSDimitry Andric     BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
9582cab237bSDimitry Andric     NewBB->moveBefore(LoopPH);
9592cab237bSDimitry Andric 
9602cab237bSDimitry Andric     // Record this block and the mapping.
9612cab237bSDimitry Andric     NewBlocks.push_back(NewBB);
9622cab237bSDimitry Andric     VMap[OldBB] = NewBB;
9632cab237bSDimitry Andric 
9642cab237bSDimitry Andric     return NewBB;
9652cab237bSDimitry Andric   };
9662cab237bSDimitry Andric 
9674ba319b5SDimitry Andric   // We skip cloning blocks when they have a dominating succ that is not the
9684ba319b5SDimitry Andric   // succ we are cloning for.
9694ba319b5SDimitry Andric   auto SkipBlock = [&](BasicBlock *BB) {
9704ba319b5SDimitry Andric     auto It = DominatingSucc.find(BB);
9714ba319b5SDimitry Andric     return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
9724ba319b5SDimitry Andric   };
9734ba319b5SDimitry Andric 
9742cab237bSDimitry Andric   // First, clone the preheader.
9752cab237bSDimitry Andric   auto *ClonedPH = CloneBlock(LoopPH);
9762cab237bSDimitry Andric 
9772cab237bSDimitry Andric   // Then clone all the loop blocks, skipping the ones that aren't necessary.
9782cab237bSDimitry Andric   for (auto *LoopBB : L.blocks())
9794ba319b5SDimitry Andric     if (!SkipBlock(LoopBB))
9802cab237bSDimitry Andric       CloneBlock(LoopBB);
9812cab237bSDimitry Andric 
9822cab237bSDimitry Andric   // Split all the loop exit edges so that when we clone the exit blocks, if
9832cab237bSDimitry Andric   // any of the exit blocks are *also* a preheader for some other loop, we
9842cab237bSDimitry Andric   // don't create multiple predecessors entering the loop header.
9852cab237bSDimitry Andric   for (auto *ExitBB : ExitBlocks) {
9864ba319b5SDimitry Andric     if (SkipBlock(ExitBB))
9872cab237bSDimitry Andric       continue;
9882cab237bSDimitry Andric 
9892cab237bSDimitry Andric     // When we are going to clone an exit, we don't need to clone all the
9902cab237bSDimitry Andric     // instructions in the exit block and we want to ensure we have an easy
9912cab237bSDimitry Andric     // place to merge the CFG, so split the exit first. This is always safe to
9922cab237bSDimitry Andric     // do because there cannot be any non-loop predecessors of a loop exit in
9932cab237bSDimitry Andric     // loop simplified form.
994*b5893f02SDimitry Andric     auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
9952cab237bSDimitry Andric 
9962cab237bSDimitry Andric     // Rearrange the names to make it easier to write test cases by having the
9972cab237bSDimitry Andric     // exit block carry the suffix rather than the merge block carrying the
9982cab237bSDimitry Andric     // suffix.
9992cab237bSDimitry Andric     MergeBB->takeName(ExitBB);
10002cab237bSDimitry Andric     ExitBB->setName(Twine(MergeBB->getName()) + ".split");
10012cab237bSDimitry Andric 
10022cab237bSDimitry Andric     // Now clone the original exit block.
10032cab237bSDimitry Andric     auto *ClonedExitBB = CloneBlock(ExitBB);
10042cab237bSDimitry Andric     assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
10052cab237bSDimitry Andric            "Exit block should have been split to have one successor!");
10062cab237bSDimitry Andric     assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
10072cab237bSDimitry Andric            "Cloned exit block has the wrong successor!");
10082cab237bSDimitry Andric 
10092cab237bSDimitry Andric     // Remap any cloned instructions and create a merge phi node for them.
10102cab237bSDimitry Andric     for (auto ZippedInsts : llvm::zip_first(
10112cab237bSDimitry Andric              llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
10122cab237bSDimitry Andric              llvm::make_range(ClonedExitBB->begin(),
10132cab237bSDimitry Andric                               std::prev(ClonedExitBB->end())))) {
10142cab237bSDimitry Andric       Instruction &I = std::get<0>(ZippedInsts);
10152cab237bSDimitry Andric       Instruction &ClonedI = std::get<1>(ZippedInsts);
10162cab237bSDimitry Andric 
10172cab237bSDimitry Andric       // The only instructions in the exit block should be PHI nodes and
10182cab237bSDimitry Andric       // potentially a landing pad.
10192cab237bSDimitry Andric       assert(
10202cab237bSDimitry Andric           (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
10212cab237bSDimitry Andric           "Bad instruction in exit block!");
10222cab237bSDimitry Andric       // We should have a value map between the instruction and its clone.
10232cab237bSDimitry Andric       assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
10242cab237bSDimitry Andric 
10252cab237bSDimitry Andric       auto *MergePN =
10262cab237bSDimitry Andric           PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
10272cab237bSDimitry Andric                           &*MergeBB->getFirstInsertionPt());
10282cab237bSDimitry Andric       I.replaceAllUsesWith(MergePN);
10292cab237bSDimitry Andric       MergePN->addIncoming(&I, ExitBB);
10302cab237bSDimitry Andric       MergePN->addIncoming(&ClonedI, ClonedExitBB);
10312cab237bSDimitry Andric     }
10322cab237bSDimitry Andric   }
10332cab237bSDimitry Andric 
10342cab237bSDimitry Andric   // Rewrite the instructions in the cloned blocks to refer to the instructions
10352cab237bSDimitry Andric   // in the cloned blocks. We have to do this as a second pass so that we have
10362cab237bSDimitry Andric   // everything available. Also, we have inserted new instructions which may
10372cab237bSDimitry Andric   // include assume intrinsics, so we update the assumption cache while
10382cab237bSDimitry Andric   // processing this.
10392cab237bSDimitry Andric   for (auto *ClonedBB : NewBlocks)
10402cab237bSDimitry Andric     for (Instruction &I : *ClonedBB) {
10412cab237bSDimitry Andric       RemapInstruction(&I, VMap,
10422cab237bSDimitry Andric                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
10432cab237bSDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(&I))
10442cab237bSDimitry Andric         if (II->getIntrinsicID() == Intrinsic::assume)
10452cab237bSDimitry Andric           AC.registerAssumption(II);
10462cab237bSDimitry Andric     }
10472cab237bSDimitry Andric 
10484ba319b5SDimitry Andric   // Update any PHI nodes in the cloned successors of the skipped blocks to not
10494ba319b5SDimitry Andric   // have spurious incoming values.
10504ba319b5SDimitry Andric   for (auto *LoopBB : L.blocks())
10514ba319b5SDimitry Andric     if (SkipBlock(LoopBB))
10524ba319b5SDimitry Andric       for (auto *SuccBB : successors(LoopBB))
10534ba319b5SDimitry Andric         if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
10544ba319b5SDimitry Andric           for (PHINode &PN : ClonedSuccBB->phis())
10554ba319b5SDimitry Andric             PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
10564ba319b5SDimitry Andric 
10574ba319b5SDimitry Andric   // Remove the cloned parent as a predecessor of any successor we ended up
10584ba319b5SDimitry Andric   // cloning other than the unswitched one.
10592cab237bSDimitry Andric   auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
10604ba319b5SDimitry Andric   for (auto *SuccBB : successors(ParentBB)) {
10614ba319b5SDimitry Andric     if (SuccBB == UnswitchedSuccBB)
10624ba319b5SDimitry Andric       continue;
10634ba319b5SDimitry Andric 
10644ba319b5SDimitry Andric     auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
10654ba319b5SDimitry Andric     if (!ClonedSuccBB)
10664ba319b5SDimitry Andric       continue;
10674ba319b5SDimitry Andric 
10684ba319b5SDimitry Andric     ClonedSuccBB->removePredecessor(ClonedParentBB,
10692cab237bSDimitry Andric                                     /*DontDeleteUselessPHIs*/ true);
10704ba319b5SDimitry Andric   }
10714ba319b5SDimitry Andric 
10724ba319b5SDimitry Andric   // Replace the cloned branch with an unconditional branch to the cloned
10732cab237bSDimitry Andric   // unswitched successor.
10742cab237bSDimitry Andric   auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
10752cab237bSDimitry Andric   ClonedParentBB->getTerminator()->eraseFromParent();
10762cab237bSDimitry Andric   BranchInst::Create(ClonedSuccBB, ClonedParentBB);
10772cab237bSDimitry Andric 
10784ba319b5SDimitry Andric   // If there are duplicate entries in the PHI nodes because of multiple edges
10794ba319b5SDimitry Andric   // to the unswitched successor, we need to nuke all but one as we replaced it
10804ba319b5SDimitry Andric   // with a direct branch.
10814ba319b5SDimitry Andric   for (PHINode &PN : ClonedSuccBB->phis()) {
10824ba319b5SDimitry Andric     bool Found = false;
10834ba319b5SDimitry Andric     // Loop over the incoming operands backwards so we can easily delete as we
10844ba319b5SDimitry Andric     // go without invalidating the index.
10854ba319b5SDimitry Andric     for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
10864ba319b5SDimitry Andric       if (PN.getIncomingBlock(i) != ClonedParentBB)
10874ba319b5SDimitry Andric         continue;
10884ba319b5SDimitry Andric       if (!Found) {
10894ba319b5SDimitry Andric         Found = true;
10904ba319b5SDimitry Andric         continue;
10914ba319b5SDimitry Andric       }
10924ba319b5SDimitry Andric       PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
10934ba319b5SDimitry Andric     }
10944ba319b5SDimitry Andric   }
10954ba319b5SDimitry Andric 
10964ba319b5SDimitry Andric   // Record the domtree updates for the new blocks.
10974ba319b5SDimitry Andric   SmallPtrSet<BasicBlock *, 4> SuccSet;
10984ba319b5SDimitry Andric   for (auto *ClonedBB : NewBlocks) {
10994ba319b5SDimitry Andric     for (auto *SuccBB : successors(ClonedBB))
11004ba319b5SDimitry Andric       if (SuccSet.insert(SuccBB).second)
11014ba319b5SDimitry Andric         DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
11024ba319b5SDimitry Andric     SuccSet.clear();
11034ba319b5SDimitry Andric   }
11042cab237bSDimitry Andric 
11052cab237bSDimitry Andric   return ClonedPH;
11062cab237bSDimitry Andric }
11072cab237bSDimitry Andric 
11082cab237bSDimitry Andric /// Recursively clone the specified loop and all of its children.
11092cab237bSDimitry Andric ///
11102cab237bSDimitry Andric /// The target parent loop for the clone should be provided, or can be null if
11112cab237bSDimitry Andric /// the clone is a top-level loop. While cloning, all the blocks are mapped
11122cab237bSDimitry Andric /// with the provided value map. The entire original loop must be present in
11132cab237bSDimitry Andric /// the value map. The cloned loop is returned.
cloneLoopNest(Loop & OrigRootL,Loop * RootParentL,const ValueToValueMapTy & VMap,LoopInfo & LI)11142cab237bSDimitry Andric static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
11152cab237bSDimitry Andric                            const ValueToValueMapTy &VMap, LoopInfo &LI) {
11162cab237bSDimitry Andric   auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
11172cab237bSDimitry Andric     assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
11182cab237bSDimitry Andric     ClonedL.reserveBlocks(OrigL.getNumBlocks());
11192cab237bSDimitry Andric     for (auto *BB : OrigL.blocks()) {
11202cab237bSDimitry Andric       auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
11212cab237bSDimitry Andric       ClonedL.addBlockEntry(ClonedBB);
11224ba319b5SDimitry Andric       if (LI.getLoopFor(BB) == &OrigL)
11232cab237bSDimitry Andric         LI.changeLoopFor(ClonedBB, &ClonedL);
11242cab237bSDimitry Andric     }
11252cab237bSDimitry Andric   };
11262cab237bSDimitry Andric 
11272cab237bSDimitry Andric   // We specially handle the first loop because it may get cloned into
11282cab237bSDimitry Andric   // a different parent and because we most commonly are cloning leaf loops.
11292cab237bSDimitry Andric   Loop *ClonedRootL = LI.AllocateLoop();
11302cab237bSDimitry Andric   if (RootParentL)
11312cab237bSDimitry Andric     RootParentL->addChildLoop(ClonedRootL);
11322cab237bSDimitry Andric   else
11332cab237bSDimitry Andric     LI.addTopLevelLoop(ClonedRootL);
11342cab237bSDimitry Andric   AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
11352cab237bSDimitry Andric 
11362cab237bSDimitry Andric   if (OrigRootL.empty())
11372cab237bSDimitry Andric     return ClonedRootL;
11382cab237bSDimitry Andric 
11392cab237bSDimitry Andric   // If we have a nest, we can quickly clone the entire loop nest using an
11402cab237bSDimitry Andric   // iterative approach because it is a tree. We keep the cloned parent in the
11412cab237bSDimitry Andric   // data structure to avoid repeatedly querying through a map to find it.
11422cab237bSDimitry Andric   SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
11432cab237bSDimitry Andric   // Build up the loops to clone in reverse order as we'll clone them from the
11442cab237bSDimitry Andric   // back.
11452cab237bSDimitry Andric   for (Loop *ChildL : llvm::reverse(OrigRootL))
11462cab237bSDimitry Andric     LoopsToClone.push_back({ClonedRootL, ChildL});
11472cab237bSDimitry Andric   do {
11482cab237bSDimitry Andric     Loop *ClonedParentL, *L;
11492cab237bSDimitry Andric     std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
11502cab237bSDimitry Andric     Loop *ClonedL = LI.AllocateLoop();
11512cab237bSDimitry Andric     ClonedParentL->addChildLoop(ClonedL);
11522cab237bSDimitry Andric     AddClonedBlocksToLoop(*L, *ClonedL);
11532cab237bSDimitry Andric     for (Loop *ChildL : llvm::reverse(*L))
11542cab237bSDimitry Andric       LoopsToClone.push_back({ClonedL, ChildL});
11552cab237bSDimitry Andric   } while (!LoopsToClone.empty());
11562cab237bSDimitry Andric 
11572cab237bSDimitry Andric   return ClonedRootL;
11582cab237bSDimitry Andric }
11592cab237bSDimitry Andric 
11602cab237bSDimitry Andric /// Build the cloned loops of an original loop from unswitching.
11612cab237bSDimitry Andric ///
11622cab237bSDimitry Andric /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
11632cab237bSDimitry Andric /// operation. We need to re-verify that there even is a loop (as the backedge
11642cab237bSDimitry Andric /// may not have been cloned), and even if there are remaining backedges the
11652cab237bSDimitry Andric /// backedge set may be different. However, we know that each child loop is
11662cab237bSDimitry Andric /// undisturbed, we only need to find where to place each child loop within
11672cab237bSDimitry Andric /// either any parent loop or within a cloned version of the original loop.
11682cab237bSDimitry Andric ///
11692cab237bSDimitry Andric /// Because child loops may end up cloned outside of any cloned version of the
11702cab237bSDimitry Andric /// original loop, multiple cloned sibling loops may be created. All of them
11712cab237bSDimitry Andric /// are returned so that the newly introduced loop nest roots can be
11722cab237bSDimitry Andric /// identified.
buildClonedLoops(Loop & OrigL,ArrayRef<BasicBlock * > ExitBlocks,const ValueToValueMapTy & VMap,LoopInfo & LI,SmallVectorImpl<Loop * > & NonChildClonedLoops)11734ba319b5SDimitry Andric static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
11742cab237bSDimitry Andric                              const ValueToValueMapTy &VMap, LoopInfo &LI,
11752cab237bSDimitry Andric                              SmallVectorImpl<Loop *> &NonChildClonedLoops) {
11762cab237bSDimitry Andric   Loop *ClonedL = nullptr;
11772cab237bSDimitry Andric 
11782cab237bSDimitry Andric   auto *OrigPH = OrigL.getLoopPreheader();
11792cab237bSDimitry Andric   auto *OrigHeader = OrigL.getHeader();
11802cab237bSDimitry Andric 
11812cab237bSDimitry Andric   auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
11822cab237bSDimitry Andric   auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
11832cab237bSDimitry Andric 
11842cab237bSDimitry Andric   // We need to know the loops of the cloned exit blocks to even compute the
11852cab237bSDimitry Andric   // accurate parent loop. If we only clone exits to some parent of the
11862cab237bSDimitry Andric   // original parent, we want to clone into that outer loop. We also keep track
11872cab237bSDimitry Andric   // of the loops that our cloned exit blocks participate in.
11882cab237bSDimitry Andric   Loop *ParentL = nullptr;
11892cab237bSDimitry Andric   SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
11902cab237bSDimitry Andric   SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
11912cab237bSDimitry Andric   ClonedExitsInLoops.reserve(ExitBlocks.size());
11922cab237bSDimitry Andric   for (auto *ExitBB : ExitBlocks)
11932cab237bSDimitry Andric     if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
11942cab237bSDimitry Andric       if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
11952cab237bSDimitry Andric         ExitLoopMap[ClonedExitBB] = ExitL;
11962cab237bSDimitry Andric         ClonedExitsInLoops.push_back(ClonedExitBB);
11972cab237bSDimitry Andric         if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
11982cab237bSDimitry Andric           ParentL = ExitL;
11992cab237bSDimitry Andric       }
12002cab237bSDimitry Andric   assert((!ParentL || ParentL == OrigL.getParentLoop() ||
12012cab237bSDimitry Andric           ParentL->contains(OrigL.getParentLoop())) &&
12022cab237bSDimitry Andric          "The computed parent loop should always contain (or be) the parent of "
12032cab237bSDimitry Andric          "the original loop.");
12042cab237bSDimitry Andric 
12052cab237bSDimitry Andric   // We build the set of blocks dominated by the cloned header from the set of
12062cab237bSDimitry Andric   // cloned blocks out of the original loop. While not all of these will
12072cab237bSDimitry Andric   // necessarily be in the cloned loop, it is enough to establish that they
12082cab237bSDimitry Andric   // aren't in unreachable cycles, etc.
12092cab237bSDimitry Andric   SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
12102cab237bSDimitry Andric   for (auto *BB : OrigL.blocks())
12112cab237bSDimitry Andric     if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
12122cab237bSDimitry Andric       ClonedLoopBlocks.insert(ClonedBB);
12132cab237bSDimitry Andric 
12142cab237bSDimitry Andric   // Rebuild the set of blocks that will end up in the cloned loop. We may have
12152cab237bSDimitry Andric   // skipped cloning some region of this loop which can in turn skip some of
12162cab237bSDimitry Andric   // the backedges so we have to rebuild the blocks in the loop based on the
12172cab237bSDimitry Andric   // backedges that remain after cloning.
12182cab237bSDimitry Andric   SmallVector<BasicBlock *, 16> Worklist;
12192cab237bSDimitry Andric   SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
12202cab237bSDimitry Andric   for (auto *Pred : predecessors(ClonedHeader)) {
12212cab237bSDimitry Andric     // The only possible non-loop header predecessor is the preheader because
12222cab237bSDimitry Andric     // we know we cloned the loop in simplified form.
12232cab237bSDimitry Andric     if (Pred == ClonedPH)
12242cab237bSDimitry Andric       continue;
12252cab237bSDimitry Andric 
12262cab237bSDimitry Andric     // Because the loop was in simplified form, the only non-loop predecessor
12272cab237bSDimitry Andric     // should be the preheader.
12282cab237bSDimitry Andric     assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
12292cab237bSDimitry Andric                                            "header other than the preheader "
12302cab237bSDimitry Andric                                            "that is not part of the loop!");
12312cab237bSDimitry Andric 
12322cab237bSDimitry Andric     // Insert this block into the loop set and on the first visit (and if it
12332cab237bSDimitry Andric     // isn't the header we're currently walking) put it into the worklist to
12342cab237bSDimitry Andric     // recurse through.
12352cab237bSDimitry Andric     if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
12362cab237bSDimitry Andric       Worklist.push_back(Pred);
12372cab237bSDimitry Andric   }
12382cab237bSDimitry Andric 
12392cab237bSDimitry Andric   // If we had any backedges then there *is* a cloned loop. Put the header into
12402cab237bSDimitry Andric   // the loop set and then walk the worklist backwards to find all the blocks
12412cab237bSDimitry Andric   // that remain within the loop after cloning.
12422cab237bSDimitry Andric   if (!BlocksInClonedLoop.empty()) {
12432cab237bSDimitry Andric     BlocksInClonedLoop.insert(ClonedHeader);
12442cab237bSDimitry Andric 
12452cab237bSDimitry Andric     while (!Worklist.empty()) {
12462cab237bSDimitry Andric       BasicBlock *BB = Worklist.pop_back_val();
12472cab237bSDimitry Andric       assert(BlocksInClonedLoop.count(BB) &&
12482cab237bSDimitry Andric              "Didn't put block into the loop set!");
12492cab237bSDimitry Andric 
12502cab237bSDimitry Andric       // Insert any predecessors that are in the possible set into the cloned
12512cab237bSDimitry Andric       // set, and if the insert is successful, add them to the worklist. Note
12522cab237bSDimitry Andric       // that we filter on the blocks that are definitely reachable via the
12532cab237bSDimitry Andric       // backedge to the loop header so we may prune out dead code within the
12542cab237bSDimitry Andric       // cloned loop.
12552cab237bSDimitry Andric       for (auto *Pred : predecessors(BB))
12562cab237bSDimitry Andric         if (ClonedLoopBlocks.count(Pred) &&
12572cab237bSDimitry Andric             BlocksInClonedLoop.insert(Pred).second)
12582cab237bSDimitry Andric           Worklist.push_back(Pred);
12592cab237bSDimitry Andric     }
12602cab237bSDimitry Andric 
12612cab237bSDimitry Andric     ClonedL = LI.AllocateLoop();
12622cab237bSDimitry Andric     if (ParentL) {
12632cab237bSDimitry Andric       ParentL->addBasicBlockToLoop(ClonedPH, LI);
12642cab237bSDimitry Andric       ParentL->addChildLoop(ClonedL);
12652cab237bSDimitry Andric     } else {
12662cab237bSDimitry Andric       LI.addTopLevelLoop(ClonedL);
12672cab237bSDimitry Andric     }
12684ba319b5SDimitry Andric     NonChildClonedLoops.push_back(ClonedL);
12692cab237bSDimitry Andric 
12702cab237bSDimitry Andric     ClonedL->reserveBlocks(BlocksInClonedLoop.size());
12712cab237bSDimitry Andric     // We don't want to just add the cloned loop blocks based on how we
12722cab237bSDimitry Andric     // discovered them. The original order of blocks was carefully built in
12732cab237bSDimitry Andric     // a way that doesn't rely on predecessor ordering. Rather than re-invent
12742cab237bSDimitry Andric     // that logic, we just re-walk the original blocks (and those of the child
12752cab237bSDimitry Andric     // loops) and filter them as we add them into the cloned loop.
12762cab237bSDimitry Andric     for (auto *BB : OrigL.blocks()) {
12772cab237bSDimitry Andric       auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
12782cab237bSDimitry Andric       if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
12792cab237bSDimitry Andric         continue;
12802cab237bSDimitry Andric 
12812cab237bSDimitry Andric       // Directly add the blocks that are only in this loop.
12822cab237bSDimitry Andric       if (LI.getLoopFor(BB) == &OrigL) {
12832cab237bSDimitry Andric         ClonedL->addBasicBlockToLoop(ClonedBB, LI);
12842cab237bSDimitry Andric         continue;
12852cab237bSDimitry Andric       }
12862cab237bSDimitry Andric 
12872cab237bSDimitry Andric       // We want to manually add it to this loop and parents.
12882cab237bSDimitry Andric       // Registering it with LoopInfo will happen when we clone the top
12892cab237bSDimitry Andric       // loop for this block.
12902cab237bSDimitry Andric       for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
12912cab237bSDimitry Andric         PL->addBlockEntry(ClonedBB);
12922cab237bSDimitry Andric     }
12932cab237bSDimitry Andric 
12942cab237bSDimitry Andric     // Now add each child loop whose header remains within the cloned loop. All
12952cab237bSDimitry Andric     // of the blocks within the loop must satisfy the same constraints as the
12962cab237bSDimitry Andric     // header so once we pass the header checks we can just clone the entire
12972cab237bSDimitry Andric     // child loop nest.
12982cab237bSDimitry Andric     for (Loop *ChildL : OrigL) {
12992cab237bSDimitry Andric       auto *ClonedChildHeader =
13002cab237bSDimitry Andric           cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
13012cab237bSDimitry Andric       if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
13022cab237bSDimitry Andric         continue;
13032cab237bSDimitry Andric 
13042cab237bSDimitry Andric #ifndef NDEBUG
13052cab237bSDimitry Andric       // We should never have a cloned child loop header but fail to have
13062cab237bSDimitry Andric       // all of the blocks for that child loop.
13072cab237bSDimitry Andric       for (auto *ChildLoopBB : ChildL->blocks())
13082cab237bSDimitry Andric         assert(BlocksInClonedLoop.count(
13092cab237bSDimitry Andric                    cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
13102cab237bSDimitry Andric                "Child cloned loop has a header within the cloned outer "
13112cab237bSDimitry Andric                "loop but not all of its blocks!");
13122cab237bSDimitry Andric #endif
13132cab237bSDimitry Andric 
13142cab237bSDimitry Andric       cloneLoopNest(*ChildL, ClonedL, VMap, LI);
13152cab237bSDimitry Andric     }
13162cab237bSDimitry Andric   }
13172cab237bSDimitry Andric 
13182cab237bSDimitry Andric   // Now that we've handled all the components of the original loop that were
13192cab237bSDimitry Andric   // cloned into a new loop, we still need to handle anything from the original
13202cab237bSDimitry Andric   // loop that wasn't in a cloned loop.
13212cab237bSDimitry Andric 
13222cab237bSDimitry Andric   // Figure out what blocks are left to place within any loop nest containing
13232cab237bSDimitry Andric   // the unswitched loop. If we never formed a loop, the cloned PH is one of
13242cab237bSDimitry Andric   // them.
13252cab237bSDimitry Andric   SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
13262cab237bSDimitry Andric   if (BlocksInClonedLoop.empty())
13272cab237bSDimitry Andric     UnloopedBlockSet.insert(ClonedPH);
13282cab237bSDimitry Andric   for (auto *ClonedBB : ClonedLoopBlocks)
13292cab237bSDimitry Andric     if (!BlocksInClonedLoop.count(ClonedBB))
13302cab237bSDimitry Andric       UnloopedBlockSet.insert(ClonedBB);
13312cab237bSDimitry Andric 
13322cab237bSDimitry Andric   // Copy the cloned exits and sort them in ascending loop depth, we'll work
13332cab237bSDimitry Andric   // backwards across these to process them inside out. The order shouldn't
13342cab237bSDimitry Andric   // matter as we're just trying to build up the map from inside-out; we use
13352cab237bSDimitry Andric   // the map in a more stably ordered way below.
13362cab237bSDimitry Andric   auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1337*b5893f02SDimitry Andric   llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
13382cab237bSDimitry Andric     return ExitLoopMap.lookup(LHS)->getLoopDepth() <
13392cab237bSDimitry Andric            ExitLoopMap.lookup(RHS)->getLoopDepth();
13402cab237bSDimitry Andric   });
13412cab237bSDimitry Andric 
13422cab237bSDimitry Andric   // Populate the existing ExitLoopMap with everything reachable from each
13432cab237bSDimitry Andric   // exit, starting from the inner most exit.
13442cab237bSDimitry Andric   while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
13452cab237bSDimitry Andric     assert(Worklist.empty() && "Didn't clear worklist!");
13462cab237bSDimitry Andric 
13472cab237bSDimitry Andric     BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
13482cab237bSDimitry Andric     Loop *ExitL = ExitLoopMap.lookup(ExitBB);
13492cab237bSDimitry Andric 
13502cab237bSDimitry Andric     // Walk the CFG back until we hit the cloned PH adding everything reachable
13512cab237bSDimitry Andric     // and in the unlooped set to this exit block's loop.
13522cab237bSDimitry Andric     Worklist.push_back(ExitBB);
13532cab237bSDimitry Andric     do {
13542cab237bSDimitry Andric       BasicBlock *BB = Worklist.pop_back_val();
13552cab237bSDimitry Andric       // We can stop recursing at the cloned preheader (if we get there).
13562cab237bSDimitry Andric       if (BB == ClonedPH)
13572cab237bSDimitry Andric         continue;
13582cab237bSDimitry Andric 
13592cab237bSDimitry Andric       for (BasicBlock *PredBB : predecessors(BB)) {
13602cab237bSDimitry Andric         // If this pred has already been moved to our set or is part of some
13612cab237bSDimitry Andric         // (inner) loop, no update needed.
13622cab237bSDimitry Andric         if (!UnloopedBlockSet.erase(PredBB)) {
13632cab237bSDimitry Andric           assert(
13642cab237bSDimitry Andric               (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
13652cab237bSDimitry Andric               "Predecessor not mapped to a loop!");
13662cab237bSDimitry Andric           continue;
13672cab237bSDimitry Andric         }
13682cab237bSDimitry Andric 
13692cab237bSDimitry Andric         // We just insert into the loop set here. We'll add these blocks to the
13702cab237bSDimitry Andric         // exit loop after we build up the set in an order that doesn't rely on
13712cab237bSDimitry Andric         // predecessor order (which in turn relies on use list order).
13722cab237bSDimitry Andric         bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
13732cab237bSDimitry Andric         (void)Inserted;
13742cab237bSDimitry Andric         assert(Inserted && "Should only visit an unlooped block once!");
13752cab237bSDimitry Andric 
13762cab237bSDimitry Andric         // And recurse through to its predecessors.
13772cab237bSDimitry Andric         Worklist.push_back(PredBB);
13782cab237bSDimitry Andric       }
13792cab237bSDimitry Andric     } while (!Worklist.empty());
13802cab237bSDimitry Andric   }
13812cab237bSDimitry Andric 
13822cab237bSDimitry Andric   // Now that the ExitLoopMap gives as  mapping for all the non-looping cloned
13832cab237bSDimitry Andric   // blocks to their outer loops, walk the cloned blocks and the cloned exits
13842cab237bSDimitry Andric   // in their original order adding them to the correct loop.
13852cab237bSDimitry Andric 
13862cab237bSDimitry Andric   // We need a stable insertion order. We use the order of the original loop
13872cab237bSDimitry Andric   // order and map into the correct parent loop.
13882cab237bSDimitry Andric   for (auto *BB : llvm::concat<BasicBlock *const>(
13892cab237bSDimitry Andric            makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
13902cab237bSDimitry Andric     if (Loop *OuterL = ExitLoopMap.lookup(BB))
13912cab237bSDimitry Andric       OuterL->addBasicBlockToLoop(BB, LI);
13922cab237bSDimitry Andric 
13932cab237bSDimitry Andric #ifndef NDEBUG
13942cab237bSDimitry Andric   for (auto &BBAndL : ExitLoopMap) {
13952cab237bSDimitry Andric     auto *BB = BBAndL.first;
13962cab237bSDimitry Andric     auto *OuterL = BBAndL.second;
13972cab237bSDimitry Andric     assert(LI.getLoopFor(BB) == OuterL &&
13982cab237bSDimitry Andric            "Failed to put all blocks into outer loops!");
13992cab237bSDimitry Andric   }
14002cab237bSDimitry Andric #endif
14012cab237bSDimitry Andric 
14022cab237bSDimitry Andric   // Now that all the blocks are placed into the correct containing loop in the
14032cab237bSDimitry Andric   // absence of child loops, find all the potentially cloned child loops and
14042cab237bSDimitry Andric   // clone them into whatever outer loop we placed their header into.
14052cab237bSDimitry Andric   for (Loop *ChildL : OrigL) {
14062cab237bSDimitry Andric     auto *ClonedChildHeader =
14072cab237bSDimitry Andric         cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
14082cab237bSDimitry Andric     if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
14092cab237bSDimitry Andric       continue;
14102cab237bSDimitry Andric 
14112cab237bSDimitry Andric #ifndef NDEBUG
14122cab237bSDimitry Andric     for (auto *ChildLoopBB : ChildL->blocks())
14132cab237bSDimitry Andric       assert(VMap.count(ChildLoopBB) &&
14142cab237bSDimitry Andric              "Cloned a child loop header but not all of that loops blocks!");
14152cab237bSDimitry Andric #endif
14162cab237bSDimitry Andric 
14172cab237bSDimitry Andric     NonChildClonedLoops.push_back(cloneLoopNest(
14182cab237bSDimitry Andric         *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
14192cab237bSDimitry Andric   }
14202cab237bSDimitry Andric }
14212cab237bSDimitry Andric 
14224ba319b5SDimitry Andric static void
deleteDeadClonedBlocks(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,DominatorTree & DT,MemorySSAUpdater * MSSAU)14234ba319b5SDimitry Andric deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
14244ba319b5SDimitry Andric                        ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1425*b5893f02SDimitry Andric                        DominatorTree &DT, MemorySSAUpdater *MSSAU) {
14264ba319b5SDimitry Andric   // Find all the dead clones, and remove them from their successors.
14274ba319b5SDimitry Andric   SmallVector<BasicBlock *, 16> DeadBlocks;
14284ba319b5SDimitry Andric   for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
14294ba319b5SDimitry Andric     for (auto &VMap : VMaps)
14304ba319b5SDimitry Andric       if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
14314ba319b5SDimitry Andric         if (!DT.isReachableFromEntry(ClonedBB)) {
14324ba319b5SDimitry Andric           for (BasicBlock *SuccBB : successors(ClonedBB))
14334ba319b5SDimitry Andric             SuccBB->removePredecessor(ClonedBB);
14344ba319b5SDimitry Andric           DeadBlocks.push_back(ClonedBB);
14354ba319b5SDimitry Andric         }
14364ba319b5SDimitry Andric 
1437*b5893f02SDimitry Andric   // Remove all MemorySSA in the dead blocks
1438*b5893f02SDimitry Andric   if (MSSAU) {
1439*b5893f02SDimitry Andric     SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
1440*b5893f02SDimitry Andric                                                DeadBlocks.end());
1441*b5893f02SDimitry Andric     MSSAU->removeBlocks(DeadBlockSet);
1442*b5893f02SDimitry Andric   }
1443*b5893f02SDimitry Andric 
14444ba319b5SDimitry Andric   // Drop any remaining references to break cycles.
14454ba319b5SDimitry Andric   for (BasicBlock *BB : DeadBlocks)
14464ba319b5SDimitry Andric     BB->dropAllReferences();
14474ba319b5SDimitry Andric   // Erase them from the IR.
14484ba319b5SDimitry Andric   for (BasicBlock *BB : DeadBlocks)
14494ba319b5SDimitry Andric     BB->eraseFromParent();
14504ba319b5SDimitry Andric }
14514ba319b5SDimitry Andric 
deleteDeadBlocksFromLoop(Loop & L,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)1452*b5893f02SDimitry Andric static void deleteDeadBlocksFromLoop(Loop &L,
14532cab237bSDimitry Andric                                      SmallVectorImpl<BasicBlock *> &ExitBlocks,
1454*b5893f02SDimitry Andric                                      DominatorTree &DT, LoopInfo &LI,
1455*b5893f02SDimitry Andric                                      MemorySSAUpdater *MSSAU) {
1456*b5893f02SDimitry Andric   // Find all the dead blocks tied to this loop, and remove them from their
1457*b5893f02SDimitry Andric   // successors.
1458*b5893f02SDimitry Andric   SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
1459*b5893f02SDimitry Andric 
1460*b5893f02SDimitry Andric   // Start with loop/exit blocks and get a transitive closure of reachable dead
1461*b5893f02SDimitry Andric   // blocks.
1462*b5893f02SDimitry Andric   SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1463*b5893f02SDimitry Andric                                                 ExitBlocks.end());
1464*b5893f02SDimitry Andric   DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1465*b5893f02SDimitry Andric   while (!DeathCandidates.empty()) {
1466*b5893f02SDimitry Andric     auto *BB = DeathCandidates.pop_back_val();
1467*b5893f02SDimitry Andric     if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1468*b5893f02SDimitry Andric       for (BasicBlock *SuccBB : successors(BB)) {
14694ba319b5SDimitry Andric         SuccBB->removePredecessor(BB);
1470*b5893f02SDimitry Andric         DeathCandidates.push_back(SuccBB);
1471*b5893f02SDimitry Andric       }
1472*b5893f02SDimitry Andric       DeadBlockSet.insert(BB);
1473*b5893f02SDimitry Andric     }
14742cab237bSDimitry Andric   }
14752cab237bSDimitry Andric 
1476*b5893f02SDimitry Andric   // Remove all MemorySSA in the dead blocks
1477*b5893f02SDimitry Andric   if (MSSAU)
1478*b5893f02SDimitry Andric     MSSAU->removeBlocks(DeadBlockSet);
14794ba319b5SDimitry Andric 
14802cab237bSDimitry Andric   // Filter out the dead blocks from the exit blocks list so that it can be
14812cab237bSDimitry Andric   // used in the caller.
14822cab237bSDimitry Andric   llvm::erase_if(ExitBlocks,
14834ba319b5SDimitry Andric                  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
14842cab237bSDimitry Andric 
14852cab237bSDimitry Andric   // Walk from this loop up through its parents removing all of the dead blocks.
14862cab237bSDimitry Andric   for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1487*b5893f02SDimitry Andric     for (auto *BB : DeadBlockSet)
14882cab237bSDimitry Andric       ParentL->getBlocksSet().erase(BB);
14892cab237bSDimitry Andric     llvm::erase_if(ParentL->getBlocksVector(),
14904ba319b5SDimitry Andric                    [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
14912cab237bSDimitry Andric   }
14922cab237bSDimitry Andric 
14932cab237bSDimitry Andric   // Now delete the dead child loops. This raw delete will clear them
14942cab237bSDimitry Andric   // recursively.
14952cab237bSDimitry Andric   llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
14964ba319b5SDimitry Andric     if (!DeadBlockSet.count(ChildL->getHeader()))
14972cab237bSDimitry Andric       return false;
14982cab237bSDimitry Andric 
14992cab237bSDimitry Andric     assert(llvm::all_of(ChildL->blocks(),
15002cab237bSDimitry Andric                         [&](BasicBlock *ChildBB) {
15014ba319b5SDimitry Andric                           return DeadBlockSet.count(ChildBB);
15022cab237bSDimitry Andric                         }) &&
15032cab237bSDimitry Andric            "If the child loop header is dead all blocks in the child loop must "
15042cab237bSDimitry Andric            "be dead as well!");
15052cab237bSDimitry Andric     LI.destroy(ChildL);
15062cab237bSDimitry Andric     return true;
15072cab237bSDimitry Andric   });
15082cab237bSDimitry Andric 
15094ba319b5SDimitry Andric   // Remove the loop mappings for the dead blocks and drop all the references
15104ba319b5SDimitry Andric   // from these blocks to others to handle cyclic references as we start
15114ba319b5SDimitry Andric   // deleting the blocks themselves.
1512*b5893f02SDimitry Andric   for (auto *BB : DeadBlockSet) {
15134ba319b5SDimitry Andric     // Check that the dominator tree has already been updated.
15144ba319b5SDimitry Andric     assert(!DT.getNode(BB) && "Should already have cleared domtree!");
15152cab237bSDimitry Andric     LI.changeLoopFor(BB, nullptr);
15162cab237bSDimitry Andric     BB->dropAllReferences();
15172cab237bSDimitry Andric   }
15184ba319b5SDimitry Andric 
15194ba319b5SDimitry Andric   // Actually delete the blocks now that they've been fully unhooked from the
15204ba319b5SDimitry Andric   // IR.
1521*b5893f02SDimitry Andric   for (auto *BB : DeadBlockSet)
15224ba319b5SDimitry Andric     BB->eraseFromParent();
15232cab237bSDimitry Andric }
15242cab237bSDimitry Andric 
15252cab237bSDimitry Andric /// Recompute the set of blocks in a loop after unswitching.
15262cab237bSDimitry Andric ///
15272cab237bSDimitry Andric /// This walks from the original headers predecessors to rebuild the loop. We
15282cab237bSDimitry Andric /// take advantage of the fact that new blocks can't have been added, and so we
15292cab237bSDimitry Andric /// filter by the original loop's blocks. This also handles potentially
15302cab237bSDimitry Andric /// unreachable code that we don't want to explore but might be found examining
15312cab237bSDimitry Andric /// the predecessors of the header.
15322cab237bSDimitry Andric ///
15332cab237bSDimitry Andric /// If the original loop is no longer a loop, this will return an empty set. If
15342cab237bSDimitry Andric /// it remains a loop, all the blocks within it will be added to the set
15352cab237bSDimitry Andric /// (including those blocks in inner loops).
recomputeLoopBlockSet(Loop & L,LoopInfo & LI)15362cab237bSDimitry Andric static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
15372cab237bSDimitry Andric                                                                  LoopInfo &LI) {
15382cab237bSDimitry Andric   SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
15392cab237bSDimitry Andric 
15402cab237bSDimitry Andric   auto *PH = L.getLoopPreheader();
15412cab237bSDimitry Andric   auto *Header = L.getHeader();
15422cab237bSDimitry Andric 
15432cab237bSDimitry Andric   // A worklist to use while walking backwards from the header.
15442cab237bSDimitry Andric   SmallVector<BasicBlock *, 16> Worklist;
15452cab237bSDimitry Andric 
15462cab237bSDimitry Andric   // First walk the predecessors of the header to find the backedges. This will
15472cab237bSDimitry Andric   // form the basis of our walk.
15482cab237bSDimitry Andric   for (auto *Pred : predecessors(Header)) {
15492cab237bSDimitry Andric     // Skip the preheader.
15502cab237bSDimitry Andric     if (Pred == PH)
15512cab237bSDimitry Andric       continue;
15522cab237bSDimitry Andric 
15532cab237bSDimitry Andric     // Because the loop was in simplified form, the only non-loop predecessor
15542cab237bSDimitry Andric     // is the preheader.
15552cab237bSDimitry Andric     assert(L.contains(Pred) && "Found a predecessor of the loop header other "
15562cab237bSDimitry Andric                                "than the preheader that is not part of the "
15572cab237bSDimitry Andric                                "loop!");
15582cab237bSDimitry Andric 
15592cab237bSDimitry Andric     // Insert this block into the loop set and on the first visit and, if it
15602cab237bSDimitry Andric     // isn't the header we're currently walking, put it into the worklist to
15612cab237bSDimitry Andric     // recurse through.
15622cab237bSDimitry Andric     if (LoopBlockSet.insert(Pred).second && Pred != Header)
15632cab237bSDimitry Andric       Worklist.push_back(Pred);
15642cab237bSDimitry Andric   }
15652cab237bSDimitry Andric 
15662cab237bSDimitry Andric   // If no backedges were found, we're done.
15672cab237bSDimitry Andric   if (LoopBlockSet.empty())
15682cab237bSDimitry Andric     return LoopBlockSet;
15692cab237bSDimitry Andric 
15702cab237bSDimitry Andric   // We found backedges, recurse through them to identify the loop blocks.
15712cab237bSDimitry Andric   while (!Worklist.empty()) {
15722cab237bSDimitry Andric     BasicBlock *BB = Worklist.pop_back_val();
15732cab237bSDimitry Andric     assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
15742cab237bSDimitry Andric 
15754ba319b5SDimitry Andric     // No need to walk past the header.
15764ba319b5SDimitry Andric     if (BB == Header)
15774ba319b5SDimitry Andric       continue;
15784ba319b5SDimitry Andric 
15792cab237bSDimitry Andric     // Because we know the inner loop structure remains valid we can use the
15802cab237bSDimitry Andric     // loop structure to jump immediately across the entire nested loop.
15812cab237bSDimitry Andric     // Further, because it is in loop simplified form, we can directly jump
15822cab237bSDimitry Andric     // to its preheader afterward.
15832cab237bSDimitry Andric     if (Loop *InnerL = LI.getLoopFor(BB))
15842cab237bSDimitry Andric       if (InnerL != &L) {
15852cab237bSDimitry Andric         assert(L.contains(InnerL) &&
15862cab237bSDimitry Andric                "Should not reach a loop *outside* this loop!");
15872cab237bSDimitry Andric         // The preheader is the only possible predecessor of the loop so
15882cab237bSDimitry Andric         // insert it into the set and check whether it was already handled.
15892cab237bSDimitry Andric         auto *InnerPH = InnerL->getLoopPreheader();
15902cab237bSDimitry Andric         assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
15912cab237bSDimitry Andric                                       "but not contain the inner loop "
15922cab237bSDimitry Andric                                       "preheader!");
15932cab237bSDimitry Andric         if (!LoopBlockSet.insert(InnerPH).second)
15942cab237bSDimitry Andric           // The only way to reach the preheader is through the loop body
15952cab237bSDimitry Andric           // itself so if it has been visited the loop is already handled.
15962cab237bSDimitry Andric           continue;
15972cab237bSDimitry Andric 
15982cab237bSDimitry Andric         // Insert all of the blocks (other than those already present) into
15994ba319b5SDimitry Andric         // the loop set. We expect at least the block that led us to find the
16004ba319b5SDimitry Andric         // inner loop to be in the block set, but we may also have other loop
16014ba319b5SDimitry Andric         // blocks if they were already enqueued as predecessors of some other
16024ba319b5SDimitry Andric         // outer loop block.
16032cab237bSDimitry Andric         for (auto *InnerBB : InnerL->blocks()) {
16042cab237bSDimitry Andric           if (InnerBB == BB) {
16052cab237bSDimitry Andric             assert(LoopBlockSet.count(InnerBB) &&
16062cab237bSDimitry Andric                    "Block should already be in the set!");
16072cab237bSDimitry Andric             continue;
16082cab237bSDimitry Andric           }
16092cab237bSDimitry Andric 
16104ba319b5SDimitry Andric           LoopBlockSet.insert(InnerBB);
16112cab237bSDimitry Andric         }
16122cab237bSDimitry Andric 
16132cab237bSDimitry Andric         // Add the preheader to the worklist so we will continue past the
16142cab237bSDimitry Andric         // loop body.
16152cab237bSDimitry Andric         Worklist.push_back(InnerPH);
16162cab237bSDimitry Andric         continue;
16172cab237bSDimitry Andric       }
16182cab237bSDimitry Andric 
16192cab237bSDimitry Andric     // Insert any predecessors that were in the original loop into the new
16202cab237bSDimitry Andric     // set, and if the insert is successful, add them to the worklist.
16212cab237bSDimitry Andric     for (auto *Pred : predecessors(BB))
16222cab237bSDimitry Andric       if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
16232cab237bSDimitry Andric         Worklist.push_back(Pred);
16242cab237bSDimitry Andric   }
16252cab237bSDimitry Andric 
16264ba319b5SDimitry Andric   assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
16274ba319b5SDimitry Andric 
16282cab237bSDimitry Andric   // We've found all the blocks participating in the loop, return our completed
16292cab237bSDimitry Andric   // set.
16302cab237bSDimitry Andric   return LoopBlockSet;
16312cab237bSDimitry Andric }
16322cab237bSDimitry Andric 
16332cab237bSDimitry Andric /// Rebuild a loop after unswitching removes some subset of blocks and edges.
16342cab237bSDimitry Andric ///
16352cab237bSDimitry Andric /// The removal may have removed some child loops entirely but cannot have
16362cab237bSDimitry Andric /// disturbed any remaining child loops. However, they may need to be hoisted
16372cab237bSDimitry Andric /// to the parent loop (or to be top-level loops). The original loop may be
16382cab237bSDimitry Andric /// completely removed.
16392cab237bSDimitry Andric ///
16402cab237bSDimitry Andric /// The sibling loops resulting from this update are returned. If the original
16412cab237bSDimitry Andric /// loop remains a valid loop, it will be the first entry in this list with all
16422cab237bSDimitry Andric /// of the newly sibling loops following it.
16432cab237bSDimitry Andric ///
16442cab237bSDimitry Andric /// Returns true if the loop remains a loop after unswitching, and false if it
16452cab237bSDimitry Andric /// is no longer a loop after unswitching (and should not continue to be
16462cab237bSDimitry Andric /// referenced).
rebuildLoopAfterUnswitch(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,LoopInfo & LI,SmallVectorImpl<Loop * > & HoistedLoops)16472cab237bSDimitry Andric static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
16482cab237bSDimitry Andric                                      LoopInfo &LI,
16492cab237bSDimitry Andric                                      SmallVectorImpl<Loop *> &HoistedLoops) {
16502cab237bSDimitry Andric   auto *PH = L.getLoopPreheader();
16512cab237bSDimitry Andric 
16522cab237bSDimitry Andric   // Compute the actual parent loop from the exit blocks. Because we may have
16532cab237bSDimitry Andric   // pruned some exits the loop may be different from the original parent.
16542cab237bSDimitry Andric   Loop *ParentL = nullptr;
16552cab237bSDimitry Andric   SmallVector<Loop *, 4> ExitLoops;
16562cab237bSDimitry Andric   SmallVector<BasicBlock *, 4> ExitsInLoops;
16572cab237bSDimitry Andric   ExitsInLoops.reserve(ExitBlocks.size());
16582cab237bSDimitry Andric   for (auto *ExitBB : ExitBlocks)
16592cab237bSDimitry Andric     if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
16602cab237bSDimitry Andric       ExitLoops.push_back(ExitL);
16612cab237bSDimitry Andric       ExitsInLoops.push_back(ExitBB);
16622cab237bSDimitry Andric       if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
16632cab237bSDimitry Andric         ParentL = ExitL;
16642cab237bSDimitry Andric     }
16652cab237bSDimitry Andric 
16662cab237bSDimitry Andric   // Recompute the blocks participating in this loop. This may be empty if it
16672cab237bSDimitry Andric   // is no longer a loop.
16682cab237bSDimitry Andric   auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
16692cab237bSDimitry Andric 
16702cab237bSDimitry Andric   // If we still have a loop, we need to re-set the loop's parent as the exit
16712cab237bSDimitry Andric   // block set changing may have moved it within the loop nest. Note that this
16722cab237bSDimitry Andric   // can only happen when this loop has a parent as it can only hoist the loop
16732cab237bSDimitry Andric   // *up* the nest.
16742cab237bSDimitry Andric   if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
16752cab237bSDimitry Andric     // Remove this loop's (original) blocks from all of the intervening loops.
16762cab237bSDimitry Andric     for (Loop *IL = L.getParentLoop(); IL != ParentL;
16772cab237bSDimitry Andric          IL = IL->getParentLoop()) {
16782cab237bSDimitry Andric       IL->getBlocksSet().erase(PH);
16792cab237bSDimitry Andric       for (auto *BB : L.blocks())
16802cab237bSDimitry Andric         IL->getBlocksSet().erase(BB);
16812cab237bSDimitry Andric       llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
16822cab237bSDimitry Andric         return BB == PH || L.contains(BB);
16832cab237bSDimitry Andric       });
16842cab237bSDimitry Andric     }
16852cab237bSDimitry Andric 
16862cab237bSDimitry Andric     LI.changeLoopFor(PH, ParentL);
16872cab237bSDimitry Andric     L.getParentLoop()->removeChildLoop(&L);
16882cab237bSDimitry Andric     if (ParentL)
16892cab237bSDimitry Andric       ParentL->addChildLoop(&L);
16902cab237bSDimitry Andric     else
16912cab237bSDimitry Andric       LI.addTopLevelLoop(&L);
16922cab237bSDimitry Andric   }
16932cab237bSDimitry Andric 
16942cab237bSDimitry Andric   // Now we update all the blocks which are no longer within the loop.
16952cab237bSDimitry Andric   auto &Blocks = L.getBlocksVector();
16962cab237bSDimitry Andric   auto BlocksSplitI =
16972cab237bSDimitry Andric       LoopBlockSet.empty()
16982cab237bSDimitry Andric           ? Blocks.begin()
16992cab237bSDimitry Andric           : std::stable_partition(
17002cab237bSDimitry Andric                 Blocks.begin(), Blocks.end(),
17012cab237bSDimitry Andric                 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
17022cab237bSDimitry Andric 
17032cab237bSDimitry Andric   // Before we erase the list of unlooped blocks, build a set of them.
17042cab237bSDimitry Andric   SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
17052cab237bSDimitry Andric   if (LoopBlockSet.empty())
17062cab237bSDimitry Andric     UnloopedBlocks.insert(PH);
17072cab237bSDimitry Andric 
17082cab237bSDimitry Andric   // Now erase these blocks from the loop.
17092cab237bSDimitry Andric   for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
17102cab237bSDimitry Andric     L.getBlocksSet().erase(BB);
17112cab237bSDimitry Andric   Blocks.erase(BlocksSplitI, Blocks.end());
17122cab237bSDimitry Andric 
17132cab237bSDimitry Andric   // Sort the exits in ascending loop depth, we'll work backwards across these
17142cab237bSDimitry Andric   // to process them inside out.
17152cab237bSDimitry Andric   std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
17162cab237bSDimitry Andric                    [&](BasicBlock *LHS, BasicBlock *RHS) {
17172cab237bSDimitry Andric                      return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
17182cab237bSDimitry Andric                    });
17192cab237bSDimitry Andric 
17202cab237bSDimitry Andric   // We'll build up a set for each exit loop.
17212cab237bSDimitry Andric   SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
17222cab237bSDimitry Andric   Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
17232cab237bSDimitry Andric 
17242cab237bSDimitry Andric   auto RemoveUnloopedBlocksFromLoop =
17252cab237bSDimitry Andric       [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
17262cab237bSDimitry Andric         for (auto *BB : UnloopedBlocks)
17272cab237bSDimitry Andric           L.getBlocksSet().erase(BB);
17282cab237bSDimitry Andric         llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
17292cab237bSDimitry Andric           return UnloopedBlocks.count(BB);
17302cab237bSDimitry Andric         });
17312cab237bSDimitry Andric       };
17322cab237bSDimitry Andric 
17332cab237bSDimitry Andric   SmallVector<BasicBlock *, 16> Worklist;
17342cab237bSDimitry Andric   while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
17352cab237bSDimitry Andric     assert(Worklist.empty() && "Didn't clear worklist!");
17362cab237bSDimitry Andric     assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
17372cab237bSDimitry Andric 
17382cab237bSDimitry Andric     // Grab the next exit block, in decreasing loop depth order.
17392cab237bSDimitry Andric     BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
17402cab237bSDimitry Andric     Loop &ExitL = *LI.getLoopFor(ExitBB);
17412cab237bSDimitry Andric     assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
17422cab237bSDimitry Andric 
17432cab237bSDimitry Andric     // Erase all of the unlooped blocks from the loops between the previous
17442cab237bSDimitry Andric     // exit loop and this exit loop. This works because the ExitInLoops list is
17452cab237bSDimitry Andric     // sorted in increasing order of loop depth and thus we visit loops in
17462cab237bSDimitry Andric     // decreasing order of loop depth.
17472cab237bSDimitry Andric     for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
17482cab237bSDimitry Andric       RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
17492cab237bSDimitry Andric 
17502cab237bSDimitry Andric     // Walk the CFG back until we hit the cloned PH adding everything reachable
17512cab237bSDimitry Andric     // and in the unlooped set to this exit block's loop.
17522cab237bSDimitry Andric     Worklist.push_back(ExitBB);
17532cab237bSDimitry Andric     do {
17542cab237bSDimitry Andric       BasicBlock *BB = Worklist.pop_back_val();
17552cab237bSDimitry Andric       // We can stop recursing at the cloned preheader (if we get there).
17562cab237bSDimitry Andric       if (BB == PH)
17572cab237bSDimitry Andric         continue;
17582cab237bSDimitry Andric 
17592cab237bSDimitry Andric       for (BasicBlock *PredBB : predecessors(BB)) {
17602cab237bSDimitry Andric         // If this pred has already been moved to our set or is part of some
17612cab237bSDimitry Andric         // (inner) loop, no update needed.
17622cab237bSDimitry Andric         if (!UnloopedBlocks.erase(PredBB)) {
17632cab237bSDimitry Andric           assert((NewExitLoopBlocks.count(PredBB) ||
17642cab237bSDimitry Andric                   ExitL.contains(LI.getLoopFor(PredBB))) &&
17652cab237bSDimitry Andric                  "Predecessor not in a nested loop (or already visited)!");
17662cab237bSDimitry Andric           continue;
17672cab237bSDimitry Andric         }
17682cab237bSDimitry Andric 
17692cab237bSDimitry Andric         // We just insert into the loop set here. We'll add these blocks to the
17702cab237bSDimitry Andric         // exit loop after we build up the set in a deterministic order rather
17712cab237bSDimitry Andric         // than the predecessor-influenced visit order.
17722cab237bSDimitry Andric         bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
17732cab237bSDimitry Andric         (void)Inserted;
17742cab237bSDimitry Andric         assert(Inserted && "Should only visit an unlooped block once!");
17752cab237bSDimitry Andric 
17762cab237bSDimitry Andric         // And recurse through to its predecessors.
17772cab237bSDimitry Andric         Worklist.push_back(PredBB);
17782cab237bSDimitry Andric       }
17792cab237bSDimitry Andric     } while (!Worklist.empty());
17802cab237bSDimitry Andric 
17812cab237bSDimitry Andric     // If blocks in this exit loop were directly part of the original loop (as
17822cab237bSDimitry Andric     // opposed to a child loop) update the map to point to this exit loop. This
17832cab237bSDimitry Andric     // just updates a map and so the fact that the order is unstable is fine.
17842cab237bSDimitry Andric     for (auto *BB : NewExitLoopBlocks)
17852cab237bSDimitry Andric       if (Loop *BBL = LI.getLoopFor(BB))
17862cab237bSDimitry Andric         if (BBL == &L || !L.contains(BBL))
17872cab237bSDimitry Andric           LI.changeLoopFor(BB, &ExitL);
17882cab237bSDimitry Andric 
17892cab237bSDimitry Andric     // We will remove the remaining unlooped blocks from this loop in the next
17902cab237bSDimitry Andric     // iteration or below.
17912cab237bSDimitry Andric     NewExitLoopBlocks.clear();
17922cab237bSDimitry Andric   }
17932cab237bSDimitry Andric 
17942cab237bSDimitry Andric   // Any remaining unlooped blocks are no longer part of any loop unless they
17952cab237bSDimitry Andric   // are part of some child loop.
17962cab237bSDimitry Andric   for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
17972cab237bSDimitry Andric     RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
17982cab237bSDimitry Andric   for (auto *BB : UnloopedBlocks)
17992cab237bSDimitry Andric     if (Loop *BBL = LI.getLoopFor(BB))
18002cab237bSDimitry Andric       if (BBL == &L || !L.contains(BBL))
18012cab237bSDimitry Andric         LI.changeLoopFor(BB, nullptr);
18022cab237bSDimitry Andric 
18032cab237bSDimitry Andric   // Sink all the child loops whose headers are no longer in the loop set to
18042cab237bSDimitry Andric   // the parent (or to be top level loops). We reach into the loop and directly
18052cab237bSDimitry Andric   // update its subloop vector to make this batch update efficient.
18062cab237bSDimitry Andric   auto &SubLoops = L.getSubLoopsVector();
18072cab237bSDimitry Andric   auto SubLoopsSplitI =
18082cab237bSDimitry Andric       LoopBlockSet.empty()
18092cab237bSDimitry Andric           ? SubLoops.begin()
18102cab237bSDimitry Andric           : std::stable_partition(
18112cab237bSDimitry Andric                 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
18122cab237bSDimitry Andric                   return LoopBlockSet.count(SubL->getHeader());
18132cab237bSDimitry Andric                 });
18142cab237bSDimitry Andric   for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
18152cab237bSDimitry Andric     HoistedLoops.push_back(HoistedL);
18162cab237bSDimitry Andric     HoistedL->setParentLoop(nullptr);
18172cab237bSDimitry Andric 
18182cab237bSDimitry Andric     // To compute the new parent of this hoisted loop we look at where we
18192cab237bSDimitry Andric     // placed the preheader above. We can't lookup the header itself because we
18202cab237bSDimitry Andric     // retained the mapping from the header to the hoisted loop. But the
18212cab237bSDimitry Andric     // preheader and header should have the exact same new parent computed
18222cab237bSDimitry Andric     // based on the set of exit blocks from the original loop as the preheader
18232cab237bSDimitry Andric     // is a predecessor of the header and so reached in the reverse walk. And
18242cab237bSDimitry Andric     // because the loops were all in simplified form the preheader of the
18252cab237bSDimitry Andric     // hoisted loop can't be part of some *other* loop.
18262cab237bSDimitry Andric     if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
18272cab237bSDimitry Andric       NewParentL->addChildLoop(HoistedL);
18282cab237bSDimitry Andric     else
18292cab237bSDimitry Andric       LI.addTopLevelLoop(HoistedL);
18302cab237bSDimitry Andric   }
18312cab237bSDimitry Andric   SubLoops.erase(SubLoopsSplitI, SubLoops.end());
18322cab237bSDimitry Andric 
18332cab237bSDimitry Andric   // Actually delete the loop if nothing remained within it.
18342cab237bSDimitry Andric   if (Blocks.empty()) {
18352cab237bSDimitry Andric     assert(SubLoops.empty() &&
18362cab237bSDimitry Andric            "Failed to remove all subloops from the original loop!");
18372cab237bSDimitry Andric     if (Loop *ParentL = L.getParentLoop())
18382cab237bSDimitry Andric       ParentL->removeChildLoop(llvm::find(*ParentL, &L));
18392cab237bSDimitry Andric     else
18402cab237bSDimitry Andric       LI.removeLoop(llvm::find(LI, &L));
18412cab237bSDimitry Andric     LI.destroy(&L);
18422cab237bSDimitry Andric     return false;
18432cab237bSDimitry Andric   }
18442cab237bSDimitry Andric 
18452cab237bSDimitry Andric   return true;
18462cab237bSDimitry Andric }
18472cab237bSDimitry Andric 
18482cab237bSDimitry Andric /// Helper to visit a dominator subtree, invoking a callable on each node.
18492cab237bSDimitry Andric ///
18502cab237bSDimitry Andric /// Returning false at any point will stop walking past that node of the tree.
18512cab237bSDimitry Andric template <typename CallableT>
visitDomSubTree(DominatorTree & DT,BasicBlock * BB,CallableT Callable)18522cab237bSDimitry Andric void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
18532cab237bSDimitry Andric   SmallVector<DomTreeNode *, 4> DomWorklist;
18542cab237bSDimitry Andric   DomWorklist.push_back(DT[BB]);
18552cab237bSDimitry Andric #ifndef NDEBUG
18562cab237bSDimitry Andric   SmallPtrSet<DomTreeNode *, 4> Visited;
18572cab237bSDimitry Andric   Visited.insert(DT[BB]);
18582cab237bSDimitry Andric #endif
18592cab237bSDimitry Andric   do {
18602cab237bSDimitry Andric     DomTreeNode *N = DomWorklist.pop_back_val();
18612cab237bSDimitry Andric 
18622cab237bSDimitry Andric     // Visit this node.
18632cab237bSDimitry Andric     if (!Callable(N->getBlock()))
18642cab237bSDimitry Andric       continue;
18652cab237bSDimitry Andric 
18662cab237bSDimitry Andric     // Accumulate the child nodes.
18672cab237bSDimitry Andric     for (DomTreeNode *ChildN : *N) {
18682cab237bSDimitry Andric       assert(Visited.insert(ChildN).second &&
18692cab237bSDimitry Andric              "Cannot visit a node twice when walking a tree!");
18702cab237bSDimitry Andric       DomWorklist.push_back(ChildN);
18712cab237bSDimitry Andric     }
18722cab237bSDimitry Andric   } while (!DomWorklist.empty());
18732cab237bSDimitry Andric }
18742cab237bSDimitry Andric 
unswitchNontrivialInvariants(Loop & L,Instruction & TI,ArrayRef<Value * > Invariants,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)1875*b5893f02SDimitry Andric static void unswitchNontrivialInvariants(
1876*b5893f02SDimitry Andric     Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
1877*b5893f02SDimitry Andric     SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
1878*b5893f02SDimitry Andric     AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
1879*b5893f02SDimitry Andric     ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
18804ba319b5SDimitry Andric   auto *ParentBB = TI.getParent();
18814ba319b5SDimitry Andric   BranchInst *BI = dyn_cast<BranchInst>(&TI);
18824ba319b5SDimitry Andric   SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
18832cab237bSDimitry Andric 
18844ba319b5SDimitry Andric   // We can only unswitch switches, conditional branches with an invariant
18854ba319b5SDimitry Andric   // condition, or combining invariant conditions with an instruction.
18864ba319b5SDimitry Andric   assert((SI || BI->isConditional()) &&
18874ba319b5SDimitry Andric          "Can only unswitch switches and conditional branch!");
18884ba319b5SDimitry Andric   bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
18894ba319b5SDimitry Andric   if (FullUnswitch)
18904ba319b5SDimitry Andric     assert(Invariants.size() == 1 &&
18914ba319b5SDimitry Andric            "Cannot have other invariants with full unswitching!");
18924ba319b5SDimitry Andric   else
18934ba319b5SDimitry Andric     assert(isa<Instruction>(BI->getCondition()) &&
18944ba319b5SDimitry Andric            "Partial unswitching requires an instruction as the condition!");
18952cab237bSDimitry Andric 
1896*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
1897*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
1898*b5893f02SDimitry Andric 
18994ba319b5SDimitry Andric   // Constant and BBs tracking the cloned and continuing successor. When we are
19004ba319b5SDimitry Andric   // unswitching the entire condition, this can just be trivially chosen to
19014ba319b5SDimitry Andric   // unswitch towards `true`. However, when we are unswitching a set of
19024ba319b5SDimitry Andric   // invariants combined with `and` or `or`, the combining operation determines
19034ba319b5SDimitry Andric   // the best direction to unswitch: we want to unswitch the direction that will
19044ba319b5SDimitry Andric   // collapse the branch.
19054ba319b5SDimitry Andric   bool Direction = true;
19064ba319b5SDimitry Andric   int ClonedSucc = 0;
19074ba319b5SDimitry Andric   if (!FullUnswitch) {
19084ba319b5SDimitry Andric     if (cast<Instruction>(BI->getCondition())->getOpcode() != Instruction::Or) {
19094ba319b5SDimitry Andric       assert(cast<Instruction>(BI->getCondition())->getOpcode() ==
19104ba319b5SDimitry Andric                  Instruction::And &&
19114ba319b5SDimitry Andric              "Only `or` and `and` instructions can combine invariants being "
19124ba319b5SDimitry Andric              "unswitched.");
19134ba319b5SDimitry Andric       Direction = false;
19144ba319b5SDimitry Andric       ClonedSucc = 1;
19154ba319b5SDimitry Andric     }
19164ba319b5SDimitry Andric   }
19174ba319b5SDimitry Andric 
19184ba319b5SDimitry Andric   BasicBlock *RetainedSuccBB =
19194ba319b5SDimitry Andric       BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
19204ba319b5SDimitry Andric   SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
19214ba319b5SDimitry Andric   if (BI)
19224ba319b5SDimitry Andric     UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
19234ba319b5SDimitry Andric   else
19244ba319b5SDimitry Andric     for (auto Case : SI->cases())
19254ba319b5SDimitry Andric       if (Case.getCaseSuccessor() != RetainedSuccBB)
19264ba319b5SDimitry Andric         UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
19274ba319b5SDimitry Andric 
19284ba319b5SDimitry Andric   assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
19294ba319b5SDimitry Andric          "Should not unswitch the same successor we are retaining!");
19302cab237bSDimitry Andric 
19312cab237bSDimitry Andric   // The branch should be in this exact loop. Any inner loop's invariant branch
19322cab237bSDimitry Andric   // should be handled by unswitching that inner loop. The caller of this
19332cab237bSDimitry Andric   // routine should filter out any candidates that remain (but were skipped for
19342cab237bSDimitry Andric   // whatever reason).
19352cab237bSDimitry Andric   assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
19362cab237bSDimitry Andric 
19372cab237bSDimitry Andric   // Compute the parent loop now before we start hacking on things.
19382cab237bSDimitry Andric   Loop *ParentL = L.getParentLoop();
1939*b5893f02SDimitry Andric   // Get blocks in RPO order for MSSA update, before changing the CFG.
1940*b5893f02SDimitry Andric   LoopBlocksRPO LBRPO(&L);
1941*b5893f02SDimitry Andric   if (MSSAU)
1942*b5893f02SDimitry Andric     LBRPO.perform(&LI);
19432cab237bSDimitry Andric 
19442cab237bSDimitry Andric   // Compute the outer-most loop containing one of our exit blocks. This is the
19452cab237bSDimitry Andric   // furthest up our loopnest which can be mutated, which we will use below to
19462cab237bSDimitry Andric   // update things.
19472cab237bSDimitry Andric   Loop *OuterExitL = &L;
19482cab237bSDimitry Andric   for (auto *ExitBB : ExitBlocks) {
19492cab237bSDimitry Andric     Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
19502cab237bSDimitry Andric     if (!NewOuterExitL) {
19512cab237bSDimitry Andric       // We exited the entire nest with this block, so we're done.
19522cab237bSDimitry Andric       OuterExitL = nullptr;
19532cab237bSDimitry Andric       break;
19542cab237bSDimitry Andric     }
19552cab237bSDimitry Andric     if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
19562cab237bSDimitry Andric       OuterExitL = NewOuterExitL;
19572cab237bSDimitry Andric   }
19582cab237bSDimitry Andric 
19594ba319b5SDimitry Andric   // At this point, we're definitely going to unswitch something so invalidate
19604ba319b5SDimitry Andric   // any cached information in ScalarEvolution for the outer most loop
19614ba319b5SDimitry Andric   // containing an exit block and all nested loops.
19624ba319b5SDimitry Andric   if (SE) {
19634ba319b5SDimitry Andric     if (OuterExitL)
19644ba319b5SDimitry Andric       SE->forgetLoop(OuterExitL);
19654ba319b5SDimitry Andric     else
19664ba319b5SDimitry Andric       SE->forgetTopmostLoop(&L);
19672cab237bSDimitry Andric   }
19684ba319b5SDimitry Andric 
19694ba319b5SDimitry Andric   // If the edge from this terminator to a successor dominates that successor,
19704ba319b5SDimitry Andric   // store a map from each block in its dominator subtree to it. This lets us
19714ba319b5SDimitry Andric   // tell when cloning for a particular successor if a block is dominated by
19724ba319b5SDimitry Andric   // some *other* successor with a single data structure. We use this to
19734ba319b5SDimitry Andric   // significantly reduce cloning.
19744ba319b5SDimitry Andric   SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
19754ba319b5SDimitry Andric   for (auto *SuccBB : llvm::concat<BasicBlock *const>(
19764ba319b5SDimitry Andric            makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
19774ba319b5SDimitry Andric     if (SuccBB->getUniquePredecessor() ||
19784ba319b5SDimitry Andric         llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
19794ba319b5SDimitry Andric           return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
19804ba319b5SDimitry Andric         }))
19814ba319b5SDimitry Andric       visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
19824ba319b5SDimitry Andric         DominatingSucc[BB] = SuccBB;
19834ba319b5SDimitry Andric         return true;
19842cab237bSDimitry Andric       });
19852cab237bSDimitry Andric 
19862cab237bSDimitry Andric   // Split the preheader, so that we know that there is a safe place to insert
19872cab237bSDimitry Andric   // the conditional branch. We will change the preheader to have a conditional
19882cab237bSDimitry Andric   // branch on LoopCond. The original preheader will become the split point
19892cab237bSDimitry Andric   // between the unswitched versions, and we will have a new preheader for the
19902cab237bSDimitry Andric   // original loop.
19912cab237bSDimitry Andric   BasicBlock *SplitBB = L.getLoopPreheader();
1992*b5893f02SDimitry Andric   BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
19932cab237bSDimitry Andric 
19944ba319b5SDimitry Andric   // Keep track of the dominator tree updates needed.
19954ba319b5SDimitry Andric   SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
19962cab237bSDimitry Andric 
19974ba319b5SDimitry Andric   // Clone the loop for each unswitched successor.
19984ba319b5SDimitry Andric   SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
19994ba319b5SDimitry Andric   VMaps.reserve(UnswitchedSuccBBs.size());
20004ba319b5SDimitry Andric   SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
20014ba319b5SDimitry Andric   for (auto *SuccBB : UnswitchedSuccBBs) {
20024ba319b5SDimitry Andric     VMaps.emplace_back(new ValueToValueMapTy());
20034ba319b5SDimitry Andric     ClonedPHs[SuccBB] = buildClonedLoopBlocks(
20044ba319b5SDimitry Andric         L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2005*b5893f02SDimitry Andric         DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
20064ba319b5SDimitry Andric   }
20074ba319b5SDimitry Andric 
20084ba319b5SDimitry Andric   // The stitching of the branched code back together depends on whether we're
20094ba319b5SDimitry Andric   // doing full unswitching or not with the exception that we always want to
20104ba319b5SDimitry Andric   // nuke the initial terminator placed in the split block.
20114ba319b5SDimitry Andric   SplitBB->getTerminator()->eraseFromParent();
20124ba319b5SDimitry Andric   if (FullUnswitch) {
2013*b5893f02SDimitry Andric     // Splice the terminator from the original loop and rewrite its
2014*b5893f02SDimitry Andric     // successors.
2015*b5893f02SDimitry Andric     SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2016*b5893f02SDimitry Andric 
2017*b5893f02SDimitry Andric     // Keep a clone of the terminator for MSSA updates.
2018*b5893f02SDimitry Andric     Instruction *NewTI = TI.clone();
2019*b5893f02SDimitry Andric     ParentBB->getInstList().push_back(NewTI);
2020*b5893f02SDimitry Andric 
2021*b5893f02SDimitry Andric     // First wire up the moved terminator to the preheaders.
2022*b5893f02SDimitry Andric     if (BI) {
2023*b5893f02SDimitry Andric       BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2024*b5893f02SDimitry Andric       BI->setSuccessor(ClonedSucc, ClonedPH);
2025*b5893f02SDimitry Andric       BI->setSuccessor(1 - ClonedSucc, LoopPH);
2026*b5893f02SDimitry Andric       DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2027*b5893f02SDimitry Andric     } else {
2028*b5893f02SDimitry Andric       assert(SI && "Must either be a branch or switch!");
2029*b5893f02SDimitry Andric 
2030*b5893f02SDimitry Andric       // Walk the cases and directly update their successors.
2031*b5893f02SDimitry Andric       assert(SI->getDefaultDest() == RetainedSuccBB &&
2032*b5893f02SDimitry Andric              "Not retaining default successor!");
2033*b5893f02SDimitry Andric       SI->setDefaultDest(LoopPH);
2034*b5893f02SDimitry Andric       for (auto &Case : SI->cases())
2035*b5893f02SDimitry Andric         if (Case.getCaseSuccessor() == RetainedSuccBB)
2036*b5893f02SDimitry Andric           Case.setSuccessor(LoopPH);
2037*b5893f02SDimitry Andric         else
2038*b5893f02SDimitry Andric           Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2039*b5893f02SDimitry Andric 
2040*b5893f02SDimitry Andric       // We need to use the set to populate domtree updates as even when there
2041*b5893f02SDimitry Andric       // are multiple cases pointing at the same successor we only want to
2042*b5893f02SDimitry Andric       // remove and insert one edge in the domtree.
2043*b5893f02SDimitry Andric       for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2044*b5893f02SDimitry Andric         DTUpdates.push_back(
2045*b5893f02SDimitry Andric             {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2046*b5893f02SDimitry Andric     }
2047*b5893f02SDimitry Andric 
2048*b5893f02SDimitry Andric     if (MSSAU) {
2049*b5893f02SDimitry Andric       DT.applyUpdates(DTUpdates);
2050*b5893f02SDimitry Andric       DTUpdates.clear();
2051*b5893f02SDimitry Andric 
2052*b5893f02SDimitry Andric       // Remove all but one edge to the retained block and all unswitched
2053*b5893f02SDimitry Andric       // blocks. This is to avoid having duplicate entries in the cloned Phis,
2054*b5893f02SDimitry Andric       // when we know we only keep a single edge for each case.
2055*b5893f02SDimitry Andric       MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2056*b5893f02SDimitry Andric       for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2057*b5893f02SDimitry Andric         MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2058*b5893f02SDimitry Andric 
2059*b5893f02SDimitry Andric       for (auto &VMap : VMaps)
2060*b5893f02SDimitry Andric         MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2061*b5893f02SDimitry Andric                                    /*IgnoreIncomingWithNoClones=*/true);
2062*b5893f02SDimitry Andric       MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2063*b5893f02SDimitry Andric 
2064*b5893f02SDimitry Andric       // Remove all edges to unswitched blocks.
2065*b5893f02SDimitry Andric       for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2066*b5893f02SDimitry Andric         MSSAU->removeEdge(ParentBB, SuccBB);
2067*b5893f02SDimitry Andric     }
2068*b5893f02SDimitry Andric 
2069*b5893f02SDimitry Andric     // Now unhook the successor relationship as we'll be replacing
20704ba319b5SDimitry Andric     // the terminator with a direct branch. This is much simpler for branches
20714ba319b5SDimitry Andric     // than switches so we handle those first.
20724ba319b5SDimitry Andric     if (BI) {
20734ba319b5SDimitry Andric       // Remove the parent as a predecessor of the unswitched successor.
20744ba319b5SDimitry Andric       assert(UnswitchedSuccBBs.size() == 1 &&
20754ba319b5SDimitry Andric              "Only one possible unswitched block for a branch!");
20764ba319b5SDimitry Andric       BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
20774ba319b5SDimitry Andric       UnswitchedSuccBB->removePredecessor(ParentBB,
20784ba319b5SDimitry Andric                                           /*DontDeleteUselessPHIs*/ true);
20794ba319b5SDimitry Andric       DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
20804ba319b5SDimitry Andric     } else {
20814ba319b5SDimitry Andric       // Note that we actually want to remove the parent block as a predecessor
20824ba319b5SDimitry Andric       // of *every* case successor. The case successor is either unswitched,
20834ba319b5SDimitry Andric       // completely eliminating an edge from the parent to that successor, or it
20844ba319b5SDimitry Andric       // is a duplicate edge to the retained successor as the retained successor
20854ba319b5SDimitry Andric       // is always the default successor and as we'll replace this with a direct
20864ba319b5SDimitry Andric       // branch we no longer need the duplicate entries in the PHI nodes.
2087*b5893f02SDimitry Andric       SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2088*b5893f02SDimitry Andric       assert(NewSI->getDefaultDest() == RetainedSuccBB &&
20894ba319b5SDimitry Andric              "Not retaining default successor!");
2090*b5893f02SDimitry Andric       for (auto &Case : NewSI->cases())
20914ba319b5SDimitry Andric         Case.getCaseSuccessor()->removePredecessor(
20924ba319b5SDimitry Andric             ParentBB,
20934ba319b5SDimitry Andric             /*DontDeleteUselessPHIs*/ true);
20944ba319b5SDimitry Andric 
20954ba319b5SDimitry Andric       // We need to use the set to populate domtree updates as even when there
20964ba319b5SDimitry Andric       // are multiple cases pointing at the same successor we only want to
20974ba319b5SDimitry Andric       // remove and insert one edge in the domtree.
20984ba319b5SDimitry Andric       for (BasicBlock *SuccBB : UnswitchedSuccBBs)
20994ba319b5SDimitry Andric         DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
21004ba319b5SDimitry Andric     }
21014ba319b5SDimitry Andric 
2102*b5893f02SDimitry Andric     // After MSSAU update, remove the cloned terminator instruction NewTI.
2103*b5893f02SDimitry Andric     ParentBB->getTerminator()->eraseFromParent();
21044ba319b5SDimitry Andric 
21054ba319b5SDimitry Andric     // Create a new unconditional branch to the continuing block (as opposed to
21064ba319b5SDimitry Andric     // the one cloned).
21074ba319b5SDimitry Andric     BranchInst::Create(RetainedSuccBB, ParentBB);
21084ba319b5SDimitry Andric   } else {
21094ba319b5SDimitry Andric     assert(BI && "Only branches have partial unswitching.");
21104ba319b5SDimitry Andric     assert(UnswitchedSuccBBs.size() == 1 &&
21114ba319b5SDimitry Andric            "Only one possible unswitched block for a branch!");
21124ba319b5SDimitry Andric     BasicBlock *ClonedPH = ClonedPHs.begin()->second;
21134ba319b5SDimitry Andric     // When doing a partial unswitch, we have to do a bit more work to build up
21144ba319b5SDimitry Andric     // the branch in the split block.
21154ba319b5SDimitry Andric     buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
21164ba319b5SDimitry Andric                                           *ClonedPH, *LoopPH);
21174ba319b5SDimitry Andric     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
21184ba319b5SDimitry Andric   }
21194ba319b5SDimitry Andric 
21204ba319b5SDimitry Andric   // Apply the updates accumulated above to get an up-to-date dominator tree.
21214ba319b5SDimitry Andric   DT.applyUpdates(DTUpdates);
2122*b5893f02SDimitry Andric   if (!FullUnswitch && MSSAU) {
2123*b5893f02SDimitry Andric     // Update MSSA for partial unswitch, after DT update.
2124*b5893f02SDimitry Andric     SmallVector<CFGUpdate, 1> Updates;
2125*b5893f02SDimitry Andric     Updates.push_back(
2126*b5893f02SDimitry Andric         {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second});
2127*b5893f02SDimitry Andric     MSSAU->applyInsertUpdates(Updates, DT);
2128*b5893f02SDimitry Andric   }
21294ba319b5SDimitry Andric 
21304ba319b5SDimitry Andric   // Now that we have an accurate dominator tree, first delete the dead cloned
21314ba319b5SDimitry Andric   // blocks so that we can accurately build any cloned loops. It is important to
21324ba319b5SDimitry Andric   // not delete the blocks from the original loop yet because we still want to
21334ba319b5SDimitry Andric   // reference the original loop to understand the cloned loop's structure.
2134*b5893f02SDimitry Andric   deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
21352cab237bSDimitry Andric 
21362cab237bSDimitry Andric   // Build the cloned loop structure itself. This may be substantially
21372cab237bSDimitry Andric   // different from the original structure due to the simplified CFG. This also
21382cab237bSDimitry Andric   // handles inserting all the cloned blocks into the correct loops.
21392cab237bSDimitry Andric   SmallVector<Loop *, 4> NonChildClonedLoops;
21404ba319b5SDimitry Andric   for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
21414ba319b5SDimitry Andric     buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
21422cab237bSDimitry Andric 
21434ba319b5SDimitry Andric   // Now that our cloned loops have been built, we can update the original loop.
21444ba319b5SDimitry Andric   // First we delete the dead blocks from it and then we rebuild the loop
21454ba319b5SDimitry Andric   // structure taking these deletions into account.
2146*b5893f02SDimitry Andric   deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
2147*b5893f02SDimitry Andric 
2148*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
2149*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
2150*b5893f02SDimitry Andric 
21512cab237bSDimitry Andric   SmallVector<Loop *, 4> HoistedLoops;
21522cab237bSDimitry Andric   bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
21532cab237bSDimitry Andric 
2154*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
2155*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
2156*b5893f02SDimitry Andric 
21574ba319b5SDimitry Andric   // This transformation has a high risk of corrupting the dominator tree, and
21584ba319b5SDimitry Andric   // the below steps to rebuild loop structures will result in hard to debug
21594ba319b5SDimitry Andric   // errors in that case so verify that the dominator tree is sane first.
21604ba319b5SDimitry Andric   // FIXME: Remove this when the bugs stop showing up and rely on existing
21614ba319b5SDimitry Andric   // verification steps.
21624ba319b5SDimitry Andric   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
21634ba319b5SDimitry Andric 
21644ba319b5SDimitry Andric   if (BI) {
21654ba319b5SDimitry Andric     // If we unswitched a branch which collapses the condition to a known
21664ba319b5SDimitry Andric     // constant we want to replace all the uses of the invariants within both
21674ba319b5SDimitry Andric     // the original and cloned blocks. We do this here so that we can use the
21684ba319b5SDimitry Andric     // now updated dominator tree to identify which side the users are on.
21694ba319b5SDimitry Andric     assert(UnswitchedSuccBBs.size() == 1 &&
21704ba319b5SDimitry Andric            "Only one possible unswitched block for a branch!");
21714ba319b5SDimitry Andric     BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2172*b5893f02SDimitry Andric 
2173*b5893f02SDimitry Andric     // When considering multiple partially-unswitched invariants
2174*b5893f02SDimitry Andric     // we cant just go replace them with constants in both branches.
2175*b5893f02SDimitry Andric     //
2176*b5893f02SDimitry Andric     // For 'AND' we infer that true branch ("continue") means true
2177*b5893f02SDimitry Andric     // for each invariant operand.
2178*b5893f02SDimitry Andric     // For 'OR' we can infer that false branch ("continue") means false
2179*b5893f02SDimitry Andric     // for each invariant operand.
2180*b5893f02SDimitry Andric     // So it happens that for multiple-partial case we dont replace
2181*b5893f02SDimitry Andric     // in the unswitched branch.
2182*b5893f02SDimitry Andric     bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
2183*b5893f02SDimitry Andric 
21844ba319b5SDimitry Andric     ConstantInt *UnswitchedReplacement =
21854ba319b5SDimitry Andric         Direction ? ConstantInt::getTrue(BI->getContext())
21864ba319b5SDimitry Andric                   : ConstantInt::getFalse(BI->getContext());
21874ba319b5SDimitry Andric     ConstantInt *ContinueReplacement =
21884ba319b5SDimitry Andric         Direction ? ConstantInt::getFalse(BI->getContext())
21894ba319b5SDimitry Andric                   : ConstantInt::getTrue(BI->getContext());
21904ba319b5SDimitry Andric     for (Value *Invariant : Invariants)
21914ba319b5SDimitry Andric       for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
21924ba319b5SDimitry Andric            UI != UE;) {
21934ba319b5SDimitry Andric         // Grab the use and walk past it so we can clobber it in the use list.
21944ba319b5SDimitry Andric         Use *U = &*UI++;
21954ba319b5SDimitry Andric         Instruction *UserI = dyn_cast<Instruction>(U->getUser());
21964ba319b5SDimitry Andric         if (!UserI)
21974ba319b5SDimitry Andric           continue;
21984ba319b5SDimitry Andric 
21994ba319b5SDimitry Andric         // Replace it with the 'continue' side if in the main loop body, and the
22004ba319b5SDimitry Andric         // unswitched if in the cloned blocks.
22014ba319b5SDimitry Andric         if (DT.dominates(LoopPH, UserI->getParent()))
22024ba319b5SDimitry Andric           U->set(ContinueReplacement);
2203*b5893f02SDimitry Andric         else if (ReplaceUnswitched &&
2204*b5893f02SDimitry Andric                  DT.dominates(ClonedPH, UserI->getParent()))
22054ba319b5SDimitry Andric           U->set(UnswitchedReplacement);
22064ba319b5SDimitry Andric       }
22074ba319b5SDimitry Andric   }
22082cab237bSDimitry Andric 
22092cab237bSDimitry Andric   // We can change which blocks are exit blocks of all the cloned sibling
22102cab237bSDimitry Andric   // loops, the current loop, and any parent loops which shared exit blocks
22112cab237bSDimitry Andric   // with the current loop. As a consequence, we need to re-form LCSSA for
22122cab237bSDimitry Andric   // them. But we shouldn't need to re-form LCSSA for any child loops.
22132cab237bSDimitry Andric   // FIXME: This could be made more efficient by tracking which exit blocks are
22142cab237bSDimitry Andric   // new, and focusing on them, but that isn't likely to be necessary.
22152cab237bSDimitry Andric   //
22162cab237bSDimitry Andric   // In order to reasonably rebuild LCSSA we need to walk inside-out across the
22172cab237bSDimitry Andric   // loop nest and update every loop that could have had its exits changed. We
22182cab237bSDimitry Andric   // also need to cover any intervening loops. We add all of these loops to
22192cab237bSDimitry Andric   // a list and sort them by loop depth to achieve this without updating
22202cab237bSDimitry Andric   // unnecessary loops.
22214ba319b5SDimitry Andric   auto UpdateLoop = [&](Loop &UpdateL) {
22222cab237bSDimitry Andric #ifndef NDEBUG
22234ba319b5SDimitry Andric     UpdateL.verifyLoop();
22244ba319b5SDimitry Andric     for (Loop *ChildL : UpdateL) {
22254ba319b5SDimitry Andric       ChildL->verifyLoop();
22262cab237bSDimitry Andric       assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
22272cab237bSDimitry Andric              "Perturbed a child loop's LCSSA form!");
22284ba319b5SDimitry Andric     }
22292cab237bSDimitry Andric #endif
22302cab237bSDimitry Andric     // First build LCSSA for this loop so that we can preserve it when
22312cab237bSDimitry Andric     // forming dedicated exits. We don't want to perturb some other loop's
22322cab237bSDimitry Andric     // LCSSA while doing that CFG edit.
22334ba319b5SDimitry Andric     formLCSSA(UpdateL, DT, &LI, nullptr);
22342cab237bSDimitry Andric 
22352cab237bSDimitry Andric     // For loops reached by this loop's original exit blocks we may
22362cab237bSDimitry Andric     // introduced new, non-dedicated exits. At least try to re-form dedicated
22372cab237bSDimitry Andric     // exits for these loops. This may fail if they couldn't have dedicated
22382cab237bSDimitry Andric     // exits to start with.
22394ba319b5SDimitry Andric     formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
22404ba319b5SDimitry Andric   };
22414ba319b5SDimitry Andric 
22424ba319b5SDimitry Andric   // For non-child cloned loops and hoisted loops, we just need to update LCSSA
22434ba319b5SDimitry Andric   // and we can do it in any order as they don't nest relative to each other.
22444ba319b5SDimitry Andric   //
22454ba319b5SDimitry Andric   // Also check if any of the loops we have updated have become top-level loops
22464ba319b5SDimitry Andric   // as that will necessitate widening the outer loop scope.
22474ba319b5SDimitry Andric   for (Loop *UpdatedL :
22484ba319b5SDimitry Andric        llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
22494ba319b5SDimitry Andric     UpdateLoop(*UpdatedL);
22504ba319b5SDimitry Andric     if (!UpdatedL->getParentLoop())
22514ba319b5SDimitry Andric       OuterExitL = nullptr;
22522cab237bSDimitry Andric   }
22534ba319b5SDimitry Andric   if (IsStillLoop) {
22544ba319b5SDimitry Andric     UpdateLoop(L);
22554ba319b5SDimitry Andric     if (!L.getParentLoop())
22564ba319b5SDimitry Andric       OuterExitL = nullptr;
22572cab237bSDimitry Andric   }
22582cab237bSDimitry Andric 
22594ba319b5SDimitry Andric   // If the original loop had exit blocks, walk up through the outer most loop
22604ba319b5SDimitry Andric   // of those exit blocks to update LCSSA and form updated dedicated exits.
22614ba319b5SDimitry Andric   if (OuterExitL != &L)
22624ba319b5SDimitry Andric     for (Loop *OuterL = ParentL; OuterL != OuterExitL;
22634ba319b5SDimitry Andric          OuterL = OuterL->getParentLoop())
22644ba319b5SDimitry Andric       UpdateLoop(*OuterL);
22654ba319b5SDimitry Andric 
22662cab237bSDimitry Andric #ifndef NDEBUG
22672cab237bSDimitry Andric   // Verify the entire loop structure to catch any incorrect updates before we
22682cab237bSDimitry Andric   // progress in the pass pipeline.
22692cab237bSDimitry Andric   LI.verify(DT);
22702cab237bSDimitry Andric #endif
22712cab237bSDimitry Andric 
22722cab237bSDimitry Andric   // Now that we've unswitched something, make callbacks to report the changes.
22732cab237bSDimitry Andric   // For that we need to merge together the updated loops and the cloned loops
22742cab237bSDimitry Andric   // and check whether the original loop survived.
22752cab237bSDimitry Andric   SmallVector<Loop *, 4> SibLoops;
22762cab237bSDimitry Andric   for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
22772cab237bSDimitry Andric     if (UpdatedL->getParentLoop() == ParentL)
22782cab237bSDimitry Andric       SibLoops.push_back(UpdatedL);
22794ba319b5SDimitry Andric   UnswitchCB(IsStillLoop, SibLoops);
22802cab237bSDimitry Andric 
2281*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
2282*b5893f02SDimitry Andric     MSSAU->getMemorySSA()->verifyMemorySSA();
2283*b5893f02SDimitry Andric 
2284*b5893f02SDimitry Andric   if (BI)
22852cab237bSDimitry Andric     ++NumBranches;
2286*b5893f02SDimitry Andric   else
2287*b5893f02SDimitry Andric     ++NumSwitches;
22882cab237bSDimitry Andric }
22892cab237bSDimitry Andric 
22902cab237bSDimitry Andric /// Recursively compute the cost of a dominator subtree based on the per-block
22912cab237bSDimitry Andric /// cost map provided.
22922cab237bSDimitry Andric ///
22932cab237bSDimitry Andric /// The recursive computation is memozied into the provided DT-indexed cost map
22942cab237bSDimitry Andric /// to allow querying it for most nodes in the domtree without it becoming
22952cab237bSDimitry Andric /// quadratic.
22962cab237bSDimitry Andric static int
computeDomSubtreeCost(DomTreeNode & N,const SmallDenseMap<BasicBlock *,int,4> & BBCostMap,SmallDenseMap<DomTreeNode *,int,4> & DTCostMap)22972cab237bSDimitry Andric computeDomSubtreeCost(DomTreeNode &N,
22982cab237bSDimitry Andric                       const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
22992cab237bSDimitry Andric                       SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
23002cab237bSDimitry Andric   // Don't accumulate cost (or recurse through) blocks not in our block cost
23012cab237bSDimitry Andric   // map and thus not part of the duplication cost being considered.
23022cab237bSDimitry Andric   auto BBCostIt = BBCostMap.find(N.getBlock());
23032cab237bSDimitry Andric   if (BBCostIt == BBCostMap.end())
23042cab237bSDimitry Andric     return 0;
23052cab237bSDimitry Andric 
23062cab237bSDimitry Andric   // Lookup this node to see if we already computed its cost.
23072cab237bSDimitry Andric   auto DTCostIt = DTCostMap.find(&N);
23082cab237bSDimitry Andric   if (DTCostIt != DTCostMap.end())
23092cab237bSDimitry Andric     return DTCostIt->second;
23102cab237bSDimitry Andric 
23112cab237bSDimitry Andric   // If not, we have to compute it. We can't use insert above and update
23122cab237bSDimitry Andric   // because computing the cost may insert more things into the map.
23132cab237bSDimitry Andric   int Cost = std::accumulate(
23142cab237bSDimitry Andric       N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
23152cab237bSDimitry Andric         return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
23162cab237bSDimitry Andric       });
23172cab237bSDimitry Andric   bool Inserted = DTCostMap.insert({&N, Cost}).second;
23182cab237bSDimitry Andric   (void)Inserted;
23192cab237bSDimitry Andric   assert(Inserted && "Should not insert a node while visiting children!");
23202cab237bSDimitry Andric   return Cost;
23212cab237bSDimitry Andric }
23222cab237bSDimitry Andric 
2323*b5893f02SDimitry Andric /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2324*b5893f02SDimitry Andric /// making the following replacement:
2325*b5893f02SDimitry Andric ///
2326*b5893f02SDimitry Andric ///   --code before guard--
2327*b5893f02SDimitry Andric ///   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2328*b5893f02SDimitry Andric ///   --code after guard--
2329*b5893f02SDimitry Andric ///
2330*b5893f02SDimitry Andric /// into
2331*b5893f02SDimitry Andric ///
2332*b5893f02SDimitry Andric ///   --code before guard--
2333*b5893f02SDimitry Andric ///   br i1 %cond, label %guarded, label %deopt
2334*b5893f02SDimitry Andric ///
2335*b5893f02SDimitry Andric /// guarded:
2336*b5893f02SDimitry Andric ///   --code after guard--
2337*b5893f02SDimitry Andric ///
2338*b5893f02SDimitry Andric /// deopt:
2339*b5893f02SDimitry Andric ///   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2340*b5893f02SDimitry Andric ///   unreachable
2341*b5893f02SDimitry Andric ///
2342*b5893f02SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in
2343*b5893f02SDimitry Andric /// valid state after this transform.
2344*b5893f02SDimitry Andric static BranchInst *
turnGuardIntoBranch(IntrinsicInst * GI,Loop & L,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)2345*b5893f02SDimitry Andric turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2346*b5893f02SDimitry Andric                     SmallVectorImpl<BasicBlock *> &ExitBlocks,
2347*b5893f02SDimitry Andric                     DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2348*b5893f02SDimitry Andric   SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2349*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2350*b5893f02SDimitry Andric   BasicBlock *CheckBB = GI->getParent();
2351*b5893f02SDimitry Andric 
2352*b5893f02SDimitry Andric   if (MSSAU && VerifyMemorySSA)
2353*b5893f02SDimitry Andric      MSSAU->getMemorySSA()->verifyMemorySSA();
2354*b5893f02SDimitry Andric 
2355*b5893f02SDimitry Andric   // Remove all CheckBB's successors from DomTree. A block can be seen among
2356*b5893f02SDimitry Andric   // successors more than once, but for DomTree it should be added only once.
2357*b5893f02SDimitry Andric   SmallPtrSet<BasicBlock *, 4> Successors;
2358*b5893f02SDimitry Andric   for (auto *Succ : successors(CheckBB))
2359*b5893f02SDimitry Andric     if (Successors.insert(Succ).second)
2360*b5893f02SDimitry Andric       DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2361*b5893f02SDimitry Andric 
2362*b5893f02SDimitry Andric   Instruction *DeoptBlockTerm =
2363*b5893f02SDimitry Andric       SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2364*b5893f02SDimitry Andric   BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2365*b5893f02SDimitry Andric   // SplitBlockAndInsertIfThen inserts control flow that branches to
2366*b5893f02SDimitry Andric   // DeoptBlockTerm if the condition is true.  We want the opposite.
2367*b5893f02SDimitry Andric   CheckBI->swapSuccessors();
2368*b5893f02SDimitry Andric 
2369*b5893f02SDimitry Andric   BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2370*b5893f02SDimitry Andric   GuardedBlock->setName("guarded");
2371*b5893f02SDimitry Andric   CheckBI->getSuccessor(1)->setName("deopt");
2372*b5893f02SDimitry Andric   BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2373*b5893f02SDimitry Andric 
2374*b5893f02SDimitry Andric   // We now have a new exit block.
2375*b5893f02SDimitry Andric   ExitBlocks.push_back(CheckBI->getSuccessor(1));
2376*b5893f02SDimitry Andric 
2377*b5893f02SDimitry Andric   if (MSSAU)
2378*b5893f02SDimitry Andric     MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2379*b5893f02SDimitry Andric 
2380*b5893f02SDimitry Andric   GI->moveBefore(DeoptBlockTerm);
2381*b5893f02SDimitry Andric   GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2382*b5893f02SDimitry Andric 
2383*b5893f02SDimitry Andric   // Add new successors of CheckBB into DomTree.
2384*b5893f02SDimitry Andric   for (auto *Succ : successors(CheckBB))
2385*b5893f02SDimitry Andric     DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2386*b5893f02SDimitry Andric 
2387*b5893f02SDimitry Andric   // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2388*b5893f02SDimitry Andric   // successors.
2389*b5893f02SDimitry Andric   for (auto *Succ : Successors)
2390*b5893f02SDimitry Andric     DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2391*b5893f02SDimitry Andric 
2392*b5893f02SDimitry Andric   // Make proper changes to DT.
2393*b5893f02SDimitry Andric   DT.applyUpdates(DTUpdates);
2394*b5893f02SDimitry Andric   // Inform LI of a new loop block.
2395*b5893f02SDimitry Andric   L.addBasicBlockToLoop(GuardedBlock, LI);
2396*b5893f02SDimitry Andric 
2397*b5893f02SDimitry Andric   if (MSSAU) {
2398*b5893f02SDimitry Andric     MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2399*b5893f02SDimitry Andric     MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
2400*b5893f02SDimitry Andric     if (VerifyMemorySSA)
2401*b5893f02SDimitry Andric       MSSAU->getMemorySSA()->verifyMemorySSA();
2402*b5893f02SDimitry Andric   }
2403*b5893f02SDimitry Andric 
2404*b5893f02SDimitry Andric   ++NumGuards;
2405*b5893f02SDimitry Andric   return CheckBI;
2406*b5893f02SDimitry Andric }
2407*b5893f02SDimitry Andric 
2408*b5893f02SDimitry Andric /// Cost multiplier is a way to limit potentially exponential behavior
2409*b5893f02SDimitry Andric /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2410*b5893f02SDimitry Andric /// candidates available. Also accounting for the number of "sibling" loops with
2411*b5893f02SDimitry Andric /// the idea to account for previous unswitches that already happened on this
2412*b5893f02SDimitry Andric /// cluster of loops. There was an attempt to keep this formula simple,
2413*b5893f02SDimitry Andric /// just enough to limit the worst case behavior. Even if it is not that simple
2414*b5893f02SDimitry Andric /// now it is still not an attempt to provide a detailed heuristic size
2415*b5893f02SDimitry Andric /// prediction.
2416*b5893f02SDimitry Andric ///
2417*b5893f02SDimitry Andric /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2418*b5893f02SDimitry Andric /// unswitch candidates, making adequate predictions instead of wild guesses.
2419*b5893f02SDimitry Andric /// That requires knowing not just the number of "remaining" candidates but
2420*b5893f02SDimitry Andric /// also costs of unswitching for each of these candidates.
calculateUnswitchCostMultiplier(Instruction & TI,Loop & L,LoopInfo & LI,DominatorTree & DT,ArrayRef<std::pair<Instruction *,TinyPtrVector<Value * >>> UnswitchCandidates)2421*b5893f02SDimitry Andric static int calculateUnswitchCostMultiplier(
2422*b5893f02SDimitry Andric     Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2423*b5893f02SDimitry Andric     ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2424*b5893f02SDimitry Andric         UnswitchCandidates) {
2425*b5893f02SDimitry Andric 
2426*b5893f02SDimitry Andric   // Guards and other exiting conditions do not contribute to exponential
2427*b5893f02SDimitry Andric   // explosion as soon as they dominate the latch (otherwise there might be
2428*b5893f02SDimitry Andric   // another path to the latch remaining that does not allow to eliminate the
2429*b5893f02SDimitry Andric   // loop copy on unswitch).
2430*b5893f02SDimitry Andric   BasicBlock *Latch = L.getLoopLatch();
2431*b5893f02SDimitry Andric   BasicBlock *CondBlock = TI.getParent();
2432*b5893f02SDimitry Andric   if (DT.dominates(CondBlock, Latch) &&
2433*b5893f02SDimitry Andric       (isGuard(&TI) ||
2434*b5893f02SDimitry Andric        llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2435*b5893f02SDimitry Andric          return L.contains(SuccBB);
2436*b5893f02SDimitry Andric        }) <= 1)) {
2437*b5893f02SDimitry Andric     NumCostMultiplierSkipped++;
2438*b5893f02SDimitry Andric     return 1;
2439*b5893f02SDimitry Andric   }
2440*b5893f02SDimitry Andric 
2441*b5893f02SDimitry Andric   auto *ParentL = L.getParentLoop();
2442*b5893f02SDimitry Andric   int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2443*b5893f02SDimitry Andric                                : std::distance(LI.begin(), LI.end()));
2444*b5893f02SDimitry Andric   // Count amount of clones that all the candidates might cause during
2445*b5893f02SDimitry Andric   // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
2446*b5893f02SDimitry Andric   int UnswitchedClones = 0;
2447*b5893f02SDimitry Andric   for (auto Candidate : UnswitchCandidates) {
2448*b5893f02SDimitry Andric     Instruction *CI = Candidate.first;
2449*b5893f02SDimitry Andric     BasicBlock *CondBlock = CI->getParent();
2450*b5893f02SDimitry Andric     bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2451*b5893f02SDimitry Andric     if (isGuard(CI)) {
2452*b5893f02SDimitry Andric       if (!SkipExitingSuccessors)
2453*b5893f02SDimitry Andric         UnswitchedClones++;
2454*b5893f02SDimitry Andric       continue;
2455*b5893f02SDimitry Andric     }
2456*b5893f02SDimitry Andric     int NonExitingSuccessors = llvm::count_if(
2457*b5893f02SDimitry Andric         successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2458*b5893f02SDimitry Andric           return !SkipExitingSuccessors || L.contains(SuccBB);
2459*b5893f02SDimitry Andric         });
2460*b5893f02SDimitry Andric     UnswitchedClones += Log2_32(NonExitingSuccessors);
2461*b5893f02SDimitry Andric   }
2462*b5893f02SDimitry Andric 
2463*b5893f02SDimitry Andric   // Ignore up to the "unscaled candidates" number of unswitch candidates
2464*b5893f02SDimitry Andric   // when calculating the power-of-two scaling of the cost. The main idea
2465*b5893f02SDimitry Andric   // with this control is to allow a small number of unswitches to happen
2466*b5893f02SDimitry Andric   // and rely more on siblings multiplier (see below) when the number
2467*b5893f02SDimitry Andric   // of candidates is small.
2468*b5893f02SDimitry Andric   unsigned ClonesPower =
2469*b5893f02SDimitry Andric       std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2470*b5893f02SDimitry Andric 
2471*b5893f02SDimitry Andric   // Allowing top-level loops to spread a bit more than nested ones.
2472*b5893f02SDimitry Andric   int SiblingsMultiplier =
2473*b5893f02SDimitry Andric       std::max((ParentL ? SiblingsCount
2474*b5893f02SDimitry Andric                         : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2475*b5893f02SDimitry Andric                1);
2476*b5893f02SDimitry Andric   // Compute the cost multiplier in a way that won't overflow by saturating
2477*b5893f02SDimitry Andric   // at an upper bound.
2478*b5893f02SDimitry Andric   int CostMultiplier;
2479*b5893f02SDimitry Andric   if (ClonesPower > Log2_32(UnswitchThreshold) ||
2480*b5893f02SDimitry Andric       SiblingsMultiplier > UnswitchThreshold)
2481*b5893f02SDimitry Andric     CostMultiplier = UnswitchThreshold;
2482*b5893f02SDimitry Andric   else
2483*b5893f02SDimitry Andric     CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2484*b5893f02SDimitry Andric                               (int)UnswitchThreshold);
2485*b5893f02SDimitry Andric 
2486*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "  Computed multiplier  " << CostMultiplier
2487*b5893f02SDimitry Andric                     << " (siblings " << SiblingsMultiplier << " * clones "
2488*b5893f02SDimitry Andric                     << (1 << ClonesPower) << ")"
2489*b5893f02SDimitry Andric                     << " for unswitch candidate: " << TI << "\n");
2490*b5893f02SDimitry Andric   return CostMultiplier;
2491*b5893f02SDimitry Andric }
2492*b5893f02SDimitry Andric 
24932cab237bSDimitry Andric static bool
unswitchBestCondition(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,TargetTransformInfo & TTI,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)24944ba319b5SDimitry Andric unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
24954ba319b5SDimitry Andric                       AssumptionCache &AC, TargetTransformInfo &TTI,
24964ba319b5SDimitry Andric                       function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2497*b5893f02SDimitry Andric                       ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
24984ba319b5SDimitry Andric   // Collect all invariant conditions within this loop (as opposed to an inner
24994ba319b5SDimitry Andric   // loop which would be handled when visiting that inner loop).
2500*b5893f02SDimitry Andric   SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
25014ba319b5SDimitry Andric       UnswitchCandidates;
2502*b5893f02SDimitry Andric 
2503*b5893f02SDimitry Andric   // Whether or not we should also collect guards in the loop.
2504*b5893f02SDimitry Andric   bool CollectGuards = false;
2505*b5893f02SDimitry Andric   if (UnswitchGuards) {
2506*b5893f02SDimitry Andric     auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2507*b5893f02SDimitry Andric         Intrinsic::getName(Intrinsic::experimental_guard));
2508*b5893f02SDimitry Andric     if (GuardDecl && !GuardDecl->use_empty())
2509*b5893f02SDimitry Andric       CollectGuards = true;
2510*b5893f02SDimitry Andric   }
2511*b5893f02SDimitry Andric 
25124ba319b5SDimitry Andric   for (auto *BB : L.blocks()) {
25134ba319b5SDimitry Andric     if (LI.getLoopFor(BB) != &L)
25144ba319b5SDimitry Andric       continue;
2515f37b6182SDimitry Andric 
2516*b5893f02SDimitry Andric     if (CollectGuards)
2517*b5893f02SDimitry Andric       for (auto &I : *BB)
2518*b5893f02SDimitry Andric         if (isGuard(&I)) {
2519*b5893f02SDimitry Andric           auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2520*b5893f02SDimitry Andric           // TODO: Support AND, OR conditions and partial unswitching.
2521*b5893f02SDimitry Andric           if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2522*b5893f02SDimitry Andric             UnswitchCandidates.push_back({&I, {Cond}});
2523*b5893f02SDimitry Andric         }
2524*b5893f02SDimitry Andric 
25254ba319b5SDimitry Andric     if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
25264ba319b5SDimitry Andric       // We can only consider fully loop-invariant switch conditions as we need
25274ba319b5SDimitry Andric       // to completely eliminate the switch after unswitching.
25284ba319b5SDimitry Andric       if (!isa<Constant>(SI->getCondition()) &&
25294ba319b5SDimitry Andric           L.isLoopInvariant(SI->getCondition()))
25304ba319b5SDimitry Andric         UnswitchCandidates.push_back({SI, {SI->getCondition()}});
25314ba319b5SDimitry Andric       continue;
25324ba319b5SDimitry Andric     }
2533f37b6182SDimitry Andric 
25344ba319b5SDimitry Andric     auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
25354ba319b5SDimitry Andric     if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
25364ba319b5SDimitry Andric         BI->getSuccessor(0) == BI->getSuccessor(1))
25374ba319b5SDimitry Andric       continue;
2538f37b6182SDimitry Andric 
25394ba319b5SDimitry Andric     if (L.isLoopInvariant(BI->getCondition())) {
25404ba319b5SDimitry Andric       UnswitchCandidates.push_back({BI, {BI->getCondition()}});
25414ba319b5SDimitry Andric       continue;
25424ba319b5SDimitry Andric     }
25432cab237bSDimitry Andric 
25444ba319b5SDimitry Andric     Instruction &CondI = *cast<Instruction>(BI->getCondition());
25454ba319b5SDimitry Andric     if (CondI.getOpcode() != Instruction::And &&
25464ba319b5SDimitry Andric       CondI.getOpcode() != Instruction::Or)
25474ba319b5SDimitry Andric       continue;
25484ba319b5SDimitry Andric 
25494ba319b5SDimitry Andric     TinyPtrVector<Value *> Invariants =
25504ba319b5SDimitry Andric         collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
25514ba319b5SDimitry Andric     if (Invariants.empty())
25524ba319b5SDimitry Andric       continue;
25534ba319b5SDimitry Andric 
25544ba319b5SDimitry Andric     UnswitchCandidates.push_back({BI, std::move(Invariants)});
25554ba319b5SDimitry Andric   }
25562cab237bSDimitry Andric 
25572cab237bSDimitry Andric   // If we didn't find any candidates, we're done.
25582cab237bSDimitry Andric   if (UnswitchCandidates.empty())
25594ba319b5SDimitry Andric     return false;
25602cab237bSDimitry Andric 
25614ba319b5SDimitry Andric   // Check if there are irreducible CFG cycles in this loop. If so, we cannot
25624ba319b5SDimitry Andric   // easily unswitch non-trivial edges out of the loop. Doing so might turn the
25634ba319b5SDimitry Andric   // irreducible control flow into reducible control flow and introduce new
25644ba319b5SDimitry Andric   // loops "out of thin air". If we ever discover important use cases for doing
25654ba319b5SDimitry Andric   // this, we can add support to loop unswitch, but it is a lot of complexity
25664ba319b5SDimitry Andric   // for what seems little or no real world benefit.
25674ba319b5SDimitry Andric   LoopBlocksRPO RPOT(&L);
25684ba319b5SDimitry Andric   RPOT.perform(&LI);
25694ba319b5SDimitry Andric   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
25704ba319b5SDimitry Andric     return false;
25714ba319b5SDimitry Andric 
2572*b5893f02SDimitry Andric   SmallVector<BasicBlock *, 4> ExitBlocks;
2573*b5893f02SDimitry Andric   L.getUniqueExitBlocks(ExitBlocks);
2574*b5893f02SDimitry Andric 
2575*b5893f02SDimitry Andric   // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
2576*b5893f02SDimitry Andric   // don't know how to split those exit blocks.
2577*b5893f02SDimitry Andric   // FIXME: We should teach SplitBlock to handle this and remove this
2578*b5893f02SDimitry Andric   // restriction.
2579*b5893f02SDimitry Andric   for (auto *ExitBB : ExitBlocks)
2580*b5893f02SDimitry Andric     if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
2581*b5893f02SDimitry Andric       dbgs() << "Cannot unswitch because of cleanuppad in exit block\n";
2582*b5893f02SDimitry Andric       return false;
2583*b5893f02SDimitry Andric     }
2584*b5893f02SDimitry Andric 
25854ba319b5SDimitry Andric   LLVM_DEBUG(
25864ba319b5SDimitry Andric       dbgs() << "Considering " << UnswitchCandidates.size()
25872cab237bSDimitry Andric              << " non-trivial loop invariant conditions for unswitching.\n");
25882cab237bSDimitry Andric 
25892cab237bSDimitry Andric   // Given that unswitching these terminators will require duplicating parts of
25902cab237bSDimitry Andric   // the loop, so we need to be able to model that cost. Compute the ephemeral
25912cab237bSDimitry Andric   // values and set up a data structure to hold per-BB costs. We cache each
25922cab237bSDimitry Andric   // block's cost so that we don't recompute this when considering different
25932cab237bSDimitry Andric   // subsets of the loop for duplication during unswitching.
25942cab237bSDimitry Andric   SmallPtrSet<const Value *, 4> EphValues;
25952cab237bSDimitry Andric   CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
25962cab237bSDimitry Andric   SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
25972cab237bSDimitry Andric 
25982cab237bSDimitry Andric   // Compute the cost of each block, as well as the total loop cost. Also, bail
25992cab237bSDimitry Andric   // out if we see instructions which are incompatible with loop unswitching
26002cab237bSDimitry Andric   // (convergent, noduplicate, or cross-basic-block tokens).
26012cab237bSDimitry Andric   // FIXME: We might be able to safely handle some of these in non-duplicated
26022cab237bSDimitry Andric   // regions.
26032cab237bSDimitry Andric   int LoopCost = 0;
26042cab237bSDimitry Andric   for (auto *BB : L.blocks()) {
26052cab237bSDimitry Andric     int Cost = 0;
26062cab237bSDimitry Andric     for (auto &I : *BB) {
26072cab237bSDimitry Andric       if (EphValues.count(&I))
26082cab237bSDimitry Andric         continue;
26092cab237bSDimitry Andric 
26102cab237bSDimitry Andric       if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
26114ba319b5SDimitry Andric         return false;
26122cab237bSDimitry Andric       if (auto CS = CallSite(&I))
26132cab237bSDimitry Andric         if (CS.isConvergent() || CS.cannotDuplicate())
26144ba319b5SDimitry Andric           return false;
26152cab237bSDimitry Andric 
26162cab237bSDimitry Andric       Cost += TTI.getUserCost(&I);
26172cab237bSDimitry Andric     }
26182cab237bSDimitry Andric     assert(Cost >= 0 && "Must not have negative costs!");
26192cab237bSDimitry Andric     LoopCost += Cost;
26202cab237bSDimitry Andric     assert(LoopCost >= 0 && "Must not have negative loop costs!");
26212cab237bSDimitry Andric     BBCostMap[BB] = Cost;
26222cab237bSDimitry Andric   }
26234ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Total loop cost: " << LoopCost << "\n");
26242cab237bSDimitry Andric 
26252cab237bSDimitry Andric   // Now we find the best candidate by searching for the one with the following
26262cab237bSDimitry Andric   // properties in order:
26272cab237bSDimitry Andric   //
26282cab237bSDimitry Andric   // 1) An unswitching cost below the threshold
26292cab237bSDimitry Andric   // 2) The smallest number of duplicated unswitch candidates (to avoid
26302cab237bSDimitry Andric   //    creating redundant subsequent unswitching)
26312cab237bSDimitry Andric   // 3) The smallest cost after unswitching.
26322cab237bSDimitry Andric   //
26332cab237bSDimitry Andric   // We prioritize reducing fanout of unswitch candidates provided the cost
26342cab237bSDimitry Andric   // remains below the threshold because this has a multiplicative effect.
26352cab237bSDimitry Andric   //
26362cab237bSDimitry Andric   // This requires memoizing each dominator subtree to avoid redundant work.
26372cab237bSDimitry Andric   //
26382cab237bSDimitry Andric   // FIXME: Need to actually do the number of candidates part above.
26392cab237bSDimitry Andric   SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
26402cab237bSDimitry Andric   // Given a terminator which might be unswitched, computes the non-duplicated
26412cab237bSDimitry Andric   // cost for that terminator.
2642*b5893f02SDimitry Andric   auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
26434ba319b5SDimitry Andric     BasicBlock &BB = *TI.getParent();
26442cab237bSDimitry Andric     SmallPtrSet<BasicBlock *, 4> Visited;
26452cab237bSDimitry Andric 
26462cab237bSDimitry Andric     int Cost = LoopCost;
26472cab237bSDimitry Andric     for (BasicBlock *SuccBB : successors(&BB)) {
26482cab237bSDimitry Andric       // Don't count successors more than once.
26492cab237bSDimitry Andric       if (!Visited.insert(SuccBB).second)
26502cab237bSDimitry Andric         continue;
26512cab237bSDimitry Andric 
26524ba319b5SDimitry Andric       // If this is a partial unswitch candidate, then it must be a conditional
26534ba319b5SDimitry Andric       // branch with a condition of either `or` or `and`. In that case, one of
26544ba319b5SDimitry Andric       // the successors is necessarily duplicated, so don't even try to remove
26554ba319b5SDimitry Andric       // its cost.
26564ba319b5SDimitry Andric       if (!FullUnswitch) {
26574ba319b5SDimitry Andric         auto &BI = cast<BranchInst>(TI);
26584ba319b5SDimitry Andric         if (cast<Instruction>(BI.getCondition())->getOpcode() ==
26594ba319b5SDimitry Andric             Instruction::And) {
26604ba319b5SDimitry Andric           if (SuccBB == BI.getSuccessor(1))
26614ba319b5SDimitry Andric             continue;
26624ba319b5SDimitry Andric         } else {
26634ba319b5SDimitry Andric           assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
26644ba319b5SDimitry Andric                      Instruction::Or &&
26654ba319b5SDimitry Andric                  "Only `and` and `or` conditions can result in a partial "
26664ba319b5SDimitry Andric                  "unswitch!");
26674ba319b5SDimitry Andric           if (SuccBB == BI.getSuccessor(0))
26684ba319b5SDimitry Andric             continue;
26694ba319b5SDimitry Andric         }
26704ba319b5SDimitry Andric       }
26714ba319b5SDimitry Andric 
26722cab237bSDimitry Andric       // This successor's domtree will not need to be duplicated after
26732cab237bSDimitry Andric       // unswitching if the edge to the successor dominates it (and thus the
26742cab237bSDimitry Andric       // entire tree). This essentially means there is no other path into this
26752cab237bSDimitry Andric       // subtree and so it will end up live in only one clone of the loop.
26762cab237bSDimitry Andric       if (SuccBB->getUniquePredecessor() ||
26772cab237bSDimitry Andric           llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
26782cab237bSDimitry Andric             return PredBB == &BB || DT.dominates(SuccBB, PredBB);
26792cab237bSDimitry Andric           })) {
26802cab237bSDimitry Andric         Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
26812cab237bSDimitry Andric         assert(Cost >= 0 &&
26822cab237bSDimitry Andric                "Non-duplicated cost should never exceed total loop cost!");
26832cab237bSDimitry Andric       }
26842cab237bSDimitry Andric     }
26852cab237bSDimitry Andric 
26862cab237bSDimitry Andric     // Now scale the cost by the number of unique successors minus one. We
26872cab237bSDimitry Andric     // subtract one because there is already at least one copy of the entire
26882cab237bSDimitry Andric     // loop. This is computing the new cost of unswitching a condition.
2689*b5893f02SDimitry Andric     // Note that guards always have 2 unique successors that are implicit and
2690*b5893f02SDimitry Andric     // will be materialized if we decide to unswitch it.
2691*b5893f02SDimitry Andric     int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2692*b5893f02SDimitry Andric     assert(SuccessorsCount > 1 &&
26932cab237bSDimitry Andric            "Cannot unswitch a condition without multiple distinct successors!");
2694*b5893f02SDimitry Andric     return Cost * (SuccessorsCount - 1);
26952cab237bSDimitry Andric   };
2696*b5893f02SDimitry Andric   Instruction *BestUnswitchTI = nullptr;
26972cab237bSDimitry Andric   int BestUnswitchCost;
26984ba319b5SDimitry Andric   ArrayRef<Value *> BestUnswitchInvariants;
26994ba319b5SDimitry Andric   for (auto &TerminatorAndInvariants : UnswitchCandidates) {
2700*b5893f02SDimitry Andric     Instruction &TI = *TerminatorAndInvariants.first;
27014ba319b5SDimitry Andric     ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
27024ba319b5SDimitry Andric     BranchInst *BI = dyn_cast<BranchInst>(&TI);
27034ba319b5SDimitry Andric     int CandidateCost = ComputeUnswitchedCost(
27044ba319b5SDimitry Andric         TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
27054ba319b5SDimitry Andric                                      Invariants[0] == BI->getCondition()));
2706*b5893f02SDimitry Andric     // Calculate cost multiplier which is a tool to limit potentially
2707*b5893f02SDimitry Andric     // exponential behavior of loop-unswitch.
2708*b5893f02SDimitry Andric     if (EnableUnswitchCostMultiplier) {
2709*b5893f02SDimitry Andric       int CostMultiplier =
2710*b5893f02SDimitry Andric           calculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2711*b5893f02SDimitry Andric       assert(
2712*b5893f02SDimitry Andric           (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
2713*b5893f02SDimitry Andric           "cost multiplier needs to be in the range of 1..UnswitchThreshold");
2714*b5893f02SDimitry Andric       CandidateCost *= CostMultiplier;
2715*b5893f02SDimitry Andric       LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
2716*b5893f02SDimitry Andric                         << " (multiplier: " << CostMultiplier << ")"
2717*b5893f02SDimitry Andric                         << " for unswitch candidate: " << TI << "\n");
2718*b5893f02SDimitry Andric     } else {
27194ba319b5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
27204ba319b5SDimitry Andric                         << " for unswitch candidate: " << TI << "\n");
2721*b5893f02SDimitry Andric     }
2722*b5893f02SDimitry Andric 
27232cab237bSDimitry Andric     if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
27244ba319b5SDimitry Andric       BestUnswitchTI = &TI;
27252cab237bSDimitry Andric       BestUnswitchCost = CandidateCost;
27264ba319b5SDimitry Andric       BestUnswitchInvariants = Invariants;
27272cab237bSDimitry Andric     }
27282cab237bSDimitry Andric   }
27292cab237bSDimitry Andric 
27304ba319b5SDimitry Andric   if (BestUnswitchCost >= UnswitchThreshold) {
27314ba319b5SDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
27324ba319b5SDimitry Andric                       << BestUnswitchCost << "\n");
27334ba319b5SDimitry Andric     return false;
27342cab237bSDimitry Andric   }
2735f37b6182SDimitry Andric 
2736*b5893f02SDimitry Andric   // If the best candidate is a guard, turn it into a branch.
2737*b5893f02SDimitry Andric   if (isGuard(BestUnswitchTI))
2738*b5893f02SDimitry Andric     BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2739*b5893f02SDimitry Andric                                          ExitBlocks, DT, LI, MSSAU);
2740*b5893f02SDimitry Andric 
2741*b5893f02SDimitry Andric   LLVM_DEBUG(dbgs() << "  Unswitching non-trivial (cost = "
27424ba319b5SDimitry Andric                     << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
27434ba319b5SDimitry Andric                     << "\n");
2744*b5893f02SDimitry Andric   unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2745*b5893f02SDimitry Andric                                ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
2746*b5893f02SDimitry Andric   return true;
27474ba319b5SDimitry Andric }
27484ba319b5SDimitry Andric 
27494ba319b5SDimitry Andric /// Unswitch control flow predicated on loop invariant conditions.
27504ba319b5SDimitry Andric ///
27514ba319b5SDimitry Andric /// This first hoists all branches or switches which are trivial (IE, do not
27524ba319b5SDimitry Andric /// require duplicating any part of the loop) out of the loop body. It then
27534ba319b5SDimitry Andric /// looks at other loop invariant control flows and tries to unswitch those as
27544ba319b5SDimitry Andric /// well by cloning the loop if the result is small enough.
27554ba319b5SDimitry Andric ///
27564ba319b5SDimitry Andric /// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
27574ba319b5SDimitry Andric /// updated based on the unswitch.
2758*b5893f02SDimitry Andric /// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
27594ba319b5SDimitry Andric ///
27604ba319b5SDimitry Andric /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
27614ba319b5SDimitry Andric /// true, we will attempt to do non-trivial unswitching as well as trivial
27624ba319b5SDimitry Andric /// unswitching.
27634ba319b5SDimitry Andric ///
27644ba319b5SDimitry Andric /// The `UnswitchCB` callback provided will be run after unswitching is
27654ba319b5SDimitry Andric /// complete, with the first parameter set to `true` if the provided loop
27664ba319b5SDimitry Andric /// remains a loop, and a list of new sibling loops created.
27674ba319b5SDimitry Andric ///
27684ba319b5SDimitry Andric /// If `SE` is non-null, we will update that analysis based on the unswitching
27694ba319b5SDimitry Andric /// done.
unswitchLoop(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,TargetTransformInfo & TTI,bool NonTrivial,function_ref<void (bool,ArrayRef<Loop * >)> UnswitchCB,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)27704ba319b5SDimitry Andric static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
27714ba319b5SDimitry Andric                          AssumptionCache &AC, TargetTransformInfo &TTI,
27724ba319b5SDimitry Andric                          bool NonTrivial,
27734ba319b5SDimitry Andric                          function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
2774*b5893f02SDimitry Andric                          ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
27754ba319b5SDimitry Andric   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
27764ba319b5SDimitry Andric          "Loops must be in LCSSA form before unswitching.");
27774ba319b5SDimitry Andric   bool Changed = false;
27784ba319b5SDimitry Andric 
27794ba319b5SDimitry Andric   // Must be in loop simplified form: we need a preheader and dedicated exits.
27804ba319b5SDimitry Andric   if (!L.isLoopSimplifyForm())
27814ba319b5SDimitry Andric     return false;
27824ba319b5SDimitry Andric 
27834ba319b5SDimitry Andric   // Try trivial unswitch first before loop over other basic blocks in the loop.
2784*b5893f02SDimitry Andric   if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
27854ba319b5SDimitry Andric     // If we unswitched successfully we will want to clean up the loop before
27864ba319b5SDimitry Andric     // processing it further so just mark it as unswitched and return.
27874ba319b5SDimitry Andric     UnswitchCB(/*CurrentLoopValid*/ true, {});
27884ba319b5SDimitry Andric     return true;
27894ba319b5SDimitry Andric   }
27904ba319b5SDimitry Andric 
27914ba319b5SDimitry Andric   // If we're not doing non-trivial unswitching, we're done. We both accept
27924ba319b5SDimitry Andric   // a parameter but also check a local flag that can be used for testing
27934ba319b5SDimitry Andric   // a debugging.
27944ba319b5SDimitry Andric   if (!NonTrivial && !EnableNonTrivialUnswitch)
27954ba319b5SDimitry Andric     return false;
27964ba319b5SDimitry Andric 
27974ba319b5SDimitry Andric   // For non-trivial unswitching, because it often creates new loops, we rely on
27984ba319b5SDimitry Andric   // the pass manager to iterate on the loops rather than trying to immediately
27994ba319b5SDimitry Andric   // reach a fixed point. There is no substantial advantage to iterating
28004ba319b5SDimitry Andric   // internally, and if any of the new loops are simplified enough to contain
28014ba319b5SDimitry Andric   // trivial unswitching we want to prefer those.
28024ba319b5SDimitry Andric 
28034ba319b5SDimitry Andric   // Try to unswitch the best invariant condition. We prefer this full unswitch to
28044ba319b5SDimitry Andric   // a partial unswitch when possible below the threshold.
2805*b5893f02SDimitry Andric   if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
28064ba319b5SDimitry Andric     return true;
28074ba319b5SDimitry Andric 
28084ba319b5SDimitry Andric   // No other opportunities to unswitch.
2809f37b6182SDimitry Andric   return Changed;
2810f37b6182SDimitry Andric }
2811f37b6182SDimitry Andric 
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)2812f37b6182SDimitry Andric PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
2813f37b6182SDimitry Andric                                               LoopStandardAnalysisResults &AR,
2814f37b6182SDimitry Andric                                               LPMUpdater &U) {
2815f37b6182SDimitry Andric   Function &F = *L.getHeader()->getParent();
2816f37b6182SDimitry Andric   (void)F;
2817f37b6182SDimitry Andric 
28184ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
28194ba319b5SDimitry Andric                     << "\n");
2820f37b6182SDimitry Andric 
28212cab237bSDimitry Andric   // Save the current loop name in a variable so that we can report it even
28222cab237bSDimitry Andric   // after it has been deleted.
28232cab237bSDimitry Andric   std::string LoopName = L.getName();
28242cab237bSDimitry Andric 
28254ba319b5SDimitry Andric   auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
28262cab237bSDimitry Andric                                         ArrayRef<Loop *> NewLoops) {
28272cab237bSDimitry Andric     // If we did a non-trivial unswitch, we have added new (cloned) loops.
28284ba319b5SDimitry Andric     if (!NewLoops.empty())
28292cab237bSDimitry Andric       U.addSiblingLoops(NewLoops);
28302cab237bSDimitry Andric 
28312cab237bSDimitry Andric     // If the current loop remains valid, we should revisit it to catch any
28322cab237bSDimitry Andric     // other unswitch opportunities. Otherwise, we need to mark it as deleted.
28332cab237bSDimitry Andric     if (CurrentLoopValid)
28342cab237bSDimitry Andric       U.revisitCurrentLoop();
28352cab237bSDimitry Andric     else
28362cab237bSDimitry Andric       U.markLoopAsDeleted(L, LoopName);
28372cab237bSDimitry Andric   };
28382cab237bSDimitry Andric 
2839*b5893f02SDimitry Andric   Optional<MemorySSAUpdater> MSSAU;
2840*b5893f02SDimitry Andric   if (AR.MSSA) {
2841*b5893f02SDimitry Andric     MSSAU = MemorySSAUpdater(AR.MSSA);
2842*b5893f02SDimitry Andric     if (VerifyMemorySSA)
2843*b5893f02SDimitry Andric       AR.MSSA->verifyMemorySSA();
2844*b5893f02SDimitry Andric   }
28454ba319b5SDimitry Andric   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
2846*b5893f02SDimitry Andric                     &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
2847f37b6182SDimitry Andric     return PreservedAnalyses::all();
2848f37b6182SDimitry Andric 
2849*b5893f02SDimitry Andric   if (AR.MSSA && VerifyMemorySSA)
2850*b5893f02SDimitry Andric     AR.MSSA->verifyMemorySSA();
2851*b5893f02SDimitry Andric 
2852f37b6182SDimitry Andric   // Historically this pass has had issues with the dominator tree so verify it
2853f37b6182SDimitry Andric   // in asserts builds.
28544ba319b5SDimitry Andric   assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
2855f37b6182SDimitry Andric   return getLoopPassPreservedAnalyses();
2856f37b6182SDimitry Andric }
2857f37b6182SDimitry Andric 
2858f37b6182SDimitry Andric namespace {
285960ff8e32SDimitry Andric 
2860f37b6182SDimitry Andric class SimpleLoopUnswitchLegacyPass : public LoopPass {
28612cab237bSDimitry Andric   bool NonTrivial;
28622cab237bSDimitry Andric 
2863f37b6182SDimitry Andric public:
2864f37b6182SDimitry Andric   static char ID; // Pass ID, replacement for typeid
286560ff8e32SDimitry Andric 
SimpleLoopUnswitchLegacyPass(bool NonTrivial=false)28662cab237bSDimitry Andric   explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
28672cab237bSDimitry Andric       : LoopPass(ID), NonTrivial(NonTrivial) {
2868f37b6182SDimitry Andric     initializeSimpleLoopUnswitchLegacyPassPass(
2869f37b6182SDimitry Andric         *PassRegistry::getPassRegistry());
2870f37b6182SDimitry Andric   }
2871f37b6182SDimitry Andric 
2872f37b6182SDimitry Andric   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
2873f37b6182SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const2874f37b6182SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
2875f37b6182SDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
28762cab237bSDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
2877*b5893f02SDimitry Andric     if (EnableMSSALoopDependency) {
2878*b5893f02SDimitry Andric       AU.addRequired<MemorySSAWrapperPass>();
2879*b5893f02SDimitry Andric       AU.addPreserved<MemorySSAWrapperPass>();
2880*b5893f02SDimitry Andric     }
2881f37b6182SDimitry Andric     getLoopAnalysisUsage(AU);
2882f37b6182SDimitry Andric   }
2883f37b6182SDimitry Andric };
288460ff8e32SDimitry Andric 
288560ff8e32SDimitry Andric } // end anonymous namespace
2886f37b6182SDimitry Andric 
runOnLoop(Loop * L,LPPassManager & LPM)2887f37b6182SDimitry Andric bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
2888f37b6182SDimitry Andric   if (skipLoop(L))
2889f37b6182SDimitry Andric     return false;
2890f37b6182SDimitry Andric 
2891f37b6182SDimitry Andric   Function &F = *L->getHeader()->getParent();
2892f37b6182SDimitry Andric 
28934ba319b5SDimitry Andric   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
28944ba319b5SDimitry Andric                     << "\n");
2895f37b6182SDimitry Andric 
2896f37b6182SDimitry Andric   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2897f37b6182SDimitry Andric   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2898f37b6182SDimitry Andric   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
28992cab237bSDimitry Andric   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2900*b5893f02SDimitry Andric   MemorySSA *MSSA = nullptr;
2901*b5893f02SDimitry Andric   Optional<MemorySSAUpdater> MSSAU;
2902*b5893f02SDimitry Andric   if (EnableMSSALoopDependency) {
2903*b5893f02SDimitry Andric     MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
2904*b5893f02SDimitry Andric     MSSAU = MemorySSAUpdater(MSSA);
2905*b5893f02SDimitry Andric   }
2906f37b6182SDimitry Andric 
29074ba319b5SDimitry Andric   auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
29084ba319b5SDimitry Andric   auto *SE = SEWP ? &SEWP->getSE() : nullptr;
29094ba319b5SDimitry Andric 
29104ba319b5SDimitry Andric   auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
29112cab237bSDimitry Andric                                ArrayRef<Loop *> NewLoops) {
29122cab237bSDimitry Andric     // If we did a non-trivial unswitch, we have added new (cloned) loops.
29132cab237bSDimitry Andric     for (auto *NewL : NewLoops)
29142cab237bSDimitry Andric       LPM.addLoop(*NewL);
29152cab237bSDimitry Andric 
29162cab237bSDimitry Andric     // If the current loop remains valid, re-add it to the queue. This is
29172cab237bSDimitry Andric     // a little wasteful as we'll finish processing the current loop as well,
29182cab237bSDimitry Andric     // but it is the best we can do in the old PM.
29192cab237bSDimitry Andric     if (CurrentLoopValid)
29202cab237bSDimitry Andric       LPM.addLoop(*L);
29212cab237bSDimitry Andric     else
29222cab237bSDimitry Andric       LPM.markLoopAsDeleted(*L);
29232cab237bSDimitry Andric   };
29242cab237bSDimitry Andric 
2925*b5893f02SDimitry Andric   if (MSSA && VerifyMemorySSA)
2926*b5893f02SDimitry Andric     MSSA->verifyMemorySSA();
2927*b5893f02SDimitry Andric 
2928*b5893f02SDimitry Andric   bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
2929*b5893f02SDimitry Andric                               MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
2930*b5893f02SDimitry Andric 
2931*b5893f02SDimitry Andric   if (MSSA && VerifyMemorySSA)
2932*b5893f02SDimitry Andric     MSSA->verifyMemorySSA();
29332cab237bSDimitry Andric 
29342cab237bSDimitry Andric   // If anything was unswitched, also clear any cached information about this
29352cab237bSDimitry Andric   // loop.
29362cab237bSDimitry Andric   LPM.deleteSimpleAnalysisLoop(L);
2937f37b6182SDimitry Andric 
2938f37b6182SDimitry Andric   // Historically this pass has had issues with the dominator tree so verify it
2939f37b6182SDimitry Andric   // in asserts builds.
29404ba319b5SDimitry Andric   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
29414ba319b5SDimitry Andric 
2942f37b6182SDimitry Andric   return Changed;
2943f37b6182SDimitry Andric }
2944f37b6182SDimitry Andric 
2945f37b6182SDimitry Andric char SimpleLoopUnswitchLegacyPass::ID = 0;
2946f37b6182SDimitry Andric INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2947f37b6182SDimitry Andric                       "Simple unswitch loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)2948f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
29492cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
29502cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2951f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass)
2952*b5893f02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2953f37b6182SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2954f37b6182SDimitry Andric INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2955f37b6182SDimitry Andric                     "Simple unswitch loops", false, false)
2956f37b6182SDimitry Andric 
29572cab237bSDimitry Andric Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
29582cab237bSDimitry Andric   return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2959f37b6182SDimitry Andric }
2960