105ae04c3SSimon Moll //===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
259041687SNicolai Haehnle //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
659041687SNicolai Haehnle //
759041687SNicolai Haehnle //===----------------------------------------------------------------------===//
859041687SNicolai Haehnle //
959041687SNicolai Haehnle // This file implements an algorithm that returns for a divergent branch
1059041687SNicolai Haehnle // the set of basic blocks whose phi nodes become divergent due to divergent
1159041687SNicolai Haehnle // control. These are the blocks that are reachable by two disjoint paths from
1259041687SNicolai Haehnle // the branch or loop exits that have a reaching path that is disjoint from a
1359041687SNicolai Haehnle // path to the loop latch.
1459041687SNicolai Haehnle //
1559041687SNicolai Haehnle // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
1659041687SNicolai Haehnle // control-induced divergence in phi nodes.
1759041687SNicolai Haehnle //
1859041687SNicolai Haehnle //
1956db1c07SSimon Moll // -- Reference --
2056db1c07SSimon Moll // The algorithm is presented in Section 5 of
2156db1c07SSimon Moll //
2256db1c07SSimon Moll //   An abstract interpretation for SPMD divergence
2356db1c07SSimon Moll //       on reducible control flow graphs.
2456db1c07SSimon Moll //   Julian Rosemann, Simon Moll and Sebastian Hack
2556db1c07SSimon Moll //   POPL '21
2656db1c07SSimon Moll //
2759041687SNicolai Haehnle //
2859041687SNicolai Haehnle // -- Sync dependence --
2956db1c07SSimon Moll // Sync dependence characterizes the control flow aspect of the
3059041687SNicolai Haehnle // propagation of branch divergence. For example,
3159041687SNicolai Haehnle //
3259041687SNicolai Haehnle //   %cond = icmp slt i32 %tid, 10
3359041687SNicolai Haehnle //   br i1 %cond, label %then, label %else
3459041687SNicolai Haehnle // then:
3559041687SNicolai Haehnle //   br label %merge
3659041687SNicolai Haehnle // else:
3759041687SNicolai Haehnle //   br label %merge
3859041687SNicolai Haehnle // merge:
3959041687SNicolai Haehnle //   %a = phi i32 [ 0, %then ], [ 1, %else ]
4059041687SNicolai Haehnle //
4159041687SNicolai Haehnle // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
4259041687SNicolai Haehnle // because %tid is not on its use-def chains, %a is sync dependent on %tid
4359041687SNicolai Haehnle // because the branch "br i1 %cond" depends on %tid and affects which value %a
4459041687SNicolai Haehnle // is assigned to.
4559041687SNicolai Haehnle //
4656db1c07SSimon Moll //
4759041687SNicolai Haehnle // -- Reduction to SSA construction --
4859041687SNicolai Haehnle // There are two disjoint paths from A to X, if a certain variant of SSA
4956db1c07SSimon Moll // construction places a phi node in X under the following set-up scheme.
5059041687SNicolai Haehnle //
5159041687SNicolai Haehnle // This variant of SSA construction ignores incoming undef values.
5259041687SNicolai Haehnle // That is paths from the entry without a definition do not result in
5359041687SNicolai Haehnle // phi nodes.
5459041687SNicolai Haehnle //
5559041687SNicolai Haehnle //       entry
5659041687SNicolai Haehnle //     /      \
5759041687SNicolai Haehnle //    A        \
5859041687SNicolai Haehnle //  /   \       Y
5959041687SNicolai Haehnle // B     C     /
6059041687SNicolai Haehnle //  \   /  \  /
6159041687SNicolai Haehnle //    D     E
6259041687SNicolai Haehnle //     \   /
6359041687SNicolai Haehnle //       F
6456db1c07SSimon Moll //
6559041687SNicolai Haehnle // Assume that A contains a divergent branch. We are interested
6659041687SNicolai Haehnle // in the set of all blocks where each block is reachable from A
6759041687SNicolai Haehnle // via two disjoint paths. This would be the set {D, F} in this
6859041687SNicolai Haehnle // case.
6959041687SNicolai Haehnle // To generally reduce this query to SSA construction we introduce
7059041687SNicolai Haehnle // a virtual variable x and assign to x different values in each
7159041687SNicolai Haehnle // successor block of A.
7256db1c07SSimon Moll //
7359041687SNicolai Haehnle //           entry
7459041687SNicolai Haehnle //         /      \
7559041687SNicolai Haehnle //        A        \
7659041687SNicolai Haehnle //      /   \       Y
7759041687SNicolai Haehnle // x = 0   x = 1   /
7859041687SNicolai Haehnle //      \  /   \  /
7959041687SNicolai Haehnle //        D     E
8059041687SNicolai Haehnle //         \   /
8159041687SNicolai Haehnle //           F
8256db1c07SSimon Moll //
8359041687SNicolai Haehnle // Our flavor of SSA construction for x will construct the following
8456db1c07SSimon Moll //
8559041687SNicolai Haehnle //            entry
8659041687SNicolai Haehnle //          /      \
8759041687SNicolai Haehnle //         A        \
8859041687SNicolai Haehnle //       /   \       Y
8959041687SNicolai Haehnle // x0 = 0   x1 = 1  /
9059041687SNicolai Haehnle //       \   /   \ /
9159041687SNicolai Haehnle //     x2 = phi   E
9259041687SNicolai Haehnle //         \     /
9359041687SNicolai Haehnle //         x3 = phi
9456db1c07SSimon Moll //
9559041687SNicolai Haehnle // The blocks D and F contain phi nodes and are thus each reachable
9659041687SNicolai Haehnle // by two disjoins paths from A.
9759041687SNicolai Haehnle //
9859041687SNicolai Haehnle // -- Remarks --
9956db1c07SSimon Moll // * In case of loop exits we need to check the disjoint path criterion for loops.
10056db1c07SSimon Moll //   To this end, we check whether the definition of x differs between the
10159041687SNicolai Haehnle //   loop exit and the loop header (_after_ SSA construction).
10259041687SNicolai Haehnle //
10356db1c07SSimon Moll // -- Known Limitations & Future Work --
10456db1c07SSimon Moll // * The algorithm requires reducible loops because the implementation
10556db1c07SSimon Moll //   implicitly performs a single iteration of the underlying data flow analysis.
10656db1c07SSimon Moll //   This was done for pragmatism, simplicity and speed.
10756db1c07SSimon Moll //
10856db1c07SSimon Moll //   Relevant related work for extending the algorithm to irreducible control:
10956db1c07SSimon Moll //     A simple algorithm for global data flow analysis problems.
11056db1c07SSimon Moll //     Matthew S. Hecht and Jeffrey D. Ullman.
11156db1c07SSimon Moll //     SIAM Journal on Computing, 4(4):519–532, December 1975.
11256db1c07SSimon Moll //
11356db1c07SSimon Moll // * Another reason for requiring reducible loops is that points of
11456db1c07SSimon Moll //   synchronization in irreducible loops aren't 'obvious' - there is no unique
11556db1c07SSimon Moll //   header where threads 'should' synchronize when entering or coming back
11656db1c07SSimon Moll //   around from the latch.
11756db1c07SSimon Moll //
11859041687SNicolai Haehnle //===----------------------------------------------------------------------===//
119c6f0940dSBill Wendling 
120c48b06c4SSimon Moll #include "llvm/Analysis/SyncDependenceAnalysis.h"
12159041687SNicolai Haehnle #include "llvm/ADT/SmallPtrSet.h"
122c6f0940dSBill Wendling #include "llvm/Analysis/LoopInfo.h"
12359041687SNicolai Haehnle #include "llvm/IR/BasicBlock.h"
12459041687SNicolai Haehnle #include "llvm/IR/CFG.h"
12559041687SNicolai Haehnle #include "llvm/IR/Dominators.h"
12659041687SNicolai Haehnle #include "llvm/IR/Function.h"
12759041687SNicolai Haehnle 
12805ae04c3SSimon Moll #include <functional>
12959041687SNicolai Haehnle 
13059041687SNicolai Haehnle #define DEBUG_TYPE "sync-dependence"
13159041687SNicolai Haehnle 
13205ae04c3SSimon Moll // The SDA algorithm operates on a modified CFG - we modify the edges leaving
13305ae04c3SSimon Moll // loop headers as follows:
13405ae04c3SSimon Moll //
13505ae04c3SSimon Moll // * We remove all edges leaving all loop headers.
13605ae04c3SSimon Moll // * We add additional edges from the loop headers to their exit blocks.
13705ae04c3SSimon Moll //
13805ae04c3SSimon Moll // The modification is virtual, that is whenever we visit a loop header we
13905ae04c3SSimon Moll // pretend it had different successors.
14005ae04c3SSimon Moll namespace {
14105ae04c3SSimon Moll using namespace llvm;
14205ae04c3SSimon Moll 
14305ae04c3SSimon Moll // Custom Post-Order Traveral
14405ae04c3SSimon Moll //
14505ae04c3SSimon Moll // We cannot use the vanilla (R)PO computation of LLVM because:
14605ae04c3SSimon Moll // * We (virtually) modify the CFG.
14756db1c07SSimon Moll // * We want a loop-compact block enumeration, that is the numbers assigned to
14856db1c07SSimon Moll //   blocks of a loop form an interval
14956db1c07SSimon Moll //
15005ae04c3SSimon Moll using POCB = std::function<void(const BasicBlock &)>;
15105ae04c3SSimon Moll using VisitedSet = std::set<const BasicBlock *>;
15205ae04c3SSimon Moll using BlockStack = std::vector<const BasicBlock *>;
15305ae04c3SSimon Moll 
15405ae04c3SSimon Moll // forward
15505ae04c3SSimon Moll static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
15605ae04c3SSimon Moll                           VisitedSet &Finalized);
15705ae04c3SSimon Moll 
15805ae04c3SSimon Moll // for a nested region (top-level loop or nested loop)
computeStackPO(BlockStack & Stack,const LoopInfo & LI,Loop * Loop,POCB CallBack,VisitedSet & Finalized)15905ae04c3SSimon Moll static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
16005ae04c3SSimon Moll                            POCB CallBack, VisitedSet &Finalized) {
16105ae04c3SSimon Moll   const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
16205ae04c3SSimon Moll   while (!Stack.empty()) {
16305ae04c3SSimon Moll     const auto *NextBB = Stack.back();
16405ae04c3SSimon Moll 
16505ae04c3SSimon Moll     auto *NestedLoop = LI.getLoopFor(NextBB);
16605ae04c3SSimon Moll     bool IsNestedLoop = NestedLoop != Loop;
16705ae04c3SSimon Moll 
16805ae04c3SSimon Moll     // Treat the loop as a node
16905ae04c3SSimon Moll     if (IsNestedLoop) {
17005ae04c3SSimon Moll       SmallVector<BasicBlock *, 3> NestedExits;
17105ae04c3SSimon Moll       NestedLoop->getUniqueExitBlocks(NestedExits);
17205ae04c3SSimon Moll       bool PushedNodes = false;
17305ae04c3SSimon Moll       for (const auto *NestedExitBB : NestedExits) {
17405ae04c3SSimon Moll         if (NestedExitBB == LoopHeader)
17505ae04c3SSimon Moll           continue;
17605ae04c3SSimon Moll         if (Loop && !Loop->contains(NestedExitBB))
17705ae04c3SSimon Moll           continue;
17805ae04c3SSimon Moll         if (Finalized.count(NestedExitBB))
17905ae04c3SSimon Moll           continue;
18005ae04c3SSimon Moll         PushedNodes = true;
18105ae04c3SSimon Moll         Stack.push_back(NestedExitBB);
18205ae04c3SSimon Moll       }
18305ae04c3SSimon Moll       if (!PushedNodes) {
18405ae04c3SSimon Moll         // All loop exits finalized -> finish this node
18505ae04c3SSimon Moll         Stack.pop_back();
18605ae04c3SSimon Moll         computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
18705ae04c3SSimon Moll       }
18805ae04c3SSimon Moll       continue;
18905ae04c3SSimon Moll     }
19005ae04c3SSimon Moll 
19105ae04c3SSimon Moll     // DAG-style
19205ae04c3SSimon Moll     bool PushedNodes = false;
19305ae04c3SSimon Moll     for (const auto *SuccBB : successors(NextBB)) {
19405ae04c3SSimon Moll       if (SuccBB == LoopHeader)
19505ae04c3SSimon Moll         continue;
19605ae04c3SSimon Moll       if (Loop && !Loop->contains(SuccBB))
19705ae04c3SSimon Moll         continue;
19805ae04c3SSimon Moll       if (Finalized.count(SuccBB))
19905ae04c3SSimon Moll         continue;
20005ae04c3SSimon Moll       PushedNodes = true;
20105ae04c3SSimon Moll       Stack.push_back(SuccBB);
20205ae04c3SSimon Moll     }
20305ae04c3SSimon Moll     if (!PushedNodes) {
20405ae04c3SSimon Moll       // Never push nodes twice
20505ae04c3SSimon Moll       Stack.pop_back();
20605ae04c3SSimon Moll       if (!Finalized.insert(NextBB).second)
20705ae04c3SSimon Moll         continue;
20805ae04c3SSimon Moll       CallBack(*NextBB);
20905ae04c3SSimon Moll     }
21005ae04c3SSimon Moll   }
21105ae04c3SSimon Moll }
21205ae04c3SSimon Moll 
computeTopLevelPO(Function & F,const LoopInfo & LI,POCB CallBack)21305ae04c3SSimon Moll static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
21405ae04c3SSimon Moll   VisitedSet Finalized;
21505ae04c3SSimon Moll   BlockStack Stack;
21605ae04c3SSimon Moll   Stack.reserve(24); // FIXME made-up number
21705ae04c3SSimon Moll   Stack.push_back(&F.getEntryBlock());
21805ae04c3SSimon Moll   computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
21905ae04c3SSimon Moll }
22005ae04c3SSimon Moll 
computeLoopPO(const LoopInfo & LI,Loop & Loop,POCB CallBack,VisitedSet & Finalized)22105ae04c3SSimon Moll static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
22205ae04c3SSimon Moll                           VisitedSet &Finalized) {
22305ae04c3SSimon Moll   /// Call CallBack on all loop blocks.
22405ae04c3SSimon Moll   std::vector<const BasicBlock *> Stack;
22505ae04c3SSimon Moll   const auto *LoopHeader = Loop.getHeader();
22605ae04c3SSimon Moll 
22705ae04c3SSimon Moll   // Visit the header last
22805ae04c3SSimon Moll   Finalized.insert(LoopHeader);
22905ae04c3SSimon Moll   CallBack(*LoopHeader);
23005ae04c3SSimon Moll 
23105ae04c3SSimon Moll   // Initialize with immediate successors
23205ae04c3SSimon Moll   for (const auto *BB : successors(LoopHeader)) {
23305ae04c3SSimon Moll     if (!Loop.contains(BB))
23405ae04c3SSimon Moll       continue;
23505ae04c3SSimon Moll     if (BB == LoopHeader)
23605ae04c3SSimon Moll       continue;
23705ae04c3SSimon Moll     Stack.push_back(BB);
23805ae04c3SSimon Moll   }
23905ae04c3SSimon Moll 
24005ae04c3SSimon Moll   // Compute PO inside region
24105ae04c3SSimon Moll   computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
24205ae04c3SSimon Moll }
24305ae04c3SSimon Moll 
24405ae04c3SSimon Moll } // namespace
24505ae04c3SSimon Moll 
24659041687SNicolai Haehnle namespace llvm {
24759041687SNicolai Haehnle 
24805ae04c3SSimon Moll ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
24959041687SNicolai Haehnle 
SyncDependenceAnalysis(const DominatorTree & DT,const PostDominatorTree & PDT,const LoopInfo & LI)25059041687SNicolai Haehnle SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
25159041687SNicolai Haehnle                                                const PostDominatorTree &PDT,
25259041687SNicolai Haehnle                                                const LoopInfo &LI)
25305ae04c3SSimon Moll     : DT(DT), PDT(PDT), LI(LI) {
25405ae04c3SSimon Moll   computeTopLevelPO(*DT.getRoot()->getParent(), LI,
25505ae04c3SSimon Moll                     [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
25605ae04c3SSimon Moll }
25759041687SNicolai Haehnle 
258*3a3cb929SKazu Hirata SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
25959041687SNicolai Haehnle 
26059041687SNicolai Haehnle // divergence propagator for reducible CFGs
26159041687SNicolai Haehnle struct DivergencePropagator {
26205ae04c3SSimon Moll   const ModifiedPO &LoopPOT;
26359041687SNicolai Haehnle   const DominatorTree &DT;
26459041687SNicolai Haehnle   const PostDominatorTree &PDT;
26559041687SNicolai Haehnle   const LoopInfo &LI;
26605ae04c3SSimon Moll   const BasicBlock &DivTermBlock;
26759041687SNicolai Haehnle 
26805ae04c3SSimon Moll   // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
26905ae04c3SSimon Moll   //   block B
27005ae04c3SSimon Moll   // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
27105ae04c3SSimon Moll   // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
27205ae04c3SSimon Moll   // from X or B is an immediate successor of X (initial value).
27305ae04c3SSimon Moll   using BlockLabelVec = std::vector<const BasicBlock *>;
27405ae04c3SSimon Moll   BlockLabelVec BlockLabels;
27505ae04c3SSimon Moll   // divergent join and loop exit descriptor.
27605ae04c3SSimon Moll   std::unique_ptr<ControlDivergenceDesc> DivDesc;
27759041687SNicolai Haehnle 
DivergencePropagatorllvm::DivergencePropagator27805ae04c3SSimon Moll   DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
27905ae04c3SSimon Moll                        const PostDominatorTree &PDT, const LoopInfo &LI,
28005ae04c3SSimon Moll                        const BasicBlock &DivTermBlock)
28105ae04c3SSimon Moll       : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
28205ae04c3SSimon Moll         BlockLabels(LoopPOT.size(), nullptr),
28305ae04c3SSimon Moll         DivDesc(new ControlDivergenceDesc) {}
28459041687SNicolai Haehnle 
printDefsllvm::DivergencePropagator28559041687SNicolai Haehnle   void printDefs(raw_ostream &Out) {
28605ae04c3SSimon Moll     Out << "Propagator::BlockLabels {\n";
28705ae04c3SSimon Moll     for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
28805ae04c3SSimon Moll       const auto *Label = BlockLabels[BlockIdx];
28905ae04c3SSimon Moll       Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
29005ae04c3SSimon Moll           << ") : ";
29105ae04c3SSimon Moll       if (!Label) {
29205ae04c3SSimon Moll         Out << "<null>\n";
29359041687SNicolai Haehnle       } else {
29405ae04c3SSimon Moll         Out << Label->getName() << "\n";
29559041687SNicolai Haehnle       }
29659041687SNicolai Haehnle     }
29759041687SNicolai Haehnle     Out << "}\n";
29859041687SNicolai Haehnle   }
29959041687SNicolai Haehnle 
30005ae04c3SSimon Moll   // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
30105ae04c3SSimon Moll   // causes a divergent join.
computeJoinllvm::DivergencePropagator30205ae04c3SSimon Moll   bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
30305ae04c3SSimon Moll     auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
30459041687SNicolai Haehnle 
30505ae04c3SSimon Moll     // unset or same reaching label
30605ae04c3SSimon Moll     const auto *OldLabel = BlockLabels[SuccIdx];
30705ae04c3SSimon Moll     if (!OldLabel || (OldLabel == &PushedLabel)) {
30805ae04c3SSimon Moll       BlockLabels[SuccIdx] = &PushedLabel;
30905ae04c3SSimon Moll       return false;
31059041687SNicolai Haehnle     }
31159041687SNicolai Haehnle 
31205ae04c3SSimon Moll     // Update the definition
31305ae04c3SSimon Moll     BlockLabels[SuccIdx] = &SuccBlock;
31405ae04c3SSimon Moll     return true;
31559041687SNicolai Haehnle   }
31659041687SNicolai Haehnle 
31705ae04c3SSimon Moll   // visiting a virtual loop exit edge from the loop header --> temporal
31805ae04c3SSimon Moll   // divergence on join
visitLoopExitEdgellvm::DivergencePropagator31905ae04c3SSimon Moll   bool visitLoopExitEdge(const BasicBlock &ExitBlock,
32005ae04c3SSimon Moll                          const BasicBlock &DefBlock, bool FromParentLoop) {
32105ae04c3SSimon Moll     // Pushing from a non-parent loop cannot cause temporal divergence.
32205ae04c3SSimon Moll     if (!FromParentLoop)
32305ae04c3SSimon Moll       return visitEdge(ExitBlock, DefBlock);
32459041687SNicolai Haehnle 
32505ae04c3SSimon Moll     if (!computeJoin(ExitBlock, DefBlock))
32605ae04c3SSimon Moll       return false;
32705ae04c3SSimon Moll 
32805ae04c3SSimon Moll     // Identified a divergent loop exit
32905ae04c3SSimon Moll     DivDesc->LoopDivBlocks.insert(&ExitBlock);
33005ae04c3SSimon Moll     LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
33105ae04c3SSimon Moll                       << "\n");
33205ae04c3SSimon Moll     return true;
33359041687SNicolai Haehnle   }
33459041687SNicolai Haehnle 
33505ae04c3SSimon Moll   // process \p SuccBlock with reaching definition \p DefBlock
visitEdgellvm::DivergencePropagator33605ae04c3SSimon Moll   bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
33705ae04c3SSimon Moll     if (!computeJoin(SuccBlock, DefBlock))
33805ae04c3SSimon Moll       return false;
33959041687SNicolai Haehnle 
34005ae04c3SSimon Moll     // Divergent, disjoint paths join.
34105ae04c3SSimon Moll     DivDesc->JoinDivBlocks.insert(&SuccBlock);
34205ae04c3SSimon Moll     LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
34305ae04c3SSimon Moll     return true;
34405ae04c3SSimon Moll   }
34505ae04c3SSimon Moll 
computeJoinPointsllvm::DivergencePropagator34605ae04c3SSimon Moll   std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
34705ae04c3SSimon Moll     assert(DivDesc);
34805ae04c3SSimon Moll 
34905ae04c3SSimon Moll     LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
350c48b06c4SSimon Moll                       << "\n");
351c92e51d8SJay Foad 
35205ae04c3SSimon Moll     const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
35305ae04c3SSimon Moll 
35405ae04c3SSimon Moll     // Early stopping criterion
35505ae04c3SSimon Moll     int FloorIdx = LoopPOT.size() - 1;
35605ae04c3SSimon Moll     const BasicBlock *FloorLabel = nullptr;
35705ae04c3SSimon Moll 
35859041687SNicolai Haehnle     // bootstrap with branch targets
35905ae04c3SSimon Moll     int BlockIdx = 0;
36059041687SNicolai Haehnle 
36105ae04c3SSimon Moll     for (const auto *SuccBlock : successors(&DivTermBlock)) {
36205ae04c3SSimon Moll       auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
36305ae04c3SSimon Moll       BlockLabels[SuccIdx] = SuccBlock;
36459041687SNicolai Haehnle 
36505ae04c3SSimon Moll       // Find the successor with the highest index to start with
36605ae04c3SSimon Moll       BlockIdx = std::max<int>(BlockIdx, SuccIdx);
36705ae04c3SSimon Moll       FloorIdx = std::min<int>(FloorIdx, SuccIdx);
368c92e51d8SJay Foad 
36905ae04c3SSimon Moll       // Identify immediate divergent loop exits
37005ae04c3SSimon Moll       if (!DivBlockLoop)
37105ae04c3SSimon Moll         continue;
37259041687SNicolai Haehnle 
37305ae04c3SSimon Moll       const auto *BlockLoop = LI.getLoopFor(SuccBlock);
37405ae04c3SSimon Moll       if (BlockLoop && DivBlockLoop->contains(BlockLoop))
37505ae04c3SSimon Moll         continue;
37605ae04c3SSimon Moll       DivDesc->LoopDivBlocks.insert(SuccBlock);
37705ae04c3SSimon Moll       LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
37805ae04c3SSimon Moll                         << SuccBlock->getName() << "\n");
379fe5f233aSWeverything     }
38059041687SNicolai Haehnle 
38159041687SNicolai Haehnle     // propagate definitions at the immediate successors of the node in RPO
38205ae04c3SSimon Moll     for (; BlockIdx >= FloorIdx; --BlockIdx) {
38305ae04c3SSimon Moll       LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
38405ae04c3SSimon Moll 
38505ae04c3SSimon Moll       // Any label available here
38605ae04c3SSimon Moll       const auto *Label = BlockLabels[BlockIdx];
38705ae04c3SSimon Moll       if (!Label)
38805ae04c3SSimon Moll         continue;
38905ae04c3SSimon Moll 
39005ae04c3SSimon Moll       // Ok. Get the block
39105ae04c3SSimon Moll       const auto *Block = LoopPOT.getBlockAt(BlockIdx);
392c92e51d8SJay Foad       LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
39359041687SNicolai Haehnle 
39459041687SNicolai Haehnle       auto *BlockLoop = LI.getLoopFor(Block);
39505ae04c3SSimon Moll       bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
39605ae04c3SSimon Moll       bool CausedJoin = false;
39705ae04c3SSimon Moll       int LoweredFloorIdx = FloorIdx;
39805ae04c3SSimon Moll       if (IsLoopHeader) {
39905ae04c3SSimon Moll         // Disconnect from immediate successors and propagate directly to loop
40005ae04c3SSimon Moll         // exits.
40159041687SNicolai Haehnle         SmallVector<BasicBlock *, 4> BlockLoopExits;
40259041687SNicolai Haehnle         BlockLoop->getExitBlocks(BlockLoopExits);
40305ae04c3SSimon Moll 
40405ae04c3SSimon Moll         bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
40559041687SNicolai Haehnle         for (const auto *BlockLoopExit : BlockLoopExits) {
40605ae04c3SSimon Moll           CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
40705ae04c3SSimon Moll           LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
40805ae04c3SSimon Moll                                           LoopPOT.getIndexOf(*BlockLoopExit));
40905ae04c3SSimon Moll         }
41005ae04c3SSimon Moll       } else {
41105ae04c3SSimon Moll         // Acyclic successor case
41205ae04c3SSimon Moll         for (const auto *SuccBlock : successors(Block)) {
41305ae04c3SSimon Moll           CausedJoin |= visitEdge(*SuccBlock, *Label);
41405ae04c3SSimon Moll           LoweredFloorIdx =
41505ae04c3SSimon Moll               std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
41605ae04c3SSimon Moll         }
41759041687SNicolai Haehnle       }
41859041687SNicolai Haehnle 
41905ae04c3SSimon Moll       // Floor update
42005ae04c3SSimon Moll       if (CausedJoin) {
42105ae04c3SSimon Moll         // 1. Different labels pushed to successors
42205ae04c3SSimon Moll         FloorIdx = LoweredFloorIdx;
42305ae04c3SSimon Moll       } else if (FloorLabel != Label) {
42405ae04c3SSimon Moll         // 2. No join caused BUT we pushed a label that is different than the
42505ae04c3SSimon Moll         // last pushed label
42605ae04c3SSimon Moll         FloorIdx = LoweredFloorIdx;
42705ae04c3SSimon Moll         FloorLabel = Label;
42859041687SNicolai Haehnle       }
42959041687SNicolai Haehnle     }
43059041687SNicolai Haehnle 
431c92e51d8SJay Foad     LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
432c92e51d8SJay Foad 
43305ae04c3SSimon Moll     return std::move(DivDesc);
43459041687SNicolai Haehnle   }
43559041687SNicolai Haehnle };
43659041687SNicolai Haehnle 
437c36d441bSFangrui Song #ifndef NDEBUG
printBlockSet(ConstBlockSet & Blocks,raw_ostream & Out)43805ae04c3SSimon Moll static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
43905ae04c3SSimon Moll   Out << "[";
4406ac12e4bSKazu Hirata   ListSeparator LS;
4416ac12e4bSKazu Hirata   for (const auto *BB : Blocks)
4426ac12e4bSKazu Hirata     Out << LS << BB->getName();
44305ae04c3SSimon Moll   Out << "]";
44459041687SNicolai Haehnle }
445c36d441bSFangrui Song #endif
44659041687SNicolai Haehnle 
44705ae04c3SSimon Moll const ControlDivergenceDesc &
getJoinBlocks(const Instruction & Term)44805ae04c3SSimon Moll SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
44959041687SNicolai Haehnle   // trivial case
45005ae04c3SSimon Moll   if (Term.getNumSuccessors() <= 1) {
45105ae04c3SSimon Moll     return EmptyDivergenceDesc;
45259041687SNicolai Haehnle   }
45359041687SNicolai Haehnle 
45459041687SNicolai Haehnle   // already available in cache?
45505ae04c3SSimon Moll   auto ItCached = CachedControlDivDescs.find(&Term);
45605ae04c3SSimon Moll   if (ItCached != CachedControlDivDescs.end())
45759041687SNicolai Haehnle     return *ItCached->second;
45859041687SNicolai Haehnle 
45959041687SNicolai Haehnle   // compute all join points
46005ae04c3SSimon Moll   // Special handling of divergent loop exits is not needed for LCSSA
46159041687SNicolai Haehnle   const auto &TermBlock = *Term.getParent();
46205ae04c3SSimon Moll   DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
46305ae04c3SSimon Moll   auto DivDesc = Propagator.computeJoinPoints();
46459041687SNicolai Haehnle 
46505ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
46605ae04c3SSimon Moll              dbgs() << "JoinDivBlocks: ";
46705ae04c3SSimon Moll              printBlockSet(DivDesc->JoinDivBlocks, dbgs());
46805ae04c3SSimon Moll              dbgs() << "\nLoopDivBlocks: ";
46905ae04c3SSimon Moll              printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
47005ae04c3SSimon Moll 
47105ae04c3SSimon Moll   auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
47259041687SNicolai Haehnle   assert(ItInserted.second);
47359041687SNicolai Haehnle   return *ItInserted.first->second;
47459041687SNicolai Haehnle }
47559041687SNicolai Haehnle 
47659041687SNicolai Haehnle } // namespace llvm
477