105ae04c3SSimon Moll //===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
259041687SNicolai Haehnle //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
659041687SNicolai Haehnle //
759041687SNicolai Haehnle //===----------------------------------------------------------------------===//
859041687SNicolai Haehnle //
959041687SNicolai Haehnle // This file implements an algorithm that returns for a divergent branch
1059041687SNicolai Haehnle // the set of basic blocks whose phi nodes become divergent due to divergent
1159041687SNicolai Haehnle // control. These are the blocks that are reachable by two disjoint paths from
1259041687SNicolai Haehnle // the branch or loop exits that have a reaching path that is disjoint from a
1359041687SNicolai Haehnle // path to the loop latch.
1459041687SNicolai Haehnle //
1559041687SNicolai Haehnle // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
1659041687SNicolai Haehnle // control-induced divergence in phi nodes.
1759041687SNicolai Haehnle //
1859041687SNicolai Haehnle //
1956db1c07SSimon Moll // -- Reference --
2056db1c07SSimon Moll // The algorithm is presented in Section 5 of
2156db1c07SSimon Moll //
2256db1c07SSimon Moll // An abstract interpretation for SPMD divergence
2356db1c07SSimon Moll // on reducible control flow graphs.
2456db1c07SSimon Moll // Julian Rosemann, Simon Moll and Sebastian Hack
2556db1c07SSimon Moll // POPL '21
2656db1c07SSimon Moll //
2759041687SNicolai Haehnle //
2859041687SNicolai Haehnle // -- Sync dependence --
2956db1c07SSimon Moll // Sync dependence characterizes the control flow aspect of the
3059041687SNicolai Haehnle // propagation of branch divergence. For example,
3159041687SNicolai Haehnle //
3259041687SNicolai Haehnle // %cond = icmp slt i32 %tid, 10
3359041687SNicolai Haehnle // br i1 %cond, label %then, label %else
3459041687SNicolai Haehnle // then:
3559041687SNicolai Haehnle // br label %merge
3659041687SNicolai Haehnle // else:
3759041687SNicolai Haehnle // br label %merge
3859041687SNicolai Haehnle // merge:
3959041687SNicolai Haehnle // %a = phi i32 [ 0, %then ], [ 1, %else ]
4059041687SNicolai Haehnle //
4159041687SNicolai Haehnle // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
4259041687SNicolai Haehnle // because %tid is not on its use-def chains, %a is sync dependent on %tid
4359041687SNicolai Haehnle // because the branch "br i1 %cond" depends on %tid and affects which value %a
4459041687SNicolai Haehnle // is assigned to.
4559041687SNicolai Haehnle //
4656db1c07SSimon Moll //
4759041687SNicolai Haehnle // -- Reduction to SSA construction --
4859041687SNicolai Haehnle // There are two disjoint paths from A to X, if a certain variant of SSA
4956db1c07SSimon Moll // construction places a phi node in X under the following set-up scheme.
5059041687SNicolai Haehnle //
5159041687SNicolai Haehnle // This variant of SSA construction ignores incoming undef values.
5259041687SNicolai Haehnle // That is paths from the entry without a definition do not result in
5359041687SNicolai Haehnle // phi nodes.
5459041687SNicolai Haehnle //
5559041687SNicolai Haehnle // entry
5659041687SNicolai Haehnle // / \
5759041687SNicolai Haehnle // A \
5859041687SNicolai Haehnle // / \ Y
5959041687SNicolai Haehnle // B C /
6059041687SNicolai Haehnle // \ / \ /
6159041687SNicolai Haehnle // D E
6259041687SNicolai Haehnle // \ /
6359041687SNicolai Haehnle // F
6456db1c07SSimon Moll //
6559041687SNicolai Haehnle // Assume that A contains a divergent branch. We are interested
6659041687SNicolai Haehnle // in the set of all blocks where each block is reachable from A
6759041687SNicolai Haehnle // via two disjoint paths. This would be the set {D, F} in this
6859041687SNicolai Haehnle // case.
6959041687SNicolai Haehnle // To generally reduce this query to SSA construction we introduce
7059041687SNicolai Haehnle // a virtual variable x and assign to x different values in each
7159041687SNicolai Haehnle // successor block of A.
7256db1c07SSimon Moll //
7359041687SNicolai Haehnle // entry
7459041687SNicolai Haehnle // / \
7559041687SNicolai Haehnle // A \
7659041687SNicolai Haehnle // / \ Y
7759041687SNicolai Haehnle // x = 0 x = 1 /
7859041687SNicolai Haehnle // \ / \ /
7959041687SNicolai Haehnle // D E
8059041687SNicolai Haehnle // \ /
8159041687SNicolai Haehnle // F
8256db1c07SSimon Moll //
8359041687SNicolai Haehnle // Our flavor of SSA construction for x will construct the following
8456db1c07SSimon Moll //
8559041687SNicolai Haehnle // entry
8659041687SNicolai Haehnle // / \
8759041687SNicolai Haehnle // A \
8859041687SNicolai Haehnle // / \ Y
8959041687SNicolai Haehnle // x0 = 0 x1 = 1 /
9059041687SNicolai Haehnle // \ / \ /
9159041687SNicolai Haehnle // x2 = phi E
9259041687SNicolai Haehnle // \ /
9359041687SNicolai Haehnle // x3 = phi
9456db1c07SSimon Moll //
9559041687SNicolai Haehnle // The blocks D and F contain phi nodes and are thus each reachable
9659041687SNicolai Haehnle // by two disjoins paths from A.
9759041687SNicolai Haehnle //
9859041687SNicolai Haehnle // -- Remarks --
9956db1c07SSimon Moll // * In case of loop exits we need to check the disjoint path criterion for loops.
10056db1c07SSimon Moll // To this end, we check whether the definition of x differs between the
10159041687SNicolai Haehnle // loop exit and the loop header (_after_ SSA construction).
10259041687SNicolai Haehnle //
10356db1c07SSimon Moll // -- Known Limitations & Future Work --
10456db1c07SSimon Moll // * The algorithm requires reducible loops because the implementation
10556db1c07SSimon Moll // implicitly performs a single iteration of the underlying data flow analysis.
10656db1c07SSimon Moll // This was done for pragmatism, simplicity and speed.
10756db1c07SSimon Moll //
10856db1c07SSimon Moll // Relevant related work for extending the algorithm to irreducible control:
10956db1c07SSimon Moll // A simple algorithm for global data flow analysis problems.
11056db1c07SSimon Moll // Matthew S. Hecht and Jeffrey D. Ullman.
11156db1c07SSimon Moll // SIAM Journal on Computing, 4(4):519–532, December 1975.
11256db1c07SSimon Moll //
11356db1c07SSimon Moll // * Another reason for requiring reducible loops is that points of
11456db1c07SSimon Moll // synchronization in irreducible loops aren't 'obvious' - there is no unique
11556db1c07SSimon Moll // header where threads 'should' synchronize when entering or coming back
11656db1c07SSimon Moll // around from the latch.
11756db1c07SSimon Moll //
11859041687SNicolai Haehnle //===----------------------------------------------------------------------===//
119c6f0940dSBill Wendling
120c48b06c4SSimon Moll #include "llvm/Analysis/SyncDependenceAnalysis.h"
12159041687SNicolai Haehnle #include "llvm/ADT/SmallPtrSet.h"
122c6f0940dSBill Wendling #include "llvm/Analysis/LoopInfo.h"
12359041687SNicolai Haehnle #include "llvm/IR/BasicBlock.h"
12459041687SNicolai Haehnle #include "llvm/IR/CFG.h"
12559041687SNicolai Haehnle #include "llvm/IR/Dominators.h"
12659041687SNicolai Haehnle #include "llvm/IR/Function.h"
12759041687SNicolai Haehnle
12805ae04c3SSimon Moll #include <functional>
12959041687SNicolai Haehnle
13059041687SNicolai Haehnle #define DEBUG_TYPE "sync-dependence"
13159041687SNicolai Haehnle
13205ae04c3SSimon Moll // The SDA algorithm operates on a modified CFG - we modify the edges leaving
13305ae04c3SSimon Moll // loop headers as follows:
13405ae04c3SSimon Moll //
13505ae04c3SSimon Moll // * We remove all edges leaving all loop headers.
13605ae04c3SSimon Moll // * We add additional edges from the loop headers to their exit blocks.
13705ae04c3SSimon Moll //
13805ae04c3SSimon Moll // The modification is virtual, that is whenever we visit a loop header we
13905ae04c3SSimon Moll // pretend it had different successors.
14005ae04c3SSimon Moll namespace {
14105ae04c3SSimon Moll using namespace llvm;
14205ae04c3SSimon Moll
14305ae04c3SSimon Moll // Custom Post-Order Traveral
14405ae04c3SSimon Moll //
14505ae04c3SSimon Moll // We cannot use the vanilla (R)PO computation of LLVM because:
14605ae04c3SSimon Moll // * We (virtually) modify the CFG.
14756db1c07SSimon Moll // * We want a loop-compact block enumeration, that is the numbers assigned to
14856db1c07SSimon Moll // blocks of a loop form an interval
14956db1c07SSimon Moll //
15005ae04c3SSimon Moll using POCB = std::function<void(const BasicBlock &)>;
15105ae04c3SSimon Moll using VisitedSet = std::set<const BasicBlock *>;
15205ae04c3SSimon Moll using BlockStack = std::vector<const BasicBlock *>;
15305ae04c3SSimon Moll
15405ae04c3SSimon Moll // forward
15505ae04c3SSimon Moll static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
15605ae04c3SSimon Moll VisitedSet &Finalized);
15705ae04c3SSimon Moll
15805ae04c3SSimon Moll // for a nested region (top-level loop or nested loop)
computeStackPO(BlockStack & Stack,const LoopInfo & LI,Loop * Loop,POCB CallBack,VisitedSet & Finalized)15905ae04c3SSimon Moll static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
16005ae04c3SSimon Moll POCB CallBack, VisitedSet &Finalized) {
16105ae04c3SSimon Moll const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
16205ae04c3SSimon Moll while (!Stack.empty()) {
16305ae04c3SSimon Moll const auto *NextBB = Stack.back();
16405ae04c3SSimon Moll
16505ae04c3SSimon Moll auto *NestedLoop = LI.getLoopFor(NextBB);
16605ae04c3SSimon Moll bool IsNestedLoop = NestedLoop != Loop;
16705ae04c3SSimon Moll
16805ae04c3SSimon Moll // Treat the loop as a node
16905ae04c3SSimon Moll if (IsNestedLoop) {
17005ae04c3SSimon Moll SmallVector<BasicBlock *, 3> NestedExits;
17105ae04c3SSimon Moll NestedLoop->getUniqueExitBlocks(NestedExits);
17205ae04c3SSimon Moll bool PushedNodes = false;
17305ae04c3SSimon Moll for (const auto *NestedExitBB : NestedExits) {
17405ae04c3SSimon Moll if (NestedExitBB == LoopHeader)
17505ae04c3SSimon Moll continue;
17605ae04c3SSimon Moll if (Loop && !Loop->contains(NestedExitBB))
17705ae04c3SSimon Moll continue;
17805ae04c3SSimon Moll if (Finalized.count(NestedExitBB))
17905ae04c3SSimon Moll continue;
18005ae04c3SSimon Moll PushedNodes = true;
18105ae04c3SSimon Moll Stack.push_back(NestedExitBB);
18205ae04c3SSimon Moll }
18305ae04c3SSimon Moll if (!PushedNodes) {
18405ae04c3SSimon Moll // All loop exits finalized -> finish this node
18505ae04c3SSimon Moll Stack.pop_back();
18605ae04c3SSimon Moll computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
18705ae04c3SSimon Moll }
18805ae04c3SSimon Moll continue;
18905ae04c3SSimon Moll }
19005ae04c3SSimon Moll
19105ae04c3SSimon Moll // DAG-style
19205ae04c3SSimon Moll bool PushedNodes = false;
19305ae04c3SSimon Moll for (const auto *SuccBB : successors(NextBB)) {
19405ae04c3SSimon Moll if (SuccBB == LoopHeader)
19505ae04c3SSimon Moll continue;
19605ae04c3SSimon Moll if (Loop && !Loop->contains(SuccBB))
19705ae04c3SSimon Moll continue;
19805ae04c3SSimon Moll if (Finalized.count(SuccBB))
19905ae04c3SSimon Moll continue;
20005ae04c3SSimon Moll PushedNodes = true;
20105ae04c3SSimon Moll Stack.push_back(SuccBB);
20205ae04c3SSimon Moll }
20305ae04c3SSimon Moll if (!PushedNodes) {
20405ae04c3SSimon Moll // Never push nodes twice
20505ae04c3SSimon Moll Stack.pop_back();
20605ae04c3SSimon Moll if (!Finalized.insert(NextBB).second)
20705ae04c3SSimon Moll continue;
20805ae04c3SSimon Moll CallBack(*NextBB);
20905ae04c3SSimon Moll }
21005ae04c3SSimon Moll }
21105ae04c3SSimon Moll }
21205ae04c3SSimon Moll
computeTopLevelPO(Function & F,const LoopInfo & LI,POCB CallBack)21305ae04c3SSimon Moll static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
21405ae04c3SSimon Moll VisitedSet Finalized;
21505ae04c3SSimon Moll BlockStack Stack;
21605ae04c3SSimon Moll Stack.reserve(24); // FIXME made-up number
21705ae04c3SSimon Moll Stack.push_back(&F.getEntryBlock());
21805ae04c3SSimon Moll computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
21905ae04c3SSimon Moll }
22005ae04c3SSimon Moll
computeLoopPO(const LoopInfo & LI,Loop & Loop,POCB CallBack,VisitedSet & Finalized)22105ae04c3SSimon Moll static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
22205ae04c3SSimon Moll VisitedSet &Finalized) {
22305ae04c3SSimon Moll /// Call CallBack on all loop blocks.
22405ae04c3SSimon Moll std::vector<const BasicBlock *> Stack;
22505ae04c3SSimon Moll const auto *LoopHeader = Loop.getHeader();
22605ae04c3SSimon Moll
22705ae04c3SSimon Moll // Visit the header last
22805ae04c3SSimon Moll Finalized.insert(LoopHeader);
22905ae04c3SSimon Moll CallBack(*LoopHeader);
23005ae04c3SSimon Moll
23105ae04c3SSimon Moll // Initialize with immediate successors
23205ae04c3SSimon Moll for (const auto *BB : successors(LoopHeader)) {
23305ae04c3SSimon Moll if (!Loop.contains(BB))
23405ae04c3SSimon Moll continue;
23505ae04c3SSimon Moll if (BB == LoopHeader)
23605ae04c3SSimon Moll continue;
23705ae04c3SSimon Moll Stack.push_back(BB);
23805ae04c3SSimon Moll }
23905ae04c3SSimon Moll
24005ae04c3SSimon Moll // Compute PO inside region
24105ae04c3SSimon Moll computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
24205ae04c3SSimon Moll }
24305ae04c3SSimon Moll
24405ae04c3SSimon Moll } // namespace
24505ae04c3SSimon Moll
24659041687SNicolai Haehnle namespace llvm {
24759041687SNicolai Haehnle
24805ae04c3SSimon Moll ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
24959041687SNicolai Haehnle
SyncDependenceAnalysis(const DominatorTree & DT,const PostDominatorTree & PDT,const LoopInfo & LI)25059041687SNicolai Haehnle SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
25159041687SNicolai Haehnle const PostDominatorTree &PDT,
25259041687SNicolai Haehnle const LoopInfo &LI)
25305ae04c3SSimon Moll : DT(DT), PDT(PDT), LI(LI) {
25405ae04c3SSimon Moll computeTopLevelPO(*DT.getRoot()->getParent(), LI,
25505ae04c3SSimon Moll [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
25605ae04c3SSimon Moll }
25759041687SNicolai Haehnle
258*3a3cb929SKazu Hirata SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
25959041687SNicolai Haehnle
26059041687SNicolai Haehnle // divergence propagator for reducible CFGs
26159041687SNicolai Haehnle struct DivergencePropagator {
26205ae04c3SSimon Moll const ModifiedPO &LoopPOT;
26359041687SNicolai Haehnle const DominatorTree &DT;
26459041687SNicolai Haehnle const PostDominatorTree &PDT;
26559041687SNicolai Haehnle const LoopInfo &LI;
26605ae04c3SSimon Moll const BasicBlock &DivTermBlock;
26759041687SNicolai Haehnle
26805ae04c3SSimon Moll // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
26905ae04c3SSimon Moll // block B
27005ae04c3SSimon Moll // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
27105ae04c3SSimon Moll // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
27205ae04c3SSimon Moll // from X or B is an immediate successor of X (initial value).
27305ae04c3SSimon Moll using BlockLabelVec = std::vector<const BasicBlock *>;
27405ae04c3SSimon Moll BlockLabelVec BlockLabels;
27505ae04c3SSimon Moll // divergent join and loop exit descriptor.
27605ae04c3SSimon Moll std::unique_ptr<ControlDivergenceDesc> DivDesc;
27759041687SNicolai Haehnle
DivergencePropagatorllvm::DivergencePropagator27805ae04c3SSimon Moll DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
27905ae04c3SSimon Moll const PostDominatorTree &PDT, const LoopInfo &LI,
28005ae04c3SSimon Moll const BasicBlock &DivTermBlock)
28105ae04c3SSimon Moll : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
28205ae04c3SSimon Moll BlockLabels(LoopPOT.size(), nullptr),
28305ae04c3SSimon Moll DivDesc(new ControlDivergenceDesc) {}
28459041687SNicolai Haehnle
printDefsllvm::DivergencePropagator28559041687SNicolai Haehnle void printDefs(raw_ostream &Out) {
28605ae04c3SSimon Moll Out << "Propagator::BlockLabels {\n";
28705ae04c3SSimon Moll for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
28805ae04c3SSimon Moll const auto *Label = BlockLabels[BlockIdx];
28905ae04c3SSimon Moll Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
29005ae04c3SSimon Moll << ") : ";
29105ae04c3SSimon Moll if (!Label) {
29205ae04c3SSimon Moll Out << "<null>\n";
29359041687SNicolai Haehnle } else {
29405ae04c3SSimon Moll Out << Label->getName() << "\n";
29559041687SNicolai Haehnle }
29659041687SNicolai Haehnle }
29759041687SNicolai Haehnle Out << "}\n";
29859041687SNicolai Haehnle }
29959041687SNicolai Haehnle
30005ae04c3SSimon Moll // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
30105ae04c3SSimon Moll // causes a divergent join.
computeJoinllvm::DivergencePropagator30205ae04c3SSimon Moll bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
30305ae04c3SSimon Moll auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
30459041687SNicolai Haehnle
30505ae04c3SSimon Moll // unset or same reaching label
30605ae04c3SSimon Moll const auto *OldLabel = BlockLabels[SuccIdx];
30705ae04c3SSimon Moll if (!OldLabel || (OldLabel == &PushedLabel)) {
30805ae04c3SSimon Moll BlockLabels[SuccIdx] = &PushedLabel;
30905ae04c3SSimon Moll return false;
31059041687SNicolai Haehnle }
31159041687SNicolai Haehnle
31205ae04c3SSimon Moll // Update the definition
31305ae04c3SSimon Moll BlockLabels[SuccIdx] = &SuccBlock;
31405ae04c3SSimon Moll return true;
31559041687SNicolai Haehnle }
31659041687SNicolai Haehnle
31705ae04c3SSimon Moll // visiting a virtual loop exit edge from the loop header --> temporal
31805ae04c3SSimon Moll // divergence on join
visitLoopExitEdgellvm::DivergencePropagator31905ae04c3SSimon Moll bool visitLoopExitEdge(const BasicBlock &ExitBlock,
32005ae04c3SSimon Moll const BasicBlock &DefBlock, bool FromParentLoop) {
32105ae04c3SSimon Moll // Pushing from a non-parent loop cannot cause temporal divergence.
32205ae04c3SSimon Moll if (!FromParentLoop)
32305ae04c3SSimon Moll return visitEdge(ExitBlock, DefBlock);
32459041687SNicolai Haehnle
32505ae04c3SSimon Moll if (!computeJoin(ExitBlock, DefBlock))
32605ae04c3SSimon Moll return false;
32705ae04c3SSimon Moll
32805ae04c3SSimon Moll // Identified a divergent loop exit
32905ae04c3SSimon Moll DivDesc->LoopDivBlocks.insert(&ExitBlock);
33005ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
33105ae04c3SSimon Moll << "\n");
33205ae04c3SSimon Moll return true;
33359041687SNicolai Haehnle }
33459041687SNicolai Haehnle
33505ae04c3SSimon Moll // process \p SuccBlock with reaching definition \p DefBlock
visitEdgellvm::DivergencePropagator33605ae04c3SSimon Moll bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
33705ae04c3SSimon Moll if (!computeJoin(SuccBlock, DefBlock))
33805ae04c3SSimon Moll return false;
33959041687SNicolai Haehnle
34005ae04c3SSimon Moll // Divergent, disjoint paths join.
34105ae04c3SSimon Moll DivDesc->JoinDivBlocks.insert(&SuccBlock);
34205ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
34305ae04c3SSimon Moll return true;
34405ae04c3SSimon Moll }
34505ae04c3SSimon Moll
computeJoinPointsllvm::DivergencePropagator34605ae04c3SSimon Moll std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
34705ae04c3SSimon Moll assert(DivDesc);
34805ae04c3SSimon Moll
34905ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
350c48b06c4SSimon Moll << "\n");
351c92e51d8SJay Foad
35205ae04c3SSimon Moll const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
35305ae04c3SSimon Moll
35405ae04c3SSimon Moll // Early stopping criterion
35505ae04c3SSimon Moll int FloorIdx = LoopPOT.size() - 1;
35605ae04c3SSimon Moll const BasicBlock *FloorLabel = nullptr;
35705ae04c3SSimon Moll
35859041687SNicolai Haehnle // bootstrap with branch targets
35905ae04c3SSimon Moll int BlockIdx = 0;
36059041687SNicolai Haehnle
36105ae04c3SSimon Moll for (const auto *SuccBlock : successors(&DivTermBlock)) {
36205ae04c3SSimon Moll auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
36305ae04c3SSimon Moll BlockLabels[SuccIdx] = SuccBlock;
36459041687SNicolai Haehnle
36505ae04c3SSimon Moll // Find the successor with the highest index to start with
36605ae04c3SSimon Moll BlockIdx = std::max<int>(BlockIdx, SuccIdx);
36705ae04c3SSimon Moll FloorIdx = std::min<int>(FloorIdx, SuccIdx);
368c92e51d8SJay Foad
36905ae04c3SSimon Moll // Identify immediate divergent loop exits
37005ae04c3SSimon Moll if (!DivBlockLoop)
37105ae04c3SSimon Moll continue;
37259041687SNicolai Haehnle
37305ae04c3SSimon Moll const auto *BlockLoop = LI.getLoopFor(SuccBlock);
37405ae04c3SSimon Moll if (BlockLoop && DivBlockLoop->contains(BlockLoop))
37505ae04c3SSimon Moll continue;
37605ae04c3SSimon Moll DivDesc->LoopDivBlocks.insert(SuccBlock);
37705ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
37805ae04c3SSimon Moll << SuccBlock->getName() << "\n");
379fe5f233aSWeverything }
38059041687SNicolai Haehnle
38159041687SNicolai Haehnle // propagate definitions at the immediate successors of the node in RPO
38205ae04c3SSimon Moll for (; BlockIdx >= FloorIdx; --BlockIdx) {
38305ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
38405ae04c3SSimon Moll
38505ae04c3SSimon Moll // Any label available here
38605ae04c3SSimon Moll const auto *Label = BlockLabels[BlockIdx];
38705ae04c3SSimon Moll if (!Label)
38805ae04c3SSimon Moll continue;
38905ae04c3SSimon Moll
39005ae04c3SSimon Moll // Ok. Get the block
39105ae04c3SSimon Moll const auto *Block = LoopPOT.getBlockAt(BlockIdx);
392c92e51d8SJay Foad LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
39359041687SNicolai Haehnle
39459041687SNicolai Haehnle auto *BlockLoop = LI.getLoopFor(Block);
39505ae04c3SSimon Moll bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
39605ae04c3SSimon Moll bool CausedJoin = false;
39705ae04c3SSimon Moll int LoweredFloorIdx = FloorIdx;
39805ae04c3SSimon Moll if (IsLoopHeader) {
39905ae04c3SSimon Moll // Disconnect from immediate successors and propagate directly to loop
40005ae04c3SSimon Moll // exits.
40159041687SNicolai Haehnle SmallVector<BasicBlock *, 4> BlockLoopExits;
40259041687SNicolai Haehnle BlockLoop->getExitBlocks(BlockLoopExits);
40305ae04c3SSimon Moll
40405ae04c3SSimon Moll bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
40559041687SNicolai Haehnle for (const auto *BlockLoopExit : BlockLoopExits) {
40605ae04c3SSimon Moll CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
40705ae04c3SSimon Moll LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
40805ae04c3SSimon Moll LoopPOT.getIndexOf(*BlockLoopExit));
40905ae04c3SSimon Moll }
41005ae04c3SSimon Moll } else {
41105ae04c3SSimon Moll // Acyclic successor case
41205ae04c3SSimon Moll for (const auto *SuccBlock : successors(Block)) {
41305ae04c3SSimon Moll CausedJoin |= visitEdge(*SuccBlock, *Label);
41405ae04c3SSimon Moll LoweredFloorIdx =
41505ae04c3SSimon Moll std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
41605ae04c3SSimon Moll }
41759041687SNicolai Haehnle }
41859041687SNicolai Haehnle
41905ae04c3SSimon Moll // Floor update
42005ae04c3SSimon Moll if (CausedJoin) {
42105ae04c3SSimon Moll // 1. Different labels pushed to successors
42205ae04c3SSimon Moll FloorIdx = LoweredFloorIdx;
42305ae04c3SSimon Moll } else if (FloorLabel != Label) {
42405ae04c3SSimon Moll // 2. No join caused BUT we pushed a label that is different than the
42505ae04c3SSimon Moll // last pushed label
42605ae04c3SSimon Moll FloorIdx = LoweredFloorIdx;
42705ae04c3SSimon Moll FloorLabel = Label;
42859041687SNicolai Haehnle }
42959041687SNicolai Haehnle }
43059041687SNicolai Haehnle
431c92e51d8SJay Foad LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
432c92e51d8SJay Foad
43305ae04c3SSimon Moll return std::move(DivDesc);
43459041687SNicolai Haehnle }
43559041687SNicolai Haehnle };
43659041687SNicolai Haehnle
437c36d441bSFangrui Song #ifndef NDEBUG
printBlockSet(ConstBlockSet & Blocks,raw_ostream & Out)43805ae04c3SSimon Moll static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
43905ae04c3SSimon Moll Out << "[";
4406ac12e4bSKazu Hirata ListSeparator LS;
4416ac12e4bSKazu Hirata for (const auto *BB : Blocks)
4426ac12e4bSKazu Hirata Out << LS << BB->getName();
44305ae04c3SSimon Moll Out << "]";
44459041687SNicolai Haehnle }
445c36d441bSFangrui Song #endif
44659041687SNicolai Haehnle
44705ae04c3SSimon Moll const ControlDivergenceDesc &
getJoinBlocks(const Instruction & Term)44805ae04c3SSimon Moll SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
44959041687SNicolai Haehnle // trivial case
45005ae04c3SSimon Moll if (Term.getNumSuccessors() <= 1) {
45105ae04c3SSimon Moll return EmptyDivergenceDesc;
45259041687SNicolai Haehnle }
45359041687SNicolai Haehnle
45459041687SNicolai Haehnle // already available in cache?
45505ae04c3SSimon Moll auto ItCached = CachedControlDivDescs.find(&Term);
45605ae04c3SSimon Moll if (ItCached != CachedControlDivDescs.end())
45759041687SNicolai Haehnle return *ItCached->second;
45859041687SNicolai Haehnle
45959041687SNicolai Haehnle // compute all join points
46005ae04c3SSimon Moll // Special handling of divergent loop exits is not needed for LCSSA
46159041687SNicolai Haehnle const auto &TermBlock = *Term.getParent();
46205ae04c3SSimon Moll DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
46305ae04c3SSimon Moll auto DivDesc = Propagator.computeJoinPoints();
46459041687SNicolai Haehnle
46505ae04c3SSimon Moll LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
46605ae04c3SSimon Moll dbgs() << "JoinDivBlocks: ";
46705ae04c3SSimon Moll printBlockSet(DivDesc->JoinDivBlocks, dbgs());
46805ae04c3SSimon Moll dbgs() << "\nLoopDivBlocks: ";
46905ae04c3SSimon Moll printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
47005ae04c3SSimon Moll
47105ae04c3SSimon Moll auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
47259041687SNicolai Haehnle assert(ItInserted.second);
47359041687SNicolai Haehnle return *ItInserted.first->second;
47459041687SNicolai Haehnle }
47559041687SNicolai Haehnle
47659041687SNicolai Haehnle } // namespace llvm
477