105ae04c3SSimon Moll //===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==//
259041687SNicolai Haehnle //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
659041687SNicolai Haehnle //
759041687SNicolai Haehnle //===----------------------------------------------------------------------===//
859041687SNicolai Haehnle //
959041687SNicolai Haehnle // This file implements a general divergence analysis for loop vectorization
1059041687SNicolai Haehnle // and GPU programs. It determines which branches and values in a loop or GPU
1159041687SNicolai Haehnle // program are divergent. It can help branch optimizations such as jump
1259041687SNicolai Haehnle // threading and loop unswitching to make better decisions.
1359041687SNicolai Haehnle //
1459041687SNicolai Haehnle // GPU programs typically use the SIMD execution model, where multiple threads
1559041687SNicolai Haehnle // in the same execution group have to execute in lock-step. Therefore, if the
1659041687SNicolai Haehnle // code contains divergent branches (i.e., threads in a group do not agree on
1759041687SNicolai Haehnle // which path of the branch to take), the group of threads has to execute all
1859041687SNicolai Haehnle // the paths from that branch with different subsets of threads enabled until
1959041687SNicolai Haehnle // they re-converge.
2059041687SNicolai Haehnle //
2159041687SNicolai Haehnle // Due to this execution model, some optimizations such as jump
2259041687SNicolai Haehnle // threading and loop unswitching can interfere with thread re-convergence.
2359041687SNicolai Haehnle // Therefore, an analysis that computes which branches in a GPU program are
2459041687SNicolai Haehnle // divergent can help the compiler to selectively run these optimizations.
2559041687SNicolai Haehnle //
2659041687SNicolai Haehnle // This implementation is derived from the Vectorization Analysis of the
2756db1c07SSimon Moll // Region Vectorizer (RV). The analysis is based on the approach described in
2859041687SNicolai Haehnle //
2956db1c07SSimon Moll //   An abstract interpretation for SPMD divergence
3056db1c07SSimon Moll //       on reducible control flow graphs.
3156db1c07SSimon Moll //   Julian Rosemann, Simon Moll and Sebastian Hack
3256db1c07SSimon Moll //   POPL '21
3359041687SNicolai Haehnle //
3411bf7da6SSameer Sahasrabuddhe // This implementation is generic in the sense that it does
3559041687SNicolai Haehnle // not itself identify original sources of divergence.
3659041687SNicolai Haehnle // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
3711bf7da6SSameer Sahasrabuddhe // (DivergenceAnalysis) for functions, identify the sources of divergence
3859041687SNicolai Haehnle // (e.g., special variables that hold the thread ID or the iteration variable).
3959041687SNicolai Haehnle //
4059041687SNicolai Haehnle // The generic implementation propagates divergence to variables that are data
4159041687SNicolai Haehnle // or sync dependent on a source of divergence.
4259041687SNicolai Haehnle //
4359041687SNicolai Haehnle // While data dependency is a well-known concept, the notion of sync dependency
4459041687SNicolai Haehnle // is worth more explanation. Sync dependence characterizes the control flow
4559041687SNicolai Haehnle // aspect of the propagation of branch divergence. For example,
4659041687SNicolai Haehnle //
4759041687SNicolai Haehnle //   %cond = icmp slt i32 %tid, 10
4859041687SNicolai Haehnle //   br i1 %cond, label %then, label %else
4959041687SNicolai Haehnle // then:
5059041687SNicolai Haehnle //   br label %merge
5159041687SNicolai Haehnle // else:
5259041687SNicolai Haehnle //   br label %merge
5359041687SNicolai Haehnle // merge:
5459041687SNicolai Haehnle //   %a = phi i32 [ 0, %then ], [ 1, %else ]
5559041687SNicolai Haehnle //
5659041687SNicolai Haehnle // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
5759041687SNicolai Haehnle // because %tid is not on its use-def chains, %a is sync dependent on %tid
5859041687SNicolai Haehnle // because the branch "br i1 %cond" depends on %tid and affects which value %a
5959041687SNicolai Haehnle // is assigned to.
6059041687SNicolai Haehnle //
6159041687SNicolai Haehnle // The sync dependence detection (which branch induces divergence in which join
6259041687SNicolai Haehnle // points) is implemented in the SyncDependenceAnalysis.
6359041687SNicolai Haehnle //
6411bf7da6SSameer Sahasrabuddhe // The current implementation has the following limitations:
6559041687SNicolai Haehnle // 1. intra-procedural. It conservatively considers the arguments of a
6659041687SNicolai Haehnle //    non-kernel-entry function and the return value of a function call as
6759041687SNicolai Haehnle //    divergent.
6859041687SNicolai Haehnle // 2. memory as black box. It conservatively considers values loaded from
6959041687SNicolai Haehnle //    generic or local address as divergent. This can be improved by leveraging
7059041687SNicolai Haehnle //    pointer analysis and/or by modelling non-escaping memory objects in SSA
7159041687SNicolai Haehnle //    as done in RV.
7259041687SNicolai Haehnle //
7359041687SNicolai Haehnle //===----------------------------------------------------------------------===//
7459041687SNicolai Haehnle 
7559041687SNicolai Haehnle #include "llvm/Analysis/DivergenceAnalysis.h"
7671c3a551Sserge-sans-paille #include "llvm/ADT/PostOrderIterator.h"
7711bf7da6SSameer Sahasrabuddhe #include "llvm/Analysis/CFG.h"
7859041687SNicolai Haehnle #include "llvm/Analysis/LoopInfo.h"
7959041687SNicolai Haehnle #include "llvm/Analysis/PostDominators.h"
8059041687SNicolai Haehnle #include "llvm/Analysis/TargetTransformInfo.h"
8159041687SNicolai Haehnle #include "llvm/IR/Dominators.h"
8259041687SNicolai Haehnle #include "llvm/IR/InstIterator.h"
8359041687SNicolai Haehnle #include "llvm/IR/Instructions.h"
8459041687SNicolai Haehnle #include "llvm/IR/Value.h"
8559041687SNicolai Haehnle #include "llvm/Support/Debug.h"
8659041687SNicolai Haehnle #include "llvm/Support/raw_ostream.h"
8759041687SNicolai Haehnle 
8859041687SNicolai Haehnle using namespace llvm;
8959041687SNicolai Haehnle 
9011bf7da6SSameer Sahasrabuddhe #define DEBUG_TYPE "divergence"
9159041687SNicolai Haehnle 
DivergenceAnalysisImpl(const Function & F,const Loop * RegionLoop,const DominatorTree & DT,const LoopInfo & LI,SyncDependenceAnalysis & SDA,bool IsLCSSAForm)9211bf7da6SSameer Sahasrabuddhe DivergenceAnalysisImpl::DivergenceAnalysisImpl(
9359041687SNicolai Haehnle     const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
9459041687SNicolai Haehnle     const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
9559041687SNicolai Haehnle     : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
9659041687SNicolai Haehnle       IsLCSSAForm(IsLCSSAForm) {}
9759041687SNicolai Haehnle 
markDivergent(const Value & DivVal)9811bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
9905ae04c3SSimon Moll   if (isAlwaysUniform(DivVal))
10005ae04c3SSimon Moll     return false;
10159041687SNicolai Haehnle   assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
10259041687SNicolai Haehnle   assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
10305ae04c3SSimon Moll   return DivergentValues.insert(&DivVal).second;
10459041687SNicolai Haehnle }
10559041687SNicolai Haehnle 
addUniformOverride(const Value & UniVal)10611bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
10759041687SNicolai Haehnle   UniformOverrides.insert(&UniVal);
10859041687SNicolai Haehnle }
10959041687SNicolai Haehnle 
isTemporalDivergent(const BasicBlock & ObservingBlock,const Value & Val) const11011bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::isTemporalDivergent(
11111bf7da6SSameer Sahasrabuddhe     const BasicBlock &ObservingBlock, const Value &Val) const {
11259041687SNicolai Haehnle   const auto *Inst = dyn_cast<const Instruction>(&Val);
11359041687SNicolai Haehnle   if (!Inst)
11459041687SNicolai Haehnle     return false;
11559041687SNicolai Haehnle   // check whether any divergent loop carrying Val terminates before control
11659041687SNicolai Haehnle   // proceeds to ObservingBlock
11759041687SNicolai Haehnle   for (const auto *Loop = LI.getLoopFor(Inst->getParent());
11859041687SNicolai Haehnle        Loop != RegionLoop && !Loop->contains(&ObservingBlock);
11959041687SNicolai Haehnle        Loop = Loop->getParentLoop()) {
120805d5959SKazu Hirata     if (DivergentLoops.contains(Loop))
12159041687SNicolai Haehnle       return true;
12259041687SNicolai Haehnle   }
12359041687SNicolai Haehnle 
12459041687SNicolai Haehnle   return false;
12559041687SNicolai Haehnle }
12659041687SNicolai Haehnle 
inRegion(const Instruction & I) const12711bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
12859041687SNicolai Haehnle   return I.getParent() && inRegion(*I.getParent());
12959041687SNicolai Haehnle }
13059041687SNicolai Haehnle 
inRegion(const BasicBlock & BB) const13111bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
132be7dbd67SSimon Pilgrim   return RegionLoop ? RegionLoop->contains(&BB) : (BB.getParent() == &F);
13359041687SNicolai Haehnle }
13459041687SNicolai Haehnle 
pushUsers(const Value & V)13511bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::pushUsers(const Value &V) {
13605ae04c3SSimon Moll   const auto *I = dyn_cast<const Instruction>(&V);
13705ae04c3SSimon Moll 
13805ae04c3SSimon Moll   if (I && I->isTerminator()) {
13905ae04c3SSimon Moll     analyzeControlDivergence(*I);
14005ae04c3SSimon Moll     return;
14105ae04c3SSimon Moll   }
14205ae04c3SSimon Moll 
14305ae04c3SSimon Moll   for (const auto *User : V.users()) {
14405ae04c3SSimon Moll     const auto *UserInst = dyn_cast<const Instruction>(User);
14505ae04c3SSimon Moll     if (!UserInst)
14605ae04c3SSimon Moll       continue;
14705ae04c3SSimon Moll 
14805ae04c3SSimon Moll     // only compute divergent inside loop
14905ae04c3SSimon Moll     if (!inRegion(*UserInst))
15005ae04c3SSimon Moll       continue;
15105ae04c3SSimon Moll 
15205ae04c3SSimon Moll     // All users of divergent values are immediate divergent
15305ae04c3SSimon Moll     if (markDivergent(*UserInst))
15405ae04c3SSimon Moll       Worklist.push_back(UserInst);
15505ae04c3SSimon Moll   }
15605ae04c3SSimon Moll }
15705ae04c3SSimon Moll 
getIfCarriedInstruction(const Use & U,const Loop & DivLoop)15805ae04c3SSimon Moll static const Instruction *getIfCarriedInstruction(const Use &U,
15905ae04c3SSimon Moll                                                   const Loop &DivLoop) {
16005ae04c3SSimon Moll   const auto *I = dyn_cast<const Instruction>(&U);
16105ae04c3SSimon Moll   if (!I)
16205ae04c3SSimon Moll     return nullptr;
16305ae04c3SSimon Moll   if (!DivLoop.contains(I))
16405ae04c3SSimon Moll     return nullptr;
16505ae04c3SSimon Moll   return I;
16605ae04c3SSimon Moll }
16705ae04c3SSimon Moll 
analyzeTemporalDivergence(const Instruction & I,const Loop & OuterDivLoop)16811bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::analyzeTemporalDivergence(
16911bf7da6SSameer Sahasrabuddhe     const Instruction &I, const Loop &OuterDivLoop) {
17005ae04c3SSimon Moll   if (isAlwaysUniform(I))
17105ae04c3SSimon Moll     return;
17205ae04c3SSimon Moll   if (isDivergent(I))
17305ae04c3SSimon Moll     return;
17405ae04c3SSimon Moll 
17505ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n");
17605ae04c3SSimon Moll   assert((isa<PHINode>(I) || !IsLCSSAForm) &&
17705ae04c3SSimon Moll          "In LCSSA form all users of loop-exiting defs are Phi nodes.");
17805ae04c3SSimon Moll   for (const Use &Op : I.operands()) {
17905ae04c3SSimon Moll     const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop);
180d3963b3aSSameer Sahasrabuddhe     if (!OpInst)
181d3963b3aSSameer Sahasrabuddhe       continue;
18205ae04c3SSimon Moll     if (markDivergent(I))
18305ae04c3SSimon Moll       pushUsers(I);
18405ae04c3SSimon Moll     return;
185d3963b3aSSameer Sahasrabuddhe   }
186d3963b3aSSameer Sahasrabuddhe }
187d3963b3aSSameer Sahasrabuddhe 
18859041687SNicolai Haehnle // marks all users of loop-carried values of the loop headed by LoopHeader as
18959041687SNicolai Haehnle // divergent
analyzeLoopExitDivergence(const BasicBlock & DivExit,const Loop & OuterDivLoop)19011bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
19111bf7da6SSameer Sahasrabuddhe     const BasicBlock &DivExit, const Loop &OuterDivLoop) {
19205ae04c3SSimon Moll   // All users are in immediate exit blocks
19305ae04c3SSimon Moll   if (IsLCSSAForm) {
19405ae04c3SSimon Moll     for (const auto &Phi : DivExit.phis()) {
19505ae04c3SSimon Moll       analyzeTemporalDivergence(Phi, OuterDivLoop);
19605ae04c3SSimon Moll     }
19705ae04c3SSimon Moll     return;
19805ae04c3SSimon Moll   }
19959041687SNicolai Haehnle 
20005ae04c3SSimon Moll   // For non-LCSSA we have to follow all live out edges wherever they may lead.
20105ae04c3SSimon Moll   const BasicBlock &LoopHeader = *OuterDivLoop.getHeader();
20205ae04c3SSimon Moll   SmallVector<const BasicBlock *, 8> TaintStack;
20305ae04c3SSimon Moll   TaintStack.push_back(&DivExit);
20459041687SNicolai Haehnle 
20559041687SNicolai Haehnle   // Otherwise potential users of loop-carried values could be anywhere in the
20659041687SNicolai Haehnle   // dominance region of DivLoop (including its fringes for phi nodes)
20759041687SNicolai Haehnle   DenseSet<const BasicBlock *> Visited;
20805ae04c3SSimon Moll   Visited.insert(&DivExit);
20959041687SNicolai Haehnle 
21005ae04c3SSimon Moll   do {
21116baad8fSKazu Hirata     auto *UserBlock = TaintStack.pop_back_val();
21259041687SNicolai Haehnle 
21359041687SNicolai Haehnle     // don't spread divergence beyond the region
21459041687SNicolai Haehnle     if (!inRegion(*UserBlock))
21559041687SNicolai Haehnle       continue;
21659041687SNicolai Haehnle 
21705ae04c3SSimon Moll     assert(!OuterDivLoop.contains(UserBlock) &&
21859041687SNicolai Haehnle            "irreducible control flow detected");
21959041687SNicolai Haehnle 
22059041687SNicolai Haehnle     // phi nodes at the fringes of the dominance region
22159041687SNicolai Haehnle     if (!DT.dominates(&LoopHeader, UserBlock)) {
22259041687SNicolai Haehnle       // all PHI nodes of UserBlock become divergent
223*601b3a13SKazu Hirata       for (const auto &Phi : UserBlock->phis()) {
22405ae04c3SSimon Moll         analyzeTemporalDivergence(Phi, OuterDivLoop);
22559041687SNicolai Haehnle       }
22659041687SNicolai Haehnle       continue;
22759041687SNicolai Haehnle     }
22859041687SNicolai Haehnle 
22905ae04c3SSimon Moll     // Taint outside users of values carried by OuterDivLoop.
230*601b3a13SKazu Hirata     for (const auto &I : *UserBlock) {
23105ae04c3SSimon Moll       analyzeTemporalDivergence(I, OuterDivLoop);
23259041687SNicolai Haehnle     }
23359041687SNicolai Haehnle 
23459041687SNicolai Haehnle     // visit all blocks in the dominance region
235*601b3a13SKazu Hirata     for (const auto *SuccBlock : successors(UserBlock)) {
23659041687SNicolai Haehnle       if (!Visited.insert(SuccBlock).second) {
23759041687SNicolai Haehnle         continue;
23859041687SNicolai Haehnle       }
23959041687SNicolai Haehnle       TaintStack.push_back(SuccBlock);
24059041687SNicolai Haehnle     }
24105ae04c3SSimon Moll   } while (!TaintStack.empty());
24259041687SNicolai Haehnle }
24359041687SNicolai Haehnle 
propagateLoopExitDivergence(const BasicBlock & DivExit,const Loop & InnerDivLoop)24411bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::propagateLoopExitDivergence(
24511bf7da6SSameer Sahasrabuddhe     const BasicBlock &DivExit, const Loop &InnerDivLoop) {
24605ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
24705ae04c3SSimon Moll 
24805ae04c3SSimon Moll   // Find outer-most loop that does not contain \p DivExit
24905ae04c3SSimon Moll   const Loop *DivLoop = &InnerDivLoop;
25005ae04c3SSimon Moll   const Loop *OuterDivLoop = DivLoop;
25105ae04c3SSimon Moll   const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit);
25205ae04c3SSimon Moll   const unsigned LoopExitDepth =
25305ae04c3SSimon Moll       ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0;
25405ae04c3SSimon Moll   while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) {
25505ae04c3SSimon Moll     DivergentLoops.insert(DivLoop); // all crossed loops are divergent
25605ae04c3SSimon Moll     OuterDivLoop = DivLoop;
25705ae04c3SSimon Moll     DivLoop = DivLoop->getParentLoop();
25805ae04c3SSimon Moll   }
25905ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName()
26005ae04c3SSimon Moll                     << "\n");
26105ae04c3SSimon Moll 
26205ae04c3SSimon Moll   analyzeLoopExitDivergence(DivExit, *OuterDivLoop);
26305ae04c3SSimon Moll }
26405ae04c3SSimon Moll 
26505ae04c3SSimon Moll // this is a divergent join point - mark all phi nodes as divergent and push
26605ae04c3SSimon Moll // them onto the stack.
taintAndPushPhiNodes(const BasicBlock & JoinBlock)26711bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
26805ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
26905ae04c3SSimon Moll                     << "\n");
27005ae04c3SSimon Moll 
27105ae04c3SSimon Moll   // ignore divergence outside the region
27205ae04c3SSimon Moll   if (!inRegion(JoinBlock)) {
27305ae04c3SSimon Moll     return;
27405ae04c3SSimon Moll   }
27505ae04c3SSimon Moll 
27605ae04c3SSimon Moll   // push non-divergent phi nodes in JoinBlock to the worklist
27705ae04c3SSimon Moll   for (const auto &Phi : JoinBlock.phis()) {
27859041687SNicolai Haehnle     if (isDivergent(Phi))
27959041687SNicolai Haehnle       continue;
28005ae04c3SSimon Moll     // FIXME Theoretically ,the 'undef' value could be replaced by any other
28105ae04c3SSimon Moll     // value causing spurious divergence.
28205ae04c3SSimon Moll     if (Phi.hasConstantOrUndefValue())
28305ae04c3SSimon Moll       continue;
28405ae04c3SSimon Moll     if (markDivergent(Phi))
28559041687SNicolai Haehnle       Worklist.push_back(&Phi);
28659041687SNicolai Haehnle   }
28759041687SNicolai Haehnle }
28859041687SNicolai Haehnle 
analyzeControlDivergence(const Instruction & Term)28911bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
29005ae04c3SSimon Moll   LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
29105ae04c3SSimon Moll                     << "\n");
29259041687SNicolai Haehnle 
29337aa16ebSAustin Kerbow   // Don't propagate divergence from unreachable blocks.
29437aa16ebSAustin Kerbow   if (!DT.isReachableFromEntry(Term.getParent()))
29537aa16ebSAustin Kerbow     return;
29637aa16ebSAustin Kerbow 
29759041687SNicolai Haehnle   const auto *BranchLoop = LI.getLoopFor(Term.getParent());
29859041687SNicolai Haehnle 
29905ae04c3SSimon Moll   const auto &DivDesc = SDA.getJoinBlocks(Term);
30059041687SNicolai Haehnle 
30105ae04c3SSimon Moll   // Iterate over all blocks now reachable by a disjoint path join
30205ae04c3SSimon Moll   for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
30305ae04c3SSimon Moll     taintAndPushPhiNodes(*JoinBlock);
30459041687SNicolai Haehnle   }
30559041687SNicolai Haehnle 
30605ae04c3SSimon Moll   assert(DivDesc.LoopDivBlocks.empty() || BranchLoop);
30705ae04c3SSimon Moll   for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) {
30805ae04c3SSimon Moll     propagateLoopExitDivergence(*DivExitBlock, *BranchLoop);
30959041687SNicolai Haehnle   }
31059041687SNicolai Haehnle }
31159041687SNicolai Haehnle 
compute()31211bf7da6SSameer Sahasrabuddhe void DivergenceAnalysisImpl::compute() {
31305ae04c3SSimon Moll   // Initialize worklist.
31405ae04c3SSimon Moll   auto DivValuesCopy = DivergentValues;
31505ae04c3SSimon Moll   for (const auto *DivVal : DivValuesCopy) {
31605ae04c3SSimon Moll     assert(isDivergent(*DivVal) && "Worklist invariant violated!");
31759041687SNicolai Haehnle     pushUsers(*DivVal);
31859041687SNicolai Haehnle   }
31959041687SNicolai Haehnle 
32005ae04c3SSimon Moll   // All values on the Worklist are divergent.
32105ae04c3SSimon Moll   // Their users may not have been updated yed.
32259041687SNicolai Haehnle   while (!Worklist.empty()) {
32359041687SNicolai Haehnle     const Instruction &I = *Worklist.back();
32459041687SNicolai Haehnle     Worklist.pop_back();
32559041687SNicolai Haehnle 
32659041687SNicolai Haehnle     // propagate value divergence to users
32705ae04c3SSimon Moll     assert(isDivergent(I) && "Worklist invariant violated!");
32859041687SNicolai Haehnle     pushUsers(I);
32959041687SNicolai Haehnle   }
33059041687SNicolai Haehnle }
33159041687SNicolai Haehnle 
isAlwaysUniform(const Value & V) const33211bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
333805d5959SKazu Hirata   return UniformOverrides.contains(&V);
33459041687SNicolai Haehnle }
33559041687SNicolai Haehnle 
isDivergent(const Value & V) const33611bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
337805d5959SKazu Hirata   return DivergentValues.contains(&V);
33859041687SNicolai Haehnle }
33959041687SNicolai Haehnle 
isDivergentUse(const Use & U) const34011bf7da6SSameer Sahasrabuddhe bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
341dcb75324SJay Foad   Value &V = *U.get();
342dcb75324SJay Foad   Instruction &I = *cast<Instruction>(U.getUser());
343dcb75324SJay Foad   return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
344dcb75324SJay Foad }
345dcb75324SJay Foad 
DivergenceInfo(Function & F,const DominatorTree & DT,const PostDominatorTree & PDT,const LoopInfo & LI,const TargetTransformInfo & TTI,bool KnownReducible)34611bf7da6SSameer Sahasrabuddhe DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
34711bf7da6SSameer Sahasrabuddhe                                const PostDominatorTree &PDT, const LoopInfo &LI,
34811bf7da6SSameer Sahasrabuddhe                                const TargetTransformInfo &TTI,
34911bf7da6SSameer Sahasrabuddhe                                bool KnownReducible)
350b752eb88SKazu Hirata     : F(F) {
35111bf7da6SSameer Sahasrabuddhe   if (!KnownReducible) {
35211bf7da6SSameer Sahasrabuddhe     using RPOTraversal = ReversePostOrderTraversal<const Function *>;
35311bf7da6SSameer Sahasrabuddhe     RPOTraversal FuncRPOT(&F);
35411bf7da6SSameer Sahasrabuddhe     if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
35511bf7da6SSameer Sahasrabuddhe                                const LoopInfo>(FuncRPOT, LI)) {
35611bf7da6SSameer Sahasrabuddhe       ContainsIrreducible = true;
35759041687SNicolai Haehnle       return;
35859041687SNicolai Haehnle     }
35959041687SNicolai Haehnle   }
36011bf7da6SSameer Sahasrabuddhe   SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
36111bf7da6SSameer Sahasrabuddhe   DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
36211bf7da6SSameer Sahasrabuddhe                                                 /* LCSSA */ false);
36356d0ed2aSNicolai Haehnle   for (auto &I : instructions(F)) {
36456d0ed2aSNicolai Haehnle     if (TTI.isSourceOfDivergence(&I)) {
36511bf7da6SSameer Sahasrabuddhe       DA->markDivergent(I);
36656d0ed2aSNicolai Haehnle     } else if (TTI.isAlwaysUniform(&I)) {
36711bf7da6SSameer Sahasrabuddhe       DA->addUniformOverride(I);
36856d0ed2aSNicolai Haehnle     }
36956d0ed2aSNicolai Haehnle   }
37056d0ed2aSNicolai Haehnle   for (auto &Arg : F.args()) {
37156d0ed2aSNicolai Haehnle     if (TTI.isSourceOfDivergence(&Arg)) {
37211bf7da6SSameer Sahasrabuddhe       DA->markDivergent(Arg);
37356d0ed2aSNicolai Haehnle     }
37456d0ed2aSNicolai Haehnle   }
37556d0ed2aSNicolai Haehnle 
37611bf7da6SSameer Sahasrabuddhe   DA->compute();
37756d0ed2aSNicolai Haehnle }
37856d0ed2aSNicolai Haehnle 
37911bf7da6SSameer Sahasrabuddhe AnalysisKey DivergenceAnalysis::Key;
38011bf7da6SSameer Sahasrabuddhe 
38111bf7da6SSameer Sahasrabuddhe DivergenceAnalysis::Result
run(Function & F,FunctionAnalysisManager & AM)38211bf7da6SSameer Sahasrabuddhe DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
38311bf7da6SSameer Sahasrabuddhe   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
38411bf7da6SSameer Sahasrabuddhe   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
38511bf7da6SSameer Sahasrabuddhe   auto &LI = AM.getResult<LoopAnalysis>(F);
38611bf7da6SSameer Sahasrabuddhe   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
38711bf7da6SSameer Sahasrabuddhe 
38811bf7da6SSameer Sahasrabuddhe   return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
38956d0ed2aSNicolai Haehnle }
39056d0ed2aSNicolai Haehnle 
39111bf7da6SSameer Sahasrabuddhe PreservedAnalyses
run(Function & F,FunctionAnalysisManager & FAM)39211bf7da6SSameer Sahasrabuddhe DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
39311bf7da6SSameer Sahasrabuddhe   auto &DI = FAM.getResult<DivergenceAnalysis>(F);
39411bf7da6SSameer Sahasrabuddhe   OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
39511bf7da6SSameer Sahasrabuddhe   if (DI.hasDivergence()) {
39611bf7da6SSameer Sahasrabuddhe     for (auto &Arg : F.args()) {
39711bf7da6SSameer Sahasrabuddhe       OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : "           ");
39811bf7da6SSameer Sahasrabuddhe       OS << Arg << "\n";
399dcb75324SJay Foad     }
400896d0e1aSKazu Hirata     for (const BasicBlock &BB : F) {
40111bf7da6SSameer Sahasrabuddhe       OS << "\n           " << BB.getName() << ":\n";
402*601b3a13SKazu Hirata       for (const auto &I : BB.instructionsWithoutDebug()) {
40311bf7da6SSameer Sahasrabuddhe         OS << (DI.isDivergent(I) ? "DIVERGENT:     " : "               ");
40411bf7da6SSameer Sahasrabuddhe         OS << I << "\n";
40511bf7da6SSameer Sahasrabuddhe       }
40611bf7da6SSameer Sahasrabuddhe     }
40711bf7da6SSameer Sahasrabuddhe   }
40811bf7da6SSameer Sahasrabuddhe   return PreservedAnalyses::all();
40956d0ed2aSNicolai Haehnle }
410