1f5041ce5SDiego Novillo //===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
2f5041ce5SDiego Novillo //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f5041ce5SDiego Novillo //
7f5041ce5SDiego Novillo //===----------------------------------------------------------------------===//
8f5041ce5SDiego Novillo //
9f5041ce5SDiego Novillo // This file adds DWARF discriminators to the IR. Path discriminators are
10f5041ce5SDiego Novillo // used to decide what CFG path was taken inside sub-graphs whose instructions
11f5041ce5SDiego Novillo // share the same line and column number information.
12f5041ce5SDiego Novillo //
13f5041ce5SDiego Novillo // The main user of this is the sample profiler. Instruction samples are
14f5041ce5SDiego Novillo // mapped to line number information. Since a single line may be spread
15f5041ce5SDiego Novillo // out over several basic blocks, discriminators add more precise location
16f5041ce5SDiego Novillo // for the samples.
17f5041ce5SDiego Novillo //
18f5041ce5SDiego Novillo // For example,
19f5041ce5SDiego Novillo //
20f5041ce5SDiego Novillo // 1 #define ASSERT(P)
21f5041ce5SDiego Novillo // 2 if (!(P))
22f5041ce5SDiego Novillo // 3 abort()
23f5041ce5SDiego Novillo // ...
24f5041ce5SDiego Novillo // 100 while (true) {
25f5041ce5SDiego Novillo // 101 ASSERT (sum < 0);
26f5041ce5SDiego Novillo // 102 ...
27f5041ce5SDiego Novillo // 130 }
28f5041ce5SDiego Novillo //
29f5041ce5SDiego Novillo // when converted to IR, this snippet looks something like:
30f5041ce5SDiego Novillo //
31f5041ce5SDiego Novillo // while.body: ; preds = %entry, %if.end
32f5041ce5SDiego Novillo // %0 = load i32* %sum, align 4, !dbg !15
33f5041ce5SDiego Novillo // %cmp = icmp slt i32 %0, 0, !dbg !15
34f5041ce5SDiego Novillo // br i1 %cmp, label %if.end, label %if.then, !dbg !15
35f5041ce5SDiego Novillo //
36f5041ce5SDiego Novillo // if.then: ; preds = %while.body
37f5041ce5SDiego Novillo // call void @abort(), !dbg !15
38f5041ce5SDiego Novillo // br label %if.end, !dbg !15
39f5041ce5SDiego Novillo //
40f5041ce5SDiego Novillo // Notice that all the instructions in blocks 'while.body' and 'if.then'
41f5041ce5SDiego Novillo // have exactly the same debug information. When this program is sampled
42f5041ce5SDiego Novillo // at runtime, the profiler will assume that all these instructions are
43f5041ce5SDiego Novillo // equally frequent. This, in turn, will consider the edge while.body->if.then
44f5041ce5SDiego Novillo // to be frequently taken (which is incorrect).
45f5041ce5SDiego Novillo //
46f5041ce5SDiego Novillo // By adding a discriminator value to the instructions in block 'if.then',
47f5041ce5SDiego Novillo // we can distinguish instructions at line 101 with discriminator 0 from
48f5041ce5SDiego Novillo // the instructions at line 101 with discriminator 1.
49f5041ce5SDiego Novillo //
50f5041ce5SDiego Novillo // For more details about DWARF discriminators, please visit
51f5041ce5SDiego Novillo // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
526cadde7fSEugene Zelenko //
53f5041ce5SDiego Novillo //===----------------------------------------------------------------------===//
54f5041ce5SDiego Novillo
551eaecefaSXinliang David Li #include "llvm/Transforms/Utils/AddDiscriminators.h"
5623e2278eSDehao Chen #include "llvm/ADT/DenseMap.h"
5746f8fbbbSDehao Chen #include "llvm/ADT/DenseSet.h"
586cadde7fSEugene Zelenko #include "llvm/ADT/StringRef.h"
59f5041ce5SDiego Novillo #include "llvm/IR/BasicBlock.h"
606cadde7fSEugene Zelenko #include "llvm/IR/DebugInfoMetadata.h"
616cadde7fSEugene Zelenko #include "llvm/IR/Function.h"
626cadde7fSEugene Zelenko #include "llvm/IR/Instruction.h"
63f5041ce5SDiego Novillo #include "llvm/IR/Instructions.h"
64978060ceSPavel Labath #include "llvm/IR/IntrinsicInst.h"
656cadde7fSEugene Zelenko #include "llvm/IR/PassManager.h"
6605da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
67f5041ce5SDiego Novillo #include "llvm/Pass.h"
686cadde7fSEugene Zelenko #include "llvm/Support/Casting.h"
69f5041ce5SDiego Novillo #include "llvm/Support/CommandLine.h"
70f5041ce5SDiego Novillo #include "llvm/Support/Debug.h"
71f5041ce5SDiego Novillo #include "llvm/Support/raw_ostream.h"
72a373d18eSDavid Blaikie #include "llvm/Transforms/Utils.h"
7382a0bb1aSRong Xu #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
746cadde7fSEugene Zelenko #include <utility>
75f5041ce5SDiego Novillo
76f5041ce5SDiego Novillo using namespace llvm;
7782a0bb1aSRong Xu using namespace sampleprofutil;
78f5041ce5SDiego Novillo
79964daaafSChandler Carruth #define DEBUG_TYPE "add-discriminators"
80964daaafSChandler Carruth
816cadde7fSEugene Zelenko // Command line option to disable discriminator generation even in the
826cadde7fSEugene Zelenko // presence of debug information. This is only needed when debugging
836cadde7fSEugene Zelenko // debug info generation issues.
846cadde7fSEugene Zelenko static cl::opt<bool> NoDiscriminators(
856cadde7fSEugene Zelenko "no-discriminators", cl::init(false),
866cadde7fSEugene Zelenko cl::desc("Disable generation of discriminator information."));
876cadde7fSEugene Zelenko
88f5041ce5SDiego Novillo namespace {
896cadde7fSEugene Zelenko
901eaecefaSXinliang David Li // The legacy pass of AddDiscriminators.
911eaecefaSXinliang David Li struct AddDiscriminatorsLegacyPass : public FunctionPass {
92f5041ce5SDiego Novillo static char ID; // Pass identification, replacement for typeid
936cadde7fSEugene Zelenko
AddDiscriminatorsLegacyPass__anon547f5c520111::AddDiscriminatorsLegacyPass941eaecefaSXinliang David Li AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
951eaecefaSXinliang David Li initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
96f5041ce5SDiego Novillo }
97f5041ce5SDiego Novillo
983e4c697cSCraig Topper bool runOnFunction(Function &F) override;
99f5041ce5SDiego Novillo };
1001eaecefaSXinliang David Li
1016ac3f739SEugene Zelenko } // end anonymous namespace
102f5041ce5SDiego Novillo
1031eaecefaSXinliang David Li char AddDiscriminatorsLegacyPass::ID = 0;
1046cadde7fSEugene Zelenko
1051eaecefaSXinliang David Li INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
106f5041ce5SDiego Novillo "Add DWARF path discriminators", false, false)
1071eaecefaSXinliang David Li INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
108f5041ce5SDiego Novillo "Add DWARF path discriminators", false, false)
109f5041ce5SDiego Novillo
1101eaecefaSXinliang David Li // Create the legacy AddDiscriminatorsPass.
createAddDiscriminatorsPass()111f5041ce5SDiego Novillo FunctionPass *llvm::createAddDiscriminatorsPass() {
1121eaecefaSXinliang David Li return new AddDiscriminatorsLegacyPass();
113f5041ce5SDiego Novillo }
114f5041ce5SDiego Novillo
shouldHaveDiscriminator(const Instruction * I)1158e26936bSAndrea Di Biagio static bool shouldHaveDiscriminator(const Instruction *I) {
1168e26936bSAndrea Di Biagio return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
1178e26936bSAndrea Di Biagio }
1188e26936bSAndrea Di Biagio
1195f8f34e4SAdrian Prantl /// Assign DWARF discriminators.
120f5041ce5SDiego Novillo ///
121f5041ce5SDiego Novillo /// To assign discriminators, we examine the boundaries of every
122f5041ce5SDiego Novillo /// basic block and its successors. Suppose there is a basic block B1
123f5041ce5SDiego Novillo /// with successor B2. The last instruction I1 in B1 and the first
124f5041ce5SDiego Novillo /// instruction I2 in B2 are located at the same file and line number.
125f5041ce5SDiego Novillo /// This situation is illustrated in the following code snippet:
126f5041ce5SDiego Novillo ///
127f5041ce5SDiego Novillo /// if (i < 10) x = i;
128f5041ce5SDiego Novillo ///
129f5041ce5SDiego Novillo /// entry:
130f5041ce5SDiego Novillo /// br i1 %cmp, label %if.then, label %if.end, !dbg !10
131f5041ce5SDiego Novillo /// if.then:
132f5041ce5SDiego Novillo /// %1 = load i32* %i.addr, align 4, !dbg !10
133f5041ce5SDiego Novillo /// store i32 %1, i32* %x, align 4, !dbg !10
134f5041ce5SDiego Novillo /// br label %if.end, !dbg !10
135f5041ce5SDiego Novillo /// if.end:
136f5041ce5SDiego Novillo /// ret void, !dbg !12
137f5041ce5SDiego Novillo ///
138f5041ce5SDiego Novillo /// Notice how the branch instruction in block 'entry' and all the
139f5041ce5SDiego Novillo /// instructions in block 'if.then' have the exact same debug location
140f5041ce5SDiego Novillo /// information (!dbg !10).
141f5041ce5SDiego Novillo ///
142f5041ce5SDiego Novillo /// To distinguish instructions in block 'entry' from instructions in
143f5041ce5SDiego Novillo /// block 'if.then', we generate a new lexical block for all the
144f5041ce5SDiego Novillo /// instruction in block 'if.then' that share the same file and line
145f5041ce5SDiego Novillo /// location with the last instruction of block 'entry'.
146f5041ce5SDiego Novillo ///
147f5041ce5SDiego Novillo /// This new lexical block will have the same location information as
148f5041ce5SDiego Novillo /// the previous one, but with a new DWARF discriminator value.
149f5041ce5SDiego Novillo ///
150f5041ce5SDiego Novillo /// One of the main uses of this discriminator value is in runtime
151f5041ce5SDiego Novillo /// sample profilers. It allows the profiler to distinguish instructions
152f5041ce5SDiego Novillo /// at location !dbg !10 that execute on different basic blocks. This is
153f5041ce5SDiego Novillo /// important because while the predicate 'if (x < 10)' may have been
154f5041ce5SDiego Novillo /// executed millions of times, the assignment 'x = i' may have only
155f5041ce5SDiego Novillo /// executed a handful of times (meaning that the entry->if.then edge is
156f5041ce5SDiego Novillo /// seldom taken).
157f5041ce5SDiego Novillo ///
158f5041ce5SDiego Novillo /// If we did not have discriminator information, the profiler would
159f5041ce5SDiego Novillo /// assign the same weight to both blocks 'entry' and 'if.then', which
160f5041ce5SDiego Novillo /// in turn will make it conclude that the entry->if.then edge is very
161f5041ce5SDiego Novillo /// hot.
162f5041ce5SDiego Novillo ///
163f5041ce5SDiego Novillo /// To decide where to create new discriminator values, this function
164f5041ce5SDiego Novillo /// traverses the CFG and examines instruction at basic block boundaries.
165f5041ce5SDiego Novillo /// If the last instruction I1 of a block B1 is at the same file and line
166f5041ce5SDiego Novillo /// location as instruction I2 of successor B2, then it creates a new
167f5041ce5SDiego Novillo /// lexical block for I2 and all the instruction in B2 that share the same
168f5041ce5SDiego Novillo /// file and line location as I2. This new lexical block will have a
169f5041ce5SDiego Novillo /// different discriminator number than I1.
addDiscriminators(Function & F)1701e16d61fSXinliang David Li static bool addDiscriminators(Function &F) {
171f5041ce5SDiego Novillo // If the function has debug information, but the user has disabled
172f5041ce5SDiego Novillo // discriminators, do nothing.
1730915c047SDiego Novillo // Simlarly, if the function has no debug info, do nothing.
1746e0c8446SDehao Chen if (NoDiscriminators || !F.getSubprogram())
1750915c047SDiego Novillo return false;
176f5041ce5SDiego Novillo
17782a0bb1aSRong Xu // Create FSDiscriminatorVariable if flow sensitive discriminators are used.
17882a0bb1aSRong Xu if (EnableFSDiscriminator)
17982a0bb1aSRong Xu createFSDiscriminatorVariable(F.getParent());
18082a0bb1aSRong Xu
181f5041ce5SDiego Novillo bool Changed = false;
182f5041ce5SDiego Novillo
1836cadde7fSEugene Zelenko using Location = std::pair<StringRef, unsigned>;
1846cadde7fSEugene Zelenko using BBSet = DenseSet<const BasicBlock *>;
1856cadde7fSEugene Zelenko using LocationBBMap = DenseMap<Location, BBSet>;
1866cadde7fSEugene Zelenko using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
1876cadde7fSEugene Zelenko using LocationSet = DenseSet<Location>;
18823e2278eSDehao Chen
18923e2278eSDehao Chen LocationBBMap LBM;
190939993ffSDehao Chen LocationDiscriminatorMap LDM;
19123e2278eSDehao Chen
19223e2278eSDehao Chen // Traverse all instructions in the function. If the source line location
19323e2278eSDehao Chen // of the instruction appears in other basic block, assign a new
19423e2278eSDehao Chen // discriminator for this instruction.
1955b4c837cSDuncan P. N. Exon Smith for (BasicBlock &B : F) {
19623e2278eSDehao Chen for (auto &I : B.getInstList()) {
1978e26936bSAndrea Di Biagio // Not all intrinsic calls should have a discriminator.
1988e26936bSAndrea Di Biagio // We want to avoid a non-deterministic assignment of discriminators at
1998e26936bSAndrea Di Biagio // different debug levels. We still allow discriminators on memory
2008e26936bSAndrea Di Biagio // intrinsic calls because those can be early expanded by SROA into
2018e26936bSAndrea Di Biagio // pairs of loads and stores, and the expanded load/store instructions
2028e26936bSAndrea Di Biagio // should have a valid discriminator.
2038e26936bSAndrea Di Biagio if (!shouldHaveDiscriminator(&I))
204ec819c09SDuncan P. N. Exon Smith continue;
20523e2278eSDehao Chen const DILocation *DIL = I.getDebugLoc();
20623e2278eSDehao Chen if (!DIL)
207ec819c09SDuncan P. N. Exon Smith continue;
20823e2278eSDehao Chen Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
20923e2278eSDehao Chen auto &BBMap = LBM[L];
210e713000eSDehao Chen auto R = BBMap.insert(&B);
21123e2278eSDehao Chen if (BBMap.size() == 1)
21223e2278eSDehao Chen continue;
21328d2d281SAdrian Prantl // If we could insert more than one block with the same line+file, a
21423e2278eSDehao Chen // discriminator is needed to distinguish both instructions.
2152ca9be33SDehao Chen // Only the lowest 7 bits are used to represent a discriminator to fit
2162ca9be33SDehao Chen // it in 1 byte ULEB128 representation.
217fb02f714SDehao Chen unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
218ec026302SMircea Trofin auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
219b53eeb6fSMircea Trofin if (!NewDIL) {
220b53eeb6fSMircea Trofin LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
221b53eeb6fSMircea Trofin << DIL->getFilename() << ":" << DIL->getLine() << ":"
222b53eeb6fSMircea Trofin << DIL->getColumn() << ":" << Discriminator << " "
223b53eeb6fSMircea Trofin << I << "\n");
224b53eeb6fSMircea Trofin } else {
225*7a47ee51SKazu Hirata I.setDebugLoc(*NewDIL);
226d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
2272ca9be33SDehao Chen << DIL->getColumn() << ":" << Discriminator << " " << I
2282ca9be33SDehao Chen << "\n");
229b53eeb6fSMircea Trofin }
230f5041ce5SDiego Novillo Changed = true;
231f5041ce5SDiego Novillo }
232f5041ce5SDiego Novillo }
2333656e306SDehao Chen
2343656e306SDehao Chen // Traverse all instructions and assign new discriminators to call
2353656e306SDehao Chen // instructions with the same lineno that are in the same basic block.
2363656e306SDehao Chen // Sample base profile needs to distinguish different function calls within
2373656e306SDehao Chen // a same source line for correct profile annotation.
2383656e306SDehao Chen for (BasicBlock &B : F) {
23946f8fbbbSDehao Chen LocationSet CallLocations;
2403656e306SDehao Chen for (auto &I : B.getInstList()) {
2418e26936bSAndrea Di Biagio // We bypass intrinsic calls for the following two reasons:
242d68904f9SJames Henderson // 1) We want to avoid a non-deterministic assignment of
2438e26936bSAndrea Di Biagio // discriminators.
2448e26936bSAndrea Di Biagio // 2) We want to minimize the number of base discriminators used.
245d129d3e9SDavid Callahan if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
246978060ceSPavel Labath continue;
247978060ceSPavel Labath
248d129d3e9SDavid Callahan DILocation *CurrentDIL = I.getDebugLoc();
24934cc6767SDehao Chen if (!CurrentDIL)
25034cc6767SDehao Chen continue;
251939993ffSDehao Chen Location L =
25246f8fbbbSDehao Chen std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
25346f8fbbbSDehao Chen if (!CallLocations.insert(L).second) {
254fb02f714SDehao Chen unsigned Discriminator = ++LDM[L];
255ec026302SMircea Trofin auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
256b53eeb6fSMircea Trofin if (!NewDIL) {
257b53eeb6fSMircea Trofin LLVM_DEBUG(dbgs()
258b53eeb6fSMircea Trofin << "Could not encode discriminator: "
259b53eeb6fSMircea Trofin << CurrentDIL->getFilename() << ":"
260b53eeb6fSMircea Trofin << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
261b53eeb6fSMircea Trofin << ":" << Discriminator << " " << I << "\n");
262b53eeb6fSMircea Trofin } else {
263*7a47ee51SKazu Hirata I.setDebugLoc(*NewDIL);
2643656e306SDehao Chen Changed = true;
2653656e306SDehao Chen }
2663656e306SDehao Chen }
2673656e306SDehao Chen }
268b53eeb6fSMircea Trofin }
269f5041ce5SDiego Novillo return Changed;
270f5041ce5SDiego Novillo }
2711eaecefaSXinliang David Li
runOnFunction(Function & F)2721eaecefaSXinliang David Li bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
2731eaecefaSXinliang David Li return addDiscriminators(F);
2741eaecefaSXinliang David Li }
2756cadde7fSEugene Zelenko
run(Function & F,FunctionAnalysisManager & AM)2761eaecefaSXinliang David Li PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
27736e0d01eSSean Silva FunctionAnalysisManager &AM) {
2781e16d61fSXinliang David Li if (!addDiscriminators(F))
2791eaecefaSXinliang David Li return PreservedAnalyses::all();
2801e16d61fSXinliang David Li
2811e16d61fSXinliang David Li // FIXME: should be all()
2821e16d61fSXinliang David Li return PreservedAnalyses::none();
2831eaecefaSXinliang David Li }
284