191bc56edSDimitry Andric //===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
291bc56edSDimitry Andric //
391bc56edSDimitry Andric //                      The LLVM Compiler Infrastructure
491bc56edSDimitry Andric //
591bc56edSDimitry Andric // This file is distributed under the University of Illinois Open Source
691bc56edSDimitry Andric // License. See LICENSE.TXT for details.
791bc56edSDimitry Andric //
891bc56edSDimitry Andric //===----------------------------------------------------------------------===//
991bc56edSDimitry Andric //
1091bc56edSDimitry Andric // This file adds DWARF discriminators to the IR. Path discriminators are
1191bc56edSDimitry Andric // used to decide what CFG path was taken inside sub-graphs whose instructions
1291bc56edSDimitry Andric // share the same line and column number information.
1391bc56edSDimitry Andric //
1491bc56edSDimitry Andric // The main user of this is the sample profiler. Instruction samples are
1591bc56edSDimitry Andric // mapped to line number information. Since a single line may be spread
1691bc56edSDimitry Andric // out over several basic blocks, discriminators add more precise location
1791bc56edSDimitry Andric // for the samples.
1891bc56edSDimitry Andric //
1991bc56edSDimitry Andric // For example,
2091bc56edSDimitry Andric //
2191bc56edSDimitry Andric //   1  #define ASSERT(P)
2291bc56edSDimitry Andric //   2      if (!(P))
2391bc56edSDimitry Andric //   3        abort()
2491bc56edSDimitry Andric //   ...
2591bc56edSDimitry Andric //   100   while (true) {
2691bc56edSDimitry Andric //   101     ASSERT (sum < 0);
2791bc56edSDimitry Andric //   102     ...
2891bc56edSDimitry Andric //   130   }
2991bc56edSDimitry Andric //
3091bc56edSDimitry Andric // when converted to IR, this snippet looks something like:
3191bc56edSDimitry Andric //
3291bc56edSDimitry Andric // while.body:                                       ; preds = %entry, %if.end
3391bc56edSDimitry Andric //   %0 = load i32* %sum, align 4, !dbg !15
3491bc56edSDimitry Andric //   %cmp = icmp slt i32 %0, 0, !dbg !15
3591bc56edSDimitry Andric //   br i1 %cmp, label %if.end, label %if.then, !dbg !15
3691bc56edSDimitry Andric //
3791bc56edSDimitry Andric // if.then:                                          ; preds = %while.body
3891bc56edSDimitry Andric //   call void @abort(), !dbg !15
3991bc56edSDimitry Andric //   br label %if.end, !dbg !15
4091bc56edSDimitry Andric //
4191bc56edSDimitry Andric // Notice that all the instructions in blocks 'while.body' and 'if.then'
4291bc56edSDimitry Andric // have exactly the same debug information. When this program is sampled
4391bc56edSDimitry Andric // at runtime, the profiler will assume that all these instructions are
4491bc56edSDimitry Andric // equally frequent. This, in turn, will consider the edge while.body->if.then
4591bc56edSDimitry Andric // to be frequently taken (which is incorrect).
4691bc56edSDimitry Andric //
4791bc56edSDimitry Andric // By adding a discriminator value to the instructions in block 'if.then',
4891bc56edSDimitry Andric // we can distinguish instructions at line 101 with discriminator 0 from
4991bc56edSDimitry Andric // the instructions at line 101 with discriminator 1.
5091bc56edSDimitry Andric //
5191bc56edSDimitry Andric // For more details about DWARF discriminators, please visit
5291bc56edSDimitry Andric // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
532cab237bSDimitry Andric //
5491bc56edSDimitry Andric //===----------------------------------------------------------------------===//
5591bc56edSDimitry Andric 
563ca95b02SDimitry Andric #include "llvm/Transforms/Utils/AddDiscriminators.h"
577d523365SDimitry Andric #include "llvm/ADT/DenseMap.h"
583ca95b02SDimitry Andric #include "llvm/ADT/DenseSet.h"
592cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
6091bc56edSDimitry Andric #include "llvm/IR/BasicBlock.h"
612cab237bSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
622cab237bSDimitry Andric #include "llvm/IR/Function.h"
632cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
6491bc56edSDimitry Andric #include "llvm/IR/Instructions.h"
657d523365SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
662cab237bSDimitry Andric #include "llvm/IR/PassManager.h"
6791bc56edSDimitry Andric #include "llvm/Pass.h"
682cab237bSDimitry Andric #include "llvm/Support/Casting.h"
6991bc56edSDimitry Andric #include "llvm/Support/CommandLine.h"
7091bc56edSDimitry Andric #include "llvm/Support/Debug.h"
7191bc56edSDimitry Andric #include "llvm/Support/raw_ostream.h"
724ba319b5SDimitry Andric #include "llvm/Transforms/Utils.h"
732cab237bSDimitry Andric #include <utility>
7491bc56edSDimitry Andric 
7591bc56edSDimitry Andric using namespace llvm;
7691bc56edSDimitry Andric 
7791bc56edSDimitry Andric #define DEBUG_TYPE "add-discriminators"
7891bc56edSDimitry Andric 
792cab237bSDimitry Andric // Command line option to disable discriminator generation even in the
802cab237bSDimitry Andric // presence of debug information. This is only needed when debugging
812cab237bSDimitry Andric // debug info generation issues.
822cab237bSDimitry Andric static cl::opt<bool> NoDiscriminators(
832cab237bSDimitry Andric     "no-discriminators", cl::init(false),
842cab237bSDimitry Andric     cl::desc("Disable generation of discriminator information."));
852cab237bSDimitry Andric 
8691bc56edSDimitry Andric namespace {
872cab237bSDimitry Andric 
883ca95b02SDimitry Andric // The legacy pass of AddDiscriminators.
893ca95b02SDimitry Andric struct AddDiscriminatorsLegacyPass : public FunctionPass {
9091bc56edSDimitry Andric   static char ID; // Pass identification, replacement for typeid
912cab237bSDimitry Andric 
AddDiscriminatorsLegacyPass__anonb5170a870111::AddDiscriminatorsLegacyPass923ca95b02SDimitry Andric   AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
933ca95b02SDimitry Andric     initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
9491bc56edSDimitry Andric   }
9591bc56edSDimitry Andric 
9691bc56edSDimitry Andric   bool runOnFunction(Function &F) override;
9791bc56edSDimitry Andric };
9891bc56edSDimitry Andric 
993ca95b02SDimitry Andric } // end anonymous namespace
1003ca95b02SDimitry Andric 
1013ca95b02SDimitry Andric char AddDiscriminatorsLegacyPass::ID = 0;
1022cab237bSDimitry Andric 
1033ca95b02SDimitry Andric INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
10491bc56edSDimitry Andric                       "Add DWARF path discriminators", false, false)
1053ca95b02SDimitry Andric INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
10691bc56edSDimitry Andric                     "Add DWARF path discriminators", false, false)
10791bc56edSDimitry Andric 
1083ca95b02SDimitry Andric // Create the legacy AddDiscriminatorsPass.
createAddDiscriminatorsPass()10991bc56edSDimitry Andric FunctionPass *llvm::createAddDiscriminatorsPass() {
1103ca95b02SDimitry Andric   return new AddDiscriminatorsLegacyPass();
11191bc56edSDimitry Andric }
11291bc56edSDimitry Andric 
shouldHaveDiscriminator(const Instruction * I)1137a7e6055SDimitry Andric static bool shouldHaveDiscriminator(const Instruction *I) {
1147a7e6055SDimitry Andric   return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
1157a7e6055SDimitry Andric }
1167a7e6055SDimitry Andric 
1174ba319b5SDimitry Andric /// Assign DWARF discriminators.
11891bc56edSDimitry Andric ///
11991bc56edSDimitry Andric /// To assign discriminators, we examine the boundaries of every
12091bc56edSDimitry Andric /// basic block and its successors. Suppose there is a basic block B1
12191bc56edSDimitry Andric /// with successor B2. The last instruction I1 in B1 and the first
12291bc56edSDimitry Andric /// instruction I2 in B2 are located at the same file and line number.
12391bc56edSDimitry Andric /// This situation is illustrated in the following code snippet:
12491bc56edSDimitry Andric ///
12591bc56edSDimitry Andric ///       if (i < 10) x = i;
12691bc56edSDimitry Andric ///
12791bc56edSDimitry Andric ///     entry:
12891bc56edSDimitry Andric ///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
12991bc56edSDimitry Andric ///     if.then:
13091bc56edSDimitry Andric ///       %1 = load i32* %i.addr, align 4, !dbg !10
13191bc56edSDimitry Andric ///       store i32 %1, i32* %x, align 4, !dbg !10
13291bc56edSDimitry Andric ///       br label %if.end, !dbg !10
13391bc56edSDimitry Andric ///     if.end:
13491bc56edSDimitry Andric ///       ret void, !dbg !12
13591bc56edSDimitry Andric ///
13691bc56edSDimitry Andric /// Notice how the branch instruction in block 'entry' and all the
13791bc56edSDimitry Andric /// instructions in block 'if.then' have the exact same debug location
13891bc56edSDimitry Andric /// information (!dbg !10).
13991bc56edSDimitry Andric ///
14091bc56edSDimitry Andric /// To distinguish instructions in block 'entry' from instructions in
14191bc56edSDimitry Andric /// block 'if.then', we generate a new lexical block for all the
14291bc56edSDimitry Andric /// instruction in block 'if.then' that share the same file and line
14391bc56edSDimitry Andric /// location with the last instruction of block 'entry'.
14491bc56edSDimitry Andric ///
14591bc56edSDimitry Andric /// This new lexical block will have the same location information as
14691bc56edSDimitry Andric /// the previous one, but with a new DWARF discriminator value.
14791bc56edSDimitry Andric ///
14891bc56edSDimitry Andric /// One of the main uses of this discriminator value is in runtime
14991bc56edSDimitry Andric /// sample profilers. It allows the profiler to distinguish instructions
15091bc56edSDimitry Andric /// at location !dbg !10 that execute on different basic blocks. This is
15191bc56edSDimitry Andric /// important because while the predicate 'if (x < 10)' may have been
15291bc56edSDimitry Andric /// executed millions of times, the assignment 'x = i' may have only
15391bc56edSDimitry Andric /// executed a handful of times (meaning that the entry->if.then edge is
15491bc56edSDimitry Andric /// seldom taken).
15591bc56edSDimitry Andric ///
15691bc56edSDimitry Andric /// If we did not have discriminator information, the profiler would
15791bc56edSDimitry Andric /// assign the same weight to both blocks 'entry' and 'if.then', which
15891bc56edSDimitry Andric /// in turn will make it conclude that the entry->if.then edge is very
15991bc56edSDimitry Andric /// hot.
16091bc56edSDimitry Andric ///
16191bc56edSDimitry Andric /// To decide where to create new discriminator values, this function
16291bc56edSDimitry Andric /// traverses the CFG and examines instruction at basic block boundaries.
16391bc56edSDimitry Andric /// If the last instruction I1 of a block B1 is at the same file and line
16491bc56edSDimitry Andric /// location as instruction I2 of successor B2, then it creates a new
16591bc56edSDimitry Andric /// lexical block for I2 and all the instruction in B2 that share the same
16691bc56edSDimitry Andric /// file and line location as I2. This new lexical block will have a
16791bc56edSDimitry Andric /// different discriminator number than I1.
addDiscriminators(Function & F)1683ca95b02SDimitry Andric static bool addDiscriminators(Function &F) {
16991bc56edSDimitry Andric   // If the function has debug information, but the user has disabled
17091bc56edSDimitry Andric   // discriminators, do nothing.
17191bc56edSDimitry Andric   // Simlarly, if the function has no debug info, do nothing.
172d88c1a5aSDimitry Andric   if (NoDiscriminators || !F.getSubprogram())
17391bc56edSDimitry Andric     return false;
17491bc56edSDimitry Andric 
17591bc56edSDimitry Andric   bool Changed = false;
17691bc56edSDimitry Andric 
1772cab237bSDimitry Andric   using Location = std::pair<StringRef, unsigned>;
1782cab237bSDimitry Andric   using BBSet = DenseSet<const BasicBlock *>;
1792cab237bSDimitry Andric   using LocationBBMap = DenseMap<Location, BBSet>;
1802cab237bSDimitry Andric   using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
1812cab237bSDimitry Andric   using LocationSet = DenseSet<Location>;
1827d523365SDimitry Andric 
1837d523365SDimitry Andric   LocationBBMap LBM;
1843ca95b02SDimitry Andric   LocationDiscriminatorMap LDM;
1857d523365SDimitry Andric 
1867d523365SDimitry Andric   // Traverse all instructions in the function. If the source line location
1877d523365SDimitry Andric   // of the instruction appears in other basic block, assign a new
1887d523365SDimitry Andric   // discriminator for this instruction.
1897d523365SDimitry Andric   for (BasicBlock &B : F) {
1907d523365SDimitry Andric     for (auto &I : B.getInstList()) {
1917a7e6055SDimitry Andric       // Not all intrinsic calls should have a discriminator.
1927a7e6055SDimitry Andric       // We want to avoid a non-deterministic assignment of discriminators at
1937a7e6055SDimitry Andric       // different debug levels. We still allow discriminators on memory
1947a7e6055SDimitry Andric       // intrinsic calls because those can be early expanded by SROA into
1957a7e6055SDimitry Andric       // pairs of loads and stores, and the expanded load/store instructions
1967a7e6055SDimitry Andric       // should have a valid discriminator.
1977a7e6055SDimitry Andric       if (!shouldHaveDiscriminator(&I))
198ff0cc061SDimitry Andric         continue;
1997d523365SDimitry Andric       const DILocation *DIL = I.getDebugLoc();
2007d523365SDimitry Andric       if (!DIL)
201ff0cc061SDimitry Andric         continue;
2027d523365SDimitry Andric       Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
2037d523365SDimitry Andric       auto &BBMap = LBM[L];
204d88c1a5aSDimitry Andric       auto R = BBMap.insert(&B);
2057d523365SDimitry Andric       if (BBMap.size() == 1)
2067d523365SDimitry Andric         continue;
207d88c1a5aSDimitry Andric       // If we could insert more than one block with the same line+file, a
2087d523365SDimitry Andric       // discriminator is needed to distinguish both instructions.
209d88c1a5aSDimitry Andric       // Only the lowest 7 bits are used to represent a discriminator to fit
210d88c1a5aSDimitry Andric       // it in 1 byte ULEB128 representation.
2117a7e6055SDimitry Andric       unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
212*b5893f02SDimitry Andric       auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
213*b5893f02SDimitry Andric       if (!NewDIL) {
214*b5893f02SDimitry Andric         LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
215*b5893f02SDimitry Andric                           << DIL->getFilename() << ":" << DIL->getLine() << ":"
216*b5893f02SDimitry Andric                           << DIL->getColumn() << ":" << Discriminator << " "
217*b5893f02SDimitry Andric                           << I << "\n");
218*b5893f02SDimitry Andric       } else {
219*b5893f02SDimitry Andric         I.setDebugLoc(NewDIL.getValue());
2204ba319b5SDimitry Andric         LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
221d88c1a5aSDimitry Andric                    << DIL->getColumn() << ":" << Discriminator << " " << I
222d88c1a5aSDimitry Andric                    << "\n");
223*b5893f02SDimitry Andric       }
22491bc56edSDimitry Andric       Changed = true;
22591bc56edSDimitry Andric     }
22691bc56edSDimitry Andric   }
2277d523365SDimitry Andric 
2287d523365SDimitry Andric   // Traverse all instructions and assign new discriminators to call
2297d523365SDimitry Andric   // instructions with the same lineno that are in the same basic block.
2307d523365SDimitry Andric   // Sample base profile needs to distinguish different function calls within
2317d523365SDimitry Andric   // a same source line for correct profile annotation.
2327d523365SDimitry Andric   for (BasicBlock &B : F) {
2333ca95b02SDimitry Andric     LocationSet CallLocations;
2347d523365SDimitry Andric     for (auto &I : B.getInstList()) {
2357a7e6055SDimitry Andric       // We bypass intrinsic calls for the following two reasons:
2367a7e6055SDimitry Andric       //  1) We want to avoid a non-deterministic assigment of
2377a7e6055SDimitry Andric       //     discriminators.
2387a7e6055SDimitry Andric       //  2) We want to minimize the number of base discriminators used.
239*b5893f02SDimitry Andric       if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
2407d523365SDimitry Andric         continue;
2417d523365SDimitry Andric 
242*b5893f02SDimitry Andric       DILocation *CurrentDIL = I.getDebugLoc();
2433ca95b02SDimitry Andric       if (!CurrentDIL)
2443ca95b02SDimitry Andric         continue;
2453ca95b02SDimitry Andric       Location L =
2463ca95b02SDimitry Andric           std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
2473ca95b02SDimitry Andric       if (!CallLocations.insert(L).second) {
2487a7e6055SDimitry Andric         unsigned Discriminator = ++LDM[L];
249*b5893f02SDimitry Andric         auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
250*b5893f02SDimitry Andric         if (!NewDIL) {
251*b5893f02SDimitry Andric           LLVM_DEBUG(dbgs()
252*b5893f02SDimitry Andric                      << "Could not encode discriminator: "
253*b5893f02SDimitry Andric                      << CurrentDIL->getFilename() << ":"
254*b5893f02SDimitry Andric                      << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
255*b5893f02SDimitry Andric                      << ":" << Discriminator << " " << I << "\n");
256*b5893f02SDimitry Andric         } else {
257*b5893f02SDimitry Andric           I.setDebugLoc(NewDIL.getValue());
2587d523365SDimitry Andric           Changed = true;
2597d523365SDimitry Andric         }
2607d523365SDimitry Andric       }
26191bc56edSDimitry Andric     }
262*b5893f02SDimitry Andric   }
26391bc56edSDimitry Andric   return Changed;
26491bc56edSDimitry Andric }
2653ca95b02SDimitry Andric 
runOnFunction(Function & F)2663ca95b02SDimitry Andric bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
2673ca95b02SDimitry Andric   return addDiscriminators(F);
2683ca95b02SDimitry Andric }
2692cab237bSDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)2703ca95b02SDimitry Andric PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
271d88c1a5aSDimitry Andric                                              FunctionAnalysisManager &AM) {
2723ca95b02SDimitry Andric   if (!addDiscriminators(F))
2733ca95b02SDimitry Andric     return PreservedAnalyses::all();
2743ca95b02SDimitry Andric 
2753ca95b02SDimitry Andric   // FIXME: should be all()
2763ca95b02SDimitry Andric   return PreservedAnalyses::none();
2773ca95b02SDimitry Andric }
278