//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a transformation that synthesizes entry counts for
// functions and attaches !prof metadata to functions with the synthesized
// counts. The presence of !prof metadata with counter name set to
// 'synthesized_function_entry_count' indicates that the value of the counter
// is an estimation of the likely execution count of the function. This
// transform is applied only in non-PGO mode as functions get 'real'
// profile-based function entry counts in the PGO mode.
//
// The transformation works by first assigning some initial values to the entry
// counts of all functions and then doing a top-down traversal of the
// callgraph-scc to propagate the counts. For each function the set of callsites
// and their relative block frequency is gathered. The relative block frequency
// is multiplied by the entry count of the caller and added to the callee's
// entry count. For non-trivial SCCs, the new counts are computed from the
// previous counts and updated in one shot.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
// Scaled number type used for all intermediate count arithmetic in this file.
using Scaled64 = ScaledNumber<uint64_t>;
using ProfileCount = Function::ProfileCount;

#define DEBUG_TYPE "synthetic-counts-propagation"

/// Initial synthetic count assigned to functions that match none of the more
/// specific categories below (default: 10).
// NOTE(review): unlike the two options below this one is not 'static' --
// presumably so that other translation units can refer to it; confirm before
// changing its linkage.
cl::opt<int>
    InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
                          cl::ZeroOrMore,
                          cl::desc("Initial value of synthetic entry count."));

/// Initial synthetic count assigned to inline functions (default: 15).
static cl::opt<int> InlineSyntheticCount(
    "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
    cl::desc("Initial synthetic entry count for inline functions."));

/// Initial synthetic count assigned to cold functions (default: 5).
static cl::opt<int> ColdSyntheticCount(
    "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
    cl::desc("Initial synthetic entry count for cold functions."));

// Assign initial synthetic entry counts to functions.
66 static void 67 initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) { 68 auto MayHaveIndirectCalls = [](Function &F) { 69 for (auto *U : F.users()) { 70 if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) 71 return true; 72 } 73 return false; 74 }; 75 76 for (Function &F : M) { 77 uint64_t InitialCount = InitialSyntheticCount; 78 if (F.isDeclaration()) 79 continue; 80 if (F.hasFnAttribute(Attribute::AlwaysInline) || 81 F.hasFnAttribute(Attribute::InlineHint)) { 82 // Use a higher value for inline functions to account for the fact that 83 // these are usually beneficial to inline. 84 InitialCount = InlineSyntheticCount; 85 } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) { 86 // Local functions without inline hints get counts only through 87 // propagation. 88 InitialCount = 0; 89 } else if (F.hasFnAttribute(Attribute::Cold) || 90 F.hasFnAttribute(Attribute::NoInline)) { 91 // Use a lower value for noinline and cold functions. 92 InitialCount = ColdSyntheticCount; 93 } 94 SetCount(&F, InitialCount); 95 } 96 } 97 98 PreservedAnalyses SyntheticCountsPropagation::run(Module &M, 99 ModuleAnalysisManager &MAM) { 100 FunctionAnalysisManager &FAM = 101 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 102 DenseMap<Function *, Scaled64> Counts; 103 // Set initial entry counts. 104 initializeCounts( 105 M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); }); 106 107 // Edge includes information about the source. Hence ignore the first 108 // parameter. 
109 auto GetCallSiteProfCount = [&](const CallGraphNode *, 110 const CallGraphNode::CallRecord &Edge) { 111 Optional<Scaled64> Res = None; 112 if (!Edge.first) 113 return Res; 114 assert(isa<Instruction>(Edge.first)); 115 CallSite CS(cast<Instruction>(Edge.first)); 116 Function *Caller = CS.getCaller(); 117 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); 118 119 // Now compute the callsite count from relative frequency and 120 // entry count: 121 BasicBlock *CSBB = CS.getInstruction()->getParent(); 122 Scaled64 EntryFreq(BFI.getEntryFreq(), 0); 123 Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0); 124 BBCount /= EntryFreq; 125 BBCount *= Counts[Caller]; 126 return Optional<Scaled64>(BBCount); 127 }; 128 129 CallGraph CG(M); 130 // Propgate the entry counts on the callgraph. 131 SyntheticCountsUtils<const CallGraph *>::propagate( 132 &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) { 133 auto F = N->getFunction(); 134 if (!F || F->isDeclaration()) 135 return; 136 137 Counts[F] += New; 138 }); 139 140 // Set the counts as metadata. 141 for (auto Entry : Counts) { 142 Entry.first->setEntryCount(ProfileCount( 143 Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic)); 144 } 145 146 return PreservedAnalyses::all(); 147 } 148