//=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for computing profile summary data.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Controls whether SampleProfileSummaryBuilder::computeSummaryForProfiles
// merges the input profiles by function name before building the summary.
// When the flag is not given on the command line at all, that function still
// merges if sampleprof::FunctionSamples::ProfileIsCSFlat is set; passing the
// flag explicitly (true or false) takes precedence over that default.
cl::opt<bool> UseContextLessSummary(
    "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Merge context profiles before calculating thresholds."));

// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000). If the counts are sorted in descending order, the minimum count to
// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
// Hot cutoff: getHotCountThreshold() looks up the detailed-summary entry for
// this value and uses that entry's MinCount as the hot count threshold.
cl::opt<int> ProfileSummaryCutoffHot(
    "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
    cl::desc("A count is hot if it exceeds the minimum count to"
             " reach this percentile of total counts."));

// Cold cutoff: getColdCountThreshold() looks up the detailed-summary entry for
// this value and uses that entry's MinCount as the cold count threshold.
cl::opt<int> ProfileSummaryCutoffCold(
    "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
    cl::desc("A count is cold if it is below the minimum count"
             " to reach this percentile of total counts."));

// Block-count limit above which the working set is classified as "huge"
// (default 15000). The option is only defined here; nothing in the visible
// part of this file reads it.
cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
    "profile-summary-huge-working-set-size-threshold", cl::Hidden,
    cl::init(15000), cl::ZeroOrMore,
    cl::desc("The code working set size is considered huge if the number of"
             " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));

// Block-count limit above which the working set is classified as "large"
// (default 12500). The option is only defined here; nothing in the visible
// part of this file reads it.
cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
    "profile-summary-large-working-set-size-threshold", cl::Hidden,
    cl::init(12500), cl::ZeroOrMore,
    cl::desc("The code working set size is considered large if the number of"
             " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));

// The next two options override the counts derived from summary computation and
// are useful for debugging purposes. Neither has a default value: the
// threshold getters only honour them when getNumOccurrences() reports that the
// option was actually given.
cl::opt<uint64_t> ProfileSummaryHotCount(
    "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
    cl::desc("A fixed hot count that overrides the count derived from"
             " profile-summary-cutoff-hot"));

cl::opt<uint64_t> ProfileSummaryColdCount(
    "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
    cl::desc("A fixed cold count that overrides the count derived from"
             " profile-summary-cutoff-cold"));

// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000), is a desired percentile of total counts.
74 static const uint32_t DefaultCutoffsData[] = { 75 10000, /* 1% */ 76 100000, /* 10% */ 77 200000, 300000, 400000, 500000, 600000, 700000, 800000, 78 900000, 950000, 990000, 999000, 999900, 999990, 999999}; 79 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = 80 DefaultCutoffsData; 81 82 const ProfileSummaryEntry & 83 ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, 84 uint64_t Percentile) { 85 auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { 86 return Entry.Cutoff < Percentile; 87 }); 88 // The required percentile has to be <= one of the percentiles in the 89 // detailed summary. 90 if (It == DS.end()) 91 report_fatal_error("Desired percentile exceeds the maximum cutoff"); 92 return *It; 93 } 94 95 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { 96 // The first counter is not necessarily an entry count for IR 97 // instrumentation profiles. 98 // Eventually MaxFunctionCount will become obsolete and this can be 99 // removed. 100 addEntryCount(R.Counts[0]); 101 for (size_t I = 1, E = R.Counts.size(); I < E; ++I) 102 addInternalCount(R.Counts[I]); 103 } 104 105 // To compute the detailed summary, we consider each line containing samples as 106 // equivalent to a block with a count in the instrumented profile. 107 void SampleProfileSummaryBuilder::addRecord( 108 const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { 109 if (!isCallsiteSample) { 110 NumFunctions++; 111 if (FS.getHeadSamples() > MaxFunctionCount) 112 MaxFunctionCount = FS.getHeadSamples(); 113 } else if (FS.getContext().hasAttribute( 114 sampleprof::ContextDuplicatedIntoBase)) { 115 // Do not recount callee samples if they are already merged into their base 116 // profiles. This can happen to CS nested profile. 
117 return; 118 } 119 120 for (const auto &I : FS.getBodySamples()) { 121 uint64_t Count = I.second.getSamples(); 122 addCount(Count); 123 } 124 for (const auto &I : FS.getCallsiteSamples()) 125 for (const auto &CS : I.second) 126 addRecord(CS.second, true); 127 } 128 129 // The argument to this method is a vector of cutoff percentages and the return 130 // value is a vector of (Cutoff, MinCount, NumCounts) triplets. 131 void ProfileSummaryBuilder::computeDetailedSummary() { 132 if (DetailedSummaryCutoffs.empty()) 133 return; 134 llvm::sort(DetailedSummaryCutoffs); 135 auto Iter = CountFrequencies.begin(); 136 const auto End = CountFrequencies.end(); 137 138 uint32_t CountsSeen = 0; 139 uint64_t CurrSum = 0, Count = 0; 140 141 for (const uint32_t Cutoff : DetailedSummaryCutoffs) { 142 assert(Cutoff <= 999999); 143 APInt Temp(128, TotalCount); 144 APInt N(128, Cutoff); 145 APInt D(128, ProfileSummary::Scale); 146 Temp *= N; 147 Temp = Temp.sdiv(D); 148 uint64_t DesiredCount = Temp.getZExtValue(); 149 assert(DesiredCount <= TotalCount); 150 while (CurrSum < DesiredCount && Iter != End) { 151 Count = Iter->first; 152 uint32_t Freq = Iter->second; 153 CurrSum += (Count * Freq); 154 CountsSeen += Freq; 155 Iter++; 156 } 157 assert(CurrSum >= DesiredCount); 158 ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; 159 DetailedSummary.push_back(PSE); 160 } 161 } 162 163 uint64_t 164 ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { 165 auto &HotEntry = 166 ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); 167 uint64_t HotCountThreshold = HotEntry.MinCount; 168 if (ProfileSummaryHotCount.getNumOccurrences() > 0) 169 HotCountThreshold = ProfileSummaryHotCount; 170 return HotCountThreshold; 171 } 172 173 uint64_t 174 ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { 175 auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( 176 DS, ProfileSummaryCutoffCold); 177 uint64_t 
ColdCountThreshold = ColdEntry.MinCount; 178 if (ProfileSummaryColdCount.getNumOccurrences() > 0) 179 ColdCountThreshold = ProfileSummaryColdCount; 180 return ColdCountThreshold; 181 } 182 183 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { 184 computeDetailedSummary(); 185 return std::make_unique<ProfileSummary>( 186 ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, 187 MaxFunctionCount, NumCounts, NumFunctions); 188 } 189 190 std::unique_ptr<ProfileSummary> 191 SampleProfileSummaryBuilder::computeSummaryForProfiles( 192 const SampleProfileMap &Profiles) { 193 assert(NumFunctions == 0 && 194 "This can only be called on an empty summary builder"); 195 sampleprof::SampleProfileMap ContextLessProfiles; 196 const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; 197 // For CSSPGO, context-sensitive profile effectively split a function profile 198 // into many copies each representing the CFG profile of a particular calling 199 // context. That makes the count distribution looks more flat as we now have 200 // more function profiles each with lower counts, which in turn leads to lower 201 // hot thresholds. To compensate for that, by default we merge context 202 // profiles before computing profile summary. 
203 if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat && 204 !UseContextLessSummary.getNumOccurrences())) { 205 for (const auto &I : Profiles) { 206 ContextLessProfiles[I.second.getName()].merge(I.second); 207 } 208 ProfilesToUse = &ContextLessProfiles; 209 } 210 211 for (const auto &I : *ProfilesToUse) { 212 const sampleprof::FunctionSamples &Profile = I.second; 213 addRecord(Profile); 214 } 215 216 return getSummary(); 217 } 218 219 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { 220 computeDetailedSummary(); 221 return std::make_unique<ProfileSummary>( 222 ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, 223 MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); 224 } 225 226 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { 227 NumFunctions++; 228 229 // Skip invalid count. 230 if (Count == (uint64_t)-1) 231 return; 232 233 addCount(Count); 234 if (Count > MaxFunctionCount) 235 MaxFunctionCount = Count; 236 } 237 238 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { 239 // Skip invalid count. 240 if (Count == (uint64_t)-1) 241 return; 242 243 addCount(Count); 244 if (Count > MaxInternalBlockCount) 245 MaxInternalBlockCount = Count; 246 } 247