1 //=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for computing profile summary data.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/IR/ProfileSummary.h"
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ProfileData/ProfileCommon.h"
16 #include "llvm/ProfileData/SampleProf.h"
17 #include "llvm/Support/CommandLine.h"
18
19 using namespace llvm;
20
21 cl::opt<bool> UseContextLessSummary(
22 "profile-summary-contextless", cl::Hidden,
23 cl::desc("Merge context profiles before calculating thresholds."));
24
25 // The following two parameters determine the threshold for a count to be
26 // considered hot/cold. These two parameters are percentile values (multiplied
27 // by 10000). If the counts are sorted in descending order, the minimum count to
28 // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
29 // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
30 // threshold for determining cold count (everything <= this threshold is
31 // considered cold).
32 cl::opt<int> ProfileSummaryCutoffHot(
33 "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
34 cl::desc("A count is hot if it exceeds the minimum count to"
35 " reach this percentile of total counts."));
36
37 cl::opt<int> ProfileSummaryCutoffCold(
38 "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
39 cl::desc("A count is cold if it is below the minimum count"
40 " to reach this percentile of total counts."));
41
42 cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
43 "profile-summary-huge-working-set-size-threshold", cl::Hidden,
44 cl::init(15000),
45 cl::desc("The code working set size is considered huge if the number of"
46 " blocks required to reach the -profile-summary-cutoff-hot"
47 " percentile exceeds this count."));
48
49 cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
50 "profile-summary-large-working-set-size-threshold", cl::Hidden,
51 cl::init(12500),
52 cl::desc("The code working set size is considered large if the number of"
53 " blocks required to reach the -profile-summary-cutoff-hot"
54 " percentile exceeds this count."));
55
56 // The next two options override the counts derived from summary computation and
57 // are useful for debugging purposes.
58 cl::opt<uint64_t> ProfileSummaryHotCount(
59 "profile-summary-hot-count", cl::ReallyHidden,
60 cl::desc("A fixed hot count that overrides the count derived from"
61 " profile-summary-cutoff-hot"));
62
63 cl::opt<uint64_t> ProfileSummaryColdCount(
64 "profile-summary-cold-count", cl::ReallyHidden,
65 cl::desc("A fixed cold count that overrides the count derived from"
66 " profile-summary-cutoff-cold"));
67
68 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
69 // (which is 1000000) is a desired percentile of total counts.
70 static const uint32_t DefaultCutoffsData[] = {
71 10000, /* 1% */
72 100000, /* 10% */
73 200000, 300000, 400000, 500000, 600000, 700000, 800000,
74 900000, 950000, 990000, 999000, 999900, 999990, 999999};
75 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
76 DefaultCutoffsData;
77
78 const ProfileSummaryEntry &
getEntryForPercentile(const SummaryEntryVector & DS,uint64_t Percentile)79 ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
80 uint64_t Percentile) {
81 auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
82 return Entry.Cutoff < Percentile;
83 });
84 // The required percentile has to be <= one of the percentiles in the
85 // detailed summary.
86 if (It == DS.end())
87 report_fatal_error("Desired percentile exceeds the maximum cutoff");
88 return *It;
89 }
90
addRecord(const InstrProfRecord & R)91 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
92 // The first counter is not necessarily an entry count for IR
93 // instrumentation profiles.
94 // Eventually MaxFunctionCount will become obsolete and this can be
95 // removed.
96 addEntryCount(R.Counts[0]);
97 for (size_t I = 1, E = R.Counts.size(); I < E; ++I)
98 addInternalCount(R.Counts[I]);
99 }
100
101 // To compute the detailed summary, we consider each line containing samples as
102 // equivalent to a block with a count in the instrumented profile.
addRecord(const sampleprof::FunctionSamples & FS,bool isCallsiteSample)103 void SampleProfileSummaryBuilder::addRecord(
104 const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
105 if (!isCallsiteSample) {
106 NumFunctions++;
107 if (FS.getHeadSamples() > MaxFunctionCount)
108 MaxFunctionCount = FS.getHeadSamples();
109 } else if (FS.getContext().hasAttribute(
110 sampleprof::ContextDuplicatedIntoBase)) {
111 // Do not recount callee samples if they are already merged into their base
112 // profiles. This can happen to CS nested profile.
113 return;
114 }
115
116 for (const auto &I : FS.getBodySamples()) {
117 uint64_t Count = I.second.getSamples();
118 addCount(Count);
119 }
120 for (const auto &I : FS.getCallsiteSamples())
121 for (const auto &CS : I.second)
122 addRecord(CS.second, true);
123 }
124
125 // The argument to this method is a vector of cutoff percentages and the return
126 // value is a vector of (Cutoff, MinCount, NumCounts) triplets.
computeDetailedSummary()127 void ProfileSummaryBuilder::computeDetailedSummary() {
128 if (DetailedSummaryCutoffs.empty())
129 return;
130 llvm::sort(DetailedSummaryCutoffs);
131 auto Iter = CountFrequencies.begin();
132 const auto End = CountFrequencies.end();
133
134 uint32_t CountsSeen = 0;
135 uint64_t CurrSum = 0, Count = 0;
136
137 for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
138 assert(Cutoff <= 999999);
139 APInt Temp(128, TotalCount);
140 APInt N(128, Cutoff);
141 APInt D(128, ProfileSummary::Scale);
142 Temp *= N;
143 Temp = Temp.sdiv(D);
144 uint64_t DesiredCount = Temp.getZExtValue();
145 assert(DesiredCount <= TotalCount);
146 while (CurrSum < DesiredCount && Iter != End) {
147 Count = Iter->first;
148 uint32_t Freq = Iter->second;
149 CurrSum += (Count * Freq);
150 CountsSeen += Freq;
151 Iter++;
152 }
153 assert(CurrSum >= DesiredCount);
154 ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen};
155 DetailedSummary.push_back(PSE);
156 }
157 }
158
159 uint64_t
getHotCountThreshold(const SummaryEntryVector & DS)160 ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
161 auto &HotEntry =
162 ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
163 uint64_t HotCountThreshold = HotEntry.MinCount;
164 if (ProfileSummaryHotCount.getNumOccurrences() > 0)
165 HotCountThreshold = ProfileSummaryHotCount;
166 return HotCountThreshold;
167 }
168
169 uint64_t
getColdCountThreshold(const SummaryEntryVector & DS)170 ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
171 auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
172 DS, ProfileSummaryCutoffCold);
173 uint64_t ColdCountThreshold = ColdEntry.MinCount;
174 if (ProfileSummaryColdCount.getNumOccurrences() > 0)
175 ColdCountThreshold = ProfileSummaryColdCount;
176 return ColdCountThreshold;
177 }
178
getSummary()179 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
180 computeDetailedSummary();
181 return std::make_unique<ProfileSummary>(
182 ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0,
183 MaxFunctionCount, NumCounts, NumFunctions);
184 }
185
186 std::unique_ptr<ProfileSummary>
computeSummaryForProfiles(const SampleProfileMap & Profiles)187 SampleProfileSummaryBuilder::computeSummaryForProfiles(
188 const SampleProfileMap &Profiles) {
189 assert(NumFunctions == 0 &&
190 "This can only be called on an empty summary builder");
191 sampleprof::SampleProfileMap ContextLessProfiles;
192 const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
193 // For CSSPGO, context-sensitive profile effectively split a function profile
194 // into many copies each representing the CFG profile of a particular calling
195 // context. That makes the count distribution looks more flat as we now have
196 // more function profiles each with lower counts, which in turn leads to lower
197 // hot thresholds. To compensate for that, by default we merge context
198 // profiles before computing profile summary.
199 if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
200 !UseContextLessSummary.getNumOccurrences())) {
201 for (const auto &I : Profiles) {
202 ContextLessProfiles[I.second.getName()].merge(I.second);
203 }
204 ProfilesToUse = &ContextLessProfiles;
205 }
206
207 for (const auto &I : *ProfilesToUse) {
208 const sampleprof::FunctionSamples &Profile = I.second;
209 addRecord(Profile);
210 }
211
212 return getSummary();
213 }
214
getSummary()215 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
216 computeDetailedSummary();
217 return std::make_unique<ProfileSummary>(
218 ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount,
219 MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions);
220 }
221
addEntryCount(uint64_t Count)222 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
223 NumFunctions++;
224
225 // Skip invalid count.
226 if (Count == (uint64_t)-1)
227 return;
228
229 addCount(Count);
230 if (Count > MaxFunctionCount)
231 MaxFunctionCount = Count;
232 }
233
addInternalCount(uint64_t Count)234 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
235 // Skip invalid count.
236 if (Count == (uint64_t)-1)
237 return;
238
239 addCount(Count);
240 if (Count > MaxInternalBlockCount)
241 MaxInternalBlockCount = Count;
242 }
243