1 //===- ProfileSummaryInfo.cpp - Global profile summary information --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that provides access to the global profile summary
10 // information.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/ProfileSummaryInfo.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/Instructions.h"
18 #include "llvm/IR/Metadata.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/ProfileSummary.h"
21 #include "llvm/InitializePasses.h"
22 #include "llvm/Support/CommandLine.h"
23 using namespace llvm;
24 
25 // The following two parameters determine the threshold for a count to be
26 // considered hot/cold. These two parameters are percentile values (multiplied
27 // by 10000). If the counts are sorted in descending order, the minimum count to
28 // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
29 // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
30 // threshold for determining cold count (everything <= this threshold is
31 // considered cold).
32 
33 static cl::opt<int> ProfileSummaryCutoffHot(
34     "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
35     cl::desc("A count is hot if it exceeds the minimum count to"
36              " reach this percentile of total counts."));
37 
38 static cl::opt<int> ProfileSummaryCutoffCold(
39     "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
40     cl::desc("A count is cold if it is below the minimum count"
41              " to reach this percentile of total counts."));
42 
43 static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
44     "profile-summary-huge-working-set-size-threshold", cl::Hidden,
45     cl::init(15000), cl::ZeroOrMore,
46     cl::desc("The code working set size is considered huge if the number of"
47              " blocks required to reach the -profile-summary-cutoff-hot"
48              " percentile exceeds this count."));
49 
50 static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
51     "profile-summary-large-working-set-size-threshold", cl::Hidden,
52     cl::init(12500), cl::ZeroOrMore,
53     cl::desc("The code working set size is considered large if the number of"
54              " blocks required to reach the -profile-summary-cutoff-hot"
55              " percentile exceeds this count."));
56 
57 // The next two options override the counts derived from summary computation and
58 // are useful for debugging purposes.
59 static cl::opt<int> ProfileSummaryHotCount(
60     "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
61     cl::desc("A fixed hot count that overrides the count derived from"
62              " profile-summary-cutoff-hot"));
63 
64 static cl::opt<int> ProfileSummaryColdCount(
65     "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
66     cl::desc("A fixed cold count that overrides the count derived from"
67              " profile-summary-cutoff-cold"));
68 
69 static cl::opt<bool> PartialProfile(
70     "partial-profile", cl::Hidden, cl::init(false),
71     cl::desc("Specify the current profile is used as a partial profile."));
72 
73 cl::opt<bool> ScalePartialSampleProfileWorkingSetSize(
74     "scale-partial-sample-profile-working-set-size", cl::Hidden, cl::init(true),
75     cl::desc(
76         "If true, scale the working set size of the partial sample profile "
77         "by the partial profile ratio to reflect the size of the program "
78         "being compiled."));
79 
80 static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
81     "partial-sample-profile-working-set-size-scale-factor", cl::Hidden,
82     cl::init(0.008),
83     cl::desc("The scale factor used to scale the working set size of the "
84              "partial sample profile along with the partial profile ratio. "
85              "This includes the factor of the profile counter per block "
86              "and the factor to scale the working set size to use the same "
87              "shared thresholds as PGO."));
88 
89 // Find the summary entry for a desired percentile of counts.
90 static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
91                                                         uint64_t Percentile) {
92   auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
93     return Entry.Cutoff < Percentile;
94   });
95   // The required percentile has to be <= one of the percentiles in the
96   // detailed summary.
97   if (It == DS.end())
98     report_fatal_error("Desired percentile exceeds the maximum cutoff");
99   return *It;
100 }
101 
102 // The profile summary metadata may be attached either by the frontend or by
103 // any backend passes (IR level instrumentation, for example). This method
104 // checks if the Summary is null and if so checks if the summary metadata is now
105 // available in the module and parses it to get the Summary object.
106 void ProfileSummaryInfo::refresh() {
107   if (hasProfileSummary())
108     return;
109   // First try to get context sensitive ProfileSummary.
110   auto *SummaryMD = M.getProfileSummary(/* IsCS */ true);
111   if (SummaryMD)
112     Summary.reset(ProfileSummary::getFromMD(SummaryMD));
113 
114   if (!hasProfileSummary()) {
115     // This will actually return PSK_Instr or PSK_Sample summary.
116     SummaryMD = M.getProfileSummary(/* IsCS */ false);
117     if (SummaryMD)
118       Summary.reset(ProfileSummary::getFromMD(SummaryMD));
119   }
120   if (!hasProfileSummary())
121     return;
122   computeThresholds();
123 }
124 
125 Optional<uint64_t> ProfileSummaryInfo::getProfileCount(
126     const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const {
127   assert((isa<CallInst>(Call) || isa<InvokeInst>(Call)) &&
128          "We can only get profile count for call/invoke instruction.");
129   if (hasSampleProfile()) {
130     // In sample PGO mode, check if there is a profile metadata on the
131     // instruction. If it is present, determine hotness solely based on that,
132     // since the sampled entry count may not be accurate. If there is no
133     // annotated on the instruction, return None.
134     uint64_t TotalCount;
135     if (Call.extractProfTotalWeight(TotalCount))
136       return TotalCount;
137     return None;
138   }
139   if (BFI)
140     return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic);
141   return None;
142 }
143 
144 /// Returns true if the function's entry is hot. If it returns false, it
145 /// either means it is not hot or it is unknown whether it is hot or not (for
146 /// example, no profile data is available).
147 bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const {
148   if (!F || !hasProfileSummary())
149     return false;
150   auto FunctionCount = F->getEntryCount();
151   // FIXME: The heuristic used below for determining hotness is based on
152   // preliminary SPEC tuning for inliner. This will eventually be a
153   // convenience method that calls isHotCount.
154   return FunctionCount && isHotCount(FunctionCount.getCount());
155 }
156 
157 /// Returns true if the function contains hot code. This can include a hot
158 /// function entry count, hot basic block, or (in the case of Sample PGO)
159 /// hot total call edge count.
160 /// If it returns false, it either means it is not hot or it is unknown
161 /// (for example, no profile data is available).
162 bool ProfileSummaryInfo::isFunctionHotInCallGraph(
163     const Function *F, BlockFrequencyInfo &BFI) const {
164   if (!F || !hasProfileSummary())
165     return false;
166   if (auto FunctionCount = F->getEntryCount())
167     if (isHotCount(FunctionCount.getCount()))
168       return true;
169 
170   if (hasSampleProfile()) {
171     uint64_t TotalCallCount = 0;
172     for (const auto &BB : *F)
173       for (const auto &I : BB)
174         if (isa<CallInst>(I) || isa<InvokeInst>(I))
175           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
176             TotalCallCount += CallCount.getValue();
177     if (isHotCount(TotalCallCount))
178       return true;
179   }
180   for (const auto &BB : *F)
181     if (isHotBlock(&BB, &BFI))
182       return true;
183   return false;
184 }
185 
186 /// Returns true if the function only contains cold code. This means that
187 /// the function entry and blocks are all cold, and (in the case of Sample PGO)
188 /// the total call edge count is cold.
189 /// If it returns false, it either means it is not cold or it is unknown
190 /// (for example, no profile data is available).
191 bool ProfileSummaryInfo::isFunctionColdInCallGraph(
192     const Function *F, BlockFrequencyInfo &BFI) const {
193   if (!F || !hasProfileSummary())
194     return false;
195   if (auto FunctionCount = F->getEntryCount())
196     if (!isColdCount(FunctionCount.getCount()))
197       return false;
198 
199   if (hasSampleProfile()) {
200     uint64_t TotalCallCount = 0;
201     for (const auto &BB : *F)
202       for (const auto &I : BB)
203         if (isa<CallInst>(I) || isa<InvokeInst>(I))
204           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
205             TotalCallCount += CallCount.getValue();
206     if (!isColdCount(TotalCallCount))
207       return false;
208   }
209   for (const auto &BB : *F)
210     if (!isColdBlock(&BB, &BFI))
211       return false;
212   return true;
213 }
214 
215 bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const {
216   assert(hasPartialSampleProfile() && "Expect partial sample profile");
217   return !F.getEntryCount().hasValue();
218 }
219 
220 template <bool isHot>
221 bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
222     int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
223   if (!F || !hasProfileSummary())
224     return false;
225   if (auto FunctionCount = F->getEntryCount()) {
226     if (isHot &&
227         isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
228       return true;
229     if (!isHot &&
230         !isColdCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
231       return false;
232   }
233   if (hasSampleProfile()) {
234     uint64_t TotalCallCount = 0;
235     for (const auto &BB : *F)
236       for (const auto &I : BB)
237         if (isa<CallInst>(I) || isa<InvokeInst>(I))
238           if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
239             TotalCallCount += CallCount.getValue();
240     if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
241       return true;
242     if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount))
243       return false;
244   }
245   for (const auto &BB : *F) {
246     if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI))
247       return true;
248     if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &BFI))
249       return false;
250   }
251   return !isHot;
252 }
253 
254 // Like isFunctionHotInCallGraph but for a given cutoff.
255 bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile(
256     int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
257   return isFunctionHotOrColdInCallGraphNthPercentile<true>(
258       PercentileCutoff, F, BFI);
259 }
260 
261 bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile(
262     int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
263   return isFunctionHotOrColdInCallGraphNthPercentile<false>(
264       PercentileCutoff, F, BFI);
265 }
266 
267 /// Returns true if the function's entry is a cold. If it returns false, it
268 /// either means it is not cold or it is unknown whether it is cold or not (for
269 /// example, no profile data is available).
270 bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const {
271   if (!F)
272     return false;
273   if (F->hasFnAttribute(Attribute::Cold))
274     return true;
275   if (!hasProfileSummary())
276     return false;
277   auto FunctionCount = F->getEntryCount();
278   // FIXME: The heuristic used below for determining coldness is based on
279   // preliminary SPEC tuning for inliner. This will eventually be a
280   // convenience method that calls isHotCount.
281   return FunctionCount && isColdCount(FunctionCount.getCount());
282 }
283 
284 /// Compute the hot and cold thresholds.
285 void ProfileSummaryInfo::computeThresholds() {
286   auto &DetailedSummary = Summary->getDetailedSummary();
287   auto &HotEntry =
288       getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
289   HotCountThreshold = HotEntry.MinCount;
290   if (ProfileSummaryHotCount.getNumOccurrences() > 0)
291     HotCountThreshold = ProfileSummaryHotCount;
292   auto &ColdEntry =
293       getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
294   ColdCountThreshold = ColdEntry.MinCount;
295   if (ProfileSummaryColdCount.getNumOccurrences() > 0)
296     ColdCountThreshold = ProfileSummaryColdCount;
297   assert(ColdCountThreshold <= HotCountThreshold &&
298          "Cold count threshold cannot exceed hot count threshold!");
299   if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
300     HasHugeWorkingSetSize =
301         HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
302     HasLargeWorkingSetSize =
303         HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
304   } else {
305     // Scale the working set size of the partial sample profile to reflect the
306     // size of the program being compiled.
307     double PartialProfileRatio = Summary->getPartialProfileRatio();
308     uint64_t ScaledHotEntryNumCounts =
309         static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio *
310                               PartialSampleProfileWorkingSetSizeScaleFactor);
311     HasHugeWorkingSetSize =
312         ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
313     HasLargeWorkingSetSize =
314         ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
315   }
316 }
317 
318 Optional<uint64_t>
319 ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const {
320   if (!hasProfileSummary())
321     return None;
322   auto iter = ThresholdCache.find(PercentileCutoff);
323   if (iter != ThresholdCache.end()) {
324     return iter->second;
325   }
326   auto &DetailedSummary = Summary->getDetailedSummary();
327   auto &Entry =
328       getEntryForPercentile(DetailedSummary, PercentileCutoff);
329   uint64_t CountThreshold = Entry.MinCount;
330   ThresholdCache[PercentileCutoff] = CountThreshold;
331   return CountThreshold;
332 }
333 
334 bool ProfileSummaryInfo::hasHugeWorkingSetSize() const {
335   return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
336 }
337 
338 bool ProfileSummaryInfo::hasLargeWorkingSetSize() const {
339   return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue();
340 }
341 
342 bool ProfileSummaryInfo::isHotCount(uint64_t C) const {
343   return HotCountThreshold && C >= HotCountThreshold.getValue();
344 }
345 
346 bool ProfileSummaryInfo::isColdCount(uint64_t C) const {
347   return ColdCountThreshold && C <= ColdCountThreshold.getValue();
348 }
349 
350 template <bool isHot>
351 bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff,
352                                                        uint64_t C) const {
353   auto CountThreshold = computeThreshold(PercentileCutoff);
354   if (isHot)
355     return CountThreshold && C >= CountThreshold.getValue();
356   else
357     return CountThreshold && C <= CountThreshold.getValue();
358 }
359 
360 bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff,
361                                                  uint64_t C) const {
362   return isHotOrColdCountNthPercentile<true>(PercentileCutoff, C);
363 }
364 
365 bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff,
366                                                   uint64_t C) const {
367   return isHotOrColdCountNthPercentile<false>(PercentileCutoff, C);
368 }
369 
370 uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const {
371   return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
372 }
373 
374 uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
375   return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
376 }
377 
378 bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
379                                     BlockFrequencyInfo *BFI) const {
380   auto Count = BFI->getBlockProfileCount(BB);
381   return Count && isHotCount(*Count);
382 }
383 
384 bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
385                                      BlockFrequencyInfo *BFI) const {
386   auto Count = BFI->getBlockProfileCount(BB);
387   return Count && isColdCount(*Count);
388 }
389 
390 template <bool isHot>
391 bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(
392     int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
393   auto Count = BFI->getBlockProfileCount(BB);
394   if (isHot)
395     return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
396   else
397     return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
398 }
399 
400 bool ProfileSummaryInfo::isHotBlockNthPercentile(
401     int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
402   return isHotOrColdBlockNthPercentile<true>(PercentileCutoff, BB, BFI);
403 }
404 
405 bool ProfileSummaryInfo::isColdBlockNthPercentile(
406     int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
407   return isHotOrColdBlockNthPercentile<false>(PercentileCutoff, BB, BFI);
408 }
409 
410 bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB,
411                                        BlockFrequencyInfo *BFI) const {
412   auto C = getProfileCount(CB, BFI);
413   return C && isHotCount(*C);
414 }
415 
416 bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB,
417                                         BlockFrequencyInfo *BFI) const {
418   auto C = getProfileCount(CB, BFI);
419   if (C)
420     return isColdCount(*C);
421 
422   // In SamplePGO, if the caller has been sampled, and there is no profile
423   // annotated on the callsite, we consider the callsite as cold.
424   return hasSampleProfile() && CB.getCaller()->hasProfileData();
425 }
426 
427 bool ProfileSummaryInfo::hasPartialSampleProfile() const {
428   return hasProfileSummary() &&
429          Summary->getKind() == ProfileSummary::PSK_Sample &&
430          (PartialProfile || Summary->isPartialProfile());
431 }
432 
// Registers ProfileSummaryInfoWrapperPass under the argument string
// "profile-summary-info" with the description "Profile summary info".
// NOTE(review): this macro presumably also provides the
// initializeProfileSummaryInfoWrapperPassPass() function called by the
// constructor below, and the trailing `false, true` look like its CFG-only
// and is-analysis flags — confirm against llvm/PassSupport.h.
433 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
434                 "Profile summary info", false, true)
435 
436 ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
437     : ImmutablePass(ID) {
438   initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
439 }
440 
441 bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) {
442   PSI.reset(new ProfileSummaryInfo(M));
443   return false;
444 }
445 
446 bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) {
447   PSI.reset();
448   return false;
449 }
450 
451 AnalysisKey ProfileSummaryAnalysis::Key;
452 ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M,
453                                                ModuleAnalysisManager &) {
454   return ProfileSummaryInfo(M);
455 }
456 
457 PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M,
458                                                  ModuleAnalysisManager &AM) {
459   ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
460 
461   OS << "Functions in " << M.getName() << " with hot/cold annotations: \n";
462   for (auto &F : M) {
463     OS << F.getName();
464     if (PSI.isFunctionEntryHot(&F))
465       OS << " :hot entry ";
466     else if (PSI.isFunctionEntryCold(&F))
467       OS << " :cold entry ";
468     OS << "\n";
469   }
470   return PreservedAnalyses::all();
471 }
472 
// Definition of the static pass ID member. The ProfileSummaryInfoWrapperPass
// constructor hands it to the ImmutablePass base class.
473 char ProfileSummaryInfoWrapperPass::ID = 0;
474