1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ErrorHandling.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/ProfileData/ProfileCommon.h"
13 #include <float.h>
14 #include <unordered_set>
15 
16 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
17                                     cl::Required,
18                                     cl::desc("Output profile file"));
19 static cl::alias OutputA("o", cl::desc("Alias for --output"),
20                          cl::aliasopt(OutputFilename));
21 
22 static cl::opt<SampleProfileFormat> OutputFormat(
23     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
24     cl::values(
25         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
26         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
27         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
28         clEnumValN(SPF_Text, "text", "Text encoding"),
29         clEnumValN(SPF_GCC, "gcc",
30                    "GCC encoding (only meaningful for -sample)")));
31 
32 cl::opt<bool> UseMD5(
33     "use-md5", cl::init(false), cl::Hidden,
34     cl::desc("Use md5 to represent function names in the output profile (only "
35              "meaningful for -extbinary)"));
36 
37 static cl::opt<bool> PopulateProfileSymbolList(
38     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
39     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
40 
41 static cl::opt<bool> FillZeroForAllFuncs(
42     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
43     cl::desc("Attribute all functions' range with zero count "
44              "even it's not hit by any samples."));
45 
46 static cl::opt<int32_t, true> RecursionCompression(
47     "compress-recursion",
48     cl::desc("Compressing recursion by deduplicating adjacent frame "
49              "sequences up to the specified size. -1 means no size limit."),
50     cl::Hidden,
51     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
52 
53 static cl::opt<bool>
54     TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
55                     cl::desc("If the total count of the profile is smaller "
56                              "than threshold, it will be trimmed."));
57 
58 static cl::opt<bool> CSProfMergeColdContext(
59     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
60     cl::desc("If the total count of context profile is smaller than "
61              "the threshold, it will be merged into context-less base "
62              "profile."));
63 
64 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
65     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
66     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
67              "context-less base profile"));
68 
69 static cl::opt<int, true> CSProfMaxContextDepth(
70     "csprof-max-context-depth", cl::ZeroOrMore,
71     cl::desc("Keep the last K contexts while merging profile. -1 means no "
72              "depth limit."),
73     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
74 
75 static cl::opt<double> HotFunctionDensityThreshold(
76     "hot-function-density-threshold", llvm::cl::init(1000),
77     llvm::cl::desc(
78         "specify density threshold for hot functions (default: 1000)"),
79     llvm::cl::Optional);
80 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
81                                  llvm::cl::desc("show profile density details"),
82                                  llvm::cl::Optional);
83 
84 static cl::opt<bool> UpdateTotalSamples(
85     "update-total-samples", llvm::cl::init(false),
86     llvm::cl::desc(
87         "Update total samples by accumulating all its body samples."),
88     llvm::cl::Optional);
89 
90 extern cl::opt<int> ProfileSummaryCutoffHot;
91 
92 static cl::opt<bool> GenCSNestedProfile(
93     "gen-cs-nested-profile", cl::Hidden, cl::init(false),
94     cl::desc("Generate nested function profiles for CSSPGO"));
95 
96 using namespace llvm;
97 using namespace sampleprof;
98 
99 namespace llvm {
100 namespace sampleprof {
101 
102 // Initialize the MaxCompressionSize to -1 which means no size limit
103 int32_t CSProfileGenerator::MaxCompressionSize = -1;
104 
105 int CSProfileGenerator::MaxContextDepth = -1;
106 
107 bool ProfileGeneratorBase::UseFSDiscriminator = false;
108 
109 std::unique_ptr<ProfileGeneratorBase>
110 ProfileGeneratorBase::create(ProfiledBinary *Binary,
111                              const ContextSampleCounterMap &SampleCounters,
112                              bool ProfileIsCSFlat) {
113   std::unique_ptr<ProfileGeneratorBase> Generator;
114   if (ProfileIsCSFlat) {
115     if (Binary->useFSDiscriminator())
116       exitWithError("FS discriminator is not supported in CS profile.");
117     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
118   } else {
119     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
120   }
121   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
122   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
123 
124   return Generator;
125 }
126 
127 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
128                                  SampleProfileMap &ProfileMap) {
129   // Populate profile symbol list if extended binary format is used.
130   ProfileSymbolList SymbolList;
131 
132   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
133     Binary->populateSymbolListFromDWARF(SymbolList);
134     Writer->setProfileSymbolList(&SymbolList);
135   }
136 
137   if (std::error_code EC = Writer->write(ProfileMap))
138     exitWithError(std::move(EC));
139 }
140 
141 void ProfileGeneratorBase::write() {
142   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
143   if (std::error_code EC = WriterOrErr.getError())
144     exitWithError(EC, OutputFilename);
145 
146   if (UseMD5) {
147     if (OutputFormat != SPF_Ext_Binary)
148       WithColor::warning() << "-use-md5 is ignored. Specify "
149                               "--format=extbinary to enable it\n";
150     else
151       WriterOrErr.get()->setUseMD5();
152   }
153 
154   write(std::move(WriterOrErr.get()), ProfileMap);
155 }
156 
157 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
158   if (Density == 0.0)
159     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
160                             "set too low. Please check your command.\n";
161   else if (Density < HotFunctionDensityThreshold)
162     WithColor::warning()
163         << "AutoFDO is estimated to optimize better with "
164         << format("%.1f", HotFunctionDensityThreshold / Density)
165         << "x more samples. Please consider increasing sampling rate or "
166            "profiling for longer duration to get more samples.\n";
167 
168   if (ShowDensity)
169     outs() << "Minimum profile density for hot functions with top "
170            << format("%.2f",
171                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
172                          10000)
173            << "% total samples: " << format("%.1f", Density) << "\n";
174 }
175 
176 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
177                                               uint64_t HotCntThreshold) {
178   double Density = DBL_MAX;
179   std::vector<const FunctionSamples *> HotFuncs;
180   for (auto &I : Profiles) {
181     auto &FuncSamples = I.second;
182     if (FuncSamples.getTotalSamples() < HotCntThreshold)
183       continue;
184     HotFuncs.emplace_back(&FuncSamples);
185   }
186 
187   for (auto *FuncSamples : HotFuncs) {
188     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
189     if (!Func)
190       continue;
191     uint64_t FuncSize = Func->getFuncSize();
192     if (FuncSize == 0)
193       continue;
194     Density =
195         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
196                               FuncSize);
197   }
198 
199   return Density == DBL_MAX ? 0.0 : Density;
200 }
201 
202 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
203                                               const RangeSample &Ranges) {
204 
205   /*
206   Regions may overlap with each other. Using the boundary info, find all
207   disjoint ranges and their sample count. BoundaryPoint contains the count
208   multiple samples begin/end at this points.
209 
210   |<--100-->|           Sample1
211   |<------200------>|   Sample2
212   A         B       C
213 
214   In the example above,
215   Sample1 begins at A, ends at B, its value is 100.
216   Sample2 beings at A, ends at C, its value is 200.
217   For A, BeginCount is the sum of sample begins at A, which is 300 and no
218   samples ends at A, so EndCount is 0.
219   Then boundary points A, B, and C with begin/end counts are:
220   A: (300, 0)
221   B: (0, 100)
222   C: (0, 200)
223   */
224   struct BoundaryPoint {
225     // Sum of sample counts beginning at this point
226     uint64_t BeginCount = UINT64_MAX;
227     // Sum of sample counts ending at this point
228     uint64_t EndCount = UINT64_MAX;
229     // Is the begin point of a zero range.
230     bool IsZeroRangeBegin = false;
231     // Is the end point of a zero range.
232     bool IsZeroRangeEnd = false;
233 
234     void addBeginCount(uint64_t Count) {
235       if (BeginCount == UINT64_MAX)
236         BeginCount = 0;
237       BeginCount += Count;
238     }
239 
240     void addEndCount(uint64_t Count) {
241       if (EndCount == UINT64_MAX)
242         EndCount = 0;
243       EndCount += Count;
244     }
245   };
246 
247   /*
248   For the above example. With boundary points, follwing logic finds two
249   disjoint region of
250 
251   [A,B]:   300
252   [B+1,C]: 200
253 
254   If there is a boundary point that both begin and end, the point itself
255   becomes a separate disjoint region. For example, if we have original
256   ranges of
257 
258   |<--- 100 --->|
259                 |<--- 200 --->|
260   A             B             C
261 
262   there are three boundary points with their begin/end counts of
263 
264   A: (100, 0)
265   B: (200, 100)
266   C: (0, 200)
267 
268   the disjoint ranges would be
269 
270   [A, B-1]: 100
271   [B, B]:   300
272   [B+1, C]: 200.
273 
274   Example for zero value range:
275 
276     |<--- 100 --->|
277                        |<--- 200 --->|
278   |<---------------  0 ----------------->|
279   A  B            C    D             E   F
280 
281   [A, B-1]  : 0
282   [B, C]    : 100
283   [C+1, D-1]: 0
284   [D, E]    : 200
285   [E+1, F]  : 0
286   */
287   std::map<uint64_t, BoundaryPoint> Boundaries;
288 
289   for (const auto &Item : Ranges) {
290     assert(Item.first.first <= Item.first.second &&
291            "Invalid instruction range");
292     auto &BeginPoint = Boundaries[Item.first.first];
293     auto &EndPoint = Boundaries[Item.first.second];
294     uint64_t Count = Item.second;
295 
296     BeginPoint.addBeginCount(Count);
297     EndPoint.addEndCount(Count);
298     if (Count == 0) {
299       BeginPoint.IsZeroRangeBegin = true;
300       EndPoint.IsZeroRangeEnd = true;
301     }
302   }
303 
304   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
305   // and the next valid address
306   uint64_t BeginAddress = UINT64_MAX;
307   int ZeroRangeDepth = 0;
308   uint64_t Count = 0;
309   for (const auto &Item : Boundaries) {
310     uint64_t Address = Item.first;
311     const BoundaryPoint &Point = Item.second;
312     if (Point.BeginCount != UINT64_MAX) {
313       if (BeginAddress != UINT64_MAX)
314         DisjointRanges[{BeginAddress, Address - 1}] = Count;
315       Count += Point.BeginCount;
316       BeginAddress = Address;
317       ZeroRangeDepth += Point.IsZeroRangeBegin;
318     }
319     if (Point.EndCount != UINT64_MAX) {
320       assert((BeginAddress != UINT64_MAX) &&
321              "First boundary point cannot be 'end' point");
322       DisjointRanges[{BeginAddress, Address}] = Count;
323       assert(Count >= Point.EndCount && "Mismatched live ranges");
324       Count -= Point.EndCount;
325       BeginAddress = Address + 1;
326       ZeroRangeDepth -= Point.IsZeroRangeEnd;
327       // If the remaining count is zero and it's no longer in a zero range, this
328       // means we consume all the ranges before, thus mark BeginAddress as
329       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
330       //  [<---- 10 ---->]
331       //                       [<---- 20 ---->]
332       //   A             B     C              D
333       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
334       // have the [B+1, C-1] zero range.
335       if (Count == 0 && ZeroRangeDepth == 0)
336         BeginAddress = UINT64_MAX;
337     }
338   }
339 }
340 
341 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
342     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
343     uint64_t Count) {
344   // Use the maximum count of samples with same line location
345   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
346 
347   // Use duplication factor to compensated for loop unroll/vectorization.
348   // Note that this is only needed when we're taking MAX of the counts at
349   // the location instead of SUM.
350   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
351 
352   ErrorOr<uint64_t> R =
353       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
354 
355   uint64_t PreviousCount = R ? R.get() : 0;
356   if (PreviousCount <= Count) {
357     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
358                                    Count - PreviousCount);
359   }
360 }
361 
362 void ProfileGeneratorBase::updateTotalSamples() {
363   if (!UpdateTotalSamples)
364     return;
365 
366   for (auto &Item : ProfileMap) {
367     FunctionSamples &FunctionProfile = Item.second;
368     FunctionProfile.updateTotalSamples();
369   }
370 }
371 
372 FunctionSamples &
373 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
374   SampleContext Context(FuncName);
375   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
376   if (Ret.second) {
377     FunctionSamples &FProfile = Ret.first->second;
378     FProfile.setContext(Context);
379   }
380   return Ret.first->second;
381 }
382 
383 void ProfileGenerator::generateProfile() {
384   if (Binary->usePseudoProbes()) {
385     // TODO: Support probe based profile generation
386     exitWithError("Probe based profile generation not supported for AutoFDO, "
387       "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
388   } else {
389     generateLineNumBasedProfile();
390   }
391   postProcessProfiles();
392 }
393 
394 void ProfileGenerator::postProcessProfiles() {
395   computeSummaryAndThreshold();
396   trimColdProfiles(ProfileMap, ColdCountThreshold);
397   calculateAndShowDensity(ProfileMap);
398 }
399 
400 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
401                                         uint64_t ColdCntThreshold) {
402   if (!TrimColdProfile)
403     return;
404 
405   // Move cold profiles into a tmp container.
406   std::vector<SampleContext> ColdProfiles;
407   for (const auto &I : ProfileMap) {
408     if (I.second.getTotalSamples() < ColdCntThreshold)
409       ColdProfiles.emplace_back(I.first);
410   }
411 
412   // Remove the cold profile from ProfileMap.
413   for (const auto &I : ColdProfiles)
414     ProfileMap.erase(I);
415 }
416 
417 void ProfileGenerator::generateLineNumBasedProfile() {
418   assert(SampleCounters.size() == 1 &&
419          "Must have one entry for profile generation.");
420   const SampleCounter &SC = SampleCounters.begin()->second;
421   // Fill in function body samples
422   populateBodySamplesForAllFunctions(SC.RangeCounter);
423   // Fill in boundary sample counts as well as call site samples for calls
424   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
425 
426   updateTotalSamples();
427 }
428 
429 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
430     const SampleContextFrameVector &FrameVec, uint64_t Count) {
431   // Get top level profile
432   FunctionSamples *FunctionProfile =
433       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
434   FunctionProfile->addTotalSamples(Count);
435 
436   for (size_t I = 1; I < FrameVec.size(); I++) {
437     LineLocation Callsite(
438         FrameVec[I - 1].Location.LineOffset,
439         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
440     FunctionSamplesMap &SamplesMap =
441         FunctionProfile->functionSamplesAt(Callsite);
442     auto Ret =
443         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
444     if (Ret.second) {
445       SampleContext Context(FrameVec[I].FuncName);
446       Ret.first->second.setContext(Context);
447     }
448     FunctionProfile = &Ret.first->second;
449     FunctionProfile->addTotalSamples(Count);
450   }
451 
452   return *FunctionProfile;
453 }
454 
455 RangeSample
456 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
457   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
458   if (FillZeroForAllFuncs) {
459     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
460       for (auto &R : FuncI.second.Ranges) {
461         Ranges[{R.first, R.second - 1}] += 0;
462       }
463     }
464   } else {
465     // For each range, we search for all ranges of the function it belongs to
466     // and initialize it with zero count, so it remains zero if doesn't hit any
467     // samples. This is to be consistent with compiler that interpret zero count
468     // as unexecuted(cold).
469     for (const auto &I : RangeCounter) {
470       uint64_t StartOffset = I.first.first;
471       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
472         Ranges[{Range.first, Range.second - 1}] += 0;
473     }
474   }
475   RangeSample DisjointRanges;
476   findDisjointRanges(DisjointRanges, Ranges);
477   return DisjointRanges;
478 }
479 
480 void ProfileGenerator::populateBodySamplesForAllFunctions(
481     const RangeSample &RangeCounter) {
482   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
483     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
484     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
485     uint64_t Count = Range.second;
486 
487     InstructionPointer IP(Binary, RangeBegin, true);
488     // Disjoint ranges may have range in the middle of two instr,
489     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
490     // can be Addr1+1 to Addr2-1. We should ignore such range.
491     if (IP.Address > RangeEnd)
492       continue;
493 
494     do {
495       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
496       const SampleContextFrameVector &FrameVec =
497           Binary->getFrameLocationStack(Offset);
498       if (!FrameVec.empty()) {
499         // FIXME: As accumulating total count per instruction caused some
500         // regression, we changed to accumulate total count per byte as a
501         // workaround. Tuning hotness threshold on the compiler side might be
502         // necessary in the future.
503         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
504             FrameVec, Count * Binary->getInstSize(Offset));
505         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
506                                             Count);
507       }
508     } while (IP.advance() && IP.Address <= RangeEnd);
509   }
510 }
511 
512 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
513   // Get the function range by branch target if it's a call branch.
514   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
515 
516   // We won't accumulate sample count for a range whose start is not the real
517   // function entry such as outlined function or inner labels.
518   if (!FRange || !FRange->IsFuncEntry)
519     return StringRef();
520 
521   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
522 }
523 
524 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
525     const BranchSample &BranchCounters) {
526   for (const auto &Entry : BranchCounters) {
527     uint64_t SourceOffset = Entry.first.first;
528     uint64_t TargetOffset = Entry.first.second;
529     uint64_t Count = Entry.second;
530     assert(Count != 0 && "Unexpected zero weight branch");
531 
532     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
533     if (CalleeName.size() == 0)
534       continue;
535     // Record called target sample and its count.
536     const SampleContextFrameVector &FrameVec =
537         Binary->getFrameLocationStack(SourceOffset);
538     if (!FrameVec.empty()) {
539       FunctionSamples &FunctionProfile =
540           getLeafProfileAndAddTotalSamples(FrameVec, 0);
541       FunctionProfile.addCalledTargetSamples(
542           FrameVec.back().Location.LineOffset,
543           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
544           CalleeName, Count);
545     }
546     // Add head samples for callee.
547     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
548     CalleeProfile.addHeadSamples(Count);
549   }
550 }
551 
552 void ProfileGeneratorBase::calculateAndShowDensity(
553     const SampleProfileMap &Profiles) {
554   double Density = calculateDensity(Profiles, HotCountThreshold);
555   showDensitySuggestion(Density);
556 }
557 
558 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
559     const SampleContextFrameVector &Context, bool WasLeafInlined) {
560   auto I = ProfileMap.find(SampleContext(Context));
561   if (I == ProfileMap.end()) {
562     // Save the new context for future references.
563     SampleContextFrames NewContext = *Contexts.insert(Context).first;
564     SampleContext FContext(NewContext, RawContext);
565     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
566     if (WasLeafInlined)
567       FContext.setAttribute(ContextWasInlined);
568     FunctionSamples &FProfile = Ret.first->second;
569     FProfile.setContext(FContext);
570     return Ret.first->second;
571   }
572   return I->second;
573 }
574 
575 void CSProfileGenerator::generateProfile() {
576   FunctionSamples::ProfileIsCSFlat = true;
577 
578   if (Binary->getTrackFuncContextSize())
579     computeSizeForProfiledFunctions();
580 
581   if (Binary->usePseudoProbes()) {
582     // Enable pseudo probe functionalities in SampleProf
583     FunctionSamples::ProfileIsProbeBased = true;
584     generateProbeBasedProfile();
585   } else {
586     generateLineNumBasedProfile();
587   }
588   postProcessProfiles();
589 }
590 
591 void CSProfileGenerator::computeSizeForProfiledFunctions() {
592   // Hash map to deduplicate the function range and the item is a pair of
593   // function start and end offset.
594   std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
595   // Go through all the ranges in the CS counters, use the start of the range to
596   // look up the function it belongs and record the function range.
597   for (const auto &CI : SampleCounters) {
598     for (const auto &Item : CI.second.RangeCounter) {
599       // FIXME: Filter the bogus crossing function range.
600       uint64_t StartOffset = Item.first.first;
601       // Note that a function can be spilt into multiple ranges, so get all
602       // ranges of the function.
603       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
604         AggregatedRanges[Range.first] = Range.second;
605     }
606   }
607 
608   for (const auto &I : AggregatedRanges) {
609     uint64_t StartOffset = I.first;
610     uint64_t EndOffset = I.second;
611     Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
612   }
613 }
614 
615 void CSProfileGenerator::generateLineNumBasedProfile() {
616   for (const auto &CI : SampleCounters) {
617     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
618 
619     // Get or create function profile for the range
620     FunctionSamples &FunctionProfile =
621         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
622 
623     // Fill in function body samples
624     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
625     // Fill in boundary sample counts as well as call site samples for calls
626     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
627                                        CI.second.BranchCounter);
628   }
629   // Fill in call site value sample for inlined calls and also use context to
630   // infer missing samples. Since we don't have call count for inlined
631   // functions, we estimate it from inlinee's profile using the entry of the
632   // body sample.
633   populateInferredFunctionSamples();
634 
635   updateTotalSamples();
636 }
637 
638 void CSProfileGenerator::populateBodySamplesForFunction(
639     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
640   // Compute disjoint ranges first, so we can use MAX
641   // for calculating count for each location.
642   RangeSample Ranges;
643   findDisjointRanges(Ranges, RangeCounter);
644   for (const auto &Range : Ranges) {
645     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
646     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
647     uint64_t Count = Range.second;
648     // Disjoint ranges have introduce zero-filled gap that
649     // doesn't belong to current context, filter them out.
650     if (Count == 0)
651       continue;
652 
653     InstructionPointer IP(Binary, RangeBegin, true);
654     // Disjoint ranges may have range in the middle of two instr,
655     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
656     // can be Addr1+1 to Addr2-1. We should ignore such range.
657     if (IP.Address > RangeEnd)
658       continue;
659 
660     do {
661       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
662       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
663       if (LeafLoc.hasValue()) {
664         // Recording body sample for this specific context
665         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
666         FunctionProfile.addTotalSamples(Count);
667       }
668     } while (IP.advance() && IP.Address <= RangeEnd);
669   }
670 }
671 
672 void CSProfileGenerator::populateBoundarySamplesForFunction(
673     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
674     const BranchSample &BranchCounters) {
675 
676   for (const auto &Entry : BranchCounters) {
677     uint64_t SourceOffset = Entry.first.first;
678     uint64_t TargetOffset = Entry.first.second;
679     uint64_t Count = Entry.second;
680     assert(Count != 0 && "Unexpected zero weight branch");
681 
682     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
683     if (CalleeName.size() == 0)
684       continue;
685 
686     // Record called target sample and its count
687     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
688     if (!LeafLoc.hasValue())
689       continue;
690     FunctionProfile.addCalledTargetSamples(
691         LeafLoc->Location.LineOffset,
692         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
693         Count);
694 
695     // Record head sample for called target(callee)
696     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
697     assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
698            "Leaf function name doesn't match");
699     CalleeCtx.back() = *LeafLoc;
700     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
701     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
702     CalleeProfile.addHeadSamples(Count);
703   }
704 }
705 
706 static SampleContextFrame
707 getCallerContext(SampleContextFrames CalleeContext,
708                  SampleContextFrameVector &CallerContext) {
709   assert(CalleeContext.size() > 1 && "Unexpected empty context");
710   CalleeContext = CalleeContext.drop_back();
711   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
712   SampleContextFrame CallerFrame = CallerContext.back();
713   CallerContext.back().Location = LineLocation(0, 0);
714   return CallerFrame;
715 }
716 
717 void CSProfileGenerator::populateInferredFunctionSamples() {
718   for (const auto &Item : ProfileMap) {
719     const auto &CalleeContext = Item.first;
720     const FunctionSamples &CalleeProfile = Item.second;
721 
722     // If we already have head sample counts, we must have value profile
723     // for call sites added already. Skip to avoid double counting.
724     if (CalleeProfile.getHeadSamples())
725       continue;
726     // If we don't have context, nothing to do for caller's call site.
727     // This could happen for entry point function.
728     if (CalleeContext.isBaseContext())
729       continue;
730 
731     // Infer Caller's frame loc and context ID through string splitting
732     SampleContextFrameVector CallerContextId;
733     SampleContextFrame &&CallerLeafFrameLoc =
734         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
735     SampleContextFrames CallerContext(CallerContextId);
736 
737     // It's possible that we haven't seen any sample directly in the caller,
738     // in which case CallerProfile will not exist. But we can't modify
739     // ProfileMap while iterating it.
740     // TODO: created function profile for those callers too
741     if (ProfileMap.find(CallerContext) == ProfileMap.end())
742       continue;
743     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
744 
745     // Since we don't have call count for inlined functions, we
746     // estimate it from inlinee's profile using entry body sample.
747     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
748     // If we don't have samples with location, use 1 to indicate live.
749     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
750       EstimatedCallCount = 1;
751     CallerProfile.addCalledTargetSamples(
752         CallerLeafFrameLoc.Location.LineOffset,
753         CallerLeafFrameLoc.Location.Discriminator,
754         CalleeProfile.getContext().getName(), EstimatedCallCount);
755     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
756                                  CallerLeafFrameLoc.Location.Discriminator,
757                                  EstimatedCallCount);
758     CallerProfile.addTotalSamples(EstimatedCallCount);
759   }
760 }
761 
762 void CSProfileGenerator::postProcessProfiles() {
763   // Compute hot/cold threshold based on profile. This will be used for cold
764   // context profile merging/trimming.
765   computeSummaryAndThreshold();
766 
767   // Run global pre-inliner to adjust/merge context profile based on estimated
768   // inline decisions.
769   if (EnableCSPreInliner) {
770     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
771         .run();
772     // Turn off the profile merger by default unless it is explicitly enabled.
773     if (!CSProfMergeColdContext.getNumOccurrences())
774       CSProfMergeColdContext = false;
775   }
776 
777   // Trim and merge cold context profile using cold threshold above.
778   if (TrimColdProfile || CSProfMergeColdContext) {
779     SampleContextTrimmer(ProfileMap)
780         .trimAndMergeColdContextProfiles(
781             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
782             CSProfMaxColdContextDepth, EnableCSPreInliner);
783   }
784 
785   // Merge function samples of CS profile to calculate profile density.
786   sampleprof::SampleProfileMap ContextLessProfiles;
787   for (const auto &I : ProfileMap) {
788     ContextLessProfiles[I.second.getName()].merge(I.second);
789   }
790 
791   calculateAndShowDensity(ContextLessProfiles);
792   if (GenCSNestedProfile) {
793     CSProfileConverter CSConverter(ProfileMap);
794     CSConverter.convertProfiles();
795     FunctionSamples::ProfileIsCSFlat = false;
796     FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
797   }
798 }
799 
800 void ProfileGeneratorBase::computeSummaryAndThreshold() {
801   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
802   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
803   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
804       (Summary->getDetailedSummary()));
805   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
806       (Summary->getDetailedSummary()));
807 }
808 
809 // Helper function to extract context prefix string stack
810 // Extract context stack for reusing, leaf context stack will
811 // be added compressed while looking up function profile
812 static void extractPrefixContextStack(
813     SampleContextFrameVector &ContextStack,
814     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
815     ProfiledBinary *Binary) {
816   for (const auto *P : Probes) {
817     Binary->getInlineContextForProbe(P, ContextStack, true);
818   }
819 }
820 
821 void CSProfileGenerator::generateProbeBasedProfile() {
822   for (const auto &CI : SampleCounters) {
823     const ProbeBasedCtxKey *CtxKey =
824         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
825     SampleContextFrameVector ContextStack;
826     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
827     // Fill in function body samples from probes, also infer caller's samples
828     // from callee's probe
829     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
830     // Fill in boundary samples for a call probe
831     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
832   }
833 }
834 
835 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
836                                                 ProbeCounterMap &ProbeCounter) {
837   RangeSample Ranges;
838   findDisjointRanges(Ranges, RangeCounter);
839   for (const auto &Range : Ranges) {
840     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
841     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
842     uint64_t Count = Range.second;
843     // Disjoint ranges have introduce zero-filled gap that
844     // doesn't belong to current context, filter them out.
845     if (Count == 0)
846       continue;
847 
848     InstructionPointer IP(Binary, RangeBegin, true);
849     // Disjoint ranges may have range in the middle of two instr,
850     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
851     // can be Addr1+1 to Addr2-1. We should ignore such range.
852     if (IP.Address > RangeEnd)
853       continue;
854 
855     do {
856       const AddressProbesMap &Address2ProbesMap =
857           Binary->getAddress2ProbesMap();
858       auto It = Address2ProbesMap.find(IP.Address);
859       if (It != Address2ProbesMap.end()) {
860         for (const auto &Probe : It->second) {
861           if (!Probe.isBlock())
862             continue;
863           ProbeCounter[&Probe] += Count;
864         }
865       }
866     } while (IP.advance() && IP.Address <= RangeEnd);
867   }
868 }
869 
870 void CSProfileGenerator::populateBodySamplesWithProbes(
871     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
872   ProbeCounterMap ProbeCounter;
873   // Extract the top frame probes by looking up each address among the range in
874   // the Address2ProbeMap
875   extractProbesFromRange(RangeCounter, ProbeCounter);
876   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
877                      std::unordered_set<FunctionSamples *>>
878       FrameSamples;
879   for (const auto &PI : ProbeCounter) {
880     const MCDecodedPseudoProbe *Probe = PI.first;
881     uint64_t Count = PI.second;
882     FunctionSamples &FunctionProfile =
883         getFunctionProfileForLeafProbe(ContextStack, Probe);
884     // Record the current frame and FunctionProfile whenever samples are
885     // collected for non-danglie probes. This is for reporting all of the
886     // zero count probes of the frame later.
887     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
888     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
889     FunctionProfile.addTotalSamples(Count);
890     if (Probe->isEntry()) {
891       FunctionProfile.addHeadSamples(Count);
892       // Look up for the caller's function profile
893       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
894       SampleContextFrames CalleeContextId =
895           FunctionProfile.getContext().getContextFrames();
896       if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
897         // Since the context id will be compressed, we have to use callee's
898         // context id to infer caller's context id to ensure they share the
899         // same context prefix.
900         SampleContextFrameVector CallerContextId;
901         SampleContextFrame &&CallerLeafFrameLoc =
902             getCallerContext(CalleeContextId, CallerContextId);
903         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
904         assert(CallerIndex &&
905                "Inferred caller's location index shouldn't be zero!");
906         FunctionSamples &CallerProfile =
907             getFunctionProfileForContext(CallerContextId);
908         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
909         CallerProfile.addBodySamples(CallerIndex, 0, Count);
910         CallerProfile.addTotalSamples(Count);
911         CallerProfile.addCalledTargetSamples(
912             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
913       }
914     }
915   }
916 
917   // Assign zero count for remaining probes without sample hits to
918   // differentiate from probes optimized away, of which the counts are unknown
919   // and will be inferred by the compiler.
920   for (auto &I : FrameSamples) {
921     for (auto *FunctionProfile : I.second) {
922       for (auto *Probe : I.first->getProbes()) {
923         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
924       }
925     }
926   }
927 }
928 
929 void CSProfileGenerator::populateBoundarySamplesWithProbes(
930     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
931   for (const auto &BI : BranchCounter) {
932     uint64_t SourceOffset = BI.first.first;
933     uint64_t TargetOffset = BI.first.second;
934     uint64_t Count = BI.second;
935     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
936     const MCDecodedPseudoProbe *CallProbe =
937         Binary->getCallProbeForAddr(SourceAddress);
938     if (CallProbe == nullptr)
939       continue;
940     FunctionSamples &FunctionProfile =
941         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
942     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
943     FunctionProfile.addTotalSamples(Count);
944     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
945     if (CalleeName.size() == 0)
946       continue;
947     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
948                                            Count);
949   }
950 }
951 
952 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
953     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
954 
955   // Explicitly copy the context for appending the leaf context
956   SampleContextFrameVector NewContextStack(ContextStack.begin(),
957                                            ContextStack.end());
958   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
959   // For leaf inlined context with the top frame, we should strip off the top
960   // frame's probe id, like:
961   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
962   auto LeafFrame = NewContextStack.back();
963   LeafFrame.Location = LineLocation(0, 0);
964   NewContextStack.pop_back();
965   // Compress the context string except for the leaf frame
966   CSProfileGenerator::compressRecursionContext(NewContextStack);
967   CSProfileGenerator::trimContext(NewContextStack);
968   NewContextStack.push_back(LeafFrame);
969 
970   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
971   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
972   FunctionSamples &FunctionProile =
973       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
974   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
975   return FunctionProile;
976 }
977 
978 } // end namespace sampleprof
979 } // end namespace llvm
980