1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ErrorHandling.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <float.h>
15 #include <unordered_set>
16 
17 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
18                                     cl::Required,
19                                     cl::desc("Output profile file"));
20 static cl::alias OutputA("o", cl::desc("Alias for --output"),
21                          cl::aliasopt(OutputFilename));
22 
23 static cl::opt<SampleProfileFormat> OutputFormat(
24     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
25     cl::values(
26         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
27         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
28         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
29         clEnumValN(SPF_Text, "text", "Text encoding"),
30         clEnumValN(SPF_GCC, "gcc",
31                    "GCC encoding (only meaningful for -sample)")));
32 
33 cl::opt<bool> UseMD5(
34     "use-md5", cl::init(false), cl::Hidden,
35     cl::desc("Use md5 to represent function names in the output profile (only "
36              "meaningful for -extbinary)"));
37 
38 static cl::opt<bool> PopulateProfileSymbolList(
39     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
40     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
41 
42 static cl::opt<bool> FillZeroForAllFuncs(
43     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
44     cl::desc("Attribute all functions' range with zero count "
45              "even it's not hit by any samples."));
46 
47 static cl::opt<int32_t, true> RecursionCompression(
48     "compress-recursion",
49     cl::desc("Compressing recursion by deduplicating adjacent frame "
50              "sequences up to the specified size. -1 means no size limit."),
51     cl::Hidden,
52     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
53 
54 static cl::opt<bool>
55     TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
56                     cl::desc("If the total count of the profile is smaller "
57                              "than threshold, it will be trimmed."));
58 
59 static cl::opt<bool> CSProfMergeColdContext(
60     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
61     cl::desc("If the total count of context profile is smaller than "
62              "the threshold, it will be merged into context-less base "
63              "profile."));
64 
65 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
66     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
67     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
68              "context-less base profile"));
69 
70 static cl::opt<int, true> CSProfMaxContextDepth(
71     "csprof-max-context-depth", cl::ZeroOrMore,
72     cl::desc("Keep the last K contexts while merging profile. -1 means no "
73              "depth limit."),
74     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
75 
76 static cl::opt<double> HotFunctionDensityThreshold(
77     "hot-function-density-threshold", llvm::cl::init(1000),
78     llvm::cl::desc(
79         "specify density threshold for hot functions (default: 1000)"),
80     llvm::cl::Optional);
81 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
82                                  llvm::cl::desc("show profile density details"),
83                                  llvm::cl::Optional);
84 
85 static cl::opt<bool> UpdateTotalSamples(
86     "update-total-samples", llvm::cl::init(false),
87     llvm::cl::desc(
88         "Update total samples by accumulating all its body samples."),
89     llvm::cl::Optional);
90 
91 extern cl::opt<int> ProfileSummaryCutoffHot;
92 
93 static cl::opt<bool> GenCSNestedProfile(
94     "gen-cs-nested-profile", cl::Hidden, cl::init(false),
95     cl::desc("Generate nested function profiles for CSSPGO"));
96 
97 using namespace llvm;
98 using namespace sampleprof;
99 
100 namespace llvm {
101 namespace sampleprof {
102 
103 // Initialize the MaxCompressionSize to -1 which means no size limit
104 int32_t CSProfileGenerator::MaxCompressionSize = -1;
105 
106 int CSProfileGenerator::MaxContextDepth = -1;
107 
108 bool ProfileGeneratorBase::UseFSDiscriminator = false;
109 
110 std::unique_ptr<ProfileGeneratorBase>
111 ProfileGeneratorBase::create(ProfiledBinary *Binary,
112                              const ContextSampleCounterMap &SampleCounters,
113                              bool ProfileIsCSFlat) {
114   std::unique_ptr<ProfileGeneratorBase> Generator;
115   if (ProfileIsCSFlat) {
116     if (Binary->useFSDiscriminator())
117       exitWithError("FS discriminator is not supported in CS profile.");
118     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
119   } else {
120     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
121   }
122   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
123   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
124 
125   return Generator;
126 }
127 
128 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
129                                  SampleProfileMap &ProfileMap) {
130   // Populate profile symbol list if extended binary format is used.
131   ProfileSymbolList SymbolList;
132 
133   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
134     Binary->populateSymbolListFromDWARF(SymbolList);
135     Writer->setProfileSymbolList(&SymbolList);
136   }
137 
138   if (std::error_code EC = Writer->write(ProfileMap))
139     exitWithError(std::move(EC));
140 }
141 
142 void ProfileGeneratorBase::write() {
143   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
144   if (std::error_code EC = WriterOrErr.getError())
145     exitWithError(EC, OutputFilename);
146 
147   if (UseMD5) {
148     if (OutputFormat != SPF_Ext_Binary)
149       WithColor::warning() << "-use-md5 is ignored. Specify "
150                               "--format=extbinary to enable it\n";
151     else
152       WriterOrErr.get()->setUseMD5();
153   }
154 
155   write(std::move(WriterOrErr.get()), ProfileMap);
156 }
157 
158 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
159   if (Density == 0.0)
160     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
161                             "set too low. Please check your command.\n";
162   else if (Density < HotFunctionDensityThreshold)
163     WithColor::warning()
164         << "AutoFDO is estimated to optimize better with "
165         << format("%.1f", HotFunctionDensityThreshold / Density)
166         << "x more samples. Please consider increasing sampling rate or "
167            "profiling for longer duration to get more samples.\n";
168 
169   if (ShowDensity)
170     outs() << "Minimum profile density for hot functions with top "
171            << format("%.2f",
172                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
173                          10000)
174            << "% total samples: " << format("%.1f", Density) << "\n";
175 }
176 
177 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
178                                               uint64_t HotCntThreshold) {
179   double Density = DBL_MAX;
180   std::vector<const FunctionSamples *> HotFuncs;
181   for (auto &I : Profiles) {
182     auto &FuncSamples = I.second;
183     if (FuncSamples.getTotalSamples() < HotCntThreshold)
184       continue;
185     HotFuncs.emplace_back(&FuncSamples);
186   }
187 
188   for (auto *FuncSamples : HotFuncs) {
189     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
190     if (!Func)
191       continue;
192     uint64_t FuncSize = Func->getFuncSize();
193     if (FuncSize == 0)
194       continue;
195     Density =
196         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
197                               FuncSize);
198   }
199 
200   return Density == DBL_MAX ? 0.0 : Density;
201 }
202 
203 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
204                                               const RangeSample &Ranges) {
205 
206   /*
207   Regions may overlap with each other. Using the boundary info, find all
208   disjoint ranges and their sample count. BoundaryPoint contains the count
209   multiple samples begin/end at this points.
210 
211   |<--100-->|           Sample1
212   |<------200------>|   Sample2
213   A         B       C
214 
215   In the example above,
216   Sample1 begins at A, ends at B, its value is 100.
217   Sample2 beings at A, ends at C, its value is 200.
218   For A, BeginCount is the sum of sample begins at A, which is 300 and no
219   samples ends at A, so EndCount is 0.
220   Then boundary points A, B, and C with begin/end counts are:
221   A: (300, 0)
222   B: (0, 100)
223   C: (0, 200)
224   */
225   struct BoundaryPoint {
226     // Sum of sample counts beginning at this point
227     uint64_t BeginCount = UINT64_MAX;
228     // Sum of sample counts ending at this point
229     uint64_t EndCount = UINT64_MAX;
230     // Is the begin point of a zero range.
231     bool IsZeroRangeBegin = false;
232     // Is the end point of a zero range.
233     bool IsZeroRangeEnd = false;
234 
235     void addBeginCount(uint64_t Count) {
236       if (BeginCount == UINT64_MAX)
237         BeginCount = 0;
238       BeginCount += Count;
239     }
240 
241     void addEndCount(uint64_t Count) {
242       if (EndCount == UINT64_MAX)
243         EndCount = 0;
244       EndCount += Count;
245     }
246   };
247 
248   /*
249   For the above example. With boundary points, follwing logic finds two
250   disjoint region of
251 
252   [A,B]:   300
253   [B+1,C]: 200
254 
255   If there is a boundary point that both begin and end, the point itself
256   becomes a separate disjoint region. For example, if we have original
257   ranges of
258 
259   |<--- 100 --->|
260                 |<--- 200 --->|
261   A             B             C
262 
263   there are three boundary points with their begin/end counts of
264 
265   A: (100, 0)
266   B: (200, 100)
267   C: (0, 200)
268 
269   the disjoint ranges would be
270 
271   [A, B-1]: 100
272   [B, B]:   300
273   [B+1, C]: 200.
274 
275   Example for zero value range:
276 
277     |<--- 100 --->|
278                        |<--- 200 --->|
279   |<---------------  0 ----------------->|
280   A  B            C    D             E   F
281 
282   [A, B-1]  : 0
283   [B, C]    : 100
284   [C+1, D-1]: 0
285   [D, E]    : 200
286   [E+1, F]  : 0
287   */
288   std::map<uint64_t, BoundaryPoint> Boundaries;
289 
290   for (const auto &Item : Ranges) {
291     assert(Item.first.first <= Item.first.second &&
292            "Invalid instruction range");
293     auto &BeginPoint = Boundaries[Item.first.first];
294     auto &EndPoint = Boundaries[Item.first.second];
295     uint64_t Count = Item.second;
296 
297     BeginPoint.addBeginCount(Count);
298     EndPoint.addEndCount(Count);
299     if (Count == 0) {
300       BeginPoint.IsZeroRangeBegin = true;
301       EndPoint.IsZeroRangeEnd = true;
302     }
303   }
304 
305   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
306   // and the next valid address
307   uint64_t BeginAddress = UINT64_MAX;
308   int ZeroRangeDepth = 0;
309   uint64_t Count = 0;
310   for (const auto &Item : Boundaries) {
311     uint64_t Address = Item.first;
312     const BoundaryPoint &Point = Item.second;
313     if (Point.BeginCount != UINT64_MAX) {
314       if (BeginAddress != UINT64_MAX)
315         DisjointRanges[{BeginAddress, Address - 1}] = Count;
316       Count += Point.BeginCount;
317       BeginAddress = Address;
318       ZeroRangeDepth += Point.IsZeroRangeBegin;
319     }
320     if (Point.EndCount != UINT64_MAX) {
321       assert((BeginAddress != UINT64_MAX) &&
322              "First boundary point cannot be 'end' point");
323       DisjointRanges[{BeginAddress, Address}] = Count;
324       assert(Count >= Point.EndCount && "Mismatched live ranges");
325       Count -= Point.EndCount;
326       BeginAddress = Address + 1;
327       ZeroRangeDepth -= Point.IsZeroRangeEnd;
328       // If the remaining count is zero and it's no longer in a zero range, this
329       // means we consume all the ranges before, thus mark BeginAddress as
330       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
331       //  [<---- 10 ---->]
332       //                       [<---- 20 ---->]
333       //   A             B     C              D
334       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
335       // have the [B+1, C-1] zero range.
336       if (Count == 0 && ZeroRangeDepth == 0)
337         BeginAddress = UINT64_MAX;
338     }
339   }
340 }
341 
342 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
343     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
344     uint64_t Count) {
345   // Use the maximum count of samples with same line location
346   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
347 
348   // Use duplication factor to compensated for loop unroll/vectorization.
349   // Note that this is only needed when we're taking MAX of the counts at
350   // the location instead of SUM.
351   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
352 
353   ErrorOr<uint64_t> R =
354       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
355 
356   uint64_t PreviousCount = R ? R.get() : 0;
357   if (PreviousCount <= Count) {
358     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
359                                    Count - PreviousCount);
360   }
361 }
362 
363 void ProfileGeneratorBase::updateTotalSamples() {
364   if (!UpdateTotalSamples)
365     return;
366 
367   for (auto &Item : ProfileMap) {
368     FunctionSamples &FunctionProfile = Item.second;
369     FunctionProfile.updateTotalSamples();
370   }
371 }
372 
373 FunctionSamples &
374 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
375   SampleContext Context(FuncName);
376   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
377   if (Ret.second) {
378     FunctionSamples &FProfile = Ret.first->second;
379     FProfile.setContext(Context);
380   }
381   return Ret.first->second;
382 }
383 
384 void ProfileGenerator::generateProfile() {
385   if (Binary->usePseudoProbes()) {
386     generateProbeBasedProfile();
387   } else {
388     generateLineNumBasedProfile();
389   }
390   postProcessProfiles();
391 }
392 
393 void ProfileGenerator::postProcessProfiles() {
394   computeSummaryAndThreshold();
395   trimColdProfiles(ProfileMap, ColdCountThreshold);
396   calculateAndShowDensity(ProfileMap);
397 }
398 
399 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
400                                         uint64_t ColdCntThreshold) {
401   if (!TrimColdProfile)
402     return;
403 
404   // Move cold profiles into a tmp container.
405   std::vector<SampleContext> ColdProfiles;
406   for (const auto &I : ProfileMap) {
407     if (I.second.getTotalSamples() < ColdCntThreshold)
408       ColdProfiles.emplace_back(I.first);
409   }
410 
411   // Remove the cold profile from ProfileMap.
412   for (const auto &I : ColdProfiles)
413     ProfileMap.erase(I);
414 }
415 
416 void ProfileGenerator::generateLineNumBasedProfile() {
417   assert(SampleCounters.size() == 1 &&
418          "Must have one entry for profile generation.");
419   const SampleCounter &SC = SampleCounters.begin()->second;
420   // Fill in function body samples
421   populateBodySamplesForAllFunctions(SC.RangeCounter);
422   // Fill in boundary sample counts as well as call site samples for calls
423   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
424 
425   updateTotalSamples();
426 }
427 
428 void ProfileGenerator::generateProbeBasedProfile() {
429   assert(SampleCounters.size() == 1 &&
430          "Must have one entry for profile generation.");
431   // Enable pseudo probe functionalities in SampleProf
432   FunctionSamples::ProfileIsProbeBased = true;
433   const SampleCounter &SC = SampleCounters.begin()->second;
434   // Fill in function body samples
435   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
436   // Fill in boundary sample counts as well as call site samples for calls
437   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
438 
439   updateTotalSamples();
440 }
441 
442 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
443     const RangeSample &RangeCounter) {
444   ProbeCounterMap ProbeCounter;
445   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside
446   // extractProbesFromRange.
447   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false);
448 
449   for (const auto &PI : ProbeCounter) {
450     const MCDecodedPseudoProbe *Probe = PI.first;
451     uint64_t Count = PI.second;
452     SampleContextFrameVector FrameVec;
453     Binary->getInlineContextForProbe(Probe, FrameVec, true);
454     FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count);
455     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
456     if (Probe->isEntry())
457       FunctionProfile.addHeadSamples(Count);
458   }
459 }
460 
461 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
462     const BranchSample &BranchCounters) {
463   for (const auto &Entry : BranchCounters) {
464     uint64_t SourceOffset = Entry.first.first;
465     uint64_t TargetOffset = Entry.first.second;
466     uint64_t Count = Entry.second;
467     assert(Count != 0 && "Unexpected zero weight branch");
468 
469     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
470     if (CalleeName.size() == 0)
471       continue;
472 
473     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
474     const MCDecodedPseudoProbe *CallProbe =
475         Binary->getCallProbeForAddr(SourceAddress);
476     if (CallProbe == nullptr)
477       continue;
478 
479     // Record called target sample and its count.
480     SampleContextFrameVector FrameVec;
481     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
482 
483     if (!FrameVec.empty()) {
484       FunctionSamples &FunctionProfile =
485           getLeafProfileAndAddTotalSamples(FrameVec, 0);
486       FunctionProfile.addCalledTargetSamples(
487           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
488     }
489   }
490 }
491 
492 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
493     const SampleContextFrameVector &FrameVec, uint64_t Count) {
494   // Get top level profile
495   FunctionSamples *FunctionProfile =
496       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
497   FunctionProfile->addTotalSamples(Count);
498   if (Binary->usePseudoProbes()) {
499     const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
500     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
501   }
502 
503   for (size_t I = 1; I < FrameVec.size(); I++) {
504     LineLocation Callsite(
505         FrameVec[I - 1].Location.LineOffset,
506         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
507     FunctionSamplesMap &SamplesMap =
508         FunctionProfile->functionSamplesAt(Callsite);
509     auto Ret =
510         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
511     if (Ret.second) {
512       SampleContext Context(FrameVec[I].FuncName);
513       Ret.first->second.setContext(Context);
514     }
515     FunctionProfile = &Ret.first->second;
516     FunctionProfile->addTotalSamples(Count);
517     if (Binary->usePseudoProbes()) {
518       const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
519       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
520     }
521   }
522 
523   return *FunctionProfile;
524 }
525 
526 RangeSample
527 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
528   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
529   if (FillZeroForAllFuncs) {
530     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
531       for (auto &R : FuncI.second.Ranges) {
532         Ranges[{R.first, R.second - 1}] += 0;
533       }
534     }
535   } else {
536     // For each range, we search for all ranges of the function it belongs to
537     // and initialize it with zero count, so it remains zero if doesn't hit any
538     // samples. This is to be consistent with compiler that interpret zero count
539     // as unexecuted(cold).
540     for (const auto &I : RangeCounter) {
541       uint64_t StartOffset = I.first.first;
542       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
543         Ranges[{Range.first, Range.second - 1}] += 0;
544     }
545   }
546   RangeSample DisjointRanges;
547   findDisjointRanges(DisjointRanges, Ranges);
548   return DisjointRanges;
549 }
550 
551 void ProfileGenerator::populateBodySamplesForAllFunctions(
552     const RangeSample &RangeCounter) {
553   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
554     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
555     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
556     uint64_t Count = Range.second;
557 
558     InstructionPointer IP(Binary, RangeBegin, true);
559     // Disjoint ranges may have range in the middle of two instr,
560     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
561     // can be Addr1+1 to Addr2-1. We should ignore such range.
562     if (IP.Address > RangeEnd)
563       continue;
564 
565     do {
566       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
567       const SampleContextFrameVector &FrameVec =
568           Binary->getFrameLocationStack(Offset);
569       if (!FrameVec.empty()) {
570         // FIXME: As accumulating total count per instruction caused some
571         // regression, we changed to accumulate total count per byte as a
572         // workaround. Tuning hotness threshold on the compiler side might be
573         // necessary in the future.
574         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
575             FrameVec, Count * Binary->getInstSize(Offset));
576         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
577                                             Count);
578       }
579     } while (IP.advance() && IP.Address <= RangeEnd);
580   }
581 }
582 
583 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
584   // Get the function range by branch target if it's a call branch.
585   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
586 
587   // We won't accumulate sample count for a range whose start is not the real
588   // function entry such as outlined function or inner labels.
589   if (!FRange || !FRange->IsFuncEntry)
590     return StringRef();
591 
592   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
593 }
594 
595 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
596     const BranchSample &BranchCounters) {
597   for (const auto &Entry : BranchCounters) {
598     uint64_t SourceOffset = Entry.first.first;
599     uint64_t TargetOffset = Entry.first.second;
600     uint64_t Count = Entry.second;
601     assert(Count != 0 && "Unexpected zero weight branch");
602 
603     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
604     if (CalleeName.size() == 0)
605       continue;
606     // Record called target sample and its count.
607     const SampleContextFrameVector &FrameVec =
608         Binary->getFrameLocationStack(SourceOffset);
609     if (!FrameVec.empty()) {
610       FunctionSamples &FunctionProfile =
611           getLeafProfileAndAddTotalSamples(FrameVec, 0);
612       FunctionProfile.addCalledTargetSamples(
613           FrameVec.back().Location.LineOffset,
614           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
615           CalleeName, Count);
616     }
617     // Add head samples for callee.
618     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
619     CalleeProfile.addHeadSamples(Count);
620   }
621 }
622 
623 void ProfileGeneratorBase::calculateAndShowDensity(
624     const SampleProfileMap &Profiles) {
625   double Density = calculateDensity(Profiles, HotCountThreshold);
626   showDensitySuggestion(Density);
627 }
628 
629 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
630     const SampleContextFrameVector &Context, bool WasLeafInlined) {
631   auto I = ProfileMap.find(SampleContext(Context));
632   if (I == ProfileMap.end()) {
633     // Save the new context for future references.
634     SampleContextFrames NewContext = *Contexts.insert(Context).first;
635     SampleContext FContext(NewContext, RawContext);
636     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
637     if (WasLeafInlined)
638       FContext.setAttribute(ContextWasInlined);
639     FunctionSamples &FProfile = Ret.first->second;
640     FProfile.setContext(FContext);
641     return Ret.first->second;
642   }
643   return I->second;
644 }
645 
646 void CSProfileGenerator::generateProfile() {
647   FunctionSamples::ProfileIsCSFlat = true;
648 
649   if (Binary->getTrackFuncContextSize())
650     computeSizeForProfiledFunctions();
651 
652   if (Binary->usePseudoProbes()) {
653     generateProbeBasedProfile();
654   } else {
655     generateLineNumBasedProfile();
656   }
657   postProcessProfiles();
658 }
659 
660 void CSProfileGenerator::computeSizeForProfiledFunctions() {
661   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
662 
663   // Go through all the ranges in the CS counters, use the start of the range to
664   // look up the function it belongs and record the function.
665   for (const auto &CI : SampleCounters) {
666     for (const auto &Item : CI.second.RangeCounter) {
667       // FIXME: Filter the bogus crossing function range.
668       uint64_t StartOffset = Item.first.first;
669       if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
670         ProfiledFunctions.insert(FRange->Func);
671     }
672   }
673 
674   for (auto *Func : ProfiledFunctions)
675     Binary->computeInlinedContextSizeForFunc(Func);
676 
677   // Flush the symbolizer to save memory.
678   Binary->flushSymbolizer();
679 }
680 
681 void CSProfileGenerator::generateLineNumBasedProfile() {
682   for (const auto &CI : SampleCounters) {
683     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
684 
685     // Get or create function profile for the range
686     FunctionSamples &FunctionProfile =
687         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
688 
689     // Fill in function body samples
690     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
691     // Fill in boundary sample counts as well as call site samples for calls
692     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
693                                        CI.second.BranchCounter);
694   }
695   // Fill in call site value sample for inlined calls and also use context to
696   // infer missing samples. Since we don't have call count for inlined
697   // functions, we estimate it from inlinee's profile using the entry of the
698   // body sample.
699   populateInferredFunctionSamples();
700 
701   updateTotalSamples();
702 }
703 
704 void CSProfileGenerator::populateBodySamplesForFunction(
705     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
706   // Compute disjoint ranges first, so we can use MAX
707   // for calculating count for each location.
708   RangeSample Ranges;
709   findDisjointRanges(Ranges, RangeCounter);
710   for (const auto &Range : Ranges) {
711     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
712     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
713     uint64_t Count = Range.second;
714     // Disjoint ranges have introduce zero-filled gap that
715     // doesn't belong to current context, filter them out.
716     if (Count == 0)
717       continue;
718 
719     InstructionPointer IP(Binary, RangeBegin, true);
720     // Disjoint ranges may have range in the middle of two instr,
721     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
722     // can be Addr1+1 to Addr2-1. We should ignore such range.
723     if (IP.Address > RangeEnd)
724       continue;
725 
726     do {
727       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
728       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
729       if (LeafLoc.hasValue()) {
730         // Recording body sample for this specific context
731         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
732         FunctionProfile.addTotalSamples(Count);
733       }
734     } while (IP.advance() && IP.Address <= RangeEnd);
735   }
736 }
737 
738 void CSProfileGenerator::populateBoundarySamplesForFunction(
739     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
740     const BranchSample &BranchCounters) {
741 
742   for (const auto &Entry : BranchCounters) {
743     uint64_t SourceOffset = Entry.first.first;
744     uint64_t TargetOffset = Entry.first.second;
745     uint64_t Count = Entry.second;
746     assert(Count != 0 && "Unexpected zero weight branch");
747 
748     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
749     if (CalleeName.size() == 0)
750       continue;
751 
752     // Record called target sample and its count
753     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
754     if (!LeafLoc.hasValue())
755       continue;
756     FunctionProfile.addCalledTargetSamples(
757         LeafLoc->Location.LineOffset,
758         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
759         Count);
760 
761     // Record head sample for called target(callee)
762     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
763     assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
764            "Leaf function name doesn't match");
765     CalleeCtx.back() = *LeafLoc;
766     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
767     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
768     CalleeProfile.addHeadSamples(Count);
769   }
770 }
771 
772 static SampleContextFrame
773 getCallerContext(SampleContextFrames CalleeContext,
774                  SampleContextFrameVector &CallerContext) {
775   assert(CalleeContext.size() > 1 && "Unexpected empty context");
776   CalleeContext = CalleeContext.drop_back();
777   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
778   SampleContextFrame CallerFrame = CallerContext.back();
779   CallerContext.back().Location = LineLocation(0, 0);
780   return CallerFrame;
781 }
782 
783 void CSProfileGenerator::populateInferredFunctionSamples() {
784   for (const auto &Item : ProfileMap) {
785     const auto &CalleeContext = Item.first;
786     const FunctionSamples &CalleeProfile = Item.second;
787 
788     // If we already have head sample counts, we must have value profile
789     // for call sites added already. Skip to avoid double counting.
790     if (CalleeProfile.getHeadSamples())
791       continue;
792     // If we don't have context, nothing to do for caller's call site.
793     // This could happen for entry point function.
794     if (CalleeContext.isBaseContext())
795       continue;
796 
797     // Infer Caller's frame loc and context ID through string splitting
798     SampleContextFrameVector CallerContextId;
799     SampleContextFrame &&CallerLeafFrameLoc =
800         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
801     SampleContextFrames CallerContext(CallerContextId);
802 
803     // It's possible that we haven't seen any sample directly in the caller,
804     // in which case CallerProfile will not exist. But we can't modify
805     // ProfileMap while iterating it.
806     // TODO: created function profile for those callers too
807     if (ProfileMap.find(CallerContext) == ProfileMap.end())
808       continue;
809     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
810 
811     // Since we don't have call count for inlined functions, we
812     // estimate it from inlinee's profile using entry body sample.
813     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
814     // If we don't have samples with location, use 1 to indicate live.
815     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
816       EstimatedCallCount = 1;
817     CallerProfile.addCalledTargetSamples(
818         CallerLeafFrameLoc.Location.LineOffset,
819         CallerLeafFrameLoc.Location.Discriminator,
820         CalleeProfile.getContext().getName(), EstimatedCallCount);
821     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
822                                  CallerLeafFrameLoc.Location.Discriminator,
823                                  EstimatedCallCount);
824     CallerProfile.addTotalSamples(EstimatedCallCount);
825   }
826 }
827 
828 void CSProfileGenerator::postProcessProfiles() {
829   // Compute hot/cold threshold based on profile. This will be used for cold
830   // context profile merging/trimming.
831   computeSummaryAndThreshold();
832 
833   // Run global pre-inliner to adjust/merge context profile based on estimated
834   // inline decisions.
835   if (EnableCSPreInliner) {
836     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
837         .run();
838     // Turn off the profile merger by default unless it is explicitly enabled.
839     if (!CSProfMergeColdContext.getNumOccurrences())
840       CSProfMergeColdContext = false;
841   }
842 
843   // Trim and merge cold context profile using cold threshold above.
844   if (TrimColdProfile || CSProfMergeColdContext) {
845     SampleContextTrimmer(ProfileMap)
846         .trimAndMergeColdContextProfiles(
847             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
848             CSProfMaxColdContextDepth, EnableCSPreInliner);
849   }
850 
851   // Merge function samples of CS profile to calculate profile density.
852   sampleprof::SampleProfileMap ContextLessProfiles;
853   for (const auto &I : ProfileMap) {
854     ContextLessProfiles[I.second.getName()].merge(I.second);
855   }
856 
857   calculateAndShowDensity(ContextLessProfiles);
858   if (GenCSNestedProfile) {
859     CSProfileConverter CSConverter(ProfileMap);
860     CSConverter.convertProfiles();
861     FunctionSamples::ProfileIsCSFlat = false;
862     FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
863   }
864 }
865 
866 void ProfileGeneratorBase::computeSummaryAndThreshold() {
867   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
868   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
869   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
870       (Summary->getDetailedSummary()));
871   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
872       (Summary->getDetailedSummary()));
873 }
874 
875 void ProfileGeneratorBase::extractProbesFromRange(
876     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
877     bool FindDisjointRanges) {
878   const RangeSample *PRanges = &RangeCounter;
879   RangeSample Ranges;
880   if (FindDisjointRanges) {
881     findDisjointRanges(Ranges, RangeCounter);
882     PRanges = &Ranges;
883   }
884 
885   for (const auto &Range : *PRanges) {
886     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
887     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
888     uint64_t Count = Range.second;
889 
890     InstructionPointer IP(Binary, RangeBegin, true);
891     // Disjoint ranges may have range in the middle of two instr,
892     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
893     // can be Addr1+1 to Addr2-1. We should ignore such range.
894     if (IP.Address > RangeEnd)
895       continue;
896 
897     do {
898       const AddressProbesMap &Address2ProbesMap =
899           Binary->getAddress2ProbesMap();
900       auto It = Address2ProbesMap.find(IP.Address);
901       if (It != Address2ProbesMap.end()) {
902         for (const auto &Probe : It->second) {
903           ProbeCounter[&Probe] += Count;
904         }
905       }
906     } while (IP.advance() && IP.Address <= RangeEnd);
907   }
908 }
909 
910 // Helper function to extract context prefix string stack
911 // Extract context stack for reusing, leaf context stack will
912 // be added compressed while looking up function profile
913 static void extractPrefixContextStack(
914     SampleContextFrameVector &ContextStack,
915     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
916     ProfiledBinary *Binary) {
917   for (const auto *P : Probes) {
918     Binary->getInlineContextForProbe(P, ContextStack, true);
919   }
920 }
921 
922 void CSProfileGenerator::generateProbeBasedProfile() {
923   // Enable pseudo probe functionalities in SampleProf
924   FunctionSamples::ProfileIsProbeBased = true;
925   for (const auto &CI : SampleCounters) {
926     const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr());
927     SampleContextFrameVector ContextStack;
928     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
929     // Fill in function body samples from probes, also infer caller's samples
930     // from callee's probe
931     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
932     // Fill in boundary samples for a call probe
933     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
934   }
935 }
936 
937 void CSProfileGenerator::populateBodySamplesWithProbes(
938     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
939   ProbeCounterMap ProbeCounter;
940   // Extract the top frame probes by looking up each address among the range in
941   // the Address2ProbeMap
942   extractProbesFromRange(RangeCounter, ProbeCounter);
943   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
944                      std::unordered_set<FunctionSamples *>>
945       FrameSamples;
946   for (const auto &PI : ProbeCounter) {
947     const MCDecodedPseudoProbe *Probe = PI.first;
948     uint64_t Count = PI.second;
949     // Disjoint ranges have introduce zero-filled gap that
950     // doesn't belong to current context, filter them out.
951     if (!Probe->isBlock() || Count == 0)
952       continue;
953     FunctionSamples &FunctionProfile =
954         getFunctionProfileForLeafProbe(ContextStack, Probe);
955     // Record the current frame and FunctionProfile whenever samples are
956     // collected for non-danglie probes. This is for reporting all of the
957     // zero count probes of the frame later.
958     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
959     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
960     FunctionProfile.addTotalSamples(Count);
961     if (Probe->isEntry()) {
962       FunctionProfile.addHeadSamples(Count);
963       // Look up for the caller's function profile
964       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
965       SampleContextFrames CalleeContextId =
966           FunctionProfile.getContext().getContextFrames();
967       if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
968         // Since the context id will be compressed, we have to use callee's
969         // context id to infer caller's context id to ensure they share the
970         // same context prefix.
971         SampleContextFrameVector CallerContextId;
972         SampleContextFrame &&CallerLeafFrameLoc =
973             getCallerContext(CalleeContextId, CallerContextId);
974         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
975         assert(CallerIndex &&
976                "Inferred caller's location index shouldn't be zero!");
977         FunctionSamples &CallerProfile =
978             getFunctionProfileForContext(CallerContextId);
979         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
980         CallerProfile.addBodySamples(CallerIndex, 0, Count);
981         CallerProfile.addTotalSamples(Count);
982         CallerProfile.addCalledTargetSamples(
983             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
984       }
985     }
986   }
987 
988   // Assign zero count for remaining probes without sample hits to
989   // differentiate from probes optimized away, of which the counts are unknown
990   // and will be inferred by the compiler.
991   for (auto &I : FrameSamples) {
992     for (auto *FunctionProfile : I.second) {
993       for (auto *Probe : I.first->getProbes()) {
994         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
995       }
996     }
997   }
998 }
999 
1000 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1001     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1002   for (const auto &BI : BranchCounter) {
1003     uint64_t SourceOffset = BI.first.first;
1004     uint64_t TargetOffset = BI.first.second;
1005     uint64_t Count = BI.second;
1006     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
1007     const MCDecodedPseudoProbe *CallProbe =
1008         Binary->getCallProbeForAddr(SourceAddress);
1009     if (CallProbe == nullptr)
1010       continue;
1011     FunctionSamples &FunctionProfile =
1012         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1013     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1014     FunctionProfile.addTotalSamples(Count);
1015     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
1016     if (CalleeName.size() == 0)
1017       continue;
1018     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1019                                            Count);
1020   }
1021 }
1022 
1023 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1024     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1025 
1026   // Explicitly copy the context for appending the leaf context
1027   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1028                                            ContextStack.end());
1029   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1030   // For leaf inlined context with the top frame, we should strip off the top
1031   // frame's probe id, like:
1032   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1033   auto LeafFrame = NewContextStack.back();
1034   LeafFrame.Location = LineLocation(0, 0);
1035   NewContextStack.pop_back();
1036   // Compress the context string except for the leaf frame
1037   CSProfileGenerator::compressRecursionContext(NewContextStack);
1038   CSProfileGenerator::trimContext(NewContextStack);
1039   NewContextStack.push_back(LeafFrame);
1040 
1041   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1042   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1043   FunctionSamples &FunctionProile =
1044       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
1045   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
1046   return FunctionProile;
1047 }
1048 
1049 } // end namespace sampleprof
1050 } // end namespace llvm
1051