1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "PerfReader.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <algorithm>
15 #include <float.h>
16 #include <unordered_set>
17 #include <utility>
18 
19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
20                                     cl::Required,
21                                     cl::desc("Output profile file"));
22 static cl::alias OutputA("o", cl::desc("Alias for --output"),
23                          cl::aliasopt(OutputFilename));
24 
25 static cl::opt<SampleProfileFormat> OutputFormat(
26     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27     cl::values(
28         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
29         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
35 cl::opt<bool> UseMD5(
36     "use-md5", cl::init(false), cl::Hidden,
37     cl::desc("Use md5 to represent function names in the output profile (only "
38              "meaningful for -extbinary)"));
39 
40 static cl::opt<bool> PopulateProfileSymbolList(
41     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43 
44 static cl::opt<bool> FillZeroForAllFuncs(
45     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46     cl::desc("Attribute all functions' range with zero count "
47              "even it's not hit by any samples."));
48 
49 static cl::opt<int32_t, true> RecursionCompression(
50     "compress-recursion",
51     cl::desc("Compressing recursion by deduplicating adjacent frame "
52              "sequences up to the specified size. -1 means no size limit."),
53     cl::Hidden,
54     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55 
56 static cl::opt<bool>
57     TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore,
58                     cl::desc("If the total count of the profile is smaller "
59                              "than threshold, it will be trimmed."));
60 
61 static cl::opt<bool> CSProfMergeColdContext(
62     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
63     cl::desc("If the total count of context profile is smaller than "
64              "the threshold, it will be merged into context-less base "
65              "profile."));
66 
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
69     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70              "context-less base profile"));
71 
72 static cl::opt<int, true> CSProfMaxContextDepth(
73     "csprof-max-context-depth", cl::ZeroOrMore,
74     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75              "depth limit."),
76     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77 
78 static cl::opt<double> HotFunctionDensityThreshold(
79     "hot-function-density-threshold", llvm::cl::init(1000),
80     llvm::cl::desc(
81         "specify density threshold for hot functions (default: 1000)"),
82     llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84                                  llvm::cl::desc("show profile density details"),
85                                  llvm::cl::Optional);
86 
87 static cl::opt<bool> UpdateTotalSamples(
88     "update-total-samples", llvm::cl::init(false),
89     llvm::cl::desc(
90         "Update total samples by accumulating all its body samples."),
91     llvm::cl::Optional);
92 
93 extern cl::opt<int> ProfileSummaryCutoffHot;
94 
95 static cl::opt<bool> GenCSNestedProfile(
96     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
97     cl::desc("Generate nested function profiles for CSSPGO"));
98 
99 using namespace llvm;
100 using namespace sampleprof;
101 
102 namespace llvm {
103 namespace sampleprof {
104 
105 // Initialize the MaxCompressionSize to -1 which means no size limit
106 int32_t CSProfileGenerator::MaxCompressionSize = -1;
107 
108 int CSProfileGenerator::MaxContextDepth = -1;
109 
110 bool ProfileGeneratorBase::UseFSDiscriminator = false;
111 
112 std::unique_ptr<ProfileGeneratorBase>
113 ProfileGeneratorBase::create(ProfiledBinary *Binary,
114                              const ContextSampleCounterMap *SampleCounters,
115                              bool ProfileIsCSFlat) {
116   std::unique_ptr<ProfileGeneratorBase> Generator;
117   if (ProfileIsCSFlat) {
118     if (Binary->useFSDiscriminator())
119       exitWithError("FS discriminator is not supported in CS profile.");
120     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
121   } else {
122     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
123   }
124   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
125   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
126 
127   return Generator;
128 }
129 
130 std::unique_ptr<ProfileGeneratorBase>
131 ProfileGeneratorBase::create(ProfiledBinary *Binary,
132                              const SampleProfileMap &&Profiles,
133                              bool ProfileIsCSFlat) {
134   std::unique_ptr<ProfileGeneratorBase> Generator;
135   if (ProfileIsCSFlat) {
136     if (Binary->useFSDiscriminator())
137       exitWithError("FS discriminator is not supported in CS profile.");
138     Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles)));
139   } else {
140     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141   }
142   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144 
145   return Generator;
146 }
147 
148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149                                  SampleProfileMap &ProfileMap) {
150   // Populate profile symbol list if extended binary format is used.
151   ProfileSymbolList SymbolList;
152 
153   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154     Binary->populateSymbolListFromDWARF(SymbolList);
155     Writer->setProfileSymbolList(&SymbolList);
156   }
157 
158   if (std::error_code EC = Writer->write(ProfileMap))
159     exitWithError(std::move(EC));
160 }
161 
162 void ProfileGeneratorBase::write() {
163   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164   if (std::error_code EC = WriterOrErr.getError())
165     exitWithError(EC, OutputFilename);
166 
167   if (UseMD5) {
168     if (OutputFormat != SPF_Ext_Binary)
169       WithColor::warning() << "-use-md5 is ignored. Specify "
170                               "--format=extbinary to enable it\n";
171     else
172       WriterOrErr.get()->setUseMD5();
173   }
174 
175   write(std::move(WriterOrErr.get()), ProfileMap);
176 }
177 
178 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179   if (Density == 0.0)
180     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181                             "set too low. Please check your command.\n";
182   else if (Density < HotFunctionDensityThreshold)
183     WithColor::warning()
184         << "AutoFDO is estimated to optimize better with "
185         << format("%.1f", HotFunctionDensityThreshold / Density)
186         << "x more samples. Please consider increasing sampling rate or "
187            "profiling for longer duration to get more samples.\n";
188 
189   if (ShowDensity)
190     outs() << "Minimum profile density for hot functions with top "
191            << format("%.2f",
192                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193                          10000)
194            << "% total samples: " << format("%.1f", Density) << "\n";
195 }
196 
197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198                                               uint64_t HotCntThreshold) {
199   double Density = DBL_MAX;
200   std::vector<const FunctionSamples *> HotFuncs;
201   for (auto &I : Profiles) {
202     auto &FuncSamples = I.second;
203     if (FuncSamples.getTotalSamples() < HotCntThreshold)
204       continue;
205     HotFuncs.emplace_back(&FuncSamples);
206   }
207 
208   for (auto *FuncSamples : HotFuncs) {
209     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210     if (!Func)
211       continue;
212     uint64_t FuncSize = Func->getFuncSize();
213     if (FuncSize == 0)
214       continue;
215     Density =
216         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217                               FuncSize);
218   }
219 
220   return Density == DBL_MAX ? 0.0 : Density;
221 }
222 
223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
224                                               const RangeSample &Ranges) {
225 
226   /*
227   Regions may overlap with each other. Using the boundary info, find all
228   disjoint ranges and their sample count. BoundaryPoint contains the count
229   multiple samples begin/end at this points.
230 
231   |<--100-->|           Sample1
232   |<------200------>|   Sample2
233   A         B       C
234 
235   In the example above,
236   Sample1 begins at A, ends at B, its value is 100.
237   Sample2 beings at A, ends at C, its value is 200.
238   For A, BeginCount is the sum of sample begins at A, which is 300 and no
239   samples ends at A, so EndCount is 0.
240   Then boundary points A, B, and C with begin/end counts are:
241   A: (300, 0)
242   B: (0, 100)
243   C: (0, 200)
244   */
245   struct BoundaryPoint {
246     // Sum of sample counts beginning at this point
247     uint64_t BeginCount = UINT64_MAX;
248     // Sum of sample counts ending at this point
249     uint64_t EndCount = UINT64_MAX;
250     // Is the begin point of a zero range.
251     bool IsZeroRangeBegin = false;
252     // Is the end point of a zero range.
253     bool IsZeroRangeEnd = false;
254 
255     void addBeginCount(uint64_t Count) {
256       if (BeginCount == UINT64_MAX)
257         BeginCount = 0;
258       BeginCount += Count;
259     }
260 
261     void addEndCount(uint64_t Count) {
262       if (EndCount == UINT64_MAX)
263         EndCount = 0;
264       EndCount += Count;
265     }
266   };
267 
268   /*
269   For the above example. With boundary points, follwing logic finds two
270   disjoint region of
271 
272   [A,B]:   300
273   [B+1,C]: 200
274 
275   If there is a boundary point that both begin and end, the point itself
276   becomes a separate disjoint region. For example, if we have original
277   ranges of
278 
279   |<--- 100 --->|
280                 |<--- 200 --->|
281   A             B             C
282 
283   there are three boundary points with their begin/end counts of
284 
285   A: (100, 0)
286   B: (200, 100)
287   C: (0, 200)
288 
289   the disjoint ranges would be
290 
291   [A, B-1]: 100
292   [B, B]:   300
293   [B+1, C]: 200.
294 
295   Example for zero value range:
296 
297     |<--- 100 --->|
298                        |<--- 200 --->|
299   |<---------------  0 ----------------->|
300   A  B            C    D             E   F
301 
302   [A, B-1]  : 0
303   [B, C]    : 100
304   [C+1, D-1]: 0
305   [D, E]    : 200
306   [E+1, F]  : 0
307   */
308   std::map<uint64_t, BoundaryPoint> Boundaries;
309 
310   for (const auto &Item : Ranges) {
311     assert(Item.first.first <= Item.first.second &&
312            "Invalid instruction range");
313     auto &BeginPoint = Boundaries[Item.first.first];
314     auto &EndPoint = Boundaries[Item.first.second];
315     uint64_t Count = Item.second;
316 
317     BeginPoint.addBeginCount(Count);
318     EndPoint.addEndCount(Count);
319     if (Count == 0) {
320       BeginPoint.IsZeroRangeBegin = true;
321       EndPoint.IsZeroRangeEnd = true;
322     }
323   }
324 
325   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
326   // and the next valid address
327   uint64_t BeginAddress = UINT64_MAX;
328   int ZeroRangeDepth = 0;
329   uint64_t Count = 0;
330   for (const auto &Item : Boundaries) {
331     uint64_t Address = Item.first;
332     const BoundaryPoint &Point = Item.second;
333     if (Point.BeginCount != UINT64_MAX) {
334       if (BeginAddress != UINT64_MAX)
335         DisjointRanges[{BeginAddress, Address - 1}] = Count;
336       Count += Point.BeginCount;
337       BeginAddress = Address;
338       ZeroRangeDepth += Point.IsZeroRangeBegin;
339     }
340     if (Point.EndCount != UINT64_MAX) {
341       assert((BeginAddress != UINT64_MAX) &&
342              "First boundary point cannot be 'end' point");
343       DisjointRanges[{BeginAddress, Address}] = Count;
344       assert(Count >= Point.EndCount && "Mismatched live ranges");
345       Count -= Point.EndCount;
346       BeginAddress = Address + 1;
347       ZeroRangeDepth -= Point.IsZeroRangeEnd;
348       // If the remaining count is zero and it's no longer in a zero range, this
349       // means we consume all the ranges before, thus mark BeginAddress as
350       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
351       //  [<---- 10 ---->]
352       //                       [<---- 20 ---->]
353       //   A             B     C              D
354       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
355       // have the [B+1, C-1] zero range.
356       if (Count == 0 && ZeroRangeDepth == 0)
357         BeginAddress = UINT64_MAX;
358     }
359   }
360 }
361 
362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364     uint64_t Count) {
365   // Use the maximum count of samples with same line location
366   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367 
368   // Use duplication factor to compensated for loop unroll/vectorization.
369   // Note that this is only needed when we're taking MAX of the counts at
370   // the location instead of SUM.
371   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372 
373   ErrorOr<uint64_t> R =
374       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375 
376   uint64_t PreviousCount = R ? R.get() : 0;
377   if (PreviousCount <= Count) {
378     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379                                    Count - PreviousCount);
380   }
381 }
382 
383 void ProfileGeneratorBase::updateTotalSamples() {
384   if (!UpdateTotalSamples)
385     return;
386 
387   for (auto &Item : ProfileMap) {
388     FunctionSamples &FunctionProfile = Item.second;
389     FunctionProfile.updateTotalSamples();
390   }
391 }
392 
393 void ProfileGeneratorBase::collectProfiledFunctions() {
394   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
395   if (SampleCounters) {
396     // Go through all the stacks, ranges and branches in sample counters, use
397     // the start of the range to look up the function it belongs and record the
398     // function.
399     for (const auto &CI : *SampleCounters) {
400       if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
401         for (auto Addr : CtxKey->Context) {
402           if (FuncRange *FRange = Binary->findFuncRangeForOffset(
403                   Binary->virtualAddrToOffset(Addr)))
404             ProfiledFunctions.insert(FRange->Func);
405         }
406       }
407 
408       for (auto Item : CI.second.RangeCounter) {
409         uint64_t StartOffset = Item.first.first;
410         if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
411           ProfiledFunctions.insert(FRange->Func);
412       }
413 
414       for (auto Item : CI.second.BranchCounter) {
415         uint64_t SourceOffset = Item.first.first;
416         uint64_t TargetOffset = Item.first.first;
417         if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
418           ProfiledFunctions.insert(FRange->Func);
419         if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
420           ProfiledFunctions.insert(FRange->Func);
421       }
422     }
423   } else {
424     // This is for the case the input is a llvm sample profile.
425     for (const auto &FS : ProfileMap) {
426       if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
427         ProfiledFunctions.insert(Func);
428     }
429   }
430 
431   Binary->setProfiledFunctions(ProfiledFunctions);
432 }
433 
434 FunctionSamples &
435 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
436   SampleContext Context(FuncName);
437   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
438   if (Ret.second) {
439     FunctionSamples &FProfile = Ret.first->second;
440     FProfile.setContext(Context);
441   }
442   return Ret.first->second;
443 }
444 
445 void ProfileGenerator::generateProfile() {
446   collectProfiledFunctions();
447 
448   if (Binary->usePseudoProbes())
449     Binary->decodePseudoProbe();
450 
451   if (SampleCounters) {
452     if (Binary->usePseudoProbes()) {
453       generateProbeBasedProfile();
454     } else {
455       generateLineNumBasedProfile();
456     }
457   }
458 
459   postProcessProfiles();
460 }
461 
462 void ProfileGenerator::postProcessProfiles() {
463   computeSummaryAndThreshold();
464   trimColdProfiles(ProfileMap, ColdCountThreshold);
465   calculateAndShowDensity(ProfileMap);
466 }
467 
468 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
469                                         uint64_t ColdCntThreshold) {
470   if (!TrimColdProfile)
471     return;
472 
473   // Move cold profiles into a tmp container.
474   std::vector<SampleContext> ColdProfiles;
475   for (const auto &I : ProfileMap) {
476     if (I.second.getTotalSamples() < ColdCntThreshold)
477       ColdProfiles.emplace_back(I.first);
478   }
479 
480   // Remove the cold profile from ProfileMap.
481   for (const auto &I : ColdProfiles)
482     ProfileMap.erase(I);
483 }
484 
485 void ProfileGenerator::generateLineNumBasedProfile() {
486   assert(SampleCounters->size() == 1 &&
487          "Must have one entry for profile generation.");
488   const SampleCounter &SC = SampleCounters->begin()->second;
489   // Fill in function body samples
490   populateBodySamplesForAllFunctions(SC.RangeCounter);
491   // Fill in boundary sample counts as well as call site samples for calls
492   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
493 
494   updateTotalSamples();
495 }
496 
497 void ProfileGenerator::generateProbeBasedProfile() {
498   assert(SampleCounters->size() == 1 &&
499          "Must have one entry for profile generation.");
500   // Enable pseudo probe functionalities in SampleProf
501   FunctionSamples::ProfileIsProbeBased = true;
502   const SampleCounter &SC = SampleCounters->begin()->second;
503   // Fill in function body samples
504   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
505   // Fill in boundary sample counts as well as call site samples for calls
506   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
507 
508   updateTotalSamples();
509 }
510 
511 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
512     const RangeSample &RangeCounter) {
513   ProbeCounterMap ProbeCounter;
514   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
515   // inside extractProbesFromRange.
516   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
517                          false);
518 
519   for (const auto &PI : ProbeCounter) {
520     const MCDecodedPseudoProbe *Probe = PI.first;
521     uint64_t Count = PI.second;
522     SampleContextFrameVector FrameVec;
523     Binary->getInlineContextForProbe(Probe, FrameVec, true);
524     FunctionSamples &FunctionProfile =
525         getLeafProfileAndAddTotalSamples(FrameVec, Count);
526     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
527     if (Probe->isEntry())
528       FunctionProfile.addHeadSamples(Count);
529   }
530 }
531 
532 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
533     const BranchSample &BranchCounters) {
534   for (const auto &Entry : BranchCounters) {
535     uint64_t SourceOffset = Entry.first.first;
536     uint64_t TargetOffset = Entry.first.second;
537     uint64_t Count = Entry.second;
538     assert(Count != 0 && "Unexpected zero weight branch");
539 
540     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
541     if (CalleeName.size() == 0)
542       continue;
543 
544     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
545     const MCDecodedPseudoProbe *CallProbe =
546         Binary->getCallProbeForAddr(SourceAddress);
547     if (CallProbe == nullptr)
548       continue;
549 
550     // Record called target sample and its count.
551     SampleContextFrameVector FrameVec;
552     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
553 
554     if (!FrameVec.empty()) {
555       FunctionSamples &FunctionProfile =
556           getLeafProfileAndAddTotalSamples(FrameVec, 0);
557       FunctionProfile.addCalledTargetSamples(
558           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
559     }
560   }
561 }
562 
563 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
564     const SampleContextFrameVector &FrameVec, uint64_t Count) {
565   // Get top level profile
566   FunctionSamples *FunctionProfile =
567       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
568   FunctionProfile->addTotalSamples(Count);
569   if (Binary->usePseudoProbes()) {
570     const auto *FuncDesc = Binary->getFuncDescForGUID(
571         Function::getGUID(FunctionProfile->getName()));
572     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
573   }
574 
575   for (size_t I = 1; I < FrameVec.size(); I++) {
576     LineLocation Callsite(
577         FrameVec[I - 1].Location.LineOffset,
578         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
579     FunctionSamplesMap &SamplesMap =
580         FunctionProfile->functionSamplesAt(Callsite);
581     auto Ret =
582         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
583     if (Ret.second) {
584       SampleContext Context(FrameVec[I].FuncName);
585       Ret.first->second.setContext(Context);
586     }
587     FunctionProfile = &Ret.first->second;
588     FunctionProfile->addTotalSamples(Count);
589     if (Binary->usePseudoProbes()) {
590       const auto *FuncDesc = Binary->getFuncDescForGUID(
591           Function::getGUID(FunctionProfile->getName()));
592       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
593     }
594   }
595 
596   return *FunctionProfile;
597 }
598 
599 RangeSample
600 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
601   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
602   if (FillZeroForAllFuncs) {
603     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
604       for (auto &R : FuncI.second.Ranges) {
605         Ranges[{R.first, R.second - 1}] += 0;
606       }
607     }
608   } else {
609     // For each range, we search for all ranges of the function it belongs to
610     // and initialize it with zero count, so it remains zero if doesn't hit any
611     // samples. This is to be consistent with compiler that interpret zero count
612     // as unexecuted(cold).
613     for (const auto &I : RangeCounter) {
614       uint64_t StartOffset = I.first.first;
615       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
616         Ranges[{Range.first, Range.second - 1}] += 0;
617     }
618   }
619   RangeSample DisjointRanges;
620   findDisjointRanges(DisjointRanges, Ranges);
621   return DisjointRanges;
622 }
623 
624 void ProfileGenerator::populateBodySamplesForAllFunctions(
625     const RangeSample &RangeCounter) {
626   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
627     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
628     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
629     uint64_t Count = Range.second;
630 
631     InstructionPointer IP(Binary, RangeBegin, true);
632     // Disjoint ranges may have range in the middle of two instr,
633     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
634     // can be Addr1+1 to Addr2-1. We should ignore such range.
635     if (IP.Address > RangeEnd)
636       continue;
637 
638     do {
639       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
640       const SampleContextFrameVector &FrameVec =
641           Binary->getFrameLocationStack(Offset);
642       if (!FrameVec.empty()) {
643         // FIXME: As accumulating total count per instruction caused some
644         // regression, we changed to accumulate total count per byte as a
645         // workaround. Tuning hotness threshold on the compiler side might be
646         // necessary in the future.
647         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
648             FrameVec, Count * Binary->getInstSize(Offset));
649         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
650                                             Count);
651       }
652     } while (IP.advance() && IP.Address <= RangeEnd);
653   }
654 }
655 
656 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
657   // Get the function range by branch target if it's a call branch.
658   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
659 
660   // We won't accumulate sample count for a range whose start is not the real
661   // function entry such as outlined function or inner labels.
662   if (!FRange || !FRange->IsFuncEntry)
663     return StringRef();
664 
665   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
666 }
667 
668 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
669     const BranchSample &BranchCounters) {
670   for (const auto &Entry : BranchCounters) {
671     uint64_t SourceOffset = Entry.first.first;
672     uint64_t TargetOffset = Entry.first.second;
673     uint64_t Count = Entry.second;
674     assert(Count != 0 && "Unexpected zero weight branch");
675 
676     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
677     if (CalleeName.size() == 0)
678       continue;
679     // Record called target sample and its count.
680     const SampleContextFrameVector &FrameVec =
681         Binary->getFrameLocationStack(SourceOffset);
682     if (!FrameVec.empty()) {
683       FunctionSamples &FunctionProfile =
684           getLeafProfileAndAddTotalSamples(FrameVec, 0);
685       FunctionProfile.addCalledTargetSamples(
686           FrameVec.back().Location.LineOffset,
687           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
688           CalleeName, Count);
689     }
690     // Add head samples for callee.
691     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
692     CalleeProfile.addHeadSamples(Count);
693   }
694 }
695 
696 void ProfileGeneratorBase::calculateAndShowDensity(
697     const SampleProfileMap &Profiles) {
698   double Density = calculateDensity(Profiles, HotCountThreshold);
699   showDensitySuggestion(Density);
700 }
701 
702 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
703     const SampleContextFrameVector &Context, bool WasLeafInlined) {
704   auto I = ProfileMap.find(SampleContext(Context));
705   if (I == ProfileMap.end()) {
706     // Save the new context for future references.
707     SampleContextFrames NewContext = *Contexts.insert(Context).first;
708     SampleContext FContext(NewContext, RawContext);
709     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
710     if (WasLeafInlined)
711       FContext.setAttribute(ContextWasInlined);
712     FunctionSamples &FProfile = Ret.first->second;
713     FProfile.setContext(FContext);
714     return Ret.first->second;
715   } else {
716     // Update ContextWasInlined attribute for existing contexts.
717     // The current function can be called in two ways:
718     //  - when processing a probe of the current frame
719     //  - when processing the entry probe of an inlinee's frame, which
720     //    is then used to update the callsite count of the current frame.
721     // The two can happen in any order, hence here we are making sure
722     // `ContextWasInlined` is always set as expected.
723     // TODO: Note that the former does not always happen if no probes of the
724     // current frame has samples, and if the latter happens, we could lose the
725     // attribute. This should be fixed.
726     if (WasLeafInlined)
727       I->second.getContext().setAttribute(ContextWasInlined);
728   }
729 
730   return I->second;
731 }
732 
733 void CSProfileGenerator::generateProfile() {
734   FunctionSamples::ProfileIsCSFlat = true;
735 
736   collectProfiledFunctions();
737 
738   if (Binary->usePseudoProbes())
739     Binary->decodePseudoProbe();
740 
741   if (SampleCounters) {
742     if (Binary->usePseudoProbes()) {
743       generateProbeBasedProfile();
744     } else {
745       generateLineNumBasedProfile();
746     }
747   }
748 
749   if (Binary->getTrackFuncContextSize())
750     computeSizeForProfiledFunctions();
751 
752   postProcessProfiles();
753 }
754 
755 void CSProfileGenerator::computeSizeForProfiledFunctions() {
756   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
757   for (auto *Func : Binary->getProfiledFunctions())
758     Binary->computeInlinedContextSizeForFunc(Func);
759 
760   // Flush the symbolizer to save memory.
761   Binary->flushSymbolizer();
762 }
763 
764 void CSProfileGenerator::generateLineNumBasedProfile() {
765   for (const auto &CI : *SampleCounters) {
766     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
767 
768     FunctionSamples *FunctionProfile = nullptr;
769     // Sample context will be empty if the jump is an external-to-internal call
770     // pattern, the head samples should be added for the internal function.
771     if (!CtxKey->Context.empty()) {
772       // Get or create function profile for the range
773       FunctionProfile = &getFunctionProfileForContext(CtxKey->Context,
774                                                       CtxKey->WasLeafInlined);
775       // Fill in function body samples
776       populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter);
777     }
778     // Fill in boundary sample counts as well as call site samples for calls
779     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
780                                        CI.second.BranchCounter);
781   }
782   // Fill in call site value sample for inlined calls and also use context to
783   // infer missing samples. Since we don't have call count for inlined
784   // functions, we estimate it from inlinee's profile using the entry of the
785   // body sample.
786   populateInferredFunctionSamples();
787 
788   updateTotalSamples();
789 }
790 
791 void CSProfileGenerator::populateBodySamplesForFunction(
792     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
793   // Compute disjoint ranges first, so we can use MAX
794   // for calculating count for each location.
795   RangeSample Ranges;
796   findDisjointRanges(Ranges, RangeCounter);
797   for (const auto &Range : Ranges) {
798     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
799     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
800     uint64_t Count = Range.second;
801     // Disjoint ranges have introduce zero-filled gap that
802     // doesn't belong to current context, filter them out.
803     if (Count == 0)
804       continue;
805 
806     InstructionPointer IP(Binary, RangeBegin, true);
807     // Disjoint ranges may have range in the middle of two instr,
808     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
809     // can be Addr1+1 to Addr2-1. We should ignore such range.
810     if (IP.Address > RangeEnd)
811       continue;
812 
813     do {
814       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
815       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
816       if (LeafLoc.hasValue()) {
817         // Recording body sample for this specific context
818         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
819         FunctionProfile.addTotalSamples(Count);
820       }
821     } while (IP.advance() && IP.Address <= RangeEnd);
822   }
823 }
824 
825 void CSProfileGenerator::populateBoundarySamplesForFunction(
826     SampleContextFrames ContextId, FunctionSamples *CallerProfile,
827     const BranchSample &BranchCounters) {
828 
829   for (const auto &Entry : BranchCounters) {
830     uint64_t SourceOffset = Entry.first.first;
831     uint64_t TargetOffset = Entry.first.second;
832     uint64_t Count = Entry.second;
833     assert(Count != 0 && "Unexpected zero weight branch");
834 
835     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
836     if (CalleeName.size() == 0)
837       continue;
838 
839     SampleContextFrameVector CalleeCtx;
840     if (CallerProfile) {
841       assert(!ContextId.empty() &&
842              "CallerProfile is null only if ContextId is empty");
843       // Record called target sample and its count
844       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
845       if (LeafLoc.hasValue()) {
846         CallerProfile->addCalledTargetSamples(
847             LeafLoc->Location.LineOffset,
848             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
849             Count);
850 
851         // Record head sample for called target(callee)
852         CalleeCtx.append(ContextId.begin(), ContextId.end());
853         assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
854                "Leaf function name doesn't match");
855         CalleeCtx.back() = *LeafLoc;
856       }
857     }
858     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
859     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
860     CalleeProfile.addHeadSamples(Count);
861   }
862 }
863 
864 static SampleContextFrame
865 getCallerContext(SampleContextFrames CalleeContext,
866                  SampleContextFrameVector &CallerContext) {
867   assert(CalleeContext.size() > 1 && "Unexpected empty context");
868   CalleeContext = CalleeContext.drop_back();
869   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
870   SampleContextFrame CallerFrame = CallerContext.back();
871   CallerContext.back().Location = LineLocation(0, 0);
872   return CallerFrame;
873 }
874 
875 void CSProfileGenerator::populateInferredFunctionSamples() {
876   for (const auto &Item : ProfileMap) {
877     const auto &CalleeContext = Item.first;
878     const FunctionSamples &CalleeProfile = Item.second;
879 
880     // If we already have head sample counts, we must have value profile
881     // for call sites added already. Skip to avoid double counting.
882     if (CalleeProfile.getHeadSamples())
883       continue;
884     // If we don't have context, nothing to do for caller's call site.
885     // This could happen for entry point function.
886     if (CalleeContext.isBaseContext())
887       continue;
888 
889     // Infer Caller's frame loc and context ID through string splitting
890     SampleContextFrameVector CallerContextId;
891     SampleContextFrame &&CallerLeafFrameLoc =
892         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
893     SampleContextFrames CallerContext(CallerContextId);
894 
895     // It's possible that we haven't seen any sample directly in the caller,
896     // in which case CallerProfile will not exist. But we can't modify
897     // ProfileMap while iterating it.
898     // TODO: created function profile for those callers too
899     if (ProfileMap.find(CallerContext) == ProfileMap.end())
900       continue;
901     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
902 
903     // Since we don't have call count for inlined functions, we
904     // estimate it from inlinee's profile using entry body sample.
905     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
906     // If we don't have samples with location, use 1 to indicate live.
907     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
908       EstimatedCallCount = 1;
909     CallerProfile.addCalledTargetSamples(
910         CallerLeafFrameLoc.Location.LineOffset,
911         CallerLeafFrameLoc.Location.Discriminator,
912         CalleeProfile.getContext().getName(), EstimatedCallCount);
913     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
914                                  CallerLeafFrameLoc.Location.Discriminator,
915                                  EstimatedCallCount);
916     CallerProfile.addTotalSamples(EstimatedCallCount);
917   }
918 }
919 
920 void CSProfileGenerator::postProcessProfiles() {
921   // Compute hot/cold threshold based on profile. This will be used for cold
922   // context profile merging/trimming.
923   computeSummaryAndThreshold();
924 
925   // Run global pre-inliner to adjust/merge context profile based on estimated
926   // inline decisions.
927   if (EnableCSPreInliner) {
928     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
929         .run();
930     // Turn off the profile merger by default unless it is explicitly enabled.
931     if (!CSProfMergeColdContext.getNumOccurrences())
932       CSProfMergeColdContext = false;
933   }
934 
935   // Trim and merge cold context profile using cold threshold above.
936   if (TrimColdProfile || CSProfMergeColdContext) {
937     SampleContextTrimmer(ProfileMap)
938         .trimAndMergeColdContextProfiles(
939             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
940             CSProfMaxColdContextDepth, EnableCSPreInliner);
941   }
942 
943   // Merge function samples of CS profile to calculate profile density.
944   sampleprof::SampleProfileMap ContextLessProfiles;
945   for (const auto &I : ProfileMap) {
946     ContextLessProfiles[I.second.getName()].merge(I.second);
947   }
948 
949   calculateAndShowDensity(ContextLessProfiles);
950   if (GenCSNestedProfile) {
951     CSProfileConverter CSConverter(ProfileMap);
952     CSConverter.convertProfiles();
953     FunctionSamples::ProfileIsCSFlat = false;
954     FunctionSamples::ProfileIsCSNested = EnableCSPreInliner;
955   }
956 }
957 
958 void ProfileGeneratorBase::computeSummaryAndThreshold() {
959   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
960   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
961   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
962       (Summary->getDetailedSummary()));
963   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
964       (Summary->getDetailedSummary()));
965 }
966 
967 void ProfileGeneratorBase::extractProbesFromRange(
968     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
969     bool FindDisjointRanges) {
970   const RangeSample *PRanges = &RangeCounter;
971   RangeSample Ranges;
972   if (FindDisjointRanges) {
973     findDisjointRanges(Ranges, RangeCounter);
974     PRanges = &Ranges;
975   }
976 
977   for (const auto &Range : *PRanges) {
978     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
979     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
980     uint64_t Count = Range.second;
981 
982     InstructionPointer IP(Binary, RangeBegin, true);
983     // Disjoint ranges may have range in the middle of two instr,
984     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
985     // can be Addr1+1 to Addr2-1. We should ignore such range.
986     if (IP.Address > RangeEnd)
987       continue;
988 
989     do {
990       const AddressProbesMap &Address2ProbesMap =
991           Binary->getAddress2ProbesMap();
992       auto It = Address2ProbesMap.find(IP.Address);
993       if (It != Address2ProbesMap.end()) {
994         for (const auto &Probe : It->second) {
995           ProbeCounter[&Probe] += Count;
996         }
997       }
998     } while (IP.advance() && IP.Address <= RangeEnd);
999   }
1000 }
1001 
1002 static void
1003 extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1004                           const SmallVectorImpl<uint64_t> &Addresses,
1005                           ProfiledBinary *Binary) {
1006   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1007   for (auto Addr : reverse(Addresses)) {
1008     const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
1009     // These could be the cases when a probe is not found at a calliste. Cutting
1010     // off the context from here since the inliner will not know how to consume
1011     // a context with unknown callsites.
1012     // 1. for functions that are not sampled when
1013     // --decode-probe-for-profiled-functions-only is on.
1014     // 2. for a merged callsite. Callsite merging may cause the loss of original
1015     // probe IDs.
1016     // 3. for an external callsite.
1017     if (!CallProbe)
1018       break;
1019     Probes.push_back(CallProbe);
1020   }
1021 
1022   std::reverse(Probes.begin(), Probes.end());
1023 
1024   // Extract context stack for reusing, leaf context stack will be added
1025   // compressed while looking up function profile.
1026   for (const auto *P : Probes) {
1027     Binary->getInlineContextForProbe(P, ContextStack, true);
1028   }
1029 }
1030 
1031 void CSProfileGenerator::generateProbeBasedProfile() {
1032   // Enable pseudo probe functionalities in SampleProf
1033   FunctionSamples::ProfileIsProbeBased = true;
1034   for (const auto &CI : *SampleCounters) {
1035     const AddrBasedCtxKey *CtxKey =
1036         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1037     SampleContextFrameVector ContextStack;
1038     extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
1039     // Fill in function body samples from probes, also infer caller's samples
1040     // from callee's probe
1041     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1042     // Fill in boundary samples for a call probe
1043     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1044   }
1045 }
1046 
1047 void CSProfileGenerator::populateBodySamplesWithProbes(
1048     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1049   ProbeCounterMap ProbeCounter;
1050   // Extract the top frame probes by looking up each address among the range in
1051   // the Address2ProbeMap
1052   extractProbesFromRange(RangeCounter, ProbeCounter);
1053   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1054                      std::unordered_set<FunctionSamples *>>
1055       FrameSamples;
1056   for (const auto &PI : ProbeCounter) {
1057     const MCDecodedPseudoProbe *Probe = PI.first;
1058     uint64_t Count = PI.second;
1059     // Disjoint ranges have introduce zero-filled gap that
1060     // doesn't belong to current context, filter them out.
1061     if (!Probe->isBlock() || Count == 0)
1062       continue;
1063     FunctionSamples &FunctionProfile =
1064         getFunctionProfileForLeafProbe(ContextStack, Probe);
1065     // Record the current frame and FunctionProfile whenever samples are
1066     // collected for non-danglie probes. This is for reporting all of the
1067     // zero count probes of the frame later.
1068     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1069     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1070     FunctionProfile.addTotalSamples(Count);
1071     if (Probe->isEntry()) {
1072       FunctionProfile.addHeadSamples(Count);
1073       // Look up for the caller's function profile
1074       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1075       SampleContextFrames CalleeContextId =
1076           FunctionProfile.getContext().getContextFrames();
1077       if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
1078         // Since the context id will be compressed, we have to use callee's
1079         // context id to infer caller's context id to ensure they share the
1080         // same context prefix.
1081         SampleContextFrameVector CallerContextId;
1082         SampleContextFrame &&CallerLeafFrameLoc =
1083             getCallerContext(CalleeContextId, CallerContextId);
1084         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
1085         assert(CallerIndex &&
1086                "Inferred caller's location index shouldn't be zero!");
1087         FunctionSamples &CallerProfile =
1088             getFunctionProfileForContext(CallerContextId);
1089         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1090         CallerProfile.addBodySamples(CallerIndex, 0, Count);
1091         CallerProfile.addTotalSamples(Count);
1092         CallerProfile.addCalledTargetSamples(
1093             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
1094       }
1095     }
1096   }
1097 
1098   // Assign zero count for remaining probes without sample hits to
1099   // differentiate from probes optimized away, of which the counts are unknown
1100   // and will be inferred by the compiler.
1101   for (auto &I : FrameSamples) {
1102     for (auto *FunctionProfile : I.second) {
1103       for (auto *Probe : I.first->getProbes()) {
1104         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1105       }
1106     }
1107   }
1108 }
1109 
1110 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1111     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1112   for (const auto &BI : BranchCounter) {
1113     uint64_t SourceOffset = BI.first.first;
1114     uint64_t TargetOffset = BI.first.second;
1115     uint64_t Count = BI.second;
1116     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
1117     const MCDecodedPseudoProbe *CallProbe =
1118         Binary->getCallProbeForAddr(SourceAddress);
1119     if (CallProbe == nullptr)
1120       continue;
1121     FunctionSamples &FunctionProfile =
1122         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1123     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1124     FunctionProfile.addTotalSamples(Count);
1125     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
1126     if (CalleeName.size() == 0)
1127       continue;
1128     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1129                                            Count);
1130   }
1131 }
1132 
1133 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1134     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1135 
1136   // Explicitly copy the context for appending the leaf context
1137   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1138                                            ContextStack.end());
1139   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1140   // For leaf inlined context with the top frame, we should strip off the top
1141   // frame's probe id, like:
1142   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1143   auto LeafFrame = NewContextStack.back();
1144   LeafFrame.Location = LineLocation(0, 0);
1145   NewContextStack.pop_back();
1146   // Compress the context string except for the leaf frame
1147   CSProfileGenerator::compressRecursionContext(NewContextStack);
1148   CSProfileGenerator::trimContext(NewContextStack);
1149   NewContextStack.push_back(LeafFrame);
1150 
1151   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1152   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1153   FunctionSamples &FunctionProile =
1154       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
1155   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
1156   return FunctionProile;
1157 }
1158 
1159 } // end namespace sampleprof
1160 } // end namespace llvm
1161