1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "PerfReader.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <algorithm>
15 #include <float.h>
16 #include <unordered_set>
17 #include <utility>
18 
// Path of the profile file to generate. The option is mandatory
// (cl::Required); write() passes it to SampleProfileWriter::create().
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                    cl::Required,
                                    cl::desc("Output profile file"));
// Short spelling ("-o") that forwards to --output.
static cl::alias OutputA("o", cl::desc("Alias for --output"),
                         cl::aliasopt(OutputFilename));
24 
25 static cl::opt<SampleProfileFormat> OutputFormat(
26     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27     cl::values(
28         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
29         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
// Requests MD5-encoded function names. write() only honors it when the output
// format is extbinary and prints a warning for any other format.
cl::opt<bool> UseMD5(
    "use-md5", cl::init(false), cl::Hidden,
    cl::desc("Use md5 to represent function names in the output profile (only "
             "meaningful for -extbinary)"));

// When set together with the extbinary format, write() fills a
// ProfileSymbolList from the binary's DWARF and attaches it to the writer.
static cl::opt<bool> PopulateProfileSymbolList(
    "populate-profile-symbol-list", cl::init(false), cl::Hidden,
    cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));

// When set, preprocessRangeCounter() seeds a zero-count range for every range
// of every function in the binary, not only for functions that were sampled.
static cl::opt<bool> FillZeroForAllFuncs(
    "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
    cl::desc("Attribute all functions' range with zero count "
             "even it's not hit by any samples."));

// Stored directly in CSProfileGenerator::MaxCompressionSize (cl::location),
// whose definition below starts at -1 (no size limit).
static cl::opt<int32_t, true> RecursionCompression(
    "compress-recursion",
    cl::desc("Compressing recursion by deduplicating adjacent frame "
             "sequences up to the specified size. -1 means no size limit."),
    cl::Hidden,
    cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));

// Gate for ProfileGenerator::trimColdProfiles(); no explicit cl::init is
// given here.
static cl::opt<bool>
    TrimColdProfile("trim-cold-profile",
                    cl::desc("If the total count of the profile is smaller "
                             "than threshold, it will be trimmed."));

// Enabled by default; see the option description for the intended effect.
static cl::opt<bool> CSProfMergeColdContext(
    "csprof-merge-cold-context", cl::init(true),
    cl::desc("If the total count of context profile is smaller than "
             "the threshold, it will be merged into context-less base "
             "profile."));

// Number of trailing context frames kept when a cold context is merged;
// defaults to 1.
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
    "csprof-max-cold-context-depth", cl::init(1),
    cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
             "context-less base profile"));

// Stored directly in CSProfileGenerator::MaxContextDepth (cl::location),
// whose definition below starts at -1 (no depth limit).
static cl::opt<int, true> CSProfMaxContextDepth(
    "csprof-max-context-depth",
    cl::desc("Keep the last K contexts while merging profile. -1 means no "
             "depth limit."),
    cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));

// Density below which showDensitySuggestion() warns that more samples would
// help.
static cl::opt<double> HotFunctionDensityThreshold(
    "hot-function-density-threshold", llvm::cl::init(1000),
    llvm::cl::desc(
        "specify density threshold for hot functions (default: 1000)"),
    llvm::cl::Optional);
// When set, showDensitySuggestion() also prints the computed density to
// outs().
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                 llvm::cl::desc("show profile density details"),
                                 llvm::cl::Optional);

// When set, updateFunctionSamples() additionally calls updateTotalSamples().
static cl::opt<bool> UpdateTotalSamples(
    "update-total-samples", llvm::cl::init(false),
    llvm::cl::desc(
        "Update total samples by accumulating all its body samples."),
    llvm::cl::Optional);

// Defined in another translation unit. showDensitySuggestion() divides its
// value by 10000 to print it as a percentage.
extern cl::opt<int> ProfileSummaryCutoffHot;

// Enabled by default; not referenced in the part of the file shown here.
static cl::opt<bool> GenCSNestedProfile(
    "gen-cs-nested-profile", cl::Hidden, cl::init(true),
    cl::desc("Generate nested function profiles for CSSPGO"));
98 
99 using namespace llvm;
100 using namespace sampleprof;
101 
102 namespace llvm {
103 namespace sampleprof {
104 
// Initialize the MaxCompressionSize to -1 which means no size limit.
// This variable is the storage behind the --compress-recursion option.
int32_t CSProfileGenerator::MaxCompressionSize = -1;

// Storage behind the --csprof-max-context-depth option; -1 means no depth
// limit.
int CSProfileGenerator::MaxContextDepth = -1;

// Set by ProfileGeneratorBase::create() from the profiled binary.
bool ProfileGeneratorBase::UseFSDiscriminator = false;
111 
112 std::unique_ptr<ProfileGeneratorBase>
113 ProfileGeneratorBase::create(ProfiledBinary *Binary,
114                              const ContextSampleCounterMap *SampleCounters,
115                              bool ProfileIsCS) {
116   std::unique_ptr<ProfileGeneratorBase> Generator;
117   if (ProfileIsCS) {
118     if (Binary->useFSDiscriminator())
119       exitWithError("FS discriminator is not supported in CS profile.");
120     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
121   } else {
122     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
123   }
124   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
125   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
126 
127   return Generator;
128 }
129 
130 std::unique_ptr<ProfileGeneratorBase>
131 ProfileGeneratorBase::create(ProfiledBinary *Binary,
132                              const SampleProfileMap &&Profiles,
133                              bool ProfileIsCS) {
134   std::unique_ptr<ProfileGeneratorBase> Generator;
135   if (ProfileIsCS) {
136     if (Binary->useFSDiscriminator())
137       exitWithError("FS discriminator is not supported in CS profile.");
138     Generator.reset(new CSProfileGenerator(Binary, std::move(Profiles)));
139   } else {
140     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141   }
142   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144 
145   return Generator;
146 }
147 
148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149                                  SampleProfileMap &ProfileMap) {
150   // Populate profile symbol list if extended binary format is used.
151   ProfileSymbolList SymbolList;
152 
153   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154     Binary->populateSymbolListFromDWARF(SymbolList);
155     Writer->setProfileSymbolList(&SymbolList);
156   }
157 
158   if (std::error_code EC = Writer->write(ProfileMap))
159     exitWithError(std::move(EC));
160 }
161 
162 void ProfileGeneratorBase::write() {
163   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164   if (std::error_code EC = WriterOrErr.getError())
165     exitWithError(EC, OutputFilename);
166 
167   if (UseMD5) {
168     if (OutputFormat != SPF_Ext_Binary)
169       WithColor::warning() << "-use-md5 is ignored. Specify "
170                               "--format=extbinary to enable it\n";
171     else
172       WriterOrErr.get()->setUseMD5();
173   }
174 
175   write(std::move(WriterOrErr.get()), ProfileMap);
176 }
177 
178 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179   if (Density == 0.0)
180     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181                             "set too low. Please check your command.\n";
182   else if (Density < HotFunctionDensityThreshold)
183     WithColor::warning()
184         << "AutoFDO is estimated to optimize better with "
185         << format("%.1f", HotFunctionDensityThreshold / Density)
186         << "x more samples. Please consider increasing sampling rate or "
187            "profiling for longer duration to get more samples.\n";
188 
189   if (ShowDensity)
190     outs() << "Minimum profile density for hot functions with top "
191            << format("%.2f",
192                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193                          10000)
194            << "% total samples: " << format("%.1f", Density) << "\n";
195 }
196 
197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198                                               uint64_t HotCntThreshold) {
199   double Density = DBL_MAX;
200   std::vector<const FunctionSamples *> HotFuncs;
201   for (auto &I : Profiles) {
202     auto &FuncSamples = I.second;
203     if (FuncSamples.getTotalSamples() < HotCntThreshold)
204       continue;
205     HotFuncs.emplace_back(&FuncSamples);
206   }
207 
208   for (auto *FuncSamples : HotFuncs) {
209     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210     if (!Func)
211       continue;
212     uint64_t FuncSize = Func->getFuncSize();
213     if (FuncSize == 0)
214       continue;
215     Density =
216         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217                               FuncSize);
218   }
219 
220   return Density == DBL_MAX ? 0.0 : Density;
221 }
222 
/// Split the possibly overlapping \p Ranges into non-overlapping ranges.
///
/// \param DisjointRanges output map; each produced inclusive range
///        {begin, end} is assigned the sum of the counts of all input ranges
///        covering it. Entries are written with operator[], so an existing
///        entry with the same key is overwritten.
/// \param Ranges input map from inclusive {begin, end} ranges to counts.
///        Every range must satisfy begin <= end (asserted).
void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
                                              const RangeSample &Ranges) {

  /*
  Regions may overlap with each other. Using the boundary info, find all
  disjoint ranges and their sample count. A BoundaryPoint holds the summed
  counts of the samples that begin and end at that point.

  |<--100-->|           Sample1
  |<------200------>|   Sample2
  A         B       C

  In the example above,
  Sample1 begins at A, ends at B, its value is 100.
  Sample2 begins at A, ends at C, its value is 200.
  For A, BeginCount is the sum of the samples beginning at A, which is 300,
  and no sample ends at A, so EndCount is 0.
  Then boundary points A, B, and C with begin/end counts are:
  A: (300, 0)
  B: (0, 100)
  C: (0, 200)
  */
  struct BoundaryPoint {
    // Sum of sample counts beginning at this point. UINT64_MAX means that no
    // range begins here.
    uint64_t BeginCount = UINT64_MAX;
    // Sum of sample counts ending at this point. UINT64_MAX means that no
    // range ends here.
    uint64_t EndCount = UINT64_MAX;
    // Is the begin point of a zero range.
    bool IsZeroRangeBegin = false;
    // Is the end point of a zero range.
    bool IsZeroRangeEnd = false;

    // Replace the "unset" sentinel with 0 on first use, then accumulate.
    void addBeginCount(uint64_t Count) {
      if (BeginCount == UINT64_MAX)
        BeginCount = 0;
      BeginCount += Count;
    }

    // Replace the "unset" sentinel with 0 on first use, then accumulate.
    void addEndCount(uint64_t Count) {
      if (EndCount == UINT64_MAX)
        EndCount = 0;
      EndCount += Count;
    }
  };

  /*
  For the above example, with the boundary points the following logic finds
  two disjoint regions:

  [A,B]:   300
  [B+1,C]: 200

  If a boundary point is both a begin and an end, the point itself becomes a
  separate disjoint region. For example, if we have original ranges of

  |<--- 100 --->|
                |<--- 200 --->|
  A             B             C

  there are three boundary points with their begin/end counts of

  A: (100, 0)
  B: (200, 100)
  C: (0, 200)

  the disjoint ranges would be

  [A, B-1]: 100
  [B, B]:   300
  [B+1, C]: 200.

  Example for zero value range:

    |<--- 100 --->|
                       |<--- 200 --->|
  |<---------------  0 ----------------->|
  A  B            C    D             E   F

  [A, B-1]  : 0
  [B, C]    : 100
  [C+1, D-1]: 0
  [D, E]    : 200
  [E+1, F]  : 0
  */
  // Ordered by address so the sweep below visits boundaries left to right.
  std::map<uint64_t, BoundaryPoint> Boundaries;

  for (const auto &Item : Ranges) {
    assert(Item.first.first <= Item.first.second &&
           "Invalid instruction range");
    auto &BeginPoint = Boundaries[Item.first.first];
    auto &EndPoint = Boundaries[Item.first.second];
    uint64_t Count = Item.second;

    BeginPoint.addBeginCount(Count);
    EndPoint.addEndCount(Count);
    if (Count == 0) {
      BeginPoint.IsZeroRangeBegin = true;
      EndPoint.IsZeroRangeEnd = true;
    }
  }

  // Use UINT64_MAX to indicate there is no existing range between BeginAddress
  // and the next valid address
  uint64_t BeginAddress = UINT64_MAX;
  // Number of zero-count ranges currently open in the sweep.
  int ZeroRangeDepth = 0;
  // Sum of the counts of all ranges currently open in the sweep.
  uint64_t Count = 0;
  for (const auto &Item : Boundaries) {
    uint64_t Address = Item.first;
    const BoundaryPoint &Point = Item.second;
    if (Point.BeginCount != UINT64_MAX) {
      // A range starts here: close the pending range just before this address
      // (if there is one) and open a new one at this address.
      if (BeginAddress != UINT64_MAX)
        DisjointRanges[{BeginAddress, Address - 1}] = Count;
      Count += Point.BeginCount;
      BeginAddress = Address;
      ZeroRangeDepth += Point.IsZeroRangeBegin;
    }
    if (Point.EndCount != UINT64_MAX) {
      // A range ends here: emit the pending range up to and including this
      // address, then continue right after it.
      assert((BeginAddress != UINT64_MAX) &&
             "First boundary point cannot be 'end' point");
      DisjointRanges[{BeginAddress, Address}] = Count;
      assert(Count >= Point.EndCount && "Mismatched live ranges");
      Count -= Point.EndCount;
      BeginAddress = Address + 1;
      ZeroRangeDepth -= Point.IsZeroRangeEnd;
      // If the remaining count is zero and it's no longer in a zero range, this
      // means we consume all the ranges before, thus mark BeginAddress as
      // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
      //  [<---- 10 ---->]
      //                       [<---- 20 ---->]
      //   A             B     C              D
      // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
      // have the [B+1, C-1] zero range.
      if (Count == 0 && ZeroRangeDepth == 0)
        BeginAddress = UINT64_MAX;
    }
  }
}
361 
362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364     uint64_t Count) {
365   // Use the maximum count of samples with same line location
366   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367 
368   // Use duplication factor to compensated for loop unroll/vectorization.
369   // Note that this is only needed when we're taking MAX of the counts at
370   // the location instead of SUM.
371   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372 
373   ErrorOr<uint64_t> R =
374       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375 
376   uint64_t PreviousCount = R ? R.get() : 0;
377   if (PreviousCount <= Count) {
378     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379                                    Count - PreviousCount);
380   }
381 }
382 
383 void ProfileGeneratorBase::updateTotalSamples() {
384   for (auto &Item : ProfileMap) {
385     FunctionSamples &FunctionProfile = Item.second;
386     FunctionProfile.updateTotalSamples();
387   }
388 }
389 
390 void ProfileGeneratorBase::updateCallsiteSamples() {
391   for (auto &Item : ProfileMap) {
392     FunctionSamples &FunctionProfile = Item.second;
393     FunctionProfile.updateCallsiteSamples();
394   }
395 }
396 
397 void ProfileGeneratorBase::updateFunctionSamples() {
398   updateCallsiteSamples();
399 
400   if (UpdateTotalSamples)
401     updateTotalSamples();
402 }
403 
404 void ProfileGeneratorBase::collectProfiledFunctions() {
405   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
406   if (SampleCounters) {
407     // Go through all the stacks, ranges and branches in sample counters, use
408     // the start of the range to look up the function it belongs and record the
409     // function.
410     for (const auto &CI : *SampleCounters) {
411       if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
412         for (auto Addr : CtxKey->Context) {
413           if (FuncRange *FRange = Binary->findFuncRangeForOffset(
414                   Binary->virtualAddrToOffset(Addr)))
415             ProfiledFunctions.insert(FRange->Func);
416         }
417       }
418 
419       for (auto Item : CI.second.RangeCounter) {
420         uint64_t StartOffset = Item.first.first;
421         if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
422           ProfiledFunctions.insert(FRange->Func);
423       }
424 
425       for (auto Item : CI.second.BranchCounter) {
426         uint64_t SourceOffset = Item.first.first;
427         uint64_t TargetOffset = Item.first.first;
428         if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
429           ProfiledFunctions.insert(FRange->Func);
430         if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
431           ProfiledFunctions.insert(FRange->Func);
432       }
433     }
434   } else {
435     // This is for the case the input is a llvm sample profile.
436     for (const auto &FS : ProfileMap) {
437       if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
438         ProfiledFunctions.insert(Func);
439     }
440   }
441 
442   Binary->setProfiledFunctions(ProfiledFunctions);
443 }
444 
445 FunctionSamples &
446 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
447   SampleContext Context(FuncName);
448   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
449   if (Ret.second) {
450     FunctionSamples &FProfile = Ret.first->second;
451     FProfile.setContext(Context);
452   }
453   return Ret.first->second;
454 }
455 
456 void ProfileGenerator::generateProfile() {
457   collectProfiledFunctions();
458 
459   if (Binary->usePseudoProbes())
460     Binary->decodePseudoProbe();
461 
462   if (SampleCounters) {
463     if (Binary->usePseudoProbes()) {
464       generateProbeBasedProfile();
465     } else {
466       generateLineNumBasedProfile();
467     }
468   }
469 
470   postProcessProfiles();
471 }
472 
473 void ProfileGenerator::postProcessProfiles() {
474   computeSummaryAndThreshold();
475   trimColdProfiles(ProfileMap, ColdCountThreshold);
476   calculateAndShowDensity(ProfileMap);
477 }
478 
479 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
480                                         uint64_t ColdCntThreshold) {
481   if (!TrimColdProfile)
482     return;
483 
484   // Move cold profiles into a tmp container.
485   std::vector<SampleContext> ColdProfiles;
486   for (const auto &I : ProfileMap) {
487     if (I.second.getTotalSamples() < ColdCntThreshold)
488       ColdProfiles.emplace_back(I.first);
489   }
490 
491   // Remove the cold profile from ProfileMap.
492   for (const auto &I : ColdProfiles)
493     ProfileMap.erase(I);
494 }
495 
496 void ProfileGenerator::generateLineNumBasedProfile() {
497   assert(SampleCounters->size() == 1 &&
498          "Must have one entry for profile generation.");
499   const SampleCounter &SC = SampleCounters->begin()->second;
500   // Fill in function body samples
501   populateBodySamplesForAllFunctions(SC.RangeCounter);
502   // Fill in boundary sample counts as well as call site samples for calls
503   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
504 
505   updateFunctionSamples();
506 }
507 
508 void ProfileGenerator::generateProbeBasedProfile() {
509   assert(SampleCounters->size() == 1 &&
510          "Must have one entry for profile generation.");
511   // Enable pseudo probe functionalities in SampleProf
512   FunctionSamples::ProfileIsProbeBased = true;
513   const SampleCounter &SC = SampleCounters->begin()->second;
514   // Fill in function body samples
515   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
516   // Fill in boundary sample counts as well as call site samples for calls
517   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
518 
519   updateFunctionSamples();
520 }
521 
522 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
523     const RangeSample &RangeCounter) {
524   ProbeCounterMap ProbeCounter;
525   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
526   // inside extractProbesFromRange.
527   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
528                          false);
529 
530   for (const auto &PI : ProbeCounter) {
531     const MCDecodedPseudoProbe *Probe = PI.first;
532     uint64_t Count = PI.second;
533     SampleContextFrameVector FrameVec;
534     Binary->getInlineContextForProbe(Probe, FrameVec, true);
535     FunctionSamples &FunctionProfile =
536         getLeafProfileAndAddTotalSamples(FrameVec, Count);
537     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
538     if (Probe->isEntry())
539       FunctionProfile.addHeadSamples(Count);
540   }
541 }
542 
543 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
544     const BranchSample &BranchCounters) {
545   for (const auto &Entry : BranchCounters) {
546     uint64_t SourceOffset = Entry.first.first;
547     uint64_t TargetOffset = Entry.first.second;
548     uint64_t Count = Entry.second;
549     assert(Count != 0 && "Unexpected zero weight branch");
550 
551     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
552     if (CalleeName.size() == 0)
553       continue;
554 
555     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
556     const MCDecodedPseudoProbe *CallProbe =
557         Binary->getCallProbeForAddr(SourceAddress);
558     if (CallProbe == nullptr)
559       continue;
560 
561     // Record called target sample and its count.
562     SampleContextFrameVector FrameVec;
563     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
564 
565     if (!FrameVec.empty()) {
566       FunctionSamples &FunctionProfile =
567           getLeafProfileAndAddTotalSamples(FrameVec, 0);
568       FunctionProfile.addCalledTargetSamples(
569           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
570     }
571   }
572 }
573 
574 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
575     const SampleContextFrameVector &FrameVec, uint64_t Count) {
576   // Get top level profile
577   FunctionSamples *FunctionProfile =
578       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
579   FunctionProfile->addTotalSamples(Count);
580   if (Binary->usePseudoProbes()) {
581     const auto *FuncDesc = Binary->getFuncDescForGUID(
582         Function::getGUID(FunctionProfile->getName()));
583     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
584   }
585 
586   for (size_t I = 1; I < FrameVec.size(); I++) {
587     LineLocation Callsite(
588         FrameVec[I - 1].Location.LineOffset,
589         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
590     FunctionSamplesMap &SamplesMap =
591         FunctionProfile->functionSamplesAt(Callsite);
592     auto Ret =
593         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
594     if (Ret.second) {
595       SampleContext Context(FrameVec[I].FuncName);
596       Ret.first->second.setContext(Context);
597     }
598     FunctionProfile = &Ret.first->second;
599     FunctionProfile->addTotalSamples(Count);
600     if (Binary->usePseudoProbes()) {
601       const auto *FuncDesc = Binary->getFuncDescForGUID(
602           Function::getGUID(FunctionProfile->getName()));
603       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
604     }
605   }
606 
607   return *FunctionProfile;
608 }
609 
610 RangeSample
611 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
612   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
613   if (FillZeroForAllFuncs) {
614     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
615       for (auto &R : FuncI.second.Ranges) {
616         Ranges[{R.first, R.second - 1}] += 0;
617       }
618     }
619   } else {
620     // For each range, we search for all ranges of the function it belongs to
621     // and initialize it with zero count, so it remains zero if doesn't hit any
622     // samples. This is to be consistent with compiler that interpret zero count
623     // as unexecuted(cold).
624     for (const auto &I : RangeCounter) {
625       uint64_t StartOffset = I.first.first;
626       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
627         Ranges[{Range.first, Range.second - 1}] += 0;
628     }
629   }
630   RangeSample DisjointRanges;
631   findDisjointRanges(DisjointRanges, Ranges);
632   return DisjointRanges;
633 }
634 
635 void ProfileGenerator::populateBodySamplesForAllFunctions(
636     const RangeSample &RangeCounter) {
637   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
638     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
639     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
640     uint64_t Count = Range.second;
641 
642     InstructionPointer IP(Binary, RangeBegin, true);
643     // Disjoint ranges may have range in the middle of two instr,
644     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
645     // can be Addr1+1 to Addr2-1. We should ignore such range.
646     if (IP.Address > RangeEnd)
647       continue;
648 
649     do {
650       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
651       const SampleContextFrameVector &FrameVec =
652           Binary->getFrameLocationStack(Offset);
653       if (!FrameVec.empty()) {
654         // FIXME: As accumulating total count per instruction caused some
655         // regression, we changed to accumulate total count per byte as a
656         // workaround. Tuning hotness threshold on the compiler side might be
657         // necessary in the future.
658         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
659             FrameVec, Count * Binary->getInstSize(Offset));
660         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
661                                             Count);
662       }
663     } while (IP.advance() && IP.Address <= RangeEnd);
664   }
665 }
666 
667 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
668   // Get the function range by branch target if it's a call branch.
669   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
670 
671   // We won't accumulate sample count for a range whose start is not the real
672   // function entry such as outlined function or inner labels.
673   if (!FRange || !FRange->IsFuncEntry)
674     return StringRef();
675 
676   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
677 }
678 
679 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
680     const BranchSample &BranchCounters) {
681   for (const auto &Entry : BranchCounters) {
682     uint64_t SourceOffset = Entry.first.first;
683     uint64_t TargetOffset = Entry.first.second;
684     uint64_t Count = Entry.second;
685     assert(Count != 0 && "Unexpected zero weight branch");
686 
687     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
688     if (CalleeName.size() == 0)
689       continue;
690     // Record called target sample and its count.
691     const SampleContextFrameVector &FrameVec =
692         Binary->getFrameLocationStack(SourceOffset);
693     if (!FrameVec.empty()) {
694       FunctionSamples &FunctionProfile =
695           getLeafProfileAndAddTotalSamples(FrameVec, 0);
696       FunctionProfile.addCalledTargetSamples(
697           FrameVec.back().Location.LineOffset,
698           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
699           CalleeName, Count);
700     }
701     // Add head samples for callee.
702     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
703     CalleeProfile.addHeadSamples(Count);
704   }
705 }
706 
707 void ProfileGeneratorBase::calculateAndShowDensity(
708     const SampleProfileMap &Profiles) {
709   double Density = calculateDensity(Profiles, HotCountThreshold);
710   showDensitySuggestion(Density);
711 }
712 
713 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
714     const SampleContextFrameVector &Context, bool WasLeafInlined) {
715   auto I = ProfileMap.find(SampleContext(Context));
716   if (I == ProfileMap.end()) {
717     // Save the new context for future references.
718     SampleContextFrames NewContext = *Contexts.insert(Context).first;
719     SampleContext FContext(NewContext, RawContext);
720     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
721     if (WasLeafInlined)
722       FContext.setAttribute(ContextWasInlined);
723     FunctionSamples &FProfile = Ret.first->second;
724     FProfile.setContext(FContext);
725     return Ret.first->second;
726   } else {
727     // Update ContextWasInlined attribute for existing contexts.
728     // The current function can be called in two ways:
729     //  - when processing a probe of the current frame
730     //  - when processing the entry probe of an inlinee's frame, which
731     //    is then used to update the callsite count of the current frame.
732     // The two can happen in any order, hence here we are making sure
733     // `ContextWasInlined` is always set as expected.
734     // TODO: Note that the former does not always happen if no probes of the
735     // current frame has samples, and if the latter happens, we could lose the
736     // attribute. This should be fixed.
737     if (WasLeafInlined)
738       I->second.getContext().setAttribute(ContextWasInlined);
739   }
740 
741   return I->second;
742 }
743 
744 void CSProfileGenerator::generateProfile() {
745   FunctionSamples::ProfileIsCS = true;
746 
747   collectProfiledFunctions();
748 
749   if (Binary->usePseudoProbes())
750     Binary->decodePseudoProbe();
751 
752   if (SampleCounters) {
753     if (Binary->usePseudoProbes()) {
754       generateProbeBasedProfile();
755     } else {
756       generateLineNumBasedProfile();
757     }
758   }
759 
760   if (Binary->getTrackFuncContextSize())
761     computeSizeForProfiledFunctions();
762 
763   postProcessProfiles();
764 }
765 
766 void CSProfileGenerator::computeSizeForProfiledFunctions() {
767   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
768   for (auto *Func : Binary->getProfiledFunctions())
769     Binary->computeInlinedContextSizeForFunc(Func);
770 
771   // Flush the symbolizer to save memory.
772   Binary->flushSymbolizer();
773 }
774 
775 void CSProfileGenerator::generateLineNumBasedProfile() {
776   for (const auto &CI : *SampleCounters) {
777     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
778 
779     FunctionSamples *FunctionProfile = nullptr;
780     // Sample context will be empty if the jump is an external-to-internal call
781     // pattern, the head samples should be added for the internal function.
782     if (!CtxKey->Context.empty()) {
783       // Get or create function profile for the range
784       FunctionProfile = &getFunctionProfileForContext(CtxKey->Context,
785                                                       CtxKey->WasLeafInlined);
786       // Fill in function body samples
787       populateBodySamplesForFunction(*FunctionProfile, CI.second.RangeCounter);
788     }
789     // Fill in boundary sample counts as well as call site samples for calls
790     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
791                                        CI.second.BranchCounter);
792   }
793   // Fill in call site value sample for inlined calls and also use context to
794   // infer missing samples. Since we don't have call count for inlined
795   // functions, we estimate it from inlinee's profile using the entry of the
796   // body sample.
797   populateInferredFunctionSamples();
798 
799   updateFunctionSamples();
800 }
801 
802 void CSProfileGenerator::populateBodySamplesForFunction(
803     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
804   // Compute disjoint ranges first, so we can use MAX
805   // for calculating count for each location.
806   RangeSample Ranges;
807   findDisjointRanges(Ranges, RangeCounter);
808   for (const auto &Range : Ranges) {
809     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
810     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
811     uint64_t Count = Range.second;
812     // Disjoint ranges have introduce zero-filled gap that
813     // doesn't belong to current context, filter them out.
814     if (Count == 0)
815       continue;
816 
817     InstructionPointer IP(Binary, RangeBegin, true);
818     // Disjoint ranges may have range in the middle of two instr,
819     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
820     // can be Addr1+1 to Addr2-1. We should ignore such range.
821     if (IP.Address > RangeEnd)
822       continue;
823 
824     do {
825       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
826       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
827       if (LeafLoc.hasValue()) {
828         // Recording body sample for this specific context
829         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
830         FunctionProfile.addTotalSamples(Count);
831       }
832     } while (IP.advance() && IP.Address <= RangeEnd);
833   }
834 }
835 
836 void CSProfileGenerator::populateBoundarySamplesForFunction(
837     SampleContextFrames ContextId, FunctionSamples *CallerProfile,
838     const BranchSample &BranchCounters) {
839 
840   for (const auto &Entry : BranchCounters) {
841     uint64_t SourceOffset = Entry.first.first;
842     uint64_t TargetOffset = Entry.first.second;
843     uint64_t Count = Entry.second;
844     assert(Count != 0 && "Unexpected zero weight branch");
845 
846     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
847     if (CalleeName.size() == 0)
848       continue;
849 
850     SampleContextFrameVector CalleeCtx;
851     if (CallerProfile) {
852       assert(!ContextId.empty() &&
853              "CallerProfile is null only if ContextId is empty");
854       // Record called target sample and its count
855       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
856       if (LeafLoc.hasValue()) {
857         CallerProfile->addCalledTargetSamples(
858             LeafLoc->Location.LineOffset,
859             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
860             Count);
861 
862         // Record head sample for called target(callee)
863         CalleeCtx.append(ContextId.begin(), ContextId.end());
864         assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
865                "Leaf function name doesn't match");
866         CalleeCtx.back() = *LeafLoc;
867       }
868     }
869     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
870     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
871     CalleeProfile.addHeadSamples(Count);
872   }
873 }
874 
875 static SampleContextFrame
876 getCallerContext(SampleContextFrames CalleeContext,
877                  SampleContextFrameVector &CallerContext) {
878   assert(CalleeContext.size() > 1 && "Unexpected empty context");
879   CalleeContext = CalleeContext.drop_back();
880   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
881   SampleContextFrame CallerFrame = CallerContext.back();
882   CallerContext.back().Location = LineLocation(0, 0);
883   return CallerFrame;
884 }
885 
886 void CSProfileGenerator::populateInferredFunctionSamples() {
887   for (const auto &Item : ProfileMap) {
888     const auto &CalleeContext = Item.first;
889     const FunctionSamples &CalleeProfile = Item.second;
890 
891     // If we already have head sample counts, we must have value profile
892     // for call sites added already. Skip to avoid double counting.
893     if (CalleeProfile.getHeadSamples())
894       continue;
895     // If we don't have context, nothing to do for caller's call site.
896     // This could happen for entry point function.
897     if (CalleeContext.isBaseContext())
898       continue;
899 
900     // Infer Caller's frame loc and context ID through string splitting
901     SampleContextFrameVector CallerContextId;
902     SampleContextFrame &&CallerLeafFrameLoc =
903         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
904     SampleContextFrames CallerContext(CallerContextId);
905 
906     // It's possible that we haven't seen any sample directly in the caller,
907     // in which case CallerProfile will not exist. But we can't modify
908     // ProfileMap while iterating it.
909     // TODO: created function profile for those callers too
910     if (ProfileMap.find(CallerContext) == ProfileMap.end())
911       continue;
912     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
913 
914     // Since we don't have call count for inlined functions, we
915     // estimate it from inlinee's profile using entry body sample.
916     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
917     // If we don't have samples with location, use 1 to indicate live.
918     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
919       EstimatedCallCount = 1;
920     CallerProfile.addCalledTargetSamples(
921         CallerLeafFrameLoc.Location.LineOffset,
922         CallerLeafFrameLoc.Location.Discriminator,
923         CalleeProfile.getContext().getName(), EstimatedCallCount);
924     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
925                                  CallerLeafFrameLoc.Location.Discriminator,
926                                  EstimatedCallCount);
927     CallerProfile.addTotalSamples(EstimatedCallCount);
928   }
929 }
930 
931 void CSProfileGenerator::postProcessProfiles() {
932   // Compute hot/cold threshold based on profile. This will be used for cold
933   // context profile merging/trimming.
934   computeSummaryAndThreshold();
935 
936   // Run global pre-inliner to adjust/merge context profile based on estimated
937   // inline decisions.
938   if (EnableCSPreInliner) {
939     CSPreInliner(ProfileMap, *Binary, Summary.get()).run();
940     // Turn off the profile merger by default unless it is explicitly enabled.
941     if (!CSProfMergeColdContext.getNumOccurrences())
942       CSProfMergeColdContext = false;
943   }
944 
945   // Trim and merge cold context profile using cold threshold above.
946   if (TrimColdProfile || CSProfMergeColdContext) {
947     SampleContextTrimmer(ProfileMap)
948         .trimAndMergeColdContextProfiles(
949             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
950             CSProfMaxColdContextDepth, EnableCSPreInliner);
951   }
952 
953   // Merge function samples of CS profile to calculate profile density.
954   sampleprof::SampleProfileMap ContextLessProfiles;
955   for (const auto &I : ProfileMap) {
956     ContextLessProfiles[I.second.getName()].merge(I.second);
957   }
958 
959   calculateAndShowDensity(ContextLessProfiles);
960   if (GenCSNestedProfile) {
961     CSProfileConverter CSConverter(ProfileMap);
962     CSConverter.convertProfiles();
963     FunctionSamples::ProfileIsCS = false;
964   }
965 }
966 
967 void ProfileGeneratorBase::computeSummaryAndThreshold() {
968   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
969   Summary = Builder.computeSummaryForProfiles(ProfileMap);
970   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
971       (Summary->getDetailedSummary()));
972   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
973       (Summary->getDetailedSummary()));
974 }
975 
976 void ProfileGeneratorBase::extractProbesFromRange(
977     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
978     bool FindDisjointRanges) {
979   const RangeSample *PRanges = &RangeCounter;
980   RangeSample Ranges;
981   if (FindDisjointRanges) {
982     findDisjointRanges(Ranges, RangeCounter);
983     PRanges = &Ranges;
984   }
985 
986   for (const auto &Range : *PRanges) {
987     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
988     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
989     uint64_t Count = Range.second;
990 
991     InstructionPointer IP(Binary, RangeBegin, true);
992     // Disjoint ranges may have range in the middle of two instr,
993     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
994     // can be Addr1+1 to Addr2-1. We should ignore such range.
995     if (IP.Address > RangeEnd)
996       continue;
997 
998     do {
999       const AddressProbesMap &Address2ProbesMap =
1000           Binary->getAddress2ProbesMap();
1001       auto It = Address2ProbesMap.find(IP.Address);
1002       if (It != Address2ProbesMap.end()) {
1003         for (const auto &Probe : It->second) {
1004           ProbeCounter[&Probe] += Count;
1005         }
1006       }
1007     } while (IP.advance() && IP.Address <= RangeEnd);
1008   }
1009 }
1010 
1011 static void
1012 extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1013                           const SmallVectorImpl<uint64_t> &Addresses,
1014                           ProfiledBinary *Binary) {
1015   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1016   for (auto Addr : reverse(Addresses)) {
1017     const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
1018     // These could be the cases when a probe is not found at a calliste. Cutting
1019     // off the context from here since the inliner will not know how to consume
1020     // a context with unknown callsites.
1021     // 1. for functions that are not sampled when
1022     // --decode-probe-for-profiled-functions-only is on.
1023     // 2. for a merged callsite. Callsite merging may cause the loss of original
1024     // probe IDs.
1025     // 3. for an external callsite.
1026     if (!CallProbe)
1027       break;
1028     Probes.push_back(CallProbe);
1029   }
1030 
1031   std::reverse(Probes.begin(), Probes.end());
1032 
1033   // Extract context stack for reusing, leaf context stack will be added
1034   // compressed while looking up function profile.
1035   for (const auto *P : Probes) {
1036     Binary->getInlineContextForProbe(P, ContextStack, true);
1037   }
1038 }
1039 
1040 void CSProfileGenerator::generateProbeBasedProfile() {
1041   // Enable pseudo probe functionalities in SampleProf
1042   FunctionSamples::ProfileIsProbeBased = true;
1043   for (const auto &CI : *SampleCounters) {
1044     const AddrBasedCtxKey *CtxKey =
1045         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1046     SampleContextFrameVector ContextStack;
1047     extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
1048     // Fill in function body samples from probes, also infer caller's samples
1049     // from callee's probe
1050     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1051     // Fill in boundary samples for a call probe
1052     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1053   }
1054 }
1055 
1056 void CSProfileGenerator::populateBodySamplesWithProbes(
1057     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1058   ProbeCounterMap ProbeCounter;
1059   // Extract the top frame probes by looking up each address among the range in
1060   // the Address2ProbeMap
1061   extractProbesFromRange(RangeCounter, ProbeCounter);
1062   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1063                      std::unordered_set<FunctionSamples *>>
1064       FrameSamples;
1065   for (const auto &PI : ProbeCounter) {
1066     const MCDecodedPseudoProbe *Probe = PI.first;
1067     uint64_t Count = PI.second;
1068     // Disjoint ranges have introduce zero-filled gap that
1069     // doesn't belong to current context, filter them out.
1070     if (!Probe->isBlock() || Count == 0)
1071       continue;
1072     FunctionSamples &FunctionProfile =
1073         getFunctionProfileForLeafProbe(ContextStack, Probe);
1074     // Record the current frame and FunctionProfile whenever samples are
1075     // collected for non-danglie probes. This is for reporting all of the
1076     // zero count probes of the frame later.
1077     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1078     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1079     FunctionProfile.addTotalSamples(Count);
1080     if (Probe->isEntry()) {
1081       FunctionProfile.addHeadSamples(Count);
1082       // Look up for the caller's function profile
1083       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1084       SampleContextFrames CalleeContextId =
1085           FunctionProfile.getContext().getContextFrames();
1086       if (InlinerDesc != nullptr && CalleeContextId.size() > 1) {
1087         // Since the context id will be compressed, we have to use callee's
1088         // context id to infer caller's context id to ensure they share the
1089         // same context prefix.
1090         SampleContextFrameVector CallerContextId;
1091         SampleContextFrame &&CallerLeafFrameLoc =
1092             getCallerContext(CalleeContextId, CallerContextId);
1093         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
1094         assert(CallerIndex &&
1095                "Inferred caller's location index shouldn't be zero!");
1096         FunctionSamples &CallerProfile =
1097             getFunctionProfileForContext(CallerContextId);
1098         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1099         CallerProfile.addBodySamples(CallerIndex, 0, Count);
1100         CallerProfile.addTotalSamples(Count);
1101         CallerProfile.addCalledTargetSamples(
1102             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
1103       }
1104     }
1105   }
1106 
1107   // Assign zero count for remaining probes without sample hits to
1108   // differentiate from probes optimized away, of which the counts are unknown
1109   // and will be inferred by the compiler.
1110   for (auto &I : FrameSamples) {
1111     for (auto *FunctionProfile : I.second) {
1112       for (auto *Probe : I.first->getProbes()) {
1113         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1114       }
1115     }
1116   }
1117 }
1118 
1119 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1120     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1121   for (const auto &BI : BranchCounter) {
1122     uint64_t SourceOffset = BI.first.first;
1123     uint64_t TargetOffset = BI.first.second;
1124     uint64_t Count = BI.second;
1125     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
1126     const MCDecodedPseudoProbe *CallProbe =
1127         Binary->getCallProbeForAddr(SourceAddress);
1128     if (CallProbe == nullptr)
1129       continue;
1130     FunctionSamples &FunctionProfile =
1131         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1132     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1133     FunctionProfile.addTotalSamples(Count);
1134     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
1135     if (CalleeName.size() == 0)
1136       continue;
1137     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1138                                            Count);
1139   }
1140 }
1141 
1142 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1143     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1144 
1145   // Explicitly copy the context for appending the leaf context
1146   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1147                                            ContextStack.end());
1148   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1149   // For leaf inlined context with the top frame, we should strip off the top
1150   // frame's probe id, like:
1151   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1152   auto LeafFrame = NewContextStack.back();
1153   LeafFrame.Location = LineLocation(0, 0);
1154   NewContextStack.pop_back();
1155   // Compress the context string except for the leaf frame
1156   CSProfileGenerator::compressRecursionContext(NewContextStack);
1157   CSProfileGenerator::trimContext(NewContextStack);
1158   NewContextStack.push_back(LeafFrame);
1159 
1160   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1161   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1162   FunctionSamples &FunctionProile =
1163       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
1164   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
1165   return FunctionProile;
1166 }
1167 
1168 } // end namespace sampleprof
1169 } // end namespace llvm
1170