1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ProfiledBinary.h"
11 #include "llvm/ProfileData/ProfileCommon.h"
12 #include <unordered_set>
13 
14 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
15                                            cl::Required,
16                                            cl::desc("Output profile file"));
17 static cl::alias OutputA("o", cl::desc("Alias for --output"),
18                          cl::aliasopt(OutputFilename));
19 
20 static cl::opt<SampleProfileFormat> OutputFormat(
21     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
22     cl::values(
23         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
24         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
25         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
26         clEnumValN(SPF_Text, "text", "Text encoding"),
27         clEnumValN(SPF_GCC, "gcc",
28                    "GCC encoding (only meaningful for -sample)")));
29 
30 static cl::opt<int32_t, true> RecursionCompression(
31     "compress-recursion",
32     cl::desc("Compressing recursion by deduplicating adjacent frame "
33              "sequences up to the specified size. -1 means no size limit."),
34     cl::Hidden,
35     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
36 
37 static cl::opt<bool> CSProfMergeColdContext(
38     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
39     cl::desc("If the total count of context profile is smaller than "
40              "the threshold, it will be merged into context-less base "
41              "profile."));
42 
43 static cl::opt<bool> CSProfTrimColdContext(
44     "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
45     cl::desc("If the total count of the profile after all merge is done "
46              "is still smaller than threshold, it will be trimmed."));
47 
48 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
49     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
50     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
51              "context-less base profile"));
52 
53 static cl::opt<int, true> CSProfMaxContextDepth(
54     "csprof-max-context-depth", cl::ZeroOrMore,
55     cl::desc("Keep the last K contexts while merging profile. -1 means no "
56              "depth limit."),
57     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
58 
59 extern cl::opt<int> ProfileSummaryCutoffCold;
60 
61 using namespace llvm;
62 using namespace sampleprof;
63 
64 namespace llvm {
65 namespace sampleprof {
66 
67 // Initialize the MaxCompressionSize to -1 which means no size limit
68 int32_t CSProfileGenerator::MaxCompressionSize = -1;
69 
70 int CSProfileGenerator::MaxContextDepth = -1;
71 
72 std::unique_ptr<ProfileGenerator>
73 ProfileGenerator::create(ProfiledBinary *Binary,
74                          const ContextSampleCounterMap &SampleCounters,
75                          enum PerfScriptType SampleType) {
76   std::unique_ptr<ProfileGenerator> ProfileGenerator;
77   if (SampleType == PERF_LBR_STACK) {
78     if (Binary->usePseudoProbes()) {
79       ProfileGenerator.reset(
80           new PseudoProbeCSProfileGenerator(Binary, SampleCounters));
81     } else {
82       ProfileGenerator.reset(new CSProfileGenerator(Binary, SampleCounters));
83     }
84   } else {
85     // TODO:
86     llvm_unreachable("Unsupported perfscript!");
87   }
88 
89   return ProfileGenerator;
90 }
91 
92 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
93                              StringMap<FunctionSamples> &ProfileMap) {
94   if (std::error_code EC = Writer->write(ProfileMap))
95     exitWithError(std::move(EC));
96 }
97 
98 void ProfileGenerator::write() {
99   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
100   if (std::error_code EC = WriterOrErr.getError())
101     exitWithError(EC, OutputFilename);
102   write(std::move(WriterOrErr.get()), ProfileMap);
103 }
104 
105 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
106                                           const RangeSample &Ranges) {
107 
108   /*
109   Regions may overlap with each other. Using the boundary info, find all
110   disjoint ranges and their sample count. BoundaryPoint contains the count
111   multiple samples begin/end at this points.
112 
113   |<--100-->|           Sample1
114   |<------200------>|   Sample2
115   A         B       C
116 
117   In the example above,
118   Sample1 begins at A, ends at B, its value is 100.
119   Sample2 beings at A, ends at C, its value is 200.
120   For A, BeginCount is the sum of sample begins at A, which is 300 and no
121   samples ends at A, so EndCount is 0.
122   Then boundary points A, B, and C with begin/end counts are:
123   A: (300, 0)
124   B: (0, 100)
125   C: (0, 200)
126   */
127   struct BoundaryPoint {
128     // Sum of sample counts beginning at this point
129     uint64_t BeginCount;
130     // Sum of sample counts ending at this point
131     uint64_t EndCount;
132 
133     BoundaryPoint() : BeginCount(0), EndCount(0){};
134 
135     void addBeginCount(uint64_t Count) { BeginCount += Count; }
136 
137     void addEndCount(uint64_t Count) { EndCount += Count; }
138   };
139 
140   /*
141   For the above example. With boundary points, follwing logic finds two
142   disjoint region of
143 
144   [A,B]:   300
145   [B+1,C]: 200
146 
147   If there is a boundary point that both begin and end, the point itself
148   becomes a separate disjoint region. For example, if we have original
149   ranges of
150 
151   |<--- 100 --->|
152                 |<--- 200 --->|
153   A             B             C
154 
155   there are three boundary points with their begin/end counts of
156 
157   A: (100, 0)
158   B: (200, 100)
159   C: (0, 200)
160 
161   the disjoint ranges would be
162 
163   [A, B-1]: 100
164   [B, B]:   300
165   [B+1, C]: 200.
166   */
167   std::map<uint64_t, BoundaryPoint> Boundaries;
168 
169   for (auto Item : Ranges) {
170     uint64_t Begin = Item.first.first;
171     uint64_t End = Item.first.second;
172     uint64_t Count = Item.second;
173     if (Boundaries.find(Begin) == Boundaries.end())
174       Boundaries[Begin] = BoundaryPoint();
175     Boundaries[Begin].addBeginCount(Count);
176 
177     if (Boundaries.find(End) == Boundaries.end())
178       Boundaries[End] = BoundaryPoint();
179     Boundaries[End].addEndCount(Count);
180   }
181 
182   uint64_t BeginAddress = UINT64_MAX;
183   int Count = 0;
184   for (auto Item : Boundaries) {
185     uint64_t Address = Item.first;
186     BoundaryPoint &Point = Item.second;
187     if (Point.BeginCount) {
188       if (BeginAddress != UINT64_MAX)
189         DisjointRanges[{BeginAddress, Address - 1}] = Count;
190       Count += Point.BeginCount;
191       BeginAddress = Address;
192     }
193     if (Point.EndCount) {
194       assert((BeginAddress != UINT64_MAX) &&
195              "First boundary point cannot be 'end' point");
196       DisjointRanges[{BeginAddress, Address}] = Count;
197       Count -= Point.EndCount;
198       BeginAddress = Address + 1;
199     }
200   }
201 }
202 
203 FunctionSamples &
204 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
205                                                  bool WasLeafInlined) {
206   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
207   if (Ret.second) {
208     // Make a copy of the underlying context string in string table
209     // before StringRef wrapper is used for context.
210     auto It = ContextStrings.insert(ContextStr.str());
211     SampleContext FContext(*It.first, RawContext);
212     if (WasLeafInlined)
213       FContext.setAttribute(ContextWasInlined);
214     FunctionSamples &FProfile = Ret.first->second;
215     FProfile.setContext(FContext);
216     FProfile.setName(FContext.getNameWithoutContext());
217   }
218   return Ret.first->second;
219 }
220 
221 void CSProfileGenerator::generateProfile() {
222   FunctionSamples::ProfileIsCS = true;
223   for (const auto &CI : SampleCounters) {
224     const StringBasedCtxKey *CtxKey =
225         dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
226     StringRef ContextId(CtxKey->Context);
227     // Get or create function profile for the range
228     FunctionSamples &FunctionProfile =
229         getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
230 
231     // Fill in function body samples
232     populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter);
233     // Fill in boundary sample counts as well as call site samples for calls
234     populateFunctionBoundarySamples(ContextId, FunctionProfile,
235                                     CI.second.BranchCounter);
236   }
237   // Fill in call site value sample for inlined calls and also use context to
238   // infer missing samples. Since we don't have call count for inlined
239   // functions, we estimate it from inlinee's profile using the entry of the
240   // body sample.
241   populateInferredFunctionSamples();
242 
243   postProcessProfiles();
244 }
245 
246 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
247     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
248     uint64_t Count) {
249   // Filter out invalid negative(int type) lineOffset
250   if (LeafLoc.second.LineOffset & 0x80000000)
251     return;
252   // Use the maximum count of samples with same line location
253   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
254       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
255   uint64_t PreviousCount = R ? R.get() : 0;
256   if (PreviousCount < Count) {
257     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
258                                    LeafLoc.second.Discriminator,
259                                    Count - PreviousCount);
260   }
261 }
262 
263 void CSProfileGenerator::populateFunctionBodySamples(
264     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
265   // Compute disjoint ranges first, so we can use MAX
266   // for calculating count for each location.
267   RangeSample Ranges;
268   findDisjointRanges(Ranges, RangeCounter);
269   for (auto Range : Ranges) {
270     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
271     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
272     uint64_t Count = Range.second;
273     // Disjoint ranges have introduce zero-filled gap that
274     // doesn't belong to current context, filter them out.
275     if (Count == 0)
276       continue;
277 
278     InstructionPointer IP(Binary, RangeBegin, true);
279 
280     // Disjoint ranges may have range in the middle of two instr,
281     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
282     // can be Addr1+1 to Addr2-1. We should ignore such range.
283     if (IP.Address > RangeEnd)
284       continue;
285 
286     while (IP.Address <= RangeEnd) {
287       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
288       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
289       if (LeafLoc.hasValue()) {
290         // Recording body sample for this specific context
291         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
292       }
293       // Accumulate total sample count even it's a line with invalid debug info
294       FunctionProfile.addTotalSamples(Count);
295       // Move to next IP within the range
296       IP.advance();
297     }
298   }
299 }
300 
301 void CSProfileGenerator::populateFunctionBoundarySamples(
302     StringRef ContextId, FunctionSamples &FunctionProfile,
303     const BranchSample &BranchCounters) {
304 
305   for (auto Entry : BranchCounters) {
306     uint64_t SourceOffset = Entry.first.first;
307     uint64_t TargetOffset = Entry.first.second;
308     uint64_t Count = Entry.second;
309     // Get the callee name by branch target if it's a call branch
310     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
311         Binary->getFuncFromStartOffset(TargetOffset));
312     if (CalleeName.size() == 0)
313       continue;
314 
315     // Record called target sample and its count
316     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
317     if (!LeafLoc.hasValue())
318       continue;
319     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
320                                            LeafLoc->second.Discriminator,
321                                            CalleeName, Count);
322 
323     // Record head sample for called target(callee)
324     std::ostringstream OCalleeCtxStr;
325     if (ContextId.find(" @ ") != StringRef::npos) {
326       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
327       OCalleeCtxStr << " @ ";
328     }
329     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
330 
331     FunctionSamples &CalleeProfile =
332         getFunctionProfileForContext(OCalleeCtxStr.str());
333     assert(Count != 0 && "Unexpected zero weight branch");
334     CalleeProfile.addHeadSamples(Count);
335   }
336 }
337 
338 static FrameLocation getCallerContext(StringRef CalleeContext,
339                                       StringRef &CallerNameWithContext) {
340   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
341   CallerNameWithContext = CallerContext.rsplit(':').first;
342   auto ContextSplit = CallerContext.rsplit(" @ ");
343   StringRef CallerFrameStr = ContextSplit.second.size() == 0
344                                  ? ContextSplit.first
345                                  : ContextSplit.second;
346   FrameLocation LeafFrameLoc = {"", {0, 0}};
347   StringRef Funcname;
348   SampleContext::decodeContextString(CallerFrameStr, Funcname,
349                                      LeafFrameLoc.second);
350   LeafFrameLoc.first = Funcname.str();
351   return LeafFrameLoc;
352 }
353 
354 void CSProfileGenerator::populateInferredFunctionSamples() {
355   for (const auto &Item : ProfileMap) {
356     const StringRef CalleeContext = Item.first();
357     const FunctionSamples &CalleeProfile = Item.second;
358 
359     // If we already have head sample counts, we must have value profile
360     // for call sites added already. Skip to avoid double counting.
361     if (CalleeProfile.getHeadSamples())
362       continue;
363     // If we don't have context, nothing to do for caller's call site.
364     // This could happen for entry point function.
365     if (CalleeContext.find(" @ ") == StringRef::npos)
366       continue;
367 
368     // Infer Caller's frame loc and context ID through string splitting
369     StringRef CallerContextId;
370     FrameLocation &&CallerLeafFrameLoc =
371         getCallerContext(CalleeContext, CallerContextId);
372 
373     // It's possible that we haven't seen any sample directly in the caller,
374     // in which case CallerProfile will not exist. But we can't modify
375     // ProfileMap while iterating it.
376     // TODO: created function profile for those callers too
377     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
378       continue;
379     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
380 
381     // Since we don't have call count for inlined functions, we
382     // estimate it from inlinee's profile using entry body sample.
383     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
384     // If we don't have samples with location, use 1 to indicate live.
385     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
386       EstimatedCallCount = 1;
387     CallerProfile.addCalledTargetSamples(
388         CallerLeafFrameLoc.second.LineOffset,
389         CallerLeafFrameLoc.second.Discriminator,
390         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
391     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
392                                  CallerLeafFrameLoc.second.Discriminator,
393                                  EstimatedCallCount);
394     CallerProfile.addTotalSamples(EstimatedCallCount);
395   }
396 }
397 
398 void CSProfileGenerator::postProcessProfiles() {
399   // Compute hot/cold threshold based on profile. This will be used for cold
400   // context profile merging/trimming.
401   computeSummaryAndThreshold();
402 
403   // Run global pre-inliner to adjust/merge context profile based on estimated
404   // inline decisions.
405   if (EnableCSPreInliner) {
406     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
407         .run();
408   }
409 
410   // Trim and merge cold context profile using cold threshold above;
411   SampleContextTrimmer(ProfileMap)
412       .trimAndMergeColdContextProfiles(
413           ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
414           CSProfMaxColdContextDepth);
415 }
416 
417 void CSProfileGenerator::computeSummaryAndThreshold() {
418   // Update the default value of cold cutoff for llvm-profgen.
419   // Do it here because we don't want to change the global default,
420   // which would lead CS profile size too large.
421   if (!ProfileSummaryCutoffCold.getNumOccurrences())
422     ProfileSummaryCutoffCold = 999000;
423 
424   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
425   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
426   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
427       (Summary->getDetailedSummary()));
428   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
429       (Summary->getDetailedSummary()));
430 }
431 
432 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
433                                StringMap<FunctionSamples> &ProfileMap) {
434   if (std::error_code EC = Writer->write(ProfileMap))
435     exitWithError(std::move(EC));
436 }
437 
438 // Helper function to extract context prefix string stack
439 // Extract context stack for reusing, leaf context stack will
440 // be added compressed while looking up function profile
441 static void extractPrefixContextStack(
442     SmallVectorImpl<std::string> &ContextStrStack,
443     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
444     ProfiledBinary *Binary) {
445   for (const auto *P : Probes) {
446     Binary->getInlineContextForProbe(P, ContextStrStack, true);
447   }
448 }
449 
450 void PseudoProbeCSProfileGenerator::generateProfile() {
451   // Enable pseudo probe functionalities in SampleProf
452   FunctionSamples::ProfileIsProbeBased = true;
453   FunctionSamples::ProfileIsCS = true;
454   for (const auto &CI : SampleCounters) {
455     const ProbeBasedCtxKey *CtxKey =
456         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
457     SmallVector<std::string, 16> ContextStrStack;
458     extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
459     // Fill in function body samples from probes, also infer caller's samples
460     // from callee's probe
461     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack);
462     // Fill in boundary samples for a call probe
463     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStrStack);
464   }
465 
466   postProcessProfiles();
467 }
468 
469 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
470     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter) {
471   RangeSample Ranges;
472   findDisjointRanges(Ranges, RangeCounter);
473   for (const auto &Range : Ranges) {
474     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
475     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
476     uint64_t Count = Range.second;
477     // Disjoint ranges have introduce zero-filled gap that
478     // doesn't belong to current context, filter them out.
479     if (Count == 0)
480       continue;
481 
482     InstructionPointer IP(Binary, RangeBegin, true);
483 
484     // Disjoint ranges may have range in the middle of two instr,
485     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
486     // can be Addr1+1 to Addr2-1. We should ignore such range.
487     if (IP.Address > RangeEnd)
488       continue;
489 
490     while (IP.Address <= RangeEnd) {
491       const AddressProbesMap &Address2ProbesMap =
492           Binary->getAddress2ProbesMap();
493       auto It = Address2ProbesMap.find(IP.Address);
494       if (It != Address2ProbesMap.end()) {
495         for (const auto &Probe : It->second) {
496           if (!Probe.isBlock())
497             continue;
498           ProbeCounter[&Probe] += Count;
499         }
500       }
501 
502       IP.advance();
503     }
504   }
505 }
506 
507 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
508     const RangeSample &RangeCounter,
509     SmallVectorImpl<std::string> &ContextStrStack) {
510   ProbeCounterMap ProbeCounter;
511   // Extract the top frame probes by looking up each address among the range in
512   // the Address2ProbeMap
513   extractProbesFromRange(RangeCounter, ProbeCounter);
514   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
515                      std::unordered_set<FunctionSamples *>>
516       FrameSamples;
517   for (auto PI : ProbeCounter) {
518     const MCDecodedPseudoProbe *Probe = PI.first;
519     uint64_t Count = PI.second;
520     FunctionSamples &FunctionProfile =
521         getFunctionProfileForLeafProbe(ContextStrStack, Probe);
522     // Record the current frame and FunctionProfile whenever samples are
523     // collected for non-danglie probes. This is for reporting all of the
524     // zero count probes of the frame later.
525     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
526     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
527     FunctionProfile.addTotalSamples(Count);
528     if (Probe->isEntry()) {
529       FunctionProfile.addHeadSamples(Count);
530       // Look up for the caller's function profile
531       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
532       if (InlinerDesc != nullptr) {
533         // Since the context id will be compressed, we have to use callee's
534         // context id to infer caller's context id to ensure they share the
535         // same context prefix.
536         StringRef CalleeContextId =
537             FunctionProfile.getContext().getNameWithContext();
538         StringRef CallerContextId;
539         FrameLocation &&CallerLeafFrameLoc =
540             getCallerContext(CalleeContextId, CallerContextId);
541         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
542         assert(CallerIndex &&
543                "Inferred caller's location index shouldn't be zero!");
544         FunctionSamples &CallerProfile =
545             getFunctionProfileForContext(CallerContextId);
546         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
547         CallerProfile.addBodySamples(CallerIndex, 0, Count);
548         CallerProfile.addTotalSamples(Count);
549         CallerProfile.addCalledTargetSamples(
550             CallerIndex, 0,
551             FunctionProfile.getContext().getNameWithoutContext(), Count);
552       }
553     }
554   }
555 
556   // Assign zero count for remaining probes without sample hits to
557   // differentiate from probes optimized away, of which the counts are unknown
558   // and will be inferred by the compiler.
559   for (auto &I : FrameSamples) {
560     for (auto *FunctionProfile : I.second) {
561       for (auto *Probe : I.first->getProbes()) {
562         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
563       }
564     }
565   }
566 }
567 
568 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
569     const BranchSample &BranchCounter,
570     SmallVectorImpl<std::string> &ContextStrStack) {
571   for (auto BI : BranchCounter) {
572     uint64_t SourceOffset = BI.first.first;
573     uint64_t TargetOffset = BI.first.second;
574     uint64_t Count = BI.second;
575     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
576     const MCDecodedPseudoProbe *CallProbe =
577         Binary->getCallProbeForAddr(SourceAddress);
578     if (CallProbe == nullptr)
579       continue;
580     FunctionSamples &FunctionProfile =
581         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe);
582     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
583     FunctionProfile.addTotalSamples(Count);
584     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
585         Binary->getFuncFromStartOffset(TargetOffset));
586     if (CalleeName.size() == 0)
587       continue;
588     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
589                                            Count);
590   }
591 }
592 
593 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
594     SmallVectorImpl<std::string> &ContextStrStack,
595     const MCPseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
596   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
597   // Compress the context string except for the leaf frame
598   std::string LeafFrame = ContextStrStack.back();
599   ContextStrStack.pop_back();
600   CSProfileGenerator::compressRecursionContext(ContextStrStack);
601   CSProfileGenerator::trimContext(ContextStrStack);
602 
603   std::ostringstream OContextStr;
604   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
605     if (OContextStr.str().size())
606       OContextStr << " @ ";
607     OContextStr << ContextStrStack[I];
608   }
609   // For leaf inlined context with the top frame, we should strip off the top
610   // frame's probe id, like:
611   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
612   if (OContextStr.str().size())
613     OContextStr << " @ ";
614   OContextStr << StringRef(LeafFrame).split(":").first.str();
615 
616   FunctionSamples &FunctionProile =
617       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
618   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
619   return FunctionProile;
620 }
621 
622 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
623     SmallVectorImpl<std::string> &ContextStrStack,
624     const MCDecodedPseudoProbe *LeafProbe) {
625 
626   // Explicitly copy the context for appending the leaf context
627   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
628                                                    ContextStrStack.end());
629   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
630   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
631   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
632   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
633                                         WasLeafInlined);
634 }
635 
636 } // end namespace sampleprof
637 } // end namespace llvm
638