1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11 
12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13                                            cl::Required,
14                                            cl::desc("Output profile file"));
15 static cl::alias OutputA("o", cl::desc("Alias for --output"),
16                          cl::aliasopt(OutputFilename));
17 
18 static cl::opt<SampleProfileFormat> OutputFormat(
19     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
20     cl::values(
21         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24         clEnumValN(SPF_Text, "text", "Text encoding"),
25         clEnumValN(SPF_GCC, "gcc",
26                    "GCC encoding (only meaningful for -sample)")));
27 
28 static cl::opt<int32_t, true> RecursionCompression(
29     "compress-recursion",
30     cl::desc("Compressing recursion by deduplicating adjacent frame "
31              "sequences up to the specified size. -1 means no size limit."),
32     cl::Hidden,
33     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34 
35 static cl::opt<bool> CSProfMergeColdContext(
36     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
37     cl::desc("If the total count of context profile is smaller than "
38              "the threshold, it will be merged into context-less base "
39              "profile."));
40 
41 static cl::opt<bool> CSProfTrimColdContext(
42     "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
43     cl::desc("If the total count of the profile after all merge is done "
44              "is still smaller than threshold, it will be trimmed."));
45 
46 extern cl::opt<int> ProfileSummaryCutoffCold;
47 
48 using namespace llvm;
49 using namespace sampleprof;
50 
51 namespace llvm {
52 namespace sampleprof {
53 
54 // Initialize the MaxCompressionSize to -1 which means no size limit
55 int32_t CSProfileGenerator::MaxCompressionSize = -1;
56 
57 static bool
58 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
59   return BinarySampleCounters.size() &&
60          BinarySampleCounters.begin()->first->usePseudoProbes();
61 }
62 
63 std::unique_ptr<ProfileGenerator>
64 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
65                          enum PerfScriptType SampleType) {
66   std::unique_ptr<ProfileGenerator> ProfileGenerator;
67   if (SampleType == PERF_LBR_STACK) {
68     if (usePseudoProbes(BinarySampleCounters)) {
69       ProfileGenerator.reset(
70           new PseudoProbeCSProfileGenerator(BinarySampleCounters));
71     } else {
72       ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
73     }
74   } else {
75     // TODO:
76     llvm_unreachable("Unsupported perfscript!");
77   }
78 
79   return ProfileGenerator;
80 }
81 
82 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
83                              StringMap<FunctionSamples> &ProfileMap) {
84   if (std::error_code EC = Writer->write(ProfileMap))
85     exitWithError(std::move(EC));
86 }
87 
88 void ProfileGenerator::write() {
89   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
90   if (std::error_code EC = WriterOrErr.getError())
91     exitWithError(EC, OutputFilename);
92   write(std::move(WriterOrErr.get()), ProfileMap);
93 }
94 
95 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
96                                           const RangeSample &Ranges) {
97 
98   /*
99   Regions may overlap with each other. Using the boundary info, find all
100   disjoint ranges and their sample count. BoundaryPoint contains the count
101   multiple samples begin/end at this points.
102 
103   |<--100-->|           Sample1
104   |<------200------>|   Sample2
105   A         B       C
106 
107   In the example above,
108   Sample1 begins at A, ends at B, its value is 100.
109   Sample2 beings at A, ends at C, its value is 200.
110   For A, BeginCount is the sum of sample begins at A, which is 300 and no
111   samples ends at A, so EndCount is 0.
112   Then boundary points A, B, and C with begin/end counts are:
113   A: (300, 0)
114   B: (0, 100)
115   C: (0, 200)
116   */
117   struct BoundaryPoint {
118     // Sum of sample counts beginning at this point
119     uint64_t BeginCount;
120     // Sum of sample counts ending at this point
121     uint64_t EndCount;
122 
123     BoundaryPoint() : BeginCount(0), EndCount(0){};
124 
125     void addBeginCount(uint64_t Count) { BeginCount += Count; }
126 
127     void addEndCount(uint64_t Count) { EndCount += Count; }
128   };
129 
130   /*
131   For the above example. With boundary points, follwing logic finds two
132   disjoint region of
133 
134   [A,B]:   300
135   [B+1,C]: 200
136 
137   If there is a boundary point that both begin and end, the point itself
138   becomes a separate disjoint region. For example, if we have original
139   ranges of
140 
141   |<--- 100 --->|
142                 |<--- 200 --->|
143   A             B             C
144 
145   there are three boundary points with their begin/end counts of
146 
147   A: (100, 0)
148   B: (200, 100)
149   C: (0, 200)
150 
151   the disjoint ranges would be
152 
153   [A, B-1]: 100
154   [B, B]:   300
155   [B+1, C]: 200.
156   */
157   std::map<uint64_t, BoundaryPoint> Boundaries;
158 
159   for (auto Item : Ranges) {
160     uint64_t Begin = Item.first.first;
161     uint64_t End = Item.first.second;
162     uint64_t Count = Item.second;
163     if (Boundaries.find(Begin) == Boundaries.end())
164       Boundaries[Begin] = BoundaryPoint();
165     Boundaries[Begin].addBeginCount(Count);
166 
167     if (Boundaries.find(End) == Boundaries.end())
168       Boundaries[End] = BoundaryPoint();
169     Boundaries[End].addEndCount(Count);
170   }
171 
172   uint64_t BeginAddress = 0;
173   int Count = 0;
174   for (auto Item : Boundaries) {
175     uint64_t Address = Item.first;
176     BoundaryPoint &Point = Item.second;
177     if (Point.BeginCount) {
178       if (BeginAddress)
179         DisjointRanges[{BeginAddress, Address - 1}] = Count;
180       Count += Point.BeginCount;
181       BeginAddress = Address;
182     }
183     if (Point.EndCount) {
184       assert(BeginAddress && "First boundary point cannot be 'end' point");
185       DisjointRanges[{BeginAddress, Address}] = Count;
186       Count -= Point.EndCount;
187       BeginAddress = Address + 1;
188     }
189   }
190 }
191 
192 FunctionSamples &
193 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
194                                                  bool WasLeafInlined) {
195   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
196   if (Ret.second) {
197     // Make a copy of the underlying context string in string table
198     // before StringRef wrapper is used for context.
199     auto It = ContextStrings.insert(ContextStr.str());
200     SampleContext FContext(*It.first, RawContext);
201     if (WasLeafInlined)
202       FContext.setAttribute(ContextWasInlined);
203     FunctionSamples &FProfile = Ret.first->second;
204     FProfile.setContext(FContext);
205     FProfile.setName(FContext.getNameWithoutContext());
206   }
207   return Ret.first->second;
208 }
209 
210 void CSProfileGenerator::generateProfile() {
211   FunctionSamples::ProfileIsCS = true;
212   for (const auto &BI : BinarySampleCounters) {
213     ProfiledBinary *Binary = BI.first;
214     for (const auto &CI : BI.second) {
215       const StringBasedCtxKey *CtxKey =
216           dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
217       StringRef ContextId(CtxKey->Context);
218       // Get or create function profile for the range
219       FunctionSamples &FunctionProfile =
220           getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
221 
222       // Fill in function body samples
223       populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
224                                   Binary);
225       // Fill in boundary sample counts as well as call site samples for calls
226       populateFunctionBoundarySamples(ContextId, FunctionProfile,
227                                       CI.second.BranchCounter, Binary);
228     }
229   }
230   // Fill in call site value sample for inlined calls and also use context to
231   // infer missing samples. Since we don't have call count for inlined
232   // functions, we estimate it from inlinee's profile using the entry of the
233   // body sample.
234   populateInferredFunctionSamples();
235 
236   postProcessProfiles();
237 }
238 
239 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
240     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
241     uint64_t Count) {
242   // Filter out invalid negative(int type) lineOffset
243   if (LeafLoc.second.LineOffset & 0x80000000)
244     return;
245   // Use the maximum count of samples with same line location
246   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
247       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
248   uint64_t PreviousCount = R ? R.get() : 0;
249   if (PreviousCount < Count) {
250     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
251                                    LeafLoc.second.Discriminator,
252                                    Count - PreviousCount);
253   }
254 }
255 
256 void CSProfileGenerator::populateFunctionBodySamples(
257     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
258     ProfiledBinary *Binary) {
259   // Compute disjoint ranges first, so we can use MAX
260   // for calculating count for each location.
261   RangeSample Ranges;
262   findDisjointRanges(Ranges, RangeCounter);
263   for (auto Range : Ranges) {
264     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
265     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
266     uint64_t Count = Range.second;
267     // Disjoint ranges have introduce zero-filled gap that
268     // doesn't belong to current context, filter them out.
269     if (Count == 0)
270       continue;
271 
272     InstructionPointer IP(Binary, RangeBegin, true);
273 
274     // Disjoint ranges may have range in the middle of two instr,
275     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
276     // can be Addr1+1 to Addr2-1. We should ignore such range.
277     if (IP.Address > RangeEnd)
278       continue;
279 
280     while (IP.Address <= RangeEnd) {
281       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
282       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
283       if (LeafLoc.hasValue()) {
284         // Recording body sample for this specific context
285         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
286       }
287       // Accumulate total sample count even it's a line with invalid debug info
288       FunctionProfile.addTotalSamples(Count);
289       // Move to next IP within the range
290       IP.advance();
291     }
292   }
293 }
294 
295 void CSProfileGenerator::populateFunctionBoundarySamples(
296     StringRef ContextId, FunctionSamples &FunctionProfile,
297     const BranchSample &BranchCounters, ProfiledBinary *Binary) {
298 
299   for (auto Entry : BranchCounters) {
300     uint64_t SourceOffset = Entry.first.first;
301     uint64_t TargetOffset = Entry.first.second;
302     uint64_t Count = Entry.second;
303     // Get the callee name by branch target if it's a call branch
304     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
305         Binary->getFuncFromStartOffset(TargetOffset));
306     if (CalleeName.size() == 0)
307       continue;
308 
309     // Record called target sample and its count
310     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
311     if (!LeafLoc.hasValue())
312       continue;
313     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
314                                            LeafLoc->second.Discriminator,
315                                            CalleeName, Count);
316 
317     // Record head sample for called target(callee)
318     std::ostringstream OCalleeCtxStr;
319     if (ContextId.find(" @ ") != StringRef::npos) {
320       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
321       OCalleeCtxStr << " @ ";
322     }
323     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
324 
325     FunctionSamples &CalleeProfile =
326         getFunctionProfileForContext(OCalleeCtxStr.str());
327     assert(Count != 0 && "Unexpected zero weight branch");
328     CalleeProfile.addHeadSamples(Count);
329   }
330 }
331 
332 static FrameLocation getCallerContext(StringRef CalleeContext,
333                                       StringRef &CallerNameWithContext) {
334   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
335   CallerNameWithContext = CallerContext.rsplit(':').first;
336   auto ContextSplit = CallerContext.rsplit(" @ ");
337   StringRef CallerFrameStr = ContextSplit.second.size() == 0
338                                  ? ContextSplit.first
339                                  : ContextSplit.second;
340   FrameLocation LeafFrameLoc = {"", {0, 0}};
341   StringRef Funcname;
342   SampleContext::decodeContextString(CallerFrameStr, Funcname,
343                                      LeafFrameLoc.second);
344   LeafFrameLoc.first = Funcname.str();
345   return LeafFrameLoc;
346 }
347 
348 void CSProfileGenerator::populateInferredFunctionSamples() {
349   for (const auto &Item : ProfileMap) {
350     const StringRef CalleeContext = Item.first();
351     const FunctionSamples &CalleeProfile = Item.second;
352 
353     // If we already have head sample counts, we must have value profile
354     // for call sites added already. Skip to avoid double counting.
355     if (CalleeProfile.getHeadSamples())
356       continue;
357     // If we don't have context, nothing to do for caller's call site.
358     // This could happen for entry point function.
359     if (CalleeContext.find(" @ ") == StringRef::npos)
360       continue;
361 
362     // Infer Caller's frame loc and context ID through string splitting
363     StringRef CallerContextId;
364     FrameLocation &&CallerLeafFrameLoc =
365         getCallerContext(CalleeContext, CallerContextId);
366 
367     // It's possible that we haven't seen any sample directly in the caller,
368     // in which case CallerProfile will not exist. But we can't modify
369     // ProfileMap while iterating it.
370     // TODO: created function profile for those callers too
371     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
372       continue;
373     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
374 
375     // Since we don't have call count for inlined functions, we
376     // estimate it from inlinee's profile using entry body sample.
377     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
378     // If we don't have samples with location, use 1 to indicate live.
379     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
380       EstimatedCallCount = 1;
381     CallerProfile.addCalledTargetSamples(
382         CallerLeafFrameLoc.second.LineOffset,
383         CallerLeafFrameLoc.second.Discriminator,
384         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
385     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
386                                  CallerLeafFrameLoc.second.Discriminator,
387                                  EstimatedCallCount);
388     CallerProfile.addTotalSamples(EstimatedCallCount);
389   }
390 }
391 
392 void CSProfileGenerator::postProcessProfiles() {
393   // Compute hot/cold threshold based on profile. This will be used for cold
394   // context profile merging/trimming.
395   computeSummaryAndThreshold();
396 
397   // Run global pre-inliner to adjust/merge context profile based on estimated
398   // inline decisions.
399   CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
400 
401   // Trim and merge cold context profile using cold threshold above;
402   SampleContextTrimmer(ProfileMap)
403       .trimAndMergeColdContextProfiles(
404           ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
405 }
406 
407 void CSProfileGenerator::computeSummaryAndThreshold() {
408   // Update the default value of cold cutoff for llvm-profgen.
409   // Do it here because we don't want to change the global default,
410   // which would lead CS profile size too large.
411   if (!ProfileSummaryCutoffCold.getNumOccurrences())
412     ProfileSummaryCutoffCold = 999000;
413 
414   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
415   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
416   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
417       (Summary->getDetailedSummary()));
418   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
419       (Summary->getDetailedSummary()));
420 }
421 
422 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
423                                StringMap<FunctionSamples> &ProfileMap) {
424   if (std::error_code EC = Writer->write(ProfileMap))
425     exitWithError(std::move(EC));
426 }
427 
428 // Helper function to extract context prefix string stack
429 // Extract context stack for reusing, leaf context stack will
430 // be added compressed while looking up function profile
431 static void
432 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
433                           const SmallVectorImpl<const PseudoProbe *> &Probes,
434                           ProfiledBinary *Binary) {
435   for (const auto *P : Probes) {
436     Binary->getInlineContextForProbe(P, ContextStrStack, true);
437   }
438 }
439 
440 void PseudoProbeCSProfileGenerator::generateProfile() {
441   // Enable pseudo probe functionalities in SampleProf
442   FunctionSamples::ProfileIsProbeBased = true;
443   FunctionSamples::ProfileIsCS = true;
444   for (const auto &BI : BinarySampleCounters) {
445     ProfiledBinary *Binary = BI.first;
446     for (const auto &CI : BI.second) {
447       const ProbeBasedCtxKey *CtxKey =
448           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
449       SmallVector<std::string, 16> ContextStrStack;
450       extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
451       // Fill in function body samples from probes, also infer caller's samples
452       // from callee's probe
453       populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
454                                     Binary);
455       // Fill in boundary samples for a call probe
456       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
457                                         ContextStrStack, Binary);
458     }
459   }
460 
461   postProcessProfiles();
462 }
463 
464 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
465     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
466     ProfiledBinary *Binary) {
467   RangeSample Ranges;
468   findDisjointRanges(Ranges, RangeCounter);
469   for (const auto &Range : Ranges) {
470     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
471     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
472     uint64_t Count = Range.second;
473     // Disjoint ranges have introduce zero-filled gap that
474     // doesn't belong to current context, filter them out.
475     if (Count == 0)
476       continue;
477 
478     InstructionPointer IP(Binary, RangeBegin, true);
479 
480     // Disjoint ranges may have range in the middle of two instr,
481     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
482     // can be Addr1+1 to Addr2-1. We should ignore such range.
483     if (IP.Address > RangeEnd)
484       continue;
485 
486     while (IP.Address <= RangeEnd) {
487       const AddressProbesMap &Address2ProbesMap =
488           Binary->getAddress2ProbesMap();
489       auto It = Address2ProbesMap.find(IP.Address);
490       if (It != Address2ProbesMap.end()) {
491         for (const auto &Probe : It->second) {
492           if (!Probe.isBlock())
493             continue;
494           ProbeCounter[&Probe] += Count;
495         }
496       }
497 
498       IP.advance();
499     }
500   }
501 }
502 
503 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
504     const RangeSample &RangeCounter,
505     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
506   ProbeCounterMap ProbeCounter;
507   // Extract the top frame probes by looking up each address among the range in
508   // the Address2ProbeMap
509   extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
510   std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
511   for (auto PI : ProbeCounter) {
512     const PseudoProbe *Probe = PI.first;
513     uint64_t Count = PI.second;
514     // Ignore dangling probes since they will be reported later if needed.
515     if (Probe->isDangling())
516       continue;
517     FunctionSamples &FunctionProfile =
518         getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
519     // Record the current frame and FunctionProfile whenever samples are
520     // collected for non-danglie probes. This is for reporting all of the
521     // dangling probes of the frame later.
522     FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
523     FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
524     FunctionProfile.addTotalSamples(Count);
525     if (Probe->isEntry()) {
526       FunctionProfile.addHeadSamples(Count);
527       // Look up for the caller's function profile
528       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
529       if (InlinerDesc != nullptr) {
530         // Since the context id will be compressed, we have to use callee's
531         // context id to infer caller's context id to ensure they share the
532         // same context prefix.
533         StringRef CalleeContextId =
534             FunctionProfile.getContext().getNameWithContext();
535         StringRef CallerContextId;
536         FrameLocation &&CallerLeafFrameLoc =
537             getCallerContext(CalleeContextId, CallerContextId);
538         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
539         assert(CallerIndex &&
540                "Inferred caller's location index shouldn't be zero!");
541         FunctionSamples &CallerProfile =
542             getFunctionProfileForContext(CallerContextId);
543         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
544         CallerProfile.addBodySamples(CallerIndex, 0, Count);
545         CallerProfile.addTotalSamples(Count);
546         CallerProfile.addCalledTargetSamples(
547             CallerIndex, 0,
548             FunctionProfile.getContext().getNameWithoutContext(), Count);
549       }
550     }
551 
552     // Report dangling probes for frames that have real samples collected.
553     // Dangling probes are the probes associated to an empty block. With this
554     // place holder, sample count on a dangling probe will not be trusted by the
555     // compiler and we will rely on the counts inference algorithm to get the
556     // probe a reasonable count. Use InvalidProbeCount to mark sample count for
557     // a dangling probe.
558     for (auto &I : FrameSamples) {
559       auto *FunctionProfile = I.second;
560       for (auto *Probe : I.first->getProbes()) {
561         if (Probe->isDangling()) {
562           FunctionProfile->addBodySamplesForProbe(
563               Probe->Index, FunctionSamples::InvalidProbeCount);
564         }
565       }
566     }
567   }
568 }
569 
570 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
571     const BranchSample &BranchCounter,
572     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
573   for (auto BI : BranchCounter) {
574     uint64_t SourceOffset = BI.first.first;
575     uint64_t TargetOffset = BI.first.second;
576     uint64_t Count = BI.second;
577     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
578     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
579     if (CallProbe == nullptr)
580       continue;
581     FunctionSamples &FunctionProfile =
582         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
583     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
584     FunctionProfile.addTotalSamples(Count);
585     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
586         Binary->getFuncFromStartOffset(TargetOffset));
587     if (CalleeName.size() == 0)
588       continue;
589     FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
590                                            Count);
591   }
592 }
593 
594 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
595     SmallVectorImpl<std::string> &ContextStrStack,
596     const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
597   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
598   // Compress the context string except for the leaf frame
599   std::string LeafFrame = ContextStrStack.back();
600   ContextStrStack.pop_back();
601   CSProfileGenerator::compressRecursionContext(ContextStrStack);
602 
603   std::ostringstream OContextStr;
604   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
605     if (OContextStr.str().size())
606       OContextStr << " @ ";
607     OContextStr << ContextStrStack[I];
608   }
609   // For leaf inlined context with the top frame, we should strip off the top
610   // frame's probe id, like:
611   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
612   if (OContextStr.str().size())
613     OContextStr << " @ ";
614   OContextStr << StringRef(LeafFrame).split(":").first.str();
615 
616   FunctionSamples &FunctionProile =
617       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
618   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
619   return FunctionProile;
620 }
621 
622 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
623     SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
624     ProfiledBinary *Binary) {
625   // Explicitly copy the context for appending the leaf context
626   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
627                                                    ContextStrStack.end());
628   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
629   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
630   bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
631   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
632                                         WasLeafInlined);
633 }
634 
635 } // end namespace sampleprof
636 } // end namespace llvm
637