1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11 
// Path of the profile file to write. The option is mandatory (cl::Required).
static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                           cl::Required,
                                           cl::desc("Output profile file"));
// Short spelling "-o", registered as an alias of --output.
static cl::alias OutputA("o", cl::desc("Alias for --output"),
                         cl::aliasopt(OutputFilename));
17 
18 static cl::opt<SampleProfileFormat> OutputFormat(
19     "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
20     cl::values(
21         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24         clEnumValN(SPF_Text, "text", "Text encoding"),
25         clEnumValN(SPF_GCC, "gcc",
26                    "GCC encoding (only meaningful for -sample)")));
27 
// Upper bound on the size of the repeated frame sequence considered when
// deduplicating recursive call contexts. cl::location makes the parsed value
// land directly in CSProfileGenerator::MaxCompressionSize. The option is
// marked cl::Hidden.
static cl::opt<int32_t, true> RecursionCompression(
    "compress-recursion",
    cl::desc("Compressing recursion by deduplicating adjacent frame "
             "sequences up to the specified size. -1 means no size limit."),
    cl::Hidden,
    cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34 
// Total-sample threshold below which a context profile is treated as cold.
// Initialized to 100.
static cl::opt<uint64_t> CSProfColdThreshold(
    "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
    cl::desc("Specify the total samples threshold for a context profile to "
             "be considered cold, any cold profiles will be merged into "
             "context-less base profiles"));
40 
// Switch controlling whether cold context profiles are folded into their
// context-less base profile. Enabled by default.
static cl::opt<bool> CSProfMergeColdContext(
    "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
    cl::desc("This works together with --csprof-cold-thres. If the total count "
             "of context profile is smaller than the threshold, it will be "
             "merged into context-less base profile."));
46 
// Switch controlling whether profiles that are still cold after merging are
// dropped from the output. Enabled by default.
static cl::opt<bool> CSProfTrimColdContext(
    "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
    cl::desc("This works together with --csprof-cold-thres. If the total count "
             "of the profile after all merge is done is still smaller than "
             "threshold, it will be trimmed."));
52 
53 using namespace llvm;
54 using namespace sampleprof;
55 
56 namespace llvm {
57 namespace sampleprof {
58 
// Backing storage for the --compress-recursion option (bound through
// cl::location). It starts at -1, which means no size limit.
int32_t CSProfileGenerator::MaxCompressionSize = -1;
61 
62 static bool
63 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
64   return BinarySampleCounters.size() &&
65          BinarySampleCounters.begin()->first->usePseudoProbes();
66 }
67 
68 std::unique_ptr<ProfileGenerator>
69 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
70                          enum PerfScriptType SampleType) {
71   std::unique_ptr<ProfileGenerator> ProfileGenerator;
72   if (SampleType == PERF_LBR_STACK) {
73     if (usePseudoProbes(BinarySampleCounters)) {
74       ProfileGenerator.reset(
75           new PseudoProbeCSProfileGenerator(BinarySampleCounters));
76     } else {
77       ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
78     }
79   } else {
80     // TODO:
81     llvm_unreachable("Unsupported perfscript!");
82   }
83 
84   return ProfileGenerator;
85 }
86 
87 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
88                              StringMap<FunctionSamples> &ProfileMap) {
89   Writer->write(ProfileMap);
90 }
91 
92 void ProfileGenerator::write() {
93   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
94   if (std::error_code EC = WriterOrErr.getError())
95     exitWithError(EC, OutputFilename);
96   write(std::move(WriterOrErr.get()), ProfileMap);
97 }
98 
99 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
100                                           const RangeSample &Ranges) {
101 
102   /*
103   Regions may overlap with each other. Using the boundary info, find all
104   disjoint ranges and their sample count. BoundaryPoint contains the count
105   multiple samples begin/end at this points.
106 
107   |<--100-->|           Sample1
108   |<------200------>|   Sample2
109   A         B       C
110 
111   In the example above,
112   Sample1 begins at A, ends at B, its value is 100.
113   Sample2 beings at A, ends at C, its value is 200.
114   For A, BeginCount is the sum of sample begins at A, which is 300 and no
115   samples ends at A, so EndCount is 0.
116   Then boundary points A, B, and C with begin/end counts are:
117   A: (300, 0)
118   B: (0, 100)
119   C: (0, 200)
120   */
121   struct BoundaryPoint {
122     // Sum of sample counts beginning at this point
123     uint64_t BeginCount;
124     // Sum of sample counts ending at this point
125     uint64_t EndCount;
126 
127     BoundaryPoint() : BeginCount(0), EndCount(0){};
128 
129     void addBeginCount(uint64_t Count) { BeginCount += Count; }
130 
131     void addEndCount(uint64_t Count) { EndCount += Count; }
132   };
133 
134   /*
135   For the above example. With boundary points, follwing logic finds two
136   disjoint region of
137 
138   [A,B]:   300
139   [B+1,C]: 200
140 
141   If there is a boundary point that both begin and end, the point itself
142   becomes a separate disjoint region. For example, if we have original
143   ranges of
144 
145   |<--- 100 --->|
146                 |<--- 200 --->|
147   A             B             C
148 
149   there are three boundary points with their begin/end counts of
150 
151   A: (100, 0)
152   B: (200, 100)
153   C: (0, 200)
154 
155   the disjoint ranges would be
156 
157   [A, B-1]: 100
158   [B, B]:   300
159   [B+1, C]: 200.
160   */
161   std::map<uint64_t, BoundaryPoint> Boundaries;
162 
163   for (auto Item : Ranges) {
164     uint64_t Begin = Item.first.first;
165     uint64_t End = Item.first.second;
166     uint64_t Count = Item.second;
167     if (Boundaries.find(Begin) == Boundaries.end())
168       Boundaries[Begin] = BoundaryPoint();
169     Boundaries[Begin].addBeginCount(Count);
170 
171     if (Boundaries.find(End) == Boundaries.end())
172       Boundaries[End] = BoundaryPoint();
173     Boundaries[End].addEndCount(Count);
174   }
175 
176   uint64_t BeginAddress = 0;
177   int Count = 0;
178   for (auto Item : Boundaries) {
179     uint64_t Address = Item.first;
180     BoundaryPoint &Point = Item.second;
181     if (Point.BeginCount) {
182       if (BeginAddress)
183         DisjointRanges[{BeginAddress, Address - 1}] = Count;
184       Count += Point.BeginCount;
185       BeginAddress = Address;
186     }
187     if (Point.EndCount) {
188       assert(BeginAddress && "First boundary point cannot be 'end' point");
189       DisjointRanges[{BeginAddress, Address}] = Count;
190       Count -= Point.EndCount;
191       BeginAddress = Address + 1;
192     }
193   }
194 }
195 
196 FunctionSamples &
197 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
198                                                  bool WasLeafInlined) {
199   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
200   if (Ret.second) {
201     SampleContext FContext(Ret.first->first(), RawContext);
202     if (WasLeafInlined)
203       FContext.setAttribute(ContextWasInlined);
204     FunctionSamples &FProfile = Ret.first->second;
205     FProfile.setContext(FContext);
206     FProfile.setName(FContext.getNameWithoutContext());
207   }
208   return Ret.first->second;
209 }
210 
211 void CSProfileGenerator::generateProfile() {
212   FunctionSamples::ProfileIsCS = true;
213   for (const auto &BI : BinarySampleCounters) {
214     ProfiledBinary *Binary = BI.first;
215     for (const auto &CI : BI.second) {
216       const StringBasedCtxKey *CtxKey =
217           dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
218       StringRef ContextId(CtxKey->Context);
219       // Get or create function profile for the range
220       FunctionSamples &FunctionProfile =
221           getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
222 
223       // Fill in function body samples
224       populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
225                                   Binary);
226       // Fill in boundary sample counts as well as call site samples for calls
227       populateFunctionBoundarySamples(ContextId, FunctionProfile,
228                                       CI.second.BranchCounter, Binary);
229     }
230   }
231   // Fill in call site value sample for inlined calls and also use context to
232   // infer missing samples. Since we don't have call count for inlined
233   // functions, we estimate it from inlinee's profile using the entry of the
234   // body sample.
235   populateInferredFunctionSamples();
236 
237   // Compute hot/cold threshold based on profile. This will be used for cold
238   // context profile merging/trimming.
239   computeSummaryAndThreshold();
240 }
241 
242 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
243     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
244     uint64_t Count) {
245   // Filter out invalid negative(int type) lineOffset
246   if (LeafLoc.second.LineOffset & 0x80000000)
247     return;
248   // Use the maximum count of samples with same line location
249   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
250       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
251   uint64_t PreviousCount = R ? R.get() : 0;
252   if (PreviousCount < Count) {
253     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
254                                    LeafLoc.second.Discriminator,
255                                    Count - PreviousCount);
256   }
257 }
258 
259 void CSProfileGenerator::populateFunctionBodySamples(
260     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
261     ProfiledBinary *Binary) {
262   // Compute disjoint ranges first, so we can use MAX
263   // for calculating count for each location.
264   RangeSample Ranges;
265   findDisjointRanges(Ranges, RangeCounter);
266   for (auto Range : Ranges) {
267     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
268     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
269     uint64_t Count = Range.second;
270     // Disjoint ranges have introduce zero-filled gap that
271     // doesn't belong to current context, filter them out.
272     if (Count == 0)
273       continue;
274 
275     InstructionPointer IP(Binary, RangeBegin, true);
276 
277     // Disjoint ranges may have range in the middle of two instr,
278     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
279     // can be Addr1+1 to Addr2-1. We should ignore such range.
280     if (IP.Address > RangeEnd)
281       continue;
282 
283     while (IP.Address <= RangeEnd) {
284       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
285       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
286       if (LeafLoc.hasValue()) {
287         // Recording body sample for this specific context
288         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
289       }
290       // Accumulate total sample count even it's a line with invalid debug info
291       FunctionProfile.addTotalSamples(Count);
292       // Move to next IP within the range
293       IP.advance();
294     }
295   }
296 }
297 
298 void CSProfileGenerator::populateFunctionBoundarySamples(
299     StringRef ContextId, FunctionSamples &FunctionProfile,
300     const BranchSample &BranchCounters, ProfiledBinary *Binary) {
301 
302   for (auto Entry : BranchCounters) {
303     uint64_t SourceOffset = Entry.first.first;
304     uint64_t TargetOffset = Entry.first.second;
305     uint64_t Count = Entry.second;
306     // Get the callee name by branch target if it's a call branch
307     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
308         Binary->getFuncFromStartOffset(TargetOffset));
309     if (CalleeName.size() == 0)
310       continue;
311 
312     // Record called target sample and its count
313     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
314     if (!LeafLoc.hasValue())
315       continue;
316     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
317                                            LeafLoc->second.Discriminator,
318                                            CalleeName, Count);
319 
320     // Record head sample for called target(callee)
321     std::ostringstream OCalleeCtxStr;
322     if (ContextId.find(" @ ") != StringRef::npos) {
323       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
324       OCalleeCtxStr << " @ ";
325     }
326     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
327 
328     FunctionSamples &CalleeProfile =
329         getFunctionProfileForContext(OCalleeCtxStr.str());
330     assert(Count != 0 && "Unexpected zero weight branch");
331     CalleeProfile.addHeadSamples(Count);
332   }
333 }
334 
335 static FrameLocation getCallerContext(StringRef CalleeContext,
336                                       StringRef &CallerNameWithContext) {
337   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
338   CallerNameWithContext = CallerContext.rsplit(':').first;
339   auto ContextSplit = CallerContext.rsplit(" @ ");
340   StringRef CallerFrameStr = ContextSplit.second.size() == 0
341                                  ? ContextSplit.first
342                                  : ContextSplit.second;
343   FrameLocation LeafFrameLoc = {"", {0, 0}};
344   StringRef Funcname;
345   SampleContext::decodeContextString(CallerFrameStr, Funcname,
346                                      LeafFrameLoc.second);
347   LeafFrameLoc.first = Funcname.str();
348   return LeafFrameLoc;
349 }
350 
351 void CSProfileGenerator::populateInferredFunctionSamples() {
352   for (const auto &Item : ProfileMap) {
353     const StringRef CalleeContext = Item.first();
354     const FunctionSamples &CalleeProfile = Item.second;
355 
356     // If we already have head sample counts, we must have value profile
357     // for call sites added already. Skip to avoid double counting.
358     if (CalleeProfile.getHeadSamples())
359       continue;
360     // If we don't have context, nothing to do for caller's call site.
361     // This could happen for entry point function.
362     if (CalleeContext.find(" @ ") == StringRef::npos)
363       continue;
364 
365     // Infer Caller's frame loc and context ID through string splitting
366     StringRef CallerContextId;
367     FrameLocation &&CallerLeafFrameLoc =
368         getCallerContext(CalleeContext, CallerContextId);
369 
370     // It's possible that we haven't seen any sample directly in the caller,
371     // in which case CallerProfile will not exist. But we can't modify
372     // ProfileMap while iterating it.
373     // TODO: created function profile for those callers too
374     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
375       continue;
376     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
377 
378     // Since we don't have call count for inlined functions, we
379     // estimate it from inlinee's profile using entry body sample.
380     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
381     // If we don't have samples with location, use 1 to indicate live.
382     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
383       EstimatedCallCount = 1;
384     CallerProfile.addCalledTargetSamples(
385         CallerLeafFrameLoc.second.LineOffset,
386         CallerLeafFrameLoc.second.Discriminator,
387         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
388     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
389                                  CallerLeafFrameLoc.second.Discriminator,
390                                  EstimatedCallCount);
391     CallerProfile.addTotalSamples(EstimatedCallCount);
392   }
393 }
394 
395 void CSProfileGenerator::computeSummaryAndThreshold() {
396   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
397   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
398   PSI.reset(new ProfileSummaryInfo(std::move(Summary)));
399 }
400 
401 void CSProfileGenerator::mergeAndTrimColdProfile(
402     StringMap<FunctionSamples> &ProfileMap) {
403   if (!CSProfMergeColdContext && !CSProfTrimColdContext)
404     return;
405 
406   // Use threshold calculated from profile summary unless specified
407   uint64_t ColdThreshold = PSI->getColdCountThreshold();
408   if (CSProfColdThreshold.getNumOccurrences()) {
409     ColdThreshold = CSProfColdThreshold;
410   }
411 
412   // Nothing to merge if sample threshold is zero
413   if (ColdThreshold == 0)
414     return;
415 
416   // Filter the cold profiles from ProfileMap and move them into a tmp
417   // container
418   std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles;
419   for (const auto &I : ProfileMap) {
420     const FunctionSamples &FunctionProfile = I.second;
421     if (FunctionProfile.getTotalSamples() >= ColdThreshold)
422       continue;
423     ColdProfiles.emplace_back(I.getKey(), &I.second);
424   }
425 
426   // Remove the code profile from ProfileMap and merge them into BaseProileMap
427   StringMap<FunctionSamples> BaseProfileMap;
428   for (const auto &I : ColdProfiles) {
429     if (CSProfMergeColdContext) {
430       auto Ret = BaseProfileMap.try_emplace(
431           I.second->getContext().getNameWithoutContext(), FunctionSamples());
432       FunctionSamples &BaseProfile = Ret.first->second;
433       BaseProfile.merge(*I.second);
434     }
435     ProfileMap.erase(I.first);
436   }
437 
438   // Merge the base profiles into ProfileMap;
439   for (const auto &I : BaseProfileMap) {
440     // Filter the cold base profile
441     if (CSProfTrimColdContext &&
442         I.second.getTotalSamples() < CSProfColdThreshold &&
443         ProfileMap.find(I.getKey()) == ProfileMap.end())
444       continue;
445     // Merge the profile if the original profile exists, otherwise just insert
446     // as a new profile
447     FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey());
448     OrigProfile.merge(I.second);
449   }
450 }
451 
452 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
453                                StringMap<FunctionSamples> &ProfileMap) {
454   mergeAndTrimColdProfile(ProfileMap);
455   // Add bracket for context key to support different profile binary format
456   StringMap<FunctionSamples> CxtWithBracketPMap;
457   for (const auto &Item : ProfileMap) {
458     std::string ContextWithBracket = "[" + Item.first().str() + "]";
459     auto Ret = CxtWithBracketPMap.try_emplace(ContextWithBracket, Item.second);
460     assert(Ret.second && "Must be a unique context");
461     SampleContext FContext(Ret.first->first(), RawContext);
462     FunctionSamples &FProfile = Ret.first->second;
463     FContext.setAllAttributes(FProfile.getContext().getAllAttributes());
464     FProfile.setName(FContext.getNameWithContext(true));
465     FProfile.setContext(FContext);
466   }
467   Writer->write(CxtWithBracketPMap);
468 }
469 
470 // Helper function to extract context prefix string stack
471 // Extract context stack for reusing, leaf context stack will
472 // be added compressed while looking up function profile
473 static void
474 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
475                           const SmallVectorImpl<const PseudoProbe *> &Probes,
476                           ProfiledBinary *Binary) {
477   for (const auto *P : Probes) {
478     Binary->getInlineContextForProbe(P, ContextStrStack, true);
479   }
480 }
481 
482 void PseudoProbeCSProfileGenerator::generateProfile() {
483   // Enable pseudo probe functionalities in SampleProf
484   FunctionSamples::ProfileIsProbeBased = true;
485   FunctionSamples::ProfileIsCS = true;
486   for (const auto &BI : BinarySampleCounters) {
487     ProfiledBinary *Binary = BI.first;
488     for (const auto &CI : BI.second) {
489       const ProbeBasedCtxKey *CtxKey =
490           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
491       SmallVector<std::string, 16> ContextStrStack;
492       extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
493       // Fill in function body samples from probes, also infer caller's samples
494       // from callee's probe
495       populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
496                                     Binary);
497       // Fill in boundary samples for a call probe
498       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
499                                         ContextStrStack, Binary);
500     }
501   }
502 
503   // Compute hot/cold threshold based on profile. This will be used for cold
504   // context profile merging/trimming.
505   computeSummaryAndThreshold();
506 }
507 
508 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
509     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
510     ProfiledBinary *Binary) {
511   RangeSample Ranges;
512   findDisjointRanges(Ranges, RangeCounter);
513   for (const auto &Range : Ranges) {
514     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
515     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
516     uint64_t Count = Range.second;
517     // Disjoint ranges have introduce zero-filled gap that
518     // doesn't belong to current context, filter them out.
519     if (Count == 0)
520       continue;
521 
522     InstructionPointer IP(Binary, RangeBegin, true);
523 
524     // Disjoint ranges may have range in the middle of two instr,
525     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
526     // can be Addr1+1 to Addr2-1. We should ignore such range.
527     if (IP.Address > RangeEnd)
528       continue;
529 
530     while (IP.Address <= RangeEnd) {
531       const AddressProbesMap &Address2ProbesMap =
532           Binary->getAddress2ProbesMap();
533       auto It = Address2ProbesMap.find(IP.Address);
534       if (It != Address2ProbesMap.end()) {
535         for (const auto &Probe : It->second) {
536           if (!Probe.isBlock())
537             continue;
538           ProbeCounter[&Probe] += Count;
539         }
540       }
541 
542       IP.advance();
543     }
544   }
545 }
546 
547 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
548     const RangeSample &RangeCounter,
549     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
550   ProbeCounterMap ProbeCounter;
551   // Extract the top frame probes by looking up each address among the range in
552   // the Address2ProbeMap
553   extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
554   for (auto PI : ProbeCounter) {
555     const PseudoProbe *Probe = PI.first;
556     uint64_t Count = PI.second;
557     FunctionSamples &FunctionProfile =
558         getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
559 
560     // Use InvalidProbeCount(UINT64_MAX) to mark sample count for a dangling
561     // probe. Dangling probes are the probes associated to an empty block. With
562     // this place holder, sample count on dangling probe will not be trusted by
563     // the compiler and it will rely on the counts inference algorithm to get
564     // the probe a reasonable count.
565     if (Probe->isDangling()) {
566       FunctionProfile.addBodySamplesForProbe(
567           Probe->Index, FunctionSamples::InvalidProbeCount);
568       continue;
569     }
570     FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
571     FunctionProfile.addTotalSamples(Count);
572     if (Probe->isEntry()) {
573       FunctionProfile.addHeadSamples(Count);
574       // Look up for the caller's function profile
575       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
576       if (InlinerDesc != nullptr) {
577         // Since the context id will be compressed, we have to use callee's
578         // context id to infer caller's context id to ensure they share the
579         // same context prefix.
580         StringRef CalleeContextId =
581             FunctionProfile.getContext().getNameWithContext(true);
582         StringRef CallerContextId;
583         FrameLocation &&CallerLeafFrameLoc =
584             getCallerContext(CalleeContextId, CallerContextId);
585         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
586         assert(CallerIndex &&
587                "Inferred caller's location index shouldn't be zero!");
588         FunctionSamples &CallerProfile =
589             getFunctionProfileForContext(CallerContextId);
590         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
591         CallerProfile.addBodySamples(CallerIndex, 0, Count);
592         CallerProfile.addTotalSamples(Count);
593         CallerProfile.addCalledTargetSamples(
594             CallerIndex, 0,
595             FunctionProfile.getContext().getNameWithoutContext(), Count);
596       }
597     }
598   }
599 }
600 
601 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
602     const BranchSample &BranchCounter,
603     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
604   for (auto BI : BranchCounter) {
605     uint64_t SourceOffset = BI.first.first;
606     uint64_t TargetOffset = BI.first.second;
607     uint64_t Count = BI.second;
608     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
609     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
610     if (CallProbe == nullptr)
611       continue;
612     FunctionSamples &FunctionProfile =
613         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
614     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
615     FunctionProfile.addTotalSamples(Count);
616     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
617         Binary->getFuncFromStartOffset(TargetOffset));
618     if (CalleeName.size() == 0)
619       continue;
620     FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
621                                            Count);
622   }
623 }
624 
625 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
626     SmallVectorImpl<std::string> &ContextStrStack,
627     const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
628   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
629   // Compress the context string except for the leaf frame
630   std::string LeafFrame = ContextStrStack.back();
631   ContextStrStack.pop_back();
632   CSProfileGenerator::compressRecursionContext(ContextStrStack);
633 
634   std::ostringstream OContextStr;
635   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
636     if (OContextStr.str().size())
637       OContextStr << " @ ";
638     OContextStr << ContextStrStack[I];
639   }
640   // For leaf inlined context with the top frame, we should strip off the top
641   // frame's probe id, like:
642   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
643   if (OContextStr.str().size())
644     OContextStr << " @ ";
645   OContextStr << StringRef(LeafFrame).split(":").first.str();
646 
647   FunctionSamples &FunctionProile =
648       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
649   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
650   return FunctionProile;
651 }
652 
653 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
654     SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
655     ProfiledBinary *Binary) {
656   // Explicitly copy the context for appending the leaf context
657   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
658                                                    ContextStrStack.end());
659   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
660   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
661   bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
662   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
663                                         WasLeafInlined);
664 }
665 
666 } // end namespace sampleprof
667 } // end namespace llvm
668