1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ProfiledBinary.h"
11 #include "llvm/ProfileData/ProfileCommon.h"
12 #include <unordered_set>
13 
14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
15                                     cl::Required,
16                                     cl::desc("Output profile file"));
17 static cl::alias OutputA("o", cl::desc("Alias for --output"),
18                          cl::aliasopt(OutputFilename));
19 
20 static cl::opt<SampleProfileFormat> OutputFormat(
21     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
22     cl::values(
23         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
24         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
25         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
26         clEnumValN(SPF_Text, "text", "Text encoding"),
27         clEnumValN(SPF_GCC, "gcc",
28                    "GCC encoding (only meaningful for -sample)")));
29 
30 cl::opt<bool> UseMD5(
31     "use-md5", cl::init(false), cl::Hidden,
32     cl::desc("Use md5 to represent function names in the output profile (only "
33              "meaningful for -extbinary)"));
34 
35 static cl::opt<bool> PopulateProfileSymbolList(
36     "populate-profile-symbol-list", cl::init(true), cl::Hidden,
37     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
38 
39 static cl::opt<int32_t, true> RecursionCompression(
40     "compress-recursion",
41     cl::desc("Compressing recursion by deduplicating adjacent frame "
42              "sequences up to the specified size. -1 means no size limit."),
43     cl::Hidden,
44     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
45 
46 static cl::opt<bool> CSProfMergeColdContext(
47     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
48     cl::desc("If the total count of context profile is smaller than "
49              "the threshold, it will be merged into context-less base "
50              "profile."));
51 
52 static cl::opt<bool> CSProfTrimColdContext(
53     "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
54     cl::desc("If the total count of the profile after all merge is done "
55              "is still smaller than threshold, it will be trimmed."));
56 
57 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
58     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
59     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
60              "context-less base profile"));
61 
62 static cl::opt<int, true> CSProfMaxContextDepth(
63     "csprof-max-context-depth", cl::ZeroOrMore,
64     cl::desc("Keep the last K contexts while merging profile. -1 means no "
65              "depth limit."),
66     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
67 
68 extern cl::opt<int> ProfileSummaryCutoffCold;
69 
70 using namespace llvm;
71 using namespace sampleprof;
72 
73 namespace llvm {
74 namespace sampleprof {
75 
76 // Initialize the MaxCompressionSize to -1 which means no size limit
77 int32_t CSProfileGenerator::MaxCompressionSize = -1;
78 
79 int CSProfileGenerator::MaxContextDepth = -1;
80 
81 std::unique_ptr<ProfileGeneratorBase>
82 ProfileGeneratorBase::create(ProfiledBinary *Binary,
83                              const ContextSampleCounterMap &SampleCounters,
84                              bool ProfileIsCS) {
85   std::unique_ptr<ProfileGeneratorBase> Generator;
86   if (ProfileIsCS) {
87     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
88   } else {
89     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
90   }
91 
92   return Generator;
93 }
94 
95 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
96                                  SampleProfileMap &ProfileMap) {
97   // Populate profile symbol list if extended binary format is used.
98   ProfileSymbolList SymbolList;
99 
100   // Turn it off temporarily for CS profile.
101   if (FunctionSamples::ProfileIsCS &&
102       !PopulateProfileSymbolList.getNumOccurrences())
103     PopulateProfileSymbolList = false;
104 
105   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
106     for (const auto &Item : ProfileMap) {
107       auto &Profile = Item.second;
108       SymbolList.add(Profile.getName(), true);
109     }
110     Writer->setProfileSymbolList(&SymbolList);
111   }
112 
113   if (std::error_code EC = Writer->write(ProfileMap))
114     exitWithError(std::move(EC));
115 }
116 
117 void ProfileGeneratorBase::write() {
118   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
119   if (std::error_code EC = WriterOrErr.getError())
120     exitWithError(EC, OutputFilename);
121 
122   if (UseMD5) {
123     if (OutputFormat != SPF_Ext_Binary)
124       WithColor::warning() << "-use-md5 is ignored. Specify "
125                               "--format=extbinary to enable it\n";
126     else
127       WriterOrErr.get()->setUseMD5();
128   }
129 
130   write(std::move(WriterOrErr.get()), ProfileMap);
131 }
132 
133 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
134                                               const RangeSample &Ranges) {
135 
136   /*
137   Regions may overlap with each other. Using the boundary info, find all
138   disjoint ranges and their sample count. BoundaryPoint contains the count
139   multiple samples begin/end at this points.
140 
141   |<--100-->|           Sample1
142   |<------200------>|   Sample2
143   A         B       C
144 
145   In the example above,
146   Sample1 begins at A, ends at B, its value is 100.
147   Sample2 beings at A, ends at C, its value is 200.
148   For A, BeginCount is the sum of sample begins at A, which is 300 and no
149   samples ends at A, so EndCount is 0.
150   Then boundary points A, B, and C with begin/end counts are:
151   A: (300, 0)
152   B: (0, 100)
153   C: (0, 200)
154   */
155   struct BoundaryPoint {
156     // Sum of sample counts beginning at this point
157     uint64_t BeginCount = UINT64_MAX;
158     // Sum of sample counts ending at this point
159     uint64_t EndCount = UINT64_MAX;
160     // Is the begin point of a zero range.
161     bool IsZeroRangeBegin = false;
162     // Is the end point of a zero range.
163     bool IsZeroRangeEnd = false;
164 
165     void addBeginCount(uint64_t Count) {
166       if (BeginCount == UINT64_MAX)
167         BeginCount = 0;
168       BeginCount += Count;
169     }
170 
171     void addEndCount(uint64_t Count) {
172       if (EndCount == UINT64_MAX)
173         EndCount = 0;
174       EndCount += Count;
175     }
176   };
177 
178   /*
179   For the above example. With boundary points, follwing logic finds two
180   disjoint region of
181 
182   [A,B]:   300
183   [B+1,C]: 200
184 
185   If there is a boundary point that both begin and end, the point itself
186   becomes a separate disjoint region. For example, if we have original
187   ranges of
188 
189   |<--- 100 --->|
190                 |<--- 200 --->|
191   A             B             C
192 
193   there are three boundary points with their begin/end counts of
194 
195   A: (100, 0)
196   B: (200, 100)
197   C: (0, 200)
198 
199   the disjoint ranges would be
200 
201   [A, B-1]: 100
202   [B, B]:   300
203   [B+1, C]: 200.
204 
205   Example for zero value range:
206 
207     |<--- 100 --->|
208                        |<--- 200 --->|
209   |<---------------  0 ----------------->|
210   A  B            C    D             E   F
211 
212   [A, B-1]  : 0
213   [B, C]    : 100
214   [C+1, D-1]: 0
215   [D, E]    : 200
216   [E+1, F]  : 0
217   */
218   std::map<uint64_t, BoundaryPoint> Boundaries;
219 
220   for (auto Item : Ranges) {
221     assert(Item.first.first <= Item.first.second &&
222            "Invalid instruction range");
223     auto &BeginPoint = Boundaries[Item.first.first];
224     auto &EndPoint = Boundaries[Item.first.second];
225     uint64_t Count = Item.second;
226 
227     BeginPoint.addBeginCount(Count);
228     EndPoint.addEndCount(Count);
229     if (Count == 0) {
230       BeginPoint.IsZeroRangeBegin = true;
231       EndPoint.IsZeroRangeEnd = true;
232     }
233   }
234 
235   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
236   // and the next valid address
237   uint64_t BeginAddress = UINT64_MAX;
238   int ZeroRangeDepth = 0;
239   uint64_t Count = 0;
240   for (auto Item : Boundaries) {
241     uint64_t Address = Item.first;
242     BoundaryPoint &Point = Item.second;
243     if (Point.BeginCount != UINT64_MAX) {
244       if (BeginAddress != UINT64_MAX)
245         DisjointRanges[{BeginAddress, Address - 1}] = Count;
246       Count += Point.BeginCount;
247       BeginAddress = Address;
248       ZeroRangeDepth += Point.IsZeroRangeBegin;
249     }
250     if (Point.EndCount != UINT64_MAX) {
251       assert((BeginAddress != UINT64_MAX) &&
252              "First boundary point cannot be 'end' point");
253       DisjointRanges[{BeginAddress, Address}] = Count;
254       assert(Count >= Point.EndCount && "Mismatched live ranges");
255       Count -= Point.EndCount;
256       BeginAddress = Address + 1;
257       ZeroRangeDepth -= Point.IsZeroRangeEnd;
258       // If the remaining count is zero and it's no longer in a zero range, this
259       // means we consume all the ranges before, thus mark BeginAddress as
260       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
261       //  [<---- 10 ---->]
262       //                       [<---- 20 ---->]
263       //   A             B     C              D
264       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
265       // have the [B+1, C-1] zero range.
266       if (Count == 0 && ZeroRangeDepth == 0)
267         BeginAddress = UINT64_MAX;
268     }
269   }
270 }
271 
272 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
273     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
274     uint64_t Count) {
275   // Use the maximum count of samples with same line location
276   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
277   ErrorOr<uint64_t> R =
278       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
279 
280   uint64_t PreviousCount = R ? R.get() : 0;
281   if (PreviousCount <= Count) {
282     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
283                                    Count - PreviousCount);
284   }
285 }
286 
287 FunctionSamples &
288 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
289   SampleContext Context(FuncName);
290   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
291   if (Ret.second) {
292     FunctionSamples &FProfile = Ret.first->second;
293     FProfile.setContext(Context);
294   }
295   return Ret.first->second;
296 }
297 
298 void ProfileGenerator::generateProfile() {
299   if (Binary->usePseudoProbes()) {
300     // TODO: Support probe based profile generation
301   } else {
302     generateLineNumBasedProfile();
303   }
304 }
305 
306 void ProfileGenerator::generateLineNumBasedProfile() {
307   assert(SampleCounters.size() == 1 &&
308          "Must have one entry for profile generation.");
309   const SampleCounter &SC = SampleCounters.begin()->second;
310   // Fill in function body samples
311   populateBodySamplesForAllFunctions(SC.RangeCounter);
312   // Fill in boundary sample counts as well as call site samples for calls
313   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
314 }
315 
316 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
317     const SampleContextFrameVector &FrameVec, uint64_t Count) {
318   // Get top level profile
319   FunctionSamples *FunctionProfile =
320       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
321   FunctionProfile->addTotalSamples(Count);
322 
323   for (size_t I = 1; I < FrameVec.size(); I++) {
324     LineLocation Callsite(
325         FrameVec[I - 1].Location.LineOffset,
326         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
327     FunctionSamplesMap &SamplesMap =
328         FunctionProfile->functionSamplesAt(Callsite);
329     auto Ret =
330         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
331     if (Ret.second) {
332       SampleContext Context(FrameVec[I].FuncName);
333       Ret.first->second.setContext(Context);
334     }
335     FunctionProfile = &Ret.first->second;
336     FunctionProfile->addTotalSamples(Count);
337   }
338 
339   return *FunctionProfile;
340 }
341 
342 RangeSample
343 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
344   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
345   // For each range, we search for the range of the function it belongs to and
346   // initialize it with zero count, so it remains zero if doesn't hit any
347   // samples. This is to be consistent with compiler that interpret zero count
348   // as unexecuted(cold).
349   for (auto I : RangeCounter) {
350     uint64_t RangeBegin = I.first.first;
351     uint64_t RangeEnd = I.first.second;
352     // Find the function offset range the current range begin belongs to.
353     auto FuncRange = Binary->findFuncOffsetRange(RangeBegin);
354     if (FuncRange.second == 0)
355       WithColor::warning()
356           << "[" << format("%8" PRIx64, RangeBegin) << " - "
357           << format("%8" PRIx64, RangeEnd)
358           << "]: Invalid range or disassembling error in profiled binary.\n";
359     else if (RangeEnd > FuncRange.second)
360       WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - "
361                            << format("%8" PRIx64, RangeEnd)
362                            << "]: Range is across different functions.\n";
363     else
364       Ranges[FuncRange] += 0;
365   }
366   RangeSample DisjointRanges;
367   findDisjointRanges(DisjointRanges, Ranges);
368   return DisjointRanges;
369 }
370 
371 void ProfileGenerator::populateBodySamplesForAllFunctions(
372     const RangeSample &RangeCounter) {
373   for (auto Range : preprocessRangeCounter(RangeCounter)) {
374     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
375     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
376     uint64_t Count = Range.second;
377 
378     InstructionPointer IP(Binary, RangeBegin, true);
379     // Disjoint ranges may have range in the middle of two instr,
380     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
381     // can be Addr1+1 to Addr2-1. We should ignore such range.
382     while (IP.Address <= RangeEnd) {
383       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
384       const SampleContextFrameVector &FrameVec =
385           Binary->getFrameLocationStack(Offset);
386       if (!FrameVec.empty()) {
387         uint64_t DC = Count * getDuplicationFactor(
388                                   FrameVec.back().Location.Discriminator);
389         FunctionSamples &FunctionProfile =
390             getLeafProfileAndAddTotalSamples(FrameVec, DC);
391         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
392                                             DC);
393       }
394       // Move to next IP within the range.
395       IP.advance();
396     }
397   }
398 }
399 
400 static bool isOutlinedFunction(StringRef CalleeName) {
401   // Check whether it's from hot-cold func split or coro split.
402   return CalleeName.find(".resume") != StringRef::npos ||
403          CalleeName.find(".cold") != StringRef::npos;
404 }
405 
406 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
407   // Get the callee name by branch target if it's a call branch.
408   StringRef CalleeName = FunctionSamples::getCanonicalFnName(
409       Binary->getFuncFromStartOffset(TargetOffset));
410 
411   // We won't accumulate sample count againt outlined function.
412   if (CalleeName.size() == 0 || isOutlinedFunction(CalleeName))
413     return StringRef();
414 
415   return CalleeName;
416 }
417 
418 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
419     const BranchSample &BranchCounters) {
420   for (auto Entry : BranchCounters) {
421     uint64_t SourceOffset = Entry.first.first;
422     uint64_t TargetOffset = Entry.first.second;
423     uint64_t Count = Entry.second;
424     assert(Count != 0 && "Unexpected zero weight branch");
425 
426     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
427     if (CalleeName.size() == 0)
428       continue;
429     // Record called target sample and its count.
430     const SampleContextFrameVector &FrameVec =
431         Binary->getFrameLocationStack(SourceOffset);
432     if (!FrameVec.empty()) {
433       Count *= getDuplicationFactor(FrameVec.back().Location.Discriminator);
434       FunctionSamples &FunctionProfile =
435           getLeafProfileAndAddTotalSamples(FrameVec, Count);
436       FunctionProfile.addCalledTargetSamples(
437           FrameVec.back().Location.LineOffset,
438           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
439           CalleeName, Count);
440     }
441     // Add head samples for callee.
442     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
443     CalleeProfile.addHeadSamples(Count);
444   }
445 }
446 
447 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
448     const SampleContextFrameVector &Context, bool WasLeafInlined) {
449   auto I = ProfileMap.find(SampleContext(Context));
450   if (I == ProfileMap.end()) {
451     // Save the new context for future references.
452     SampleContextFrames NewContext = *Contexts.insert(Context).first;
453     SampleContext FContext(NewContext, RawContext);
454     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
455     if (WasLeafInlined)
456       FContext.setAttribute(ContextWasInlined);
457     FunctionSamples &FProfile = Ret.first->second;
458     FProfile.setContext(FContext);
459     return Ret.first->second;
460   }
461   return I->second;
462 }
463 
464 void CSProfileGenerator::generateProfile() {
465   FunctionSamples::ProfileIsCS = true;
466 
467   if (Binary->getTrackFuncContextSize())
468     computeSizeForProfiledFunctions();
469 
470   if (Binary->usePseudoProbes()) {
471     // Enable pseudo probe functionalities in SampleProf
472     FunctionSamples::ProfileIsProbeBased = true;
473     generateProbeBasedProfile();
474   } else {
475     generateLineNumBasedProfile();
476   }
477   postProcessProfiles();
478 }
479 
480 void CSProfileGenerator::computeSizeForProfiledFunctions() {
481   // Hash map to deduplicate the function range and the item is a pair of
482   // function start and end offset.
483   std::unordered_map<uint64_t, uint64_t> FuncRanges;
484   // Go through all the ranges in the CS counters, use the start of the range to
485   // look up the function it belongs and record the function range.
486   for (const auto &CI : SampleCounters) {
487     for (auto Item : CI.second.RangeCounter) {
488       // FIXME: Filter the bogus crossing function range.
489       uint64_t RangeStartOffset = Item.first.first;
490       auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
491       if (FuncRange.second != 0)
492         FuncRanges[FuncRange.first] = FuncRange.second;
493     }
494   }
495 
496   for (auto I : FuncRanges) {
497     uint64_t StartOffset = I.first;
498     uint64_t EndOffset = I.second;
499     Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
500   }
501 }
502 
503 void CSProfileGenerator::generateLineNumBasedProfile() {
504   for (const auto &CI : SampleCounters) {
505     const StringBasedCtxKey *CtxKey =
506         dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
507     // Get or create function profile for the range
508     FunctionSamples &FunctionProfile =
509         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
510 
511     // Fill in function body samples
512     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
513     // Fill in boundary sample counts as well as call site samples for calls
514     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
515                                        CI.second.BranchCounter);
516   }
517   // Fill in call site value sample for inlined calls and also use context to
518   // infer missing samples. Since we don't have call count for inlined
519   // functions, we estimate it from inlinee's profile using the entry of the
520   // body sample.
521   populateInferredFunctionSamples();
522 }
523 
524 void CSProfileGenerator::populateBodySamplesForFunction(
525     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
526   // Compute disjoint ranges first, so we can use MAX
527   // for calculating count for each location.
528   RangeSample Ranges;
529   findDisjointRanges(Ranges, RangeCounter);
530   for (auto Range : Ranges) {
531     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
532     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
533     uint64_t Count = Range.second;
534     // Disjoint ranges have introduce zero-filled gap that
535     // doesn't belong to current context, filter them out.
536     if (Count == 0)
537       continue;
538 
539     InstructionPointer IP(Binary, RangeBegin, true);
540     // Disjoint ranges may have range in the middle of two instr,
541     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
542     // can be Addr1+1 to Addr2-1. We should ignore such range.
543     while (IP.Address <= RangeEnd) {
544       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
545       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
546       if (LeafLoc.hasValue()) {
547         // Recording body sample for this specific context
548         uint64_t DC =
549             Count * getDuplicationFactor(LeafLoc->Location.Discriminator);
550         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, DC);
551         FunctionProfile.addTotalSamples(DC);
552       }
553 
554       // Move to next IP within the range
555       IP.advance();
556     }
557   }
558 }
559 
560 void CSProfileGenerator::populateBoundarySamplesForFunction(
561     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
562     const BranchSample &BranchCounters) {
563 
564   for (auto Entry : BranchCounters) {
565     uint64_t SourceOffset = Entry.first.first;
566     uint64_t TargetOffset = Entry.first.second;
567     uint64_t Count = Entry.second;
568     assert(Count != 0 && "Unexpected zero weight branch");
569 
570     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
571     if (CalleeName.size() == 0)
572       continue;
573 
574     // Record called target sample and its count
575     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
576     if (!LeafLoc.hasValue())
577       continue;
578     Count *= getDuplicationFactor(LeafLoc->Location.Discriminator);
579     FunctionProfile.addCalledTargetSamples(
580         LeafLoc->Location.LineOffset,
581         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
582         Count);
583 
584     // Record head sample for called target(callee)
585     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
586     assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
587            "Leaf function name doesn't match");
588     CalleeCtx.back() = *LeafLoc;
589     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
590     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
591     CalleeProfile.addHeadSamples(Count);
592   }
593 }
594 
595 static SampleContextFrame
596 getCallerContext(SampleContextFrames CalleeContext,
597                  SampleContextFrameVector &CallerContext) {
598   assert(CalleeContext.size() > 1 && "Unexpected empty context");
599   CalleeContext = CalleeContext.drop_back();
600   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
601   SampleContextFrame CallerFrame = CallerContext.back();
602   CallerContext.back().Location = LineLocation(0, 0);
603   return CallerFrame;
604 }
605 
606 void CSProfileGenerator::populateInferredFunctionSamples() {
607   for (const auto &Item : ProfileMap) {
608     const auto &CalleeContext = Item.first;
609     const FunctionSamples &CalleeProfile = Item.second;
610 
611     // If we already have head sample counts, we must have value profile
612     // for call sites added already. Skip to avoid double counting.
613     if (CalleeProfile.getHeadSamples())
614       continue;
615     // If we don't have context, nothing to do for caller's call site.
616     // This could happen for entry point function.
617     if (CalleeContext.isBaseContext())
618       continue;
619 
620     // Infer Caller's frame loc and context ID through string splitting
621     SampleContextFrameVector CallerContextId;
622     SampleContextFrame &&CallerLeafFrameLoc =
623         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
624     SampleContextFrames CallerContext(CallerContextId);
625 
626     // It's possible that we haven't seen any sample directly in the caller,
627     // in which case CallerProfile will not exist. But we can't modify
628     // ProfileMap while iterating it.
629     // TODO: created function profile for those callers too
630     if (ProfileMap.find(CallerContext) == ProfileMap.end())
631       continue;
632     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
633 
634     // Since we don't have call count for inlined functions, we
635     // estimate it from inlinee's profile using entry body sample.
636     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
637     // If we don't have samples with location, use 1 to indicate live.
638     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
639       EstimatedCallCount = 1;
640     CallerProfile.addCalledTargetSamples(
641         CallerLeafFrameLoc.Location.LineOffset,
642         CallerLeafFrameLoc.Location.Discriminator,
643         CalleeProfile.getContext().getName(), EstimatedCallCount);
644     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
645                                  CallerLeafFrameLoc.Location.Discriminator,
646                                  EstimatedCallCount);
647     CallerProfile.addTotalSamples(EstimatedCallCount);
648   }
649 }
650 
651 void CSProfileGenerator::postProcessProfiles() {
652   // Compute hot/cold threshold based on profile. This will be used for cold
653   // context profile merging/trimming.
654   computeSummaryAndThreshold();
655 
656   // Run global pre-inliner to adjust/merge context profile based on estimated
657   // inline decisions.
658   if (EnableCSPreInliner) {
659     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
660         .run();
661   }
662 
663   // Trim and merge cold context profile using cold threshold above. By default,
664   // we skip such merging and trimming when preinliner is on.
665   if (!EnableCSPreInliner || CSProfTrimColdContext.getNumOccurrences() ||
666       CSProfMergeColdContext.getNumOccurrences()) {
667     SampleContextTrimmer(ProfileMap)
668         .trimAndMergeColdContextProfiles(
669             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
670             CSProfMaxColdContextDepth);
671   }
672 }
673 
674 void CSProfileGenerator::computeSummaryAndThreshold() {
675   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
676   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
677   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
678       (Summary->getDetailedSummary()));
679   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
680       (Summary->getDetailedSummary()));
681 }
682 
683 // Helper function to extract context prefix string stack
684 // Extract context stack for reusing, leaf context stack will
685 // be added compressed while looking up function profile
686 static void extractPrefixContextStack(
687     SampleContextFrameVector &ContextStack,
688     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
689     ProfiledBinary *Binary) {
690   for (const auto *P : Probes) {
691     Binary->getInlineContextForProbe(P, ContextStack, true);
692   }
693 }
694 
695 void CSProfileGenerator::generateProbeBasedProfile() {
696   for (const auto &CI : SampleCounters) {
697     const ProbeBasedCtxKey *CtxKey =
698         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
699     SampleContextFrameVector ContextStack;
700     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
701     // Fill in function body samples from probes, also infer caller's samples
702     // from callee's probe
703     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
704     // Fill in boundary samples for a call probe
705     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
706   }
707 }
708 
709 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
710                                                 ProbeCounterMap &ProbeCounter) {
711   RangeSample Ranges;
712   findDisjointRanges(Ranges, RangeCounter);
713   for (const auto &Range : Ranges) {
714     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
715     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
716     uint64_t Count = Range.second;
717     // Disjoint ranges have introduce zero-filled gap that
718     // doesn't belong to current context, filter them out.
719     if (Count == 0)
720       continue;
721 
722     InstructionPointer IP(Binary, RangeBegin, true);
723 
724     // Disjoint ranges may have range in the middle of two instr,
725     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
726     // can be Addr1+1 to Addr2-1. We should ignore such range.
727     if (IP.Address > RangeEnd)
728       continue;
729 
730     while (IP.Address <= RangeEnd) {
731       const AddressProbesMap &Address2ProbesMap =
732           Binary->getAddress2ProbesMap();
733       auto It = Address2ProbesMap.find(IP.Address);
734       if (It != Address2ProbesMap.end()) {
735         for (const auto &Probe : It->second) {
736           if (!Probe.isBlock())
737             continue;
738           ProbeCounter[&Probe] += Count;
739         }
740       }
741 
742       IP.advance();
743     }
744   }
745 }
746 
747 void CSProfileGenerator::populateBodySamplesWithProbes(
748     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
749   ProbeCounterMap ProbeCounter;
750   // Extract the top frame probes by looking up each address among the range in
751   // the Address2ProbeMap
752   extractProbesFromRange(RangeCounter, ProbeCounter);
753   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
754                      std::unordered_set<FunctionSamples *>>
755       FrameSamples;
756   for (auto PI : ProbeCounter) {
757     const MCDecodedPseudoProbe *Probe = PI.first;
758     uint64_t Count = PI.second;
759     FunctionSamples &FunctionProfile =
760         getFunctionProfileForLeafProbe(ContextStack, Probe);
761     // Record the current frame and FunctionProfile whenever samples are
762     // collected for non-danglie probes. This is for reporting all of the
763     // zero count probes of the frame later.
764     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
765     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
766     FunctionProfile.addTotalSamples(Count);
767     if (Probe->isEntry()) {
768       FunctionProfile.addHeadSamples(Count);
769       // Look up for the caller's function profile
770       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
771       if (InlinerDesc != nullptr) {
772         // Since the context id will be compressed, we have to use callee's
773         // context id to infer caller's context id to ensure they share the
774         // same context prefix.
775         SampleContextFrames CalleeContextId =
776             FunctionProfile.getContext().getContextFrames();
777         SampleContextFrameVector CallerContextId;
778         SampleContextFrame &&CallerLeafFrameLoc =
779             getCallerContext(CalleeContextId, CallerContextId);
780         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
781         assert(CallerIndex &&
782                "Inferred caller's location index shouldn't be zero!");
783         FunctionSamples &CallerProfile =
784             getFunctionProfileForContext(CallerContextId);
785         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
786         CallerProfile.addBodySamples(CallerIndex, 0, Count);
787         CallerProfile.addTotalSamples(Count);
788         CallerProfile.addCalledTargetSamples(
789             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
790       }
791     }
792   }
793 
794   // Assign zero count for remaining probes without sample hits to
795   // differentiate from probes optimized away, of which the counts are unknown
796   // and will be inferred by the compiler.
797   for (auto &I : FrameSamples) {
798     for (auto *FunctionProfile : I.second) {
799       for (auto *Probe : I.first->getProbes()) {
800         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
801       }
802     }
803   }
804 }
805 
806 void CSProfileGenerator::populateBoundarySamplesWithProbes(
807     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
808   for (auto BI : BranchCounter) {
809     uint64_t SourceOffset = BI.first.first;
810     uint64_t TargetOffset = BI.first.second;
811     uint64_t Count = BI.second;
812     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
813     const MCDecodedPseudoProbe *CallProbe =
814         Binary->getCallProbeForAddr(SourceAddress);
815     if (CallProbe == nullptr)
816       continue;
817     FunctionSamples &FunctionProfile =
818         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
819     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
820     FunctionProfile.addTotalSamples(Count);
821     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
822     if (CalleeName.size() == 0)
823       continue;
824     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
825                                            Count);
826   }
827 }
828 
829 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
830     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
831 
832   // Explicitly copy the context for appending the leaf context
833   SampleContextFrameVector NewContextStack(ContextStack.begin(),
834                                            ContextStack.end());
835   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
836   // For leaf inlined context with the top frame, we should strip off the top
837   // frame's probe id, like:
838   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
839   auto LeafFrame = NewContextStack.back();
840   LeafFrame.Location = LineLocation(0, 0);
841   NewContextStack.pop_back();
842   // Compress the context string except for the leaf frame
843   CSProfileGenerator::compressRecursionContext(NewContextStack);
844   CSProfileGenerator::trimContext(NewContextStack);
845   NewContextStack.push_back(LeafFrame);
846 
847   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
848   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
849   FunctionSamples &FunctionProile =
850       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
851   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
852   return FunctionProile;
853 }
854 
855 } // end namespace sampleprof
856 } // end namespace llvm
857