1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "PerfReader.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <algorithm>
15 #include <float.h>
16 #include <unordered_set>
17 #include <utility>
18 
19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
20                                     cl::Required,
21                                     cl::desc("Output profile file"));
22 static cl::alias OutputA("o", cl::desc("Alias for --output"),
23                          cl::aliasopt(OutputFilename));
24 
25 static cl::opt<SampleProfileFormat> OutputFormat(
26     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27     cl::values(
28         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
29         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
35 cl::opt<bool> UseMD5(
36     "use-md5", cl::init(false), cl::Hidden,
37     cl::desc("Use md5 to represent function names in the output profile (only "
38              "meaningful for -extbinary)"));
39 
40 static cl::opt<bool> PopulateProfileSymbolList(
41     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43 
44 static cl::opt<bool> FillZeroForAllFuncs(
45     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46     cl::desc("Attribute all functions' range with zero count "
47              "even it's not hit by any samples."));
48 
49 static cl::opt<int32_t, true> RecursionCompression(
50     "compress-recursion",
51     cl::desc("Compressing recursion by deduplicating adjacent frame "
52              "sequences up to the specified size. -1 means no size limit."),
53     cl::Hidden,
54     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55 
56 static cl::opt<bool>
57     TrimColdProfile("trim-cold-profile",
58                     cl::desc("If the total count of the profile is smaller "
59                              "than threshold, it will be trimmed."));
60 
61 static cl::opt<bool> CSProfMergeColdContext(
62     "csprof-merge-cold-context", cl::init(true),
63     cl::desc("If the total count of context profile is smaller than "
64              "the threshold, it will be merged into context-less base "
65              "profile."));
66 
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68     "csprof-max-cold-context-depth", cl::init(1),
69     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70              "context-less base profile"));
71 
72 static cl::opt<int, true> CSProfMaxContextDepth(
73     "csprof-max-context-depth",
74     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75              "depth limit."),
76     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77 
78 static cl::opt<double> HotFunctionDensityThreshold(
79     "hot-function-density-threshold", llvm::cl::init(1000),
80     llvm::cl::desc(
81         "specify density threshold for hot functions (default: 1000)"),
82     llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84                                  llvm::cl::desc("show profile density details"),
85                                  llvm::cl::Optional);
86 
87 static cl::opt<bool> UpdateTotalSamples(
88     "update-total-samples", llvm::cl::init(false),
89     llvm::cl::desc(
90         "Update total samples by accumulating all its body samples."),
91     llvm::cl::Optional);
92 
93 extern cl::opt<int> ProfileSummaryCutoffHot;
94 extern cl::opt<bool> UseContextLessSummary;
95 
96 static cl::opt<bool> GenCSNestedProfile(
97     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
98     cl::desc("Generate nested function profiles for CSSPGO"));
99 
100 using namespace llvm;
101 using namespace sampleprof;
102 
103 namespace llvm {
104 namespace sampleprof {
105 
106 // Initialize the MaxCompressionSize to -1 which means no size limit
107 int32_t CSProfileGenerator::MaxCompressionSize = -1;
108 
109 int CSProfileGenerator::MaxContextDepth = -1;
110 
111 bool ProfileGeneratorBase::UseFSDiscriminator = false;
112 
113 std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary * Binary,const ContextSampleCounterMap * SampleCounters,bool ProfileIsCS)114 ProfileGeneratorBase::create(ProfiledBinary *Binary,
115                              const ContextSampleCounterMap *SampleCounters,
116                              bool ProfileIsCS) {
117   std::unique_ptr<ProfileGeneratorBase> Generator;
118   if (ProfileIsCS) {
119     if (Binary->useFSDiscriminator())
120       exitWithError("FS discriminator is not supported in CS profile.");
121     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
122   } else {
123     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
124   }
125   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
126   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
127 
128   return Generator;
129 }
130 
131 std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary * Binary,SampleProfileMap & Profiles,bool ProfileIsCS)132 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
133                              bool ProfileIsCS) {
134   std::unique_ptr<ProfileGeneratorBase> Generator;
135   if (ProfileIsCS) {
136     if (Binary->useFSDiscriminator())
137       exitWithError("FS discriminator is not supported in CS profile.");
138     Generator.reset(new CSProfileGenerator(Binary, Profiles));
139   } else {
140     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141   }
142   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144 
145   return Generator;
146 }
147 
write(std::unique_ptr<SampleProfileWriter> Writer,SampleProfileMap & ProfileMap)148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149                                  SampleProfileMap &ProfileMap) {
150   // Populate profile symbol list if extended binary format is used.
151   ProfileSymbolList SymbolList;
152 
153   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154     Binary->populateSymbolListFromDWARF(SymbolList);
155     Writer->setProfileSymbolList(&SymbolList);
156   }
157 
158   if (std::error_code EC = Writer->write(ProfileMap))
159     exitWithError(std::move(EC));
160 }
161 
write()162 void ProfileGeneratorBase::write() {
163   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164   if (std::error_code EC = WriterOrErr.getError())
165     exitWithError(EC, OutputFilename);
166 
167   if (UseMD5) {
168     if (OutputFormat != SPF_Ext_Binary)
169       WithColor::warning() << "-use-md5 is ignored. Specify "
170                               "--format=extbinary to enable it\n";
171     else
172       WriterOrErr.get()->setUseMD5();
173   }
174 
175   write(std::move(WriterOrErr.get()), ProfileMap);
176 }
177 
showDensitySuggestion(double Density)178 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179   if (Density == 0.0)
180     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181                             "set too low. Please check your command.\n";
182   else if (Density < HotFunctionDensityThreshold)
183     WithColor::warning()
184         << "AutoFDO is estimated to optimize better with "
185         << format("%.1f", HotFunctionDensityThreshold / Density)
186         << "x more samples. Please consider increasing sampling rate or "
187            "profiling for longer duration to get more samples.\n";
188 
189   if (ShowDensity)
190     outs() << "Minimum profile density for hot functions with top "
191            << format("%.2f",
192                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193                          10000)
194            << "% total samples: " << format("%.1f", Density) << "\n";
195 }
196 
calculateDensity(const SampleProfileMap & Profiles,uint64_t HotCntThreshold)197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198                                               uint64_t HotCntThreshold) {
199   double Density = DBL_MAX;
200   std::vector<const FunctionSamples *> HotFuncs;
201   for (auto &I : Profiles) {
202     auto &FuncSamples = I.second;
203     if (FuncSamples.getTotalSamples() < HotCntThreshold)
204       continue;
205     HotFuncs.emplace_back(&FuncSamples);
206   }
207 
208   for (auto *FuncSamples : HotFuncs) {
209     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210     if (!Func)
211       continue;
212     uint64_t FuncSize = Func->getFuncSize();
213     if (FuncSize == 0)
214       continue;
215     Density =
216         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217                               FuncSize);
218   }
219 
220   return Density == DBL_MAX ? 0.0 : Density;
221 }
222 
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
224                                               const RangeSample &Ranges) {
225 
226   /*
227   Regions may overlap with each other. Using the boundary info, find all
228   disjoint ranges and their sample count. BoundaryPoint contains the count
229   multiple samples begin/end at this points.
230 
231   |<--100-->|           Sample1
232   |<------200------>|   Sample2
233   A         B       C
234 
235   In the example above,
236   Sample1 begins at A, ends at B, its value is 100.
237   Sample2 beings at A, ends at C, its value is 200.
238   For A, BeginCount is the sum of sample begins at A, which is 300 and no
239   samples ends at A, so EndCount is 0.
240   Then boundary points A, B, and C with begin/end counts are:
241   A: (300, 0)
242   B: (0, 100)
243   C: (0, 200)
244   */
245   struct BoundaryPoint {
246     // Sum of sample counts beginning at this point
247     uint64_t BeginCount = UINT64_MAX;
248     // Sum of sample counts ending at this point
249     uint64_t EndCount = UINT64_MAX;
250     // Is the begin point of a zero range.
251     bool IsZeroRangeBegin = false;
252     // Is the end point of a zero range.
253     bool IsZeroRangeEnd = false;
254 
255     void addBeginCount(uint64_t Count) {
256       if (BeginCount == UINT64_MAX)
257         BeginCount = 0;
258       BeginCount += Count;
259     }
260 
261     void addEndCount(uint64_t Count) {
262       if (EndCount == UINT64_MAX)
263         EndCount = 0;
264       EndCount += Count;
265     }
266   };
267 
268   /*
269   For the above example. With boundary points, follwing logic finds two
270   disjoint region of
271 
272   [A,B]:   300
273   [B+1,C]: 200
274 
275   If there is a boundary point that both begin and end, the point itself
276   becomes a separate disjoint region. For example, if we have original
277   ranges of
278 
279   |<--- 100 --->|
280                 |<--- 200 --->|
281   A             B             C
282 
283   there are three boundary points with their begin/end counts of
284 
285   A: (100, 0)
286   B: (200, 100)
287   C: (0, 200)
288 
289   the disjoint ranges would be
290 
291   [A, B-1]: 100
292   [B, B]:   300
293   [B+1, C]: 200.
294 
295   Example for zero value range:
296 
297     |<--- 100 --->|
298                        |<--- 200 --->|
299   |<---------------  0 ----------------->|
300   A  B            C    D             E   F
301 
302   [A, B-1]  : 0
303   [B, C]    : 100
304   [C+1, D-1]: 0
305   [D, E]    : 200
306   [E+1, F]  : 0
307   */
308   std::map<uint64_t, BoundaryPoint> Boundaries;
309 
310   for (const auto &Item : Ranges) {
311     assert(Item.first.first <= Item.first.second &&
312            "Invalid instruction range");
313     auto &BeginPoint = Boundaries[Item.first.first];
314     auto &EndPoint = Boundaries[Item.first.second];
315     uint64_t Count = Item.second;
316 
317     BeginPoint.addBeginCount(Count);
318     EndPoint.addEndCount(Count);
319     if (Count == 0) {
320       BeginPoint.IsZeroRangeBegin = true;
321       EndPoint.IsZeroRangeEnd = true;
322     }
323   }
324 
325   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
326   // and the next valid address
327   uint64_t BeginAddress = UINT64_MAX;
328   int ZeroRangeDepth = 0;
329   uint64_t Count = 0;
330   for (const auto &Item : Boundaries) {
331     uint64_t Address = Item.first;
332     const BoundaryPoint &Point = Item.second;
333     if (Point.BeginCount != UINT64_MAX) {
334       if (BeginAddress != UINT64_MAX)
335         DisjointRanges[{BeginAddress, Address - 1}] = Count;
336       Count += Point.BeginCount;
337       BeginAddress = Address;
338       ZeroRangeDepth += Point.IsZeroRangeBegin;
339     }
340     if (Point.EndCount != UINT64_MAX) {
341       assert((BeginAddress != UINT64_MAX) &&
342              "First boundary point cannot be 'end' point");
343       DisjointRanges[{BeginAddress, Address}] = Count;
344       assert(Count >= Point.EndCount && "Mismatched live ranges");
345       Count -= Point.EndCount;
346       BeginAddress = Address + 1;
347       ZeroRangeDepth -= Point.IsZeroRangeEnd;
348       // If the remaining count is zero and it's no longer in a zero range, this
349       // means we consume all the ranges before, thus mark BeginAddress as
350       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
351       //  [<---- 10 ---->]
352       //                       [<---- 20 ---->]
353       //   A             B     C              D
354       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
355       // have the [B+1, C-1] zero range.
356       if (Count == 0 && ZeroRangeDepth == 0)
357         BeginAddress = UINT64_MAX;
358     }
359   }
360 }
361 
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const SampleContextFrame & LeafLoc,uint64_t Count)362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364     uint64_t Count) {
365   // Use the maximum count of samples with same line location
366   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367 
368   // Use duplication factor to compensated for loop unroll/vectorization.
369   // Note that this is only needed when we're taking MAX of the counts at
370   // the location instead of SUM.
371   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372 
373   ErrorOr<uint64_t> R =
374       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375 
376   uint64_t PreviousCount = R ? R.get() : 0;
377   if (PreviousCount <= Count) {
378     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379                                    Count - PreviousCount);
380   }
381 }
382 
updateTotalSamples()383 void ProfileGeneratorBase::updateTotalSamples() {
384   for (auto &Item : ProfileMap) {
385     FunctionSamples &FunctionProfile = Item.second;
386     FunctionProfile.updateTotalSamples();
387   }
388 }
389 
updateCallsiteSamples()390 void ProfileGeneratorBase::updateCallsiteSamples() {
391   for (auto &Item : ProfileMap) {
392     FunctionSamples &FunctionProfile = Item.second;
393     FunctionProfile.updateCallsiteSamples();
394   }
395 }
396 
updateFunctionSamples()397 void ProfileGeneratorBase::updateFunctionSamples() {
398   updateCallsiteSamples();
399 
400   if (UpdateTotalSamples)
401     updateTotalSamples();
402 }
403 
collectProfiledFunctions()404 void ProfileGeneratorBase::collectProfiledFunctions() {
405   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
406   if (collectFunctionsFromRawProfile(ProfiledFunctions))
407     Binary->setProfiledFunctions(ProfiledFunctions);
408   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
409     Binary->setProfiledFunctions(ProfiledFunctions);
410   else
411     llvm_unreachable("Unsupported input profile");
412 }
413 
collectFunctionsFromRawProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)414 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
415     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
416   if (!SampleCounters)
417     return false;
418   // Go through all the stacks, ranges and branches in sample counters, use
419   // the start of the range to look up the function it belongs and record the
420   // function.
421   for (const auto &CI : *SampleCounters) {
422     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
423       for (auto Addr : CtxKey->Context) {
424         if (FuncRange *FRange = Binary->findFuncRangeForOffset(
425                 Binary->virtualAddrToOffset(Addr)))
426           ProfiledFunctions.insert(FRange->Func);
427       }
428     }
429 
430     for (auto Item : CI.second.RangeCounter) {
431       uint64_t StartOffset = Item.first.first;
432       if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
433         ProfiledFunctions.insert(FRange->Func);
434     }
435 
436     for (auto Item : CI.second.BranchCounter) {
437       uint64_t SourceOffset = Item.first.first;
438       uint64_t TargetOffset = Item.first.first;
439       if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
440         ProfiledFunctions.insert(FRange->Func);
441       if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
442         ProfiledFunctions.insert(FRange->Func);
443     }
444   }
445   return true;
446 }
447 
collectFunctionsFromLLVMProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)448 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
449     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
450   for (const auto &FS : ProfileMap) {
451     if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
452       ProfiledFunctions.insert(Func);
453   }
454   return true;
455 }
456 
collectFunctionsFromLLVMProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)457 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
458     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
459   for (auto *Node : ContextTracker) {
460     if (!Node->getFuncName().empty())
461       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
462         ProfiledFunctions.insert(Func);
463   }
464   return true;
465 }
466 
467 FunctionSamples &
getTopLevelFunctionProfile(StringRef FuncName)468 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
469   SampleContext Context(FuncName);
470   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
471   if (Ret.second) {
472     FunctionSamples &FProfile = Ret.first->second;
473     FProfile.setContext(Context);
474   }
475   return Ret.first->second;
476 }
477 
generateProfile()478 void ProfileGenerator::generateProfile() {
479   collectProfiledFunctions();
480 
481   if (Binary->usePseudoProbes())
482     Binary->decodePseudoProbe();
483 
484   if (SampleCounters) {
485     if (Binary->usePseudoProbes()) {
486       generateProbeBasedProfile();
487     } else {
488       generateLineNumBasedProfile();
489     }
490   }
491 
492   postProcessProfiles();
493 }
494 
postProcessProfiles()495 void ProfileGenerator::postProcessProfiles() {
496   computeSummaryAndThreshold(ProfileMap);
497   trimColdProfiles(ProfileMap, ColdCountThreshold);
498   calculateAndShowDensity(ProfileMap);
499 }
500 
trimColdProfiles(const SampleProfileMap & Profiles,uint64_t ColdCntThreshold)501 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
502                                         uint64_t ColdCntThreshold) {
503   if (!TrimColdProfile)
504     return;
505 
506   // Move cold profiles into a tmp container.
507   std::vector<SampleContext> ColdProfiles;
508   for (const auto &I : ProfileMap) {
509     if (I.second.getTotalSamples() < ColdCntThreshold)
510       ColdProfiles.emplace_back(I.first);
511   }
512 
513   // Remove the cold profile from ProfileMap.
514   for (const auto &I : ColdProfiles)
515     ProfileMap.erase(I);
516 }
517 
generateLineNumBasedProfile()518 void ProfileGenerator::generateLineNumBasedProfile() {
519   assert(SampleCounters->size() == 1 &&
520          "Must have one entry for profile generation.");
521   const SampleCounter &SC = SampleCounters->begin()->second;
522   // Fill in function body samples
523   populateBodySamplesForAllFunctions(SC.RangeCounter);
524   // Fill in boundary sample counts as well as call site samples for calls
525   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
526 
527   updateFunctionSamples();
528 }
529 
generateProbeBasedProfile()530 void ProfileGenerator::generateProbeBasedProfile() {
531   assert(SampleCounters->size() == 1 &&
532          "Must have one entry for profile generation.");
533   // Enable pseudo probe functionalities in SampleProf
534   FunctionSamples::ProfileIsProbeBased = true;
535   const SampleCounter &SC = SampleCounters->begin()->second;
536   // Fill in function body samples
537   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
538   // Fill in boundary sample counts as well as call site samples for calls
539   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
540 
541   updateFunctionSamples();
542 }
543 
populateBodySamplesWithProbesForAllFunctions(const RangeSample & RangeCounter)544 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
545     const RangeSample &RangeCounter) {
546   ProbeCounterMap ProbeCounter;
547   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
548   // inside extractProbesFromRange.
549   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
550                          false);
551 
552   for (const auto &PI : ProbeCounter) {
553     const MCDecodedPseudoProbe *Probe = PI.first;
554     uint64_t Count = PI.second;
555     SampleContextFrameVector FrameVec;
556     Binary->getInlineContextForProbe(Probe, FrameVec, true);
557     FunctionSamples &FunctionProfile =
558         getLeafProfileAndAddTotalSamples(FrameVec, Count);
559     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
560     if (Probe->isEntry())
561       FunctionProfile.addHeadSamples(Count);
562   }
563 }
564 
populateBoundarySamplesWithProbesForAllFunctions(const BranchSample & BranchCounters)565 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
566     const BranchSample &BranchCounters) {
567   for (const auto &Entry : BranchCounters) {
568     uint64_t SourceOffset = Entry.first.first;
569     uint64_t TargetOffset = Entry.first.second;
570     uint64_t Count = Entry.second;
571     assert(Count != 0 && "Unexpected zero weight branch");
572 
573     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
574     if (CalleeName.size() == 0)
575       continue;
576 
577     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
578     const MCDecodedPseudoProbe *CallProbe =
579         Binary->getCallProbeForAddr(SourceAddress);
580     if (CallProbe == nullptr)
581       continue;
582 
583     // Record called target sample and its count.
584     SampleContextFrameVector FrameVec;
585     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
586 
587     if (!FrameVec.empty()) {
588       FunctionSamples &FunctionProfile =
589           getLeafProfileAndAddTotalSamples(FrameVec, 0);
590       FunctionProfile.addCalledTargetSamples(
591           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
592     }
593   }
594 }
595 
getLeafProfileAndAddTotalSamples(const SampleContextFrameVector & FrameVec,uint64_t Count)596 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
597     const SampleContextFrameVector &FrameVec, uint64_t Count) {
598   // Get top level profile
599   FunctionSamples *FunctionProfile =
600       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
601   FunctionProfile->addTotalSamples(Count);
602   if (Binary->usePseudoProbes()) {
603     const auto *FuncDesc = Binary->getFuncDescForGUID(
604         Function::getGUID(FunctionProfile->getName()));
605     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
606   }
607 
608   for (size_t I = 1; I < FrameVec.size(); I++) {
609     LineLocation Callsite(
610         FrameVec[I - 1].Location.LineOffset,
611         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
612     FunctionSamplesMap &SamplesMap =
613         FunctionProfile->functionSamplesAt(Callsite);
614     auto Ret =
615         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
616     if (Ret.second) {
617       SampleContext Context(FrameVec[I].FuncName);
618       Ret.first->second.setContext(Context);
619     }
620     FunctionProfile = &Ret.first->second;
621     FunctionProfile->addTotalSamples(Count);
622     if (Binary->usePseudoProbes()) {
623       const auto *FuncDesc = Binary->getFuncDescForGUID(
624           Function::getGUID(FunctionProfile->getName()));
625       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
626     }
627   }
628 
629   return *FunctionProfile;
630 }
631 
632 RangeSample
preprocessRangeCounter(const RangeSample & RangeCounter)633 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
634   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
635   if (FillZeroForAllFuncs) {
636     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
637       for (auto &R : FuncI.second.Ranges) {
638         Ranges[{R.first, R.second - 1}] += 0;
639       }
640     }
641   } else {
642     // For each range, we search for all ranges of the function it belongs to
643     // and initialize it with zero count, so it remains zero if doesn't hit any
644     // samples. This is to be consistent with compiler that interpret zero count
645     // as unexecuted(cold).
646     for (const auto &I : RangeCounter) {
647       uint64_t StartOffset = I.first.first;
648       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
649         Ranges[{Range.first, Range.second - 1}] += 0;
650     }
651   }
652   RangeSample DisjointRanges;
653   findDisjointRanges(DisjointRanges, Ranges);
654   return DisjointRanges;
655 }
656 
populateBodySamplesForAllFunctions(const RangeSample & RangeCounter)657 void ProfileGenerator::populateBodySamplesForAllFunctions(
658     const RangeSample &RangeCounter) {
659   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
660     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
661     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
662     uint64_t Count = Range.second;
663 
664     InstructionPointer IP(Binary, RangeBegin, true);
665     // Disjoint ranges may have range in the middle of two instr,
666     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
667     // can be Addr1+1 to Addr2-1. We should ignore such range.
668     if (IP.Address > RangeEnd)
669       continue;
670 
671     do {
672       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
673       const SampleContextFrameVector &FrameVec =
674           Binary->getFrameLocationStack(Offset);
675       if (!FrameVec.empty()) {
676         // FIXME: As accumulating total count per instruction caused some
677         // regression, we changed to accumulate total count per byte as a
678         // workaround. Tuning hotness threshold on the compiler side might be
679         // necessary in the future.
680         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
681             FrameVec, Count * Binary->getInstSize(Offset));
682         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
683                                             Count);
684       }
685     } while (IP.advance() && IP.Address <= RangeEnd);
686   }
687 }
688 
getCalleeNameForOffset(uint64_t TargetOffset)689 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
690   // Get the function range by branch target if it's a call branch.
691   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
692 
693   // We won't accumulate sample count for a range whose start is not the real
694   // function entry such as outlined function or inner labels.
695   if (!FRange || !FRange->IsFuncEntry)
696     return StringRef();
697 
698   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
699 }
700 
populateBoundarySamplesForAllFunctions(const BranchSample & BranchCounters)701 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
702     const BranchSample &BranchCounters) {
703   for (const auto &Entry : BranchCounters) {
704     uint64_t SourceOffset = Entry.first.first;
705     uint64_t TargetOffset = Entry.first.second;
706     uint64_t Count = Entry.second;
707     assert(Count != 0 && "Unexpected zero weight branch");
708 
709     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
710     if (CalleeName.size() == 0)
711       continue;
712     // Record called target sample and its count.
713     const SampleContextFrameVector &FrameVec =
714         Binary->getFrameLocationStack(SourceOffset);
715     if (!FrameVec.empty()) {
716       FunctionSamples &FunctionProfile =
717           getLeafProfileAndAddTotalSamples(FrameVec, 0);
718       FunctionProfile.addCalledTargetSamples(
719           FrameVec.back().Location.LineOffset,
720           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
721           CalleeName, Count);
722     }
723     // Add head samples for callee.
724     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
725     CalleeProfile.addHeadSamples(Count);
726   }
727 }
728 
calculateAndShowDensity(const SampleProfileMap & Profiles)729 void ProfileGeneratorBase::calculateAndShowDensity(
730     const SampleProfileMap &Profiles) {
731   double Density = calculateDensity(Profiles, HotCountThreshold);
732   showDensitySuggestion(Density);
733 }
734 
735 FunctionSamples *
getOrCreateFunctionSamples(ContextTrieNode * ContextNode,bool WasLeafInlined)736 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
737                                                bool WasLeafInlined) {
738   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
739   if (!FProfile) {
740     FSamplesList.emplace_back();
741     FProfile = &FSamplesList.back();
742     FProfile->setName(ContextNode->getFuncName());
743     ContextNode->setFunctionSamples(FProfile);
744   }
745   // Update ContextWasInlined attribute for existing contexts.
746   // The current function can be called in two ways:
747   //  - when processing a probe of the current frame
748   //  - when processing the entry probe of an inlinee's frame, which
749   //    is then used to update the callsite count of the current frame.
750   // The two can happen in any order, hence here we are making sure
751   // `ContextWasInlined` is always set as expected.
752   // TODO: Note that the former does not always happen if no probes of the
753   // current frame has samples, and if the latter happens, we could lose the
754   // attribute. This should be fixed.
755   if (WasLeafInlined)
756     FProfile->getContext().setAttribute(ContextWasInlined);
757   return FProfile;
758 }
759 
760 ContextTrieNode *
getOrCreateContextNode(const SampleContextFrames Context,bool WasLeafInlined)761 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
762                                            bool WasLeafInlined) {
763   ContextTrieNode *ContextNode =
764       ContextTracker.getOrCreateContextPath(Context, true);
765   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
766   return ContextNode;
767 }
768 
generateProfile()769 void CSProfileGenerator::generateProfile() {
770   FunctionSamples::ProfileIsCS = true;
771 
772   collectProfiledFunctions();
773 
774   if (Binary->usePseudoProbes())
775     Binary->decodePseudoProbe();
776 
777   if (SampleCounters) {
778     if (Binary->usePseudoProbes()) {
779       generateProbeBasedProfile();
780     } else {
781       generateLineNumBasedProfile();
782     }
783   }
784 
785   if (Binary->getTrackFuncContextSize())
786     computeSizeForProfiledFunctions();
787 
788   postProcessProfiles();
789 }
790 
computeSizeForProfiledFunctions()791 void CSProfileGenerator::computeSizeForProfiledFunctions() {
792   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
793   for (auto *Func : Binary->getProfiledFunctions())
794     Binary->computeInlinedContextSizeForFunc(Func);
795 
796   // Flush the symbolizer to save memory.
797   Binary->flushSymbolizer();
798 }
799 
updateFunctionSamples()800 void CSProfileGenerator::updateFunctionSamples() {
801   for (auto *Node : ContextTracker) {
802     FunctionSamples *FSamples = Node->getFunctionSamples();
803     if (FSamples) {
804       if (UpdateTotalSamples)
805         FSamples->updateTotalSamples();
806       FSamples->updateCallsiteSamples();
807     }
808   }
809 }
810 
generateLineNumBasedProfile()811 void CSProfileGenerator::generateLineNumBasedProfile() {
812   for (const auto &CI : *SampleCounters) {
813     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
814 
815     ContextTrieNode *ContextNode = &getRootContext();
816     // Sample context will be empty if the jump is an external-to-internal call
817     // pattern, the head samples should be added for the internal function.
818     if (!CtxKey->Context.empty()) {
819       // Get or create function profile for the range
820       ContextNode =
821           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
822       // Fill in function body samples
823       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
824                                      CI.second.RangeCounter);
825     }
826     // Fill in boundary sample counts as well as call site samples for calls
827     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
828   }
829   // Fill in call site value sample for inlined calls and also use context to
830   // infer missing samples. Since we don't have call count for inlined
831   // functions, we estimate it from inlinee's profile using the entry of the
832   // body sample.
833   populateInferredFunctionSamples(getRootContext());
834 
835   updateFunctionSamples();
836 }
837 
populateBodySamplesForFunction(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter)838 void CSProfileGenerator::populateBodySamplesForFunction(
839     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
840   // Compute disjoint ranges first, so we can use MAX
841   // for calculating count for each location.
842   RangeSample Ranges;
843   findDisjointRanges(Ranges, RangeCounter);
844   for (const auto &Range : Ranges) {
845     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
846     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
847     uint64_t Count = Range.second;
848     // Disjoint ranges have introduce zero-filled gap that
849     // doesn't belong to current context, filter them out.
850     if (Count == 0)
851       continue;
852 
853     InstructionPointer IP(Binary, RangeBegin, true);
854     // Disjoint ranges may have range in the middle of two instr,
855     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
856     // can be Addr1+1 to Addr2-1. We should ignore such range.
857     if (IP.Address > RangeEnd)
858       continue;
859 
860     do {
861       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
862       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
863       if (LeafLoc) {
864         // Recording body sample for this specific context
865         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
866         FunctionProfile.addTotalSamples(Count);
867       }
868     } while (IP.advance() && IP.Address <= RangeEnd);
869   }
870 }
871 
populateBoundarySamplesForFunction(ContextTrieNode * Node,const BranchSample & BranchCounters)872 void CSProfileGenerator::populateBoundarySamplesForFunction(
873     ContextTrieNode *Node, const BranchSample &BranchCounters) {
874 
875   for (const auto &Entry : BranchCounters) {
876     uint64_t SourceOffset = Entry.first.first;
877     uint64_t TargetOffset = Entry.first.second;
878     uint64_t Count = Entry.second;
879     assert(Count != 0 && "Unexpected zero weight branch");
880 
881     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
882     if (CalleeName.size() == 0)
883       continue;
884 
885     ContextTrieNode *CallerNode = Node;
886     LineLocation CalleeCallSite(0, 0);
887     if (CallerNode != &getRootContext()) {
888       // Record called target sample and its count
889       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
890       if (LeafLoc) {
891         CallerNode->getFunctionSamples()->addCalledTargetSamples(
892             LeafLoc->Location.LineOffset,
893             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
894             Count);
895         // Record head sample for called target(callee)
896         CalleeCallSite = LeafLoc->Location;
897       }
898     }
899 
900     ContextTrieNode *CalleeNode =
901         CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
902     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
903     CalleeProfile->addHeadSamples(Count);
904   }
905 }
906 
populateInferredFunctionSamples(ContextTrieNode & Node)907 void CSProfileGenerator::populateInferredFunctionSamples(
908     ContextTrieNode &Node) {
909   // There is no call jmp sample between the inliner and inlinee, we need to use
910   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
911   // sample depends on child(inlinee)'s sample, so traverse the tree in
912   // post-order.
913   for (auto &It : Node.getAllChildContext())
914     populateInferredFunctionSamples(It.second);
915 
916   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
917   if (!CalleeProfile)
918     return;
919   // If we already have head sample counts, we must have value profile
920   // for call sites added already. Skip to avoid double counting.
921   if (CalleeProfile->getHeadSamples())
922     return;
923   ContextTrieNode *CallerNode = Node.getParentContext();
924   // If we don't have context, nothing to do for caller's call site.
925   // This could happen for entry point function.
926   if (CallerNode == &getRootContext())
927     return;
928 
929   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
930   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
931   // Since we don't have call count for inlined functions, we
932   // estimate it from inlinee's profile using entry body sample.
933   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
934   // If we don't have samples with location, use 1 to indicate live.
935   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
936     EstimatedCallCount = 1;
937   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
938                                        CallerLeafFrameLoc.Discriminator,
939                                        Node.getFuncName(), EstimatedCallCount);
940   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
941                                CallerLeafFrameLoc.Discriminator,
942                                EstimatedCallCount);
943   CallerProfile.addTotalSamples(EstimatedCallCount);
944 }
945 
convertToProfileMap(ContextTrieNode & Node,SampleContextFrameVector & Context)946 void CSProfileGenerator::convertToProfileMap(
947     ContextTrieNode &Node, SampleContextFrameVector &Context) {
948   FunctionSamples *FProfile = Node.getFunctionSamples();
949   if (FProfile) {
950     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
951     // Save the new context for future references.
952     SampleContextFrames NewContext = *Contexts.insert(Context).first;
953     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
954     FunctionSamples &NewProfile = Ret.first->second;
955     NewProfile.getContext().setContext(NewContext);
956     Context.pop_back();
957   }
958 
959   for (auto &It : Node.getAllChildContext()) {
960     ContextTrieNode &ChildNode = It.second;
961     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
962     convertToProfileMap(ChildNode, Context);
963     Context.pop_back();
964   }
965 }
966 
convertToProfileMap()967 void CSProfileGenerator::convertToProfileMap() {
968   assert(ProfileMap.empty() &&
969          "ProfileMap should be empty before converting from the trie");
970   assert(IsProfileValidOnTrie &&
971          "Do not convert the trie twice, it's already destroyed");
972 
973   SampleContextFrameVector Context;
974   for (auto &It : getRootContext().getAllChildContext())
975     convertToProfileMap(It.second, Context);
976 
977   IsProfileValidOnTrie = false;
978 }
979 
postProcessProfiles()980 void CSProfileGenerator::postProcessProfiles() {
981   // Compute hot/cold threshold based on profile. This will be used for cold
982   // context profile merging/trimming.
983   computeSummaryAndThreshold();
984 
985   // Run global pre-inliner to adjust/merge context profile based on estimated
986   // inline decisions.
987   if (EnableCSPreInliner) {
988     ContextTracker.populateFuncToCtxtMap();
989     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
990     // Turn off the profile merger by default unless it is explicitly enabled.
991     if (!CSProfMergeColdContext.getNumOccurrences())
992       CSProfMergeColdContext = false;
993   }
994 
995   convertToProfileMap();
996 
997   // Trim and merge cold context profile using cold threshold above.
998   if (TrimColdProfile || CSProfMergeColdContext) {
999     SampleContextTrimmer(ProfileMap)
1000         .trimAndMergeColdContextProfiles(
1001             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1002             CSProfMaxColdContextDepth, EnableCSPreInliner);
1003   }
1004 
1005   // Merge function samples of CS profile to calculate profile density.
1006   sampleprof::SampleProfileMap ContextLessProfiles;
1007   for (const auto &I : ProfileMap) {
1008     ContextLessProfiles[I.second.getName()].merge(I.second);
1009   }
1010 
1011   calculateAndShowDensity(ContextLessProfiles);
1012   if (GenCSNestedProfile) {
1013     CSProfileConverter CSConverter(ProfileMap);
1014     CSConverter.convertProfiles();
1015     FunctionSamples::ProfileIsCS = false;
1016   }
1017 }
1018 
computeSummaryAndThreshold(SampleProfileMap & Profiles)1019 void ProfileGeneratorBase::computeSummaryAndThreshold(
1020     SampleProfileMap &Profiles) {
1021   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1022   Summary = Builder.computeSummaryForProfiles(Profiles);
1023   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1024       (Summary->getDetailedSummary()));
1025   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1026       (Summary->getDetailedSummary()));
1027 }
1028 
computeSummaryAndThreshold()1029 void CSProfileGenerator::computeSummaryAndThreshold() {
1030   // Always merge and use context-less profile map to compute summary.
1031   SampleProfileMap ContextLessProfiles;
1032   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1033 
1034   // Set the flag below to avoid merging the profile again in
1035   // computeSummaryAndThreshold
1036   FunctionSamples::ProfileIsCS = false;
1037   assert(
1038       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1039       "Don't set --profile-summary-contextless to false for profile "
1040       "generation");
1041   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1042   // Recover the old value.
1043   FunctionSamples::ProfileIsCS = true;
1044 }
1045 
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,bool FindDisjointRanges)1046 void ProfileGeneratorBase::extractProbesFromRange(
1047     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1048     bool FindDisjointRanges) {
1049   const RangeSample *PRanges = &RangeCounter;
1050   RangeSample Ranges;
1051   if (FindDisjointRanges) {
1052     findDisjointRanges(Ranges, RangeCounter);
1053     PRanges = &Ranges;
1054   }
1055 
1056   for (const auto &Range : *PRanges) {
1057     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
1058     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
1059     uint64_t Count = Range.second;
1060 
1061     InstructionPointer IP(Binary, RangeBegin, true);
1062     // Disjoint ranges may have range in the middle of two instr,
1063     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1064     // can be Addr1+1 to Addr2-1. We should ignore such range.
1065     if (IP.Address > RangeEnd)
1066       continue;
1067 
1068     do {
1069       const AddressProbesMap &Address2ProbesMap =
1070           Binary->getAddress2ProbesMap();
1071       auto It = Address2ProbesMap.find(IP.Address);
1072       if (It != Address2ProbesMap.end()) {
1073         for (const auto &Probe : It->second) {
1074           ProbeCounter[&Probe] += Count;
1075         }
1076       }
1077     } while (IP.advance() && IP.Address <= RangeEnd);
1078   }
1079 }
1080 
1081 static void
extractPrefixContextStack(SampleContextFrameVector & ContextStack,const SmallVectorImpl<uint64_t> & Addresses,ProfiledBinary * Binary)1082 extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1083                           const SmallVectorImpl<uint64_t> &Addresses,
1084                           ProfiledBinary *Binary) {
1085   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1086   for (auto Addr : reverse(Addresses)) {
1087     const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
1088     // These could be the cases when a probe is not found at a calliste. Cutting
1089     // off the context from here since the inliner will not know how to consume
1090     // a context with unknown callsites.
1091     // 1. for functions that are not sampled when
1092     // --decode-probe-for-profiled-functions-only is on.
1093     // 2. for a merged callsite. Callsite merging may cause the loss of original
1094     // probe IDs.
1095     // 3. for an external callsite.
1096     if (!CallProbe)
1097       break;
1098     Probes.push_back(CallProbe);
1099   }
1100 
1101   std::reverse(Probes.begin(), Probes.end());
1102 
1103   // Extract context stack for reusing, leaf context stack will be added
1104   // compressed while looking up function profile.
1105   for (const auto *P : Probes) {
1106     Binary->getInlineContextForProbe(P, ContextStack, true);
1107   }
1108 }
1109 
generateProbeBasedProfile()1110 void CSProfileGenerator::generateProbeBasedProfile() {
1111   // Enable pseudo probe functionalities in SampleProf
1112   FunctionSamples::ProfileIsProbeBased = true;
1113   for (const auto &CI : *SampleCounters) {
1114     const AddrBasedCtxKey *CtxKey =
1115         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1116     SampleContextFrameVector ContextStack;
1117     extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
1118     // Fill in function body samples from probes, also infer caller's samples
1119     // from callee's probe
1120     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1121     // Fill in boundary samples for a call probe
1122     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1123   }
1124 }
1125 
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SampleContextFrames ContextStack)1126 void CSProfileGenerator::populateBodySamplesWithProbes(
1127     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1128   ProbeCounterMap ProbeCounter;
1129   // Extract the top frame probes by looking up each address among the range in
1130   // the Address2ProbeMap
1131   extractProbesFromRange(RangeCounter, ProbeCounter);
1132   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1133                      std::unordered_set<FunctionSamples *>>
1134       FrameSamples;
1135   for (const auto &PI : ProbeCounter) {
1136     const MCDecodedPseudoProbe *Probe = PI.first;
1137     uint64_t Count = PI.second;
1138     // Disjoint ranges have introduce zero-filled gap that
1139     // doesn't belong to current context, filter them out.
1140     if (!Probe->isBlock() || Count == 0)
1141       continue;
1142 
1143     ContextTrieNode *ContextNode =
1144         getContextNodeForLeafProbe(ContextStack, Probe);
1145     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1146     // Record the current frame and FunctionProfile whenever samples are
1147     // collected for non-danglie probes. This is for reporting all of the
1148     // zero count probes of the frame later.
1149     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1150     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1151     FunctionProfile.addTotalSamples(Count);
1152     if (Probe->isEntry()) {
1153       FunctionProfile.addHeadSamples(Count);
1154       // Look up for the caller's function profile
1155       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1156       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1157       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1158         // Since the context id will be compressed, we have to use callee's
1159         // context id to infer caller's context id to ensure they share the
1160         // same context prefix.
1161         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1162         assert(CallerIndex &&
1163                "Inferred caller's location index shouldn't be zero!");
1164         FunctionSamples &CallerProfile =
1165             *getOrCreateFunctionSamples(CallerNode);
1166         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1167         CallerProfile.addBodySamples(CallerIndex, 0, Count);
1168         CallerProfile.addTotalSamples(Count);
1169         CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1170                                              ContextNode->getFuncName(), Count);
1171       }
1172     }
1173   }
1174 
1175   // Assign zero count for remaining probes without sample hits to
1176   // differentiate from probes optimized away, of which the counts are unknown
1177   // and will be inferred by the compiler.
1178   for (auto &I : FrameSamples) {
1179     for (auto *FunctionProfile : I.second) {
1180       for (auto *Probe : I.first->getProbes()) {
1181         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1182       }
1183     }
1184   }
1185 }
1186 
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SampleContextFrames ContextStack)1187 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1188     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1189   for (const auto &BI : BranchCounter) {
1190     uint64_t SourceOffset = BI.first.first;
1191     uint64_t TargetOffset = BI.first.second;
1192     uint64_t Count = BI.second;
1193     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
1194     const MCDecodedPseudoProbe *CallProbe =
1195         Binary->getCallProbeForAddr(SourceAddress);
1196     if (CallProbe == nullptr)
1197       continue;
1198     FunctionSamples &FunctionProfile =
1199         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1200     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1201     FunctionProfile.addTotalSamples(Count);
1202     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
1203     if (CalleeName.size() == 0)
1204       continue;
1205     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1206                                            Count);
1207   }
1208 }
1209 
getContextNodeForLeafProbe(SampleContextFrames ContextStack,const MCDecodedPseudoProbe * LeafProbe)1210 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1211     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1212 
1213   // Explicitly copy the context for appending the leaf context
1214   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1215                                            ContextStack.end());
1216   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1217   // For leaf inlined context with the top frame, we should strip off the top
1218   // frame's probe id, like:
1219   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1220   auto LeafFrame = NewContextStack.back();
1221   LeafFrame.Location = LineLocation(0, 0);
1222   NewContextStack.pop_back();
1223   // Compress the context string except for the leaf frame
1224   CSProfileGenerator::compressRecursionContext(NewContextStack);
1225   CSProfileGenerator::trimContext(NewContextStack);
1226   NewContextStack.push_back(LeafFrame);
1227 
1228   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1229   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1230   ContextTrieNode *ContextNode =
1231       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1232   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1233   return ContextNode;
1234 }
1235 
getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,const MCDecodedPseudoProbe * LeafProbe)1236 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1237     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1238   return *getContextNodeForLeafProbe(ContextStack, LeafProbe)
1239               ->getFunctionSamples();
1240 }
1241 
1242 } // end namespace sampleprof
1243 } // end namespace llvm
1244