1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ProfiledBinary.h"
11 #include "llvm/ProfileData/ProfileCommon.h"
12 #include <float.h>
13 #include <unordered_set>
14 
15 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
16                                     cl::Required,
17                                     cl::desc("Output profile file"));
18 static cl::alias OutputA("o", cl::desc("Alias for --output"),
19                          cl::aliasopt(OutputFilename));
20 
21 static cl::opt<SampleProfileFormat> OutputFormat(
22     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
23     cl::values(
24         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
25         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
26         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
27         clEnumValN(SPF_Text, "text", "Text encoding"),
28         clEnumValN(SPF_GCC, "gcc",
29                    "GCC encoding (only meaningful for -sample)")));
30 
31 cl::opt<bool> UseMD5(
32     "use-md5", cl::init(false), cl::Hidden,
33     cl::desc("Use md5 to represent function names in the output profile (only "
34              "meaningful for -extbinary)"));
35 
36 static cl::opt<bool> PopulateProfileSymbolList(
37     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
38     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
39 
40 static cl::opt<bool> FillZeroForAllFuncs(
41     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
42     cl::desc("Attribute all functions' range with zero count "
43              "even it's not hit by any samples."));
44 
45 static cl::opt<int32_t, true> RecursionCompression(
46     "compress-recursion",
47     cl::desc("Compressing recursion by deduplicating adjacent frame "
48              "sequences up to the specified size. -1 means no size limit."),
49     cl::Hidden,
50     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
51 
52 static cl::opt<bool> CSProfMergeColdContext(
53     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
54     cl::desc("If the total count of context profile is smaller than "
55              "the threshold, it will be merged into context-less base "
56              "profile."));
57 
58 static cl::opt<bool> CSProfTrimColdContext(
59     "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
60     cl::desc("If the total count of the profile after all merge is done "
61              "is still smaller than threshold, it will be trimmed."));
62 
63 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
64     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
65     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
66              "context-less base profile"));
67 
68 static cl::opt<int, true> CSProfMaxContextDepth(
69     "csprof-max-context-depth", cl::ZeroOrMore,
70     cl::desc("Keep the last K contexts while merging profile. -1 means no "
71              "depth limit."),
72     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
73 
74 static cl::opt<double> HotFunctionDensityThreshold(
75     "hot-function-density-threshold", llvm::cl::init(1000),
76     llvm::cl::desc(
77         "specify density threshold for hot functions (default: 1000)"),
78     llvm::cl::Optional);
79 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
80                                  llvm::cl::desc("show profile density details"),
81                                  llvm::cl::Optional);
82 
83 extern cl::opt<int> ProfileSummaryCutoffHot;
84 
85 using namespace llvm;
86 using namespace sampleprof;
87 
88 namespace llvm {
89 namespace sampleprof {
90 
91 // Initialize the MaxCompressionSize to -1 which means no size limit
92 int32_t CSProfileGenerator::MaxCompressionSize = -1;
93 
94 int CSProfileGenerator::MaxContextDepth = -1;
95 
96 std::unique_ptr<ProfileGeneratorBase>
97 ProfileGeneratorBase::create(ProfiledBinary *Binary,
98                              const ContextSampleCounterMap &SampleCounters,
99                              bool ProfileIsCS) {
100   std::unique_ptr<ProfileGeneratorBase> Generator;
101   if (ProfileIsCS) {
102     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
103   } else {
104     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
105   }
106 
107   return Generator;
108 }
109 
110 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
111                                  SampleProfileMap &ProfileMap) {
112   // Populate profile symbol list if extended binary format is used.
113   ProfileSymbolList SymbolList;
114 
115   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
116     Binary->populateSymbolListFromDWARF(SymbolList);
117     Writer->setProfileSymbolList(&SymbolList);
118   }
119 
120   if (std::error_code EC = Writer->write(ProfileMap))
121     exitWithError(std::move(EC));
122 }
123 
124 void ProfileGeneratorBase::write() {
125   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
126   if (std::error_code EC = WriterOrErr.getError())
127     exitWithError(EC, OutputFilename);
128 
129   if (UseMD5) {
130     if (OutputFormat != SPF_Ext_Binary)
131       WithColor::warning() << "-use-md5 is ignored. Specify "
132                               "--format=extbinary to enable it\n";
133     else
134       WriterOrErr.get()->setUseMD5();
135   }
136 
137   write(std::move(WriterOrErr.get()), ProfileMap);
138 }
139 
140 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
141   if (Density == 0.0)
142     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
143                             "set too low. Please check your command.\n";
144   else if (Density < HotFunctionDensityThreshold)
145     WithColor::warning()
146         << "AutoFDO is estimated to optimize better with "
147         << format("%.1f", HotFunctionDensityThreshold / Density)
148         << "x more samples. Please consider increasing sampling rate or "
149            "profiling for longer duration to get more samples.\n";
150 
151   if (ShowDensity)
152     outs() << "Minimum profile density for hot functions with top "
153            << format("%.2f",
154                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
155                          10000)
156            << "% total samples: " << format("%.1f", Density) << "\n";
157 }
158 
159 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
160                                               uint64_t HotCntThreshold) {
161   double Density = DBL_MAX;
162   std::vector<const FunctionSamples *> HotFuncs;
163   for (auto &I : Profiles) {
164     auto &FuncSamples = I.second;
165     if (FuncSamples.getTotalSamples() < HotCntThreshold)
166       continue;
167     HotFuncs.emplace_back(&FuncSamples);
168   }
169 
170   for (auto *FuncSamples : HotFuncs) {
171     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
172     if (!Func)
173       continue;
174     uint64_t FuncSize = Func->getFuncSize();
175     if (FuncSize == 0)
176       continue;
177     Density =
178         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
179                               FuncSize);
180   }
181 
182   return Density == DBL_MAX ? 0.0 : Density;
183 }
184 
185 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
186                                               const RangeSample &Ranges) {
187 
188   /*
189   Regions may overlap with each other. Using the boundary info, find all
190   disjoint ranges and their sample count. BoundaryPoint contains the count
191   multiple samples begin/end at this points.
192 
193   |<--100-->|           Sample1
194   |<------200------>|   Sample2
195   A         B       C
196 
197   In the example above,
198   Sample1 begins at A, ends at B, its value is 100.
199   Sample2 beings at A, ends at C, its value is 200.
200   For A, BeginCount is the sum of sample begins at A, which is 300 and no
201   samples ends at A, so EndCount is 0.
202   Then boundary points A, B, and C with begin/end counts are:
203   A: (300, 0)
204   B: (0, 100)
205   C: (0, 200)
206   */
207   struct BoundaryPoint {
208     // Sum of sample counts beginning at this point
209     uint64_t BeginCount = UINT64_MAX;
210     // Sum of sample counts ending at this point
211     uint64_t EndCount = UINT64_MAX;
212     // Is the begin point of a zero range.
213     bool IsZeroRangeBegin = false;
214     // Is the end point of a zero range.
215     bool IsZeroRangeEnd = false;
216 
217     void addBeginCount(uint64_t Count) {
218       if (BeginCount == UINT64_MAX)
219         BeginCount = 0;
220       BeginCount += Count;
221     }
222 
223     void addEndCount(uint64_t Count) {
224       if (EndCount == UINT64_MAX)
225         EndCount = 0;
226       EndCount += Count;
227     }
228   };
229 
230   /*
231   For the above example. With boundary points, follwing logic finds two
232   disjoint region of
233 
234   [A,B]:   300
235   [B+1,C]: 200
236 
237   If there is a boundary point that both begin and end, the point itself
238   becomes a separate disjoint region. For example, if we have original
239   ranges of
240 
241   |<--- 100 --->|
242                 |<--- 200 --->|
243   A             B             C
244 
245   there are three boundary points with their begin/end counts of
246 
247   A: (100, 0)
248   B: (200, 100)
249   C: (0, 200)
250 
251   the disjoint ranges would be
252 
253   [A, B-1]: 100
254   [B, B]:   300
255   [B+1, C]: 200.
256 
257   Example for zero value range:
258 
259     |<--- 100 --->|
260                        |<--- 200 --->|
261   |<---------------  0 ----------------->|
262   A  B            C    D             E   F
263 
264   [A, B-1]  : 0
265   [B, C]    : 100
266   [C+1, D-1]: 0
267   [D, E]    : 200
268   [E+1, F]  : 0
269   */
270   std::map<uint64_t, BoundaryPoint> Boundaries;
271 
272   for (auto Item : Ranges) {
273     assert(Item.first.first <= Item.first.second &&
274            "Invalid instruction range");
275     auto &BeginPoint = Boundaries[Item.first.first];
276     auto &EndPoint = Boundaries[Item.first.second];
277     uint64_t Count = Item.second;
278 
279     BeginPoint.addBeginCount(Count);
280     EndPoint.addEndCount(Count);
281     if (Count == 0) {
282       BeginPoint.IsZeroRangeBegin = true;
283       EndPoint.IsZeroRangeEnd = true;
284     }
285   }
286 
287   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
288   // and the next valid address
289   uint64_t BeginAddress = UINT64_MAX;
290   int ZeroRangeDepth = 0;
291   uint64_t Count = 0;
292   for (auto Item : Boundaries) {
293     uint64_t Address = Item.first;
294     BoundaryPoint &Point = Item.second;
295     if (Point.BeginCount != UINT64_MAX) {
296       if (BeginAddress != UINT64_MAX)
297         DisjointRanges[{BeginAddress, Address - 1}] = Count;
298       Count += Point.BeginCount;
299       BeginAddress = Address;
300       ZeroRangeDepth += Point.IsZeroRangeBegin;
301     }
302     if (Point.EndCount != UINT64_MAX) {
303       assert((BeginAddress != UINT64_MAX) &&
304              "First boundary point cannot be 'end' point");
305       DisjointRanges[{BeginAddress, Address}] = Count;
306       assert(Count >= Point.EndCount && "Mismatched live ranges");
307       Count -= Point.EndCount;
308       BeginAddress = Address + 1;
309       ZeroRangeDepth -= Point.IsZeroRangeEnd;
310       // If the remaining count is zero and it's no longer in a zero range, this
311       // means we consume all the ranges before, thus mark BeginAddress as
312       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
313       //  [<---- 10 ---->]
314       //                       [<---- 20 ---->]
315       //   A             B     C              D
316       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
317       // have the [B+1, C-1] zero range.
318       if (Count == 0 && ZeroRangeDepth == 0)
319         BeginAddress = UINT64_MAX;
320     }
321   }
322 }
323 
324 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
325     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
326     uint64_t Count) {
327   // Use the maximum count of samples with same line location
328   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
329 
330   // Use duplication factor to compensated for loop unroll/vectorization.
331   // Note that this is only needed when we're taking MAX of the counts at
332   // the location instead of SUM.
333   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
334 
335   ErrorOr<uint64_t> R =
336       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
337 
338   uint64_t PreviousCount = R ? R.get() : 0;
339   if (PreviousCount <= Count) {
340     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
341                                    Count - PreviousCount);
342   }
343 }
344 
345 void ProfileGeneratorBase::updateTotalSamples() {
346   for (auto &Item : ProfileMap) {
347     FunctionSamples &FunctionProfile = Item.second;
348     FunctionProfile.updateTotalSamples();
349   }
350 }
351 
352 FunctionSamples &
353 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
354   SampleContext Context(FuncName);
355   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
356   if (Ret.second) {
357     FunctionSamples &FProfile = Ret.first->second;
358     FProfile.setContext(Context);
359   }
360   return Ret.first->second;
361 }
362 
363 void ProfileGenerator::generateProfile() {
364   if (Binary->usePseudoProbes()) {
365     // TODO: Support probe based profile generation
366   } else {
367     generateLineNumBasedProfile();
368   }
369   postProcessProfiles();
370 }
371 
372 void ProfileGenerator::postProcessProfiles() {
373   computeSummaryAndThreshold();
374   calculateAndShowDensity(ProfileMap);
375 }
376 
377 void ProfileGenerator::generateLineNumBasedProfile() {
378   assert(SampleCounters.size() == 1 &&
379          "Must have one entry for profile generation.");
380   const SampleCounter &SC = SampleCounters.begin()->second;
381   // Fill in function body samples
382   populateBodySamplesForAllFunctions(SC.RangeCounter);
383   // Fill in boundary sample counts as well as call site samples for calls
384   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
385 
386   updateTotalSamples();
387 }
388 
389 FunctionSamples &ProfileGenerator::getLeafFrameProfile(
390     const SampleContextFrameVector &FrameVec) {
391   // Get top level profile
392   FunctionSamples *FunctionProfile =
393       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
394 
395   for (size_t I = 1; I < FrameVec.size(); I++) {
396     LineLocation Callsite(
397         FrameVec[I - 1].Location.LineOffset,
398         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
399     FunctionSamplesMap &SamplesMap =
400         FunctionProfile->functionSamplesAt(Callsite);
401     auto Ret =
402         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
403     if (Ret.second) {
404       SampleContext Context(FrameVec[I].FuncName);
405       Ret.first->second.setContext(Context);
406     }
407     FunctionProfile = &Ret.first->second;
408   }
409 
410   return *FunctionProfile;
411 }
412 
413 RangeSample
414 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
415   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
416   if (FillZeroForAllFuncs) {
417     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
418       for (auto &R : FuncI.second.Ranges) {
419         Ranges[{R.first, R.second - 1}] += 0;
420       }
421     }
422   } else {
423     // For each range, we search for all ranges of the function it belongs to
424     // and initialize it with zero count, so it remains zero if doesn't hit any
425     // samples. This is to be consistent with compiler that interpret zero count
426     // as unexecuted(cold).
427     for (auto I : RangeCounter) {
428       uint64_t StartOffset = I.first.first;
429       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
430         Ranges[{Range.first, Range.second - 1}] += 0;
431     }
432   }
433   RangeSample DisjointRanges;
434   findDisjointRanges(DisjointRanges, Ranges);
435   return DisjointRanges;
436 }
437 
438 void ProfileGenerator::populateBodySamplesForAllFunctions(
439     const RangeSample &RangeCounter) {
440   for (auto Range : preprocessRangeCounter(RangeCounter)) {
441     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
442     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
443     uint64_t Count = Range.second;
444 
445     InstructionPointer IP(Binary, RangeBegin, true);
446     // Disjoint ranges may have range in the middle of two instr,
447     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
448     // can be Addr1+1 to Addr2-1. We should ignore such range.
449     if (IP.Address > RangeEnd)
450       continue;
451 
452     do {
453       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
454       const SampleContextFrameVector &FrameVec =
455           Binary->getFrameLocationStack(Offset);
456       if (!FrameVec.empty()) {
457         FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec);
458         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
459                                             Count);
460       }
461     } while (IP.advance() && IP.Address <= RangeEnd);
462   }
463 }
464 
465 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
466   // Get the function range by branch target if it's a call branch.
467   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
468 
469   // We won't accumulate sample count for a range whose start is not the real
470   // function entry such as outlined function or inner labels.
471   if (!FRange || !FRange->IsFuncEntry)
472     return StringRef();
473 
474   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
475 }
476 
477 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
478     const BranchSample &BranchCounters) {
479   for (auto Entry : BranchCounters) {
480     uint64_t SourceOffset = Entry.first.first;
481     uint64_t TargetOffset = Entry.first.second;
482     uint64_t Count = Entry.second;
483     assert(Count != 0 && "Unexpected zero weight branch");
484 
485     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
486     if (CalleeName.size() == 0)
487       continue;
488     // Record called target sample and its count.
489     const SampleContextFrameVector &FrameVec =
490         Binary->getFrameLocationStack(SourceOffset);
491     if (!FrameVec.empty()) {
492       FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec);
493       FunctionProfile.addCalledTargetSamples(
494           FrameVec.back().Location.LineOffset,
495           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
496           CalleeName, Count);
497     }
498     // Add head samples for callee.
499     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
500     CalleeProfile.addHeadSamples(Count);
501   }
502 }
503 
504 void ProfileGeneratorBase::calculateAndShowDensity(
505     const SampleProfileMap &Profiles) {
506   double Density = calculateDensity(Profiles, HotCountThreshold);
507   showDensitySuggestion(Density);
508 }
509 
510 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
511     const SampleContextFrameVector &Context, bool WasLeafInlined) {
512   auto I = ProfileMap.find(SampleContext(Context));
513   if (I == ProfileMap.end()) {
514     // Save the new context for future references.
515     SampleContextFrames NewContext = *Contexts.insert(Context).first;
516     SampleContext FContext(NewContext, RawContext);
517     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
518     if (WasLeafInlined)
519       FContext.setAttribute(ContextWasInlined);
520     FunctionSamples &FProfile = Ret.first->second;
521     FProfile.setContext(FContext);
522     return Ret.first->second;
523   }
524   return I->second;
525 }
526 
527 void CSProfileGenerator::generateProfile() {
528   FunctionSamples::ProfileIsCS = true;
529 
530   if (Binary->getTrackFuncContextSize())
531     computeSizeForProfiledFunctions();
532 
533   if (Binary->usePseudoProbes()) {
534     // Enable pseudo probe functionalities in SampleProf
535     FunctionSamples::ProfileIsProbeBased = true;
536     generateProbeBasedProfile();
537   } else {
538     generateLineNumBasedProfile();
539   }
540   postProcessProfiles();
541 }
542 
543 void CSProfileGenerator::computeSizeForProfiledFunctions() {
544   // Hash map to deduplicate the function range and the item is a pair of
545   // function start and end offset.
546   std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
547   // Go through all the ranges in the CS counters, use the start of the range to
548   // look up the function it belongs and record the function range.
549   for (const auto &CI : SampleCounters) {
550     for (auto Item : CI.second.RangeCounter) {
551       // FIXME: Filter the bogus crossing function range.
552       uint64_t StartOffset = Item.first.first;
553       // Note that a function can be spilt into multiple ranges, so get all
554       // ranges of the function.
555       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
556         AggregatedRanges[Range.first] = Range.second;
557     }
558   }
559 
560   for (auto I : AggregatedRanges) {
561     uint64_t StartOffset = I.first;
562     uint64_t EndOffset = I.second;
563     Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
564   }
565 }
566 
567 void CSProfileGenerator::generateLineNumBasedProfile() {
568   for (const auto &CI : SampleCounters) {
569     const StringBasedCtxKey *CtxKey =
570         dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
571     // Get or create function profile for the range
572     FunctionSamples &FunctionProfile =
573         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
574 
575     // Fill in function body samples
576     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
577     // Fill in boundary sample counts as well as call site samples for calls
578     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
579                                        CI.second.BranchCounter);
580   }
581   // Fill in call site value sample for inlined calls and also use context to
582   // infer missing samples. Since we don't have call count for inlined
583   // functions, we estimate it from inlinee's profile using the entry of the
584   // body sample.
585   populateInferredFunctionSamples();
586 
587   updateTotalSamples();
588 }
589 
590 void CSProfileGenerator::populateBodySamplesForFunction(
591     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
592   // Compute disjoint ranges first, so we can use MAX
593   // for calculating count for each location.
594   RangeSample Ranges;
595   findDisjointRanges(Ranges, RangeCounter);
596   for (auto Range : Ranges) {
597     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
598     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
599     uint64_t Count = Range.second;
600     // Disjoint ranges have introduce zero-filled gap that
601     // doesn't belong to current context, filter them out.
602     if (Count == 0)
603       continue;
604 
605     InstructionPointer IP(Binary, RangeBegin, true);
606     // Disjoint ranges may have range in the middle of two instr,
607     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
608     // can be Addr1+1 to Addr2-1. We should ignore such range.
609     if (IP.Address > RangeEnd)
610       continue;
611 
612     do {
613       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
614       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
615       if (LeafLoc.hasValue()) {
616         // Recording body sample for this specific context
617         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
618       }
619     } while (IP.advance() && IP.Address <= RangeEnd);
620   }
621 }
622 
623 void CSProfileGenerator::populateBoundarySamplesForFunction(
624     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
625     const BranchSample &BranchCounters) {
626 
627   for (auto Entry : BranchCounters) {
628     uint64_t SourceOffset = Entry.first.first;
629     uint64_t TargetOffset = Entry.first.second;
630     uint64_t Count = Entry.second;
631     assert(Count != 0 && "Unexpected zero weight branch");
632 
633     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
634     if (CalleeName.size() == 0)
635       continue;
636 
637     // Record called target sample and its count
638     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
639     if (!LeafLoc.hasValue())
640       continue;
641     FunctionProfile.addCalledTargetSamples(
642         LeafLoc->Location.LineOffset,
643         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
644         Count);
645 
646     // Record head sample for called target(callee)
647     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
648     assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
649            "Leaf function name doesn't match");
650     CalleeCtx.back() = *LeafLoc;
651     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
652     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
653     CalleeProfile.addHeadSamples(Count);
654   }
655 }
656 
657 static SampleContextFrame
658 getCallerContext(SampleContextFrames CalleeContext,
659                  SampleContextFrameVector &CallerContext) {
660   assert(CalleeContext.size() > 1 && "Unexpected empty context");
661   CalleeContext = CalleeContext.drop_back();
662   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
663   SampleContextFrame CallerFrame = CallerContext.back();
664   CallerContext.back().Location = LineLocation(0, 0);
665   return CallerFrame;
666 }
667 
668 void CSProfileGenerator::populateInferredFunctionSamples() {
669   for (const auto &Item : ProfileMap) {
670     const auto &CalleeContext = Item.first;
671     const FunctionSamples &CalleeProfile = Item.second;
672 
673     // If we already have head sample counts, we must have value profile
674     // for call sites added already. Skip to avoid double counting.
675     if (CalleeProfile.getHeadSamples())
676       continue;
677     // If we don't have context, nothing to do for caller's call site.
678     // This could happen for entry point function.
679     if (CalleeContext.isBaseContext())
680       continue;
681 
682     // Infer Caller's frame loc and context ID through string splitting
683     SampleContextFrameVector CallerContextId;
684     SampleContextFrame &&CallerLeafFrameLoc =
685         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
686     SampleContextFrames CallerContext(CallerContextId);
687 
688     // It's possible that we haven't seen any sample directly in the caller,
689     // in which case CallerProfile will not exist. But we can't modify
690     // ProfileMap while iterating it.
691     // TODO: created function profile for those callers too
692     if (ProfileMap.find(CallerContext) == ProfileMap.end())
693       continue;
694     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
695 
696     // Since we don't have call count for inlined functions, we
697     // estimate it from inlinee's profile using entry body sample.
698     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
699     // If we don't have samples with location, use 1 to indicate live.
700     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
701       EstimatedCallCount = 1;
702     CallerProfile.addCalledTargetSamples(
703         CallerLeafFrameLoc.Location.LineOffset,
704         CallerLeafFrameLoc.Location.Discriminator,
705         CalleeProfile.getContext().getName(), EstimatedCallCount);
706     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
707                                  CallerLeafFrameLoc.Location.Discriminator,
708                                  EstimatedCallCount);
709   }
710 }
711 
712 void CSProfileGenerator::postProcessProfiles() {
713   // Compute hot/cold threshold based on profile. This will be used for cold
714   // context profile merging/trimming.
715   computeSummaryAndThreshold();
716 
717   // Run global pre-inliner to adjust/merge context profile based on estimated
718   // inline decisions.
719   if (EnableCSPreInliner) {
720     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
721         .run();
722     // Turn off the profile merger by default unless it is explicitly enabled.
723     if (!CSProfMergeColdContext.getNumOccurrences())
724       CSProfMergeColdContext = false;
725   }
726 
727   // Trim and merge cold context profile using cold threshold above.
728   if (CSProfTrimColdContext || CSProfMergeColdContext) {
729     SampleContextTrimmer(ProfileMap)
730         .trimAndMergeColdContextProfiles(
731             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
732             CSProfMaxColdContextDepth, EnableCSPreInliner);
733   }
734 
735   // Merge function samples of CS profile to calculate profile density.
736   sampleprof::SampleProfileMap ContextLessProfiles;
737   for (const auto &I : ProfileMap) {
738     ContextLessProfiles[I.second.getName()].merge(I.second);
739   }
740 
741   calculateAndShowDensity(ContextLessProfiles);
742 }
743 
744 void ProfileGeneratorBase::computeSummaryAndThreshold() {
745   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
746   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
747   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
748       (Summary->getDetailedSummary()));
749   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
750       (Summary->getDetailedSummary()));
751 }
752 
753 // Helper function to extract context prefix string stack
754 // Extract context stack for reusing, leaf context stack will
755 // be added compressed while looking up function profile
756 static void extractPrefixContextStack(
757     SampleContextFrameVector &ContextStack,
758     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
759     ProfiledBinary *Binary) {
760   for (const auto *P : Probes) {
761     Binary->getInlineContextForProbe(P, ContextStack, true);
762   }
763 }
764 
765 void CSProfileGenerator::generateProbeBasedProfile() {
766   for (const auto &CI : SampleCounters) {
767     const ProbeBasedCtxKey *CtxKey =
768         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
769     SampleContextFrameVector ContextStack;
770     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
771     // Fill in function body samples from probes, also infer caller's samples
772     // from callee's probe
773     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
774     // Fill in boundary samples for a call probe
775     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
776   }
777 }
778 
779 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
780                                                 ProbeCounterMap &ProbeCounter) {
781   RangeSample Ranges;
782   findDisjointRanges(Ranges, RangeCounter);
783   for (const auto &Range : Ranges) {
784     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
785     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
786     uint64_t Count = Range.second;
787     // Disjoint ranges have introduce zero-filled gap that
788     // doesn't belong to current context, filter them out.
789     if (Count == 0)
790       continue;
791 
792     InstructionPointer IP(Binary, RangeBegin, true);
793     // Disjoint ranges may have range in the middle of two instr,
794     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
795     // can be Addr1+1 to Addr2-1. We should ignore such range.
796     if (IP.Address > RangeEnd)
797       continue;
798 
799     do {
800       const AddressProbesMap &Address2ProbesMap =
801           Binary->getAddress2ProbesMap();
802       auto It = Address2ProbesMap.find(IP.Address);
803       if (It != Address2ProbesMap.end()) {
804         for (const auto &Probe : It->second) {
805           if (!Probe.isBlock())
806             continue;
807           ProbeCounter[&Probe] += Count;
808         }
809       }
810     } while (IP.advance() && IP.Address <= RangeEnd);
811   }
812 }
813 
814 void CSProfileGenerator::populateBodySamplesWithProbes(
815     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
816   ProbeCounterMap ProbeCounter;
817   // Extract the top frame probes by looking up each address among the range in
818   // the Address2ProbeMap
819   extractProbesFromRange(RangeCounter, ProbeCounter);
820   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
821                      std::unordered_set<FunctionSamples *>>
822       FrameSamples;
823   for (auto PI : ProbeCounter) {
824     const MCDecodedPseudoProbe *Probe = PI.first;
825     uint64_t Count = PI.second;
826     FunctionSamples &FunctionProfile =
827         getFunctionProfileForLeafProbe(ContextStack, Probe);
828     // Record the current frame and FunctionProfile whenever samples are
829     // collected for non-danglie probes. This is for reporting all of the
830     // zero count probes of the frame later.
831     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
832     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
833     FunctionProfile.addTotalSamples(Count);
834     if (Probe->isEntry()) {
835       FunctionProfile.addHeadSamples(Count);
836       // Look up for the caller's function profile
837       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
838       if (InlinerDesc != nullptr) {
839         // Since the context id will be compressed, we have to use callee's
840         // context id to infer caller's context id to ensure they share the
841         // same context prefix.
842         SampleContextFrames CalleeContextId =
843             FunctionProfile.getContext().getContextFrames();
844         SampleContextFrameVector CallerContextId;
845         SampleContextFrame &&CallerLeafFrameLoc =
846             getCallerContext(CalleeContextId, CallerContextId);
847         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
848         assert(CallerIndex &&
849                "Inferred caller's location index shouldn't be zero!");
850         FunctionSamples &CallerProfile =
851             getFunctionProfileForContext(CallerContextId);
852         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
853         CallerProfile.addBodySamples(CallerIndex, 0, Count);
854         CallerProfile.addTotalSamples(Count);
855         CallerProfile.addCalledTargetSamples(
856             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
857       }
858     }
859   }
860 
861   // Assign zero count for remaining probes without sample hits to
862   // differentiate from probes optimized away, of which the counts are unknown
863   // and will be inferred by the compiler.
864   for (auto &I : FrameSamples) {
865     for (auto *FunctionProfile : I.second) {
866       for (auto *Probe : I.first->getProbes()) {
867         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
868       }
869     }
870   }
871 }
872 
873 void CSProfileGenerator::populateBoundarySamplesWithProbes(
874     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
875   for (auto BI : BranchCounter) {
876     uint64_t SourceOffset = BI.first.first;
877     uint64_t TargetOffset = BI.first.second;
878     uint64_t Count = BI.second;
879     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
880     const MCDecodedPseudoProbe *CallProbe =
881         Binary->getCallProbeForAddr(SourceAddress);
882     if (CallProbe == nullptr)
883       continue;
884     FunctionSamples &FunctionProfile =
885         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
886     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
887     FunctionProfile.addTotalSamples(Count);
888     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
889     if (CalleeName.size() == 0)
890       continue;
891     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
892                                            Count);
893   }
894 }
895 
896 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
897     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
898 
899   // Explicitly copy the context for appending the leaf context
900   SampleContextFrameVector NewContextStack(ContextStack.begin(),
901                                            ContextStack.end());
902   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
903   // For leaf inlined context with the top frame, we should strip off the top
904   // frame's probe id, like:
905   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
906   auto LeafFrame = NewContextStack.back();
907   LeafFrame.Location = LineLocation(0, 0);
908   NewContextStack.pop_back();
909   // Compress the context string except for the leaf frame
910   CSProfileGenerator::compressRecursionContext(NewContextStack);
911   CSProfileGenerator::trimContext(NewContextStack);
912   NewContextStack.push_back(LeafFrame);
913 
914   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
915   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
916   FunctionSamples &FunctionProile =
917       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
918   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
919   return FunctionProile;
920 }
921 
922 } // end namespace sampleprof
923 } // end namespace llvm
924