1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ProfiledBinary.h"
11 #include "llvm/ProfileData/ProfileCommon.h"
12 #include <unordered_set>
13 
14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
15                                     cl::Required,
16                                     cl::desc("Output profile file"));
17 static cl::alias OutputA("o", cl::desc("Alias for --output"),
18                          cl::aliasopt(OutputFilename));
19 
20 static cl::opt<SampleProfileFormat> OutputFormat(
21     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
22     cl::values(
23         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
24         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
25         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
26         clEnumValN(SPF_Text, "text", "Text encoding"),
27         clEnumValN(SPF_GCC, "gcc",
28                    "GCC encoding (only meaningful for -sample)")));
29 
30 cl::opt<bool> UseMD5(
31     "use-md5", cl::init(false), cl::Hidden,
32     cl::desc("Use md5 to represent function names in the output profile (only "
33              "meaningful for -extbinary)"));
34 
35 static cl::opt<bool> PopulateProfileSymbolList(
36     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
37     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
38 
39 static cl::opt<bool> FillZeroForAllFuncs(
40     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
41     cl::desc("Attribute all functions' range with zero count "
42              "even it's not hit by any samples."));
43 
44 static cl::opt<int32_t, true> RecursionCompression(
45     "compress-recursion",
46     cl::desc("Compressing recursion by deduplicating adjacent frame "
47              "sequences up to the specified size. -1 means no size limit."),
48     cl::Hidden,
49     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
50 
51 static cl::opt<bool> CSProfMergeColdContext(
52     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
53     cl::desc("If the total count of context profile is smaller than "
54              "the threshold, it will be merged into context-less base "
55              "profile."));
56 
57 static cl::opt<bool> CSProfTrimColdContext(
58     "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
59     cl::desc("If the total count of the profile after all merge is done "
60              "is still smaller than threshold, it will be trimmed."));
61 
62 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
63     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
64     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
65              "context-less base profile"));
66 
67 static cl::opt<int, true> CSProfMaxContextDepth(
68     "csprof-max-context-depth", cl::ZeroOrMore,
69     cl::desc("Keep the last K contexts while merging profile. -1 means no "
70              "depth limit."),
71     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
72 
73 extern cl::opt<int> ProfileSummaryCutoffCold;
74 
75 using namespace llvm;
76 using namespace sampleprof;
77 
78 namespace llvm {
79 namespace sampleprof {
80 
81 // Initialize the MaxCompressionSize to -1 which means no size limit
82 int32_t CSProfileGenerator::MaxCompressionSize = -1;
83 
84 int CSProfileGenerator::MaxContextDepth = -1;
85 
86 std::unique_ptr<ProfileGeneratorBase>
87 ProfileGeneratorBase::create(ProfiledBinary *Binary,
88                              const ContextSampleCounterMap &SampleCounters,
89                              bool ProfileIsCS) {
90   std::unique_ptr<ProfileGeneratorBase> Generator;
91   if (ProfileIsCS) {
92     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
93   } else {
94     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
95   }
96 
97   return Generator;
98 }
99 
100 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
101                                  SampleProfileMap &ProfileMap) {
102   // Populate profile symbol list if extended binary format is used.
103   ProfileSymbolList SymbolList;
104 
105   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
106     Binary->populateSymbolListFromDWARF(SymbolList);
107     Writer->setProfileSymbolList(&SymbolList);
108   }
109 
110   if (std::error_code EC = Writer->write(ProfileMap))
111     exitWithError(std::move(EC));
112 }
113 
114 void ProfileGeneratorBase::write() {
115   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
116   if (std::error_code EC = WriterOrErr.getError())
117     exitWithError(EC, OutputFilename);
118 
119   if (UseMD5) {
120     if (OutputFormat != SPF_Ext_Binary)
121       WithColor::warning() << "-use-md5 is ignored. Specify "
122                               "--format=extbinary to enable it\n";
123     else
124       WriterOrErr.get()->setUseMD5();
125   }
126 
127   write(std::move(WriterOrErr.get()), ProfileMap);
128 }
129 
130 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
131                                               const RangeSample &Ranges) {
132 
133   /*
134   Regions may overlap with each other. Using the boundary info, find all
135   disjoint ranges and their sample count. BoundaryPoint contains the count
136   multiple samples begin/end at this points.
137 
138   |<--100-->|           Sample1
139   |<------200------>|   Sample2
140   A         B       C
141 
142   In the example above,
143   Sample1 begins at A, ends at B, its value is 100.
144   Sample2 beings at A, ends at C, its value is 200.
145   For A, BeginCount is the sum of sample begins at A, which is 300 and no
146   samples ends at A, so EndCount is 0.
147   Then boundary points A, B, and C with begin/end counts are:
148   A: (300, 0)
149   B: (0, 100)
150   C: (0, 200)
151   */
152   struct BoundaryPoint {
153     // Sum of sample counts beginning at this point
154     uint64_t BeginCount = UINT64_MAX;
155     // Sum of sample counts ending at this point
156     uint64_t EndCount = UINT64_MAX;
157     // Is the begin point of a zero range.
158     bool IsZeroRangeBegin = false;
159     // Is the end point of a zero range.
160     bool IsZeroRangeEnd = false;
161 
162     void addBeginCount(uint64_t Count) {
163       if (BeginCount == UINT64_MAX)
164         BeginCount = 0;
165       BeginCount += Count;
166     }
167 
168     void addEndCount(uint64_t Count) {
169       if (EndCount == UINT64_MAX)
170         EndCount = 0;
171       EndCount += Count;
172     }
173   };
174 
175   /*
176   For the above example. With boundary points, follwing logic finds two
177   disjoint region of
178 
179   [A,B]:   300
180   [B+1,C]: 200
181 
182   If there is a boundary point that both begin and end, the point itself
183   becomes a separate disjoint region. For example, if we have original
184   ranges of
185 
186   |<--- 100 --->|
187                 |<--- 200 --->|
188   A             B             C
189 
190   there are three boundary points with their begin/end counts of
191 
192   A: (100, 0)
193   B: (200, 100)
194   C: (0, 200)
195 
196   the disjoint ranges would be
197 
198   [A, B-1]: 100
199   [B, B]:   300
200   [B+1, C]: 200.
201 
202   Example for zero value range:
203 
204     |<--- 100 --->|
205                        |<--- 200 --->|
206   |<---------------  0 ----------------->|
207   A  B            C    D             E   F
208 
209   [A, B-1]  : 0
210   [B, C]    : 100
211   [C+1, D-1]: 0
212   [D, E]    : 200
213   [E+1, F]  : 0
214   */
215   std::map<uint64_t, BoundaryPoint> Boundaries;
216 
217   for (auto Item : Ranges) {
218     assert(Item.first.first <= Item.first.second &&
219            "Invalid instruction range");
220     auto &BeginPoint = Boundaries[Item.first.first];
221     auto &EndPoint = Boundaries[Item.first.second];
222     uint64_t Count = Item.second;
223 
224     BeginPoint.addBeginCount(Count);
225     EndPoint.addEndCount(Count);
226     if (Count == 0) {
227       BeginPoint.IsZeroRangeBegin = true;
228       EndPoint.IsZeroRangeEnd = true;
229     }
230   }
231 
232   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
233   // and the next valid address
234   uint64_t BeginAddress = UINT64_MAX;
235   int ZeroRangeDepth = 0;
236   uint64_t Count = 0;
237   for (auto Item : Boundaries) {
238     uint64_t Address = Item.first;
239     BoundaryPoint &Point = Item.second;
240     if (Point.BeginCount != UINT64_MAX) {
241       if (BeginAddress != UINT64_MAX)
242         DisjointRanges[{BeginAddress, Address - 1}] = Count;
243       Count += Point.BeginCount;
244       BeginAddress = Address;
245       ZeroRangeDepth += Point.IsZeroRangeBegin;
246     }
247     if (Point.EndCount != UINT64_MAX) {
248       assert((BeginAddress != UINT64_MAX) &&
249              "First boundary point cannot be 'end' point");
250       DisjointRanges[{BeginAddress, Address}] = Count;
251       assert(Count >= Point.EndCount && "Mismatched live ranges");
252       Count -= Point.EndCount;
253       BeginAddress = Address + 1;
254       ZeroRangeDepth -= Point.IsZeroRangeEnd;
255       // If the remaining count is zero and it's no longer in a zero range, this
256       // means we consume all the ranges before, thus mark BeginAddress as
257       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
258       //  [<---- 10 ---->]
259       //                       [<---- 20 ---->]
260       //   A             B     C              D
261       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
262       // have the [B+1, C-1] zero range.
263       if (Count == 0 && ZeroRangeDepth == 0)
264         BeginAddress = UINT64_MAX;
265     }
266   }
267 }
268 
269 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
270     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
271     uint64_t Count) {
272   // Use the maximum count of samples with same line location
273   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
274 
275   // Use duplication factor to compensated for loop unroll/vectorization.
276   // Note that this is only needed when we're taking MAX of the counts at
277   // the location instead of SUM.
278   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
279 
280   ErrorOr<uint64_t> R =
281       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
282 
283   uint64_t PreviousCount = R ? R.get() : 0;
284   if (PreviousCount <= Count) {
285     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
286                                    Count - PreviousCount);
287   }
288 }
289 
290 void ProfileGeneratorBase::updateTotalSamples() {
291   for (auto &Item : ProfileMap) {
292     FunctionSamples &FunctionProfile = Item.second;
293     FunctionProfile.updateTotalSamples();
294   }
295 }
296 
297 FunctionSamples &
298 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
299   SampleContext Context(FuncName);
300   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
301   if (Ret.second) {
302     FunctionSamples &FProfile = Ret.first->second;
303     FProfile.setContext(Context);
304   }
305   return Ret.first->second;
306 }
307 
308 void ProfileGenerator::generateProfile() {
309   if (Binary->usePseudoProbes()) {
310     // TODO: Support probe based profile generation
311   } else {
312     generateLineNumBasedProfile();
313   }
314 }
315 
316 void ProfileGenerator::generateLineNumBasedProfile() {
317   assert(SampleCounters.size() == 1 &&
318          "Must have one entry for profile generation.");
319   const SampleCounter &SC = SampleCounters.begin()->second;
320   // Fill in function body samples
321   populateBodySamplesForAllFunctions(SC.RangeCounter);
322   // Fill in boundary sample counts as well as call site samples for calls
323   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
324 
325   updateTotalSamples();
326 }
327 
328 FunctionSamples &ProfileGenerator::getLeafFrameProfile(
329     const SampleContextFrameVector &FrameVec) {
330   // Get top level profile
331   FunctionSamples *FunctionProfile =
332       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
333 
334   for (size_t I = 1; I < FrameVec.size(); I++) {
335     LineLocation Callsite(
336         FrameVec[I - 1].Location.LineOffset,
337         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
338     FunctionSamplesMap &SamplesMap =
339         FunctionProfile->functionSamplesAt(Callsite);
340     auto Ret =
341         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
342     if (Ret.second) {
343       SampleContext Context(FrameVec[I].FuncName);
344       Ret.first->second.setContext(Context);
345     }
346     FunctionProfile = &Ret.first->second;
347   }
348 
349   return *FunctionProfile;
350 }
351 
352 RangeSample
353 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
354   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
355   if (FillZeroForAllFuncs) {
356     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
357       for (auto &R : FuncI.second.Ranges) {
358         Ranges[{R.first, R.second - 1}] += 0;
359       }
360     }
361   } else {
362     // For each range, we search for all ranges of the function it belongs to
363     // and initialize it with zero count, so it remains zero if doesn't hit any
364     // samples. This is to be consistent with compiler that interpret zero count
365     // as unexecuted(cold).
366     for (auto I : RangeCounter) {
367       uint64_t StartOffset = I.first.first;
368       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
369         Ranges[{Range.first, Range.second - 1}] += 0;
370     }
371   }
372   RangeSample DisjointRanges;
373   findDisjointRanges(DisjointRanges, Ranges);
374   return DisjointRanges;
375 }
376 
377 void ProfileGenerator::populateBodySamplesForAllFunctions(
378     const RangeSample &RangeCounter) {
379   for (auto Range : preprocessRangeCounter(RangeCounter)) {
380     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
381     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
382     uint64_t Count = Range.second;
383 
384     InstructionPointer IP(Binary, RangeBegin, true);
385     // Disjoint ranges may have range in the middle of two instr,
386     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
387     // can be Addr1+1 to Addr2-1. We should ignore such range.
388     if (IP.Address > RangeEnd)
389       continue;
390 
391     do {
392       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
393       const SampleContextFrameVector &FrameVec =
394           Binary->getFrameLocationStack(Offset);
395       if (!FrameVec.empty()) {
396         FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec);
397         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
398                                             Count);
399       }
400     } while (IP.advance() && IP.Address <= RangeEnd);
401   }
402 }
403 
404 StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
405   // Get the function range by branch target if it's a call branch.
406   auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
407 
408   // We won't accumulate sample count for a range whose start is not the real
409   // function entry such as outlined function or inner labels.
410   if (!FRange || !FRange->IsFuncEntry)
411     return StringRef();
412 
413   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
414 }
415 
416 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
417     const BranchSample &BranchCounters) {
418   for (auto Entry : BranchCounters) {
419     uint64_t SourceOffset = Entry.first.first;
420     uint64_t TargetOffset = Entry.first.second;
421     uint64_t Count = Entry.second;
422     assert(Count != 0 && "Unexpected zero weight branch");
423 
424     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
425     if (CalleeName.size() == 0)
426       continue;
427     // Record called target sample and its count.
428     const SampleContextFrameVector &FrameVec =
429         Binary->getFrameLocationStack(SourceOffset);
430     if (!FrameVec.empty()) {
431       FunctionSamples &FunctionProfile = getLeafFrameProfile(FrameVec);
432       FunctionProfile.addCalledTargetSamples(
433           FrameVec.back().Location.LineOffset,
434           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
435           CalleeName, Count);
436     }
437     // Add head samples for callee.
438     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
439     CalleeProfile.addHeadSamples(Count);
440   }
441 }
442 
443 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
444     const SampleContextFrameVector &Context, bool WasLeafInlined) {
445   auto I = ProfileMap.find(SampleContext(Context));
446   if (I == ProfileMap.end()) {
447     // Save the new context for future references.
448     SampleContextFrames NewContext = *Contexts.insert(Context).first;
449     SampleContext FContext(NewContext, RawContext);
450     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
451     if (WasLeafInlined)
452       FContext.setAttribute(ContextWasInlined);
453     FunctionSamples &FProfile = Ret.first->second;
454     FProfile.setContext(FContext);
455     return Ret.first->second;
456   }
457   return I->second;
458 }
459 
460 void CSProfileGenerator::generateProfile() {
461   FunctionSamples::ProfileIsCS = true;
462 
463   if (Binary->getTrackFuncContextSize())
464     computeSizeForProfiledFunctions();
465 
466   if (Binary->usePseudoProbes()) {
467     // Enable pseudo probe functionalities in SampleProf
468     FunctionSamples::ProfileIsProbeBased = true;
469     generateProbeBasedProfile();
470   } else {
471     generateLineNumBasedProfile();
472   }
473   postProcessProfiles();
474 }
475 
476 void CSProfileGenerator::computeSizeForProfiledFunctions() {
477   // Hash map to deduplicate the function range and the item is a pair of
478   // function start and end offset.
479   std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
480   // Go through all the ranges in the CS counters, use the start of the range to
481   // look up the function it belongs and record the function range.
482   for (const auto &CI : SampleCounters) {
483     for (auto Item : CI.second.RangeCounter) {
484       // FIXME: Filter the bogus crossing function range.
485       uint64_t StartOffset = Item.first.first;
486       // Note that a function can be spilt into multiple ranges, so get all
487       // ranges of the function.
488       for (const auto &Range : Binary->getRangesForOffset(StartOffset))
489         AggregatedRanges[Range.first] = Range.second;
490     }
491   }
492 
493   for (auto I : AggregatedRanges) {
494     uint64_t StartOffset = I.first;
495     uint64_t EndOffset = I.second;
496     Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
497   }
498 }
499 
500 void CSProfileGenerator::generateLineNumBasedProfile() {
501   for (const auto &CI : SampleCounters) {
502     const StringBasedCtxKey *CtxKey =
503         dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
504     // Get or create function profile for the range
505     FunctionSamples &FunctionProfile =
506         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
507 
508     // Fill in function body samples
509     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
510     // Fill in boundary sample counts as well as call site samples for calls
511     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
512                                        CI.second.BranchCounter);
513   }
514   // Fill in call site value sample for inlined calls and also use context to
515   // infer missing samples. Since we don't have call count for inlined
516   // functions, we estimate it from inlinee's profile using the entry of the
517   // body sample.
518   populateInferredFunctionSamples();
519 
520   updateTotalSamples();
521 }
522 
523 void CSProfileGenerator::populateBodySamplesForFunction(
524     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
525   // Compute disjoint ranges first, so we can use MAX
526   // for calculating count for each location.
527   RangeSample Ranges;
528   findDisjointRanges(Ranges, RangeCounter);
529   for (auto Range : Ranges) {
530     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
531     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
532     uint64_t Count = Range.second;
533     // Disjoint ranges have introduce zero-filled gap that
534     // doesn't belong to current context, filter them out.
535     if (Count == 0)
536       continue;
537 
538     InstructionPointer IP(Binary, RangeBegin, true);
539     // Disjoint ranges may have range in the middle of two instr,
540     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
541     // can be Addr1+1 to Addr2-1. We should ignore such range.
542     if (IP.Address > RangeEnd)
543       continue;
544 
545     do {
546       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
547       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
548       if (LeafLoc.hasValue()) {
549         // Recording body sample for this specific context
550         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
551       }
552     } while (IP.advance() && IP.Address <= RangeEnd);
553   }
554 }
555 
556 void CSProfileGenerator::populateBoundarySamplesForFunction(
557     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
558     const BranchSample &BranchCounters) {
559 
560   for (auto Entry : BranchCounters) {
561     uint64_t SourceOffset = Entry.first.first;
562     uint64_t TargetOffset = Entry.first.second;
563     uint64_t Count = Entry.second;
564     assert(Count != 0 && "Unexpected zero weight branch");
565 
566     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
567     if (CalleeName.size() == 0)
568       continue;
569 
570     // Record called target sample and its count
571     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
572     if (!LeafLoc.hasValue())
573       continue;
574     FunctionProfile.addCalledTargetSamples(
575         LeafLoc->Location.LineOffset,
576         getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
577         Count);
578 
579     // Record head sample for called target(callee)
580     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
581     assert(CalleeCtx.back().FuncName == LeafLoc->FuncName &&
582            "Leaf function name doesn't match");
583     CalleeCtx.back() = *LeafLoc;
584     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
585     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
586     CalleeProfile.addHeadSamples(Count);
587   }
588 }
589 
590 static SampleContextFrame
591 getCallerContext(SampleContextFrames CalleeContext,
592                  SampleContextFrameVector &CallerContext) {
593   assert(CalleeContext.size() > 1 && "Unexpected empty context");
594   CalleeContext = CalleeContext.drop_back();
595   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
596   SampleContextFrame CallerFrame = CallerContext.back();
597   CallerContext.back().Location = LineLocation(0, 0);
598   return CallerFrame;
599 }
600 
601 void CSProfileGenerator::populateInferredFunctionSamples() {
602   for (const auto &Item : ProfileMap) {
603     const auto &CalleeContext = Item.first;
604     const FunctionSamples &CalleeProfile = Item.second;
605 
606     // If we already have head sample counts, we must have value profile
607     // for call sites added already. Skip to avoid double counting.
608     if (CalleeProfile.getHeadSamples())
609       continue;
610     // If we don't have context, nothing to do for caller's call site.
611     // This could happen for entry point function.
612     if (CalleeContext.isBaseContext())
613       continue;
614 
615     // Infer Caller's frame loc and context ID through string splitting
616     SampleContextFrameVector CallerContextId;
617     SampleContextFrame &&CallerLeafFrameLoc =
618         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
619     SampleContextFrames CallerContext(CallerContextId);
620 
621     // It's possible that we haven't seen any sample directly in the caller,
622     // in which case CallerProfile will not exist. But we can't modify
623     // ProfileMap while iterating it.
624     // TODO: created function profile for those callers too
625     if (ProfileMap.find(CallerContext) == ProfileMap.end())
626       continue;
627     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
628 
629     // Since we don't have call count for inlined functions, we
630     // estimate it from inlinee's profile using entry body sample.
631     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
632     // If we don't have samples with location, use 1 to indicate live.
633     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
634       EstimatedCallCount = 1;
635     CallerProfile.addCalledTargetSamples(
636         CallerLeafFrameLoc.Location.LineOffset,
637         CallerLeafFrameLoc.Location.Discriminator,
638         CalleeProfile.getContext().getName(), EstimatedCallCount);
639     CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset,
640                                  CallerLeafFrameLoc.Location.Discriminator,
641                                  EstimatedCallCount);
642   }
643 }
644 
645 void CSProfileGenerator::postProcessProfiles() {
646   // Compute hot/cold threshold based on profile. This will be used for cold
647   // context profile merging/trimming.
648   computeSummaryAndThreshold();
649 
650   // Run global pre-inliner to adjust/merge context profile based on estimated
651   // inline decisions.
652   if (EnableCSPreInliner) {
653     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
654         .run();
655     // Turn off the profile merger by default unless it is explicitly enabled.
656     if (!CSProfMergeColdContext.getNumOccurrences())
657       CSProfMergeColdContext = false;
658   }
659 
660   // Trim and merge cold context profile using cold threshold above.
661   if (CSProfTrimColdContext || CSProfMergeColdContext) {
662     SampleContextTrimmer(ProfileMap)
663         .trimAndMergeColdContextProfiles(
664             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
665             CSProfMaxColdContextDepth, EnableCSPreInliner);
666   }
667 }
668 
669 void CSProfileGenerator::computeSummaryAndThreshold() {
670   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
671   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
672   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
673       (Summary->getDetailedSummary()));
674   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
675       (Summary->getDetailedSummary()));
676 }
677 
678 // Helper function to extract context prefix string stack
679 // Extract context stack for reusing, leaf context stack will
680 // be added compressed while looking up function profile
681 static void extractPrefixContextStack(
682     SampleContextFrameVector &ContextStack,
683     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
684     ProfiledBinary *Binary) {
685   for (const auto *P : Probes) {
686     Binary->getInlineContextForProbe(P, ContextStack, true);
687   }
688 }
689 
690 void CSProfileGenerator::generateProbeBasedProfile() {
691   for (const auto &CI : SampleCounters) {
692     const ProbeBasedCtxKey *CtxKey =
693         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
694     SampleContextFrameVector ContextStack;
695     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
696     // Fill in function body samples from probes, also infer caller's samples
697     // from callee's probe
698     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
699     // Fill in boundary samples for a call probe
700     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
701   }
702 }
703 
704 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
705                                                 ProbeCounterMap &ProbeCounter) {
706   RangeSample Ranges;
707   findDisjointRanges(Ranges, RangeCounter);
708   for (const auto &Range : Ranges) {
709     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
710     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
711     uint64_t Count = Range.second;
712     // Disjoint ranges have introduce zero-filled gap that
713     // doesn't belong to current context, filter them out.
714     if (Count == 0)
715       continue;
716 
717     InstructionPointer IP(Binary, RangeBegin, true);
718     // Disjoint ranges may have range in the middle of two instr,
719     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
720     // can be Addr1+1 to Addr2-1. We should ignore such range.
721     if (IP.Address > RangeEnd)
722       continue;
723 
724     do {
725       const AddressProbesMap &Address2ProbesMap =
726           Binary->getAddress2ProbesMap();
727       auto It = Address2ProbesMap.find(IP.Address);
728       if (It != Address2ProbesMap.end()) {
729         for (const auto &Probe : It->second) {
730           if (!Probe.isBlock())
731             continue;
732           ProbeCounter[&Probe] += Count;
733         }
734       }
735     } while (IP.advance() && IP.Address <= RangeEnd);
736   }
737 }
738 
739 void CSProfileGenerator::populateBodySamplesWithProbes(
740     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
741   ProbeCounterMap ProbeCounter;
742   // Extract the top frame probes by looking up each address among the range in
743   // the Address2ProbeMap
744   extractProbesFromRange(RangeCounter, ProbeCounter);
745   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
746                      std::unordered_set<FunctionSamples *>>
747       FrameSamples;
748   for (auto PI : ProbeCounter) {
749     const MCDecodedPseudoProbe *Probe = PI.first;
750     uint64_t Count = PI.second;
751     FunctionSamples &FunctionProfile =
752         getFunctionProfileForLeafProbe(ContextStack, Probe);
753     // Record the current frame and FunctionProfile whenever samples are
754     // collected for non-danglie probes. This is for reporting all of the
755     // zero count probes of the frame later.
756     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
757     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
758     FunctionProfile.addTotalSamples(Count);
759     if (Probe->isEntry()) {
760       FunctionProfile.addHeadSamples(Count);
761       // Look up for the caller's function profile
762       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
763       if (InlinerDesc != nullptr) {
764         // Since the context id will be compressed, we have to use callee's
765         // context id to infer caller's context id to ensure they share the
766         // same context prefix.
767         SampleContextFrames CalleeContextId =
768             FunctionProfile.getContext().getContextFrames();
769         SampleContextFrameVector CallerContextId;
770         SampleContextFrame &&CallerLeafFrameLoc =
771             getCallerContext(CalleeContextId, CallerContextId);
772         uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset;
773         assert(CallerIndex &&
774                "Inferred caller's location index shouldn't be zero!");
775         FunctionSamples &CallerProfile =
776             getFunctionProfileForContext(CallerContextId);
777         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
778         CallerProfile.addBodySamples(CallerIndex, 0, Count);
779         CallerProfile.addTotalSamples(Count);
780         CallerProfile.addCalledTargetSamples(
781             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
782       }
783     }
784   }
785 
786   // Assign zero count for remaining probes without sample hits to
787   // differentiate from probes optimized away, of which the counts are unknown
788   // and will be inferred by the compiler.
789   for (auto &I : FrameSamples) {
790     for (auto *FunctionProfile : I.second) {
791       for (auto *Probe : I.first->getProbes()) {
792         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
793       }
794     }
795   }
796 }
797 
798 void CSProfileGenerator::populateBoundarySamplesWithProbes(
799     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
800   for (auto BI : BranchCounter) {
801     uint64_t SourceOffset = BI.first.first;
802     uint64_t TargetOffset = BI.first.second;
803     uint64_t Count = BI.second;
804     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
805     const MCDecodedPseudoProbe *CallProbe =
806         Binary->getCallProbeForAddr(SourceAddress);
807     if (CallProbe == nullptr)
808       continue;
809     FunctionSamples &FunctionProfile =
810         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
811     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
812     FunctionProfile.addTotalSamples(Count);
813     StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
814     if (CalleeName.size() == 0)
815       continue;
816     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
817                                            Count);
818   }
819 }
820 
821 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
822     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
823 
824   // Explicitly copy the context for appending the leaf context
825   SampleContextFrameVector NewContextStack(ContextStack.begin(),
826                                            ContextStack.end());
827   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
828   // For leaf inlined context with the top frame, we should strip off the top
829   // frame's probe id, like:
830   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
831   auto LeafFrame = NewContextStack.back();
832   LeafFrame.Location = LineLocation(0, 0);
833   NewContextStack.pop_back();
834   // Compress the context string except for the leaf frame
835   CSProfileGenerator::compressRecursionContext(NewContextStack);
836   CSProfileGenerator::trimContext(NewContextStack);
837   NewContextStack.push_back(LeafFrame);
838 
839   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
840   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
841   FunctionSamples &FunctionProile =
842       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
843   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
844   return FunctionProile;
845 }
846 
847 } // end namespace sampleprof
848 } // end namespace llvm
849