1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "ProfiledBinary.h"
11 #include "llvm/ProfileData/ProfileCommon.h"
12 #include <unordered_set>
13 
14 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
15                                     cl::Required,
16                                     cl::desc("Output profile file"));
17 static cl::alias OutputA("o", cl::desc("Alias for --output"),
18                          cl::aliasopt(OutputFilename));
19 
20 static cl::opt<SampleProfileFormat> OutputFormat(
21     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
22     cl::values(
23         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
24         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
25         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
26         clEnumValN(SPF_Text, "text", "Text encoding"),
27         clEnumValN(SPF_GCC, "gcc",
28                    "GCC encoding (only meaningful for -sample)")));
29 
30 cl::opt<bool> UseMD5(
31     "use-md5", cl::init(false), cl::Hidden,
32     cl::desc("Use md5 to represent function names in the output profile (only "
33              "meaningful for -extbinary)"));
34 
35 static cl::opt<int32_t, true> RecursionCompression(
36     "compress-recursion",
37     cl::desc("Compressing recursion by deduplicating adjacent frame "
38              "sequences up to the specified size. -1 means no size limit."),
39     cl::Hidden,
40     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
41 
42 static cl::opt<bool> CSProfMergeColdContext(
43     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
44     cl::desc("If the total count of context profile is smaller than "
45              "the threshold, it will be merged into context-less base "
46              "profile."));
47 
48 static cl::opt<bool> CSProfTrimColdContext(
49     "csprof-trim-cold-context", cl::init(false), cl::ZeroOrMore,
50     cl::desc("If the total count of the profile after all merge is done "
51              "is still smaller than threshold, it will be trimmed."));
52 
53 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
54     "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore,
55     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
56              "context-less base profile"));
57 
58 static cl::opt<int, true> CSProfMaxContextDepth(
59     "csprof-max-context-depth", cl::ZeroOrMore,
60     cl::desc("Keep the last K contexts while merging profile. -1 means no "
61              "depth limit."),
62     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
63 
64 extern cl::opt<int> ProfileSummaryCutoffCold;
65 
66 using namespace llvm;
67 using namespace sampleprof;
68 
69 namespace llvm {
70 namespace sampleprof {
71 
72 // Initialize the MaxCompressionSize to -1 which means no size limit
73 int32_t CSProfileGenerator::MaxCompressionSize = -1;
74 
75 int CSProfileGenerator::MaxContextDepth = -1;
76 
77 std::unique_ptr<ProfileGeneratorBase>
78 ProfileGeneratorBase::create(ProfiledBinary *Binary,
79                              const ContextSampleCounterMap &SampleCounters,
80                              bool ProfileIsCS) {
81   std::unique_ptr<ProfileGeneratorBase> Generator;
82   if (ProfileIsCS) {
83     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
84   } else {
85     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
86   }
87 
88   return Generator;
89 }
90 
91 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
92                                  SampleProfileMap &ProfileMap) {
93   if (std::error_code EC = Writer->write(ProfileMap))
94     exitWithError(std::move(EC));
95 }
96 
97 void ProfileGeneratorBase::write() {
98   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
99   if (std::error_code EC = WriterOrErr.getError())
100     exitWithError(EC, OutputFilename);
101 
102   if (UseMD5) {
103     if (OutputFormat != SPF_Ext_Binary)
104       WithColor::warning() << "-use-md5 is ignored. Specify "
105                               "--format=extbinary to enable it\n";
106     else
107       WriterOrErr.get()->setUseMD5();
108   }
109 
110   write(std::move(WriterOrErr.get()), ProfileMap);
111 }
112 
113 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
114                                               const RangeSample &Ranges) {
115 
116   /*
117   Regions may overlap with each other. Using the boundary info, find all
118   disjoint ranges and their sample count. BoundaryPoint contains the count
119   multiple samples begin/end at this points.
120 
121   |<--100-->|           Sample1
122   |<------200------>|   Sample2
123   A         B       C
124 
125   In the example above,
126   Sample1 begins at A, ends at B, its value is 100.
127   Sample2 beings at A, ends at C, its value is 200.
128   For A, BeginCount is the sum of sample begins at A, which is 300 and no
129   samples ends at A, so EndCount is 0.
130   Then boundary points A, B, and C with begin/end counts are:
131   A: (300, 0)
132   B: (0, 100)
133   C: (0, 200)
134   */
135   struct BoundaryPoint {
136     // Sum of sample counts beginning at this point
137     uint64_t BeginCount = UINT64_MAX;
138     // Sum of sample counts ending at this point
139     uint64_t EndCount = UINT64_MAX;
140     // Is the begin point of a zero range.
141     bool IsZeroRangeBegin = false;
142     // Is the end point of a zero range.
143     bool IsZeroRangeEnd = false;
144 
145     void addBeginCount(uint64_t Count) {
146       if (BeginCount == UINT64_MAX)
147         BeginCount = 0;
148       BeginCount += Count;
149     }
150 
151     void addEndCount(uint64_t Count) {
152       if (EndCount == UINT64_MAX)
153         EndCount = 0;
154       EndCount += Count;
155     }
156   };
157 
158   /*
159   For the above example. With boundary points, follwing logic finds two
160   disjoint region of
161 
162   [A,B]:   300
163   [B+1,C]: 200
164 
165   If there is a boundary point that both begin and end, the point itself
166   becomes a separate disjoint region. For example, if we have original
167   ranges of
168 
169   |<--- 100 --->|
170                 |<--- 200 --->|
171   A             B             C
172 
173   there are three boundary points with their begin/end counts of
174 
175   A: (100, 0)
176   B: (200, 100)
177   C: (0, 200)
178 
179   the disjoint ranges would be
180 
181   [A, B-1]: 100
182   [B, B]:   300
183   [B+1, C]: 200.
184 
185   Example for zero value range:
186 
187     |<--- 100 --->|
188                        |<--- 200 --->|
189   |<---------------  0 ----------------->|
190   A  B            C    D             E   F
191 
192   [A, B-1]  : 0
193   [B, C]    : 100
194   [C+1, D-1]: 0
195   [D, E]    : 200
196   [E+1, F]  : 0
197   */
198   std::map<uint64_t, BoundaryPoint> Boundaries;
199 
200   for (auto Item : Ranges) {
201     assert(Item.first.first <= Item.first.second &&
202            "Invalid instruction range");
203     auto &BeginPoint = Boundaries[Item.first.first];
204     auto &EndPoint = Boundaries[Item.first.second];
205     uint64_t Count = Item.second;
206 
207     BeginPoint.addBeginCount(Count);
208     EndPoint.addEndCount(Count);
209     if (Count == 0) {
210       BeginPoint.IsZeroRangeBegin = true;
211       EndPoint.IsZeroRangeEnd = true;
212     }
213   }
214 
215   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
216   // and the next valid address
217   uint64_t BeginAddress = UINT64_MAX;
218   int ZeroRangeDepth = 0;
219   uint64_t Count = 0;
220   for (auto Item : Boundaries) {
221     uint64_t Address = Item.first;
222     BoundaryPoint &Point = Item.second;
223     if (Point.BeginCount != UINT64_MAX) {
224       if (BeginAddress != UINT64_MAX)
225         DisjointRanges[{BeginAddress, Address - 1}] = Count;
226       Count += Point.BeginCount;
227       BeginAddress = Address;
228       ZeroRangeDepth += Point.IsZeroRangeBegin;
229     }
230     if (Point.EndCount != UINT64_MAX) {
231       assert((BeginAddress != UINT64_MAX) &&
232              "First boundary point cannot be 'end' point");
233       DisjointRanges[{BeginAddress, Address}] = Count;
234       assert(Count >= Point.EndCount && "Mismatched live ranges");
235       Count -= Point.EndCount;
236       BeginAddress = Address + 1;
237       ZeroRangeDepth -= Point.IsZeroRangeEnd;
238       // If the remaining count is zero and it's no longer in a zero range, this
239       // means we consume all the ranges before, thus mark BeginAddress as
240       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
241       //  [<---- 10 ---->]
242       //                       [<---- 20 ---->]
243       //   A             B     C              D
244       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
245       // have the [B+1, C-1] zero range.
246       if (Count == 0 && ZeroRangeDepth == 0)
247         BeginAddress = UINT64_MAX;
248     }
249   }
250 }
251 
252 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
253     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
254     uint64_t Count) {
255   // Filter out invalid negative(int type) lineOffset
256   if (LeafLoc.Callsite.LineOffset & 0x80000000)
257     return;
258   // Use the maximum count of samples with same line location
259   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
260       LeafLoc.Callsite.LineOffset, LeafLoc.Callsite.Discriminator);
261   uint64_t PreviousCount = R ? R.get() : 0;
262   if (PreviousCount <= Count) {
263     FunctionProfile.addBodySamples(LeafLoc.Callsite.LineOffset,
264                                    LeafLoc.Callsite.Discriminator,
265                                    Count - PreviousCount);
266   }
267 }
268 
269 FunctionSamples &
270 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
271   SampleContext Context(FuncName);
272   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
273   if (Ret.second) {
274     FunctionSamples &FProfile = Ret.first->second;
275     FProfile.setContext(Context);
276   }
277   return Ret.first->second;
278 }
279 
280 void ProfileGenerator::generateProfile() {
281   if (Binary->usePseudoProbes()) {
282     // TODO: Support probe based profile generation
283   } else {
284     generateLineNumBasedProfile();
285   }
286 }
287 
288 void ProfileGenerator::generateLineNumBasedProfile() {
289   assert(SampleCounters.size() == 1 &&
290          "Must have one entry for profile generation.");
291   const SampleCounter &SC = SampleCounters.begin()->second;
292   // Fill in function body samples
293   populateBodySamplesForAllFunctions(SC.RangeCounter);
294   // Fill in boundary sample counts as well as call site samples for calls
295   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
296 }
297 
298 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
299     const SampleContextFrameVector &FrameVec, uint64_t Count) {
300   // Get top level profile
301   FunctionSamples *FunctionProfile =
302       &getTopLevelFunctionProfile(FrameVec[0].CallerName);
303   FunctionProfile->addTotalSamples(Count);
304 
305   for (size_t I = 1; I < FrameVec.size(); I++) {
306     FunctionSamplesMap &SamplesMap =
307         FunctionProfile->functionSamplesAt(FrameVec[I - 1].Callsite);
308     auto Ret =
309         SamplesMap.emplace(FrameVec[I].CallerName.str(), FunctionSamples());
310     if (Ret.second) {
311       SampleContext Context(FrameVec[I].CallerName);
312       Ret.first->second.setContext(Context);
313     }
314     FunctionProfile = &Ret.first->second;
315     FunctionProfile->addTotalSamples(Count);
316   }
317 
318   return *FunctionProfile;
319 }
320 
321 RangeSample
322 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
323   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
324   // For each range, we search for the range of the function it belongs to and
325   // initialize it with zero count, so it remains zero if doesn't hit any
326   // samples. This is to be consistent with compiler that interpret zero count
327   // as unexecuted(cold).
328   for (auto I : RangeCounter) {
329     uint64_t RangeBegin = I.first.first;
330     uint64_t RangeEnd = I.first.second;
331     // Find the function offset range the current range begin belongs to.
332     auto FuncRange = Binary->findFuncOffsetRange(RangeBegin);
333     if (FuncRange.second == 0)
334       WithColor::warning()
335           << "[" << format("%8" PRIx64, RangeBegin) << " - "
336           << format("%8" PRIx64, RangeEnd)
337           << "]: Invalid range or disassembling error in profiled binary.\n";
338     else if (RangeEnd > FuncRange.second)
339       WithColor::warning() << "[" << format("%8" PRIx64, RangeBegin) << " - "
340                            << format("%8" PRIx64, RangeEnd)
341                            << "]: Range is across different functions.\n";
342     else
343       Ranges[FuncRange] += 0;
344   }
345   RangeSample DisjointRanges;
346   findDisjointRanges(DisjointRanges, Ranges);
347   return DisjointRanges;
348 }
349 
350 void ProfileGenerator::populateBodySamplesForAllFunctions(
351     const RangeSample &RangeCounter) {
352   for (auto Range : preprocessRangeCounter(RangeCounter)) {
353     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
354     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
355     uint64_t Count = Range.second;
356 
357     InstructionPointer IP(Binary, RangeBegin, true);
358     // Disjoint ranges may have range in the middle of two instr,
359     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
360     // can be Addr1+1 to Addr2-1. We should ignore such range.
361     while (IP.Address <= RangeEnd) {
362       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
363       const SampleContextFrameVector &FrameVec =
364           Binary->getFrameLocationStack(Offset);
365       if (!FrameVec.empty()) {
366         FunctionSamples &FunctionProfile =
367             getLeafProfileAndAddTotalSamples(FrameVec, Count);
368         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
369                                             Count);
370       }
371       // Move to next IP within the range.
372       IP.advance();
373     }
374   }
375 }
376 
377 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
378     const BranchSample &BranchCounters) {
379   for (auto Entry : BranchCounters) {
380     uint64_t SourceOffset = Entry.first.first;
381     uint64_t TargetOffset = Entry.first.second;
382     uint64_t Count = Entry.second;
383     assert(Count != 0 && "Unexpected zero weight branch");
384 
385     // Get the callee name by branch target if it's a call branch.
386     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
387         Binary->getFuncFromStartOffset(TargetOffset));
388     if (CalleeName.size() == 0)
389       continue;
390     // Record called target sample and its count.
391     const SampleContextFrameVector &FrameVec =
392         Binary->getFrameLocationStack(SourceOffset);
393     if (!FrameVec.empty()) {
394       FunctionSamples &FunctionProfile =
395           getLeafProfileAndAddTotalSamples(FrameVec, Count);
396       FunctionProfile.addCalledTargetSamples(
397           FrameVec.back().Callsite.LineOffset,
398           FrameVec.back().Callsite.Discriminator, CalleeName, Count);
399     }
400     // Add head samples for callee.
401     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
402     CalleeProfile.addHeadSamples(Count);
403   }
404 }
405 
406 FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
407     const SampleContextFrameVector &Context, bool WasLeafInlined) {
408   auto I = ProfileMap.find(SampleContext(Context));
409   if (I == ProfileMap.end()) {
410     // Save the new context for future references.
411     SampleContextFrames NewContext = *Contexts.insert(Context).first;
412     SampleContext FContext(NewContext, RawContext);
413     auto Ret = ProfileMap.emplace(FContext, FunctionSamples());
414     if (WasLeafInlined)
415       FContext.setAttribute(ContextWasInlined);
416     FunctionSamples &FProfile = Ret.first->second;
417     FProfile.setContext(FContext);
418     return Ret.first->second;
419   }
420   return I->second;
421 }
422 
423 void CSProfileGenerator::generateProfile() {
424   FunctionSamples::ProfileIsCS = true;
425 
426   if (Binary->getTrackFuncContextSize())
427     computeSizeForProfiledFunctions();
428 
429   if (Binary->usePseudoProbes()) {
430     // Enable pseudo probe functionalities in SampleProf
431     FunctionSamples::ProfileIsProbeBased = true;
432     generateProbeBasedProfile();
433   } else {
434     generateLineNumBasedProfile();
435   }
436   postProcessProfiles();
437 }
438 
439 void CSProfileGenerator::computeSizeForProfiledFunctions() {
440   // Hash map to deduplicate the function range and the item is a pair of
441   // function start and end offset.
442   std::unordered_map<uint64_t, uint64_t> FuncRanges;
443   // Go through all the ranges in the CS counters, use the start of the range to
444   // look up the function it belongs and record the function range.
445   for (const auto &CI : SampleCounters) {
446     for (auto Item : CI.second.RangeCounter) {
447       // FIXME: Filter the bogus crossing function range.
448       uint64_t RangeStartOffset = Item.first.first;
449       auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
450       if (FuncRange.second != 0)
451         FuncRanges[FuncRange.first] = FuncRange.second;
452     }
453   }
454 
455   for (auto I : FuncRanges) {
456     uint64_t StartOffset = I.first;
457     uint64_t EndOffset = I.second;
458     Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
459   }
460 }
461 
462 void CSProfileGenerator::generateLineNumBasedProfile() {
463   for (const auto &CI : SampleCounters) {
464     const StringBasedCtxKey *CtxKey =
465         dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
466     // Get or create function profile for the range
467     FunctionSamples &FunctionProfile =
468         getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined);
469 
470     // Fill in function body samples
471     populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter);
472     // Fill in boundary sample counts as well as call site samples for calls
473     populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile,
474                                        CI.second.BranchCounter);
475   }
476   // Fill in call site value sample for inlined calls and also use context to
477   // infer missing samples. Since we don't have call count for inlined
478   // functions, we estimate it from inlinee's profile using the entry of the
479   // body sample.
480   populateInferredFunctionSamples();
481 }
482 
483 void CSProfileGenerator::populateBodySamplesForFunction(
484     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
485   // Compute disjoint ranges first, so we can use MAX
486   // for calculating count for each location.
487   RangeSample Ranges;
488   findDisjointRanges(Ranges, RangeCounter);
489   for (auto Range : Ranges) {
490     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
491     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
492     uint64_t Count = Range.second;
493     // Disjoint ranges have introduce zero-filled gap that
494     // doesn't belong to current context, filter them out.
495     if (Count == 0)
496       continue;
497 
498     InstructionPointer IP(Binary, RangeBegin, true);
499     // Disjoint ranges may have range in the middle of two instr,
500     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
501     // can be Addr1+1 to Addr2-1. We should ignore such range.
502     while (IP.Address <= RangeEnd) {
503       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
504       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
505       if (LeafLoc.hasValue()) {
506         // Recording body sample for this specific context
507         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
508       }
509       // Accumulate total sample count even it's a line with invalid debug info
510       FunctionProfile.addTotalSamples(Count);
511       // Move to next IP within the range
512       IP.advance();
513     }
514   }
515 }
516 
517 void CSProfileGenerator::populateBoundarySamplesForFunction(
518     SampleContextFrames ContextId, FunctionSamples &FunctionProfile,
519     const BranchSample &BranchCounters) {
520 
521   for (auto Entry : BranchCounters) {
522     uint64_t SourceOffset = Entry.first.first;
523     uint64_t TargetOffset = Entry.first.second;
524     uint64_t Count = Entry.second;
525     assert(Count != 0 && "Unexpected zero weight branch");
526 
527     // Get the callee name by branch target if it's a call branch
528     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
529         Binary->getFuncFromStartOffset(TargetOffset));
530     if (CalleeName.size() == 0)
531       continue;
532 
533     // Record called target sample and its count
534     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
535     if (!LeafLoc.hasValue())
536       continue;
537     FunctionProfile.addCalledTargetSamples(LeafLoc->Callsite.LineOffset,
538                                            LeafLoc->Callsite.Discriminator,
539                                            CalleeName, Count);
540 
541     // Record head sample for called target(callee)
542     SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end());
543     assert(CalleeCtx.back().CallerName == LeafLoc->CallerName &&
544            "Leaf function name doesn't match");
545     CalleeCtx.back() = *LeafLoc;
546     CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0));
547     FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx);
548     CalleeProfile.addHeadSamples(Count);
549   }
550 }
551 
552 static SampleContextFrame
553 getCallerContext(SampleContextFrames CalleeContext,
554                  SampleContextFrameVector &CallerContext) {
555   assert(CalleeContext.size() > 1 && "Unexpected empty context");
556   CalleeContext = CalleeContext.drop_back();
557   CallerContext.assign(CalleeContext.begin(), CalleeContext.end());
558   SampleContextFrame CallerFrame = CallerContext.back();
559   CallerContext.back().Callsite = LineLocation(0, 0);
560   return CallerFrame;
561 }
562 
563 void CSProfileGenerator::populateInferredFunctionSamples() {
564   for (const auto &Item : ProfileMap) {
565     const auto &CalleeContext = Item.first;
566     const FunctionSamples &CalleeProfile = Item.second;
567 
568     // If we already have head sample counts, we must have value profile
569     // for call sites added already. Skip to avoid double counting.
570     if (CalleeProfile.getHeadSamples())
571       continue;
572     // If we don't have context, nothing to do for caller's call site.
573     // This could happen for entry point function.
574     if (CalleeContext.isBaseContext())
575       continue;
576 
577     // Infer Caller's frame loc and context ID through string splitting
578     SampleContextFrameVector CallerContextId;
579     SampleContextFrame &&CallerLeafFrameLoc =
580         getCallerContext(CalleeContext.getContextFrames(), CallerContextId);
581     SampleContextFrames CallerContext(CallerContextId);
582 
583     // It's possible that we haven't seen any sample directly in the caller,
584     // in which case CallerProfile will not exist. But we can't modify
585     // ProfileMap while iterating it.
586     // TODO: created function profile for those callers too
587     if (ProfileMap.find(CallerContext) == ProfileMap.end())
588       continue;
589     FunctionSamples &CallerProfile = ProfileMap[CallerContext];
590 
591     // Since we don't have call count for inlined functions, we
592     // estimate it from inlinee's profile using entry body sample.
593     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
594     // If we don't have samples with location, use 1 to indicate live.
595     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
596       EstimatedCallCount = 1;
597     CallerProfile.addCalledTargetSamples(
598         CallerLeafFrameLoc.Callsite.LineOffset,
599         CallerLeafFrameLoc.Callsite.Discriminator,
600         CalleeProfile.getContext().getName(), EstimatedCallCount);
601     CallerProfile.addBodySamples(CallerLeafFrameLoc.Callsite.LineOffset,
602                                  CallerLeafFrameLoc.Callsite.Discriminator,
603                                  EstimatedCallCount);
604     CallerProfile.addTotalSamples(EstimatedCallCount);
605   }
606 }
607 
608 void CSProfileGenerator::postProcessProfiles() {
609   // Compute hot/cold threshold based on profile. This will be used for cold
610   // context profile merging/trimming.
611   computeSummaryAndThreshold();
612 
613   // Run global pre-inliner to adjust/merge context profile based on estimated
614   // inline decisions.
615   if (EnableCSPreInliner) {
616     CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold)
617         .run();
618   }
619 
620   // Trim and merge cold context profile using cold threshold above. By default,
621   // we skip such merging and trimming when preinliner is on.
622   if (!EnableCSPreInliner || CSProfTrimColdContext.getNumOccurrences() ||
623       CSProfMergeColdContext.getNumOccurrences()) {
624     SampleContextTrimmer(ProfileMap)
625         .trimAndMergeColdContextProfiles(
626             HotCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
627             CSProfMaxColdContextDepth);
628   }
629 }
630 
631 void CSProfileGenerator::computeSummaryAndThreshold() {
632   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
633   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
634   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
635       (Summary->getDetailedSummary()));
636   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
637       (Summary->getDetailedSummary()));
638 }
639 
640 // Helper function to extract context prefix string stack
641 // Extract context stack for reusing, leaf context stack will
642 // be added compressed while looking up function profile
643 static void extractPrefixContextStack(
644     SampleContextFrameVector &ContextStack,
645     const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
646     ProfiledBinary *Binary) {
647   for (const auto *P : Probes) {
648     Binary->getInlineContextForProbe(P, ContextStack, true);
649   }
650 }
651 
652 void CSProfileGenerator::generateProbeBasedProfile() {
653   for (const auto &CI : SampleCounters) {
654     const ProbeBasedCtxKey *CtxKey =
655         dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
656     SampleContextFrameVector ContextStack;
657     extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
658     // Fill in function body samples from probes, also infer caller's samples
659     // from callee's probe
660     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
661     // Fill in boundary samples for a call probe
662     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
663   }
664 }
665 
666 void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
667                                                 ProbeCounterMap &ProbeCounter) {
668   RangeSample Ranges;
669   findDisjointRanges(Ranges, RangeCounter);
670   for (const auto &Range : Ranges) {
671     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
672     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
673     uint64_t Count = Range.second;
674     // Disjoint ranges have introduce zero-filled gap that
675     // doesn't belong to current context, filter them out.
676     if (Count == 0)
677       continue;
678 
679     InstructionPointer IP(Binary, RangeBegin, true);
680 
681     // Disjoint ranges may have range in the middle of two instr,
682     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
683     // can be Addr1+1 to Addr2-1. We should ignore such range.
684     if (IP.Address > RangeEnd)
685       continue;
686 
687     while (IP.Address <= RangeEnd) {
688       const AddressProbesMap &Address2ProbesMap =
689           Binary->getAddress2ProbesMap();
690       auto It = Address2ProbesMap.find(IP.Address);
691       if (It != Address2ProbesMap.end()) {
692         for (const auto &Probe : It->second) {
693           if (!Probe.isBlock())
694             continue;
695           ProbeCounter[&Probe] += Count;
696         }
697       }
698 
699       IP.advance();
700     }
701   }
702 }
703 
704 void CSProfileGenerator::populateBodySamplesWithProbes(
705     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
706   ProbeCounterMap ProbeCounter;
707   // Extract the top frame probes by looking up each address among the range in
708   // the Address2ProbeMap
709   extractProbesFromRange(RangeCounter, ProbeCounter);
710   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
711                      std::unordered_set<FunctionSamples *>>
712       FrameSamples;
713   for (auto PI : ProbeCounter) {
714     const MCDecodedPseudoProbe *Probe = PI.first;
715     uint64_t Count = PI.second;
716     FunctionSamples &FunctionProfile =
717         getFunctionProfileForLeafProbe(ContextStack, Probe);
718     // Record the current frame and FunctionProfile whenever samples are
719     // collected for non-danglie probes. This is for reporting all of the
720     // zero count probes of the frame later.
721     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
722     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
723     FunctionProfile.addTotalSamples(Count);
724     if (Probe->isEntry()) {
725       FunctionProfile.addHeadSamples(Count);
726       // Look up for the caller's function profile
727       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
728       if (InlinerDesc != nullptr) {
729         // Since the context id will be compressed, we have to use callee's
730         // context id to infer caller's context id to ensure they share the
731         // same context prefix.
732         SampleContextFrames CalleeContextId =
733             FunctionProfile.getContext().getContextFrames();
734         SampleContextFrameVector CallerContextId;
735         SampleContextFrame &&CallerLeafFrameLoc =
736             getCallerContext(CalleeContextId, CallerContextId);
737         uint64_t CallerIndex = CallerLeafFrameLoc.Callsite.LineOffset;
738         assert(CallerIndex &&
739                "Inferred caller's location index shouldn't be zero!");
740         FunctionSamples &CallerProfile =
741             getFunctionProfileForContext(CallerContextId);
742         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
743         CallerProfile.addBodySamples(CallerIndex, 0, Count);
744         CallerProfile.addTotalSamples(Count);
745         CallerProfile.addCalledTargetSamples(
746             CallerIndex, 0, FunctionProfile.getContext().getName(), Count);
747       }
748     }
749   }
750 
751   // Assign zero count for remaining probes without sample hits to
752   // differentiate from probes optimized away, of which the counts are unknown
753   // and will be inferred by the compiler.
754   for (auto &I : FrameSamples) {
755     for (auto *FunctionProfile : I.second) {
756       for (auto *Probe : I.first->getProbes()) {
757         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
758       }
759     }
760   }
761 }
762 
763 void CSProfileGenerator::populateBoundarySamplesWithProbes(
764     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
765   for (auto BI : BranchCounter) {
766     uint64_t SourceOffset = BI.first.first;
767     uint64_t TargetOffset = BI.first.second;
768     uint64_t Count = BI.second;
769     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
770     const MCDecodedPseudoProbe *CallProbe =
771         Binary->getCallProbeForAddr(SourceAddress);
772     if (CallProbe == nullptr)
773       continue;
774     FunctionSamples &FunctionProfile =
775         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
776     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
777     FunctionProfile.addTotalSamples(Count);
778     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
779         Binary->getFuncFromStartOffset(TargetOffset));
780     if (CalleeName.size() == 0)
781       continue;
782     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
783                                            Count);
784   }
785 }
786 
787 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
788     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
789 
790   // Explicitly copy the context for appending the leaf context
791   SampleContextFrameVector NewContextStack(ContextStack.begin(),
792                                            ContextStack.end());
793   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
794   // For leaf inlined context with the top frame, we should strip off the top
795   // frame's probe id, like:
796   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
797   auto LeafFrame = NewContextStack.back();
798   LeafFrame.Callsite = LineLocation(0, 0);
799   NewContextStack.pop_back();
800   // Compress the context string except for the leaf frame
801   CSProfileGenerator::compressRecursionContext(NewContextStack);
802   CSProfileGenerator::trimContext(NewContextStack);
803   NewContextStack.push_back(LeafFrame);
804 
805   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
806   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
807   FunctionSamples &FunctionProile =
808       getFunctionProfileForContext(NewContextStack, WasLeafInlined);
809   FunctionProile.setFunctionHash(FuncDesc->FuncHash);
810   return FunctionProile;
811 }
812 
813 } // end namespace sampleprof
814 } // end namespace llvm
815