10b57cec5SDimitry Andric //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the class that reads LLVM sample profiles. It
100b57cec5SDimitry Andric // supports three file formats: text, binary and gcov.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // The textual representation is useful for debugging and testing purposes. The
130b57cec5SDimitry Andric // binary representation is more compact, resulting in smaller file sizes.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // The gcov encoding is the one generated by GCC's AutoFDO profile creation
160b57cec5SDimitry Andric // tool (https://github.com/google/autofdo)
170b57cec5SDimitry Andric //
180b57cec5SDimitry Andric // All three encodings can be used interchangeably as an input sample profile.
190b57cec5SDimitry Andric //
200b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric #include "llvm/ProfileData/SampleProfReader.h"
230b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
240b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
250b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
2681ad6265SDimitry Andric #include "llvm/IR/Module.h"
270b57cec5SDimitry Andric #include "llvm/IR/ProfileSummary.h"
280b57cec5SDimitry Andric #include "llvm/ProfileData/ProfileCommon.h"
290b57cec5SDimitry Andric #include "llvm/ProfileData/SampleProf.h"
30fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h"
318bcb0991SDimitry Andric #include "llvm/Support/Compression.h"
320b57cec5SDimitry Andric #include "llvm/Support/ErrorOr.h"
33bdd1243dSDimitry Andric #include "llvm/Support/JSON.h"
340b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
350b57cec5SDimitry Andric #include "llvm/Support/LineIterator.h"
360b57cec5SDimitry Andric #include "llvm/Support/MD5.h"
370b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
38fe013be4SDimitry Andric #include "llvm/Support/VirtualFileSystem.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include <algorithm>
410b57cec5SDimitry Andric #include <cstddef>
420b57cec5SDimitry Andric #include <cstdint>
430b57cec5SDimitry Andric #include <limits>
440b57cec5SDimitry Andric #include <memory>
450b57cec5SDimitry Andric #include <system_error>
460b57cec5SDimitry Andric #include <vector>
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric using namespace llvm;
490b57cec5SDimitry Andric using namespace sampleprof;
500b57cec5SDimitry Andric 
51fe6060f1SDimitry Andric #define DEBUG_TYPE "samplepgo-reader"
52fe6060f1SDimitry Andric 
53fe6060f1SDimitry Andric // This internal option specifies if the profile uses FS discriminators.
54fe013be4SDimitry Andric // It only applies to text, and binary format profiles.
55fe6060f1SDimitry Andric // For ext-binary format profiles, the flag is set in the summary.
56fe6060f1SDimitry Andric static cl::opt<bool> ProfileIsFSDisciminator(
57fe6060f1SDimitry Andric     "profile-isfs", cl::Hidden, cl::init(false),
58349cc55cSDimitry Andric     cl::desc("Profile uses flow sensitive discriminators"));
59fe6060f1SDimitry Andric 
600b57cec5SDimitry Andric /// Dump the function profile for \p FName.
610b57cec5SDimitry Andric ///
62349cc55cSDimitry Andric /// \param FContext Name + context of the function to print.
630b57cec5SDimitry Andric /// \param OS Stream to emit the output to.
dumpFunctionProfile(const FunctionSamples & FS,raw_ostream & OS)64*c9157d92SDimitry Andric void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
650b57cec5SDimitry Andric                                               raw_ostream &OS) {
66*c9157d92SDimitry Andric   OS << "Function: " << FS.getContext().toString() << ": " << FS;
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)700b57cec5SDimitry Andric void SampleProfileReader::dump(raw_ostream &OS) {
71349cc55cSDimitry Andric   std::vector<NameFunctionSamples> V;
72349cc55cSDimitry Andric   sortFuncProfiles(Profiles, V);
73349cc55cSDimitry Andric   for (const auto &I : V)
74*c9157d92SDimitry Andric     dumpFunctionProfile(*I.second, OS);
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric 
dumpFunctionProfileJson(const FunctionSamples & S,json::OStream & JOS,bool TopLevel=false)77bdd1243dSDimitry Andric static void dumpFunctionProfileJson(const FunctionSamples &S,
78bdd1243dSDimitry Andric                                     json::OStream &JOS, bool TopLevel = false) {
79bdd1243dSDimitry Andric   auto DumpBody = [&](const BodySampleMap &BodySamples) {
80bdd1243dSDimitry Andric     for (const auto &I : BodySamples) {
81bdd1243dSDimitry Andric       const LineLocation &Loc = I.first;
82bdd1243dSDimitry Andric       const SampleRecord &Sample = I.second;
83bdd1243dSDimitry Andric       JOS.object([&] {
84bdd1243dSDimitry Andric         JOS.attribute("line", Loc.LineOffset);
85bdd1243dSDimitry Andric         if (Loc.Discriminator)
86bdd1243dSDimitry Andric           JOS.attribute("discriminator", Loc.Discriminator);
87bdd1243dSDimitry Andric         JOS.attribute("samples", Sample.getSamples());
88bdd1243dSDimitry Andric 
89bdd1243dSDimitry Andric         auto CallTargets = Sample.getSortedCallTargets();
90bdd1243dSDimitry Andric         if (!CallTargets.empty()) {
91bdd1243dSDimitry Andric           JOS.attributeArray("calls", [&] {
92bdd1243dSDimitry Andric             for (const auto &J : CallTargets) {
93bdd1243dSDimitry Andric               JOS.object([&] {
94*c9157d92SDimitry Andric                 JOS.attribute("function", J.first.str());
95bdd1243dSDimitry Andric                 JOS.attribute("samples", J.second);
96bdd1243dSDimitry Andric               });
97bdd1243dSDimitry Andric             }
98bdd1243dSDimitry Andric           });
99bdd1243dSDimitry Andric         }
100bdd1243dSDimitry Andric       });
101bdd1243dSDimitry Andric     }
102bdd1243dSDimitry Andric   };
103bdd1243dSDimitry Andric 
104bdd1243dSDimitry Andric   auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
105bdd1243dSDimitry Andric     for (const auto &I : CallsiteSamples)
106bdd1243dSDimitry Andric       for (const auto &FS : I.second) {
107bdd1243dSDimitry Andric         const LineLocation &Loc = I.first;
108bdd1243dSDimitry Andric         const FunctionSamples &CalleeSamples = FS.second;
109bdd1243dSDimitry Andric         JOS.object([&] {
110bdd1243dSDimitry Andric           JOS.attribute("line", Loc.LineOffset);
111bdd1243dSDimitry Andric           if (Loc.Discriminator)
112bdd1243dSDimitry Andric             JOS.attribute("discriminator", Loc.Discriminator);
113bdd1243dSDimitry Andric           JOS.attributeArray(
114bdd1243dSDimitry Andric               "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
115bdd1243dSDimitry Andric         });
116bdd1243dSDimitry Andric       }
117bdd1243dSDimitry Andric   };
118bdd1243dSDimitry Andric 
119bdd1243dSDimitry Andric   JOS.object([&] {
120*c9157d92SDimitry Andric     JOS.attribute("name", S.getFunction().str());
121bdd1243dSDimitry Andric     JOS.attribute("total", S.getTotalSamples());
122bdd1243dSDimitry Andric     if (TopLevel)
123bdd1243dSDimitry Andric       JOS.attribute("head", S.getHeadSamples());
124bdd1243dSDimitry Andric 
125bdd1243dSDimitry Andric     const auto &BodySamples = S.getBodySamples();
126bdd1243dSDimitry Andric     if (!BodySamples.empty())
127bdd1243dSDimitry Andric       JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
128bdd1243dSDimitry Andric 
129bdd1243dSDimitry Andric     const auto &CallsiteSamples = S.getCallsiteSamples();
130bdd1243dSDimitry Andric     if (!CallsiteSamples.empty())
131bdd1243dSDimitry Andric       JOS.attributeArray("callsites",
132bdd1243dSDimitry Andric                          [&] { DumpCallsiteSamples(CallsiteSamples); });
133bdd1243dSDimitry Andric   });
134bdd1243dSDimitry Andric }
135bdd1243dSDimitry Andric 
136bdd1243dSDimitry Andric /// Dump all the function profiles found on stream \p OS in the JSON format.
dumpJson(raw_ostream & OS)137bdd1243dSDimitry Andric void SampleProfileReader::dumpJson(raw_ostream &OS) {
138bdd1243dSDimitry Andric   std::vector<NameFunctionSamples> V;
139bdd1243dSDimitry Andric   sortFuncProfiles(Profiles, V);
140bdd1243dSDimitry Andric   json::OStream JOS(OS, 2);
141bdd1243dSDimitry Andric   JOS.arrayBegin();
142bdd1243dSDimitry Andric   for (const auto &F : V)
143bdd1243dSDimitry Andric     dumpFunctionProfileJson(*F.second, JOS, true);
144bdd1243dSDimitry Andric   JOS.arrayEnd();
145bdd1243dSDimitry Andric 
146bdd1243dSDimitry Andric   // Emit a newline character at the end as json::OStream doesn't emit one.
147bdd1243dSDimitry Andric   OS << "\n";
148bdd1243dSDimitry Andric }
149bdd1243dSDimitry Andric 
1500b57cec5SDimitry Andric /// Parse \p Input as function head.
1510b57cec5SDimitry Andric ///
1520b57cec5SDimitry Andric /// Parse one line of \p Input, and update function name in \p FName,
1530b57cec5SDimitry Andric /// function's total sample count in \p NumSamples, function's entry
1540b57cec5SDimitry Andric /// count in \p NumHeadSamples.
1550b57cec5SDimitry Andric ///
1560b57cec5SDimitry Andric /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)1570b57cec5SDimitry Andric static bool ParseHead(const StringRef &Input, StringRef &FName,
1580b57cec5SDimitry Andric                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
1590b57cec5SDimitry Andric   if (Input[0] == ' ')
1600b57cec5SDimitry Andric     return false;
1610b57cec5SDimitry Andric   size_t n2 = Input.rfind(':');
1620b57cec5SDimitry Andric   size_t n1 = Input.rfind(':', n2 - 1);
1630b57cec5SDimitry Andric   FName = Input.substr(0, n1);
1640b57cec5SDimitry Andric   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
1650b57cec5SDimitry Andric     return false;
1660b57cec5SDimitry Andric   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
1670b57cec5SDimitry Andric     return false;
1680b57cec5SDimitry Andric   return true;
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) { return L <= 0xffffu; }
1730b57cec5SDimitry Andric 
174e8d8bef9SDimitry Andric /// Parse \p Input that contains metadata.
175e8d8bef9SDimitry Andric /// Possible metadata:
176e8d8bef9SDimitry Andric /// - CFG Checksum information:
177e8d8bef9SDimitry Andric ///     !CFGChecksum: 12345
178fe6060f1SDimitry Andric /// - CFG Checksum information:
179fe6060f1SDimitry Andric ///     !Attributes: 1
180e8d8bef9SDimitry Andric /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)181fe6060f1SDimitry Andric static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
182fe6060f1SDimitry Andric                           uint32_t &Attributes) {
183*c9157d92SDimitry Andric   if (Input.starts_with("!CFGChecksum:")) {
184e8d8bef9SDimitry Andric     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
185e8d8bef9SDimitry Andric     return !CFGInfo.getAsInteger(10, FunctionHash);
186e8d8bef9SDimitry Andric   }
187e8d8bef9SDimitry Andric 
188*c9157d92SDimitry Andric   if (Input.starts_with("!Attributes:")) {
189fe6060f1SDimitry Andric     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
190fe6060f1SDimitry Andric     return !Attrib.getAsInteger(10, Attributes);
191fe6060f1SDimitry Andric   }
192fe6060f1SDimitry Andric 
193fe6060f1SDimitry Andric   return false;
194fe6060f1SDimitry Andric }
195fe6060f1SDimitry Andric 
/// Classification of an indented (non-header) line of a text profile.
enum class LineType {
  /// The text after the location does not start with a digit: a callee name
  /// followed by its sample count.
  CallSiteProfile,
  /// The text after the location starts with a digit: a sample count,
  /// optionally followed by call target/count pairs.
  BodyProfile,
  /// The first non-space character is '!'.
  Metadata,
};
201e8d8bef9SDimitry Andric 
2020b57cec5SDimitry Andric /// Parse \p Input as line sample.
2030b57cec5SDimitry Andric ///
2040b57cec5SDimitry Andric /// \param Input input line.
205e8d8bef9SDimitry Andric /// \param LineTy Type of this line.
2060b57cec5SDimitry Andric /// \param Depth the depth of the inline stack.
2070b57cec5SDimitry Andric /// \param NumSamples total samples of the line/inlined callsite.
2080b57cec5SDimitry Andric /// \param LineOffset line offset to the start of the function.
2090b57cec5SDimitry Andric /// \param Discriminator discriminator of the line.
2100b57cec5SDimitry Andric /// \param TargetCountMap map from indirect call target to count.
211e8d8bef9SDimitry Andric /// \param FunctionHash the function's CFG hash, used by pseudo probe.
2120b57cec5SDimitry Andric ///
2130b57cec5SDimitry Andric /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)214e8d8bef9SDimitry Andric static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
2150b57cec5SDimitry Andric                       uint64_t &NumSamples, uint32_t &LineOffset,
2160b57cec5SDimitry Andric                       uint32_t &Discriminator, StringRef &CalleeName,
217e8d8bef9SDimitry Andric                       DenseMap<StringRef, uint64_t> &TargetCountMap,
218fe6060f1SDimitry Andric                       uint64_t &FunctionHash, uint32_t &Attributes) {
2190b57cec5SDimitry Andric   for (Depth = 0; Input[Depth] == ' '; Depth++)
2200b57cec5SDimitry Andric     ;
2210b57cec5SDimitry Andric   if (Depth == 0)
2220b57cec5SDimitry Andric     return false;
2230b57cec5SDimitry Andric 
2240eae32dcSDimitry Andric   if (Input[Depth] == '!') {
225e8d8bef9SDimitry Andric     LineTy = LineType::Metadata;
226fe6060f1SDimitry Andric     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
227e8d8bef9SDimitry Andric   }
228e8d8bef9SDimitry Andric 
2290b57cec5SDimitry Andric   size_t n1 = Input.find(':');
2300b57cec5SDimitry Andric   StringRef Loc = Input.substr(Depth, n1 - Depth);
2310b57cec5SDimitry Andric   size_t n2 = Loc.find('.');
2320b57cec5SDimitry Andric   if (n2 == StringRef::npos) {
2330b57cec5SDimitry Andric     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
2340b57cec5SDimitry Andric       return false;
2350b57cec5SDimitry Andric     Discriminator = 0;
2360b57cec5SDimitry Andric   } else {
2370b57cec5SDimitry Andric     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
2380b57cec5SDimitry Andric       return false;
2390b57cec5SDimitry Andric     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
2400b57cec5SDimitry Andric       return false;
2410b57cec5SDimitry Andric   }
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   StringRef Rest = Input.substr(n1 + 2);
244e8d8bef9SDimitry Andric   if (isDigit(Rest[0])) {
245e8d8bef9SDimitry Andric     LineTy = LineType::BodyProfile;
2460b57cec5SDimitry Andric     size_t n3 = Rest.find(' ');
2470b57cec5SDimitry Andric     if (n3 == StringRef::npos) {
2480b57cec5SDimitry Andric       if (Rest.getAsInteger(10, NumSamples))
2490b57cec5SDimitry Andric         return false;
2500b57cec5SDimitry Andric     } else {
2510b57cec5SDimitry Andric       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
2520b57cec5SDimitry Andric         return false;
2530b57cec5SDimitry Andric     }
2540b57cec5SDimitry Andric     // Find call targets and their sample counts.
2550b57cec5SDimitry Andric     // Note: In some cases, there are symbols in the profile which are not
2560b57cec5SDimitry Andric     // mangled. To accommodate such cases, use colon + integer pairs as the
2570b57cec5SDimitry Andric     // anchor points.
2580b57cec5SDimitry Andric     // An example:
2590b57cec5SDimitry Andric     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
2600b57cec5SDimitry Andric     // ":1000" and ":437" are used as anchor points so the string above will
2610b57cec5SDimitry Andric     // be interpreted as
2620b57cec5SDimitry Andric     // target: _M_construct<char *>
2630b57cec5SDimitry Andric     // count: 1000
2640b57cec5SDimitry Andric     // target: string_view<std::allocator<char> >
2650b57cec5SDimitry Andric     // count: 437
2660b57cec5SDimitry Andric     while (n3 != StringRef::npos) {
2670b57cec5SDimitry Andric       n3 += Rest.substr(n3).find_first_not_of(' ');
2680b57cec5SDimitry Andric       Rest = Rest.substr(n3);
2690b57cec5SDimitry Andric       n3 = Rest.find_first_of(':');
2700b57cec5SDimitry Andric       if (n3 == StringRef::npos || n3 == 0)
2710b57cec5SDimitry Andric         return false;
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric       StringRef Target;
2740b57cec5SDimitry Andric       uint64_t count, n4;
2750b57cec5SDimitry Andric       while (true) {
2760b57cec5SDimitry Andric         // Get the segment after the current colon.
2770b57cec5SDimitry Andric         StringRef AfterColon = Rest.substr(n3 + 1);
2780b57cec5SDimitry Andric         // Get the target symbol before the current colon.
2790b57cec5SDimitry Andric         Target = Rest.substr(0, n3);
2800b57cec5SDimitry Andric         // Check if the word after the current colon is an integer.
2810b57cec5SDimitry Andric         n4 = AfterColon.find_first_of(' ');
2820b57cec5SDimitry Andric         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
2830b57cec5SDimitry Andric         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
2840b57cec5SDimitry Andric         if (!WordAfterColon.getAsInteger(10, count))
2850b57cec5SDimitry Andric           break;
2860b57cec5SDimitry Andric 
2870b57cec5SDimitry Andric         // Try to find the next colon.
2880b57cec5SDimitry Andric         uint64_t n5 = AfterColon.find_first_of(':');
2890b57cec5SDimitry Andric         if (n5 == StringRef::npos)
2900b57cec5SDimitry Andric           return false;
2910b57cec5SDimitry Andric         n3 += n5 + 1;
2920b57cec5SDimitry Andric       }
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric       // An anchor point is found. Save the {target, count} pair
2950b57cec5SDimitry Andric       TargetCountMap[Target] = count;
2960b57cec5SDimitry Andric       if (n4 == Rest.size())
2970b57cec5SDimitry Andric         break;
2980b57cec5SDimitry Andric       // Change n3 to the next blank space after colon + integer pair.
2990b57cec5SDimitry Andric       n3 = n4;
3000b57cec5SDimitry Andric     }
3010b57cec5SDimitry Andric   } else {
302e8d8bef9SDimitry Andric     LineTy = LineType::CallSiteProfile;
3030b57cec5SDimitry Andric     size_t n3 = Rest.find_last_of(':');
3040b57cec5SDimitry Andric     CalleeName = Rest.substr(0, n3);
3050b57cec5SDimitry Andric     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
3060b57cec5SDimitry Andric       return false;
3070b57cec5SDimitry Andric   }
3080b57cec5SDimitry Andric   return true;
3090b57cec5SDimitry Andric }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric /// Load samples from a text file.
3120b57cec5SDimitry Andric ///
3130b57cec5SDimitry Andric /// See the documentation at the top of the file for an explanation of
3140b57cec5SDimitry Andric /// the expected format.
3150b57cec5SDimitry Andric ///
3160b57cec5SDimitry Andric /// \returns true if the file was loaded successfully, false otherwise.
readImpl()3178bcb0991SDimitry Andric std::error_code SampleProfileReaderText::readImpl() {
3180b57cec5SDimitry Andric   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
3190b57cec5SDimitry Andric   sampleprof_error Result = sampleprof_error::success;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   InlineCallStack InlineStack;
3220eae32dcSDimitry Andric   uint32_t TopLevelProbeProfileCount = 0;
323e8d8bef9SDimitry Andric 
3240eae32dcSDimitry Andric   // DepthMetadata tracks whether we have processed metadata for the current
3250eae32dcSDimitry Andric   // top-level or nested function profile.
3260eae32dcSDimitry Andric   uint32_t DepthMetadata = 0;
3270b57cec5SDimitry Andric 
328fe6060f1SDimitry Andric   ProfileIsFS = ProfileIsFSDisciminator;
329349cc55cSDimitry Andric   FunctionSamples::ProfileIsFS = ProfileIsFS;
3300b57cec5SDimitry Andric   for (; !LineIt.is_at_eof(); ++LineIt) {
331fe013be4SDimitry Andric     size_t pos = LineIt->find_first_not_of(' ');
332fe013be4SDimitry Andric     if (pos == LineIt->npos || (*LineIt)[pos] == '#')
3330b57cec5SDimitry Andric       continue;
3340b57cec5SDimitry Andric     // Read the header of each function.
3350b57cec5SDimitry Andric     //
3360b57cec5SDimitry Andric     // Note that for function identifiers we are actually expecting
3370b57cec5SDimitry Andric     // mangled names, but we may not always get them. This happens when
3380b57cec5SDimitry Andric     // the compiler decides not to emit the function (e.g., it was inlined
3390b57cec5SDimitry Andric     // and removed). In this case, the binary will not have the linkage
3400b57cec5SDimitry Andric     // name for the function, so the profiler will emit the function's
3410b57cec5SDimitry Andric     // unmangled name, which may contain characters like ':' and '>' in its
3420b57cec5SDimitry Andric     // name (member functions, templates, etc).
3430b57cec5SDimitry Andric     //
3440b57cec5SDimitry Andric     // The only requirement we place on the identifier, then, is that it
3450b57cec5SDimitry Andric     // should not begin with a number.
3460b57cec5SDimitry Andric     if ((*LineIt)[0] != ' ') {
3470b57cec5SDimitry Andric       uint64_t NumSamples, NumHeadSamples;
3480b57cec5SDimitry Andric       StringRef FName;
3490b57cec5SDimitry Andric       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
3500b57cec5SDimitry Andric         reportError(LineIt.line_number(),
3510b57cec5SDimitry Andric                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
3520b57cec5SDimitry Andric         return sampleprof_error::malformed;
3530b57cec5SDimitry Andric       }
3540eae32dcSDimitry Andric       DepthMetadata = 0;
355349cc55cSDimitry Andric       SampleContext FContext(FName, CSNameTable);
356e8d8bef9SDimitry Andric       if (FContext.hasContext())
357e8d8bef9SDimitry Andric         ++CSProfileCount;
358*c9157d92SDimitry Andric       FunctionSamples &FProfile = Profiles.Create(FContext);
3590b57cec5SDimitry Andric       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
3600b57cec5SDimitry Andric       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
3610b57cec5SDimitry Andric       InlineStack.clear();
3620b57cec5SDimitry Andric       InlineStack.push_back(&FProfile);
3630b57cec5SDimitry Andric     } else {
3640b57cec5SDimitry Andric       uint64_t NumSamples;
3650b57cec5SDimitry Andric       StringRef FName;
3660b57cec5SDimitry Andric       DenseMap<StringRef, uint64_t> TargetCountMap;
3670b57cec5SDimitry Andric       uint32_t Depth, LineOffset, Discriminator;
368e8d8bef9SDimitry Andric       LineType LineTy;
369fe6060f1SDimitry Andric       uint64_t FunctionHash = 0;
370fe6060f1SDimitry Andric       uint32_t Attributes = 0;
371e8d8bef9SDimitry Andric       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
372fe6060f1SDimitry Andric                      Discriminator, FName, TargetCountMap, FunctionHash,
373fe6060f1SDimitry Andric                      Attributes)) {
3740b57cec5SDimitry Andric         reportError(LineIt.line_number(),
3750b57cec5SDimitry Andric                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
3760b57cec5SDimitry Andric                         *LineIt);
3770b57cec5SDimitry Andric         return sampleprof_error::malformed;
3780b57cec5SDimitry Andric       }
3790eae32dcSDimitry Andric       if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
380e8d8bef9SDimitry Andric         // Metadata must be put at the end of a function profile.
381e8d8bef9SDimitry Andric         reportError(LineIt.line_number(),
382e8d8bef9SDimitry Andric                     "Found non-metadata after metadata: " + *LineIt);
383e8d8bef9SDimitry Andric         return sampleprof_error::malformed;
384e8d8bef9SDimitry Andric       }
385fe6060f1SDimitry Andric 
386fe6060f1SDimitry Andric       // Here we handle FS discriminators.
387fe6060f1SDimitry Andric       Discriminator &= getDiscriminatorMask();
388fe6060f1SDimitry Andric 
3890b57cec5SDimitry Andric       while (InlineStack.size() > Depth) {
3900b57cec5SDimitry Andric         InlineStack.pop_back();
3910b57cec5SDimitry Andric       }
392e8d8bef9SDimitry Andric       switch (LineTy) {
393e8d8bef9SDimitry Andric       case LineType::CallSiteProfile: {
3940b57cec5SDimitry Andric         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
395*c9157d92SDimitry Andric             LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
396*c9157d92SDimitry Andric         FSamples.setFunction(FunctionId(FName));
3970b57cec5SDimitry Andric         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
3980b57cec5SDimitry Andric         InlineStack.push_back(&FSamples);
3990eae32dcSDimitry Andric         DepthMetadata = 0;
400e8d8bef9SDimitry Andric         break;
401e8d8bef9SDimitry Andric       }
402e8d8bef9SDimitry Andric       case LineType::BodyProfile: {
4030b57cec5SDimitry Andric         while (InlineStack.size() > Depth) {
4040b57cec5SDimitry Andric           InlineStack.pop_back();
4050b57cec5SDimitry Andric         }
4060b57cec5SDimitry Andric         FunctionSamples &FProfile = *InlineStack.back();
4070b57cec5SDimitry Andric         for (const auto &name_count : TargetCountMap) {
4080b57cec5SDimitry Andric           MergeResult(Result, FProfile.addCalledTargetSamples(
409*c9157d92SDimitry Andric                                   LineOffset, Discriminator,
410*c9157d92SDimitry Andric                                   FunctionId(name_count.first),
4110b57cec5SDimitry Andric                                   name_count.second));
4120b57cec5SDimitry Andric         }
4130b57cec5SDimitry Andric         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
4140b57cec5SDimitry Andric                                                     NumSamples));
415e8d8bef9SDimitry Andric         break;
416e8d8bef9SDimitry Andric       }
417e8d8bef9SDimitry Andric       case LineType::Metadata: {
418e8d8bef9SDimitry Andric         FunctionSamples &FProfile = *InlineStack.back();
419fe6060f1SDimitry Andric         if (FunctionHash) {
420e8d8bef9SDimitry Andric           FProfile.setFunctionHash(FunctionHash);
4210eae32dcSDimitry Andric           if (Depth == 1)
4220eae32dcSDimitry Andric             ++TopLevelProbeProfileCount;
423fe6060f1SDimitry Andric         }
424fe6060f1SDimitry Andric         FProfile.getContext().setAllAttributes(Attributes);
4250eae32dcSDimitry Andric         if (Attributes & (uint32_t)ContextShouldBeInlined)
42681ad6265SDimitry Andric           ProfileIsPreInlined = true;
4270eae32dcSDimitry Andric         DepthMetadata = Depth;
428e8d8bef9SDimitry Andric         break;
4290b57cec5SDimitry Andric       }
4300b57cec5SDimitry Andric       }
4310b57cec5SDimitry Andric     }
432e8d8bef9SDimitry Andric   }
433e8d8bef9SDimitry Andric 
434d409305fSDimitry Andric   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
435e8d8bef9SDimitry Andric          "Cannot have both context-sensitive and regular profile");
43681ad6265SDimitry Andric   ProfileIsCS = (CSProfileCount > 0);
4370eae32dcSDimitry Andric   assert((TopLevelProbeProfileCount == 0 ||
4380eae32dcSDimitry Andric           TopLevelProbeProfileCount == Profiles.size()) &&
439e8d8bef9SDimitry Andric          "Cannot have both probe-based profiles and regular profiles");
4400eae32dcSDimitry Andric   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
441e8d8bef9SDimitry Andric   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
44281ad6265SDimitry Andric   FunctionSamples::ProfileIsCS = ProfileIsCS;
44381ad6265SDimitry Andric   FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
444e8d8bef9SDimitry Andric 
4450b57cec5SDimitry Andric   if (Result == sampleprof_error::success)
4460b57cec5SDimitry Andric     computeSummary();
4470b57cec5SDimitry Andric 
4480b57cec5SDimitry Andric   return Result;
4490b57cec5SDimitry Andric }
4500b57cec5SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)4510b57cec5SDimitry Andric bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
4520b57cec5SDimitry Andric   bool result = false;
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   // Check that the first non-comment line is a valid function header.
4550b57cec5SDimitry Andric   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
4560b57cec5SDimitry Andric   if (!LineIt.is_at_eof()) {
4570b57cec5SDimitry Andric     if ((*LineIt)[0] != ' ') {
4580b57cec5SDimitry Andric       uint64_t NumSamples, NumHeadSamples;
4590b57cec5SDimitry Andric       StringRef FName;
4600b57cec5SDimitry Andric       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
4610b57cec5SDimitry Andric     }
4620b57cec5SDimitry Andric   }
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric   return result;
4650b57cec5SDimitry Andric }
4660b57cec5SDimitry Andric 
readNumber()4670b57cec5SDimitry Andric template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
4680b57cec5SDimitry Andric   unsigned NumBytesRead = 0;
4690b57cec5SDimitry Andric   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
4700b57cec5SDimitry Andric 
471*c9157d92SDimitry Andric   if (Val > std::numeric_limits<T>::max()) {
472*c9157d92SDimitry Andric     std::error_code EC = sampleprof_error::malformed;
473*c9157d92SDimitry Andric     reportError(0, EC.message());
474*c9157d92SDimitry Andric     return EC;
475*c9157d92SDimitry Andric   } else if (Data + NumBytesRead > End) {
476*c9157d92SDimitry Andric     std::error_code EC = sampleprof_error::truncated;
4770b57cec5SDimitry Andric     reportError(0, EC.message());
4780b57cec5SDimitry Andric     return EC;
4790b57cec5SDimitry Andric   }
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric   Data += NumBytesRead;
4820b57cec5SDimitry Andric   return static_cast<T>(Val);
4830b57cec5SDimitry Andric }
4840b57cec5SDimitry Andric 
readString()4850b57cec5SDimitry Andric ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
4860b57cec5SDimitry Andric   StringRef Str(reinterpret_cast<const char *>(Data));
4870b57cec5SDimitry Andric   if (Data + Str.size() + 1 > End) {
488*c9157d92SDimitry Andric     std::error_code EC = sampleprof_error::truncated;
4890b57cec5SDimitry Andric     reportError(0, EC.message());
4900b57cec5SDimitry Andric     return EC;
4910b57cec5SDimitry Andric   }
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric   Data += Str.size() + 1;
4940b57cec5SDimitry Andric   return Str;
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric template <typename T>
readUnencodedNumber()4980b57cec5SDimitry Andric ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
4990b57cec5SDimitry Andric   if (Data + sizeof(T) > End) {
500*c9157d92SDimitry Andric     std::error_code EC = sampleprof_error::truncated;
5010b57cec5SDimitry Andric     reportError(0, EC.message());
5020b57cec5SDimitry Andric     return EC;
5030b57cec5SDimitry Andric   }
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric   using namespace support;
506*c9157d92SDimitry Andric   T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data);
5070b57cec5SDimitry Andric   return Val;
5080b57cec5SDimitry Andric }
5090b57cec5SDimitry Andric 
5100b57cec5SDimitry Andric template <typename T>
readStringIndex(T & Table)511fe013be4SDimitry Andric inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
512fe013be4SDimitry Andric   auto Idx = readNumber<size_t>();
5130b57cec5SDimitry Andric   if (std::error_code EC = Idx.getError())
5140b57cec5SDimitry Andric     return EC;
5150b57cec5SDimitry Andric   if (*Idx >= Table.size())
5160b57cec5SDimitry Andric     return sampleprof_error::truncated_name_table;
5170b57cec5SDimitry Andric   return *Idx;
5180b57cec5SDimitry Andric }
5190b57cec5SDimitry Andric 
520*c9157d92SDimitry Andric ErrorOr<FunctionId>
readStringFromTable(size_t * RetIdx)521*c9157d92SDimitry Andric SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
5220b57cec5SDimitry Andric   auto Idx = readStringIndex(NameTable);
5230b57cec5SDimitry Andric   if (std::error_code EC = Idx.getError())
5240b57cec5SDimitry Andric     return EC;
525*c9157d92SDimitry Andric   if (RetIdx)
526*c9157d92SDimitry Andric     *RetIdx = *Idx;
527*c9157d92SDimitry Andric   return NameTable[*Idx];
528fe013be4SDimitry Andric }
529fe013be4SDimitry Andric 
530*c9157d92SDimitry Andric ErrorOr<SampleContextFrames>
readContextFromTable(size_t * RetIdx)531*c9157d92SDimitry Andric SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
532fe013be4SDimitry Andric   auto ContextIdx = readNumber<size_t>();
533fe013be4SDimitry Andric   if (std::error_code EC = ContextIdx.getError())
534fe013be4SDimitry Andric     return EC;
535fe013be4SDimitry Andric   if (*ContextIdx >= CSNameTable.size())
536fe013be4SDimitry Andric     return sampleprof_error::truncated_name_table;
537*c9157d92SDimitry Andric   if (RetIdx)
538*c9157d92SDimitry Andric     *RetIdx = *ContextIdx;
539fe013be4SDimitry Andric   return CSNameTable[*ContextIdx];
5400b57cec5SDimitry Andric }
5410b57cec5SDimitry Andric 
542*c9157d92SDimitry Andric ErrorOr<std::pair<SampleContext, uint64_t>>
readSampleContextFromTable()543*c9157d92SDimitry Andric SampleProfileReaderBinary::readSampleContextFromTable() {
544*c9157d92SDimitry Andric   SampleContext Context;
545*c9157d92SDimitry Andric   size_t Idx;
546fe013be4SDimitry Andric   if (ProfileIsCS) {
547*c9157d92SDimitry Andric     auto FContext(readContextFromTable(&Idx));
548fe013be4SDimitry Andric     if (std::error_code EC = FContext.getError())
549fe013be4SDimitry Andric       return EC;
550*c9157d92SDimitry Andric     Context = SampleContext(*FContext);
551fe013be4SDimitry Andric   } else {
552*c9157d92SDimitry Andric     auto FName(readStringFromTable(&Idx));
553349cc55cSDimitry Andric     if (std::error_code EC = FName.getError())
554349cc55cSDimitry Andric       return EC;
555*c9157d92SDimitry Andric     Context = SampleContext(*FName);
556349cc55cSDimitry Andric   }
557*c9157d92SDimitry Andric   // Since MD5SampleContextStart may point to the profile's file data, need to
558*c9157d92SDimitry Andric   // make sure it is reading the same value on big endian CPU.
559*c9157d92SDimitry Andric   uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx);
560*c9157d92SDimitry Andric   // Lazy computing of hash value, write back to the table to cache it. Only
561*c9157d92SDimitry Andric   // compute the context's hash value if it is being referenced for the first
562*c9157d92SDimitry Andric   // time.
563*c9157d92SDimitry Andric   if (Hash == 0) {
564*c9157d92SDimitry Andric     assert(MD5SampleContextStart == MD5SampleContextTable.data());
565*c9157d92SDimitry Andric     Hash = Context.getHashCode();
566*c9157d92SDimitry Andric     support::endian::write64le(&MD5SampleContextTable[Idx], Hash);
567*c9157d92SDimitry Andric   }
568*c9157d92SDimitry Andric   return std::make_pair(Context, Hash);
5690b57cec5SDimitry Andric }
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric std::error_code
readProfile(FunctionSamples & FProfile)5720b57cec5SDimitry Andric SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
5730b57cec5SDimitry Andric   auto NumSamples = readNumber<uint64_t>();
5740b57cec5SDimitry Andric   if (std::error_code EC = NumSamples.getError())
5750b57cec5SDimitry Andric     return EC;
5760b57cec5SDimitry Andric   FProfile.addTotalSamples(*NumSamples);
5770b57cec5SDimitry Andric 
5780b57cec5SDimitry Andric   // Read the samples in the body.
5790b57cec5SDimitry Andric   auto NumRecords = readNumber<uint32_t>();
5800b57cec5SDimitry Andric   if (std::error_code EC = NumRecords.getError())
5810b57cec5SDimitry Andric     return EC;
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric   for (uint32_t I = 0; I < *NumRecords; ++I) {
5840b57cec5SDimitry Andric     auto LineOffset = readNumber<uint64_t>();
5850b57cec5SDimitry Andric     if (std::error_code EC = LineOffset.getError())
5860b57cec5SDimitry Andric       return EC;
5870b57cec5SDimitry Andric 
5880b57cec5SDimitry Andric     if (!isOffsetLegal(*LineOffset)) {
5890b57cec5SDimitry Andric       return std::error_code();
5900b57cec5SDimitry Andric     }
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric     auto Discriminator = readNumber<uint64_t>();
5930b57cec5SDimitry Andric     if (std::error_code EC = Discriminator.getError())
5940b57cec5SDimitry Andric       return EC;
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric     auto NumSamples = readNumber<uint64_t>();
5970b57cec5SDimitry Andric     if (std::error_code EC = NumSamples.getError())
5980b57cec5SDimitry Andric       return EC;
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric     auto NumCalls = readNumber<uint32_t>();
6010b57cec5SDimitry Andric     if (std::error_code EC = NumCalls.getError())
6020b57cec5SDimitry Andric       return EC;
6030b57cec5SDimitry Andric 
604fe6060f1SDimitry Andric     // Here we handle FS discriminators:
605fe6060f1SDimitry Andric     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
606fe6060f1SDimitry Andric 
6070b57cec5SDimitry Andric     for (uint32_t J = 0; J < *NumCalls; ++J) {
6080b57cec5SDimitry Andric       auto CalledFunction(readStringFromTable());
6090b57cec5SDimitry Andric       if (std::error_code EC = CalledFunction.getError())
6100b57cec5SDimitry Andric         return EC;
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric       auto CalledFunctionSamples = readNumber<uint64_t>();
6130b57cec5SDimitry Andric       if (std::error_code EC = CalledFunctionSamples.getError())
6140b57cec5SDimitry Andric         return EC;
6150b57cec5SDimitry Andric 
616fe6060f1SDimitry Andric       FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
6170b57cec5SDimitry Andric                                       *CalledFunction, *CalledFunctionSamples);
6180b57cec5SDimitry Andric     }
6190b57cec5SDimitry Andric 
620fe6060f1SDimitry Andric     FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric 
6230b57cec5SDimitry Andric   // Read all the samples for inlined function calls.
6240b57cec5SDimitry Andric   auto NumCallsites = readNumber<uint32_t>();
6250b57cec5SDimitry Andric   if (std::error_code EC = NumCallsites.getError())
6260b57cec5SDimitry Andric     return EC;
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric   for (uint32_t J = 0; J < *NumCallsites; ++J) {
6290b57cec5SDimitry Andric     auto LineOffset = readNumber<uint64_t>();
6300b57cec5SDimitry Andric     if (std::error_code EC = LineOffset.getError())
6310b57cec5SDimitry Andric       return EC;
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric     auto Discriminator = readNumber<uint64_t>();
6340b57cec5SDimitry Andric     if (std::error_code EC = Discriminator.getError())
6350b57cec5SDimitry Andric       return EC;
6360b57cec5SDimitry Andric 
6370b57cec5SDimitry Andric     auto FName(readStringFromTable());
6380b57cec5SDimitry Andric     if (std::error_code EC = FName.getError())
6390b57cec5SDimitry Andric       return EC;
6400b57cec5SDimitry Andric 
641fe6060f1SDimitry Andric     // Here we handle FS discriminators:
642fe6060f1SDimitry Andric     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
643fe6060f1SDimitry Andric 
6440b57cec5SDimitry Andric     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
645*c9157d92SDimitry Andric         LineLocation(*LineOffset, DiscriminatorVal))[*FName];
646*c9157d92SDimitry Andric     CalleeProfile.setFunction(*FName);
6470b57cec5SDimitry Andric     if (std::error_code EC = readProfile(CalleeProfile))
6480b57cec5SDimitry Andric       return EC;
6490b57cec5SDimitry Andric   }
6500b57cec5SDimitry Andric 
6510b57cec5SDimitry Andric   return sampleprof_error::success;
6520b57cec5SDimitry Andric }
6530b57cec5SDimitry Andric 
6548bcb0991SDimitry Andric std::error_code
readFuncProfile(const uint8_t * Start)6558bcb0991SDimitry Andric SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
6568bcb0991SDimitry Andric   Data = Start;
6570b57cec5SDimitry Andric   auto NumHeadSamples = readNumber<uint64_t>();
6580b57cec5SDimitry Andric   if (std::error_code EC = NumHeadSamples.getError())
6590b57cec5SDimitry Andric     return EC;
6600b57cec5SDimitry Andric 
661*c9157d92SDimitry Andric   auto FContextHash(readSampleContextFromTable());
662*c9157d92SDimitry Andric   if (std::error_code EC = FContextHash.getError())
6630b57cec5SDimitry Andric     return EC;
6640b57cec5SDimitry Andric 
665*c9157d92SDimitry Andric   auto &[FContext, Hash] = *FContextHash;
666*c9157d92SDimitry Andric   // Use the cached hash value for insertion instead of recalculating it.
667*c9157d92SDimitry Andric   auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
668*c9157d92SDimitry Andric   FunctionSamples &FProfile = Res.first->second;
669*c9157d92SDimitry Andric   FProfile.setContext(FContext);
6700b57cec5SDimitry Andric   FProfile.addHeadSamples(*NumHeadSamples);
6710b57cec5SDimitry Andric 
672*c9157d92SDimitry Andric   if (FContext.hasContext())
673d409305fSDimitry Andric     CSProfileCount++;
674d409305fSDimitry Andric 
6750b57cec5SDimitry Andric   if (std::error_code EC = readProfile(FProfile))
6760b57cec5SDimitry Andric     return EC;
6770b57cec5SDimitry Andric   return sampleprof_error::success;
6780b57cec5SDimitry Andric }
6790b57cec5SDimitry Andric 
readImpl()6808bcb0991SDimitry Andric std::error_code SampleProfileReaderBinary::readImpl() {
681fe6060f1SDimitry Andric   ProfileIsFS = ProfileIsFSDisciminator;
682349cc55cSDimitry Andric   FunctionSamples::ProfileIsFS = ProfileIsFS;
683fe013be4SDimitry Andric   while (Data < End) {
6848bcb0991SDimitry Andric     if (std::error_code EC = readFuncProfile(Data))
6850b57cec5SDimitry Andric       return EC;
6860b57cec5SDimitry Andric   }
6870b57cec5SDimitry Andric 
6880b57cec5SDimitry Andric   return sampleprof_error::success;
6890b57cec5SDimitry Andric }
6900b57cec5SDimitry Andric 
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)691e8d8bef9SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
6925ffd83dbSDimitry Andric     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
6938bcb0991SDimitry Andric   Data = Start;
6948bcb0991SDimitry Andric   End = Start + Size;
6955ffd83dbSDimitry Andric   switch (Entry.Type) {
6968bcb0991SDimitry Andric   case SecProfSummary:
6978bcb0991SDimitry Andric     if (std::error_code EC = readSummary())
6988bcb0991SDimitry Andric       return EC;
6995ffd83dbSDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
7005ffd83dbSDimitry Andric       Summary->setPartialProfile(true);
701fe6060f1SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
70281ad6265SDimitry Andric       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
70381ad6265SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
70481ad6265SDimitry Andric       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
705fe6060f1SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
706fe6060f1SDimitry Andric       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
7078bcb0991SDimitry Andric     break;
708e8d8bef9SDimitry Andric   case SecNameTable: {
709fe013be4SDimitry Andric     bool FixedLengthMD5 =
710e8d8bef9SDimitry Andric         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
711e8d8bef9SDimitry Andric     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
712fe013be4SDimitry Andric     // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
713fe013be4SDimitry Andric     // profile uses MD5 for function name matching in IPO passes.
714fe013be4SDimitry Andric     ProfileIsMD5 = ProfileIsMD5 || UseMD5;
715fe6060f1SDimitry Andric     FunctionSamples::HasUniqSuffix =
716fe6060f1SDimitry Andric         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
717fe013be4SDimitry Andric     if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5))
7188bcb0991SDimitry Andric       return EC;
7198bcb0991SDimitry Andric     break;
720e8d8bef9SDimitry Andric   }
721349cc55cSDimitry Andric   case SecCSNameTable: {
722349cc55cSDimitry Andric     if (std::error_code EC = readCSNameTableSec())
723349cc55cSDimitry Andric       return EC;
724349cc55cSDimitry Andric     break;
725349cc55cSDimitry Andric   }
7268bcb0991SDimitry Andric   case SecLBRProfile:
7278bcb0991SDimitry Andric     if (std::error_code EC = readFuncProfiles())
7288bcb0991SDimitry Andric       return EC;
7298bcb0991SDimitry Andric     break;
7308bcb0991SDimitry Andric   case SecFuncOffsetTable:
731fe013be4SDimitry Andric     // If module is absent, we are using LLVM tools, and need to read all
732fe013be4SDimitry Andric     // profiles, so skip reading the function offset table.
733fe013be4SDimitry Andric     if (!M) {
734fe013be4SDimitry Andric       Data = End;
735fe013be4SDimitry Andric     } else {
736fe013be4SDimitry Andric       assert((!ProfileIsCS ||
737fe013be4SDimitry Andric               hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
738fe013be4SDimitry Andric              "func offset table should always be sorted in CS profile");
7398bcb0991SDimitry Andric       if (std::error_code EC = readFuncOffsetTable())
7408bcb0991SDimitry Andric         return EC;
741fe013be4SDimitry Andric     }
7428bcb0991SDimitry Andric     break;
743fe6060f1SDimitry Andric   case SecFuncMetadata: {
744e8d8bef9SDimitry Andric     ProfileIsProbeBased =
745e8d8bef9SDimitry Andric         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
746e8d8bef9SDimitry Andric     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
747fe6060f1SDimitry Andric     bool HasAttribute =
748fe6060f1SDimitry Andric         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
749fe6060f1SDimitry Andric     if (std::error_code EC = readFuncMetadata(HasAttribute))
750e8d8bef9SDimitry Andric       return EC;
751e8d8bef9SDimitry Andric     break;
752fe6060f1SDimitry Andric   }
753e8d8bef9SDimitry Andric   case SecProfileSymbolList:
754e8d8bef9SDimitry Andric     if (std::error_code EC = readProfileSymbolList())
755e8d8bef9SDimitry Andric       return EC;
756e8d8bef9SDimitry Andric     break;
7578bcb0991SDimitry Andric   default:
758e8d8bef9SDimitry Andric     if (std::error_code EC = readCustomSection(Entry))
759e8d8bef9SDimitry Andric       return EC;
7608bcb0991SDimitry Andric     break;
7618bcb0991SDimitry Andric   }
7628bcb0991SDimitry Andric   return sampleprof_error::success;
7638bcb0991SDimitry Andric }
7648bcb0991SDimitry Andric 
useFuncOffsetList() const765fe013be4SDimitry Andric bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
766fe013be4SDimitry Andric   // If profile is CS, the function offset section is expected to consist of
767fe013be4SDimitry Andric   // sequences of contexts in pre-order layout
768fe013be4SDimitry Andric   // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched
769fe013be4SDimitry Andric   // context in the module is found, the profiles of all its callees are
770fe013be4SDimitry Andric   // recursively loaded. A list is needed since the order of profiles matters.
771fe013be4SDimitry Andric   if (ProfileIsCS)
772fe013be4SDimitry Andric     return true;
773fe013be4SDimitry Andric 
774fe013be4SDimitry Andric   // If the profile is MD5, use the map container to lookup functions in
775fe013be4SDimitry Andric   // the module. A remapper has no use on MD5 names.
776fe013be4SDimitry Andric   if (useMD5())
777fe013be4SDimitry Andric     return false;
778fe013be4SDimitry Andric 
779fe013be4SDimitry Andric   // Profile is not MD5 and if a remapper is present, the remapped name of
780fe013be4SDimitry Andric   // every function needed to be matched against the module, so use the list
781fe013be4SDimitry Andric   // container since each entry is accessed.
782fe013be4SDimitry Andric   if (Remapper)
783fe013be4SDimitry Andric     return true;
784fe013be4SDimitry Andric 
785fe013be4SDimitry Andric   // Otherwise use the map container for faster lookup.
786fe013be4SDimitry Andric   // TODO: If the cardinality of the function offset section is much smaller
787fe013be4SDimitry Andric   // than the number of functions in the module, using the list container can
788fe013be4SDimitry Andric   // be always faster, but we need to figure out the constant factor to
789fe013be4SDimitry Andric   // determine the cutoff.
790fe013be4SDimitry Andric   return false;
791fe013be4SDimitry Andric }
792fe013be4SDimitry Andric 
793fe013be4SDimitry Andric 
collectFuncsFromModule()794fe6060f1SDimitry Andric bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
795fe6060f1SDimitry Andric   if (!M)
796fe6060f1SDimitry Andric     return false;
7978bcb0991SDimitry Andric   FuncsToUse.clear();
798fe6060f1SDimitry Andric   for (auto &F : *M)
7998bcb0991SDimitry Andric     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
800fe6060f1SDimitry Andric   return true;
8018bcb0991SDimitry Andric }
8028bcb0991SDimitry Andric 
readFuncOffsetTable()803e8d8bef9SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
804fe013be4SDimitry Andric   // If there are more than one function offset section, the profile associated
805fe013be4SDimitry Andric   // with the previous section has to be done reading before next one is read.
806e8d8bef9SDimitry Andric   FuncOffsetTable.clear();
807fe013be4SDimitry Andric   FuncOffsetList.clear();
808e8d8bef9SDimitry Andric 
8098bcb0991SDimitry Andric   auto Size = readNumber<uint64_t>();
8108bcb0991SDimitry Andric   if (std::error_code EC = Size.getError())
8118bcb0991SDimitry Andric     return EC;
8128bcb0991SDimitry Andric 
813fe013be4SDimitry Andric   bool UseFuncOffsetList = useFuncOffsetList();
814fe013be4SDimitry Andric   if (UseFuncOffsetList)
815fe013be4SDimitry Andric     FuncOffsetList.reserve(*Size);
816fe013be4SDimitry Andric   else
8178bcb0991SDimitry Andric     FuncOffsetTable.reserve(*Size);
818349cc55cSDimitry Andric 
819bdd1243dSDimitry Andric   for (uint64_t I = 0; I < *Size; ++I) {
820*c9157d92SDimitry Andric     auto FContextHash(readSampleContextFromTable());
821*c9157d92SDimitry Andric     if (std::error_code EC = FContextHash.getError())
8228bcb0991SDimitry Andric       return EC;
8238bcb0991SDimitry Andric 
824*c9157d92SDimitry Andric     auto &[FContext, Hash] = *FContextHash;
8258bcb0991SDimitry Andric     auto Offset = readNumber<uint64_t>();
8268bcb0991SDimitry Andric     if (std::error_code EC = Offset.getError())
8278bcb0991SDimitry Andric       return EC;
8288bcb0991SDimitry Andric 
829fe013be4SDimitry Andric     if (UseFuncOffsetList)
830*c9157d92SDimitry Andric       FuncOffsetList.emplace_back(FContext, *Offset);
831fe013be4SDimitry Andric     else
832*c9157d92SDimitry Andric       // Because Porfiles replace existing value with new value if collision
833*c9157d92SDimitry Andric       // happens, we also use the latest offset so that they are consistent.
834*c9157d92SDimitry Andric       FuncOffsetTable[Hash] = *Offset;
8358bcb0991SDimitry Andric  }
836349cc55cSDimitry Andric 
8378bcb0991SDimitry Andric  return sampleprof_error::success;
8388bcb0991SDimitry Andric }
8398bcb0991SDimitry Andric 
readFuncProfiles()840e8d8bef9SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
841fe6060f1SDimitry Andric   // Collect functions used by current module if the Reader has been
842fe6060f1SDimitry Andric   // given a module.
843fe6060f1SDimitry Andric   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
844fe6060f1SDimitry Andric   // which will query FunctionSamples::HasUniqSuffix, so it has to be
845fe6060f1SDimitry Andric   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
846fe6060f1SDimitry Andric   // NameTable section is read.
847fe6060f1SDimitry Andric   bool LoadFuncsToBeUsed = collectFuncsFromModule();
848fe6060f1SDimitry Andric 
849fe013be4SDimitry Andric   // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
850fe013be4SDimitry Andric   // profiles.
8518bcb0991SDimitry Andric   const uint8_t *Start = Data;
852fe6060f1SDimitry Andric   if (!LoadFuncsToBeUsed) {
8538bcb0991SDimitry Andric     while (Data < End) {
8548bcb0991SDimitry Andric       if (std::error_code EC = readFuncProfile(Data))
8558bcb0991SDimitry Andric         return EC;
8568bcb0991SDimitry Andric     }
8578bcb0991SDimitry Andric     assert(Data == End && "More data is read than expected");
858d409305fSDimitry Andric   } else {
859fe6060f1SDimitry Andric     // Load function profiles on demand.
8608bcb0991SDimitry Andric     if (Remapper) {
8618bcb0991SDimitry Andric       for (auto Name : FuncsToUse) {
8628bcb0991SDimitry Andric         Remapper->insert(Name);
8638bcb0991SDimitry Andric       }
8648bcb0991SDimitry Andric     }
8658bcb0991SDimitry Andric 
86681ad6265SDimitry Andric     if (ProfileIsCS) {
867fe013be4SDimitry Andric       assert(useFuncOffsetList());
868349cc55cSDimitry Andric       DenseSet<uint64_t> FuncGuidsToUse;
869349cc55cSDimitry Andric       if (useMD5()) {
870349cc55cSDimitry Andric         for (auto Name : FuncsToUse)
871349cc55cSDimitry Andric           FuncGuidsToUse.insert(Function::getGUID(Name));
872349cc55cSDimitry Andric       }
873349cc55cSDimitry Andric 
874349cc55cSDimitry Andric       // For each function in current module, load all context profiles for
875349cc55cSDimitry Andric       // the function as well as their callee contexts which can help profile
876349cc55cSDimitry Andric       // guided importing for ThinLTO. This can be achieved by walking
877349cc55cSDimitry Andric       // through an ordered context container, where contexts are laid out
878349cc55cSDimitry Andric       // as if they were walked in preorder of a context trie. While
879349cc55cSDimitry Andric       // traversing the trie, a link to the highest common ancestor node is
880349cc55cSDimitry Andric       // kept so that all of its decendants will be loaded.
881349cc55cSDimitry Andric       const SampleContext *CommonContext = nullptr;
882fe013be4SDimitry Andric       for (const auto &NameOffset : FuncOffsetList) {
883349cc55cSDimitry Andric         const auto &FContext = NameOffset.first;
884*c9157d92SDimitry Andric         FunctionId FName = FContext.getFunction();
885*c9157d92SDimitry Andric         StringRef FNameString;
886*c9157d92SDimitry Andric         if (!useMD5())
887*c9157d92SDimitry Andric           FNameString = FName.stringRef();
888*c9157d92SDimitry Andric 
889349cc55cSDimitry Andric         // For function in the current module, keep its farthest ancestor
890349cc55cSDimitry Andric         // context. This can be used to load itself and its child and
891349cc55cSDimitry Andric         // sibling contexts.
892*c9157d92SDimitry Andric         if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
893*c9157d92SDimitry Andric             (!useMD5() && (FuncsToUse.count(FNameString) ||
894*c9157d92SDimitry Andric                            (Remapper && Remapper->exist(FNameString))))) {
895349cc55cSDimitry Andric           if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
896349cc55cSDimitry Andric             CommonContext = &FContext;
897349cc55cSDimitry Andric         }
898349cc55cSDimitry Andric 
899349cc55cSDimitry Andric         if (CommonContext == &FContext ||
900349cc55cSDimitry Andric             (CommonContext && CommonContext->IsPrefixOf(FContext))) {
901349cc55cSDimitry Andric           // Load profile for the current context which originated from
902349cc55cSDimitry Andric           // the common ancestor.
903349cc55cSDimitry Andric           const uint8_t *FuncProfileAddr = Start + NameOffset.second;
904349cc55cSDimitry Andric           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
905349cc55cSDimitry Andric             return EC;
906349cc55cSDimitry Andric         }
907349cc55cSDimitry Andric       }
908fe013be4SDimitry Andric     } else if (useMD5()) {
909fe013be4SDimitry Andric       assert(!useFuncOffsetList());
9105ffd83dbSDimitry Andric       for (auto Name : FuncsToUse) {
911*c9157d92SDimitry Andric         auto GUID = MD5Hash(Name);
912*c9157d92SDimitry Andric         auto iter = FuncOffsetTable.find(GUID);
9135ffd83dbSDimitry Andric         if (iter == FuncOffsetTable.end())
9145ffd83dbSDimitry Andric           continue;
9155ffd83dbSDimitry Andric         const uint8_t *FuncProfileAddr = Start + iter->second;
916fe013be4SDimitry Andric         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
917fe013be4SDimitry Andric           return EC;
918fe013be4SDimitry Andric       }
919fe013be4SDimitry Andric     } else if (Remapper) {
920fe013be4SDimitry Andric       assert(useFuncOffsetList());
921fe013be4SDimitry Andric       for (auto NameOffset : FuncOffsetList) {
922fe013be4SDimitry Andric         SampleContext FContext(NameOffset.first);
923*c9157d92SDimitry Andric         auto FuncName = FContext.getFunction();
924*c9157d92SDimitry Andric         StringRef FuncNameStr = FuncName.stringRef();
925*c9157d92SDimitry Andric         if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
926fe013be4SDimitry Andric           continue;
927fe013be4SDimitry Andric         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
9285ffd83dbSDimitry Andric         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
9295ffd83dbSDimitry Andric           return EC;
9305ffd83dbSDimitry Andric       }
9315ffd83dbSDimitry Andric     } else {
932fe013be4SDimitry Andric       assert(!useFuncOffsetList());
933fe013be4SDimitry Andric       for (auto Name : FuncsToUse) {
934*c9157d92SDimitry Andric         auto iter = FuncOffsetTable.find(MD5Hash(Name));
935fe013be4SDimitry Andric         if (iter == FuncOffsetTable.end())
9368bcb0991SDimitry Andric           continue;
937fe013be4SDimitry Andric         const uint8_t *FuncProfileAddr = Start + iter->second;
9388bcb0991SDimitry Andric         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
9398bcb0991SDimitry Andric           return EC;
9408bcb0991SDimitry Andric       }
9415ffd83dbSDimitry Andric     }
9428bcb0991SDimitry Andric     Data = End;
943d409305fSDimitry Andric   }
944d409305fSDimitry Andric   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
945d409305fSDimitry Andric          "Cannot have both context-sensitive and regular profile");
94681ad6265SDimitry Andric   assert((!CSProfileCount || ProfileIsCS) &&
947fe6060f1SDimitry Andric          "Section flag should be consistent with actual profile");
9488bcb0991SDimitry Andric   return sampleprof_error::success;
9498bcb0991SDimitry Andric }
9508bcb0991SDimitry Andric 
readProfileSymbolList()951e8d8bef9SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
9528bcb0991SDimitry Andric   if (!ProfSymList)
9538bcb0991SDimitry Andric     ProfSymList = std::make_unique<ProfileSymbolList>();
9548bcb0991SDimitry Andric 
9558bcb0991SDimitry Andric   if (std::error_code EC = ProfSymList->read(Data, End - Data))
9568bcb0991SDimitry Andric     return EC;
9578bcb0991SDimitry Andric 
9588bcb0991SDimitry Andric   Data = End;
9598bcb0991SDimitry Andric   return sampleprof_error::success;
9608bcb0991SDimitry Andric }
9618bcb0991SDimitry Andric 
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)9628bcb0991SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
9638bcb0991SDimitry Andric     const uint8_t *SecStart, const uint64_t SecSize,
9648bcb0991SDimitry Andric     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
9658bcb0991SDimitry Andric   Data = SecStart;
9668bcb0991SDimitry Andric   End = SecStart + SecSize;
9678bcb0991SDimitry Andric   auto DecompressSize = readNumber<uint64_t>();
9688bcb0991SDimitry Andric   if (std::error_code EC = DecompressSize.getError())
9698bcb0991SDimitry Andric     return EC;
9708bcb0991SDimitry Andric   DecompressBufSize = *DecompressSize;
9718bcb0991SDimitry Andric 
9728bcb0991SDimitry Andric   auto CompressSize = readNumber<uint64_t>();
9738bcb0991SDimitry Andric   if (std::error_code EC = CompressSize.getError())
9748bcb0991SDimitry Andric     return EC;
9758bcb0991SDimitry Andric 
976753f127fSDimitry Andric   if (!llvm::compression::zlib::isAvailable())
9778bcb0991SDimitry Andric     return sampleprof_error::zlib_unavailable;
9788bcb0991SDimitry Andric 
979753f127fSDimitry Andric   uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
9808bcb0991SDimitry Andric   size_t UCSize = DecompressBufSize;
981bdd1243dSDimitry Andric   llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
982bdd1243dSDimitry Andric                                                 Buffer, UCSize);
9838bcb0991SDimitry Andric   if (E)
9848bcb0991SDimitry Andric     return sampleprof_error::uncompress_failed;
9858bcb0991SDimitry Andric   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
9868bcb0991SDimitry Andric   return sampleprof_error::success;
9878bcb0991SDimitry Andric }
9888bcb0991SDimitry Andric 
readImpl()9898bcb0991SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
9908bcb0991SDimitry Andric   const uint8_t *BufStart =
9918bcb0991SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
9928bcb0991SDimitry Andric 
9938bcb0991SDimitry Andric   for (auto &Entry : SecHdrTable) {
9948bcb0991SDimitry Andric     // Skip empty section.
9958bcb0991SDimitry Andric     if (!Entry.Size)
9968bcb0991SDimitry Andric       continue;
9978bcb0991SDimitry Andric 
998e8d8bef9SDimitry Andric     // Skip sections without context when SkipFlatProf is true.
999e8d8bef9SDimitry Andric     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1000e8d8bef9SDimitry Andric       continue;
1001e8d8bef9SDimitry Andric 
10028bcb0991SDimitry Andric     const uint8_t *SecStart = BufStart + Entry.Offset;
10038bcb0991SDimitry Andric     uint64_t SecSize = Entry.Size;
10048bcb0991SDimitry Andric 
10058bcb0991SDimitry Andric     // If the section is compressed, decompress it into a buffer
10068bcb0991SDimitry Andric     // DecompressBuf before reading the actual data. The pointee of
10078bcb0991SDimitry Andric     // 'Data' will be changed to buffer hold by DecompressBuf
10088bcb0991SDimitry Andric     // temporarily when reading the actual data.
10095ffd83dbSDimitry Andric     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
10108bcb0991SDimitry Andric     if (isCompressed) {
10118bcb0991SDimitry Andric       const uint8_t *DecompressBuf;
10128bcb0991SDimitry Andric       uint64_t DecompressBufSize;
10138bcb0991SDimitry Andric       if (std::error_code EC = decompressSection(
10148bcb0991SDimitry Andric               SecStart, SecSize, DecompressBuf, DecompressBufSize))
10158bcb0991SDimitry Andric         return EC;
10168bcb0991SDimitry Andric       SecStart = DecompressBuf;
10178bcb0991SDimitry Andric       SecSize = DecompressBufSize;
10188bcb0991SDimitry Andric     }
10198bcb0991SDimitry Andric 
10205ffd83dbSDimitry Andric     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
10218bcb0991SDimitry Andric       return EC;
10228bcb0991SDimitry Andric     if (Data != SecStart + SecSize)
10238bcb0991SDimitry Andric       return sampleprof_error::malformed;
10248bcb0991SDimitry Andric 
10258bcb0991SDimitry Andric     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
10268bcb0991SDimitry Andric     if (isCompressed) {
10278bcb0991SDimitry Andric       Data = BufStart + Entry.Offset;
10288bcb0991SDimitry Andric       End = BufStart + Buffer->getBufferSize();
10298bcb0991SDimitry Andric     }
10308bcb0991SDimitry Andric   }
10318bcb0991SDimitry Andric 
10328bcb0991SDimitry Andric   return sampleprof_error::success;
10338bcb0991SDimitry Andric }
10348bcb0991SDimitry Andric 
verifySPMagic(uint64_t Magic)10350b57cec5SDimitry Andric std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
10360b57cec5SDimitry Andric   if (Magic == SPMagic())
10370b57cec5SDimitry Andric     return sampleprof_error::success;
10380b57cec5SDimitry Andric   return sampleprof_error::bad_magic;
10390b57cec5SDimitry Andric }
10400b57cec5SDimitry Andric 
verifySPMagic(uint64_t Magic)10418bcb0991SDimitry Andric std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
10428bcb0991SDimitry Andric   if (Magic == SPMagic(SPF_Ext_Binary))
10438bcb0991SDimitry Andric     return sampleprof_error::success;
10448bcb0991SDimitry Andric   return sampleprof_error::bad_magic;
10458bcb0991SDimitry Andric }
10468bcb0991SDimitry Andric 
readNameTable()10478bcb0991SDimitry Andric std::error_code SampleProfileReaderBinary::readNameTable() {
1048fe013be4SDimitry Andric   auto Size = readNumber<size_t>();
10490b57cec5SDimitry Andric   if (std::error_code EC = Size.getError())
10500b57cec5SDimitry Andric     return EC;
1051fe013be4SDimitry Andric 
1052fe013be4SDimitry Andric   // Normally if useMD5 is true, the name table should have MD5 values, not
1053fe013be4SDimitry Andric   // strings, however in the case that ExtBinary profile has multiple name
1054fe013be4SDimitry Andric   // tables mixing string and MD5, all of them have to be normalized to use MD5,
1055fe013be4SDimitry Andric   // because optimization passes can only handle either type.
1056fe013be4SDimitry Andric   bool UseMD5 = useMD5();
1057fe013be4SDimitry Andric 
1058fe013be4SDimitry Andric   NameTable.clear();
1059fe013be4SDimitry Andric   NameTable.reserve(*Size);
1060*c9157d92SDimitry Andric   if (!ProfileIsCS) {
1061*c9157d92SDimitry Andric     MD5SampleContextTable.clear();
1062*c9157d92SDimitry Andric     if (UseMD5)
1063*c9157d92SDimitry Andric       MD5SampleContextTable.reserve(*Size);
1064*c9157d92SDimitry Andric     else
1065*c9157d92SDimitry Andric       // If we are using strings, delay MD5 computation since only a portion of
1066*c9157d92SDimitry Andric       // names are used by top level functions. Use 0 to indicate MD5 value is
1067*c9157d92SDimitry Andric       // to be calculated as no known string has a MD5 value of 0.
1068*c9157d92SDimitry Andric       MD5SampleContextTable.resize(*Size);
1069*c9157d92SDimitry Andric   }
1070fe013be4SDimitry Andric   for (size_t I = 0; I < *Size; ++I) {
10710b57cec5SDimitry Andric     auto Name(readString());
10720b57cec5SDimitry Andric     if (std::error_code EC = Name.getError())
10730b57cec5SDimitry Andric       return EC;
1074fe013be4SDimitry Andric     if (UseMD5) {
1075*c9157d92SDimitry Andric       FunctionId FID(*Name);
1076*c9157d92SDimitry Andric       if (!ProfileIsCS)
1077*c9157d92SDimitry Andric         MD5SampleContextTable.emplace_back(FID.getHashCode());
1078*c9157d92SDimitry Andric       NameTable.emplace_back(FID);
1079fe013be4SDimitry Andric     } else
1080*c9157d92SDimitry Andric       NameTable.push_back(FunctionId(*Name));
10810b57cec5SDimitry Andric   }
1082*c9157d92SDimitry Andric   if (!ProfileIsCS)
1083*c9157d92SDimitry Andric     MD5SampleContextStart = MD5SampleContextTable.data();
10840b57cec5SDimitry Andric   return sampleprof_error::success;
10850b57cec5SDimitry Andric }
10860b57cec5SDimitry Andric 
1087fe013be4SDimitry Andric std::error_code
readNameTableSec(bool IsMD5,bool FixedLengthMD5)1088fe013be4SDimitry Andric SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
1089fe013be4SDimitry Andric                                                    bool FixedLengthMD5) {
1090fe013be4SDimitry Andric   if (FixedLengthMD5) {
1091fe013be4SDimitry Andric     if (!IsMD5)
1092fe013be4SDimitry Andric       errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
1093fe013be4SDimitry Andric     auto Size = readNumber<size_t>();
10945ffd83dbSDimitry Andric     if (std::error_code EC = Size.getError())
10955ffd83dbSDimitry Andric       return EC;
1096fe013be4SDimitry Andric 
1097fe013be4SDimitry Andric     assert(Data + (*Size) * sizeof(uint64_t) == End &&
1098fe013be4SDimitry Andric            "Fixed length MD5 name table does not contain specified number of "
1099fe013be4SDimitry Andric            "entries");
1100fe013be4SDimitry Andric     if (Data + (*Size) * sizeof(uint64_t) > End)
1101fe013be4SDimitry Andric       return sampleprof_error::truncated;
1102fe013be4SDimitry Andric 
1103fe013be4SDimitry Andric     NameTable.clear();
1104*c9157d92SDimitry Andric     NameTable.reserve(*Size);
1105*c9157d92SDimitry Andric     for (size_t I = 0; I < *Size; ++I) {
1106*c9157d92SDimitry Andric       using namespace support;
1107*c9157d92SDimitry Andric       uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>(
1108*c9157d92SDimitry Andric           Data + I * sizeof(uint64_t));
1109*c9157d92SDimitry Andric       NameTable.emplace_back(FunctionId(FID));
1110*c9157d92SDimitry Andric     }
1111*c9157d92SDimitry Andric     if (!ProfileIsCS)
1112*c9157d92SDimitry Andric       MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
1113e8d8bef9SDimitry Andric     Data = Data + (*Size) * sizeof(uint64_t);
1114e8d8bef9SDimitry Andric     return sampleprof_error::success;
1115e8d8bef9SDimitry Andric   }
1116fe013be4SDimitry Andric 
1117fe013be4SDimitry Andric   if (IsMD5) {
1118fe013be4SDimitry Andric     assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
1119fe013be4SDimitry Andric     auto Size = readNumber<size_t>();
1120fe013be4SDimitry Andric     if (std::error_code EC = Size.getError())
1121fe013be4SDimitry Andric       return EC;
1122fe013be4SDimitry Andric 
1123fe013be4SDimitry Andric     NameTable.clear();
1124e8d8bef9SDimitry Andric     NameTable.reserve(*Size);
1125*c9157d92SDimitry Andric     if (!ProfileIsCS)
1126*c9157d92SDimitry Andric       MD5SampleContextTable.resize(*Size);
1127fe013be4SDimitry Andric     for (size_t I = 0; I < *Size; ++I) {
11285ffd83dbSDimitry Andric       auto FID = readNumber<uint64_t>();
11295ffd83dbSDimitry Andric       if (std::error_code EC = FID.getError())
11305ffd83dbSDimitry Andric         return EC;
1131*c9157d92SDimitry Andric       if (!ProfileIsCS)
1132*c9157d92SDimitry Andric         support::endian::write64le(&MD5SampleContextTable[I], *FID);
1133*c9157d92SDimitry Andric       NameTable.emplace_back(FunctionId(*FID));
11345ffd83dbSDimitry Andric     }
1135*c9157d92SDimitry Andric     if (!ProfileIsCS)
1136*c9157d92SDimitry Andric       MD5SampleContextStart = MD5SampleContextTable.data();
11375ffd83dbSDimitry Andric     return sampleprof_error::success;
11385ffd83dbSDimitry Andric   }
11395ffd83dbSDimitry Andric 
11405ffd83dbSDimitry Andric   return SampleProfileReaderBinary::readNameTable();
11415ffd83dbSDimitry Andric }
11425ffd83dbSDimitry Andric 
1143349cc55cSDimitry Andric // Read in the CS name table section, which basically contains a list of context
1144349cc55cSDimitry Andric // vectors. Each element of a context vector, aka a frame, refers to the
1145349cc55cSDimitry Andric // underlying raw function names that are stored in the name table, as well as
1146349cc55cSDimitry Andric // a callsite identifier that only makes sense for non-leaf frames.
readCSNameTableSec()1147349cc55cSDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1148fe013be4SDimitry Andric   auto Size = readNumber<size_t>();
1149349cc55cSDimitry Andric   if (std::error_code EC = Size.getError())
1150349cc55cSDimitry Andric     return EC;
1151349cc55cSDimitry Andric 
1152fe013be4SDimitry Andric   CSNameTable.clear();
1153fe013be4SDimitry Andric   CSNameTable.reserve(*Size);
1154*c9157d92SDimitry Andric   if (ProfileIsCS) {
1155*c9157d92SDimitry Andric     // Delay MD5 computation of CS context until they are needed. Use 0 to
1156*c9157d92SDimitry Andric     // indicate MD5 value is to be calculated as no known string has a MD5
1157*c9157d92SDimitry Andric     // value of 0.
1158*c9157d92SDimitry Andric     MD5SampleContextTable.clear();
1159*c9157d92SDimitry Andric     MD5SampleContextTable.resize(*Size);
1160*c9157d92SDimitry Andric     MD5SampleContextStart = MD5SampleContextTable.data();
1161*c9157d92SDimitry Andric   }
1162fe013be4SDimitry Andric   for (size_t I = 0; I < *Size; ++I) {
1163fe013be4SDimitry Andric     CSNameTable.emplace_back(SampleContextFrameVector());
1164349cc55cSDimitry Andric     auto ContextSize = readNumber<uint32_t>();
1165349cc55cSDimitry Andric     if (std::error_code EC = ContextSize.getError())
1166349cc55cSDimitry Andric       return EC;
1167349cc55cSDimitry Andric     for (uint32_t J = 0; J < *ContextSize; ++J) {
1168e8d8bef9SDimitry Andric       auto FName(readStringFromTable());
1169e8d8bef9SDimitry Andric       if (std::error_code EC = FName.getError())
1170e8d8bef9SDimitry Andric         return EC;
1171349cc55cSDimitry Andric       auto LineOffset = readNumber<uint64_t>();
1172349cc55cSDimitry Andric       if (std::error_code EC = LineOffset.getError())
1173349cc55cSDimitry Andric         return EC;
1174e8d8bef9SDimitry Andric 
1175349cc55cSDimitry Andric       if (!isOffsetLegal(*LineOffset))
1176349cc55cSDimitry Andric         return std::error_code();
1177fe6060f1SDimitry Andric 
1178349cc55cSDimitry Andric       auto Discriminator = readNumber<uint64_t>();
1179349cc55cSDimitry Andric       if (std::error_code EC = Discriminator.getError())
1180349cc55cSDimitry Andric         return EC;
1181349cc55cSDimitry Andric 
1182fe013be4SDimitry Andric       CSNameTable.back().emplace_back(
1183349cc55cSDimitry Andric           FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1184349cc55cSDimitry Andric     }
1185349cc55cSDimitry Andric   }
1186349cc55cSDimitry Andric 
1187349cc55cSDimitry Andric   return sampleprof_error::success;
1188349cc55cSDimitry Andric }
1189349cc55cSDimitry Andric 
1190349cc55cSDimitry Andric std::error_code
readFuncMetadata(bool ProfileHasAttribute,FunctionSamples * FProfile)11910eae32dcSDimitry Andric SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
11920eae32dcSDimitry Andric                                                    FunctionSamples *FProfile) {
11930eae32dcSDimitry Andric   if (Data < End) {
1194fe6060f1SDimitry Andric     if (ProfileIsProbeBased) {
1195e8d8bef9SDimitry Andric       auto Checksum = readNumber<uint64_t>();
1196e8d8bef9SDimitry Andric       if (std::error_code EC = Checksum.getError())
1197e8d8bef9SDimitry Andric         return EC;
11980eae32dcSDimitry Andric       if (FProfile)
11990eae32dcSDimitry Andric         FProfile->setFunctionHash(*Checksum);
1200e8d8bef9SDimitry Andric     }
1201d409305fSDimitry Andric 
1202fe6060f1SDimitry Andric     if (ProfileHasAttribute) {
1203fe6060f1SDimitry Andric       auto Attributes = readNumber<uint32_t>();
1204fe6060f1SDimitry Andric       if (std::error_code EC = Attributes.getError())
1205fe6060f1SDimitry Andric         return EC;
12060eae32dcSDimitry Andric       if (FProfile)
12070eae32dcSDimitry Andric         FProfile->getContext().setAllAttributes(*Attributes);
1208fe6060f1SDimitry Andric     }
12090eae32dcSDimitry Andric 
121081ad6265SDimitry Andric     if (!ProfileIsCS) {
12110eae32dcSDimitry Andric       // Read all the attributes for inlined function calls.
12120eae32dcSDimitry Andric       auto NumCallsites = readNumber<uint32_t>();
12130eae32dcSDimitry Andric       if (std::error_code EC = NumCallsites.getError())
12140eae32dcSDimitry Andric         return EC;
12150eae32dcSDimitry Andric 
12160eae32dcSDimitry Andric       for (uint32_t J = 0; J < *NumCallsites; ++J) {
12170eae32dcSDimitry Andric         auto LineOffset = readNumber<uint64_t>();
12180eae32dcSDimitry Andric         if (std::error_code EC = LineOffset.getError())
12190eae32dcSDimitry Andric           return EC;
12200eae32dcSDimitry Andric 
12210eae32dcSDimitry Andric         auto Discriminator = readNumber<uint64_t>();
12220eae32dcSDimitry Andric         if (std::error_code EC = Discriminator.getError())
12230eae32dcSDimitry Andric           return EC;
12240eae32dcSDimitry Andric 
1225*c9157d92SDimitry Andric         auto FContextHash(readSampleContextFromTable());
1226*c9157d92SDimitry Andric         if (std::error_code EC = FContextHash.getError())
12270eae32dcSDimitry Andric           return EC;
12280eae32dcSDimitry Andric 
1229*c9157d92SDimitry Andric         auto &[FContext, Hash] = *FContextHash;
12300eae32dcSDimitry Andric         FunctionSamples *CalleeProfile = nullptr;
12310eae32dcSDimitry Andric         if (FProfile) {
12320eae32dcSDimitry Andric           CalleeProfile = const_cast<FunctionSamples *>(
12330eae32dcSDimitry Andric               &FProfile->functionSamplesAt(LineLocation(
12340eae32dcSDimitry Andric                   *LineOffset,
1235*c9157d92SDimitry Andric                   *Discriminator))[FContext.getFunction()]);
12360eae32dcSDimitry Andric         }
12370eae32dcSDimitry Andric         if (std::error_code EC =
12380eae32dcSDimitry Andric                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
12390eae32dcSDimitry Andric           return EC;
12400eae32dcSDimitry Andric       }
12410eae32dcSDimitry Andric     }
12420eae32dcSDimitry Andric   }
12430eae32dcSDimitry Andric 
12440eae32dcSDimitry Andric   return sampleprof_error::success;
12450eae32dcSDimitry Andric }
12460eae32dcSDimitry Andric 
12470eae32dcSDimitry Andric std::error_code
readFuncMetadata(bool ProfileHasAttribute)12480eae32dcSDimitry Andric SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
12490eae32dcSDimitry Andric   while (Data < End) {
1250*c9157d92SDimitry Andric     auto FContextHash(readSampleContextFromTable());
1251*c9157d92SDimitry Andric     if (std::error_code EC = FContextHash.getError())
12520eae32dcSDimitry Andric       return EC;
1253*c9157d92SDimitry Andric     auto &[FContext, Hash] = *FContextHash;
12540eae32dcSDimitry Andric     FunctionSamples *FProfile = nullptr;
1255*c9157d92SDimitry Andric     auto It = Profiles.find(FContext);
12560eae32dcSDimitry Andric     if (It != Profiles.end())
12570eae32dcSDimitry Andric       FProfile = &It->second;
12580eae32dcSDimitry Andric 
12590eae32dcSDimitry Andric     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
12600eae32dcSDimitry Andric       return EC;
1261fe6060f1SDimitry Andric   }
1262fe6060f1SDimitry Andric 
1263d409305fSDimitry Andric   assert(Data == End && "More data is read than expected");
1264e8d8bef9SDimitry Andric   return sampleprof_error::success;
1265e8d8bef9SDimitry Andric }
1266e8d8bef9SDimitry Andric 
1267e8d8bef9SDimitry Andric std::error_code
readSecHdrTableEntry(uint64_t Idx)1268fe013be4SDimitry Andric SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
12698bcb0991SDimitry Andric   SecHdrTableEntry Entry;
12708bcb0991SDimitry Andric   auto Type = readUnencodedNumber<uint64_t>();
12718bcb0991SDimitry Andric   if (std::error_code EC = Type.getError())
12728bcb0991SDimitry Andric     return EC;
12738bcb0991SDimitry Andric   Entry.Type = static_cast<SecType>(*Type);
12740b57cec5SDimitry Andric 
12758bcb0991SDimitry Andric   auto Flags = readUnencodedNumber<uint64_t>();
12768bcb0991SDimitry Andric   if (std::error_code EC = Flags.getError())
12778bcb0991SDimitry Andric     return EC;
12788bcb0991SDimitry Andric   Entry.Flags = *Flags;
12798bcb0991SDimitry Andric 
12808bcb0991SDimitry Andric   auto Offset = readUnencodedNumber<uint64_t>();
12818bcb0991SDimitry Andric   if (std::error_code EC = Offset.getError())
12828bcb0991SDimitry Andric     return EC;
12838bcb0991SDimitry Andric   Entry.Offset = *Offset;
12848bcb0991SDimitry Andric 
12858bcb0991SDimitry Andric   auto Size = readUnencodedNumber<uint64_t>();
12868bcb0991SDimitry Andric   if (std::error_code EC = Size.getError())
12878bcb0991SDimitry Andric     return EC;
12888bcb0991SDimitry Andric   Entry.Size = *Size;
12898bcb0991SDimitry Andric 
1290e8d8bef9SDimitry Andric   Entry.LayoutIndex = Idx;
12918bcb0991SDimitry Andric   SecHdrTable.push_back(std::move(Entry));
12928bcb0991SDimitry Andric   return sampleprof_error::success;
12938bcb0991SDimitry Andric }
12948bcb0991SDimitry Andric 
readSecHdrTable()12958bcb0991SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
12968bcb0991SDimitry Andric   auto EntryNum = readUnencodedNumber<uint64_t>();
12978bcb0991SDimitry Andric   if (std::error_code EC = EntryNum.getError())
12988bcb0991SDimitry Andric     return EC;
12998bcb0991SDimitry Andric 
1300bdd1243dSDimitry Andric   for (uint64_t i = 0; i < (*EntryNum); i++)
1301e8d8bef9SDimitry Andric     if (std::error_code EC = readSecHdrTableEntry(i))
13028bcb0991SDimitry Andric       return EC;
13038bcb0991SDimitry Andric 
13048bcb0991SDimitry Andric   return sampleprof_error::success;
13058bcb0991SDimitry Andric }
13068bcb0991SDimitry Andric 
readHeader()13078bcb0991SDimitry Andric std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
13088bcb0991SDimitry Andric   const uint8_t *BufStart =
13098bcb0991SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
13108bcb0991SDimitry Andric   Data = BufStart;
13118bcb0991SDimitry Andric   End = BufStart + Buffer->getBufferSize();
13128bcb0991SDimitry Andric 
13138bcb0991SDimitry Andric   if (std::error_code EC = readMagicIdent())
13148bcb0991SDimitry Andric     return EC;
13158bcb0991SDimitry Andric 
13168bcb0991SDimitry Andric   if (std::error_code EC = readSecHdrTable())
13178bcb0991SDimitry Andric     return EC;
13188bcb0991SDimitry Andric 
13198bcb0991SDimitry Andric   return sampleprof_error::success;
13208bcb0991SDimitry Andric }
13218bcb0991SDimitry Andric 
getSectionSize(SecType Type)13228bcb0991SDimitry Andric uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1323e8d8bef9SDimitry Andric   uint64_t Size = 0;
13248bcb0991SDimitry Andric   for (auto &Entry : SecHdrTable) {
13258bcb0991SDimitry Andric     if (Entry.Type == Type)
1326e8d8bef9SDimitry Andric       Size += Entry.Size;
13278bcb0991SDimitry Andric   }
1328e8d8bef9SDimitry Andric   return Size;
13298bcb0991SDimitry Andric }
13308bcb0991SDimitry Andric 
getFileSize()13318bcb0991SDimitry Andric uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
13328bcb0991SDimitry Andric   // Sections in SecHdrTable is not necessarily in the same order as
13338bcb0991SDimitry Andric   // sections in the profile because section like FuncOffsetTable needs
13348bcb0991SDimitry Andric   // to be written after section LBRProfile but needs to be read before
13358bcb0991SDimitry Andric   // section LBRProfile, so we cannot simply use the last entry in
13368bcb0991SDimitry Andric   // SecHdrTable to calculate the file size.
13378bcb0991SDimitry Andric   uint64_t FileSize = 0;
13388bcb0991SDimitry Andric   for (auto &Entry : SecHdrTable) {
13398bcb0991SDimitry Andric     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
13408bcb0991SDimitry Andric   }
13418bcb0991SDimitry Andric   return FileSize;
13428bcb0991SDimitry Andric }
13438bcb0991SDimitry Andric 
getSecFlagsStr(const SecHdrTableEntry & Entry)13445ffd83dbSDimitry Andric static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
13455ffd83dbSDimitry Andric   std::string Flags;
13465ffd83dbSDimitry Andric   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
13475ffd83dbSDimitry Andric     Flags.append("{compressed,");
13485ffd83dbSDimitry Andric   else
13495ffd83dbSDimitry Andric     Flags.append("{");
13505ffd83dbSDimitry Andric 
1351e8d8bef9SDimitry Andric   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1352e8d8bef9SDimitry Andric     Flags.append("flat,");
1353e8d8bef9SDimitry Andric 
13545ffd83dbSDimitry Andric   switch (Entry.Type) {
13555ffd83dbSDimitry Andric   case SecNameTable:
1356e8d8bef9SDimitry Andric     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1357e8d8bef9SDimitry Andric       Flags.append("fixlenmd5,");
1358e8d8bef9SDimitry Andric     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
13595ffd83dbSDimitry Andric       Flags.append("md5,");
1360fe6060f1SDimitry Andric     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1361fe6060f1SDimitry Andric       Flags.append("uniq,");
13625ffd83dbSDimitry Andric     break;
13635ffd83dbSDimitry Andric   case SecProfSummary:
13645ffd83dbSDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
13655ffd83dbSDimitry Andric       Flags.append("partial,");
1366fe6060f1SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1367fe6060f1SDimitry Andric       Flags.append("context,");
136881ad6265SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
136981ad6265SDimitry Andric       Flags.append("preInlined,");
1370fe6060f1SDimitry Andric     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1371fe6060f1SDimitry Andric       Flags.append("fs-discriminator,");
13725ffd83dbSDimitry Andric     break;
1373349cc55cSDimitry Andric   case SecFuncOffsetTable:
1374349cc55cSDimitry Andric     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1375349cc55cSDimitry Andric       Flags.append("ordered,");
1376349cc55cSDimitry Andric     break;
1377349cc55cSDimitry Andric   case SecFuncMetadata:
1378349cc55cSDimitry Andric     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1379349cc55cSDimitry Andric       Flags.append("probe,");
1380349cc55cSDimitry Andric     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1381349cc55cSDimitry Andric       Flags.append("attr,");
1382349cc55cSDimitry Andric     break;
13835ffd83dbSDimitry Andric   default:
13845ffd83dbSDimitry Andric     break;
13855ffd83dbSDimitry Andric   }
13865ffd83dbSDimitry Andric   char &last = Flags.back();
13875ffd83dbSDimitry Andric   if (last == ',')
13885ffd83dbSDimitry Andric     last = '}';
13895ffd83dbSDimitry Andric   else
13905ffd83dbSDimitry Andric     Flags.append("}");
13915ffd83dbSDimitry Andric   return Flags;
13925ffd83dbSDimitry Andric }
13935ffd83dbSDimitry Andric 
dumpSectionInfo(raw_ostream & OS)13948bcb0991SDimitry Andric bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
13958bcb0991SDimitry Andric   uint64_t TotalSecsSize = 0;
13968bcb0991SDimitry Andric   for (auto &Entry : SecHdrTable) {
13978bcb0991SDimitry Andric     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
13985ffd83dbSDimitry Andric        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
13995ffd83dbSDimitry Andric        << "\n";
14005ffd83dbSDimitry Andric     ;
1401e8d8bef9SDimitry Andric     TotalSecsSize += Entry.Size;
14028bcb0991SDimitry Andric   }
14038bcb0991SDimitry Andric   uint64_t HeaderSize = SecHdrTable.front().Offset;
14048bcb0991SDimitry Andric   assert(HeaderSize + TotalSecsSize == getFileSize() &&
14058bcb0991SDimitry Andric          "Size of 'header + sections' doesn't match the total size of profile");
14068bcb0991SDimitry Andric 
14078bcb0991SDimitry Andric   OS << "Header Size: " << HeaderSize << "\n";
14088bcb0991SDimitry Andric   OS << "Total Sections Size: " << TotalSecsSize << "\n";
14098bcb0991SDimitry Andric   OS << "File Size: " << getFileSize() << "\n";
14108bcb0991SDimitry Andric   return true;
14118bcb0991SDimitry Andric }
14128bcb0991SDimitry Andric 
readMagicIdent()14138bcb0991SDimitry Andric std::error_code SampleProfileReaderBinary::readMagicIdent() {
14140b57cec5SDimitry Andric   // Read and check the magic identifier.
14150b57cec5SDimitry Andric   auto Magic = readNumber<uint64_t>();
14160b57cec5SDimitry Andric   if (std::error_code EC = Magic.getError())
14170b57cec5SDimitry Andric     return EC;
14180b57cec5SDimitry Andric   else if (std::error_code EC = verifySPMagic(*Magic))
14190b57cec5SDimitry Andric     return EC;
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric   // Read the version number.
14220b57cec5SDimitry Andric   auto Version = readNumber<uint64_t>();
14230b57cec5SDimitry Andric   if (std::error_code EC = Version.getError())
14240b57cec5SDimitry Andric     return EC;
14250b57cec5SDimitry Andric   else if (*Version != SPVersion())
14260b57cec5SDimitry Andric     return sampleprof_error::unsupported_version;
14270b57cec5SDimitry Andric 
14288bcb0991SDimitry Andric   return sampleprof_error::success;
14298bcb0991SDimitry Andric }
14308bcb0991SDimitry Andric 
readHeader()14318bcb0991SDimitry Andric std::error_code SampleProfileReaderBinary::readHeader() {
14328bcb0991SDimitry Andric   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
14338bcb0991SDimitry Andric   End = Data + Buffer->getBufferSize();
14348bcb0991SDimitry Andric 
14358bcb0991SDimitry Andric   if (std::error_code EC = readMagicIdent())
14368bcb0991SDimitry Andric     return EC;
14378bcb0991SDimitry Andric 
14380b57cec5SDimitry Andric   if (std::error_code EC = readSummary())
14390b57cec5SDimitry Andric     return EC;
14400b57cec5SDimitry Andric 
14410b57cec5SDimitry Andric   if (std::error_code EC = readNameTable())
14420b57cec5SDimitry Andric     return EC;
14430b57cec5SDimitry Andric   return sampleprof_error::success;
14440b57cec5SDimitry Andric }
14450b57cec5SDimitry Andric 
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)14460b57cec5SDimitry Andric std::error_code SampleProfileReaderBinary::readSummaryEntry(
14470b57cec5SDimitry Andric     std::vector<ProfileSummaryEntry> &Entries) {
14480b57cec5SDimitry Andric   auto Cutoff = readNumber<uint64_t>();
14490b57cec5SDimitry Andric   if (std::error_code EC = Cutoff.getError())
14500b57cec5SDimitry Andric     return EC;
14510b57cec5SDimitry Andric 
14520b57cec5SDimitry Andric   auto MinBlockCount = readNumber<uint64_t>();
14530b57cec5SDimitry Andric   if (std::error_code EC = MinBlockCount.getError())
14540b57cec5SDimitry Andric     return EC;
14550b57cec5SDimitry Andric 
14560b57cec5SDimitry Andric   auto NumBlocks = readNumber<uint64_t>();
14570b57cec5SDimitry Andric   if (std::error_code EC = NumBlocks.getError())
14580b57cec5SDimitry Andric     return EC;
14590b57cec5SDimitry Andric 
14600b57cec5SDimitry Andric   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
14610b57cec5SDimitry Andric   return sampleprof_error::success;
14620b57cec5SDimitry Andric }
14630b57cec5SDimitry Andric 
readSummary()14640b57cec5SDimitry Andric std::error_code SampleProfileReaderBinary::readSummary() {
14650b57cec5SDimitry Andric   auto TotalCount = readNumber<uint64_t>();
14660b57cec5SDimitry Andric   if (std::error_code EC = TotalCount.getError())
14670b57cec5SDimitry Andric     return EC;
14680b57cec5SDimitry Andric 
14690b57cec5SDimitry Andric   auto MaxBlockCount = readNumber<uint64_t>();
14700b57cec5SDimitry Andric   if (std::error_code EC = MaxBlockCount.getError())
14710b57cec5SDimitry Andric     return EC;
14720b57cec5SDimitry Andric 
14730b57cec5SDimitry Andric   auto MaxFunctionCount = readNumber<uint64_t>();
14740b57cec5SDimitry Andric   if (std::error_code EC = MaxFunctionCount.getError())
14750b57cec5SDimitry Andric     return EC;
14760b57cec5SDimitry Andric 
14770b57cec5SDimitry Andric   auto NumBlocks = readNumber<uint64_t>();
14780b57cec5SDimitry Andric   if (std::error_code EC = NumBlocks.getError())
14790b57cec5SDimitry Andric     return EC;
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric   auto NumFunctions = readNumber<uint64_t>();
14820b57cec5SDimitry Andric   if (std::error_code EC = NumFunctions.getError())
14830b57cec5SDimitry Andric     return EC;
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric   auto NumSummaryEntries = readNumber<uint64_t>();
14860b57cec5SDimitry Andric   if (std::error_code EC = NumSummaryEntries.getError())
14870b57cec5SDimitry Andric     return EC;
14880b57cec5SDimitry Andric 
14890b57cec5SDimitry Andric   std::vector<ProfileSummaryEntry> Entries;
14900b57cec5SDimitry Andric   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
14910b57cec5SDimitry Andric     std::error_code EC = readSummaryEntry(Entries);
14920b57cec5SDimitry Andric     if (EC != sampleprof_error::success)
14930b57cec5SDimitry Andric       return EC;
14940b57cec5SDimitry Andric   }
14958bcb0991SDimitry Andric   Summary = std::make_unique<ProfileSummary>(
14960b57cec5SDimitry Andric       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
14970b57cec5SDimitry Andric       *MaxFunctionCount, *NumBlocks, *NumFunctions);
14980b57cec5SDimitry Andric 
14990b57cec5SDimitry Andric   return sampleprof_error::success;
15000b57cec5SDimitry Andric }
15010b57cec5SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)15020b57cec5SDimitry Andric bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
15030b57cec5SDimitry Andric   const uint8_t *Data =
15040b57cec5SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
15050b57cec5SDimitry Andric   uint64_t Magic = decodeULEB128(Data);
15060b57cec5SDimitry Andric   return Magic == SPMagic();
15070b57cec5SDimitry Andric }
15080b57cec5SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)15098bcb0991SDimitry Andric bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
15108bcb0991SDimitry Andric   const uint8_t *Data =
15118bcb0991SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
15128bcb0991SDimitry Andric   uint64_t Magic = decodeULEB128(Data);
15138bcb0991SDimitry Andric   return Magic == SPMagic(SPF_Ext_Binary);
15148bcb0991SDimitry Andric }
15158bcb0991SDimitry Andric 
skipNextWord()15160b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::skipNextWord() {
15170b57cec5SDimitry Andric   uint32_t dummy;
15180b57cec5SDimitry Andric   if (!GcovBuffer.readInt(dummy))
15190b57cec5SDimitry Andric     return sampleprof_error::truncated;
15200b57cec5SDimitry Andric   return sampleprof_error::success;
15210b57cec5SDimitry Andric }
15220b57cec5SDimitry Andric 
readNumber()15230b57cec5SDimitry Andric template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
15240b57cec5SDimitry Andric   if (sizeof(T) <= sizeof(uint32_t)) {
15250b57cec5SDimitry Andric     uint32_t Val;
15260b57cec5SDimitry Andric     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
15270b57cec5SDimitry Andric       return static_cast<T>(Val);
15280b57cec5SDimitry Andric   } else if (sizeof(T) <= sizeof(uint64_t)) {
15290b57cec5SDimitry Andric     uint64_t Val;
15300b57cec5SDimitry Andric     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
15310b57cec5SDimitry Andric       return static_cast<T>(Val);
15320b57cec5SDimitry Andric   }
15330b57cec5SDimitry Andric 
15340b57cec5SDimitry Andric   std::error_code EC = sampleprof_error::malformed;
15350b57cec5SDimitry Andric   reportError(0, EC.message());
15360b57cec5SDimitry Andric   return EC;
15370b57cec5SDimitry Andric }
15380b57cec5SDimitry Andric 
readString()15390b57cec5SDimitry Andric ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
15400b57cec5SDimitry Andric   StringRef Str;
15410b57cec5SDimitry Andric   if (!GcovBuffer.readString(Str))
15420b57cec5SDimitry Andric     return sampleprof_error::truncated;
15430b57cec5SDimitry Andric   return Str;
15440b57cec5SDimitry Andric }
15450b57cec5SDimitry Andric 
readHeader()15460b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::readHeader() {
15470b57cec5SDimitry Andric   // Read the magic identifier.
15480b57cec5SDimitry Andric   if (!GcovBuffer.readGCDAFormat())
15490b57cec5SDimitry Andric     return sampleprof_error::unrecognized_format;
15500b57cec5SDimitry Andric 
15510b57cec5SDimitry Andric   // Read the version number. Note - the GCC reader does not validate this
15520b57cec5SDimitry Andric   // version, but the profile creator generates v704.
15530b57cec5SDimitry Andric   GCOV::GCOVVersion version;
15540b57cec5SDimitry Andric   if (!GcovBuffer.readGCOVVersion(version))
15550b57cec5SDimitry Andric     return sampleprof_error::unrecognized_format;
15560b57cec5SDimitry Andric 
15575ffd83dbSDimitry Andric   if (version != GCOV::V407)
15580b57cec5SDimitry Andric     return sampleprof_error::unsupported_version;
15590b57cec5SDimitry Andric 
15600b57cec5SDimitry Andric   // Skip the empty integer.
15610b57cec5SDimitry Andric   if (std::error_code EC = skipNextWord())
15620b57cec5SDimitry Andric     return EC;
15630b57cec5SDimitry Andric 
15640b57cec5SDimitry Andric   return sampleprof_error::success;
15650b57cec5SDimitry Andric }
15660b57cec5SDimitry Andric 
readSectionTag(uint32_t Expected)15670b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
15680b57cec5SDimitry Andric   uint32_t Tag;
15690b57cec5SDimitry Andric   if (!GcovBuffer.readInt(Tag))
15700b57cec5SDimitry Andric     return sampleprof_error::truncated;
15710b57cec5SDimitry Andric 
15720b57cec5SDimitry Andric   if (Tag != Expected)
15730b57cec5SDimitry Andric     return sampleprof_error::malformed;
15740b57cec5SDimitry Andric 
15750b57cec5SDimitry Andric   if (std::error_code EC = skipNextWord())
15760b57cec5SDimitry Andric     return EC;
15770b57cec5SDimitry Andric 
15780b57cec5SDimitry Andric   return sampleprof_error::success;
15790b57cec5SDimitry Andric }
15800b57cec5SDimitry Andric 
readNameTable()15810b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::readNameTable() {
15820b57cec5SDimitry Andric   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
15830b57cec5SDimitry Andric     return EC;
15840b57cec5SDimitry Andric 
15850b57cec5SDimitry Andric   uint32_t Size;
15860b57cec5SDimitry Andric   if (!GcovBuffer.readInt(Size))
15870b57cec5SDimitry Andric     return sampleprof_error::truncated;
15880b57cec5SDimitry Andric 
15890b57cec5SDimitry Andric   for (uint32_t I = 0; I < Size; ++I) {
15900b57cec5SDimitry Andric     StringRef Str;
15910b57cec5SDimitry Andric     if (!GcovBuffer.readString(Str))
15920b57cec5SDimitry Andric       return sampleprof_error::truncated;
15935ffd83dbSDimitry Andric     Names.push_back(std::string(Str));
15940b57cec5SDimitry Andric   }
15950b57cec5SDimitry Andric 
15960b57cec5SDimitry Andric   return sampleprof_error::success;
15970b57cec5SDimitry Andric }
15980b57cec5SDimitry Andric 
readFunctionProfiles()15990b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
16000b57cec5SDimitry Andric   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
16010b57cec5SDimitry Andric     return EC;
16020b57cec5SDimitry Andric 
16030b57cec5SDimitry Andric   uint32_t NumFunctions;
16040b57cec5SDimitry Andric   if (!GcovBuffer.readInt(NumFunctions))
16050b57cec5SDimitry Andric     return sampleprof_error::truncated;
16060b57cec5SDimitry Andric 
16070b57cec5SDimitry Andric   InlineCallStack Stack;
16080b57cec5SDimitry Andric   for (uint32_t I = 0; I < NumFunctions; ++I)
16090b57cec5SDimitry Andric     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
16100b57cec5SDimitry Andric       return EC;
16110b57cec5SDimitry Andric 
16120b57cec5SDimitry Andric   computeSummary();
16130b57cec5SDimitry Andric   return sampleprof_error::success;
16140b57cec5SDimitry Andric }
16150b57cec5SDimitry Andric 
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)16160b57cec5SDimitry Andric std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
16170b57cec5SDimitry Andric     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
16180b57cec5SDimitry Andric   uint64_t HeadCount = 0;
16190b57cec5SDimitry Andric   if (InlineStack.size() == 0)
16200b57cec5SDimitry Andric     if (!GcovBuffer.readInt64(HeadCount))
16210b57cec5SDimitry Andric       return sampleprof_error::truncated;
16220b57cec5SDimitry Andric 
16230b57cec5SDimitry Andric   uint32_t NameIdx;
16240b57cec5SDimitry Andric   if (!GcovBuffer.readInt(NameIdx))
16250b57cec5SDimitry Andric     return sampleprof_error::truncated;
16260b57cec5SDimitry Andric 
16270b57cec5SDimitry Andric   StringRef Name(Names[NameIdx]);
16280b57cec5SDimitry Andric 
16290b57cec5SDimitry Andric   uint32_t NumPosCounts;
16300b57cec5SDimitry Andric   if (!GcovBuffer.readInt(NumPosCounts))
16310b57cec5SDimitry Andric     return sampleprof_error::truncated;
16320b57cec5SDimitry Andric 
16330b57cec5SDimitry Andric   uint32_t NumCallsites;
16340b57cec5SDimitry Andric   if (!GcovBuffer.readInt(NumCallsites))
16350b57cec5SDimitry Andric     return sampleprof_error::truncated;
16360b57cec5SDimitry Andric 
16370b57cec5SDimitry Andric   FunctionSamples *FProfile = nullptr;
16380b57cec5SDimitry Andric   if (InlineStack.size() == 0) {
16390b57cec5SDimitry Andric     // If this is a top function that we have already processed, do not
16400b57cec5SDimitry Andric     // update its profile again.  This happens in the presence of
16410b57cec5SDimitry Andric     // function aliases.  Since these aliases share the same function
16420b57cec5SDimitry Andric     // body, there will be identical replicated profiles for the
16430b57cec5SDimitry Andric     // original function.  In this case, we simply not bother updating
16440b57cec5SDimitry Andric     // the profile of the original function.
1645*c9157d92SDimitry Andric     FProfile = &Profiles[FunctionId(Name)];
16460b57cec5SDimitry Andric     FProfile->addHeadSamples(HeadCount);
16470b57cec5SDimitry Andric     if (FProfile->getTotalSamples() > 0)
16480b57cec5SDimitry Andric       Update = false;
16490b57cec5SDimitry Andric   } else {
16500b57cec5SDimitry Andric     // Otherwise, we are reading an inlined instance. The top of the
16510b57cec5SDimitry Andric     // inline stack contains the profile of the caller. Insert this
16520b57cec5SDimitry Andric     // callee in the caller's CallsiteMap.
16530b57cec5SDimitry Andric     FunctionSamples *CallerProfile = InlineStack.front();
16540b57cec5SDimitry Andric     uint32_t LineOffset = Offset >> 16;
16550b57cec5SDimitry Andric     uint32_t Discriminator = Offset & 0xffff;
16560b57cec5SDimitry Andric     FProfile = &CallerProfile->functionSamplesAt(
1657*c9157d92SDimitry Andric         LineLocation(LineOffset, Discriminator))[FunctionId(Name)];
16580b57cec5SDimitry Andric   }
1659*c9157d92SDimitry Andric   FProfile->setFunction(FunctionId(Name));
16600b57cec5SDimitry Andric 
16610b57cec5SDimitry Andric   for (uint32_t I = 0; I < NumPosCounts; ++I) {
16620b57cec5SDimitry Andric     uint32_t Offset;
16630b57cec5SDimitry Andric     if (!GcovBuffer.readInt(Offset))
16640b57cec5SDimitry Andric       return sampleprof_error::truncated;
16650b57cec5SDimitry Andric 
16660b57cec5SDimitry Andric     uint32_t NumTargets;
16670b57cec5SDimitry Andric     if (!GcovBuffer.readInt(NumTargets))
16680b57cec5SDimitry Andric       return sampleprof_error::truncated;
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric     uint64_t Count;
16710b57cec5SDimitry Andric     if (!GcovBuffer.readInt64(Count))
16720b57cec5SDimitry Andric       return sampleprof_error::truncated;
16730b57cec5SDimitry Andric 
16740b57cec5SDimitry Andric     // The line location is encoded in the offset as:
16750b57cec5SDimitry Andric     //   high 16 bits: line offset to the start of the function.
16760b57cec5SDimitry Andric     //   low 16 bits: discriminator.
16770b57cec5SDimitry Andric     uint32_t LineOffset = Offset >> 16;
16780b57cec5SDimitry Andric     uint32_t Discriminator = Offset & 0xffff;
16790b57cec5SDimitry Andric 
16800b57cec5SDimitry Andric     InlineCallStack NewStack;
16810b57cec5SDimitry Andric     NewStack.push_back(FProfile);
1682e8d8bef9SDimitry Andric     llvm::append_range(NewStack, InlineStack);
16830b57cec5SDimitry Andric     if (Update) {
16840b57cec5SDimitry Andric       // Walk up the inline stack, adding the samples on this line to
16850b57cec5SDimitry Andric       // the total sample count of the callers in the chain.
1686bdd1243dSDimitry Andric       for (auto *CallerProfile : NewStack)
16870b57cec5SDimitry Andric         CallerProfile->addTotalSamples(Count);
16880b57cec5SDimitry Andric 
16890b57cec5SDimitry Andric       // Update the body samples for the current profile.
16900b57cec5SDimitry Andric       FProfile->addBodySamples(LineOffset, Discriminator, Count);
16910b57cec5SDimitry Andric     }
16920b57cec5SDimitry Andric 
16930b57cec5SDimitry Andric     // Process the list of functions called at an indirect call site.
16940b57cec5SDimitry Andric     // These are all the targets that a function pointer (or virtual
16950b57cec5SDimitry Andric     // function) resolved at runtime.
16960b57cec5SDimitry Andric     for (uint32_t J = 0; J < NumTargets; J++) {
16970b57cec5SDimitry Andric       uint32_t HistVal;
16980b57cec5SDimitry Andric       if (!GcovBuffer.readInt(HistVal))
16990b57cec5SDimitry Andric         return sampleprof_error::truncated;
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
17020b57cec5SDimitry Andric         return sampleprof_error::malformed;
17030b57cec5SDimitry Andric 
17040b57cec5SDimitry Andric       uint64_t TargetIdx;
17050b57cec5SDimitry Andric       if (!GcovBuffer.readInt64(TargetIdx))
17060b57cec5SDimitry Andric         return sampleprof_error::truncated;
17070b57cec5SDimitry Andric       StringRef TargetName(Names[TargetIdx]);
17080b57cec5SDimitry Andric 
17090b57cec5SDimitry Andric       uint64_t TargetCount;
17100b57cec5SDimitry Andric       if (!GcovBuffer.readInt64(TargetCount))
17110b57cec5SDimitry Andric         return sampleprof_error::truncated;
17120b57cec5SDimitry Andric 
17130b57cec5SDimitry Andric       if (Update)
17140b57cec5SDimitry Andric         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1715*c9157d92SDimitry Andric                                          FunctionId(TargetName),
1716*c9157d92SDimitry Andric                                          TargetCount);
17170b57cec5SDimitry Andric     }
17180b57cec5SDimitry Andric   }
17190b57cec5SDimitry Andric 
17200b57cec5SDimitry Andric   // Process all the inlined callers into the current function. These
17210b57cec5SDimitry Andric   // are all the callsites that were inlined into this function.
17220b57cec5SDimitry Andric   for (uint32_t I = 0; I < NumCallsites; I++) {
17230b57cec5SDimitry Andric     // The offset is encoded as:
17240b57cec5SDimitry Andric     //   high 16 bits: line offset to the start of the function.
17250b57cec5SDimitry Andric     //   low 16 bits: discriminator.
17260b57cec5SDimitry Andric     uint32_t Offset;
17270b57cec5SDimitry Andric     if (!GcovBuffer.readInt(Offset))
17280b57cec5SDimitry Andric       return sampleprof_error::truncated;
17290b57cec5SDimitry Andric     InlineCallStack NewStack;
17300b57cec5SDimitry Andric     NewStack.push_back(FProfile);
1731e8d8bef9SDimitry Andric     llvm::append_range(NewStack, InlineStack);
17320b57cec5SDimitry Andric     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
17330b57cec5SDimitry Andric       return EC;
17340b57cec5SDimitry Andric   }
17350b57cec5SDimitry Andric 
17360b57cec5SDimitry Andric   return sampleprof_error::success;
17370b57cec5SDimitry Andric }
17380b57cec5SDimitry Andric 
17390b57cec5SDimitry Andric /// Read a GCC AutoFDO profile.
17400b57cec5SDimitry Andric ///
17410b57cec5SDimitry Andric /// This format is generated by the Linux Perf conversion tool at
17420b57cec5SDimitry Andric /// https://github.com/google/autofdo.
readImpl()17438bcb0991SDimitry Andric std::error_code SampleProfileReaderGCC::readImpl() {
1744fe6060f1SDimitry Andric   assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
17450b57cec5SDimitry Andric   // Read the string table.
17460b57cec5SDimitry Andric   if (std::error_code EC = readNameTable())
17470b57cec5SDimitry Andric     return EC;
17480b57cec5SDimitry Andric 
17490b57cec5SDimitry Andric   // Read the source profile.
17500b57cec5SDimitry Andric   if (std::error_code EC = readFunctionProfiles())
17510b57cec5SDimitry Andric     return EC;
17520b57cec5SDimitry Andric 
17530b57cec5SDimitry Andric   return sampleprof_error::success;
17540b57cec5SDimitry Andric }
17550b57cec5SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)17560b57cec5SDimitry Andric bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
17570b57cec5SDimitry Andric   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
17580b57cec5SDimitry Andric   return Magic == "adcg*704";
17590b57cec5SDimitry Andric }
17600b57cec5SDimitry Andric 
applyRemapping(LLVMContext & Ctx)17618bcb0991SDimitry Andric void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
17625ffd83dbSDimitry Andric   // If the reader uses MD5 to represent string, we can't remap it because
17630b57cec5SDimitry Andric   // we don't know what the original function names were.
17645ffd83dbSDimitry Andric   if (Reader.useMD5()) {
17650b57cec5SDimitry Andric     Ctx.diagnose(DiagnosticInfoSampleProfile(
17668bcb0991SDimitry Andric         Reader.getBuffer()->getBufferIdentifier(),
17670b57cec5SDimitry Andric         "Profile data remapping cannot be applied to profile data "
1768fe013be4SDimitry Andric         "using MD5 names (original mangled names are not available).",
17690b57cec5SDimitry Andric         DS_Warning));
17708bcb0991SDimitry Andric     return;
17710b57cec5SDimitry Andric   }
17720b57cec5SDimitry Andric 
1773e8d8bef9SDimitry Andric   // CSSPGO-TODO: Remapper is not yet supported.
1774e8d8bef9SDimitry Andric   // We will need to remap the entire context string.
17758bcb0991SDimitry Andric   assert(Remappings && "should be initialized while creating remapper");
1776e8d8bef9SDimitry Andric   for (auto &Sample : Reader.getProfiles()) {
1777*c9157d92SDimitry Andric     DenseSet<FunctionId> NamesInSample;
1778e8d8bef9SDimitry Andric     Sample.second.findAllNames(NamesInSample);
1779*c9157d92SDimitry Andric     for (auto &Name : NamesInSample) {
1780*c9157d92SDimitry Andric       StringRef NameStr = Name.stringRef();
1781*c9157d92SDimitry Andric       if (auto Key = Remappings->insert(NameStr))
1782*c9157d92SDimitry Andric         NameMap.insert({Key, NameStr});
1783*c9157d92SDimitry Andric     }
1784e8d8bef9SDimitry Andric   }
17850b57cec5SDimitry Andric 
17868bcb0991SDimitry Andric   RemappingApplied = true;
17870b57cec5SDimitry Andric }
17880b57cec5SDimitry Andric 
1789bdd1243dSDimitry Andric std::optional<StringRef>
lookUpNameInProfile(StringRef Fname)1790e8d8bef9SDimitry Andric SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1791*c9157d92SDimitry Andric   if (auto Key = Remappings->lookup(Fname)) {
1792*c9157d92SDimitry Andric     StringRef Result = NameMap.lookup(Key);
1793*c9157d92SDimitry Andric     if (!Result.empty())
1794*c9157d92SDimitry Andric       return Result;
1795*c9157d92SDimitry Andric   }
1796bdd1243dSDimitry Andric   return std::nullopt;
17970b57cec5SDimitry Andric }
17980b57cec5SDimitry Andric 
17990b57cec5SDimitry Andric /// Prepare a memory buffer for the contents of \p Filename.
18000b57cec5SDimitry Andric ///
18010b57cec5SDimitry Andric /// \returns an error code indicating the status of the buffer.
18020b57cec5SDimitry Andric static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename,vfs::FileSystem & FS)1803fe013be4SDimitry Andric setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
1804fe013be4SDimitry Andric   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
1805fe013be4SDimitry Andric                                            : FS.getBufferForFile(Filename);
18060b57cec5SDimitry Andric   if (std::error_code EC = BufferOrErr.getError())
18070b57cec5SDimitry Andric     return EC;
18080b57cec5SDimitry Andric   auto Buffer = std::move(BufferOrErr.get());
18090b57cec5SDimitry Andric 
18100b57cec5SDimitry Andric   return std::move(Buffer);
18110b57cec5SDimitry Andric }
18120b57cec5SDimitry Andric 
18130b57cec5SDimitry Andric /// Create a sample profile reader based on the format of the input file.
18140b57cec5SDimitry Andric ///
18150b57cec5SDimitry Andric /// \param Filename The file to open.
18160b57cec5SDimitry Andric ///
18170b57cec5SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
18180b57cec5SDimitry Andric ///
1819fe6060f1SDimitry Andric /// \param P The FSDiscriminatorPass.
1820fe6060f1SDimitry Andric ///
18218bcb0991SDimitry Andric /// \param RemapFilename The file used for profile remapping.
18228bcb0991SDimitry Andric ///
18230b57cec5SDimitry Andric /// \returns an error code indicating the status of the created reader.
18240b57cec5SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,vfs::FileSystem & FS,FSDiscriminatorPass P,const std::string RemapFilename)18258bcb0991SDimitry Andric SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1826fe013be4SDimitry Andric                             vfs::FileSystem &FS, FSDiscriminatorPass P,
18278bcb0991SDimitry Andric                             const std::string RemapFilename) {
1828fe013be4SDimitry Andric   auto BufferOrError = setupMemoryBuffer(Filename, FS);
18290b57cec5SDimitry Andric   if (std::error_code EC = BufferOrError.getError())
18300b57cec5SDimitry Andric     return EC;
1831fe013be4SDimitry Andric   return create(BufferOrError.get(), C, FS, P, RemapFilename);
18320b57cec5SDimitry Andric }
18330b57cec5SDimitry Andric 
18340b57cec5SDimitry Andric /// Create a sample profile remapper from the given input, to remap the
18350b57cec5SDimitry Andric /// function names in the given profile data.
18360b57cec5SDimitry Andric ///
18370b57cec5SDimitry Andric /// \param Filename The file to open.
18380b57cec5SDimitry Andric ///
18398bcb0991SDimitry Andric /// \param Reader The profile reader the remapper is going to be applied to.
18408bcb0991SDimitry Andric ///
18410b57cec5SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
18420b57cec5SDimitry Andric ///
18430b57cec5SDimitry Andric /// \returns an error code indicating the status of the created reader.
18448bcb0991SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,vfs::FileSystem & FS,SampleProfileReader & Reader,LLVMContext & C)18458bcb0991SDimitry Andric SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1846fe013be4SDimitry Andric                                            vfs::FileSystem &FS,
18478bcb0991SDimitry Andric                                            SampleProfileReader &Reader,
18488bcb0991SDimitry Andric                                            LLVMContext &C) {
1849fe013be4SDimitry Andric   auto BufferOrError = setupMemoryBuffer(Filename, FS);
18500b57cec5SDimitry Andric   if (std::error_code EC = BufferOrError.getError())
18510b57cec5SDimitry Andric     return EC;
18528bcb0991SDimitry Andric   return create(BufferOrError.get(), Reader, C);
18538bcb0991SDimitry Andric }
18548bcb0991SDimitry Andric 
18558bcb0991SDimitry Andric /// Create a sample profile remapper from the given input, to remap the
18568bcb0991SDimitry Andric /// function names in the given profile data.
18578bcb0991SDimitry Andric ///
18588bcb0991SDimitry Andric /// \param B The memory buffer to create the reader from (assumes ownership).
18598bcb0991SDimitry Andric ///
18608bcb0991SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
18618bcb0991SDimitry Andric ///
18628bcb0991SDimitry Andric /// \param Reader The profile reader the remapper is going to be applied to.
18638bcb0991SDimitry Andric ///
18648bcb0991SDimitry Andric /// \returns an error code indicating the status of the created reader.
18658bcb0991SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)18668bcb0991SDimitry Andric SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
18678bcb0991SDimitry Andric                                            SampleProfileReader &Reader,
18688bcb0991SDimitry Andric                                            LLVMContext &C) {
18698bcb0991SDimitry Andric   auto Remappings = std::make_unique<SymbolRemappingReader>();
187081ad6265SDimitry Andric   if (Error E = Remappings->read(*B)) {
18718bcb0991SDimitry Andric     handleAllErrors(
18728bcb0991SDimitry Andric         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
18738bcb0991SDimitry Andric           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
18748bcb0991SDimitry Andric                                                  ParseError.getLineNum(),
18758bcb0991SDimitry Andric                                                  ParseError.getMessage()));
18768bcb0991SDimitry Andric         });
18778bcb0991SDimitry Andric     return sampleprof_error::malformed;
18788bcb0991SDimitry Andric   }
18798bcb0991SDimitry Andric 
18808bcb0991SDimitry Andric   return std::make_unique<SampleProfileReaderItaniumRemapper>(
18818bcb0991SDimitry Andric       std::move(B), std::move(Remappings), Reader);
18820b57cec5SDimitry Andric }
18830b57cec5SDimitry Andric 
18840b57cec5SDimitry Andric /// Create a sample profile reader based on the format of the input data.
18850b57cec5SDimitry Andric ///
18860b57cec5SDimitry Andric /// \param B The memory buffer to create the reader from (assumes ownership).
18870b57cec5SDimitry Andric ///
18880b57cec5SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
18890b57cec5SDimitry Andric ///
1890fe6060f1SDimitry Andric /// \param P The FSDiscriminatorPass.
1891fe6060f1SDimitry Andric ///
18928bcb0991SDimitry Andric /// \param RemapFilename The file used for profile remapping.
18938bcb0991SDimitry Andric ///
18940b57cec5SDimitry Andric /// \returns an error code indicating the status of the created reader.
18950b57cec5SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,vfs::FileSystem & FS,FSDiscriminatorPass P,const std::string RemapFilename)18968bcb0991SDimitry Andric SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1897fe013be4SDimitry Andric                             vfs::FileSystem &FS, FSDiscriminatorPass P,
18988bcb0991SDimitry Andric                             const std::string RemapFilename) {
18990b57cec5SDimitry Andric   std::unique_ptr<SampleProfileReader> Reader;
19000b57cec5SDimitry Andric   if (SampleProfileReaderRawBinary::hasFormat(*B))
19010b57cec5SDimitry Andric     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
19028bcb0991SDimitry Andric   else if (SampleProfileReaderExtBinary::hasFormat(*B))
19038bcb0991SDimitry Andric     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
19040b57cec5SDimitry Andric   else if (SampleProfileReaderGCC::hasFormat(*B))
19050b57cec5SDimitry Andric     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
19060b57cec5SDimitry Andric   else if (SampleProfileReaderText::hasFormat(*B))
19070b57cec5SDimitry Andric     Reader.reset(new SampleProfileReaderText(std::move(B), C));
19080b57cec5SDimitry Andric   else
19090b57cec5SDimitry Andric     return sampleprof_error::unrecognized_format;
19100b57cec5SDimitry Andric 
19118bcb0991SDimitry Andric   if (!RemapFilename.empty()) {
1912fe013be4SDimitry Andric     auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
1913fe013be4SDimitry Andric         RemapFilename, FS, *Reader, C);
19148bcb0991SDimitry Andric     if (std::error_code EC = ReaderOrErr.getError()) {
19158bcb0991SDimitry Andric       std::string Msg = "Could not create remapper: " + EC.message();
19168bcb0991SDimitry Andric       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
19170b57cec5SDimitry Andric       return EC;
19188bcb0991SDimitry Andric     }
19198bcb0991SDimitry Andric     Reader->Remapper = std::move(ReaderOrErr.get());
19208bcb0991SDimitry Andric   }
19218bcb0991SDimitry Andric 
19228bcb0991SDimitry Andric   if (std::error_code EC = Reader->readHeader()) {
19238bcb0991SDimitry Andric     return EC;
19248bcb0991SDimitry Andric   }
19250b57cec5SDimitry Andric 
1926fe6060f1SDimitry Andric   Reader->setDiscriminatorMaskedBitFrom(P);
1927fe6060f1SDimitry Andric 
19280b57cec5SDimitry Andric   return std::move(Reader);
19290b57cec5SDimitry Andric }
19300b57cec5SDimitry Andric 
19310b57cec5SDimitry Andric // For text and GCC file formats, we compute the summary after reading the
19320b57cec5SDimitry Andric // profile. Binary format has the profile summary in its header.
computeSummary()19330b57cec5SDimitry Andric void SampleProfileReader::computeSummary() {
19340b57cec5SDimitry Andric   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1935d409305fSDimitry Andric   Summary = Builder.computeSummaryForProfiles(Profiles);
19360b57cec5SDimitry Andric }
1937