139d628a0SDimitry Andric //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
239d628a0SDimitry Andric //
339d628a0SDimitry Andric //                      The LLVM Compiler Infrastructure
439d628a0SDimitry Andric //
539d628a0SDimitry Andric // This file is distributed under the University of Illinois Open Source
639d628a0SDimitry Andric // License. See LICENSE.TXT for details.
739d628a0SDimitry Andric //
839d628a0SDimitry Andric //===----------------------------------------------------------------------===//
939d628a0SDimitry Andric //
1039d628a0SDimitry Andric // This file implements the class that reads LLVM sample profiles. It
117d523365SDimitry Andric // supports three file formats: text, binary and gcov.
1239d628a0SDimitry Andric //
137d523365SDimitry Andric // The textual representation is useful for debugging and testing purposes. The
147d523365SDimitry Andric // binary representation is more compact, resulting in smaller file sizes.
1539d628a0SDimitry Andric //
167d523365SDimitry Andric // The gcov encoding is the one generated by GCC's AutoFDO profile creation
177d523365SDimitry Andric // tool (https://github.com/google/autofdo)
1839d628a0SDimitry Andric //
197d523365SDimitry Andric // All three encodings can be used interchangeably as an input sample profile.
2039d628a0SDimitry Andric //
2139d628a0SDimitry Andric //===----------------------------------------------------------------------===//
2239d628a0SDimitry Andric 
2339d628a0SDimitry Andric #include "llvm/ProfileData/SampleProfReader.h"
247d523365SDimitry Andric #include "llvm/ADT/DenseMap.h"
253ca95b02SDimitry Andric #include "llvm/ADT/STLExtras.h"
267a7e6055SDimitry Andric #include "llvm/ADT/StringRef.h"
277a7e6055SDimitry Andric #include "llvm/IR/ProfileSummary.h"
287a7e6055SDimitry Andric #include "llvm/ProfileData/ProfileCommon.h"
297a7e6055SDimitry Andric #include "llvm/ProfileData/SampleProf.h"
3039d628a0SDimitry Andric #include "llvm/Support/ErrorOr.h"
3139d628a0SDimitry Andric #include "llvm/Support/LEB128.h"
3239d628a0SDimitry Andric #include "llvm/Support/LineIterator.h"
33*b5893f02SDimitry Andric #include "llvm/Support/MD5.h"
3439d628a0SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
357a7e6055SDimitry Andric #include "llvm/Support/raw_ostream.h"
367a7e6055SDimitry Andric #include <algorithm>
377a7e6055SDimitry Andric #include <cstddef>
387a7e6055SDimitry Andric #include <cstdint>
397a7e6055SDimitry Andric #include <limits>
407a7e6055SDimitry Andric #include <memory>
417a7e6055SDimitry Andric #include <system_error>
427a7e6055SDimitry Andric #include <vector>
4339d628a0SDimitry Andric 
4439d628a0SDimitry Andric using namespace llvm;
457a7e6055SDimitry Andric using namespace sampleprof;
4639d628a0SDimitry Andric 
474ba319b5SDimitry Andric /// Dump the function profile for \p FName.
4839d628a0SDimitry Andric ///
4939d628a0SDimitry Andric /// \param FName Name of the function to print.
5039d628a0SDimitry Andric /// \param OS Stream to emit the output to.
dumpFunctionProfile(StringRef FName,raw_ostream & OS)5139d628a0SDimitry Andric void SampleProfileReader::dumpFunctionProfile(StringRef FName,
5239d628a0SDimitry Andric                                               raw_ostream &OS) {
537d523365SDimitry Andric   OS << "Function: " << FName << ": " << Profiles[FName];
5439d628a0SDimitry Andric }
5539d628a0SDimitry Andric 
564ba319b5SDimitry Andric /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)5739d628a0SDimitry Andric void SampleProfileReader::dump(raw_ostream &OS) {
5839d628a0SDimitry Andric   for (const auto &I : Profiles)
5939d628a0SDimitry Andric     dumpFunctionProfile(I.getKey(), OS);
6039d628a0SDimitry Andric }
6139d628a0SDimitry Andric 
624ba319b5SDimitry Andric /// Parse \p Input as function head.
637d523365SDimitry Andric ///
647d523365SDimitry Andric /// Parse one line of \p Input, and update function name in \p FName,
657d523365SDimitry Andric /// function's total sample count in \p NumSamples, function's entry
667d523365SDimitry Andric /// count in \p NumHeadSamples.
677d523365SDimitry Andric ///
687d523365SDimitry Andric /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)697d523365SDimitry Andric static bool ParseHead(const StringRef &Input, StringRef &FName,
707d523365SDimitry Andric                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
717d523365SDimitry Andric   if (Input[0] == ' ')
727d523365SDimitry Andric     return false;
737d523365SDimitry Andric   size_t n2 = Input.rfind(':');
747d523365SDimitry Andric   size_t n1 = Input.rfind(':', n2 - 1);
757d523365SDimitry Andric   FName = Input.substr(0, n1);
767d523365SDimitry Andric   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
777d523365SDimitry Andric     return false;
787d523365SDimitry Andric   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
797d523365SDimitry Andric     return false;
807d523365SDimitry Andric   return true;
817d523365SDimitry Andric }
827d523365SDimitry Andric 
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is 64 bits wide so that offsets decoded as uint64_t are
/// checked at full width; with a narrower parameter the high bits would be
/// dropped by the implicit conversion and an out-of-range offset could be
/// reported as legal.
static bool isOffsetLegal(uint64_t L) { return (L & 0xffff) == L; }
857d523365SDimitry Andric 
864ba319b5SDimitry Andric /// Parse \p Input as line sample.
877d523365SDimitry Andric ///
887d523365SDimitry Andric /// \param Input input line.
897d523365SDimitry Andric /// \param IsCallsite true if the line represents an inlined callsite.
907d523365SDimitry Andric /// \param Depth the depth of the inline stack.
917d523365SDimitry Andric /// \param NumSamples total samples of the line/inlined callsite.
927d523365SDimitry Andric /// \param LineOffset line offset to the start of the function.
937d523365SDimitry Andric /// \param Discriminator discriminator of the line.
947d523365SDimitry Andric /// \param TargetCountMap map from indirect call target to count.
957d523365SDimitry Andric ///
967d523365SDimitry Andric /// returns true if parsing is successful.
ParseLine(const StringRef & Input,bool & IsCallsite,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap)977d523365SDimitry Andric static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
987d523365SDimitry Andric                       uint64_t &NumSamples, uint32_t &LineOffset,
997d523365SDimitry Andric                       uint32_t &Discriminator, StringRef &CalleeName,
1007d523365SDimitry Andric                       DenseMap<StringRef, uint64_t> &TargetCountMap) {
1017d523365SDimitry Andric   for (Depth = 0; Input[Depth] == ' '; Depth++)
1027d523365SDimitry Andric     ;
1037d523365SDimitry Andric   if (Depth == 0)
1047d523365SDimitry Andric     return false;
1057d523365SDimitry Andric 
1067d523365SDimitry Andric   size_t n1 = Input.find(':');
1077d523365SDimitry Andric   StringRef Loc = Input.substr(Depth, n1 - Depth);
1087d523365SDimitry Andric   size_t n2 = Loc.find('.');
1097d523365SDimitry Andric   if (n2 == StringRef::npos) {
1107d523365SDimitry Andric     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
1117d523365SDimitry Andric       return false;
1127d523365SDimitry Andric     Discriminator = 0;
1137d523365SDimitry Andric   } else {
1147d523365SDimitry Andric     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
1157d523365SDimitry Andric       return false;
1167d523365SDimitry Andric     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
1177d523365SDimitry Andric       return false;
1187d523365SDimitry Andric   }
1197d523365SDimitry Andric 
1207d523365SDimitry Andric   StringRef Rest = Input.substr(n1 + 2);
1217d523365SDimitry Andric   if (Rest[0] >= '0' && Rest[0] <= '9') {
1227d523365SDimitry Andric     IsCallsite = false;
1237d523365SDimitry Andric     size_t n3 = Rest.find(' ');
1247d523365SDimitry Andric     if (n3 == StringRef::npos) {
1257d523365SDimitry Andric       if (Rest.getAsInteger(10, NumSamples))
1267d523365SDimitry Andric         return false;
1277d523365SDimitry Andric     } else {
1287d523365SDimitry Andric       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
1297d523365SDimitry Andric         return false;
1307d523365SDimitry Andric     }
1314ba319b5SDimitry Andric     // Find call targets and their sample counts.
1324ba319b5SDimitry Andric     // Note: In some cases, there are symbols in the profile which are not
1334ba319b5SDimitry Andric     // mangled. To accommodate such cases, use colon + integer pairs as the
1344ba319b5SDimitry Andric     // anchor points.
1354ba319b5SDimitry Andric     // An example:
1364ba319b5SDimitry Andric     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
1374ba319b5SDimitry Andric     // ":1000" and ":437" are used as anchor points so the string above will
1384ba319b5SDimitry Andric     // be interpreted as
1394ba319b5SDimitry Andric     // target: _M_construct<char *>
1404ba319b5SDimitry Andric     // count: 1000
1414ba319b5SDimitry Andric     // target: string_view<std::allocator<char> >
1424ba319b5SDimitry Andric     // count: 437
1437d523365SDimitry Andric     while (n3 != StringRef::npos) {
1447d523365SDimitry Andric       n3 += Rest.substr(n3).find_first_not_of(' ');
1457d523365SDimitry Andric       Rest = Rest.substr(n3);
1464ba319b5SDimitry Andric       n3 = Rest.find_first_of(':');
1474ba319b5SDimitry Andric       if (n3 == StringRef::npos || n3 == 0)
1487d523365SDimitry Andric         return false;
1494ba319b5SDimitry Andric 
1504ba319b5SDimitry Andric       StringRef Target;
1514ba319b5SDimitry Andric       uint64_t count, n4;
1524ba319b5SDimitry Andric       while (true) {
1534ba319b5SDimitry Andric         // Get the segment after the current colon.
1544ba319b5SDimitry Andric         StringRef AfterColon = Rest.substr(n3 + 1);
1554ba319b5SDimitry Andric         // Get the target symbol before the current colon.
1564ba319b5SDimitry Andric         Target = Rest.substr(0, n3);
1574ba319b5SDimitry Andric         // Check if the word after the current colon is an integer.
1584ba319b5SDimitry Andric         n4 = AfterColon.find_first_of(' ');
1594ba319b5SDimitry Andric         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
1604ba319b5SDimitry Andric         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
1614ba319b5SDimitry Andric         if (!WordAfterColon.getAsInteger(10, count))
1624ba319b5SDimitry Andric           break;
1634ba319b5SDimitry Andric 
1644ba319b5SDimitry Andric         // Try to find the next colon.
1654ba319b5SDimitry Andric         uint64_t n5 = AfterColon.find_first_of(':');
1664ba319b5SDimitry Andric         if (n5 == StringRef::npos)
1674ba319b5SDimitry Andric           return false;
1684ba319b5SDimitry Andric         n3 += n5 + 1;
1694ba319b5SDimitry Andric       }
1704ba319b5SDimitry Andric 
1714ba319b5SDimitry Andric       // An anchor point is found. Save the {target, count} pair
1724ba319b5SDimitry Andric       TargetCountMap[Target] = count;
1734ba319b5SDimitry Andric       if (n4 == Rest.size())
1744ba319b5SDimitry Andric         break;
1754ba319b5SDimitry Andric       // Change n3 to the next blank space after colon + integer pair.
1764ba319b5SDimitry Andric       n3 = n4;
1777d523365SDimitry Andric     }
1787d523365SDimitry Andric   } else {
1797d523365SDimitry Andric     IsCallsite = true;
1807d523365SDimitry Andric     size_t n3 = Rest.find_last_of(':');
1817d523365SDimitry Andric     CalleeName = Rest.substr(0, n3);
1827d523365SDimitry Andric     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
1837d523365SDimitry Andric       return false;
1847d523365SDimitry Andric   }
1857d523365SDimitry Andric   return true;
1867d523365SDimitry Andric }
1877d523365SDimitry Andric 
1884ba319b5SDimitry Andric /// Load samples from a text file.
18939d628a0SDimitry Andric ///
19039d628a0SDimitry Andric /// See the documentation at the top of the file for an explanation of
19139d628a0SDimitry Andric /// the expected format.
19239d628a0SDimitry Andric ///
19339d628a0SDimitry Andric /// \returns true if the file was loaded successfully, false otherwise.
read()19439d628a0SDimitry Andric std::error_code SampleProfileReaderText::read() {
19539d628a0SDimitry Andric   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
1967d523365SDimitry Andric   sampleprof_error Result = sampleprof_error::success;
19739d628a0SDimitry Andric 
1987d523365SDimitry Andric   InlineCallStack InlineStack;
1997d523365SDimitry Andric 
2007d523365SDimitry Andric   for (; !LineIt.is_at_eof(); ++LineIt) {
2017d523365SDimitry Andric     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
2027d523365SDimitry Andric       continue;
20339d628a0SDimitry Andric     // Read the header of each function.
20439d628a0SDimitry Andric     //
20539d628a0SDimitry Andric     // Note that for function identifiers we are actually expecting
20639d628a0SDimitry Andric     // mangled names, but we may not always get them. This happens when
20739d628a0SDimitry Andric     // the compiler decides not to emit the function (e.g., it was inlined
20839d628a0SDimitry Andric     // and removed). In this case, the binary will not have the linkage
20939d628a0SDimitry Andric     // name for the function, so the profiler will emit the function's
21039d628a0SDimitry Andric     // unmangled name, which may contain characters like ':' and '>' in its
21139d628a0SDimitry Andric     // name (member functions, templates, etc).
21239d628a0SDimitry Andric     //
21339d628a0SDimitry Andric     // The only requirement we place on the identifier, then, is that it
21439d628a0SDimitry Andric     // should not begin with a number.
2157d523365SDimitry Andric     if ((*LineIt)[0] != ' ') {
2167d523365SDimitry Andric       uint64_t NumSamples, NumHeadSamples;
2177d523365SDimitry Andric       StringRef FName;
2187d523365SDimitry Andric       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
2197d523365SDimitry Andric         reportError(LineIt.line_number(),
22039d628a0SDimitry Andric                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
22139d628a0SDimitry Andric         return sampleprof_error::malformed;
22239d628a0SDimitry Andric       }
22339d628a0SDimitry Andric       Profiles[FName] = FunctionSamples();
22439d628a0SDimitry Andric       FunctionSamples &FProfile = Profiles[FName];
2253ca95b02SDimitry Andric       FProfile.setName(FName);
2267d523365SDimitry Andric       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
2277d523365SDimitry Andric       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
2287d523365SDimitry Andric       InlineStack.clear();
2297d523365SDimitry Andric       InlineStack.push_back(&FProfile);
2307d523365SDimitry Andric     } else {
2317d523365SDimitry Andric       uint64_t NumSamples;
2327d523365SDimitry Andric       StringRef FName;
2337d523365SDimitry Andric       DenseMap<StringRef, uint64_t> TargetCountMap;
2347d523365SDimitry Andric       bool IsCallsite;
2357d523365SDimitry Andric       uint32_t Depth, LineOffset, Discriminator;
2367d523365SDimitry Andric       if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
2377d523365SDimitry Andric                      Discriminator, FName, TargetCountMap)) {
2387d523365SDimitry Andric         reportError(LineIt.line_number(),
2397d523365SDimitry Andric                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
2407d523365SDimitry Andric                         *LineIt);
24139d628a0SDimitry Andric         return sampleprof_error::malformed;
24239d628a0SDimitry Andric       }
2437d523365SDimitry Andric       if (IsCallsite) {
2447d523365SDimitry Andric         while (InlineStack.size() > Depth) {
2457d523365SDimitry Andric           InlineStack.pop_back();
24639d628a0SDimitry Andric         }
2477d523365SDimitry Andric         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
2487a7e6055SDimitry Andric             LineLocation(LineOffset, Discriminator))[FName];
2493ca95b02SDimitry Andric         FSamples.setName(FName);
2507d523365SDimitry Andric         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
2517d523365SDimitry Andric         InlineStack.push_back(&FSamples);
2527d523365SDimitry Andric       } else {
2537d523365SDimitry Andric         while (InlineStack.size() > Depth) {
2547d523365SDimitry Andric           InlineStack.pop_back();
25539d628a0SDimitry Andric         }
2567d523365SDimitry Andric         FunctionSamples &FProfile = *InlineStack.back();
2577d523365SDimitry Andric         for (const auto &name_count : TargetCountMap) {
2587d523365SDimitry Andric           MergeResult(Result, FProfile.addCalledTargetSamples(
2597d523365SDimitry Andric                                   LineOffset, Discriminator, name_count.first,
2607d523365SDimitry Andric                                   name_count.second));
2617d523365SDimitry Andric         }
2627d523365SDimitry Andric         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
2637d523365SDimitry Andric                                                     NumSamples));
2647d523365SDimitry Andric       }
26539d628a0SDimitry Andric     }
26639d628a0SDimitry Andric   }
2673ca95b02SDimitry Andric   if (Result == sampleprof_error::success)
2683ca95b02SDimitry Andric     computeSummary();
26939d628a0SDimitry Andric 
2707d523365SDimitry Andric   return Result;
2717d523365SDimitry Andric }
2727d523365SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)2737d523365SDimitry Andric bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
2747d523365SDimitry Andric   bool result = false;
2757d523365SDimitry Andric 
2767d523365SDimitry Andric   // Check that the first non-comment line is a valid function header.
2777d523365SDimitry Andric   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
2787d523365SDimitry Andric   if (!LineIt.is_at_eof()) {
2797d523365SDimitry Andric     if ((*LineIt)[0] != ' ') {
2807d523365SDimitry Andric       uint64_t NumSamples, NumHeadSamples;
2817d523365SDimitry Andric       StringRef FName;
2827d523365SDimitry Andric       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
2837d523365SDimitry Andric     }
2847d523365SDimitry Andric   }
2857d523365SDimitry Andric 
2867d523365SDimitry Andric   return result;
28739d628a0SDimitry Andric }
28839d628a0SDimitry Andric 
readNumber()28939d628a0SDimitry Andric template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
29039d628a0SDimitry Andric   unsigned NumBytesRead = 0;
29139d628a0SDimitry Andric   std::error_code EC;
29239d628a0SDimitry Andric   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
29339d628a0SDimitry Andric 
29439d628a0SDimitry Andric   if (Val > std::numeric_limits<T>::max())
29539d628a0SDimitry Andric     EC = sampleprof_error::malformed;
29639d628a0SDimitry Andric   else if (Data + NumBytesRead > End)
29739d628a0SDimitry Andric     EC = sampleprof_error::truncated;
29839d628a0SDimitry Andric   else
29939d628a0SDimitry Andric     EC = sampleprof_error::success;
30039d628a0SDimitry Andric 
30139d628a0SDimitry Andric   if (EC) {
3027d523365SDimitry Andric     reportError(0, EC.message());
30339d628a0SDimitry Andric     return EC;
30439d628a0SDimitry Andric   }
30539d628a0SDimitry Andric 
30639d628a0SDimitry Andric   Data += NumBytesRead;
30739d628a0SDimitry Andric   return static_cast<T>(Val);
30839d628a0SDimitry Andric }
30939d628a0SDimitry Andric 
readString()31039d628a0SDimitry Andric ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
31139d628a0SDimitry Andric   std::error_code EC;
31239d628a0SDimitry Andric   StringRef Str(reinterpret_cast<const char *>(Data));
31339d628a0SDimitry Andric   if (Data + Str.size() + 1 > End) {
31439d628a0SDimitry Andric     EC = sampleprof_error::truncated;
3157d523365SDimitry Andric     reportError(0, EC.message());
31639d628a0SDimitry Andric     return EC;
31739d628a0SDimitry Andric   }
31839d628a0SDimitry Andric 
31939d628a0SDimitry Andric   Data += Str.size() + 1;
32039d628a0SDimitry Andric   return Str;
32139d628a0SDimitry Andric }
32239d628a0SDimitry Andric 
3234ba319b5SDimitry Andric template <typename T>
readUnencodedNumber()324*b5893f02SDimitry Andric ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
325*b5893f02SDimitry Andric   std::error_code EC;
326*b5893f02SDimitry Andric 
327*b5893f02SDimitry Andric   if (Data + sizeof(T) > End) {
328*b5893f02SDimitry Andric     EC = sampleprof_error::truncated;
329*b5893f02SDimitry Andric     reportError(0, EC.message());
330*b5893f02SDimitry Andric     return EC;
331*b5893f02SDimitry Andric   }
332*b5893f02SDimitry Andric 
333*b5893f02SDimitry Andric   using namespace support;
334*b5893f02SDimitry Andric   T Val = endian::readNext<T, little, unaligned>(Data);
335*b5893f02SDimitry Andric   return Val;
336*b5893f02SDimitry Andric }
337*b5893f02SDimitry Andric 
338*b5893f02SDimitry Andric template <typename T>
readStringIndex(T & Table)3394ba319b5SDimitry Andric inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
3407d523365SDimitry Andric   std::error_code EC;
3417d523365SDimitry Andric   auto Idx = readNumber<uint32_t>();
3427d523365SDimitry Andric   if (std::error_code EC = Idx.getError())
34339d628a0SDimitry Andric     return EC;
3444ba319b5SDimitry Andric   if (*Idx >= Table.size())
3457d523365SDimitry Andric     return sampleprof_error::truncated_name_table;
3464ba319b5SDimitry Andric   return *Idx;
3474ba319b5SDimitry Andric }
3484ba319b5SDimitry Andric 
readStringFromTable()3494ba319b5SDimitry Andric ErrorOr<StringRef> SampleProfileReaderRawBinary::readStringFromTable() {
3504ba319b5SDimitry Andric   auto Idx = readStringIndex(NameTable);
3514ba319b5SDimitry Andric   if (std::error_code EC = Idx.getError())
3524ba319b5SDimitry Andric     return EC;
3534ba319b5SDimitry Andric 
3547d523365SDimitry Andric   return NameTable[*Idx];
3557d523365SDimitry Andric }
35639d628a0SDimitry Andric 
readStringFromTable()3574ba319b5SDimitry Andric ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
3584ba319b5SDimitry Andric   auto Idx = readStringIndex(NameTable);
3594ba319b5SDimitry Andric   if (std::error_code EC = Idx.getError())
3604ba319b5SDimitry Andric     return EC;
3614ba319b5SDimitry Andric 
3624ba319b5SDimitry Andric   return StringRef(NameTable[*Idx]);
3634ba319b5SDimitry Andric }
3644ba319b5SDimitry Andric 
3657d523365SDimitry Andric std::error_code
readProfile(FunctionSamples & FProfile)3667d523365SDimitry Andric SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
3677d523365SDimitry Andric   auto NumSamples = readNumber<uint64_t>();
3687d523365SDimitry Andric   if (std::error_code EC = NumSamples.getError())
36939d628a0SDimitry Andric     return EC;
3707d523365SDimitry Andric   FProfile.addTotalSamples(*NumSamples);
37139d628a0SDimitry Andric 
37239d628a0SDimitry Andric   // Read the samples in the body.
3737d523365SDimitry Andric   auto NumRecords = readNumber<uint32_t>();
37439d628a0SDimitry Andric   if (std::error_code EC = NumRecords.getError())
37539d628a0SDimitry Andric     return EC;
3767d523365SDimitry Andric 
3777d523365SDimitry Andric   for (uint32_t I = 0; I < *NumRecords; ++I) {
37839d628a0SDimitry Andric     auto LineOffset = readNumber<uint64_t>();
37939d628a0SDimitry Andric     if (std::error_code EC = LineOffset.getError())
38039d628a0SDimitry Andric       return EC;
38139d628a0SDimitry Andric 
3827d523365SDimitry Andric     if (!isOffsetLegal(*LineOffset)) {
3837d523365SDimitry Andric       return std::error_code();
3847d523365SDimitry Andric     }
3857d523365SDimitry Andric 
38639d628a0SDimitry Andric     auto Discriminator = readNumber<uint64_t>();
38739d628a0SDimitry Andric     if (std::error_code EC = Discriminator.getError())
38839d628a0SDimitry Andric       return EC;
38939d628a0SDimitry Andric 
39039d628a0SDimitry Andric     auto NumSamples = readNumber<uint64_t>();
39139d628a0SDimitry Andric     if (std::error_code EC = NumSamples.getError())
39239d628a0SDimitry Andric       return EC;
39339d628a0SDimitry Andric 
3947d523365SDimitry Andric     auto NumCalls = readNumber<uint32_t>();
39539d628a0SDimitry Andric     if (std::error_code EC = NumCalls.getError())
39639d628a0SDimitry Andric       return EC;
39739d628a0SDimitry Andric 
3987d523365SDimitry Andric     for (uint32_t J = 0; J < *NumCalls; ++J) {
3997d523365SDimitry Andric       auto CalledFunction(readStringFromTable());
40039d628a0SDimitry Andric       if (std::error_code EC = CalledFunction.getError())
40139d628a0SDimitry Andric         return EC;
40239d628a0SDimitry Andric 
40339d628a0SDimitry Andric       auto CalledFunctionSamples = readNumber<uint64_t>();
40439d628a0SDimitry Andric       if (std::error_code EC = CalledFunctionSamples.getError())
40539d628a0SDimitry Andric         return EC;
40639d628a0SDimitry Andric 
40739d628a0SDimitry Andric       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
4087d523365SDimitry Andric                                       *CalledFunction, *CalledFunctionSamples);
40939d628a0SDimitry Andric     }
41039d628a0SDimitry Andric 
41139d628a0SDimitry Andric     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
41239d628a0SDimitry Andric   }
4137d523365SDimitry Andric 
4147d523365SDimitry Andric   // Read all the samples for inlined function calls.
4157d523365SDimitry Andric   auto NumCallsites = readNumber<uint32_t>();
4167d523365SDimitry Andric   if (std::error_code EC = NumCallsites.getError())
4177d523365SDimitry Andric     return EC;
4187d523365SDimitry Andric 
4197d523365SDimitry Andric   for (uint32_t J = 0; J < *NumCallsites; ++J) {
4207d523365SDimitry Andric     auto LineOffset = readNumber<uint64_t>();
4217d523365SDimitry Andric     if (std::error_code EC = LineOffset.getError())
4227d523365SDimitry Andric       return EC;
4237d523365SDimitry Andric 
4247d523365SDimitry Andric     auto Discriminator = readNumber<uint64_t>();
4257d523365SDimitry Andric     if (std::error_code EC = Discriminator.getError())
4267d523365SDimitry Andric       return EC;
4277d523365SDimitry Andric 
4287d523365SDimitry Andric     auto FName(readStringFromTable());
4297d523365SDimitry Andric     if (std::error_code EC = FName.getError())
4307d523365SDimitry Andric       return EC;
4317d523365SDimitry Andric 
4327a7e6055SDimitry Andric     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
4337a7e6055SDimitry Andric         LineLocation(*LineOffset, *Discriminator))[*FName];
4343ca95b02SDimitry Andric     CalleeProfile.setName(*FName);
4357d523365SDimitry Andric     if (std::error_code EC = readProfile(CalleeProfile))
4367d523365SDimitry Andric       return EC;
4377d523365SDimitry Andric   }
4387d523365SDimitry Andric 
4397d523365SDimitry Andric   return sampleprof_error::success;
4407d523365SDimitry Andric }
4417d523365SDimitry Andric 
readFuncProfile()442*b5893f02SDimitry Andric std::error_code SampleProfileReaderBinary::readFuncProfile() {
4437d523365SDimitry Andric   auto NumHeadSamples = readNumber<uint64_t>();
4447d523365SDimitry Andric   if (std::error_code EC = NumHeadSamples.getError())
4457d523365SDimitry Andric     return EC;
4467d523365SDimitry Andric 
4477d523365SDimitry Andric   auto FName(readStringFromTable());
4487d523365SDimitry Andric   if (std::error_code EC = FName.getError())
4497d523365SDimitry Andric     return EC;
4507d523365SDimitry Andric 
4517d523365SDimitry Andric   Profiles[*FName] = FunctionSamples();
4527d523365SDimitry Andric   FunctionSamples &FProfile = Profiles[*FName];
4533ca95b02SDimitry Andric   FProfile.setName(*FName);
4547d523365SDimitry Andric 
4557d523365SDimitry Andric   FProfile.addHeadSamples(*NumHeadSamples);
4567d523365SDimitry Andric 
4577d523365SDimitry Andric   if (std::error_code EC = readProfile(FProfile))
4587d523365SDimitry Andric     return EC;
459*b5893f02SDimitry Andric   return sampleprof_error::success;
46039d628a0SDimitry Andric }
46139d628a0SDimitry Andric 
read()462*b5893f02SDimitry Andric std::error_code SampleProfileReaderBinary::read() {
463*b5893f02SDimitry Andric   while (!at_eof()) {
464*b5893f02SDimitry Andric     if (std::error_code EC = readFuncProfile())
465*b5893f02SDimitry Andric       return EC;
466*b5893f02SDimitry Andric   }
467*b5893f02SDimitry Andric 
468*b5893f02SDimitry Andric   return sampleprof_error::success;
469*b5893f02SDimitry Andric }
470*b5893f02SDimitry Andric 
read()471*b5893f02SDimitry Andric std::error_code SampleProfileReaderCompactBinary::read() {
472*b5893f02SDimitry Andric   for (auto Name : FuncsToUse) {
473*b5893f02SDimitry Andric     auto GUID = std::to_string(MD5Hash(Name));
474*b5893f02SDimitry Andric     auto iter = FuncOffsetTable.find(StringRef(GUID));
475*b5893f02SDimitry Andric     if (iter == FuncOffsetTable.end())
476*b5893f02SDimitry Andric       continue;
477*b5893f02SDimitry Andric     const uint8_t *SavedData = Data;
478*b5893f02SDimitry Andric     Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
479*b5893f02SDimitry Andric            iter->second;
480*b5893f02SDimitry Andric     if (std::error_code EC = readFuncProfile())
481*b5893f02SDimitry Andric       return EC;
482*b5893f02SDimitry Andric     Data = SavedData;
483*b5893f02SDimitry Andric   }
48439d628a0SDimitry Andric   return sampleprof_error::success;
48539d628a0SDimitry Andric }
48639d628a0SDimitry Andric 
verifySPMagic(uint64_t Magic)4874ba319b5SDimitry Andric std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
4884ba319b5SDimitry Andric   if (Magic == SPMagic())
4894ba319b5SDimitry Andric     return sampleprof_error::success;
49039d628a0SDimitry Andric   return sampleprof_error::bad_magic;
4914ba319b5SDimitry Andric }
49239d628a0SDimitry Andric 
4934ba319b5SDimitry Andric std::error_code
verifySPMagic(uint64_t Magic)4944ba319b5SDimitry Andric SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
4954ba319b5SDimitry Andric   if (Magic == SPMagic(SPF_Compact_Binary))
4964ba319b5SDimitry Andric     return sampleprof_error::success;
4974ba319b5SDimitry Andric   return sampleprof_error::bad_magic;
4984ba319b5SDimitry Andric }
49939d628a0SDimitry Andric 
readNameTable()5004ba319b5SDimitry Andric std::error_code SampleProfileReaderRawBinary::readNameTable() {
5017d523365SDimitry Andric   auto Size = readNumber<uint32_t>();
5027d523365SDimitry Andric   if (std::error_code EC = Size.getError())
5037d523365SDimitry Andric     return EC;
5047d523365SDimitry Andric   NameTable.reserve(*Size);
5057d523365SDimitry Andric   for (uint32_t I = 0; I < *Size; ++I) {
5067d523365SDimitry Andric     auto Name(readString());
5077d523365SDimitry Andric     if (std::error_code EC = Name.getError())
5087d523365SDimitry Andric       return EC;
5097d523365SDimitry Andric     NameTable.push_back(*Name);
5107d523365SDimitry Andric   }
5117d523365SDimitry Andric 
51239d628a0SDimitry Andric   return sampleprof_error::success;
51339d628a0SDimitry Andric }
51439d628a0SDimitry Andric 
readNameTable()5154ba319b5SDimitry Andric std::error_code SampleProfileReaderCompactBinary::readNameTable() {
5164ba319b5SDimitry Andric   auto Size = readNumber<uint64_t>();
5174ba319b5SDimitry Andric   if (std::error_code EC = Size.getError())
5184ba319b5SDimitry Andric     return EC;
5194ba319b5SDimitry Andric   NameTable.reserve(*Size);
5204ba319b5SDimitry Andric   for (uint32_t I = 0; I < *Size; ++I) {
5214ba319b5SDimitry Andric     auto FID = readNumber<uint64_t>();
5224ba319b5SDimitry Andric     if (std::error_code EC = FID.getError())
5234ba319b5SDimitry Andric       return EC;
5244ba319b5SDimitry Andric     NameTable.push_back(std::to_string(*FID));
5254ba319b5SDimitry Andric   }
5264ba319b5SDimitry Andric   return sampleprof_error::success;
5274ba319b5SDimitry Andric }
5284ba319b5SDimitry Andric 
readHeader()5294ba319b5SDimitry Andric std::error_code SampleProfileReaderBinary::readHeader() {
5304ba319b5SDimitry Andric   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
5314ba319b5SDimitry Andric   End = Data + Buffer->getBufferSize();
5324ba319b5SDimitry Andric 
5334ba319b5SDimitry Andric   // Read and check the magic identifier.
5344ba319b5SDimitry Andric   auto Magic = readNumber<uint64_t>();
5354ba319b5SDimitry Andric   if (std::error_code EC = Magic.getError())
5364ba319b5SDimitry Andric     return EC;
5374ba319b5SDimitry Andric   else if (std::error_code EC = verifySPMagic(*Magic))
5384ba319b5SDimitry Andric     return EC;
5394ba319b5SDimitry Andric 
5404ba319b5SDimitry Andric   // Read the version number.
5414ba319b5SDimitry Andric   auto Version = readNumber<uint64_t>();
5424ba319b5SDimitry Andric   if (std::error_code EC = Version.getError())
5434ba319b5SDimitry Andric     return EC;
5444ba319b5SDimitry Andric   else if (*Version != SPVersion())
5454ba319b5SDimitry Andric     return sampleprof_error::unsupported_version;
5464ba319b5SDimitry Andric 
5474ba319b5SDimitry Andric   if (std::error_code EC = readSummary())
5484ba319b5SDimitry Andric     return EC;
5494ba319b5SDimitry Andric 
5504ba319b5SDimitry Andric   if (std::error_code EC = readNameTable())
5514ba319b5SDimitry Andric     return EC;
5524ba319b5SDimitry Andric   return sampleprof_error::success;
5534ba319b5SDimitry Andric }
5544ba319b5SDimitry Andric 
readHeader()555*b5893f02SDimitry Andric std::error_code SampleProfileReaderCompactBinary::readHeader() {
556*b5893f02SDimitry Andric   SampleProfileReaderBinary::readHeader();
557*b5893f02SDimitry Andric   if (std::error_code EC = readFuncOffsetTable())
558*b5893f02SDimitry Andric     return EC;
559*b5893f02SDimitry Andric   return sampleprof_error::success;
560*b5893f02SDimitry Andric }
561*b5893f02SDimitry Andric 
readFuncOffsetTable()562*b5893f02SDimitry Andric std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
563*b5893f02SDimitry Andric   auto TableOffset = readUnencodedNumber<uint64_t>();
564*b5893f02SDimitry Andric   if (std::error_code EC = TableOffset.getError())
565*b5893f02SDimitry Andric     return EC;
566*b5893f02SDimitry Andric 
567*b5893f02SDimitry Andric   const uint8_t *SavedData = Data;
568*b5893f02SDimitry Andric   const uint8_t *TableStart =
569*b5893f02SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
570*b5893f02SDimitry Andric       *TableOffset;
571*b5893f02SDimitry Andric   Data = TableStart;
572*b5893f02SDimitry Andric 
573*b5893f02SDimitry Andric   auto Size = readNumber<uint64_t>();
574*b5893f02SDimitry Andric   if (std::error_code EC = Size.getError())
575*b5893f02SDimitry Andric     return EC;
576*b5893f02SDimitry Andric 
577*b5893f02SDimitry Andric   FuncOffsetTable.reserve(*Size);
578*b5893f02SDimitry Andric   for (uint32_t I = 0; I < *Size; ++I) {
579*b5893f02SDimitry Andric     auto FName(readStringFromTable());
580*b5893f02SDimitry Andric     if (std::error_code EC = FName.getError())
581*b5893f02SDimitry Andric       return EC;
582*b5893f02SDimitry Andric 
583*b5893f02SDimitry Andric     auto Offset = readNumber<uint64_t>();
584*b5893f02SDimitry Andric     if (std::error_code EC = Offset.getError())
585*b5893f02SDimitry Andric       return EC;
586*b5893f02SDimitry Andric 
587*b5893f02SDimitry Andric     FuncOffsetTable[*FName] = *Offset;
588*b5893f02SDimitry Andric   }
589*b5893f02SDimitry Andric   End = TableStart;
590*b5893f02SDimitry Andric   Data = SavedData;
591*b5893f02SDimitry Andric   return sampleprof_error::success;
592*b5893f02SDimitry Andric }
593*b5893f02SDimitry Andric 
collectFuncsToUse(const Module & M)594*b5893f02SDimitry Andric void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
595*b5893f02SDimitry Andric   FuncsToUse.clear();
596*b5893f02SDimitry Andric   for (auto &F : M) {
597*b5893f02SDimitry Andric     StringRef Fname = F.getName().split('.').first;
598*b5893f02SDimitry Andric     FuncsToUse.insert(Fname);
599*b5893f02SDimitry Andric   }
600*b5893f02SDimitry Andric }
601*b5893f02SDimitry Andric 
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)6023ca95b02SDimitry Andric std::error_code SampleProfileReaderBinary::readSummaryEntry(
6033ca95b02SDimitry Andric     std::vector<ProfileSummaryEntry> &Entries) {
6043ca95b02SDimitry Andric   auto Cutoff = readNumber<uint64_t>();
6053ca95b02SDimitry Andric   if (std::error_code EC = Cutoff.getError())
6063ca95b02SDimitry Andric     return EC;
6073ca95b02SDimitry Andric 
6083ca95b02SDimitry Andric   auto MinBlockCount = readNumber<uint64_t>();
6093ca95b02SDimitry Andric   if (std::error_code EC = MinBlockCount.getError())
6103ca95b02SDimitry Andric     return EC;
6113ca95b02SDimitry Andric 
6123ca95b02SDimitry Andric   auto NumBlocks = readNumber<uint64_t>();
6133ca95b02SDimitry Andric   if (std::error_code EC = NumBlocks.getError())
6143ca95b02SDimitry Andric     return EC;
6153ca95b02SDimitry Andric 
6163ca95b02SDimitry Andric   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
6173ca95b02SDimitry Andric   return sampleprof_error::success;
6183ca95b02SDimitry Andric }
6193ca95b02SDimitry Andric 
readSummary()6203ca95b02SDimitry Andric std::error_code SampleProfileReaderBinary::readSummary() {
6213ca95b02SDimitry Andric   auto TotalCount = readNumber<uint64_t>();
6223ca95b02SDimitry Andric   if (std::error_code EC = TotalCount.getError())
6233ca95b02SDimitry Andric     return EC;
6243ca95b02SDimitry Andric 
6253ca95b02SDimitry Andric   auto MaxBlockCount = readNumber<uint64_t>();
6263ca95b02SDimitry Andric   if (std::error_code EC = MaxBlockCount.getError())
6273ca95b02SDimitry Andric     return EC;
6283ca95b02SDimitry Andric 
6293ca95b02SDimitry Andric   auto MaxFunctionCount = readNumber<uint64_t>();
6303ca95b02SDimitry Andric   if (std::error_code EC = MaxFunctionCount.getError())
6313ca95b02SDimitry Andric     return EC;
6323ca95b02SDimitry Andric 
6333ca95b02SDimitry Andric   auto NumBlocks = readNumber<uint64_t>();
6343ca95b02SDimitry Andric   if (std::error_code EC = NumBlocks.getError())
6353ca95b02SDimitry Andric     return EC;
6363ca95b02SDimitry Andric 
6373ca95b02SDimitry Andric   auto NumFunctions = readNumber<uint64_t>();
6383ca95b02SDimitry Andric   if (std::error_code EC = NumFunctions.getError())
6393ca95b02SDimitry Andric     return EC;
6403ca95b02SDimitry Andric 
6413ca95b02SDimitry Andric   auto NumSummaryEntries = readNumber<uint64_t>();
6423ca95b02SDimitry Andric   if (std::error_code EC = NumSummaryEntries.getError())
6433ca95b02SDimitry Andric     return EC;
6443ca95b02SDimitry Andric 
6453ca95b02SDimitry Andric   std::vector<ProfileSummaryEntry> Entries;
6463ca95b02SDimitry Andric   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
6473ca95b02SDimitry Andric     std::error_code EC = readSummaryEntry(Entries);
6483ca95b02SDimitry Andric     if (EC != sampleprof_error::success)
6493ca95b02SDimitry Andric       return EC;
6503ca95b02SDimitry Andric   }
6513ca95b02SDimitry Andric   Summary = llvm::make_unique<ProfileSummary>(
6523ca95b02SDimitry Andric       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
6533ca95b02SDimitry Andric       *MaxFunctionCount, *NumBlocks, *NumFunctions);
6543ca95b02SDimitry Andric 
6553ca95b02SDimitry Andric   return sampleprof_error::success;
6563ca95b02SDimitry Andric }
6573ca95b02SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)6584ba319b5SDimitry Andric bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
65939d628a0SDimitry Andric   const uint8_t *Data =
66039d628a0SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
66139d628a0SDimitry Andric   uint64_t Magic = decodeULEB128(Data);
66239d628a0SDimitry Andric   return Magic == SPMagic();
66339d628a0SDimitry Andric }
66439d628a0SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)6654ba319b5SDimitry Andric bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
6664ba319b5SDimitry Andric   const uint8_t *Data =
6674ba319b5SDimitry Andric       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
6684ba319b5SDimitry Andric   uint64_t Magic = decodeULEB128(Data);
6694ba319b5SDimitry Andric   return Magic == SPMagic(SPF_Compact_Binary);
6704ba319b5SDimitry Andric }
6714ba319b5SDimitry Andric 
skipNextWord()6727d523365SDimitry Andric std::error_code SampleProfileReaderGCC::skipNextWord() {
6737d523365SDimitry Andric   uint32_t dummy;
6747d523365SDimitry Andric   if (!GcovBuffer.readInt(dummy))
6757d523365SDimitry Andric     return sampleprof_error::truncated;
6767d523365SDimitry Andric   return sampleprof_error::success;
6777d523365SDimitry Andric }
6787d523365SDimitry Andric 
readNumber()6797d523365SDimitry Andric template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
6807d523365SDimitry Andric   if (sizeof(T) <= sizeof(uint32_t)) {
6817d523365SDimitry Andric     uint32_t Val;
6827d523365SDimitry Andric     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
6837d523365SDimitry Andric       return static_cast<T>(Val);
6847d523365SDimitry Andric   } else if (sizeof(T) <= sizeof(uint64_t)) {
6857d523365SDimitry Andric     uint64_t Val;
6867d523365SDimitry Andric     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
6877d523365SDimitry Andric       return static_cast<T>(Val);
6887d523365SDimitry Andric   }
6897d523365SDimitry Andric 
6907d523365SDimitry Andric   std::error_code EC = sampleprof_error::malformed;
6917d523365SDimitry Andric   reportError(0, EC.message());
6927d523365SDimitry Andric   return EC;
6937d523365SDimitry Andric }
6947d523365SDimitry Andric 
readString()6957d523365SDimitry Andric ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
6967d523365SDimitry Andric   StringRef Str;
6977d523365SDimitry Andric   if (!GcovBuffer.readString(Str))
6987d523365SDimitry Andric     return sampleprof_error::truncated;
6997d523365SDimitry Andric   return Str;
7007d523365SDimitry Andric }
7017d523365SDimitry Andric 
readHeader()7027d523365SDimitry Andric std::error_code SampleProfileReaderGCC::readHeader() {
7037d523365SDimitry Andric   // Read the magic identifier.
7047d523365SDimitry Andric   if (!GcovBuffer.readGCDAFormat())
7057d523365SDimitry Andric     return sampleprof_error::unrecognized_format;
7067d523365SDimitry Andric 
7077d523365SDimitry Andric   // Read the version number. Note - the GCC reader does not validate this
7087d523365SDimitry Andric   // version, but the profile creator generates v704.
7097d523365SDimitry Andric   GCOV::GCOVVersion version;
7107d523365SDimitry Andric   if (!GcovBuffer.readGCOVVersion(version))
7117d523365SDimitry Andric     return sampleprof_error::unrecognized_format;
7127d523365SDimitry Andric 
7137d523365SDimitry Andric   if (version != GCOV::V704)
7147d523365SDimitry Andric     return sampleprof_error::unsupported_version;
7157d523365SDimitry Andric 
7167d523365SDimitry Andric   // Skip the empty integer.
7177d523365SDimitry Andric   if (std::error_code EC = skipNextWord())
7187d523365SDimitry Andric     return EC;
7197d523365SDimitry Andric 
7207d523365SDimitry Andric   return sampleprof_error::success;
7217d523365SDimitry Andric }
7227d523365SDimitry Andric 
readSectionTag(uint32_t Expected)7237d523365SDimitry Andric std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
7247d523365SDimitry Andric   uint32_t Tag;
7257d523365SDimitry Andric   if (!GcovBuffer.readInt(Tag))
7267d523365SDimitry Andric     return sampleprof_error::truncated;
7277d523365SDimitry Andric 
7287d523365SDimitry Andric   if (Tag != Expected)
7297d523365SDimitry Andric     return sampleprof_error::malformed;
7307d523365SDimitry Andric 
7317d523365SDimitry Andric   if (std::error_code EC = skipNextWord())
7327d523365SDimitry Andric     return EC;
7337d523365SDimitry Andric 
7347d523365SDimitry Andric   return sampleprof_error::success;
7357d523365SDimitry Andric }
7367d523365SDimitry Andric 
readNameTable()7377d523365SDimitry Andric std::error_code SampleProfileReaderGCC::readNameTable() {
7387d523365SDimitry Andric   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
7397d523365SDimitry Andric     return EC;
7407d523365SDimitry Andric 
7417d523365SDimitry Andric   uint32_t Size;
7427d523365SDimitry Andric   if (!GcovBuffer.readInt(Size))
7437d523365SDimitry Andric     return sampleprof_error::truncated;
7447d523365SDimitry Andric 
7457d523365SDimitry Andric   for (uint32_t I = 0; I < Size; ++I) {
7467d523365SDimitry Andric     StringRef Str;
7477d523365SDimitry Andric     if (!GcovBuffer.readString(Str))
7487d523365SDimitry Andric       return sampleprof_error::truncated;
7497d523365SDimitry Andric     Names.push_back(Str);
7507d523365SDimitry Andric   }
7517d523365SDimitry Andric 
7527d523365SDimitry Andric   return sampleprof_error::success;
7537d523365SDimitry Andric }
7547d523365SDimitry Andric 
readFunctionProfiles()7557d523365SDimitry Andric std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
7567d523365SDimitry Andric   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
7577d523365SDimitry Andric     return EC;
7587d523365SDimitry Andric 
7597d523365SDimitry Andric   uint32_t NumFunctions;
7607d523365SDimitry Andric   if (!GcovBuffer.readInt(NumFunctions))
7617d523365SDimitry Andric     return sampleprof_error::truncated;
7627d523365SDimitry Andric 
7637d523365SDimitry Andric   InlineCallStack Stack;
7647d523365SDimitry Andric   for (uint32_t I = 0; I < NumFunctions; ++I)
7657d523365SDimitry Andric     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
7667d523365SDimitry Andric       return EC;
7677d523365SDimitry Andric 
7683ca95b02SDimitry Andric   computeSummary();
7697d523365SDimitry Andric   return sampleprof_error::success;
7707d523365SDimitry Andric }
7717d523365SDimitry Andric 
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)7727d523365SDimitry Andric std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
7737d523365SDimitry Andric     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
7747d523365SDimitry Andric   uint64_t HeadCount = 0;
7757d523365SDimitry Andric   if (InlineStack.size() == 0)
7767d523365SDimitry Andric     if (!GcovBuffer.readInt64(HeadCount))
7777d523365SDimitry Andric       return sampleprof_error::truncated;
7787d523365SDimitry Andric 
7797d523365SDimitry Andric   uint32_t NameIdx;
7807d523365SDimitry Andric   if (!GcovBuffer.readInt(NameIdx))
7817d523365SDimitry Andric     return sampleprof_error::truncated;
7827d523365SDimitry Andric 
7837d523365SDimitry Andric   StringRef Name(Names[NameIdx]);
7847d523365SDimitry Andric 
7857d523365SDimitry Andric   uint32_t NumPosCounts;
7867d523365SDimitry Andric   if (!GcovBuffer.readInt(NumPosCounts))
7877d523365SDimitry Andric     return sampleprof_error::truncated;
7887d523365SDimitry Andric 
7897d523365SDimitry Andric   uint32_t NumCallsites;
7907d523365SDimitry Andric   if (!GcovBuffer.readInt(NumCallsites))
7917d523365SDimitry Andric     return sampleprof_error::truncated;
7927d523365SDimitry Andric 
7937d523365SDimitry Andric   FunctionSamples *FProfile = nullptr;
7947d523365SDimitry Andric   if (InlineStack.size() == 0) {
7957d523365SDimitry Andric     // If this is a top function that we have already processed, do not
7967d523365SDimitry Andric     // update its profile again.  This happens in the presence of
7977d523365SDimitry Andric     // function aliases.  Since these aliases share the same function
7987d523365SDimitry Andric     // body, there will be identical replicated profiles for the
7997d523365SDimitry Andric     // original function.  In this case, we simply not bother updating
8007d523365SDimitry Andric     // the profile of the original function.
8017d523365SDimitry Andric     FProfile = &Profiles[Name];
8027d523365SDimitry Andric     FProfile->addHeadSamples(HeadCount);
8037d523365SDimitry Andric     if (FProfile->getTotalSamples() > 0)
8047d523365SDimitry Andric       Update = false;
8057d523365SDimitry Andric   } else {
8067d523365SDimitry Andric     // Otherwise, we are reading an inlined instance. The top of the
8077d523365SDimitry Andric     // inline stack contains the profile of the caller. Insert this
8087d523365SDimitry Andric     // callee in the caller's CallsiteMap.
8097d523365SDimitry Andric     FunctionSamples *CallerProfile = InlineStack.front();
8107d523365SDimitry Andric     uint32_t LineOffset = Offset >> 16;
8117d523365SDimitry Andric     uint32_t Discriminator = Offset & 0xffff;
8127d523365SDimitry Andric     FProfile = &CallerProfile->functionSamplesAt(
8137a7e6055SDimitry Andric         LineLocation(LineOffset, Discriminator))[Name];
8147d523365SDimitry Andric   }
8153ca95b02SDimitry Andric   FProfile->setName(Name);
8167d523365SDimitry Andric 
8177d523365SDimitry Andric   for (uint32_t I = 0; I < NumPosCounts; ++I) {
8187d523365SDimitry Andric     uint32_t Offset;
8197d523365SDimitry Andric     if (!GcovBuffer.readInt(Offset))
8207d523365SDimitry Andric       return sampleprof_error::truncated;
8217d523365SDimitry Andric 
8227d523365SDimitry Andric     uint32_t NumTargets;
8237d523365SDimitry Andric     if (!GcovBuffer.readInt(NumTargets))
8247d523365SDimitry Andric       return sampleprof_error::truncated;
8257d523365SDimitry Andric 
8267d523365SDimitry Andric     uint64_t Count;
8277d523365SDimitry Andric     if (!GcovBuffer.readInt64(Count))
8287d523365SDimitry Andric       return sampleprof_error::truncated;
8297d523365SDimitry Andric 
8307d523365SDimitry Andric     // The line location is encoded in the offset as:
8317d523365SDimitry Andric     //   high 16 bits: line offset to the start of the function.
8327d523365SDimitry Andric     //   low 16 bits: discriminator.
8337d523365SDimitry Andric     uint32_t LineOffset = Offset >> 16;
8347d523365SDimitry Andric     uint32_t Discriminator = Offset & 0xffff;
8357d523365SDimitry Andric 
8367d523365SDimitry Andric     InlineCallStack NewStack;
8377d523365SDimitry Andric     NewStack.push_back(FProfile);
8387d523365SDimitry Andric     NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
8397d523365SDimitry Andric     if (Update) {
8407d523365SDimitry Andric       // Walk up the inline stack, adding the samples on this line to
8417d523365SDimitry Andric       // the total sample count of the callers in the chain.
8427d523365SDimitry Andric       for (auto CallerProfile : NewStack)
8437d523365SDimitry Andric         CallerProfile->addTotalSamples(Count);
8447d523365SDimitry Andric 
8457d523365SDimitry Andric       // Update the body samples for the current profile.
8467d523365SDimitry Andric       FProfile->addBodySamples(LineOffset, Discriminator, Count);
8477d523365SDimitry Andric     }
8487d523365SDimitry Andric 
8497d523365SDimitry Andric     // Process the list of functions called at an indirect call site.
8507d523365SDimitry Andric     // These are all the targets that a function pointer (or virtual
8517d523365SDimitry Andric     // function) resolved at runtime.
8527d523365SDimitry Andric     for (uint32_t J = 0; J < NumTargets; J++) {
8537d523365SDimitry Andric       uint32_t HistVal;
8547d523365SDimitry Andric       if (!GcovBuffer.readInt(HistVal))
8557d523365SDimitry Andric         return sampleprof_error::truncated;
8567d523365SDimitry Andric 
8577d523365SDimitry Andric       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
8587d523365SDimitry Andric         return sampleprof_error::malformed;
8597d523365SDimitry Andric 
8607d523365SDimitry Andric       uint64_t TargetIdx;
8617d523365SDimitry Andric       if (!GcovBuffer.readInt64(TargetIdx))
8627d523365SDimitry Andric         return sampleprof_error::truncated;
8637d523365SDimitry Andric       StringRef TargetName(Names[TargetIdx]);
8647d523365SDimitry Andric 
8657d523365SDimitry Andric       uint64_t TargetCount;
8667d523365SDimitry Andric       if (!GcovBuffer.readInt64(TargetCount))
8677d523365SDimitry Andric         return sampleprof_error::truncated;
8687d523365SDimitry Andric 
8697a7e6055SDimitry Andric       if (Update)
8707a7e6055SDimitry Andric         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
8717d523365SDimitry Andric                                          TargetName, TargetCount);
8727d523365SDimitry Andric     }
8737d523365SDimitry Andric   }
8747d523365SDimitry Andric 
8757d523365SDimitry Andric   // Process all the inlined callers into the current function. These
8767d523365SDimitry Andric   // are all the callsites that were inlined into this function.
8777d523365SDimitry Andric   for (uint32_t I = 0; I < NumCallsites; I++) {
8787d523365SDimitry Andric     // The offset is encoded as:
8797d523365SDimitry Andric     //   high 16 bits: line offset to the start of the function.
8807d523365SDimitry Andric     //   low 16 bits: discriminator.
8817d523365SDimitry Andric     uint32_t Offset;
8827d523365SDimitry Andric     if (!GcovBuffer.readInt(Offset))
8837d523365SDimitry Andric       return sampleprof_error::truncated;
8847d523365SDimitry Andric     InlineCallStack NewStack;
8857d523365SDimitry Andric     NewStack.push_back(FProfile);
8867d523365SDimitry Andric     NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
8877d523365SDimitry Andric     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
8887d523365SDimitry Andric       return EC;
8897d523365SDimitry Andric   }
8907d523365SDimitry Andric 
8917d523365SDimitry Andric   return sampleprof_error::success;
8927d523365SDimitry Andric }
8937d523365SDimitry Andric 
8944ba319b5SDimitry Andric /// Read a GCC AutoFDO profile.
8957d523365SDimitry Andric ///
8967d523365SDimitry Andric /// This format is generated by the Linux Perf conversion tool at
8977d523365SDimitry Andric /// https://github.com/google/autofdo.
read()8987d523365SDimitry Andric std::error_code SampleProfileReaderGCC::read() {
8997d523365SDimitry Andric   // Read the string table.
9007d523365SDimitry Andric   if (std::error_code EC = readNameTable())
9017d523365SDimitry Andric     return EC;
9027d523365SDimitry Andric 
9037d523365SDimitry Andric   // Read the source profile.
9047d523365SDimitry Andric   if (std::error_code EC = readFunctionProfiles())
9057d523365SDimitry Andric     return EC;
9067d523365SDimitry Andric 
9077d523365SDimitry Andric   return sampleprof_error::success;
9087d523365SDimitry Andric }
9097d523365SDimitry Andric 
hasFormat(const MemoryBuffer & Buffer)9107d523365SDimitry Andric bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
9117d523365SDimitry Andric   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
9127d523365SDimitry Andric   return Magic == "adcg*704";
9137d523365SDimitry Andric }
9147d523365SDimitry Andric 
read()915*b5893f02SDimitry Andric std::error_code SampleProfileReaderItaniumRemapper::read() {
916*b5893f02SDimitry Andric   // If the underlying data is in compact format, we can't remap it because
917*b5893f02SDimitry Andric   // we don't know what the original function names were.
918*b5893f02SDimitry Andric   if (getFormat() == SPF_Compact_Binary) {
919*b5893f02SDimitry Andric     Ctx.diagnose(DiagnosticInfoSampleProfile(
920*b5893f02SDimitry Andric         Buffer->getBufferIdentifier(),
921*b5893f02SDimitry Andric         "Profile data remapping cannot be applied to profile data "
922*b5893f02SDimitry Andric         "in compact format (original mangled names are not available).",
923*b5893f02SDimitry Andric         DS_Warning));
924*b5893f02SDimitry Andric     return sampleprof_error::success;
925*b5893f02SDimitry Andric   }
926*b5893f02SDimitry Andric 
927*b5893f02SDimitry Andric   if (Error E = Remappings.read(*Buffer)) {
928*b5893f02SDimitry Andric     handleAllErrors(
929*b5893f02SDimitry Andric         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
930*b5893f02SDimitry Andric           reportError(ParseError.getLineNum(), ParseError.getMessage());
931*b5893f02SDimitry Andric         });
932*b5893f02SDimitry Andric     return sampleprof_error::malformed;
933*b5893f02SDimitry Andric   }
934*b5893f02SDimitry Andric 
935*b5893f02SDimitry Andric   for (auto &Sample : getProfiles())
936*b5893f02SDimitry Andric     if (auto Key = Remappings.insert(Sample.first()))
937*b5893f02SDimitry Andric       SampleMap.insert({Key, &Sample.second});
938*b5893f02SDimitry Andric 
939*b5893f02SDimitry Andric   return sampleprof_error::success;
940*b5893f02SDimitry Andric }
941*b5893f02SDimitry Andric 
942*b5893f02SDimitry Andric FunctionSamples *
getSamplesFor(StringRef Fname)943*b5893f02SDimitry Andric SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) {
944*b5893f02SDimitry Andric   if (auto Key = Remappings.lookup(Fname))
945*b5893f02SDimitry Andric     return SampleMap.lookup(Key);
946*b5893f02SDimitry Andric   return SampleProfileReader::getSamplesFor(Fname);
947*b5893f02SDimitry Andric }
948*b5893f02SDimitry Andric 
9494ba319b5SDimitry Andric /// Prepare a memory buffer for the contents of \p Filename.
95039d628a0SDimitry Andric ///
95139d628a0SDimitry Andric /// \returns an error code indicating the status of the buffer.
95239d628a0SDimitry Andric static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)9533ca95b02SDimitry Andric setupMemoryBuffer(const Twine &Filename) {
95439d628a0SDimitry Andric   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
95539d628a0SDimitry Andric   if (std::error_code EC = BufferOrErr.getError())
95639d628a0SDimitry Andric     return EC;
95739d628a0SDimitry Andric   auto Buffer = std::move(BufferOrErr.get());
95839d628a0SDimitry Andric 
95939d628a0SDimitry Andric   // Sanity check the file.
9602cab237bSDimitry Andric   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
96139d628a0SDimitry Andric     return sampleprof_error::too_large;
96239d628a0SDimitry Andric 
96339d628a0SDimitry Andric   return std::move(Buffer);
96439d628a0SDimitry Andric }
96539d628a0SDimitry Andric 
9664ba319b5SDimitry Andric /// Create a sample profile reader based on the format of the input file.
96739d628a0SDimitry Andric ///
96839d628a0SDimitry Andric /// \param Filename The file to open.
96939d628a0SDimitry Andric ///
97039d628a0SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
97139d628a0SDimitry Andric ///
97239d628a0SDimitry Andric /// \returns an error code indicating the status of the created reader.
97339d628a0SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const Twine & Filename,LLVMContext & C)9743ca95b02SDimitry Andric SampleProfileReader::create(const Twine &Filename, LLVMContext &C) {
97539d628a0SDimitry Andric   auto BufferOrError = setupMemoryBuffer(Filename);
97639d628a0SDimitry Andric   if (std::error_code EC = BufferOrError.getError())
97739d628a0SDimitry Andric     return EC;
9787d523365SDimitry Andric   return create(BufferOrError.get(), C);
9797d523365SDimitry Andric }
98039d628a0SDimitry Andric 
981*b5893f02SDimitry Andric /// Create a sample profile remapper from the given input, to remap the
982*b5893f02SDimitry Andric /// function names in the given profile data.
983*b5893f02SDimitry Andric ///
984*b5893f02SDimitry Andric /// \param Filename The file to open.
985*b5893f02SDimitry Andric ///
986*b5893f02SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
987*b5893f02SDimitry Andric ///
988*b5893f02SDimitry Andric /// \param Underlying The underlying profile data reader to remap.
989*b5893f02SDimitry Andric ///
990*b5893f02SDimitry Andric /// \returns an error code indicating the status of the created reader.
991*b5893f02SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const Twine & Filename,LLVMContext & C,std::unique_ptr<SampleProfileReader> Underlying)992*b5893f02SDimitry Andric SampleProfileReaderItaniumRemapper::create(
993*b5893f02SDimitry Andric     const Twine &Filename, LLVMContext &C,
994*b5893f02SDimitry Andric     std::unique_ptr<SampleProfileReader> Underlying) {
995*b5893f02SDimitry Andric   auto BufferOrError = setupMemoryBuffer(Filename);
996*b5893f02SDimitry Andric   if (std::error_code EC = BufferOrError.getError())
997*b5893f02SDimitry Andric     return EC;
998*b5893f02SDimitry Andric   return llvm::make_unique<SampleProfileReaderItaniumRemapper>(
999*b5893f02SDimitry Andric       std::move(BufferOrError.get()), C, std::move(Underlying));
1000*b5893f02SDimitry Andric }
1001*b5893f02SDimitry Andric 
10024ba319b5SDimitry Andric /// Create a sample profile reader based on the format of the input data.
10037d523365SDimitry Andric ///
10047d523365SDimitry Andric /// \param B The memory buffer to create the reader from (assumes ownership).
10057d523365SDimitry Andric ///
10067d523365SDimitry Andric /// \param C The LLVM context to use to emit diagnostics.
10077d523365SDimitry Andric ///
10087d523365SDimitry Andric /// \returns an error code indicating the status of the created reader.
10097d523365SDimitry Andric ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C)10107d523365SDimitry Andric SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
101139d628a0SDimitry Andric   std::unique_ptr<SampleProfileReader> Reader;
10124ba319b5SDimitry Andric   if (SampleProfileReaderRawBinary::hasFormat(*B))
10134ba319b5SDimitry Andric     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
10144ba319b5SDimitry Andric   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
10154ba319b5SDimitry Andric     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
10167d523365SDimitry Andric   else if (SampleProfileReaderGCC::hasFormat(*B))
10177d523365SDimitry Andric     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
10187d523365SDimitry Andric   else if (SampleProfileReaderText::hasFormat(*B))
10197d523365SDimitry Andric     Reader.reset(new SampleProfileReaderText(std::move(B), C));
102039d628a0SDimitry Andric   else
10217d523365SDimitry Andric     return sampleprof_error::unrecognized_format;
102239d628a0SDimitry Andric 
1023*b5893f02SDimitry Andric   FunctionSamples::Format = Reader->getFormat();
102439d628a0SDimitry Andric   if (std::error_code EC = Reader->readHeader())
102539d628a0SDimitry Andric     return EC;
102639d628a0SDimitry Andric 
102739d628a0SDimitry Andric   return std::move(Reader);
102839d628a0SDimitry Andric }
10293ca95b02SDimitry Andric 
10303ca95b02SDimitry Andric // For text and GCC file formats, we compute the summary after reading the
10313ca95b02SDimitry Andric // profile. Binary format has the profile summary in its header.
computeSummary()10323ca95b02SDimitry Andric void SampleProfileReader::computeSummary() {
10333ca95b02SDimitry Andric   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
10343ca95b02SDimitry Andric   for (const auto &I : Profiles) {
10353ca95b02SDimitry Andric     const FunctionSamples &Profile = I.second;
10363ca95b02SDimitry Andric     Builder.addRecord(Profile);
10373ca95b02SDimitry Andric   }
10383ca95b02SDimitry Andric   Summary = Builder.getSummary();
10393ca95b02SDimitry Andric }
1040