1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <system_error>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace sampleprof;
46 
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
51 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
52                                               raw_ostream &OS) {
53   OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55 
56 /// Dump all the function profiles found on stream \p OS.
57 void SampleProfileReader::dump(raw_ostream &OS) {
58   for (const auto &I : Profiles)
59     dumpFunctionProfile(I.getKey(), OS);
60 }
61 
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71   if (Input[0] == ' ')
72     return false;
73   size_t n2 = Input.rfind(':');
74   size_t n1 = Input.rfind(':', n2 - 1);
75   FName = Input.substr(0, n1);
76   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77     return false;
78   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79     return false;
80   return true;
81 }
82 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  const unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
85 
86 /// Parse \p Input that contains metadata.
87 /// Possible metadata:
88 /// - CFG Checksum information:
89 ///     !CFGChecksum: 12345
90 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
91 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
92   if (!Input.startswith("!CFGChecksum:"))
93     return false;
94 
95   StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
96   return !CFGInfo.getAsInteger(10, FunctionHash);
97 }
98 
/// Kind of an indented (non-header) line in a text profile, as classified by
/// ParseLine.
enum class LineType {
  /// Inlined call site: "offset[.discriminator]: callee:samples".
  CallSiteProfile = 0,
  /// Body sample: "offset[.discriminator]: samples [target:count]*".
  BodyProfile = 1,
  /// Metadata line introduced by '!', e.g. "!CFGChecksum: 12345".
  Metadata = 2,
};
104 
105 /// Parse \p Input as line sample.
106 ///
107 /// \param Input input line.
108 /// \param LineTy Type of this line.
109 /// \param Depth the depth of the inline stack.
110 /// \param NumSamples total samples of the line/inlined callsite.
111 /// \param LineOffset line offset to the start of the function.
112 /// \param Discriminator discriminator of the line.
113 /// \param TargetCountMap map from indirect call target to count.
114 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
115 ///
116 /// returns true if parsing is successful.
117 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
118                       uint64_t &NumSamples, uint32_t &LineOffset,
119                       uint32_t &Discriminator, StringRef &CalleeName,
120                       DenseMap<StringRef, uint64_t> &TargetCountMap,
121                       uint64_t &FunctionHash) {
122   for (Depth = 0; Input[Depth] == ' '; Depth++)
123     ;
124   if (Depth == 0)
125     return false;
126 
127   if (Depth == 1 && Input[Depth] == '!') {
128     LineTy = LineType::Metadata;
129     return parseMetadata(Input.substr(Depth), FunctionHash);
130   }
131 
132   size_t n1 = Input.find(':');
133   StringRef Loc = Input.substr(Depth, n1 - Depth);
134   size_t n2 = Loc.find('.');
135   if (n2 == StringRef::npos) {
136     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
137       return false;
138     Discriminator = 0;
139   } else {
140     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
141       return false;
142     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
143       return false;
144   }
145 
146   StringRef Rest = Input.substr(n1 + 2);
147   if (Rest[0] >= '0' && Rest[0] <= '9') {
148     LineTy = LineType::BodyProfile;
149     size_t n3 = Rest.find(' ');
150     if (n3 == StringRef::npos) {
151       if (Rest.getAsInteger(10, NumSamples))
152         return false;
153     } else {
154       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
155         return false;
156     }
157     // Find call targets and their sample counts.
158     // Note: In some cases, there are symbols in the profile which are not
159     // mangled. To accommodate such cases, use colon + integer pairs as the
160     // anchor points.
161     // An example:
162     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
163     // ":1000" and ":437" are used as anchor points so the string above will
164     // be interpreted as
165     // target: _M_construct<char *>
166     // count: 1000
167     // target: string_view<std::allocator<char> >
168     // count: 437
169     while (n3 != StringRef::npos) {
170       n3 += Rest.substr(n3).find_first_not_of(' ');
171       Rest = Rest.substr(n3);
172       n3 = Rest.find_first_of(':');
173       if (n3 == StringRef::npos || n3 == 0)
174         return false;
175 
176       StringRef Target;
177       uint64_t count, n4;
178       while (true) {
179         // Get the segment after the current colon.
180         StringRef AfterColon = Rest.substr(n3 + 1);
181         // Get the target symbol before the current colon.
182         Target = Rest.substr(0, n3);
183         // Check if the word after the current colon is an integer.
184         n4 = AfterColon.find_first_of(' ');
185         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
186         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
187         if (!WordAfterColon.getAsInteger(10, count))
188           break;
189 
190         // Try to find the next colon.
191         uint64_t n5 = AfterColon.find_first_of(':');
192         if (n5 == StringRef::npos)
193           return false;
194         n3 += n5 + 1;
195       }
196 
197       // An anchor point is found. Save the {target, count} pair
198       TargetCountMap[Target] = count;
199       if (n4 == Rest.size())
200         break;
201       // Change n3 to the next blank space after colon + integer pair.
202       n3 = n4;
203     }
204   } else {
205     LineTy = LineType::CallSiteProfile;
206     size_t n3 = Rest.find_last_of(':');
207     CalleeName = Rest.substr(0, n3);
208     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
209       return false;
210   }
211   return true;
212 }
213 
214 /// Load samples from a text file.
215 ///
216 /// See the documentation at the top of the file for an explanation of
217 /// the expected format.
218 ///
219 /// \returns true if the file was loaded successfully, false otherwise.
220 std::error_code SampleProfileReaderText::readImpl() {
221   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
222   sampleprof_error Result = sampleprof_error::success;
223 
224   InlineCallStack InlineStack;
225   int CSProfileCount = 0;
226   int RegularProfileCount = 0;
227   uint32_t ProbeProfileCount = 0;
228 
229   // SeenMetadata tracks whether we have processed metadata for the current
230   // top-level function profile.
231   bool SeenMetadata = false;
232 
233   for (; !LineIt.is_at_eof(); ++LineIt) {
234     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
235       continue;
236     // Read the header of each function.
237     //
238     // Note that for function identifiers we are actually expecting
239     // mangled names, but we may not always get them. This happens when
240     // the compiler decides not to emit the function (e.g., it was inlined
241     // and removed). In this case, the binary will not have the linkage
242     // name for the function, so the profiler will emit the function's
243     // unmangled name, which may contain characters like ':' and '>' in its
244     // name (member functions, templates, etc).
245     //
246     // The only requirement we place on the identifier, then, is that it
247     // should not begin with a number.
248     if ((*LineIt)[0] != ' ') {
249       uint64_t NumSamples, NumHeadSamples;
250       StringRef FName;
251       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
252         reportError(LineIt.line_number(),
253                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
254         return sampleprof_error::malformed;
255       }
256       SeenMetadata = false;
257       SampleContext FContext(FName);
258       if (FContext.hasContext())
259         ++CSProfileCount;
260       else
261         ++RegularProfileCount;
262       Profiles[FContext] = FunctionSamples();
263       FunctionSamples &FProfile = Profiles[FContext];
264       FProfile.setName(FContext.getName());
265       FProfile.setContext(FContext);
266       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
267       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
268       InlineStack.clear();
269       InlineStack.push_back(&FProfile);
270     } else {
271       uint64_t NumSamples;
272       StringRef FName;
273       DenseMap<StringRef, uint64_t> TargetCountMap;
274       uint32_t Depth, LineOffset, Discriminator;
275       LineType LineTy;
276       uint64_t FunctionHash;
277       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
278                      Discriminator, FName, TargetCountMap, FunctionHash)) {
279         reportError(LineIt.line_number(),
280                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
281                         *LineIt);
282         return sampleprof_error::malformed;
283       }
284       if (SeenMetadata && LineTy != LineType::Metadata) {
285         // Metadata must be put at the end of a function profile.
286         reportError(LineIt.line_number(),
287                     "Found non-metadata after metadata: " + *LineIt);
288         return sampleprof_error::malformed;
289       }
290       while (InlineStack.size() > Depth) {
291         InlineStack.pop_back();
292       }
293       switch (LineTy) {
294       case LineType::CallSiteProfile: {
295         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
296             LineLocation(LineOffset, Discriminator))[std::string(FName)];
297         FSamples.setName(FName);
298         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
299         InlineStack.push_back(&FSamples);
300         break;
301       }
302       case LineType::BodyProfile: {
303         while (InlineStack.size() > Depth) {
304           InlineStack.pop_back();
305         }
306         FunctionSamples &FProfile = *InlineStack.back();
307         for (const auto &name_count : TargetCountMap) {
308           MergeResult(Result, FProfile.addCalledTargetSamples(
309                                   LineOffset, Discriminator, name_count.first,
310                                   name_count.second));
311         }
312         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
313                                                     NumSamples));
314         break;
315       }
316       case LineType::Metadata: {
317         FunctionSamples &FProfile = *InlineStack.back();
318         FProfile.setFunctionHash(FunctionHash);
319         ++ProbeProfileCount;
320         SeenMetadata = true;
321         break;
322       }
323       }
324     }
325   }
326 
327   assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
328          "Cannot have both context-sensitive and regular profile");
329   ProfileIsCS = (CSProfileCount > 0);
330   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
331          "Cannot have both probe-based profiles and regular profiles");
332   ProfileIsProbeBased = (ProbeProfileCount > 0);
333   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
334 
335   if (Result == sampleprof_error::success)
336     computeSummary();
337 
338   return Result;
339 }
340 
341 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
342   bool result = false;
343 
344   // Check that the first non-comment line is a valid function header.
345   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
346   if (!LineIt.is_at_eof()) {
347     if ((*LineIt)[0] != ' ') {
348       uint64_t NumSamples, NumHeadSamples;
349       StringRef FName;
350       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
351     }
352   }
353 
354   return result;
355 }
356 
357 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
358   unsigned NumBytesRead = 0;
359   std::error_code EC;
360   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
361 
362   if (Val > std::numeric_limits<T>::max())
363     EC = sampleprof_error::malformed;
364   else if (Data + NumBytesRead > End)
365     EC = sampleprof_error::truncated;
366   else
367     EC = sampleprof_error::success;
368 
369   if (EC) {
370     reportError(0, EC.message());
371     return EC;
372   }
373 
374   Data += NumBytesRead;
375   return static_cast<T>(Val);
376 }
377 
378 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
379   std::error_code EC;
380   StringRef Str(reinterpret_cast<const char *>(Data));
381   if (Data + Str.size() + 1 > End) {
382     EC = sampleprof_error::truncated;
383     reportError(0, EC.message());
384     return EC;
385   }
386 
387   Data += Str.size() + 1;
388   return Str;
389 }
390 
391 template <typename T>
392 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
393   std::error_code EC;
394 
395   if (Data + sizeof(T) > End) {
396     EC = sampleprof_error::truncated;
397     reportError(0, EC.message());
398     return EC;
399   }
400 
401   using namespace support;
402   T Val = endian::readNext<T, little, unaligned>(Data);
403   return Val;
404 }
405 
406 template <typename T>
407 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
408   std::error_code EC;
409   auto Idx = readNumber<uint32_t>();
410   if (std::error_code EC = Idx.getError())
411     return EC;
412   if (*Idx >= Table.size())
413     return sampleprof_error::truncated_name_table;
414   return *Idx;
415 }
416 
417 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
418   auto Idx = readStringIndex(NameTable);
419   if (std::error_code EC = Idx.getError())
420     return EC;
421 
422   return NameTable[*Idx];
423 }
424 
425 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
426   if (!FixedLengthMD5)
427     return SampleProfileReaderBinary::readStringFromTable();
428 
429   // read NameTable index.
430   auto Idx = readStringIndex(NameTable);
431   if (std::error_code EC = Idx.getError())
432     return EC;
433 
434   // Check whether the name to be accessed has been accessed before,
435   // if not, read it from memory directly.
436   StringRef &SR = NameTable[*Idx];
437   if (SR.empty()) {
438     const uint8_t *SavedData = Data;
439     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
440     auto FID = readUnencodedNumber<uint64_t>();
441     if (std::error_code EC = FID.getError())
442       return EC;
443     // Save the string converted from uint64_t in MD5StringBuf. All the
444     // references to the name are all StringRefs refering to the string
445     // in MD5StringBuf.
446     MD5StringBuf->push_back(std::to_string(*FID));
447     SR = MD5StringBuf->back();
448     Data = SavedData;
449   }
450   return SR;
451 }
452 
453 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
454   auto Idx = readStringIndex(NameTable);
455   if (std::error_code EC = Idx.getError())
456     return EC;
457 
458   return StringRef(NameTable[*Idx]);
459 }
460 
461 std::error_code
462 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
463   auto NumSamples = readNumber<uint64_t>();
464   if (std::error_code EC = NumSamples.getError())
465     return EC;
466   FProfile.addTotalSamples(*NumSamples);
467 
468   // Read the samples in the body.
469   auto NumRecords = readNumber<uint32_t>();
470   if (std::error_code EC = NumRecords.getError())
471     return EC;
472 
473   for (uint32_t I = 0; I < *NumRecords; ++I) {
474     auto LineOffset = readNumber<uint64_t>();
475     if (std::error_code EC = LineOffset.getError())
476       return EC;
477 
478     if (!isOffsetLegal(*LineOffset)) {
479       return std::error_code();
480     }
481 
482     auto Discriminator = readNumber<uint64_t>();
483     if (std::error_code EC = Discriminator.getError())
484       return EC;
485 
486     auto NumSamples = readNumber<uint64_t>();
487     if (std::error_code EC = NumSamples.getError())
488       return EC;
489 
490     auto NumCalls = readNumber<uint32_t>();
491     if (std::error_code EC = NumCalls.getError())
492       return EC;
493 
494     for (uint32_t J = 0; J < *NumCalls; ++J) {
495       auto CalledFunction(readStringFromTable());
496       if (std::error_code EC = CalledFunction.getError())
497         return EC;
498 
499       auto CalledFunctionSamples = readNumber<uint64_t>();
500       if (std::error_code EC = CalledFunctionSamples.getError())
501         return EC;
502 
503       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
504                                       *CalledFunction, *CalledFunctionSamples);
505     }
506 
507     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
508   }
509 
510   // Read all the samples for inlined function calls.
511   auto NumCallsites = readNumber<uint32_t>();
512   if (std::error_code EC = NumCallsites.getError())
513     return EC;
514 
515   for (uint32_t J = 0; J < *NumCallsites; ++J) {
516     auto LineOffset = readNumber<uint64_t>();
517     if (std::error_code EC = LineOffset.getError())
518       return EC;
519 
520     auto Discriminator = readNumber<uint64_t>();
521     if (std::error_code EC = Discriminator.getError())
522       return EC;
523 
524     auto FName(readStringFromTable());
525     if (std::error_code EC = FName.getError())
526       return EC;
527 
528     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
529         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
530     CalleeProfile.setName(*FName);
531     if (std::error_code EC = readProfile(CalleeProfile))
532       return EC;
533   }
534 
535   return sampleprof_error::success;
536 }
537 
538 std::error_code
539 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
540   Data = Start;
541   auto NumHeadSamples = readNumber<uint64_t>();
542   if (std::error_code EC = NumHeadSamples.getError())
543     return EC;
544 
545   auto FName(readStringFromTable());
546   if (std::error_code EC = FName.getError())
547     return EC;
548 
549   Profiles[*FName] = FunctionSamples();
550   FunctionSamples &FProfile = Profiles[*FName];
551   FProfile.setName(*FName);
552 
553   FProfile.addHeadSamples(*NumHeadSamples);
554 
555   if (std::error_code EC = readProfile(FProfile))
556     return EC;
557   return sampleprof_error::success;
558 }
559 
560 std::error_code SampleProfileReaderBinary::readImpl() {
561   while (!at_eof()) {
562     if (std::error_code EC = readFuncProfile(Data))
563       return EC;
564   }
565 
566   return sampleprof_error::success;
567 }
568 
569 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
570     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
571   Data = Start;
572   End = Start + Size;
573   switch (Entry.Type) {
574   case SecProfSummary:
575     if (std::error_code EC = readSummary())
576       return EC;
577     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
578       Summary->setPartialProfile(true);
579     break;
580   case SecNameTable: {
581     FixedLengthMD5 =
582         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
583     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
584     assert((!FixedLengthMD5 || UseMD5) &&
585            "If FixedLengthMD5 is true, UseMD5 has to be true");
586     if (std::error_code EC = readNameTableSec(UseMD5))
587       return EC;
588     break;
589   }
590   case SecLBRProfile:
591     if (std::error_code EC = readFuncProfiles())
592       return EC;
593     break;
594   case SecFuncOffsetTable:
595     if (std::error_code EC = readFuncOffsetTable())
596       return EC;
597     break;
598   case SecFuncMetadata:
599     ProfileIsProbeBased =
600         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
601     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
602     if (std::error_code EC = readFuncMetadata())
603       return EC;
604     break;
605   case SecProfileSymbolList:
606     if (std::error_code EC = readProfileSymbolList())
607       return EC;
608     break;
609   default:
610     if (std::error_code EC = readCustomSection(Entry))
611       return EC;
612     break;
613   }
614   return sampleprof_error::success;
615 }
616 
617 void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
618   UseAllFuncs = false;
619   FuncsToUse.clear();
620   for (auto &F : M)
621     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
622 }
623 
624 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
625   // If there are more than one FuncOffsetTable, the profile read associated
626   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
627   // is read.
628   FuncOffsetTable.clear();
629 
630   auto Size = readNumber<uint64_t>();
631   if (std::error_code EC = Size.getError())
632     return EC;
633 
634   FuncOffsetTable.reserve(*Size);
635   for (uint32_t I = 0; I < *Size; ++I) {
636     auto FName(readStringFromTable());
637     if (std::error_code EC = FName.getError())
638       return EC;
639 
640     auto Offset = readNumber<uint64_t>();
641     if (std::error_code EC = Offset.getError())
642       return EC;
643 
644     FuncOffsetTable[*FName] = *Offset;
645   }
646   return sampleprof_error::success;
647 }
648 
649 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
650   const uint8_t *Start = Data;
651   if (UseAllFuncs) {
652     while (Data < End) {
653       if (std::error_code EC = readFuncProfile(Data))
654         return EC;
655     }
656     assert(Data == End && "More data is read than expected");
657     return sampleprof_error::success;
658   }
659 
660   if (Remapper) {
661     for (auto Name : FuncsToUse) {
662       Remapper->insert(Name);
663     }
664   }
665 
666   if (useMD5()) {
667     for (auto Name : FuncsToUse) {
668       auto GUID = std::to_string(MD5Hash(Name));
669       auto iter = FuncOffsetTable.find(StringRef(GUID));
670       if (iter == FuncOffsetTable.end())
671         continue;
672       const uint8_t *FuncProfileAddr = Start + iter->second;
673       assert(FuncProfileAddr < End && "out of LBRProfile section");
674       if (std::error_code EC = readFuncProfile(FuncProfileAddr))
675         return EC;
676     }
677   } else {
678     for (auto NameOffset : FuncOffsetTable) {
679       auto FuncName = NameOffset.first;
680       if (!FuncsToUse.count(FuncName) &&
681           (!Remapper || !Remapper->exist(FuncName)))
682         continue;
683       const uint8_t *FuncProfileAddr = Start + NameOffset.second;
684       assert(FuncProfileAddr < End && "out of LBRProfile section");
685       if (std::error_code EC = readFuncProfile(FuncProfileAddr))
686         return EC;
687     }
688   }
689 
690   Data = End;
691   return sampleprof_error::success;
692 }
693 
694 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
695   if (!ProfSymList)
696     ProfSymList = std::make_unique<ProfileSymbolList>();
697 
698   if (std::error_code EC = ProfSymList->read(Data, End - Data))
699     return EC;
700 
701   Data = End;
702   return sampleprof_error::success;
703 }
704 
705 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
706     const uint8_t *SecStart, const uint64_t SecSize,
707     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
708   Data = SecStart;
709   End = SecStart + SecSize;
710   auto DecompressSize = readNumber<uint64_t>();
711   if (std::error_code EC = DecompressSize.getError())
712     return EC;
713   DecompressBufSize = *DecompressSize;
714 
715   auto CompressSize = readNumber<uint64_t>();
716   if (std::error_code EC = CompressSize.getError())
717     return EC;
718 
719   if (!llvm::zlib::isAvailable())
720     return sampleprof_error::zlib_unavailable;
721 
722   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
723                               *CompressSize);
724   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
725   size_t UCSize = DecompressBufSize;
726   llvm::Error E =
727       zlib::uncompress(CompressedStrings, Buffer, UCSize);
728   if (E)
729     return sampleprof_error::uncompress_failed;
730   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
731   return sampleprof_error::success;
732 }
733 
734 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
735   const uint8_t *BufStart =
736       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
737 
738   for (auto &Entry : SecHdrTable) {
739     // Skip empty section.
740     if (!Entry.Size)
741       continue;
742 
743     const uint8_t *SecStart = BufStart + Entry.Offset;
744     uint64_t SecSize = Entry.Size;
745 
746     // If the section is compressed, decompress it into a buffer
747     // DecompressBuf before reading the actual data. The pointee of
748     // 'Data' will be changed to buffer hold by DecompressBuf
749     // temporarily when reading the actual data.
750     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
751     if (isCompressed) {
752       const uint8_t *DecompressBuf;
753       uint64_t DecompressBufSize;
754       if (std::error_code EC = decompressSection(
755               SecStart, SecSize, DecompressBuf, DecompressBufSize))
756         return EC;
757       SecStart = DecompressBuf;
758       SecSize = DecompressBufSize;
759     }
760 
761     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
762       return EC;
763     if (Data != SecStart + SecSize)
764       return sampleprof_error::malformed;
765 
766     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
767     if (isCompressed) {
768       Data = BufStart + Entry.Offset;
769       End = BufStart + Buffer->getBufferSize();
770     }
771   }
772 
773   return sampleprof_error::success;
774 }
775 
776 std::error_code SampleProfileReaderCompactBinary::readImpl() {
777   std::vector<uint64_t> OffsetsToUse;
778   if (UseAllFuncs) {
779     for (auto FuncEntry : FuncOffsetTable) {
780       OffsetsToUse.push_back(FuncEntry.second);
781     }
782   }
783   else {
784     for (auto Name : FuncsToUse) {
785       auto GUID = std::to_string(MD5Hash(Name));
786       auto iter = FuncOffsetTable.find(StringRef(GUID));
787       if (iter == FuncOffsetTable.end())
788         continue;
789       OffsetsToUse.push_back(iter->second);
790     }
791   }
792 
793   for (auto Offset : OffsetsToUse) {
794     const uint8_t *SavedData = Data;
795     if (std::error_code EC = readFuncProfile(
796             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
797             Offset))
798       return EC;
799     Data = SavedData;
800   }
801   return sampleprof_error::success;
802 }
803 
804 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
805   if (Magic == SPMagic())
806     return sampleprof_error::success;
807   return sampleprof_error::bad_magic;
808 }
809 
810 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
811   if (Magic == SPMagic(SPF_Ext_Binary))
812     return sampleprof_error::success;
813   return sampleprof_error::bad_magic;
814 }
815 
816 std::error_code
817 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
818   if (Magic == SPMagic(SPF_Compact_Binary))
819     return sampleprof_error::success;
820   return sampleprof_error::bad_magic;
821 }
822 
823 std::error_code SampleProfileReaderBinary::readNameTable() {
824   auto Size = readNumber<uint32_t>();
825   if (std::error_code EC = Size.getError())
826     return EC;
827   NameTable.reserve(*Size + NameTable.size());
828   for (uint32_t I = 0; I < *Size; ++I) {
829     auto Name(readString());
830     if (std::error_code EC = Name.getError())
831       return EC;
832     NameTable.push_back(*Name);
833   }
834 
835   return sampleprof_error::success;
836 }
837 
838 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
839   auto Size = readNumber<uint64_t>();
840   if (std::error_code EC = Size.getError())
841     return EC;
842   MD5StringBuf = std::make_unique<std::vector<std::string>>();
843   MD5StringBuf->reserve(*Size);
844   if (FixedLengthMD5) {
845     // Preallocate and initialize NameTable so we can check whether a name
846     // index has been read before by checking whether the element in the
847     // NameTable is empty, meanwhile readStringIndex can do the boundary
848     // check using the size of NameTable.
849     NameTable.resize(*Size + NameTable.size());
850 
851     MD5NameMemStart = Data;
852     Data = Data + (*Size) * sizeof(uint64_t);
853     return sampleprof_error::success;
854   }
855   NameTable.reserve(*Size);
856   for (uint32_t I = 0; I < *Size; ++I) {
857     auto FID = readNumber<uint64_t>();
858     if (std::error_code EC = FID.getError())
859       return EC;
860     MD5StringBuf->push_back(std::to_string(*FID));
861     // NameTable is a vector of StringRef. Here it is pushing back a
862     // StringRef initialized with the last string in MD5stringBuf.
863     NameTable.push_back(MD5StringBuf->back());
864   }
865   return sampleprof_error::success;
866 }
867 
868 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
869   if (IsMD5)
870     return readMD5NameTable();
871   return SampleProfileReaderBinary::readNameTable();
872 }
873 
874 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
875   if (!ProfileIsProbeBased)
876     return sampleprof_error::success;
877   for (unsigned I = 0; I < Profiles.size(); ++I) {
878     auto FName(readStringFromTable());
879     if (std::error_code EC = FName.getError())
880       return EC;
881 
882     auto Checksum = readNumber<uint64_t>();
883     if (std::error_code EC = Checksum.getError())
884       return EC;
885 
886     Profiles[*FName].setFunctionHash(*Checksum);
887   }
888   return sampleprof_error::success;
889 }
890 
891 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
892   auto Size = readNumber<uint64_t>();
893   if (std::error_code EC = Size.getError())
894     return EC;
895   NameTable.reserve(*Size);
896   for (uint32_t I = 0; I < *Size; ++I) {
897     auto FID = readNumber<uint64_t>();
898     if (std::error_code EC = FID.getError())
899       return EC;
900     NameTable.push_back(std::to_string(*FID));
901   }
902   return sampleprof_error::success;
903 }
904 
905 std::error_code
906 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
907   SecHdrTableEntry Entry;
908   auto Type = readUnencodedNumber<uint64_t>();
909   if (std::error_code EC = Type.getError())
910     return EC;
911   Entry.Type = static_cast<SecType>(*Type);
912 
913   auto Flags = readUnencodedNumber<uint64_t>();
914   if (std::error_code EC = Flags.getError())
915     return EC;
916   Entry.Flags = *Flags;
917 
918   auto Offset = readUnencodedNumber<uint64_t>();
919   if (std::error_code EC = Offset.getError())
920     return EC;
921   Entry.Offset = *Offset;
922 
923   auto Size = readUnencodedNumber<uint64_t>();
924   if (std::error_code EC = Size.getError())
925     return EC;
926   Entry.Size = *Size;
927 
928   Entry.LayoutIndex = Idx;
929   SecHdrTable.push_back(std::move(Entry));
930   return sampleprof_error::success;
931 }
932 
933 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
934   auto EntryNum = readUnencodedNumber<uint64_t>();
935   if (std::error_code EC = EntryNum.getError())
936     return EC;
937 
938   for (uint32_t i = 0; i < (*EntryNum); i++)
939     if (std::error_code EC = readSecHdrTableEntry(i))
940       return EC;
941 
942   return sampleprof_error::success;
943 }
944 
945 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
946   const uint8_t *BufStart =
947       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
948   Data = BufStart;
949   End = BufStart + Buffer->getBufferSize();
950 
951   if (std::error_code EC = readMagicIdent())
952     return EC;
953 
954   if (std::error_code EC = readSecHdrTable())
955     return EC;
956 
957   return sampleprof_error::success;
958 }
959 
960 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
961   uint64_t Size = 0;
962   for (auto &Entry : SecHdrTable) {
963     if (Entry.Type == Type)
964       Size += Entry.Size;
965   }
966   return Size;
967 }
968 
969 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
970   // Sections in SecHdrTable is not necessarily in the same order as
971   // sections in the profile because section like FuncOffsetTable needs
972   // to be written after section LBRProfile but needs to be read before
973   // section LBRProfile, so we cannot simply use the last entry in
974   // SecHdrTable to calculate the file size.
975   uint64_t FileSize = 0;
976   for (auto &Entry : SecHdrTable) {
977     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
978   }
979   return FileSize;
980 }
981 
982 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
983   std::string Flags;
984   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
985     Flags.append("{compressed,");
986   else
987     Flags.append("{");
988 
989   switch (Entry.Type) {
990   case SecNameTable:
991     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
992       Flags.append("fixlenmd5,");
993     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
994       Flags.append("md5,");
995     break;
996   case SecProfSummary:
997     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
998       Flags.append("partial,");
999     break;
1000   default:
1001     break;
1002   }
1003   char &last = Flags.back();
1004   if (last == ',')
1005     last = '}';
1006   else
1007     Flags.append("}");
1008   return Flags;
1009 }
1010 
1011 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1012   uint64_t TotalSecsSize = 0;
1013   for (auto &Entry : SecHdrTable) {
1014     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1015        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1016        << "\n";
1017     ;
1018     TotalSecsSize += Entry.Size;
1019   }
1020   uint64_t HeaderSize = SecHdrTable.front().Offset;
1021   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1022          "Size of 'header + sections' doesn't match the total size of profile");
1023 
1024   OS << "Header Size: " << HeaderSize << "\n";
1025   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1026   OS << "File Size: " << getFileSize() << "\n";
1027   return true;
1028 }
1029 
1030 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1031   // Read and check the magic identifier.
1032   auto Magic = readNumber<uint64_t>();
1033   if (std::error_code EC = Magic.getError())
1034     return EC;
1035   else if (std::error_code EC = verifySPMagic(*Magic))
1036     return EC;
1037 
1038   // Read the version number.
1039   auto Version = readNumber<uint64_t>();
1040   if (std::error_code EC = Version.getError())
1041     return EC;
1042   else if (*Version != SPVersion())
1043     return sampleprof_error::unsupported_version;
1044 
1045   return sampleprof_error::success;
1046 }
1047 
1048 std::error_code SampleProfileReaderBinary::readHeader() {
1049   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1050   End = Data + Buffer->getBufferSize();
1051 
1052   if (std::error_code EC = readMagicIdent())
1053     return EC;
1054 
1055   if (std::error_code EC = readSummary())
1056     return EC;
1057 
1058   if (std::error_code EC = readNameTable())
1059     return EC;
1060   return sampleprof_error::success;
1061 }
1062 
1063 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1064   SampleProfileReaderBinary::readHeader();
1065   if (std::error_code EC = readFuncOffsetTable())
1066     return EC;
1067   return sampleprof_error::success;
1068 }
1069 
1070 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1071   auto TableOffset = readUnencodedNumber<uint64_t>();
1072   if (std::error_code EC = TableOffset.getError())
1073     return EC;
1074 
1075   const uint8_t *SavedData = Data;
1076   const uint8_t *TableStart =
1077       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1078       *TableOffset;
1079   Data = TableStart;
1080 
1081   auto Size = readNumber<uint64_t>();
1082   if (std::error_code EC = Size.getError())
1083     return EC;
1084 
1085   FuncOffsetTable.reserve(*Size);
1086   for (uint32_t I = 0; I < *Size; ++I) {
1087     auto FName(readStringFromTable());
1088     if (std::error_code EC = FName.getError())
1089       return EC;
1090 
1091     auto Offset = readNumber<uint64_t>();
1092     if (std::error_code EC = Offset.getError())
1093       return EC;
1094 
1095     FuncOffsetTable[*FName] = *Offset;
1096   }
1097   End = TableStart;
1098   Data = SavedData;
1099   return sampleprof_error::success;
1100 }
1101 
1102 void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
1103   UseAllFuncs = false;
1104   FuncsToUse.clear();
1105   for (auto &F : M)
1106     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1107 }
1108 
1109 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1110     std::vector<ProfileSummaryEntry> &Entries) {
1111   auto Cutoff = readNumber<uint64_t>();
1112   if (std::error_code EC = Cutoff.getError())
1113     return EC;
1114 
1115   auto MinBlockCount = readNumber<uint64_t>();
1116   if (std::error_code EC = MinBlockCount.getError())
1117     return EC;
1118 
1119   auto NumBlocks = readNumber<uint64_t>();
1120   if (std::error_code EC = NumBlocks.getError())
1121     return EC;
1122 
1123   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1124   return sampleprof_error::success;
1125 }
1126 
1127 std::error_code SampleProfileReaderBinary::readSummary() {
1128   auto TotalCount = readNumber<uint64_t>();
1129   if (std::error_code EC = TotalCount.getError())
1130     return EC;
1131 
1132   auto MaxBlockCount = readNumber<uint64_t>();
1133   if (std::error_code EC = MaxBlockCount.getError())
1134     return EC;
1135 
1136   auto MaxFunctionCount = readNumber<uint64_t>();
1137   if (std::error_code EC = MaxFunctionCount.getError())
1138     return EC;
1139 
1140   auto NumBlocks = readNumber<uint64_t>();
1141   if (std::error_code EC = NumBlocks.getError())
1142     return EC;
1143 
1144   auto NumFunctions = readNumber<uint64_t>();
1145   if (std::error_code EC = NumFunctions.getError())
1146     return EC;
1147 
1148   auto NumSummaryEntries = readNumber<uint64_t>();
1149   if (std::error_code EC = NumSummaryEntries.getError())
1150     return EC;
1151 
1152   std::vector<ProfileSummaryEntry> Entries;
1153   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1154     std::error_code EC = readSummaryEntry(Entries);
1155     if (EC != sampleprof_error::success)
1156       return EC;
1157   }
1158   Summary = std::make_unique<ProfileSummary>(
1159       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1160       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1161 
1162   return sampleprof_error::success;
1163 }
1164 
1165 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1166   const uint8_t *Data =
1167       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1168   uint64_t Magic = decodeULEB128(Data);
1169   return Magic == SPMagic();
1170 }
1171 
1172 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1173   const uint8_t *Data =
1174       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1175   uint64_t Magic = decodeULEB128(Data);
1176   return Magic == SPMagic(SPF_Ext_Binary);
1177 }
1178 
1179 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1180   const uint8_t *Data =
1181       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1182   uint64_t Magic = decodeULEB128(Data);
1183   return Magic == SPMagic(SPF_Compact_Binary);
1184 }
1185 
1186 std::error_code SampleProfileReaderGCC::skipNextWord() {
1187   uint32_t dummy;
1188   if (!GcovBuffer.readInt(dummy))
1189     return sampleprof_error::truncated;
1190   return sampleprof_error::success;
1191 }
1192 
1193 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1194   if (sizeof(T) <= sizeof(uint32_t)) {
1195     uint32_t Val;
1196     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1197       return static_cast<T>(Val);
1198   } else if (sizeof(T) <= sizeof(uint64_t)) {
1199     uint64_t Val;
1200     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1201       return static_cast<T>(Val);
1202   }
1203 
1204   std::error_code EC = sampleprof_error::malformed;
1205   reportError(0, EC.message());
1206   return EC;
1207 }
1208 
1209 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1210   StringRef Str;
1211   if (!GcovBuffer.readString(Str))
1212     return sampleprof_error::truncated;
1213   return Str;
1214 }
1215 
1216 std::error_code SampleProfileReaderGCC::readHeader() {
1217   // Read the magic identifier.
1218   if (!GcovBuffer.readGCDAFormat())
1219     return sampleprof_error::unrecognized_format;
1220 
1221   // Read the version number. Note - the GCC reader does not validate this
1222   // version, but the profile creator generates v704.
1223   GCOV::GCOVVersion version;
1224   if (!GcovBuffer.readGCOVVersion(version))
1225     return sampleprof_error::unrecognized_format;
1226 
1227   if (version != GCOV::V407)
1228     return sampleprof_error::unsupported_version;
1229 
1230   // Skip the empty integer.
1231   if (std::error_code EC = skipNextWord())
1232     return EC;
1233 
1234   return sampleprof_error::success;
1235 }
1236 
1237 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1238   uint32_t Tag;
1239   if (!GcovBuffer.readInt(Tag))
1240     return sampleprof_error::truncated;
1241 
1242   if (Tag != Expected)
1243     return sampleprof_error::malformed;
1244 
1245   if (std::error_code EC = skipNextWord())
1246     return EC;
1247 
1248   return sampleprof_error::success;
1249 }
1250 
1251 std::error_code SampleProfileReaderGCC::readNameTable() {
1252   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1253     return EC;
1254 
1255   uint32_t Size;
1256   if (!GcovBuffer.readInt(Size))
1257     return sampleprof_error::truncated;
1258 
1259   for (uint32_t I = 0; I < Size; ++I) {
1260     StringRef Str;
1261     if (!GcovBuffer.readString(Str))
1262       return sampleprof_error::truncated;
1263     Names.push_back(std::string(Str));
1264   }
1265 
1266   return sampleprof_error::success;
1267 }
1268 
1269 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1270   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1271     return EC;
1272 
1273   uint32_t NumFunctions;
1274   if (!GcovBuffer.readInt(NumFunctions))
1275     return sampleprof_error::truncated;
1276 
1277   InlineCallStack Stack;
1278   for (uint32_t I = 0; I < NumFunctions; ++I)
1279     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1280       return EC;
1281 
1282   computeSummary();
1283   return sampleprof_error::success;
1284 }
1285 
1286 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1287     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1288   uint64_t HeadCount = 0;
1289   if (InlineStack.size() == 0)
1290     if (!GcovBuffer.readInt64(HeadCount))
1291       return sampleprof_error::truncated;
1292 
1293   uint32_t NameIdx;
1294   if (!GcovBuffer.readInt(NameIdx))
1295     return sampleprof_error::truncated;
1296 
1297   StringRef Name(Names[NameIdx]);
1298 
1299   uint32_t NumPosCounts;
1300   if (!GcovBuffer.readInt(NumPosCounts))
1301     return sampleprof_error::truncated;
1302 
1303   uint32_t NumCallsites;
1304   if (!GcovBuffer.readInt(NumCallsites))
1305     return sampleprof_error::truncated;
1306 
1307   FunctionSamples *FProfile = nullptr;
1308   if (InlineStack.size() == 0) {
1309     // If this is a top function that we have already processed, do not
1310     // update its profile again.  This happens in the presence of
1311     // function aliases.  Since these aliases share the same function
1312     // body, there will be identical replicated profiles for the
1313     // original function.  In this case, we simply not bother updating
1314     // the profile of the original function.
1315     FProfile = &Profiles[Name];
1316     FProfile->addHeadSamples(HeadCount);
1317     if (FProfile->getTotalSamples() > 0)
1318       Update = false;
1319   } else {
1320     // Otherwise, we are reading an inlined instance. The top of the
1321     // inline stack contains the profile of the caller. Insert this
1322     // callee in the caller's CallsiteMap.
1323     FunctionSamples *CallerProfile = InlineStack.front();
1324     uint32_t LineOffset = Offset >> 16;
1325     uint32_t Discriminator = Offset & 0xffff;
1326     FProfile = &CallerProfile->functionSamplesAt(
1327         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1328   }
1329   FProfile->setName(Name);
1330 
1331   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1332     uint32_t Offset;
1333     if (!GcovBuffer.readInt(Offset))
1334       return sampleprof_error::truncated;
1335 
1336     uint32_t NumTargets;
1337     if (!GcovBuffer.readInt(NumTargets))
1338       return sampleprof_error::truncated;
1339 
1340     uint64_t Count;
1341     if (!GcovBuffer.readInt64(Count))
1342       return sampleprof_error::truncated;
1343 
1344     // The line location is encoded in the offset as:
1345     //   high 16 bits: line offset to the start of the function.
1346     //   low 16 bits: discriminator.
1347     uint32_t LineOffset = Offset >> 16;
1348     uint32_t Discriminator = Offset & 0xffff;
1349 
1350     InlineCallStack NewStack;
1351     NewStack.push_back(FProfile);
1352     NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
1353     if (Update) {
1354       // Walk up the inline stack, adding the samples on this line to
1355       // the total sample count of the callers in the chain.
1356       for (auto CallerProfile : NewStack)
1357         CallerProfile->addTotalSamples(Count);
1358 
1359       // Update the body samples for the current profile.
1360       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1361     }
1362 
1363     // Process the list of functions called at an indirect call site.
1364     // These are all the targets that a function pointer (or virtual
1365     // function) resolved at runtime.
1366     for (uint32_t J = 0; J < NumTargets; J++) {
1367       uint32_t HistVal;
1368       if (!GcovBuffer.readInt(HistVal))
1369         return sampleprof_error::truncated;
1370 
1371       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1372         return sampleprof_error::malformed;
1373 
1374       uint64_t TargetIdx;
1375       if (!GcovBuffer.readInt64(TargetIdx))
1376         return sampleprof_error::truncated;
1377       StringRef TargetName(Names[TargetIdx]);
1378 
1379       uint64_t TargetCount;
1380       if (!GcovBuffer.readInt64(TargetCount))
1381         return sampleprof_error::truncated;
1382 
1383       if (Update)
1384         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1385                                          TargetName, TargetCount);
1386     }
1387   }
1388 
1389   // Process all the inlined callers into the current function. These
1390   // are all the callsites that were inlined into this function.
1391   for (uint32_t I = 0; I < NumCallsites; I++) {
1392     // The offset is encoded as:
1393     //   high 16 bits: line offset to the start of the function.
1394     //   low 16 bits: discriminator.
1395     uint32_t Offset;
1396     if (!GcovBuffer.readInt(Offset))
1397       return sampleprof_error::truncated;
1398     InlineCallStack NewStack;
1399     NewStack.push_back(FProfile);
1400     NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
1401     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1402       return EC;
1403   }
1404 
1405   return sampleprof_error::success;
1406 }
1407 
1408 /// Read a GCC AutoFDO profile.
1409 ///
1410 /// This format is generated by the Linux Perf conversion tool at
1411 /// https://github.com/google/autofdo.
1412 std::error_code SampleProfileReaderGCC::readImpl() {
1413   // Read the string table.
1414   if (std::error_code EC = readNameTable())
1415     return EC;
1416 
1417   // Read the source profile.
1418   if (std::error_code EC = readFunctionProfiles())
1419     return EC;
1420 
1421   return sampleprof_error::success;
1422 }
1423 
1424 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1425   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1426   return Magic == "adcg*704";
1427 }
1428 
1429 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1430   // If the reader uses MD5 to represent string, we can't remap it because
1431   // we don't know what the original function names were.
1432   if (Reader.useMD5()) {
1433     Ctx.diagnose(DiagnosticInfoSampleProfile(
1434         Reader.getBuffer()->getBufferIdentifier(),
1435         "Profile data remapping cannot be applied to profile data "
1436         "in compact format (original mangled names are not available).",
1437         DS_Warning));
1438     return;
1439   }
1440 
1441   // CSSPGO-TODO: Remapper is not yet supported.
1442   // We will need to remap the entire context string.
1443   assert(Remappings && "should be initialized while creating remapper");
1444   for (auto &Sample : Reader.getProfiles()) {
1445     DenseSet<StringRef> NamesInSample;
1446     Sample.second.findAllNames(NamesInSample);
1447     for (auto &Name : NamesInSample)
1448       if (auto Key = Remappings->insert(Name))
1449         NameMap.insert({Key, Name});
1450   }
1451 
1452   RemappingApplied = true;
1453 }
1454 
1455 Optional<StringRef>
1456 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1457   if (auto Key = Remappings->lookup(Fname))
1458     return NameMap.lookup(Key);
1459   return None;
1460 }
1461 
1462 /// Prepare a memory buffer for the contents of \p Filename.
1463 ///
1464 /// \returns an error code indicating the status of the buffer.
1465 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1466 setupMemoryBuffer(const Twine &Filename) {
1467   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
1468   if (std::error_code EC = BufferOrErr.getError())
1469     return EC;
1470   auto Buffer = std::move(BufferOrErr.get());
1471 
1472   // Sanity check the file.
1473   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1474     return sampleprof_error::too_large;
1475 
1476   return std::move(Buffer);
1477 }
1478 
1479 /// Create a sample profile reader based on the format of the input file.
1480 ///
1481 /// \param Filename The file to open.
1482 ///
1483 /// \param C The LLVM context to use to emit diagnostics.
1484 ///
1485 /// \param RemapFilename The file used for profile remapping.
1486 ///
1487 /// \returns an error code indicating the status of the created reader.
1488 ErrorOr<std::unique_ptr<SampleProfileReader>>
1489 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1490                             const std::string RemapFilename) {
1491   auto BufferOrError = setupMemoryBuffer(Filename);
1492   if (std::error_code EC = BufferOrError.getError())
1493     return EC;
1494   return create(BufferOrError.get(), C, RemapFilename);
1495 }
1496 
1497 /// Create a sample profile remapper from the given input, to remap the
1498 /// function names in the given profile data.
1499 ///
1500 /// \param Filename The file to open.
1501 ///
1502 /// \param Reader The profile reader the remapper is going to be applied to.
1503 ///
1504 /// \param C The LLVM context to use to emit diagnostics.
1505 ///
1506 /// \returns an error code indicating the status of the created reader.
1507 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1508 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1509                                            SampleProfileReader &Reader,
1510                                            LLVMContext &C) {
1511   auto BufferOrError = setupMemoryBuffer(Filename);
1512   if (std::error_code EC = BufferOrError.getError())
1513     return EC;
1514   return create(BufferOrError.get(), Reader, C);
1515 }
1516 
1517 /// Create a sample profile remapper from the given input, to remap the
1518 /// function names in the given profile data.
1519 ///
1520 /// \param B The memory buffer to create the reader from (assumes ownership).
1521 ///
1522 /// \param C The LLVM context to use to emit diagnostics.
1523 ///
1524 /// \param Reader The profile reader the remapper is going to be applied to.
1525 ///
1526 /// \returns an error code indicating the status of the created reader.
1527 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1528 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1529                                            SampleProfileReader &Reader,
1530                                            LLVMContext &C) {
1531   auto Remappings = std::make_unique<SymbolRemappingReader>();
1532   if (Error E = Remappings->read(*B.get())) {
1533     handleAllErrors(
1534         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1535           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1536                                                  ParseError.getLineNum(),
1537                                                  ParseError.getMessage()));
1538         });
1539     return sampleprof_error::malformed;
1540   }
1541 
1542   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1543       std::move(B), std::move(Remappings), Reader);
1544 }
1545 
1546 /// Create a sample profile reader based on the format of the input data.
1547 ///
1548 /// \param B The memory buffer to create the reader from (assumes ownership).
1549 ///
1550 /// \param C The LLVM context to use to emit diagnostics.
1551 ///
1552 /// \param RemapFilename The file used for profile remapping.
1553 ///
1554 /// \returns an error code indicating the status of the created reader.
1555 ErrorOr<std::unique_ptr<SampleProfileReader>>
1556 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1557                             const std::string RemapFilename) {
1558   std::unique_ptr<SampleProfileReader> Reader;
1559   if (SampleProfileReaderRawBinary::hasFormat(*B))
1560     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1561   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1562     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1563   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1564     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1565   else if (SampleProfileReaderGCC::hasFormat(*B))
1566     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1567   else if (SampleProfileReaderText::hasFormat(*B))
1568     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1569   else
1570     return sampleprof_error::unrecognized_format;
1571 
1572   if (!RemapFilename.empty()) {
1573     auto ReaderOrErr =
1574         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1575     if (std::error_code EC = ReaderOrErr.getError()) {
1576       std::string Msg = "Could not create remapper: " + EC.message();
1577       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1578       return EC;
1579     }
1580     Reader->Remapper = std::move(ReaderOrErr.get());
1581   }
1582 
1583   FunctionSamples::Format = Reader->getFormat();
1584   if (std::error_code EC = Reader->readHeader()) {
1585     return EC;
1586   }
1587 
1588   return std::move(Reader);
1589 }
1590 
1591 // For text and GCC file formats, we compute the summary after reading the
1592 // profile. Binary format has the profile summary in its header.
1593 void SampleProfileReader::computeSummary() {
1594   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1595   for (const auto &I : Profiles) {
1596     const FunctionSamples &Profile = I.second;
1597     Builder.addRecord(Profile);
1598   }
1599   Summary = Builder.getSummary();
1600 }
1601