1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>
43 
44 using namespace llvm;
45 using namespace sampleprof;
46 
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
51 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
52                                               raw_ostream &OS) {
53   OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55 
56 /// Dump all the function profiles found on stream \p OS.
57 void SampleProfileReader::dump(raw_ostream &OS) {
58   for (const auto &I : Profiles)
59     dumpFunctionProfile(I.getKey(), OS);
60 }
61 
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71   if (Input[0] == ' ')
72     return false;
73   size_t n2 = Input.rfind(':');
74   size_t n1 = Input.rfind(':', n2 - 1);
75   FName = Input.substr(0, n1);
76   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77     return false;
78   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79     return false;
80   return true;
81 }
82 
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is 64 bits wide so that offsets decoded as uint64_t are
/// checked in full instead of being truncated to 32 bits at the call.
static bool isOffsetLegal(uint64_t L) { return L <= 0xffff; }
85 
86 /// Parse \p Input that contains metadata.
87 /// Possible metadata:
88 /// - CFG Checksum information:
89 ///     !CFGChecksum: 12345
90 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
91 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
92   if (!Input.startswith("!CFGChecksum:"))
93     return false;
94 
95   StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
96   return !CFGInfo.getAsInteger(10, FunctionHash);
97 }
98 
/// Kind of an indented (non-header) line in the text profile format.
enum class LineType {
  /// 'offset[.discriminator]: callee:NUM' -- an inlined callsite.
  CallSiteProfile,
  /// 'offset[.discriminator]: NUM [target:NUM]*' -- samples for a line.
  BodyProfile,
  /// '!...' -- metadata attached to the current function.
  Metadata,
};
104 
105 /// Parse \p Input as line sample.
106 ///
107 /// \param Input input line.
108 /// \param LineTy Type of this line.
109 /// \param Depth the depth of the inline stack.
110 /// \param NumSamples total samples of the line/inlined callsite.
111 /// \param LineOffset line offset to the start of the function.
112 /// \param Discriminator discriminator of the line.
113 /// \param TargetCountMap map from indirect call target to count.
114 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
115 ///
116 /// returns true if parsing is successful.
117 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
118                       uint64_t &NumSamples, uint32_t &LineOffset,
119                       uint32_t &Discriminator, StringRef &CalleeName,
120                       DenseMap<StringRef, uint64_t> &TargetCountMap,
121                       uint64_t &FunctionHash) {
122   for (Depth = 0; Input[Depth] == ' '; Depth++)
123     ;
124   if (Depth == 0)
125     return false;
126 
127   if (Depth == 1 && Input[Depth] == '!') {
128     LineTy = LineType::Metadata;
129     return parseMetadata(Input.substr(Depth), FunctionHash);
130   }
131 
132   size_t n1 = Input.find(':');
133   StringRef Loc = Input.substr(Depth, n1 - Depth);
134   size_t n2 = Loc.find('.');
135   if (n2 == StringRef::npos) {
136     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
137       return false;
138     Discriminator = 0;
139   } else {
140     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
141       return false;
142     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
143       return false;
144   }
145 
146   StringRef Rest = Input.substr(n1 + 2);
147   if (isDigit(Rest[0])) {
148     LineTy = LineType::BodyProfile;
149     size_t n3 = Rest.find(' ');
150     if (n3 == StringRef::npos) {
151       if (Rest.getAsInteger(10, NumSamples))
152         return false;
153     } else {
154       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
155         return false;
156     }
157     // Find call targets and their sample counts.
158     // Note: In some cases, there are symbols in the profile which are not
159     // mangled. To accommodate such cases, use colon + integer pairs as the
160     // anchor points.
161     // An example:
162     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
163     // ":1000" and ":437" are used as anchor points so the string above will
164     // be interpreted as
165     // target: _M_construct<char *>
166     // count: 1000
167     // target: string_view<std::allocator<char> >
168     // count: 437
169     while (n3 != StringRef::npos) {
170       n3 += Rest.substr(n3).find_first_not_of(' ');
171       Rest = Rest.substr(n3);
172       n3 = Rest.find_first_of(':');
173       if (n3 == StringRef::npos || n3 == 0)
174         return false;
175 
176       StringRef Target;
177       uint64_t count, n4;
178       while (true) {
179         // Get the segment after the current colon.
180         StringRef AfterColon = Rest.substr(n3 + 1);
181         // Get the target symbol before the current colon.
182         Target = Rest.substr(0, n3);
183         // Check if the word after the current colon is an integer.
184         n4 = AfterColon.find_first_of(' ');
185         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
186         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
187         if (!WordAfterColon.getAsInteger(10, count))
188           break;
189 
190         // Try to find the next colon.
191         uint64_t n5 = AfterColon.find_first_of(':');
192         if (n5 == StringRef::npos)
193           return false;
194         n3 += n5 + 1;
195       }
196 
197       // An anchor point is found. Save the {target, count} pair
198       TargetCountMap[Target] = count;
199       if (n4 == Rest.size())
200         break;
201       // Change n3 to the next blank space after colon + integer pair.
202       n3 = n4;
203     }
204   } else {
205     LineTy = LineType::CallSiteProfile;
206     size_t n3 = Rest.find_last_of(':');
207     CalleeName = Rest.substr(0, n3);
208     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
209       return false;
210   }
211   return true;
212 }
213 
214 /// Load samples from a text file.
215 ///
216 /// See the documentation at the top of the file for an explanation of
217 /// the expected format.
218 ///
219 /// \returns true if the file was loaded successfully, false otherwise.
220 std::error_code SampleProfileReaderText::readImpl() {
221   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
222   sampleprof_error Result = sampleprof_error::success;
223 
224   InlineCallStack InlineStack;
225   uint32_t ProbeProfileCount = 0;
226 
227   // SeenMetadata tracks whether we have processed metadata for the current
228   // top-level function profile.
229   bool SeenMetadata = false;
230 
231   for (; !LineIt.is_at_eof(); ++LineIt) {
232     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
233       continue;
234     // Read the header of each function.
235     //
236     // Note that for function identifiers we are actually expecting
237     // mangled names, but we may not always get them. This happens when
238     // the compiler decides not to emit the function (e.g., it was inlined
239     // and removed). In this case, the binary will not have the linkage
240     // name for the function, so the profiler will emit the function's
241     // unmangled name, which may contain characters like ':' and '>' in its
242     // name (member functions, templates, etc).
243     //
244     // The only requirement we place on the identifier, then, is that it
245     // should not begin with a number.
246     if ((*LineIt)[0] != ' ') {
247       uint64_t NumSamples, NumHeadSamples;
248       StringRef FName;
249       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
250         reportError(LineIt.line_number(),
251                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
252         return sampleprof_error::malformed;
253       }
254       SeenMetadata = false;
255       SampleContext FContext(FName);
256       if (FContext.hasContext())
257         ++CSProfileCount;
258       Profiles[FContext] = FunctionSamples();
259       FunctionSamples &FProfile = Profiles[FContext];
260       FProfile.setName(FContext.getNameWithoutContext());
261       FProfile.setContext(FContext);
262       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
263       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
264       InlineStack.clear();
265       InlineStack.push_back(&FProfile);
266     } else {
267       uint64_t NumSamples;
268       StringRef FName;
269       DenseMap<StringRef, uint64_t> TargetCountMap;
270       uint32_t Depth, LineOffset, Discriminator;
271       LineType LineTy;
272       uint64_t FunctionHash;
273       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
274                      Discriminator, FName, TargetCountMap, FunctionHash)) {
275         reportError(LineIt.line_number(),
276                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
277                         *LineIt);
278         return sampleprof_error::malformed;
279       }
280       if (SeenMetadata && LineTy != LineType::Metadata) {
281         // Metadata must be put at the end of a function profile.
282         reportError(LineIt.line_number(),
283                     "Found non-metadata after metadata: " + *LineIt);
284         return sampleprof_error::malformed;
285       }
286       while (InlineStack.size() > Depth) {
287         InlineStack.pop_back();
288       }
289       switch (LineTy) {
290       case LineType::CallSiteProfile: {
291         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
292             LineLocation(LineOffset, Discriminator))[std::string(FName)];
293         FSamples.setName(FName);
294         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
295         InlineStack.push_back(&FSamples);
296         break;
297       }
298       case LineType::BodyProfile: {
299         while (InlineStack.size() > Depth) {
300           InlineStack.pop_back();
301         }
302         FunctionSamples &FProfile = *InlineStack.back();
303         for (const auto &name_count : TargetCountMap) {
304           MergeResult(Result, FProfile.addCalledTargetSamples(
305                                   LineOffset, Discriminator, name_count.first,
306                                   name_count.second));
307         }
308         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
309                                                     NumSamples));
310         break;
311       }
312       case LineType::Metadata: {
313         FunctionSamples &FProfile = *InlineStack.back();
314         FProfile.setFunctionHash(FunctionHash);
315         ++ProbeProfileCount;
316         SeenMetadata = true;
317         break;
318       }
319       }
320     }
321   }
322 
323   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
324          "Cannot have both context-sensitive and regular profile");
325   ProfileIsCS = (CSProfileCount > 0);
326   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
327          "Cannot have both probe-based profiles and regular profiles");
328   ProfileIsProbeBased = (ProbeProfileCount > 0);
329   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
330   FunctionSamples::ProfileIsCS = ProfileIsCS;
331 
332   if (Result == sampleprof_error::success)
333     computeSummary();
334 
335   return Result;
336 }
337 
338 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
339   bool result = false;
340 
341   // Check that the first non-comment line is a valid function header.
342   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
343   if (!LineIt.is_at_eof()) {
344     if ((*LineIt)[0] != ' ') {
345       uint64_t NumSamples, NumHeadSamples;
346       StringRef FName;
347       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
348     }
349   }
350 
351   return result;
352 }
353 
354 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
355   unsigned NumBytesRead = 0;
356   std::error_code EC;
357   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
358 
359   if (Val > std::numeric_limits<T>::max())
360     EC = sampleprof_error::malformed;
361   else if (Data + NumBytesRead > End)
362     EC = sampleprof_error::truncated;
363   else
364     EC = sampleprof_error::success;
365 
366   if (EC) {
367     reportError(0, EC.message());
368     return EC;
369   }
370 
371   Data += NumBytesRead;
372   return static_cast<T>(Val);
373 }
374 
375 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
376   std::error_code EC;
377   StringRef Str(reinterpret_cast<const char *>(Data));
378   if (Data + Str.size() + 1 > End) {
379     EC = sampleprof_error::truncated;
380     reportError(0, EC.message());
381     return EC;
382   }
383 
384   Data += Str.size() + 1;
385   return Str;
386 }
387 
388 template <typename T>
389 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
390   std::error_code EC;
391 
392   if (Data + sizeof(T) > End) {
393     EC = sampleprof_error::truncated;
394     reportError(0, EC.message());
395     return EC;
396   }
397 
398   using namespace support;
399   T Val = endian::readNext<T, little, unaligned>(Data);
400   return Val;
401 }
402 
403 template <typename T>
404 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
405   std::error_code EC;
406   auto Idx = readNumber<uint32_t>();
407   if (std::error_code EC = Idx.getError())
408     return EC;
409   if (*Idx >= Table.size())
410     return sampleprof_error::truncated_name_table;
411   return *Idx;
412 }
413 
414 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
415   auto Idx = readStringIndex(NameTable);
416   if (std::error_code EC = Idx.getError())
417     return EC;
418 
419   return NameTable[*Idx];
420 }
421 
422 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
423   if (!FixedLengthMD5)
424     return SampleProfileReaderBinary::readStringFromTable();
425 
426   // read NameTable index.
427   auto Idx = readStringIndex(NameTable);
428   if (std::error_code EC = Idx.getError())
429     return EC;
430 
431   // Check whether the name to be accessed has been accessed before,
432   // if not, read it from memory directly.
433   StringRef &SR = NameTable[*Idx];
434   if (SR.empty()) {
435     const uint8_t *SavedData = Data;
436     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
437     auto FID = readUnencodedNumber<uint64_t>();
438     if (std::error_code EC = FID.getError())
439       return EC;
440     // Save the string converted from uint64_t in MD5StringBuf. All the
441     // references to the name are all StringRefs refering to the string
442     // in MD5StringBuf.
443     MD5StringBuf->push_back(std::to_string(*FID));
444     SR = MD5StringBuf->back();
445     Data = SavedData;
446   }
447   return SR;
448 }
449 
450 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
451   auto Idx = readStringIndex(NameTable);
452   if (std::error_code EC = Idx.getError())
453     return EC;
454 
455   return StringRef(NameTable[*Idx]);
456 }
457 
458 std::error_code
459 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
460   auto NumSamples = readNumber<uint64_t>();
461   if (std::error_code EC = NumSamples.getError())
462     return EC;
463   FProfile.addTotalSamples(*NumSamples);
464 
465   // Read the samples in the body.
466   auto NumRecords = readNumber<uint32_t>();
467   if (std::error_code EC = NumRecords.getError())
468     return EC;
469 
470   for (uint32_t I = 0; I < *NumRecords; ++I) {
471     auto LineOffset = readNumber<uint64_t>();
472     if (std::error_code EC = LineOffset.getError())
473       return EC;
474 
475     if (!isOffsetLegal(*LineOffset)) {
476       return std::error_code();
477     }
478 
479     auto Discriminator = readNumber<uint64_t>();
480     if (std::error_code EC = Discriminator.getError())
481       return EC;
482 
483     auto NumSamples = readNumber<uint64_t>();
484     if (std::error_code EC = NumSamples.getError())
485       return EC;
486 
487     auto NumCalls = readNumber<uint32_t>();
488     if (std::error_code EC = NumCalls.getError())
489       return EC;
490 
491     for (uint32_t J = 0; J < *NumCalls; ++J) {
492       auto CalledFunction(readStringFromTable());
493       if (std::error_code EC = CalledFunction.getError())
494         return EC;
495 
496       auto CalledFunctionSamples = readNumber<uint64_t>();
497       if (std::error_code EC = CalledFunctionSamples.getError())
498         return EC;
499 
500       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
501                                       *CalledFunction, *CalledFunctionSamples);
502     }
503 
504     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
505   }
506 
507   // Read all the samples for inlined function calls.
508   auto NumCallsites = readNumber<uint32_t>();
509   if (std::error_code EC = NumCallsites.getError())
510     return EC;
511 
512   for (uint32_t J = 0; J < *NumCallsites; ++J) {
513     auto LineOffset = readNumber<uint64_t>();
514     if (std::error_code EC = LineOffset.getError())
515       return EC;
516 
517     auto Discriminator = readNumber<uint64_t>();
518     if (std::error_code EC = Discriminator.getError())
519       return EC;
520 
521     auto FName(readStringFromTable());
522     if (std::error_code EC = FName.getError())
523       return EC;
524 
525     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
526         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
527     CalleeProfile.setName(*FName);
528     if (std::error_code EC = readProfile(CalleeProfile))
529       return EC;
530   }
531 
532   return sampleprof_error::success;
533 }
534 
535 std::error_code
536 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
537   Data = Start;
538   auto NumHeadSamples = readNumber<uint64_t>();
539   if (std::error_code EC = NumHeadSamples.getError())
540     return EC;
541 
542   auto FName(readStringFromTable());
543   if (std::error_code EC = FName.getError())
544     return EC;
545 
546   SampleContext FContext(*FName);
547   Profiles[FContext] = FunctionSamples();
548   FunctionSamples &FProfile = Profiles[FContext];
549   FProfile.setName(FContext.getNameWithoutContext());
550   FProfile.setContext(FContext);
551   FProfile.addHeadSamples(*NumHeadSamples);
552 
553   if (FContext.hasContext())
554     CSProfileCount++;
555 
556   if (std::error_code EC = readProfile(FProfile))
557     return EC;
558   return sampleprof_error::success;
559 }
560 
561 std::error_code SampleProfileReaderBinary::readImpl() {
562   while (!at_eof()) {
563     if (std::error_code EC = readFuncProfile(Data))
564       return EC;
565   }
566 
567   return sampleprof_error::success;
568 }
569 
570 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
571     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
572   Data = Start;
573   End = Start + Size;
574   switch (Entry.Type) {
575   case SecProfSummary:
576     if (std::error_code EC = readSummary())
577       return EC;
578     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
579       Summary->setPartialProfile(true);
580     break;
581   case SecNameTable: {
582     FixedLengthMD5 =
583         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
584     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
585     assert((!FixedLengthMD5 || UseMD5) &&
586            "If FixedLengthMD5 is true, UseMD5 has to be true");
587     FunctionSamples::HasUniqSuffix =
588         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
589     if (std::error_code EC = readNameTableSec(UseMD5))
590       return EC;
591     break;
592   }
593   case SecLBRProfile:
594     if (std::error_code EC = readFuncProfiles())
595       return EC;
596     break;
597   case SecFuncOffsetTable:
598     if (std::error_code EC = readFuncOffsetTable())
599       return EC;
600     break;
601   case SecFuncMetadata:
602     ProfileIsProbeBased =
603         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
604     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
605     if (std::error_code EC = readFuncMetadata())
606       return EC;
607     break;
608   case SecProfileSymbolList:
609     if (std::error_code EC = readProfileSymbolList())
610       return EC;
611     break;
612   default:
613     if (std::error_code EC = readCustomSection(Entry))
614       return EC;
615     break;
616   }
617   return sampleprof_error::success;
618 }
619 
620 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
621   if (!M)
622     return false;
623   FuncsToUse.clear();
624   for (auto &F : *M)
625     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
626   return true;
627 }
628 
629 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
630   // If there are more than one FuncOffsetTable, the profile read associated
631   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
632   // is read.
633   FuncOffsetTable.clear();
634 
635   auto Size = readNumber<uint64_t>();
636   if (std::error_code EC = Size.getError())
637     return EC;
638 
639   FuncOffsetTable.reserve(*Size);
640   for (uint32_t I = 0; I < *Size; ++I) {
641     auto FName(readStringFromTable());
642     if (std::error_code EC = FName.getError())
643       return EC;
644 
645     auto Offset = readNumber<uint64_t>();
646     if (std::error_code EC = Offset.getError())
647       return EC;
648 
649     FuncOffsetTable[*FName] = *Offset;
650   }
651   return sampleprof_error::success;
652 }
653 
654 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
655   // Collect functions used by current module if the Reader has been
656   // given a module.
657   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
658   // which will query FunctionSamples::HasUniqSuffix, so it has to be
659   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
660   // NameTable section is read.
661   bool LoadFuncsToBeUsed = collectFuncsFromModule();
662 
663   // When LoadFuncsToBeUsed is false, load all the function profiles.
664   const uint8_t *Start = Data;
665   if (!LoadFuncsToBeUsed) {
666     while (Data < End) {
667       if (std::error_code EC = readFuncProfile(Data))
668         return EC;
669     }
670     assert(Data == End && "More data is read than expected");
671   } else {
672     // Load function profiles on demand.
673     if (Remapper) {
674       for (auto Name : FuncsToUse) {
675         Remapper->insert(Name);
676       }
677     }
678 
679     if (useMD5()) {
680       for (auto Name : FuncsToUse) {
681         auto GUID = std::to_string(MD5Hash(Name));
682         auto iter = FuncOffsetTable.find(StringRef(GUID));
683         if (iter == FuncOffsetTable.end())
684           continue;
685         const uint8_t *FuncProfileAddr = Start + iter->second;
686         assert(FuncProfileAddr < End && "out of LBRProfile section");
687         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
688           return EC;
689       }
690     } else {
691       for (auto NameOffset : FuncOffsetTable) {
692         SampleContext FContext(NameOffset.first);
693         auto FuncName = FContext.getNameWithoutContext();
694         if (!FuncsToUse.count(FuncName) &&
695             (!Remapper || !Remapper->exist(FuncName)))
696           continue;
697         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
698         assert(FuncProfileAddr < End && "out of LBRProfile section");
699         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
700           return EC;
701       }
702     }
703     Data = End;
704   }
705   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
706          "Cannot have both context-sensitive and regular profile");
707   ProfileIsCS = (CSProfileCount > 0);
708   FunctionSamples::ProfileIsCS = ProfileIsCS;
709   return sampleprof_error::success;
710 }
711 
712 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
713   if (!ProfSymList)
714     ProfSymList = std::make_unique<ProfileSymbolList>();
715 
716   if (std::error_code EC = ProfSymList->read(Data, End - Data))
717     return EC;
718 
719   Data = End;
720   return sampleprof_error::success;
721 }
722 
723 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
724     const uint8_t *SecStart, const uint64_t SecSize,
725     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
726   Data = SecStart;
727   End = SecStart + SecSize;
728   auto DecompressSize = readNumber<uint64_t>();
729   if (std::error_code EC = DecompressSize.getError())
730     return EC;
731   DecompressBufSize = *DecompressSize;
732 
733   auto CompressSize = readNumber<uint64_t>();
734   if (std::error_code EC = CompressSize.getError())
735     return EC;
736 
737   if (!llvm::zlib::isAvailable())
738     return sampleprof_error::zlib_unavailable;
739 
740   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
741                               *CompressSize);
742   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
743   size_t UCSize = DecompressBufSize;
744   llvm::Error E =
745       zlib::uncompress(CompressedStrings, Buffer, UCSize);
746   if (E)
747     return sampleprof_error::uncompress_failed;
748   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
749   return sampleprof_error::success;
750 }
751 
752 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
753   const uint8_t *BufStart =
754       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
755 
756   for (auto &Entry : SecHdrTable) {
757     // Skip empty section.
758     if (!Entry.Size)
759       continue;
760 
761     // Skip sections without context when SkipFlatProf is true.
762     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
763       continue;
764 
765     const uint8_t *SecStart = BufStart + Entry.Offset;
766     uint64_t SecSize = Entry.Size;
767 
768     // If the section is compressed, decompress it into a buffer
769     // DecompressBuf before reading the actual data. The pointee of
770     // 'Data' will be changed to buffer hold by DecompressBuf
771     // temporarily when reading the actual data.
772     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
773     if (isCompressed) {
774       const uint8_t *DecompressBuf;
775       uint64_t DecompressBufSize;
776       if (std::error_code EC = decompressSection(
777               SecStart, SecSize, DecompressBuf, DecompressBufSize))
778         return EC;
779       SecStart = DecompressBuf;
780       SecSize = DecompressBufSize;
781     }
782 
783     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
784       return EC;
785     if (Data != SecStart + SecSize)
786       return sampleprof_error::malformed;
787 
788     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
789     if (isCompressed) {
790       Data = BufStart + Entry.Offset;
791       End = BufStart + Buffer->getBufferSize();
792     }
793   }
794 
795   return sampleprof_error::success;
796 }
797 
798 std::error_code SampleProfileReaderCompactBinary::readImpl() {
799   // Collect functions used by current module if the Reader has been
800   // given a module.
801   bool LoadFuncsToBeUsed = collectFuncsFromModule();
802 
803   std::vector<uint64_t> OffsetsToUse;
804   if (!LoadFuncsToBeUsed) {
805     // load all the function profiles.
806     for (auto FuncEntry : FuncOffsetTable) {
807       OffsetsToUse.push_back(FuncEntry.second);
808     }
809   } else {
810     // load function profiles on demand.
811     for (auto Name : FuncsToUse) {
812       auto GUID = std::to_string(MD5Hash(Name));
813       auto iter = FuncOffsetTable.find(StringRef(GUID));
814       if (iter == FuncOffsetTable.end())
815         continue;
816       OffsetsToUse.push_back(iter->second);
817     }
818   }
819 
820   for (auto Offset : OffsetsToUse) {
821     const uint8_t *SavedData = Data;
822     if (std::error_code EC = readFuncProfile(
823             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
824             Offset))
825       return EC;
826     Data = SavedData;
827   }
828   return sampleprof_error::success;
829 }
830 
831 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
832   if (Magic == SPMagic())
833     return sampleprof_error::success;
834   return sampleprof_error::bad_magic;
835 }
836 
837 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
838   if (Magic == SPMagic(SPF_Ext_Binary))
839     return sampleprof_error::success;
840   return sampleprof_error::bad_magic;
841 }
842 
843 std::error_code
844 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
845   if (Magic == SPMagic(SPF_Compact_Binary))
846     return sampleprof_error::success;
847   return sampleprof_error::bad_magic;
848 }
849 
850 std::error_code SampleProfileReaderBinary::readNameTable() {
851   auto Size = readNumber<uint32_t>();
852   if (std::error_code EC = Size.getError())
853     return EC;
854   NameTable.reserve(*Size + NameTable.size());
855   for (uint32_t I = 0; I < *Size; ++I) {
856     auto Name(readString());
857     if (std::error_code EC = Name.getError())
858       return EC;
859     NameTable.push_back(*Name);
860   }
861 
862   return sampleprof_error::success;
863 }
864 
865 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
866   auto Size = readNumber<uint64_t>();
867   if (std::error_code EC = Size.getError())
868     return EC;
869   MD5StringBuf = std::make_unique<std::vector<std::string>>();
870   MD5StringBuf->reserve(*Size);
871   if (FixedLengthMD5) {
872     // Preallocate and initialize NameTable so we can check whether a name
873     // index has been read before by checking whether the element in the
874     // NameTable is empty, meanwhile readStringIndex can do the boundary
875     // check using the size of NameTable.
876     NameTable.resize(*Size + NameTable.size());
877 
878     MD5NameMemStart = Data;
879     Data = Data + (*Size) * sizeof(uint64_t);
880     return sampleprof_error::success;
881   }
882   NameTable.reserve(*Size);
883   for (uint32_t I = 0; I < *Size; ++I) {
884     auto FID = readNumber<uint64_t>();
885     if (std::error_code EC = FID.getError())
886       return EC;
887     MD5StringBuf->push_back(std::to_string(*FID));
888     // NameTable is a vector of StringRef. Here it is pushing back a
889     // StringRef initialized with the last string in MD5stringBuf.
890     NameTable.push_back(MD5StringBuf->back());
891   }
892   return sampleprof_error::success;
893 }
894 
895 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
896   if (IsMD5)
897     return readMD5NameTable();
898   return SampleProfileReaderBinary::readNameTable();
899 }
900 
901 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
902   if (!ProfileIsProbeBased)
903     return sampleprof_error::success;
904   while (Data < End) {
905     auto FName(readStringFromTable());
906     if (std::error_code EC = FName.getError())
907       return EC;
908 
909     auto Checksum = readNumber<uint64_t>();
910     if (std::error_code EC = Checksum.getError())
911       return EC;
912 
913     SampleContext FContext(*FName);
914     // No need to load metadata for profiles that are not loaded in the current
915     // module.
916     if (Profiles.count(FContext))
917       Profiles[FContext].setFunctionHash(*Checksum);
918   }
919 
920   assert(Data == End && "More data is read than expected");
921   return sampleprof_error::success;
922 }
923 
924 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
925   auto Size = readNumber<uint64_t>();
926   if (std::error_code EC = Size.getError())
927     return EC;
928   NameTable.reserve(*Size);
929   for (uint32_t I = 0; I < *Size; ++I) {
930     auto FID = readNumber<uint64_t>();
931     if (std::error_code EC = FID.getError())
932       return EC;
933     NameTable.push_back(std::to_string(*FID));
934   }
935   return sampleprof_error::success;
936 }
937 
938 std::error_code
939 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
940   SecHdrTableEntry Entry;
941   auto Type = readUnencodedNumber<uint64_t>();
942   if (std::error_code EC = Type.getError())
943     return EC;
944   Entry.Type = static_cast<SecType>(*Type);
945 
946   auto Flags = readUnencodedNumber<uint64_t>();
947   if (std::error_code EC = Flags.getError())
948     return EC;
949   Entry.Flags = *Flags;
950 
951   auto Offset = readUnencodedNumber<uint64_t>();
952   if (std::error_code EC = Offset.getError())
953     return EC;
954   Entry.Offset = *Offset;
955 
956   auto Size = readUnencodedNumber<uint64_t>();
957   if (std::error_code EC = Size.getError())
958     return EC;
959   Entry.Size = *Size;
960 
961   Entry.LayoutIndex = Idx;
962   SecHdrTable.push_back(std::move(Entry));
963   return sampleprof_error::success;
964 }
965 
966 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
967   auto EntryNum = readUnencodedNumber<uint64_t>();
968   if (std::error_code EC = EntryNum.getError())
969     return EC;
970 
971   for (uint32_t i = 0; i < (*EntryNum); i++)
972     if (std::error_code EC = readSecHdrTableEntry(i))
973       return EC;
974 
975   return sampleprof_error::success;
976 }
977 
978 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
979   const uint8_t *BufStart =
980       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
981   Data = BufStart;
982   End = BufStart + Buffer->getBufferSize();
983 
984   if (std::error_code EC = readMagicIdent())
985     return EC;
986 
987   if (std::error_code EC = readSecHdrTable())
988     return EC;
989 
990   return sampleprof_error::success;
991 }
992 
993 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
994   uint64_t Size = 0;
995   for (auto &Entry : SecHdrTable) {
996     if (Entry.Type == Type)
997       Size += Entry.Size;
998   }
999   return Size;
1000 }
1001 
1002 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1003   // Sections in SecHdrTable is not necessarily in the same order as
1004   // sections in the profile because section like FuncOffsetTable needs
1005   // to be written after section LBRProfile but needs to be read before
1006   // section LBRProfile, so we cannot simply use the last entry in
1007   // SecHdrTable to calculate the file size.
1008   uint64_t FileSize = 0;
1009   for (auto &Entry : SecHdrTable) {
1010     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1011   }
1012   return FileSize;
1013 }
1014 
1015 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1016   std::string Flags;
1017   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1018     Flags.append("{compressed,");
1019   else
1020     Flags.append("{");
1021 
1022   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1023     Flags.append("flat,");
1024 
1025   switch (Entry.Type) {
1026   case SecNameTable:
1027     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1028       Flags.append("fixlenmd5,");
1029     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1030       Flags.append("md5,");
1031     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1032       Flags.append("uniq,");
1033     break;
1034   case SecProfSummary:
1035     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1036       Flags.append("partial,");
1037     break;
1038   default:
1039     break;
1040   }
1041   char &last = Flags.back();
1042   if (last == ',')
1043     last = '}';
1044   else
1045     Flags.append("}");
1046   return Flags;
1047 }
1048 
1049 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1050   uint64_t TotalSecsSize = 0;
1051   for (auto &Entry : SecHdrTable) {
1052     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1053        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1054        << "\n";
1055     ;
1056     TotalSecsSize += Entry.Size;
1057   }
1058   uint64_t HeaderSize = SecHdrTable.front().Offset;
1059   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1060          "Size of 'header + sections' doesn't match the total size of profile");
1061 
1062   OS << "Header Size: " << HeaderSize << "\n";
1063   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1064   OS << "File Size: " << getFileSize() << "\n";
1065   return true;
1066 }
1067 
1068 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1069   // Read and check the magic identifier.
1070   auto Magic = readNumber<uint64_t>();
1071   if (std::error_code EC = Magic.getError())
1072     return EC;
1073   else if (std::error_code EC = verifySPMagic(*Magic))
1074     return EC;
1075 
1076   // Read the version number.
1077   auto Version = readNumber<uint64_t>();
1078   if (std::error_code EC = Version.getError())
1079     return EC;
1080   else if (*Version != SPVersion())
1081     return sampleprof_error::unsupported_version;
1082 
1083   return sampleprof_error::success;
1084 }
1085 
1086 std::error_code SampleProfileReaderBinary::readHeader() {
1087   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1088   End = Data + Buffer->getBufferSize();
1089 
1090   if (std::error_code EC = readMagicIdent())
1091     return EC;
1092 
1093   if (std::error_code EC = readSummary())
1094     return EC;
1095 
1096   if (std::error_code EC = readNameTable())
1097     return EC;
1098   return sampleprof_error::success;
1099 }
1100 
1101 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1102   SampleProfileReaderBinary::readHeader();
1103   if (std::error_code EC = readFuncOffsetTable())
1104     return EC;
1105   return sampleprof_error::success;
1106 }
1107 
1108 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1109   auto TableOffset = readUnencodedNumber<uint64_t>();
1110   if (std::error_code EC = TableOffset.getError())
1111     return EC;
1112 
1113   const uint8_t *SavedData = Data;
1114   const uint8_t *TableStart =
1115       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1116       *TableOffset;
1117   Data = TableStart;
1118 
1119   auto Size = readNumber<uint64_t>();
1120   if (std::error_code EC = Size.getError())
1121     return EC;
1122 
1123   FuncOffsetTable.reserve(*Size);
1124   for (uint32_t I = 0; I < *Size; ++I) {
1125     auto FName(readStringFromTable());
1126     if (std::error_code EC = FName.getError())
1127       return EC;
1128 
1129     auto Offset = readNumber<uint64_t>();
1130     if (std::error_code EC = Offset.getError())
1131       return EC;
1132 
1133     FuncOffsetTable[*FName] = *Offset;
1134   }
1135   End = TableStart;
1136   Data = SavedData;
1137   return sampleprof_error::success;
1138 }
1139 
1140 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1141   if (!M)
1142     return false;
1143   FuncsToUse.clear();
1144   for (auto &F : *M)
1145     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1146   return true;
1147 }
1148 
1149 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1150     std::vector<ProfileSummaryEntry> &Entries) {
1151   auto Cutoff = readNumber<uint64_t>();
1152   if (std::error_code EC = Cutoff.getError())
1153     return EC;
1154 
1155   auto MinBlockCount = readNumber<uint64_t>();
1156   if (std::error_code EC = MinBlockCount.getError())
1157     return EC;
1158 
1159   auto NumBlocks = readNumber<uint64_t>();
1160   if (std::error_code EC = NumBlocks.getError())
1161     return EC;
1162 
1163   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1164   return sampleprof_error::success;
1165 }
1166 
1167 std::error_code SampleProfileReaderBinary::readSummary() {
1168   auto TotalCount = readNumber<uint64_t>();
1169   if (std::error_code EC = TotalCount.getError())
1170     return EC;
1171 
1172   auto MaxBlockCount = readNumber<uint64_t>();
1173   if (std::error_code EC = MaxBlockCount.getError())
1174     return EC;
1175 
1176   auto MaxFunctionCount = readNumber<uint64_t>();
1177   if (std::error_code EC = MaxFunctionCount.getError())
1178     return EC;
1179 
1180   auto NumBlocks = readNumber<uint64_t>();
1181   if (std::error_code EC = NumBlocks.getError())
1182     return EC;
1183 
1184   auto NumFunctions = readNumber<uint64_t>();
1185   if (std::error_code EC = NumFunctions.getError())
1186     return EC;
1187 
1188   auto NumSummaryEntries = readNumber<uint64_t>();
1189   if (std::error_code EC = NumSummaryEntries.getError())
1190     return EC;
1191 
1192   std::vector<ProfileSummaryEntry> Entries;
1193   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1194     std::error_code EC = readSummaryEntry(Entries);
1195     if (EC != sampleprof_error::success)
1196       return EC;
1197   }
1198   Summary = std::make_unique<ProfileSummary>(
1199       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1200       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1201 
1202   return sampleprof_error::success;
1203 }
1204 
1205 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1206   const uint8_t *Data =
1207       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1208   uint64_t Magic = decodeULEB128(Data);
1209   return Magic == SPMagic();
1210 }
1211 
1212 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1213   const uint8_t *Data =
1214       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1215   uint64_t Magic = decodeULEB128(Data);
1216   return Magic == SPMagic(SPF_Ext_Binary);
1217 }
1218 
1219 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1220   const uint8_t *Data =
1221       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1222   uint64_t Magic = decodeULEB128(Data);
1223   return Magic == SPMagic(SPF_Compact_Binary);
1224 }
1225 
1226 std::error_code SampleProfileReaderGCC::skipNextWord() {
1227   uint32_t dummy;
1228   if (!GcovBuffer.readInt(dummy))
1229     return sampleprof_error::truncated;
1230   return sampleprof_error::success;
1231 }
1232 
1233 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1234   if (sizeof(T) <= sizeof(uint32_t)) {
1235     uint32_t Val;
1236     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1237       return static_cast<T>(Val);
1238   } else if (sizeof(T) <= sizeof(uint64_t)) {
1239     uint64_t Val;
1240     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1241       return static_cast<T>(Val);
1242   }
1243 
1244   std::error_code EC = sampleprof_error::malformed;
1245   reportError(0, EC.message());
1246   return EC;
1247 }
1248 
1249 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1250   StringRef Str;
1251   if (!GcovBuffer.readString(Str))
1252     return sampleprof_error::truncated;
1253   return Str;
1254 }
1255 
1256 std::error_code SampleProfileReaderGCC::readHeader() {
1257   // Read the magic identifier.
1258   if (!GcovBuffer.readGCDAFormat())
1259     return sampleprof_error::unrecognized_format;
1260 
1261   // Read the version number. Note - the GCC reader does not validate this
1262   // version, but the profile creator generates v704.
1263   GCOV::GCOVVersion version;
1264   if (!GcovBuffer.readGCOVVersion(version))
1265     return sampleprof_error::unrecognized_format;
1266 
1267   if (version != GCOV::V407)
1268     return sampleprof_error::unsupported_version;
1269 
1270   // Skip the empty integer.
1271   if (std::error_code EC = skipNextWord())
1272     return EC;
1273 
1274   return sampleprof_error::success;
1275 }
1276 
1277 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1278   uint32_t Tag;
1279   if (!GcovBuffer.readInt(Tag))
1280     return sampleprof_error::truncated;
1281 
1282   if (Tag != Expected)
1283     return sampleprof_error::malformed;
1284 
1285   if (std::error_code EC = skipNextWord())
1286     return EC;
1287 
1288   return sampleprof_error::success;
1289 }
1290 
1291 std::error_code SampleProfileReaderGCC::readNameTable() {
1292   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1293     return EC;
1294 
1295   uint32_t Size;
1296   if (!GcovBuffer.readInt(Size))
1297     return sampleprof_error::truncated;
1298 
1299   for (uint32_t I = 0; I < Size; ++I) {
1300     StringRef Str;
1301     if (!GcovBuffer.readString(Str))
1302       return sampleprof_error::truncated;
1303     Names.push_back(std::string(Str));
1304   }
1305 
1306   return sampleprof_error::success;
1307 }
1308 
1309 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1310   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1311     return EC;
1312 
1313   uint32_t NumFunctions;
1314   if (!GcovBuffer.readInt(NumFunctions))
1315     return sampleprof_error::truncated;
1316 
1317   InlineCallStack Stack;
1318   for (uint32_t I = 0; I < NumFunctions; ++I)
1319     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1320       return EC;
1321 
1322   computeSummary();
1323   return sampleprof_error::success;
1324 }
1325 
1326 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1327     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1328   uint64_t HeadCount = 0;
1329   if (InlineStack.size() == 0)
1330     if (!GcovBuffer.readInt64(HeadCount))
1331       return sampleprof_error::truncated;
1332 
1333   uint32_t NameIdx;
1334   if (!GcovBuffer.readInt(NameIdx))
1335     return sampleprof_error::truncated;
1336 
1337   StringRef Name(Names[NameIdx]);
1338 
1339   uint32_t NumPosCounts;
1340   if (!GcovBuffer.readInt(NumPosCounts))
1341     return sampleprof_error::truncated;
1342 
1343   uint32_t NumCallsites;
1344   if (!GcovBuffer.readInt(NumCallsites))
1345     return sampleprof_error::truncated;
1346 
1347   FunctionSamples *FProfile = nullptr;
1348   if (InlineStack.size() == 0) {
1349     // If this is a top function that we have already processed, do not
1350     // update its profile again.  This happens in the presence of
1351     // function aliases.  Since these aliases share the same function
1352     // body, there will be identical replicated profiles for the
1353     // original function.  In this case, we simply not bother updating
1354     // the profile of the original function.
1355     FProfile = &Profiles[Name];
1356     FProfile->addHeadSamples(HeadCount);
1357     if (FProfile->getTotalSamples() > 0)
1358       Update = false;
1359   } else {
1360     // Otherwise, we are reading an inlined instance. The top of the
1361     // inline stack contains the profile of the caller. Insert this
1362     // callee in the caller's CallsiteMap.
1363     FunctionSamples *CallerProfile = InlineStack.front();
1364     uint32_t LineOffset = Offset >> 16;
1365     uint32_t Discriminator = Offset & 0xffff;
1366     FProfile = &CallerProfile->functionSamplesAt(
1367         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1368   }
1369   FProfile->setName(Name);
1370 
1371   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1372     uint32_t Offset;
1373     if (!GcovBuffer.readInt(Offset))
1374       return sampleprof_error::truncated;
1375 
1376     uint32_t NumTargets;
1377     if (!GcovBuffer.readInt(NumTargets))
1378       return sampleprof_error::truncated;
1379 
1380     uint64_t Count;
1381     if (!GcovBuffer.readInt64(Count))
1382       return sampleprof_error::truncated;
1383 
1384     // The line location is encoded in the offset as:
1385     //   high 16 bits: line offset to the start of the function.
1386     //   low 16 bits: discriminator.
1387     uint32_t LineOffset = Offset >> 16;
1388     uint32_t Discriminator = Offset & 0xffff;
1389 
1390     InlineCallStack NewStack;
1391     NewStack.push_back(FProfile);
1392     llvm::append_range(NewStack, InlineStack);
1393     if (Update) {
1394       // Walk up the inline stack, adding the samples on this line to
1395       // the total sample count of the callers in the chain.
1396       for (auto CallerProfile : NewStack)
1397         CallerProfile->addTotalSamples(Count);
1398 
1399       // Update the body samples for the current profile.
1400       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1401     }
1402 
1403     // Process the list of functions called at an indirect call site.
1404     // These are all the targets that a function pointer (or virtual
1405     // function) resolved at runtime.
1406     for (uint32_t J = 0; J < NumTargets; J++) {
1407       uint32_t HistVal;
1408       if (!GcovBuffer.readInt(HistVal))
1409         return sampleprof_error::truncated;
1410 
1411       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1412         return sampleprof_error::malformed;
1413 
1414       uint64_t TargetIdx;
1415       if (!GcovBuffer.readInt64(TargetIdx))
1416         return sampleprof_error::truncated;
1417       StringRef TargetName(Names[TargetIdx]);
1418 
1419       uint64_t TargetCount;
1420       if (!GcovBuffer.readInt64(TargetCount))
1421         return sampleprof_error::truncated;
1422 
1423       if (Update)
1424         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1425                                          TargetName, TargetCount);
1426     }
1427   }
1428 
1429   // Process all the inlined callers into the current function. These
1430   // are all the callsites that were inlined into this function.
1431   for (uint32_t I = 0; I < NumCallsites; I++) {
1432     // The offset is encoded as:
1433     //   high 16 bits: line offset to the start of the function.
1434     //   low 16 bits: discriminator.
1435     uint32_t Offset;
1436     if (!GcovBuffer.readInt(Offset))
1437       return sampleprof_error::truncated;
1438     InlineCallStack NewStack;
1439     NewStack.push_back(FProfile);
1440     llvm::append_range(NewStack, InlineStack);
1441     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1442       return EC;
1443   }
1444 
1445   return sampleprof_error::success;
1446 }
1447 
1448 /// Read a GCC AutoFDO profile.
1449 ///
1450 /// This format is generated by the Linux Perf conversion tool at
1451 /// https://github.com/google/autofdo.
1452 std::error_code SampleProfileReaderGCC::readImpl() {
1453   // Read the string table.
1454   if (std::error_code EC = readNameTable())
1455     return EC;
1456 
1457   // Read the source profile.
1458   if (std::error_code EC = readFunctionProfiles())
1459     return EC;
1460 
1461   return sampleprof_error::success;
1462 }
1463 
1464 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1465   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1466   return Magic == "adcg*704";
1467 }
1468 
1469 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1470   // If the reader uses MD5 to represent string, we can't remap it because
1471   // we don't know what the original function names were.
1472   if (Reader.useMD5()) {
1473     Ctx.diagnose(DiagnosticInfoSampleProfile(
1474         Reader.getBuffer()->getBufferIdentifier(),
1475         "Profile data remapping cannot be applied to profile data "
1476         "in compact format (original mangled names are not available).",
1477         DS_Warning));
1478     return;
1479   }
1480 
1481   // CSSPGO-TODO: Remapper is not yet supported.
1482   // We will need to remap the entire context string.
1483   assert(Remappings && "should be initialized while creating remapper");
1484   for (auto &Sample : Reader.getProfiles()) {
1485     DenseSet<StringRef> NamesInSample;
1486     Sample.second.findAllNames(NamesInSample);
1487     for (auto &Name : NamesInSample)
1488       if (auto Key = Remappings->insert(Name))
1489         NameMap.insert({Key, Name});
1490   }
1491 
1492   RemappingApplied = true;
1493 }
1494 
1495 Optional<StringRef>
1496 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1497   if (auto Key = Remappings->lookup(Fname))
1498     return NameMap.lookup(Key);
1499   return None;
1500 }
1501 
1502 /// Prepare a memory buffer for the contents of \p Filename.
1503 ///
1504 /// \returns an error code indicating the status of the buffer.
1505 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1506 setupMemoryBuffer(const Twine &Filename) {
1507   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
1508   if (std::error_code EC = BufferOrErr.getError())
1509     return EC;
1510   auto Buffer = std::move(BufferOrErr.get());
1511 
1512   // Sanity check the file.
1513   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1514     return sampleprof_error::too_large;
1515 
1516   return std::move(Buffer);
1517 }
1518 
1519 /// Create a sample profile reader based on the format of the input file.
1520 ///
1521 /// \param Filename The file to open.
1522 ///
1523 /// \param C The LLVM context to use to emit diagnostics.
1524 ///
1525 /// \param RemapFilename The file used for profile remapping.
1526 ///
1527 /// \returns an error code indicating the status of the created reader.
1528 ErrorOr<std::unique_ptr<SampleProfileReader>>
1529 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1530                             const std::string RemapFilename) {
1531   auto BufferOrError = setupMemoryBuffer(Filename);
1532   if (std::error_code EC = BufferOrError.getError())
1533     return EC;
1534   return create(BufferOrError.get(), C, RemapFilename);
1535 }
1536 
1537 /// Create a sample profile remapper from the given input, to remap the
1538 /// function names in the given profile data.
1539 ///
1540 /// \param Filename The file to open.
1541 ///
1542 /// \param Reader The profile reader the remapper is going to be applied to.
1543 ///
1544 /// \param C The LLVM context to use to emit diagnostics.
1545 ///
1546 /// \returns an error code indicating the status of the created reader.
1547 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1548 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1549                                            SampleProfileReader &Reader,
1550                                            LLVMContext &C) {
1551   auto BufferOrError = setupMemoryBuffer(Filename);
1552   if (std::error_code EC = BufferOrError.getError())
1553     return EC;
1554   return create(BufferOrError.get(), Reader, C);
1555 }
1556 
1557 /// Create a sample profile remapper from the given input, to remap the
1558 /// function names in the given profile data.
1559 ///
1560 /// \param B The memory buffer to create the reader from (assumes ownership).
1561 ///
1562 /// \param C The LLVM context to use to emit diagnostics.
1563 ///
1564 /// \param Reader The profile reader the remapper is going to be applied to.
1565 ///
1566 /// \returns an error code indicating the status of the created reader.
1567 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1568 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1569                                            SampleProfileReader &Reader,
1570                                            LLVMContext &C) {
1571   auto Remappings = std::make_unique<SymbolRemappingReader>();
1572   if (Error E = Remappings->read(*B.get())) {
1573     handleAllErrors(
1574         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1575           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1576                                                  ParseError.getLineNum(),
1577                                                  ParseError.getMessage()));
1578         });
1579     return sampleprof_error::malformed;
1580   }
1581 
1582   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1583       std::move(B), std::move(Remappings), Reader);
1584 }
1585 
1586 /// Create a sample profile reader based on the format of the input data.
1587 ///
1588 /// \param B The memory buffer to create the reader from (assumes ownership).
1589 ///
1590 /// \param C The LLVM context to use to emit diagnostics.
1591 ///
1592 /// \param RemapFilename The file used for profile remapping.
1593 ///
1594 /// \returns an error code indicating the status of the created reader.
1595 ErrorOr<std::unique_ptr<SampleProfileReader>>
1596 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1597                             const std::string RemapFilename) {
1598   std::unique_ptr<SampleProfileReader> Reader;
1599   if (SampleProfileReaderRawBinary::hasFormat(*B))
1600     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1601   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1602     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1603   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1604     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1605   else if (SampleProfileReaderGCC::hasFormat(*B))
1606     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1607   else if (SampleProfileReaderText::hasFormat(*B))
1608     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1609   else
1610     return sampleprof_error::unrecognized_format;
1611 
1612   if (!RemapFilename.empty()) {
1613     auto ReaderOrErr =
1614         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1615     if (std::error_code EC = ReaderOrErr.getError()) {
1616       std::string Msg = "Could not create remapper: " + EC.message();
1617       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1618       return EC;
1619     }
1620     Reader->Remapper = std::move(ReaderOrErr.get());
1621   }
1622 
1623   FunctionSamples::Format = Reader->getFormat();
1624   if (std::error_code EC = Reader->readHeader()) {
1625     return EC;
1626   }
1627 
1628   return std::move(Reader);
1629 }
1630 
1631 // For text and GCC file formats, we compute the summary after reading the
1632 // profile. Binary format has the profile summary in its header.
1633 void SampleProfileReader::computeSummary() {
1634   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1635   Summary = Builder.computeSummaryForProfiles(Profiles);
1636 }
1637