1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for clang's
10 // instrumentation based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/InstrProfReader.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/IR/ProfileSummary.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/Support/Endian.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/ErrorOr.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/SwapByteOrder.h"
28 #include "llvm/Support/SymbolRemappingReader.h"
29 #include <algorithm>
30 #include <cctype>
31 #include <cstddef>
32 #include <cstdint>
33 #include <limits>
34 #include <memory>
35 #include <system_error>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 
41 // Extracts the variant information from the top 8 bits in the version and
42 // returns an enum specifying the variants present.
43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
44   InstrProfKind ProfileKind = InstrProfKind::Unknown;
45   if (Version & VARIANT_MASK_IR_PROF) {
46     ProfileKind |= InstrProfKind::IR;
47   }
48   if (Version & VARIANT_MASK_CSIR_PROF) {
49     ProfileKind |= InstrProfKind::CS;
50   }
51   if (Version & VARIANT_MASK_INSTR_ENTRY) {
52     ProfileKind |= InstrProfKind::BB;
53   }
54   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
55     ProfileKind |= InstrProfKind::SingleByteCoverage;
56   }
57   if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
58     ProfileKind |= InstrProfKind::FunctionEntryOnly;
59   }
60   return ProfileKind;
61 }
62 
63 static Expected<std::unique_ptr<MemoryBuffer>>
64 setupMemoryBuffer(const Twine &Path) {
65   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
66       MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
67   if (std::error_code EC = BufferOrErr.getError())
68     return errorCodeToError(EC);
69   return std::move(BufferOrErr.get());
70 }
71 
72 static Error initializeReader(InstrProfReader &Reader) {
73   return Reader.readHeader();
74 }
75 
76 Expected<std::unique_ptr<InstrProfReader>>
77 InstrProfReader::create(const Twine &Path,
78                         const InstrProfCorrelator *Correlator) {
79   // Set up the buffer to read.
80   auto BufferOrError = setupMemoryBuffer(Path);
81   if (Error E = BufferOrError.takeError())
82     return std::move(E);
83   return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
84 }
85 
86 Expected<std::unique_ptr<InstrProfReader>>
87 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
88                         const InstrProfCorrelator *Correlator) {
89   // Sanity check the buffer.
90   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
91     return make_error<InstrProfError>(instrprof_error::too_large);
92 
93   if (Buffer->getBufferSize() == 0)
94     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
95 
96   std::unique_ptr<InstrProfReader> Result;
97   // Create the reader.
98   if (IndexedInstrProfReader::hasFormat(*Buffer))
99     Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
100   else if (RawInstrProfReader64::hasFormat(*Buffer))
101     Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
102   else if (RawInstrProfReader32::hasFormat(*Buffer))
103     Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
104   else if (TextInstrProfReader::hasFormat(*Buffer))
105     Result.reset(new TextInstrProfReader(std::move(Buffer)));
106   else
107     return make_error<InstrProfError>(instrprof_error::unrecognized_format);
108 
109   // Initialize the reader and return the result.
110   if (Error E = initializeReader(*Result))
111     return std::move(E);
112 
113   return std::move(Result);
114 }
115 
116 Expected<std::unique_ptr<IndexedInstrProfReader>>
117 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
118   // Set up the buffer to read.
119   auto BufferOrError = setupMemoryBuffer(Path);
120   if (Error E = BufferOrError.takeError())
121     return std::move(E);
122 
123   // Set up the remapping buffer if requested.
124   std::unique_ptr<MemoryBuffer> RemappingBuffer;
125   std::string RemappingPathStr = RemappingPath.str();
126   if (!RemappingPathStr.empty()) {
127     auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
128     if (Error E = RemappingBufferOrError.takeError())
129       return std::move(E);
130     RemappingBuffer = std::move(RemappingBufferOrError.get());
131   }
132 
133   return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
134                                         std::move(RemappingBuffer));
135 }
136 
137 Expected<std::unique_ptr<IndexedInstrProfReader>>
138 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
139                                std::unique_ptr<MemoryBuffer> RemappingBuffer) {
140   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
141     return make_error<InstrProfError>(instrprof_error::too_large);
142 
143   // Create the reader.
144   if (!IndexedInstrProfReader::hasFormat(*Buffer))
145     return make_error<InstrProfError>(instrprof_error::bad_magic);
146   auto Result = std::make_unique<IndexedInstrProfReader>(
147       std::move(Buffer), std::move(RemappingBuffer));
148 
149   // Initialize the reader and return the result.
150   if (Error E = initializeReader(*Result))
151     return std::move(E);
152 
153   return std::move(Result);
154 }
155 
156 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
157   // Verify that this really looks like plain ASCII text by checking a
158   // 'reasonable' number of characters (up to profile magic size).
159   size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
160   StringRef buffer = Buffer.getBufferStart();
161   return count == 0 ||
162          std::all_of(buffer.begin(), buffer.begin() + count,
163                      [](char c) { return isPrint(c) || isSpace(c); });
164 }
165 
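// Read the profile variant flags from the header. Each flag is a line with a
// leading ':' and is matched case-insensitively: ":fe" marks a FE generated
// profile, ":ir" an IR level profile, ":csir" sets both the IR and CS kinds,
// and ":entry_first" / ":not_entry_first" set or clear the BB kind. Any other
// string with a leading ':' is reported as a bad header.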
169 Error TextInstrProfReader::readHeader() {
170   Symtab.reset(new InstrProfSymtab());
171 
172   while (Line->startswith(":")) {
173     StringRef Str = Line->substr(1);
174     if (Str.equals_insensitive("ir"))
175       ProfileKind |= InstrProfKind::IR;
176     else if (Str.equals_insensitive("fe"))
177       ProfileKind |= InstrProfKind::FE;
178     else if (Str.equals_insensitive("csir")) {
179       ProfileKind |= InstrProfKind::IR;
180       ProfileKind |= InstrProfKind::CS;
181     } else if (Str.equals_insensitive("entry_first"))
182       ProfileKind |= InstrProfKind::BB;
183     else if (Str.equals_insensitive("not_entry_first"))
184       ProfileKind &= ~InstrProfKind::BB;
185     else
186       return error(instrprof_error::bad_header);
187     ++Line;
188   }
189   return success();
190 }
191 
192 Error
193 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
194 
195 #define CHECK_LINE_END(Line)                                                   \
196   if (Line.is_at_end())                                                        \
197     return error(instrprof_error::truncated);
198 #define READ_NUM(Str, Dst)                                                     \
199   if ((Str).getAsInteger(10, (Dst)))                                           \
200     return error(instrprof_error::malformed);
201 #define VP_READ_ADVANCE(Val)                                                   \
202   CHECK_LINE_END(Line);                                                        \
203   uint32_t Val;                                                                \
204   READ_NUM((*Line), (Val));                                                    \
205   Line++;
206 
207   if (Line.is_at_end())
208     return success();
209 
210   uint32_t NumValueKinds;
211   if (Line->getAsInteger(10, NumValueKinds)) {
212     // No value profile data
213     return success();
214   }
215   if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
216     return error(instrprof_error::malformed,
217                  "number of value kinds is invalid");
218   Line++;
219 
220   for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
221     VP_READ_ADVANCE(ValueKind);
222     if (ValueKind > IPVK_Last)
223       return error(instrprof_error::malformed, "value kind is invalid");
224     ;
225     VP_READ_ADVANCE(NumValueSites);
226     if (!NumValueSites)
227       continue;
228 
229     Record.reserveSites(VK, NumValueSites);
230     for (uint32_t S = 0; S < NumValueSites; S++) {
231       VP_READ_ADVANCE(NumValueData);
232 
233       std::vector<InstrProfValueData> CurrentValues;
234       for (uint32_t V = 0; V < NumValueData; V++) {
235         CHECK_LINE_END(Line);
236         std::pair<StringRef, StringRef> VD = Line->rsplit(':');
237         uint64_t TakenCount, Value;
238         if (ValueKind == IPVK_IndirectCallTarget) {
239           if (InstrProfSymtab::isExternalSymbol(VD.first)) {
240             Value = 0;
241           } else {
242             if (Error E = Symtab->addFuncName(VD.first))
243               return E;
244             Value = IndexedInstrProf::ComputeHash(VD.first);
245           }
246         } else {
247           READ_NUM(VD.first, Value);
248         }
249         READ_NUM(VD.second, TakenCount);
250         CurrentValues.push_back({Value, TakenCount});
251         Line++;
252       }
253       Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
254                           nullptr);
255     }
256   }
257   return success();
258 
259 #undef CHECK_LINE_END
260 #undef READ_NUM
261 #undef VP_READ_ADVANCE
262 }
263 
264 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
265   // Skip empty lines and comments.
266   while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
267     ++Line;
268   // If we hit EOF while looking for a name, we're done.
269   if (Line.is_at_end()) {
270     return error(instrprof_error::eof);
271   }
272 
273   // Read the function name.
274   Record.Name = *Line++;
275   if (Error E = Symtab->addFuncName(Record.Name))
276     return error(std::move(E));
277 
278   // Read the function hash.
279   if (Line.is_at_end())
280     return error(instrprof_error::truncated);
281   if ((Line++)->getAsInteger(0, Record.Hash))
282     return error(instrprof_error::malformed,
283                  "function hash is not a valid integer");
284 
285   // Read the number of counters.
286   uint64_t NumCounters;
287   if (Line.is_at_end())
288     return error(instrprof_error::truncated);
289   if ((Line++)->getAsInteger(10, NumCounters))
290     return error(instrprof_error::malformed,
291                  "number of counters is not a valid integer");
292   if (NumCounters == 0)
293     return error(instrprof_error::malformed, "number of counters is zero");
294 
295   // Read each counter and fill our internal storage with the values.
296   Record.Clear();
297   Record.Counts.reserve(NumCounters);
298   for (uint64_t I = 0; I < NumCounters; ++I) {
299     if (Line.is_at_end())
300       return error(instrprof_error::truncated);
301     uint64_t Count;
302     if ((Line++)->getAsInteger(10, Count))
303       return error(instrprof_error::malformed, "count is invalid");
304     Record.Counts.push_back(Count);
305   }
306 
307   // Check if value profile data exists and read it if so.
308   if (Error E = readValueProfileData(Record))
309     return error(std::move(E));
310 
311   return success();
312 }
313 
314 template <class IntPtrT>
315 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
316   return getProfileKindFromVersion(Version);
317 }
318 
319 template <class IntPtrT>
320 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
321   if (DataBuffer.getBufferSize() < sizeof(uint64_t))
322     return false;
323   uint64_t Magic =
324     *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
325   return RawInstrProf::getMagic<IntPtrT>() == Magic ||
326          sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
327 }
328 
329 template <class IntPtrT>
330 Error RawInstrProfReader<IntPtrT>::readHeader() {
331   if (!hasFormat(*DataBuffer))
332     return error(instrprof_error::bad_magic);
333   if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
334     return error(instrprof_error::bad_header);
335   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
336       DataBuffer->getBufferStart());
337   ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
338   return readHeader(*Header);
339 }
340 
341 template <class IntPtrT>
342 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
343   const char *End = DataBuffer->getBufferEnd();
344   // Skip zero padding between profiles.
345   while (CurrentPos != End && *CurrentPos == 0)
346     ++CurrentPos;
347   // If there's nothing left, we're done.
348   if (CurrentPos == End)
349     return make_error<InstrProfError>(instrprof_error::eof);
350   // If there isn't enough space for another header, this is probably just
351   // garbage at the end of the file.
352   if (CurrentPos + sizeof(RawInstrProf::Header) > End)
353     return make_error<InstrProfError>(instrprof_error::malformed,
354                                       "not enough space for another header");
355   // The writer ensures each profile is padded to start at an aligned address.
356   if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
357     return make_error<InstrProfError>(instrprof_error::malformed,
358                                       "insufficient padding");
359   // The magic should have the same byte order as in the previous header.
360   uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
361   if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
362     return make_error<InstrProfError>(instrprof_error::bad_magic);
363 
364   // There's another profile to read, so we need to process the header.
365   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
366   return readHeader(*Header);
367 }
368 
369 template <class IntPtrT>
370 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
371   if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
372     return error(std::move(E));
373   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
374     const IntPtrT FPtr = swap(I->FunctionPointer);
375     if (!FPtr)
376       continue;
377     Symtab.mapAddress(FPtr, I->NameRef);
378   }
379   return success();
380 }
381 
382 template <class IntPtrT>
383 Error RawInstrProfReader<IntPtrT>::readHeader(
384     const RawInstrProf::Header &Header) {
385   Version = swap(Header.Version);
386   if (GET_VERSION(Version) != RawInstrProf::Version)
387     return error(instrprof_error::unsupported_version);
388   if (useDebugInfoCorrelate() && !Correlator)
389     return error(instrprof_error::missing_debug_info_for_correlation);
390   if (!useDebugInfoCorrelate() && Correlator)
391     return error(instrprof_error::unexpected_debug_info_for_correlation);
392 
393   BinaryIdsSize = swap(Header.BinaryIdsSize);
394   if (BinaryIdsSize % sizeof(uint64_t))
395     return error(instrprof_error::bad_header);
396 
397   CountersDelta = swap(Header.CountersDelta);
398   NamesDelta = swap(Header.NamesDelta);
399   auto NumData = swap(Header.DataSize);
400   auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
401   auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize();
402   auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
403   auto NamesSize = swap(Header.NamesSize);
404   ValueKindLast = swap(Header.ValueKindLast);
405 
406   auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
407   auto PaddingSize = getNumPaddingBytes(NamesSize);
408 
409   // Profile data starts after profile header and binary ids if exist.
410   ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
411   ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
412   ptrdiff_t NamesOffset =
413       CountersOffset + CountersSize + PaddingBytesAfterCounters;
414   ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
415 
416   auto *Start = reinterpret_cast<const char *>(&Header);
417   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
418     return error(instrprof_error::bad_header);
419 
420   if (Correlator) {
421     // These sizes in the raw file are zero because we constructed them in the
422     // Correlator.
423     assert(DataSize == 0 && NamesSize == 0);
424     assert(CountersDelta == 0 && NamesDelta == 0);
425     Data = Correlator->getDataPointer();
426     DataEnd = Data + Correlator->getDataSize();
427     NamesStart = Correlator->getNamesPointer();
428     NamesEnd = NamesStart + Correlator->getNamesSize();
429   } else {
430     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
431         Start + DataOffset);
432     DataEnd = Data + NumData;
433     NamesStart = Start + NamesOffset;
434     NamesEnd = NamesStart + NamesSize;
435   }
436 
437   // Binary ids start just after the header.
438   BinaryIdsStart =
439       reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
440   CountersStart = Start + CountersOffset;
441   CountersEnd = CountersStart + CountersSize;
442   ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
443 
444   const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
445   if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
446     return error(instrprof_error::bad_header);
447 
448   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
449   if (Error E = createSymtab(*NewSymtab.get()))
450     return E;
451 
452   Symtab = std::move(NewSymtab);
453   return success();
454 }
455 
456 template <class IntPtrT>
457 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
458   Record.Name = getName(Data->NameRef);
459   return success();
460 }
461 
462 template <class IntPtrT>
463 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
464   Record.Hash = swap(Data->FuncHash);
465   return success();
466 }
467 
468 template <class IntPtrT>
469 Error RawInstrProfReader<IntPtrT>::readRawCounts(
470     InstrProfRecord &Record) {
471   uint32_t NumCounters = swap(Data->NumCounters);
472   if (NumCounters == 0)
473     return error(instrprof_error::malformed, "number of counters is zero");
474 
475   ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
476   if (CounterBaseOffset < 0)
477     return error(
478         instrprof_error::malformed,
479         ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
480 
481   if (CounterBaseOffset >= CountersEnd - CountersStart)
482     return error(instrprof_error::malformed,
483                  ("counter offset " + Twine(CounterBaseOffset) +
484                   " is greater than the maximum counter offset " +
485                   Twine(CountersEnd - CountersStart - 1))
486                      .str());
487 
488   uint64_t MaxNumCounters =
489       (CountersEnd - (CountersStart + CounterBaseOffset)) /
490       getCounterTypeSize();
491   if (NumCounters > MaxNumCounters)
492     return error(instrprof_error::malformed,
493                  ("number of counters " + Twine(NumCounters) +
494                   " is greater than the maximum number of counters " +
495                   Twine(MaxNumCounters))
496                      .str());
497 
498   Record.Counts.clear();
499   Record.Counts.reserve(NumCounters);
500   for (uint32_t I = 0; I < NumCounters; I++) {
501     const char *Ptr =
502         CountersStart + CounterBaseOffset + I * getCounterTypeSize();
503     if (hasSingleByteCoverage()) {
504       // A value of zero signifies the block is covered.
505       Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
506     } else {
507       const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
508       Record.Counts.push_back(swap(*CounterValue));
509     }
510   }
511 
512   return success();
513 }
514 
515 template <class IntPtrT>
516 Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
517     InstrProfRecord &Record) {
518   Record.clearValueData();
519   CurValueDataSize = 0;
520   // Need to match the logic in value profile dumper code in compiler-rt:
521   uint32_t NumValueKinds = 0;
522   for (uint32_t I = 0; I < IPVK_Last + 1; I++)
523     NumValueKinds += (Data->NumValueSites[I] != 0);
524 
525   if (!NumValueKinds)
526     return success();
527 
528   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
529       ValueProfData::getValueProfData(
530           ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
531           getDataEndianness());
532 
533   if (Error E = VDataPtrOrErr.takeError())
534     return E;
535 
536   // Note that besides deserialization, this also performs the conversion for
537   // indirect call targets.  The function pointers from the raw profile are
538   // remapped into function name hashes.
539   VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
540   CurValueDataSize = VDataPtrOrErr.get()->getSize();
541   return success();
542 }
543 
544 template <class IntPtrT>
545 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
546   if (atEnd())
547     // At this point, ValueDataStart field points to the next header.
548     if (Error E = readNextHeader(getNextHeaderPos()))
549       return error(std::move(E));
550 
551   // Read name ad set it in Record.
552   if (Error E = readName(Record))
553     return error(std::move(E));
554 
555   // Read FuncHash and set it in Record.
556   if (Error E = readFuncHash(Record))
557     return error(std::move(E));
558 
559   // Read raw counts and set Record.
560   if (Error E = readRawCounts(Record))
561     return error(std::move(E));
562 
563   // Read value data and set Record.
564   if (Error E = readValueProfilingData(Record))
565     return error(std::move(E));
566 
567   // Iterate.
568   advanceData();
569   return success();
570 }
571 
static size_t RoundUp(size_t size, size_t align) {
  // Bias the size upward by (align - 1), then clear the bits selected by that
  // same mask.
  const size_t Mask = align - 1;
  const size_t Biased = size + Mask;
  return Biased & ~Mask;
}
575 
576 template <class IntPtrT>
577 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
578   if (BinaryIdsSize == 0)
579     return success();
580 
581   OS << "Binary IDs: \n";
582   const uint8_t *BI = BinaryIdsStart;
583   const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
584   while (BI < BIEnd) {
585     size_t Remaining = BIEnd - BI;
586 
587     // There should be enough left to read the binary ID size field.
588     if (Remaining < sizeof(uint64_t))
589       return make_error<InstrProfError>(
590           instrprof_error::malformed,
591           "not enough data to read binary id length");
592 
593     uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
594 
595     // There should be enough left to read the binary ID size field, and the
596     // binary ID.
597     if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
598       return make_error<InstrProfError>(
599           instrprof_error::malformed, "not enough data to read binary id data");
600 
601     // Increment by binary id length data type size.
602     BI += sizeof(BinaryIdLen);
603     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
604       return make_error<InstrProfError>(
605           instrprof_error::malformed,
606           "binary id that is read is bigger than buffer size");
607 
608     for (uint64_t I = 0; I < BinaryIdLen; I++)
609       OS << format("%02x", BI[I]);
610     OS << "\n";
611 
612     // Increment by binary id data length, rounded to the next 8 bytes. This
613     // accounts for the zero-padding after each build ID.
614     BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
615     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
616       return make_error<InstrProfError>(instrprof_error::malformed);
617   }
618 
619   return success();
620 }
621 
622 namespace llvm {
623 
624 template class RawInstrProfReader<uint32_t>;
625 template class RawInstrProfReader<uint64_t>;
626 
627 } // end namespace llvm
628 
629 InstrProfLookupTrait::hash_value_type
630 InstrProfLookupTrait::ComputeHash(StringRef K) {
631   return IndexedInstrProf::ComputeHash(HashType, K);
632 }
633 
634 using data_type = InstrProfLookupTrait::data_type;
635 using offset_type = InstrProfLookupTrait::offset_type;
636 
637 bool InstrProfLookupTrait::readValueProfilingData(
638     const unsigned char *&D, const unsigned char *const End) {
639   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
640       ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
641 
642   if (VDataPtrOrErr.takeError())
643     return false;
644 
645   VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
646   D += VDataPtrOrErr.get()->TotalSize;
647 
648   return true;
649 }
650 
651 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
652                                          offset_type N) {
653   using namespace support;
654 
655   // Check if the data is corrupt. If so, don't try to read it.
656   if (N % sizeof(uint64_t))
657     return data_type();
658 
659   DataBuffer.clear();
660   std::vector<uint64_t> CounterBuffer;
661 
662   const unsigned char *End = D + N;
663   while (D < End) {
664     // Read hash.
665     if (D + sizeof(uint64_t) >= End)
666       return data_type();
667     uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
668 
669     // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
670     uint64_t CountsSize = N / sizeof(uint64_t) - 1;
671     // If format version is different then read the number of counters.
672     if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
673       if (D + sizeof(uint64_t) > End)
674         return data_type();
675       CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
676     }
677     // Read counter values.
678     if (D + CountsSize * sizeof(uint64_t) > End)
679       return data_type();
680 
681     CounterBuffer.clear();
682     CounterBuffer.reserve(CountsSize);
683     for (uint64_t J = 0; J < CountsSize; ++J)
684       CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
685 
686     DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
687 
688     // Read value profiling data.
689     if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
690         !readValueProfilingData(D, End)) {
691       DataBuffer.clear();
692       return data_type();
693     }
694   }
695   return DataBuffer;
696 }
697 
698 template <typename HashTableImpl>
699 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
700     StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
701   auto Iter = HashTable->find(FuncName);
702   if (Iter == HashTable->end())
703     return make_error<InstrProfError>(instrprof_error::unknown_function);
704 
705   Data = (*Iter);
706   if (Data.empty())
707     return make_error<InstrProfError>(instrprof_error::malformed,
708                                       "profile data is empty");
709 
710   return Error::success();
711 }
712 
713 template <typename HashTableImpl>
714 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
715     ArrayRef<NamedInstrProfRecord> &Data) {
716   if (atEnd())
717     return make_error<InstrProfError>(instrprof_error::eof);
718 
719   Data = *RecordIterator;
720 
721   if (Data.empty())
722     return make_error<InstrProfError>(instrprof_error::malformed,
723                                       "profile data is empty");
724 
725   return Error::success();
726 }
727 
728 template <typename HashTableImpl>
729 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
730     const unsigned char *Buckets, const unsigned char *const Payload,
731     const unsigned char *const Base, IndexedInstrProf::HashT HashType,
732     uint64_t Version) {
733   FormatVersion = Version;
734   HashTable.reset(HashTableImpl::Create(
735       Buckets, Payload, Base,
736       typename HashTableImpl::InfoType(HashType, Version)));
737   RecordIterator = HashTable->data_begin();
738 }
739 
740 template <typename HashTableImpl>
741 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
742   return getProfileKindFromVersion(FormatVersion);
743 }
744 
745 namespace {
746 /// A remapper that does not apply any remappings.
747 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
748   InstrProfReaderIndexBase &Underlying;
749 
750 public:
751   InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
752       : Underlying(Underlying) {}
753 
754   Error getRecords(StringRef FuncName,
755                    ArrayRef<NamedInstrProfRecord> &Data) override {
756     return Underlying.getRecords(FuncName, Data);
757   }
758 };
759 } // namespace
760 
761 /// A remapper that applies remappings based on a symbol remapping file.
762 template <typename HashTableImpl>
763 class llvm::InstrProfReaderItaniumRemapper
764     : public InstrProfReaderRemapper {
765 public:
766   InstrProfReaderItaniumRemapper(
767       std::unique_ptr<MemoryBuffer> RemapBuffer,
768       InstrProfReaderIndex<HashTableImpl> &Underlying)
769       : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
770   }
771 
772   /// Extract the original function name from a PGO function name.
773   static StringRef extractName(StringRef Name) {
774     // We can have multiple :-separated pieces; there can be pieces both
775     // before and after the mangled name. Find the first part that starts
776     // with '_Z'; we'll assume that's the mangled name we want.
777     std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
778     while (true) {
779       Parts = Parts.second.split(':');
780       if (Parts.first.startswith("_Z"))
781         return Parts.first;
782       if (Parts.second.empty())
783         return Name;
784     }
785   }
786 
787   /// Given a mangled name extracted from a PGO function name, and a new
788   /// form for that mangled name, reconstitute the name.
789   static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
790                                StringRef Replacement,
791                                SmallVectorImpl<char> &Out) {
792     Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
793     Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
794     Out.insert(Out.end(), Replacement.begin(), Replacement.end());
795     Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
796   }
797 
798   Error populateRemappings() override {
799     if (Error E = Remappings.read(*RemapBuffer))
800       return E;
801     for (StringRef Name : Underlying.HashTable->keys()) {
802       StringRef RealName = extractName(Name);
803       if (auto Key = Remappings.insert(RealName)) {
804         // FIXME: We could theoretically map the same equivalence class to
805         // multiple names in the profile data. If that happens, we should
806         // return NamedInstrProfRecords from all of them.
807         MappedNames.insert({Key, RealName});
808       }
809     }
810     return Error::success();
811   }
812 
813   Error getRecords(StringRef FuncName,
814                    ArrayRef<NamedInstrProfRecord> &Data) override {
815     StringRef RealName = extractName(FuncName);
816     if (auto Key = Remappings.lookup(RealName)) {
817       StringRef Remapped = MappedNames.lookup(Key);
818       if (!Remapped.empty()) {
819         if (RealName.begin() == FuncName.begin() &&
820             RealName.end() == FuncName.end())
821           FuncName = Remapped;
822         else {
823           // Try rebuilding the name from the given remapping.
824           SmallString<256> Reconstituted;
825           reconstituteName(FuncName, RealName, Remapped, Reconstituted);
826           Error E = Underlying.getRecords(Reconstituted, Data);
827           if (!E)
828             return E;
829 
830           // If we failed because the name doesn't exist, fall back to asking
831           // about the original name.
832           if (Error Unhandled = handleErrors(
833                   std::move(E), [](std::unique_ptr<InstrProfError> Err) {
834                     return Err->get() == instrprof_error::unknown_function
835                                ? Error::success()
836                                : Error(std::move(Err));
837                   }))
838             return Unhandled;
839         }
840       }
841     }
842     return Underlying.getRecords(FuncName, Data);
843   }
844 
845 private:
846   /// The memory buffer containing the remapping configuration. Remappings
847   /// holds pointers into this buffer.
848   std::unique_ptr<MemoryBuffer> RemapBuffer;
849 
850   /// The mangling remapper.
851   SymbolRemappingReader Remappings;
852 
853   /// Mapping from mangled name keys to the name used for the key in the
854   /// profile data.
855   /// FIXME: Can we store a location within the on-disk hash table instead of
856   /// redoing lookup?
857   DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
858 
859   /// The real profile data reader.
860   InstrProfReaderIndex<HashTableImpl> &Underlying;
861 };
862 
863 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
864   using namespace support;
865 
866   if (DataBuffer.getBufferSize() < 8)
867     return false;
868   uint64_t Magic =
869       endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
870   // Verify that it's magical.
871   return Magic == IndexedInstrProf::Magic;
872 }
873 
874 const unsigned char *
875 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
876                                     const unsigned char *Cur, bool UseCS) {
877   using namespace IndexedInstrProf;
878   using namespace support;
879 
880   if (Version >= IndexedInstrProf::Version4) {
881     const IndexedInstrProf::Summary *SummaryInLE =
882         reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
883     uint64_t NFields =
884         endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
885     uint64_t NEntries =
886         endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
887     uint32_t SummarySize =
888         IndexedInstrProf::Summary::getSize(NFields, NEntries);
889     std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
890         IndexedInstrProf::allocSummary(SummarySize);
891 
892     const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
893     uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
894     for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
895       Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
896 
897     SummaryEntryVector DetailedSummary;
898     for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
899       const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
900       DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
901                                    Ent.NumBlocks);
902     }
903     std::unique_ptr<llvm::ProfileSummary> &Summary =
904         UseCS ? this->CS_Summary : this->Summary;
905 
906     // initialize InstrProfSummary using the SummaryData from disk.
907     Summary = std::make_unique<ProfileSummary>(
908         UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
909         DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
910         SummaryData->get(Summary::MaxBlockCount),
911         SummaryData->get(Summary::MaxInternalBlockCount),
912         SummaryData->get(Summary::MaxFunctionCount),
913         SummaryData->get(Summary::TotalNumBlocks),
914         SummaryData->get(Summary::TotalNumFunctions));
915     return Cur + SummarySize;
916   } else {
917     // The older versions do not support a profile summary. This just computes
918     // an empty summary, which will not result in accurate hot/cold detection.
919     // We would need to call addRecord for all NamedInstrProfRecords to get the
920     // correct summary. However, this version is old (prior to early 2016) and
921     // has not been supporting an accurate summary for several years.
922     InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
923     Summary = Builder.getSummary();
924     return Cur;
925   }
926 }
927 
928 Error IndexedInstrProfReader::readHeader() {
929   using namespace support;
930 
931   const unsigned char *Start =
932       (const unsigned char *)DataBuffer->getBufferStart();
933   const unsigned char *Cur = Start;
934   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
935     return error(instrprof_error::truncated);
936 
937   auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
938   Cur += sizeof(IndexedInstrProf::Header);
939 
940   // Check the magic number.
941   uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
942   if (Magic != IndexedInstrProf::Magic)
943     return error(instrprof_error::bad_magic);
944 
945   // Read the version.
946   uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
947   if (GET_VERSION(FormatVersion) >
948       IndexedInstrProf::ProfVersion::CurrentVersion)
949     return error(instrprof_error::unsupported_version);
950 
951   Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
952                     /* UseCS */ false);
953   if (FormatVersion & VARIANT_MASK_CSIR_PROF)
954     Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
955                       /* UseCS */ true);
956 
957   // Read the hash type and start offset.
958   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
959       endian::byte_swap<uint64_t, little>(Header->HashType));
960   if (HashType > IndexedInstrProf::HashT::Last)
961     return error(instrprof_error::unsupported_hash_type);
962 
963   uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
964 
965   // The rest of the file is an on disk hash table.
966   auto IndexPtr =
967       std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
968           Start + HashOffset, Cur, Start, HashType, FormatVersion);
969 
970   // Load the remapping table now if requested.
971   if (RemappingBuffer) {
972     Remapper = std::make_unique<
973         InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
974         std::move(RemappingBuffer), *IndexPtr);
975     if (Error E = Remapper->populateRemappings())
976       return E;
977   } else {
978     Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
979   }
980   Index = std::move(IndexPtr);
981 
982   return success();
983 }
984 
985 InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
986   if (Symtab.get())
987     return *Symtab.get();
988 
989   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
990   if (Error E = Index->populateSymtab(*NewSymtab.get())) {
991     consumeError(error(InstrProfError::take(std::move(E))));
992   }
993 
994   Symtab = std::move(NewSymtab);
995   return *Symtab.get();
996 }
997 
998 Expected<InstrProfRecord>
999 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
1000                                            uint64_t FuncHash) {
1001   ArrayRef<NamedInstrProfRecord> Data;
1002   Error Err = Remapper->getRecords(FuncName, Data);
1003   if (Err)
1004     return std::move(Err);
1005   // Found it. Look for counters with the right hash.
1006   for (const NamedInstrProfRecord &I : Data) {
1007     // Check for a match and fill the vector if there is one.
1008     if (I.Hash == FuncHash)
1009       return std::move(I);
1010   }
1011   return error(instrprof_error::hash_mismatch);
1012 }
1013 
1014 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1015                                                 uint64_t FuncHash,
1016                                                 std::vector<uint64_t> &Counts) {
1017   Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1018   if (Error E = Record.takeError())
1019     return error(std::move(E));
1020 
1021   Counts = Record.get().Counts;
1022   return success();
1023 }
1024 
1025 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1026   ArrayRef<NamedInstrProfRecord> Data;
1027 
1028   Error E = Index->getRecords(Data);
1029   if (E)
1030     return error(std::move(E));
1031 
1032   Record = Data[RecordIndex++];
1033   if (RecordIndex >= Data.size()) {
1034     Index->advanceToNextKey();
1035     RecordIndex = 0;
1036   }
1037   return success();
1038 }
1039 
1040 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1041   uint64_t NumFuncs = 0;
1042   for (const auto &Func : *this) {
1043     if (isIRLevelProfile()) {
1044       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
1045       if (FuncIsCS != IsCS)
1046         continue;
1047     }
1048     Func.accumulateCounts(Sum);
1049     ++NumFuncs;
1050   }
1051   Sum.NumEntries = NumFuncs;
1052 }
1053