1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for clang's
10 // instrumentation based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/InstrProfReader.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/IR/ProfileSummary.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/ProfileData/MemProf.h"
23 #include "llvm/ProfileData/ProfileCommon.h"
24 #include "llvm/ProfileData/RawMemProfReader.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/ErrorOr.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/SwapByteOrder.h"
30 #include "llvm/Support/SymbolRemappingReader.h"
31 #include <algorithm>
32 #include <cctype>
33 #include <cstddef>
34 #include <cstdint>
35 #include <limits>
36 #include <memory>
37 #include <system_error>
38 #include <utility>
39 #include <vector>
40 
41 using namespace llvm;
42 
43 // Extracts the variant information from the top 8 bits in the version and
44 // returns an enum specifying the variants present.
45 static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
46   InstrProfKind ProfileKind = InstrProfKind::Unknown;
47   if (Version & VARIANT_MASK_IR_PROF) {
48     ProfileKind |= InstrProfKind::IR;
49   }
50   if (Version & VARIANT_MASK_CSIR_PROF) {
51     ProfileKind |= InstrProfKind::CS;
52   }
53   if (Version & VARIANT_MASK_INSTR_ENTRY) {
54     ProfileKind |= InstrProfKind::BB;
55   }
56   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
57     ProfileKind |= InstrProfKind::SingleByteCoverage;
58   }
59   if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
60     ProfileKind |= InstrProfKind::FunctionEntryOnly;
61   }
62   if (Version & VARIANT_MASK_MEMPROF) {
63     ProfileKind |= InstrProfKind::MemProf;
64   }
65   return ProfileKind;
66 }
67 
68 static Expected<std::unique_ptr<MemoryBuffer>>
69 setupMemoryBuffer(const Twine &Path) {
70   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
71       MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
72   if (std::error_code EC = BufferOrErr.getError())
73     return errorCodeToError(EC);
74   return std::move(BufferOrErr.get());
75 }
76 
77 static Error initializeReader(InstrProfReader &Reader) {
78   return Reader.readHeader();
79 }
80 
81 Expected<std::unique_ptr<InstrProfReader>>
82 InstrProfReader::create(const Twine &Path,
83                         const InstrProfCorrelator *Correlator) {
84   // Set up the buffer to read.
85   auto BufferOrError = setupMemoryBuffer(Path);
86   if (Error E = BufferOrError.takeError())
87     return std::move(E);
88   return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
89 }
90 
91 Expected<std::unique_ptr<InstrProfReader>>
92 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
93                         const InstrProfCorrelator *Correlator) {
94   // Sanity check the buffer.
95   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
96     return make_error<InstrProfError>(instrprof_error::too_large);
97 
98   if (Buffer->getBufferSize() == 0)
99     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
100 
101   std::unique_ptr<InstrProfReader> Result;
102   // Create the reader.
103   if (IndexedInstrProfReader::hasFormat(*Buffer))
104     Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
105   else if (RawInstrProfReader64::hasFormat(*Buffer))
106     Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
107   else if (RawInstrProfReader32::hasFormat(*Buffer))
108     Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
109   else if (TextInstrProfReader::hasFormat(*Buffer))
110     Result.reset(new TextInstrProfReader(std::move(Buffer)));
111   else
112     return make_error<InstrProfError>(instrprof_error::unrecognized_format);
113 
114   // Initialize the reader and return the result.
115   if (Error E = initializeReader(*Result))
116     return std::move(E);
117 
118   return std::move(Result);
119 }
120 
121 Expected<std::unique_ptr<IndexedInstrProfReader>>
122 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
123   // Set up the buffer to read.
124   auto BufferOrError = setupMemoryBuffer(Path);
125   if (Error E = BufferOrError.takeError())
126     return std::move(E);
127 
128   // Set up the remapping buffer if requested.
129   std::unique_ptr<MemoryBuffer> RemappingBuffer;
130   std::string RemappingPathStr = RemappingPath.str();
131   if (!RemappingPathStr.empty()) {
132     auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
133     if (Error E = RemappingBufferOrError.takeError())
134       return std::move(E);
135     RemappingBuffer = std::move(RemappingBufferOrError.get());
136   }
137 
138   return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
139                                         std::move(RemappingBuffer));
140 }
141 
142 Expected<std::unique_ptr<IndexedInstrProfReader>>
143 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
144                                std::unique_ptr<MemoryBuffer> RemappingBuffer) {
145   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
146     return make_error<InstrProfError>(instrprof_error::too_large);
147 
148   // Create the reader.
149   if (!IndexedInstrProfReader::hasFormat(*Buffer))
150     return make_error<InstrProfError>(instrprof_error::bad_magic);
151   auto Result = std::make_unique<IndexedInstrProfReader>(
152       std::move(Buffer), std::move(RemappingBuffer));
153 
154   // Initialize the reader and return the result.
155   if (Error E = initializeReader(*Result))
156     return std::move(E);
157 
158   return std::move(Result);
159 }
160 
161 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
162   // Verify that this really looks like plain ASCII text by checking a
163   // 'reasonable' number of characters (up to profile magic size).
164   size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
165   StringRef buffer = Buffer.getBufferStart();
166   return count == 0 ||
167          std::all_of(buffer.begin(), buffer.begin() + count,
168                      [](char c) { return isPrint(c) || isSpace(c); });
169 }
170 
171 // Read the profile variant flag from the header: ":FE" means this is a FE
172 // generated profile. ":IR" means this is an IR level profile. Other strings
173 // with a leading ':' will be reported an error format.
174 Error TextInstrProfReader::readHeader() {
175   Symtab.reset(new InstrProfSymtab());
176 
177   while (Line->startswith(":")) {
178     StringRef Str = Line->substr(1);
179     if (Str.equals_insensitive("ir"))
180       ProfileKind |= InstrProfKind::IR;
181     else if (Str.equals_insensitive("fe"))
182       ProfileKind |= InstrProfKind::FE;
183     else if (Str.equals_insensitive("csir")) {
184       ProfileKind |= InstrProfKind::IR;
185       ProfileKind |= InstrProfKind::CS;
186     } else if (Str.equals_insensitive("entry_first"))
187       ProfileKind |= InstrProfKind::BB;
188     else if (Str.equals_insensitive("not_entry_first"))
189       ProfileKind &= ~InstrProfKind::BB;
190     else
191       return error(instrprof_error::bad_header);
192     ++Line;
193   }
194   return success();
195 }
196 
197 Error
198 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
199 
200 #define CHECK_LINE_END(Line)                                                   \
201   if (Line.is_at_end())                                                        \
202     return error(instrprof_error::truncated);
203 #define READ_NUM(Str, Dst)                                                     \
204   if ((Str).getAsInteger(10, (Dst)))                                           \
205     return error(instrprof_error::malformed);
206 #define VP_READ_ADVANCE(Val)                                                   \
207   CHECK_LINE_END(Line);                                                        \
208   uint32_t Val;                                                                \
209   READ_NUM((*Line), (Val));                                                    \
210   Line++;
211 
212   if (Line.is_at_end())
213     return success();
214 
215   uint32_t NumValueKinds;
216   if (Line->getAsInteger(10, NumValueKinds)) {
217     // No value profile data
218     return success();
219   }
220   if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
221     return error(instrprof_error::malformed,
222                  "number of value kinds is invalid");
223   Line++;
224 
225   for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
226     VP_READ_ADVANCE(ValueKind);
227     if (ValueKind > IPVK_Last)
228       return error(instrprof_error::malformed, "value kind is invalid");
229     ;
230     VP_READ_ADVANCE(NumValueSites);
231     if (!NumValueSites)
232       continue;
233 
234     Record.reserveSites(VK, NumValueSites);
235     for (uint32_t S = 0; S < NumValueSites; S++) {
236       VP_READ_ADVANCE(NumValueData);
237 
238       std::vector<InstrProfValueData> CurrentValues;
239       for (uint32_t V = 0; V < NumValueData; V++) {
240         CHECK_LINE_END(Line);
241         std::pair<StringRef, StringRef> VD = Line->rsplit(':');
242         uint64_t TakenCount, Value;
243         if (ValueKind == IPVK_IndirectCallTarget) {
244           if (InstrProfSymtab::isExternalSymbol(VD.first)) {
245             Value = 0;
246           } else {
247             if (Error E = Symtab->addFuncName(VD.first))
248               return E;
249             Value = IndexedInstrProf::ComputeHash(VD.first);
250           }
251         } else {
252           READ_NUM(VD.first, Value);
253         }
254         READ_NUM(VD.second, TakenCount);
255         CurrentValues.push_back({Value, TakenCount});
256         Line++;
257       }
258       Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
259                           nullptr);
260     }
261   }
262   return success();
263 
264 #undef CHECK_LINE_END
265 #undef READ_NUM
266 #undef VP_READ_ADVANCE
267 }
268 
269 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
270   // Skip empty lines and comments.
271   while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
272     ++Line;
273   // If we hit EOF while looking for a name, we're done.
274   if (Line.is_at_end()) {
275     return error(instrprof_error::eof);
276   }
277 
278   // Read the function name.
279   Record.Name = *Line++;
280   if (Error E = Symtab->addFuncName(Record.Name))
281     return error(std::move(E));
282 
283   // Read the function hash.
284   if (Line.is_at_end())
285     return error(instrprof_error::truncated);
286   if ((Line++)->getAsInteger(0, Record.Hash))
287     return error(instrprof_error::malformed,
288                  "function hash is not a valid integer");
289 
290   // Read the number of counters.
291   uint64_t NumCounters;
292   if (Line.is_at_end())
293     return error(instrprof_error::truncated);
294   if ((Line++)->getAsInteger(10, NumCounters))
295     return error(instrprof_error::malformed,
296                  "number of counters is not a valid integer");
297   if (NumCounters == 0)
298     return error(instrprof_error::malformed, "number of counters is zero");
299 
300   // Read each counter and fill our internal storage with the values.
301   Record.Clear();
302   Record.Counts.reserve(NumCounters);
303   for (uint64_t I = 0; I < NumCounters; ++I) {
304     if (Line.is_at_end())
305       return error(instrprof_error::truncated);
306     uint64_t Count;
307     if ((Line++)->getAsInteger(10, Count))
308       return error(instrprof_error::malformed, "count is invalid");
309     Record.Counts.push_back(Count);
310   }
311 
312   // Check if value profile data exists and read it if so.
313   if (Error E = readValueProfileData(Record))
314     return error(std::move(E));
315 
316   return success();
317 }
318 
319 template <class IntPtrT>
320 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
321   return getProfileKindFromVersion(Version);
322 }
323 
324 template <class IntPtrT>
325 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
326   if (DataBuffer.getBufferSize() < sizeof(uint64_t))
327     return false;
328   uint64_t Magic =
329     *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
330   return RawInstrProf::getMagic<IntPtrT>() == Magic ||
331          sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
332 }
333 
334 template <class IntPtrT>
335 Error RawInstrProfReader<IntPtrT>::readHeader() {
336   if (!hasFormat(*DataBuffer))
337     return error(instrprof_error::bad_magic);
338   if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
339     return error(instrprof_error::bad_header);
340   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
341       DataBuffer->getBufferStart());
342   ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
343   return readHeader(*Header);
344 }
345 
346 template <class IntPtrT>
347 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
348   const char *End = DataBuffer->getBufferEnd();
349   // Skip zero padding between profiles.
350   while (CurrentPos != End && *CurrentPos == 0)
351     ++CurrentPos;
352   // If there's nothing left, we're done.
353   if (CurrentPos == End)
354     return make_error<InstrProfError>(instrprof_error::eof);
355   // If there isn't enough space for another header, this is probably just
356   // garbage at the end of the file.
357   if (CurrentPos + sizeof(RawInstrProf::Header) > End)
358     return make_error<InstrProfError>(instrprof_error::malformed,
359                                       "not enough space for another header");
360   // The writer ensures each profile is padded to start at an aligned address.
361   if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
362     return make_error<InstrProfError>(instrprof_error::malformed,
363                                       "insufficient padding");
364   // The magic should have the same byte order as in the previous header.
365   uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
366   if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
367     return make_error<InstrProfError>(instrprof_error::bad_magic);
368 
369   // There's another profile to read, so we need to process the header.
370   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
371   return readHeader(*Header);
372 }
373 
374 template <class IntPtrT>
375 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
376   if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
377     return error(std::move(E));
378   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
379     const IntPtrT FPtr = swap(I->FunctionPointer);
380     if (!FPtr)
381       continue;
382     Symtab.mapAddress(FPtr, I->NameRef);
383   }
384   return success();
385 }
386 
387 template <class IntPtrT>
388 Error RawInstrProfReader<IntPtrT>::readHeader(
389     const RawInstrProf::Header &Header) {
390   Version = swap(Header.Version);
391   if (GET_VERSION(Version) != RawInstrProf::Version)
392     return error(instrprof_error::unsupported_version);
393   if (useDebugInfoCorrelate() && !Correlator)
394     return error(instrprof_error::missing_debug_info_for_correlation);
395   if (!useDebugInfoCorrelate() && Correlator)
396     return error(instrprof_error::unexpected_debug_info_for_correlation);
397 
398   BinaryIdsSize = swap(Header.BinaryIdsSize);
399   if (BinaryIdsSize % sizeof(uint64_t))
400     return error(instrprof_error::bad_header);
401 
402   CountersDelta = swap(Header.CountersDelta);
403   NamesDelta = swap(Header.NamesDelta);
404   auto NumData = swap(Header.DataSize);
405   auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
406   auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize();
407   auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
408   auto NamesSize = swap(Header.NamesSize);
409   ValueKindLast = swap(Header.ValueKindLast);
410 
411   auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
412   auto PaddingSize = getNumPaddingBytes(NamesSize);
413 
414   // Profile data starts after profile header and binary ids if exist.
415   ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
416   ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
417   ptrdiff_t NamesOffset =
418       CountersOffset + CountersSize + PaddingBytesAfterCounters;
419   ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
420 
421   auto *Start = reinterpret_cast<const char *>(&Header);
422   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
423     return error(instrprof_error::bad_header);
424 
425   if (Correlator) {
426     // These sizes in the raw file are zero because we constructed them in the
427     // Correlator.
428     assert(DataSize == 0 && NamesSize == 0);
429     assert(CountersDelta == 0 && NamesDelta == 0);
430     Data = Correlator->getDataPointer();
431     DataEnd = Data + Correlator->getDataSize();
432     NamesStart = Correlator->getNamesPointer();
433     NamesEnd = NamesStart + Correlator->getNamesSize();
434   } else {
435     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
436         Start + DataOffset);
437     DataEnd = Data + NumData;
438     NamesStart = Start + NamesOffset;
439     NamesEnd = NamesStart + NamesSize;
440   }
441 
442   // Binary ids start just after the header.
443   BinaryIdsStart =
444       reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
445   CountersStart = Start + CountersOffset;
446   CountersEnd = CountersStart + CountersSize;
447   ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
448 
449   const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
450   if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
451     return error(instrprof_error::bad_header);
452 
453   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
454   if (Error E = createSymtab(*NewSymtab.get()))
455     return E;
456 
457   Symtab = std::move(NewSymtab);
458   return success();
459 }
460 
461 template <class IntPtrT>
462 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
463   Record.Name = getName(Data->NameRef);
464   return success();
465 }
466 
467 template <class IntPtrT>
468 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
469   Record.Hash = swap(Data->FuncHash);
470   return success();
471 }
472 
473 template <class IntPtrT>
474 Error RawInstrProfReader<IntPtrT>::readRawCounts(
475     InstrProfRecord &Record) {
476   uint32_t NumCounters = swap(Data->NumCounters);
477   if (NumCounters == 0)
478     return error(instrprof_error::malformed, "number of counters is zero");
479 
480   ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
481   if (CounterBaseOffset < 0)
482     return error(
483         instrprof_error::malformed,
484         ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
485 
486   if (CounterBaseOffset >= CountersEnd - CountersStart)
487     return error(instrprof_error::malformed,
488                  ("counter offset " + Twine(CounterBaseOffset) +
489                   " is greater than the maximum counter offset " +
490                   Twine(CountersEnd - CountersStart - 1))
491                      .str());
492 
493   uint64_t MaxNumCounters =
494       (CountersEnd - (CountersStart + CounterBaseOffset)) /
495       getCounterTypeSize();
496   if (NumCounters > MaxNumCounters)
497     return error(instrprof_error::malformed,
498                  ("number of counters " + Twine(NumCounters) +
499                   " is greater than the maximum number of counters " +
500                   Twine(MaxNumCounters))
501                      .str());
502 
503   Record.Counts.clear();
504   Record.Counts.reserve(NumCounters);
505   for (uint32_t I = 0; I < NumCounters; I++) {
506     const char *Ptr =
507         CountersStart + CounterBaseOffset + I * getCounterTypeSize();
508     if (hasSingleByteCoverage()) {
509       // A value of zero signifies the block is covered.
510       Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
511     } else {
512       const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
513       Record.Counts.push_back(swap(*CounterValue));
514     }
515   }
516 
517   return success();
518 }
519 
520 template <class IntPtrT>
521 Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
522     InstrProfRecord &Record) {
523   Record.clearValueData();
524   CurValueDataSize = 0;
525   // Need to match the logic in value profile dumper code in compiler-rt:
526   uint32_t NumValueKinds = 0;
527   for (uint32_t I = 0; I < IPVK_Last + 1; I++)
528     NumValueKinds += (Data->NumValueSites[I] != 0);
529 
530   if (!NumValueKinds)
531     return success();
532 
533   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
534       ValueProfData::getValueProfData(
535           ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
536           getDataEndianness());
537 
538   if (Error E = VDataPtrOrErr.takeError())
539     return E;
540 
541   // Note that besides deserialization, this also performs the conversion for
542   // indirect call targets.  The function pointers from the raw profile are
543   // remapped into function name hashes.
544   VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
545   CurValueDataSize = VDataPtrOrErr.get()->getSize();
546   return success();
547 }
548 
549 template <class IntPtrT>
550 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
551   if (atEnd())
552     // At this point, ValueDataStart field points to the next header.
553     if (Error E = readNextHeader(getNextHeaderPos()))
554       return error(std::move(E));
555 
556   // Read name ad set it in Record.
557   if (Error E = readName(Record))
558     return error(std::move(E));
559 
560   // Read FuncHash and set it in Record.
561   if (Error E = readFuncHash(Record))
562     return error(std::move(E));
563 
564   // Read raw counts and set Record.
565   if (Error E = readRawCounts(Record))
566     return error(std::move(E));
567 
568   // Read value data and set Record.
569   if (Error E = readValueProfilingData(Record))
570     return error(std::move(E));
571 
572   // Iterate.
573   advanceData();
574   return success();
575 }
576 
/// Round \p size up to a multiple of \p align by adding align - 1 and
/// masking off the low bits.
/// NOTE(review): the mask arithmetic presumes \p align is a power of two —
/// confirm at call sites.
static size_t RoundUp(size_t size, size_t align) {
  const size_t LowBits = align - 1;
  const size_t Bumped = size + LowBits;
  return Bumped & ~LowBits;
}
580 
581 template <class IntPtrT>
582 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
583   if (BinaryIdsSize == 0)
584     return success();
585 
586   OS << "Binary IDs: \n";
587   const uint8_t *BI = BinaryIdsStart;
588   const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
589   while (BI < BIEnd) {
590     size_t Remaining = BIEnd - BI;
591 
592     // There should be enough left to read the binary ID size field.
593     if (Remaining < sizeof(uint64_t))
594       return make_error<InstrProfError>(
595           instrprof_error::malformed,
596           "not enough data to read binary id length");
597 
598     uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
599 
600     // There should be enough left to read the binary ID size field, and the
601     // binary ID.
602     if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
603       return make_error<InstrProfError>(
604           instrprof_error::malformed, "not enough data to read binary id data");
605 
606     // Increment by binary id length data type size.
607     BI += sizeof(BinaryIdLen);
608     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
609       return make_error<InstrProfError>(
610           instrprof_error::malformed,
611           "binary id that is read is bigger than buffer size");
612 
613     for (uint64_t I = 0; I < BinaryIdLen; I++)
614       OS << format("%02x", BI[I]);
615     OS << "\n";
616 
617     // Increment by binary id data length, rounded to the next 8 bytes. This
618     // accounts for the zero-padding after each build ID.
619     BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
620     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
621       return make_error<InstrProfError>(instrprof_error::malformed);
622   }
623 
624   return success();
625 }
626 
namespace llvm {

// Explicitly instantiate the raw reader for both pointer widths, so that the
// member templates defined above are emitted in this translation unit.
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;

} // end namespace llvm
633 
634 InstrProfLookupTrait::hash_value_type
635 InstrProfLookupTrait::ComputeHash(StringRef K) {
636   return IndexedInstrProf::ComputeHash(HashType, K);
637 }
638 
// Shorthand for the lookup trait's payload and offset types, used by the
// member definitions that follow.
using data_type = InstrProfLookupTrait::data_type;
using offset_type = InstrProfLookupTrait::offset_type;
641 
642 bool InstrProfLookupTrait::readValueProfilingData(
643     const unsigned char *&D, const unsigned char *const End) {
644   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
645       ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
646 
647   if (VDataPtrOrErr.takeError())
648     return false;
649 
650   VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
651   D += VDataPtrOrErr.get()->TotalSize;
652 
653   return true;
654 }
655 
656 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
657                                          offset_type N) {
658   using namespace support;
659 
660   // Check if the data is corrupt. If so, don't try to read it.
661   if (N % sizeof(uint64_t))
662     return data_type();
663 
664   DataBuffer.clear();
665   std::vector<uint64_t> CounterBuffer;
666 
667   const unsigned char *End = D + N;
668   while (D < End) {
669     // Read hash.
670     if (D + sizeof(uint64_t) >= End)
671       return data_type();
672     uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
673 
674     // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
675     uint64_t CountsSize = N / sizeof(uint64_t) - 1;
676     // If format version is different then read the number of counters.
677     if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
678       if (D + sizeof(uint64_t) > End)
679         return data_type();
680       CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
681     }
682     // Read counter values.
683     if (D + CountsSize * sizeof(uint64_t) > End)
684       return data_type();
685 
686     CounterBuffer.clear();
687     CounterBuffer.reserve(CountsSize);
688     for (uint64_t J = 0; J < CountsSize; ++J)
689       CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
690 
691     DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
692 
693     // Read value profiling data.
694     if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
695         !readValueProfilingData(D, End)) {
696       DataBuffer.clear();
697       return data_type();
698     }
699   }
700   return DataBuffer;
701 }
702 
703 template <typename HashTableImpl>
704 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
705     StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
706   auto Iter = HashTable->find(FuncName);
707   if (Iter == HashTable->end())
708     return make_error<InstrProfError>(instrprof_error::unknown_function);
709 
710   Data = (*Iter);
711   if (Data.empty())
712     return make_error<InstrProfError>(instrprof_error::malformed,
713                                       "profile data is empty");
714 
715   return Error::success();
716 }
717 
718 template <typename HashTableImpl>
719 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
720     ArrayRef<NamedInstrProfRecord> &Data) {
721   if (atEnd())
722     return make_error<InstrProfError>(instrprof_error::eof);
723 
724   Data = *RecordIterator;
725 
726   if (Data.empty())
727     return make_error<InstrProfError>(instrprof_error::malformed,
728                                       "profile data is empty");
729 
730   return Error::success();
731 }
732 
733 template <typename HashTableImpl>
734 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
735     const unsigned char *Buckets, const unsigned char *const Payload,
736     const unsigned char *const Base, IndexedInstrProf::HashT HashType,
737     uint64_t Version) {
738   FormatVersion = Version;
739   HashTable.reset(HashTableImpl::Create(
740       Buckets, Payload, Base,
741       typename HashTableImpl::InfoType(HashType, Version)));
742   RecordIterator = HashTable->data_begin();
743 }
744 
745 template <typename HashTableImpl>
746 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
747   return getProfileKindFromVersion(FormatVersion);
748 }
749 
750 namespace {
751 /// A remapper that does not apply any remappings.
752 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
753   InstrProfReaderIndexBase &Underlying;
754 
755 public:
756   InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
757       : Underlying(Underlying) {}
758 
759   Error getRecords(StringRef FuncName,
760                    ArrayRef<NamedInstrProfRecord> &Data) override {
761     return Underlying.getRecords(FuncName, Data);
762   }
763 };
764 } // namespace
765 
766 /// A remapper that applies remappings based on a symbol remapping file.
767 template <typename HashTableImpl>
768 class llvm::InstrProfReaderItaniumRemapper
769     : public InstrProfReaderRemapper {
770 public:
771   InstrProfReaderItaniumRemapper(
772       std::unique_ptr<MemoryBuffer> RemapBuffer,
773       InstrProfReaderIndex<HashTableImpl> &Underlying)
774       : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
775   }
776 
777   /// Extract the original function name from a PGO function name.
778   static StringRef extractName(StringRef Name) {
779     // We can have multiple :-separated pieces; there can be pieces both
780     // before and after the mangled name. Find the first part that starts
781     // with '_Z'; we'll assume that's the mangled name we want.
782     std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
783     while (true) {
784       Parts = Parts.second.split(':');
785       if (Parts.first.startswith("_Z"))
786         return Parts.first;
787       if (Parts.second.empty())
788         return Name;
789     }
790   }
791 
792   /// Given a mangled name extracted from a PGO function name, and a new
793   /// form for that mangled name, reconstitute the name.
794   static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
795                                StringRef Replacement,
796                                SmallVectorImpl<char> &Out) {
797     Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
798     Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
799     Out.insert(Out.end(), Replacement.begin(), Replacement.end());
800     Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
801   }
802 
803   Error populateRemappings() override {
804     if (Error E = Remappings.read(*RemapBuffer))
805       return E;
806     for (StringRef Name : Underlying.HashTable->keys()) {
807       StringRef RealName = extractName(Name);
808       if (auto Key = Remappings.insert(RealName)) {
809         // FIXME: We could theoretically map the same equivalence class to
810         // multiple names in the profile data. If that happens, we should
811         // return NamedInstrProfRecords from all of them.
812         MappedNames.insert({Key, RealName});
813       }
814     }
815     return Error::success();
816   }
817 
818   Error getRecords(StringRef FuncName,
819                    ArrayRef<NamedInstrProfRecord> &Data) override {
820     StringRef RealName = extractName(FuncName);
821     if (auto Key = Remappings.lookup(RealName)) {
822       StringRef Remapped = MappedNames.lookup(Key);
823       if (!Remapped.empty()) {
824         if (RealName.begin() == FuncName.begin() &&
825             RealName.end() == FuncName.end())
826           FuncName = Remapped;
827         else {
828           // Try rebuilding the name from the given remapping.
829           SmallString<256> Reconstituted;
830           reconstituteName(FuncName, RealName, Remapped, Reconstituted);
831           Error E = Underlying.getRecords(Reconstituted, Data);
832           if (!E)
833             return E;
834 
835           // If we failed because the name doesn't exist, fall back to asking
836           // about the original name.
837           if (Error Unhandled = handleErrors(
838                   std::move(E), [](std::unique_ptr<InstrProfError> Err) {
839                     return Err->get() == instrprof_error::unknown_function
840                                ? Error::success()
841                                : Error(std::move(Err));
842                   }))
843             return Unhandled;
844         }
845       }
846     }
847     return Underlying.getRecords(FuncName, Data);
848   }
849 
850 private:
851   /// The memory buffer containing the remapping configuration. Remappings
852   /// holds pointers into this buffer.
853   std::unique_ptr<MemoryBuffer> RemapBuffer;
854 
855   /// The mangling remapper.
856   SymbolRemappingReader Remappings;
857 
858   /// Mapping from mangled name keys to the name used for the key in the
859   /// profile data.
860   /// FIXME: Can we store a location within the on-disk hash table instead of
861   /// redoing lookup?
862   DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
863 
864   /// The real profile data reader.
865   InstrProfReaderIndex<HashTableImpl> &Underlying;
866 };
867 
868 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
869   using namespace support;
870 
871   if (DataBuffer.getBufferSize() < 8)
872     return false;
873   uint64_t Magic =
874       endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
875   // Verify that it's magical.
876   return Magic == IndexedInstrProf::Magic;
877 }
878 
879 const unsigned char *
880 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
881                                     const unsigned char *Cur, bool UseCS) {
882   using namespace IndexedInstrProf;
883   using namespace support;
884 
885   if (Version >= IndexedInstrProf::Version4) {
886     const IndexedInstrProf::Summary *SummaryInLE =
887         reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
888     uint64_t NFields =
889         endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
890     uint64_t NEntries =
891         endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
892     uint32_t SummarySize =
893         IndexedInstrProf::Summary::getSize(NFields, NEntries);
894     std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
895         IndexedInstrProf::allocSummary(SummarySize);
896 
897     const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
898     uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
899     for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
900       Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
901 
902     SummaryEntryVector DetailedSummary;
903     for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
904       const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
905       DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
906                                    Ent.NumBlocks);
907     }
908     std::unique_ptr<llvm::ProfileSummary> &Summary =
909         UseCS ? this->CS_Summary : this->Summary;
910 
911     // initialize InstrProfSummary using the SummaryData from disk.
912     Summary = std::make_unique<ProfileSummary>(
913         UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
914         DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
915         SummaryData->get(Summary::MaxBlockCount),
916         SummaryData->get(Summary::MaxInternalBlockCount),
917         SummaryData->get(Summary::MaxFunctionCount),
918         SummaryData->get(Summary::TotalNumBlocks),
919         SummaryData->get(Summary::TotalNumFunctions));
920     return Cur + SummarySize;
921   } else {
922     // The older versions do not support a profile summary. This just computes
923     // an empty summary, which will not result in accurate hot/cold detection.
924     // We would need to call addRecord for all NamedInstrProfRecords to get the
925     // correct summary. However, this version is old (prior to early 2016) and
926     // has not been supporting an accurate summary for several years.
927     InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
928     Summary = Builder.getSummary();
929     return Cur;
930   }
931 }
932 
933 Error IndexedInstrProfReader::readHeader() {
934   using namespace support;
935 
936   const unsigned char *Start =
937       (const unsigned char *)DataBuffer->getBufferStart();
938   const unsigned char *Cur = Start;
939   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
940     return error(instrprof_error::truncated);
941 
942   auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
943   if (!HeaderOr)
944     return HeaderOr.takeError();
945 
946   const IndexedInstrProf::Header *Header = &HeaderOr.get();
947   Cur += Header->size();
948 
949   Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur,
950                     /* UseCS */ false);
951   if (Header->Version & VARIANT_MASK_CSIR_PROF)
952     Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur,
953                       /* UseCS */ true);
954 
955   // Read the hash type and start offset.
956   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
957       endian::byte_swap<uint64_t, little>(Header->HashType));
958   if (HashType > IndexedInstrProf::HashT::Last)
959     return error(instrprof_error::unsupported_hash_type);
960 
961   uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
962 
963   // The hash table with profile counts comes next.
964   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
965       Start + HashOffset, Cur, Start, HashType, Header->Version);
966 
967   // The MemProfOffset field in the header is only valid when the format version
968   // is higher than 8 (when it was introduced).
969   if (GET_VERSION(Header->Version) >= 8 &&
970       Header->Version & VARIANT_MASK_MEMPROF) {
971     uint64_t MemProfOffset =
972         endian::byte_swap<uint64_t, little>(Header->MemProfOffset);
973 
974     const unsigned char *Ptr = Start + MemProfOffset;
975     // The value returned from Generator.Emit.
976     const uint64_t TableOffset =
977         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
978 
979     // Read the schema.
980     auto SchemaOr = memprof::readMemProfSchema(Ptr);
981     if (!SchemaOr)
982       return SchemaOr.takeError();
983     Schema = SchemaOr.get();
984 
985     // Now initialize the table reader with a pointer into data buffer.
986     MemProfTable.reset(MemProfHashTable::Create(
987         /*Buckets=*/Start + TableOffset,
988         /*Payload=*/Ptr,
989         /*Base=*/Start, memprof::MemProfRecordLookupTrait(Schema)));
990   }
991 
992   // Load the remapping table now if requested.
993   if (RemappingBuffer) {
994     Remapper = std::make_unique<
995         InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
996         std::move(RemappingBuffer), *IndexPtr);
997     if (Error E = Remapper->populateRemappings())
998       return E;
999   } else {
1000     Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
1001   }
1002   Index = std::move(IndexPtr);
1003 
1004   return success();
1005 }
1006 
1007 InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
1008   if (Symtab.get())
1009     return *Symtab.get();
1010 
1011   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
1012   if (Error E = Index->populateSymtab(*NewSymtab.get())) {
1013     consumeError(error(InstrProfError::take(std::move(E))));
1014   }
1015 
1016   Symtab = std::move(NewSymtab);
1017   return *Symtab.get();
1018 }
1019 
1020 Expected<InstrProfRecord>
1021 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
1022                                            uint64_t FuncHash) {
1023   ArrayRef<NamedInstrProfRecord> Data;
1024   Error Err = Remapper->getRecords(FuncName, Data);
1025   if (Err)
1026     return std::move(Err);
1027   // Found it. Look for counters with the right hash.
1028   for (const NamedInstrProfRecord &I : Data) {
1029     // Check for a match and fill the vector if there is one.
1030     if (I.Hash == FuncHash)
1031       return std::move(I);
1032   }
1033   return error(instrprof_error::hash_mismatch);
1034 }
1035 
1036 Expected<ArrayRef<memprof::MemProfRecord>>
1037 IndexedInstrProfReader::getMemProfRecord(uint64_t FuncNameHash) {
1038   auto Iter = MemProfTable->find(FuncNameHash);
1039   if (Iter == MemProfTable->end())
1040     // TODO: Add memprof specific errors.
1041     return make_error<InstrProfError>(instrprof_error::hash_mismatch,
1042                                       "memprof record not found for hash " +
1043                                           Twine(FuncNameHash));
1044   return *Iter;
1045 }
1046 
1047 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1048                                                 uint64_t FuncHash,
1049                                                 std::vector<uint64_t> &Counts) {
1050   Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1051   if (Error E = Record.takeError())
1052     return error(std::move(E));
1053 
1054   Counts = Record.get().Counts;
1055   return success();
1056 }
1057 
1058 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1059   ArrayRef<NamedInstrProfRecord> Data;
1060 
1061   Error E = Index->getRecords(Data);
1062   if (E)
1063     return error(std::move(E));
1064 
1065   Record = Data[RecordIndex++];
1066   if (RecordIndex >= Data.size()) {
1067     Index->advanceToNextKey();
1068     RecordIndex = 0;
1069   }
1070   return success();
1071 }
1072 
1073 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1074   uint64_t NumFuncs = 0;
1075   for (const auto &Func : *this) {
1076     if (isIRLevelProfile()) {
1077       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
1078       if (FuncIsCS != IsCS)
1079         continue;
1080     }
1081     Func.accumulateCounts(Sum);
1082     ++NumFuncs;
1083   }
1084   Sum.NumEntries = NumFuncs;
1085 }
1086