1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/IR/ProfileSummary.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/LineIterator.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/OnDiskHashTable.h"
27 #include "llvm/Support/SwapByteOrder.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <iterator>
33 #include <memory>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39 class InstrProfReader;
40 
41 /// A file format agnostic iterator over profiling data.
42 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
43                                                NamedInstrProfRecord> {
44   InstrProfReader *Reader = nullptr;
45   value_type Record;
46 
47   void Increment();
48 
49 public:
50   InstrProfIterator() = default;
InstrProfIterator(InstrProfReader * Reader)51   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
52 
53   InstrProfIterator &operator++() { Increment(); return *this; }
54   bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
55   bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
56   value_type &operator*() { return Record; }
57   value_type *operator->() { return &Record; }
58 };
59 
60 /// Base class and interface for reading profiling data of any known instrprof
61 /// format. Provides an iterator over NamedInstrProfRecords.
62 class InstrProfReader {
63   instrprof_error LastError = instrprof_error::success;
64 
65 public:
66   InstrProfReader() = default;
67   virtual ~InstrProfReader() = default;
68 
69   /// Read the header.  Required before reading first record.
70   virtual Error readHeader() = 0;
71 
72   /// Read a single record.
73   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
74 
75   /// Iterator over profile data.
begin()76   InstrProfIterator begin() { return InstrProfIterator(this); }
end()77   InstrProfIterator end() { return InstrProfIterator(); }
78 
79   virtual bool isIRLevelProfile() const = 0;
80 
81   /// Return the PGO symtab. There are three different readers:
82   /// Raw, Text, and Indexed profile readers. The first two types
83   /// of readers are used only by llvm-profdata tool, while the indexed
84   /// profile reader is also used by llvm-cov tool and the compiler (
85   /// backend or frontend). Since creating PGO symtab can create
86   /// significant runtime and memory overhead (as it touches data
87   /// for the whole program), InstrProfSymtab for the indexed profile
88   /// reader should be created on demand and it is recommended to be
89   /// only used for dumping purpose with llvm-proftool, not with the
90   /// compiler.
91   virtual InstrProfSymtab &getSymtab() = 0;
92 
93 protected:
94   std::unique_ptr<InstrProfSymtab> Symtab;
95 
96   /// Set the current error and return same.
error(instrprof_error Err)97   Error error(instrprof_error Err) {
98     LastError = Err;
99     if (Err == instrprof_error::success)
100       return Error::success();
101     return make_error<InstrProfError>(Err);
102   }
103 
error(Error && E)104   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
105 
106   /// Clear the current error and return a successful one.
success()107   Error success() { return error(instrprof_error::success); }
108 
109 public:
110   /// Return true if the reader has finished reading the profile data.
isEOF()111   bool isEOF() { return LastError == instrprof_error::eof; }
112 
113   /// Return true if the reader encountered an error reading profiling data.
hasError()114   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
115 
116   /// Get the current error.
getError()117   Error getError() {
118     if (hasError())
119       return make_error<InstrProfError>(LastError);
120     return Error::success();
121   }
122 
123   /// Factory method to create an appropriately typed reader for the given
124   /// instrprof file.
125   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
126 
127   static Expected<std::unique_ptr<InstrProfReader>>
128   create(std::unique_ptr<MemoryBuffer> Buffer);
129 };
130 
131 /// Reader for the simple text based instrprof format.
132 ///
133 /// This format is a simple text format that's suitable for test data. Records
134 /// are separated by one or more blank lines, and record fields are separated by
135 /// new lines.
136 ///
137 /// Each record consists of a function name, a function hash, a number of
138 /// counters, and then each counter value, in that order.
139 class TextInstrProfReader : public InstrProfReader {
140 private:
141   /// The profile data file contents.
142   std::unique_ptr<MemoryBuffer> DataBuffer;
143   /// Iterator over the profile data.
144   line_iterator Line;
145   bool IsIRLevelProfile = false;
146 
147   Error readValueProfileData(InstrProfRecord &Record);
148 
149 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)150   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
151       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
152   TextInstrProfReader(const TextInstrProfReader &) = delete;
153   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
154 
155   /// Return true if the given buffer is in text instrprof format.
156   static bool hasFormat(const MemoryBuffer &Buffer);
157 
isIRLevelProfile()158   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
159 
160   /// Read the header.
161   Error readHeader() override;
162 
163   /// Read a single record.
164   Error readNextRecord(NamedInstrProfRecord &Record) override;
165 
getSymtab()166   InstrProfSymtab &getSymtab() override {
167     assert(Symtab.get());
168     return *Symtab.get();
169   }
170 };
171 
172 /// Reader for the raw instrprof binary format from runtime.
173 ///
174 /// This format is a raw memory dump of the instrumentation-baed profiling data
175 /// from the runtime.  It has no index.
176 ///
177 /// Templated on the unsigned type whose size matches pointers on the platform
178 /// that wrote the profile.
179 template <class IntPtrT>
180 class RawInstrProfReader : public InstrProfReader {
181 private:
182   /// The profile data file contents.
183   std::unique_ptr<MemoryBuffer> DataBuffer;
184   bool ShouldSwapBytes;
185   // The value of the version field of the raw profile data header. The lower 56
186   // bits specifies the format version and the most significant 8 bits specify
187   // the variant types of the profile.
188   uint64_t Version;
189   uint64_t CountersDelta;
190   uint64_t NamesDelta;
191   const RawInstrProf::ProfileData<IntPtrT> *Data;
192   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
193   const uint64_t *CountersStart;
194   const char *NamesStart;
195   uint64_t NamesSize;
196   // After value profile is all read, this pointer points to
197   // the header of next profile data (if exists)
198   const uint8_t *ValueDataStart;
199   uint32_t ValueKindLast;
200   uint32_t CurValueDataSize;
201 
202 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)203   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
204       : DataBuffer(std::move(DataBuffer)) {}
205   RawInstrProfReader(const RawInstrProfReader &) = delete;
206   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
207 
208   static bool hasFormat(const MemoryBuffer &DataBuffer);
209   Error readHeader() override;
210   Error readNextRecord(NamedInstrProfRecord &Record) override;
211 
isIRLevelProfile()212   bool isIRLevelProfile() const override {
213     return (Version & VARIANT_MASK_IR_PROF) != 0;
214   }
215 
getSymtab()216   InstrProfSymtab &getSymtab() override {
217     assert(Symtab.get());
218     return *Symtab.get();
219   }
220 
221 private:
222   Error createSymtab(InstrProfSymtab &Symtab);
223   Error readNextHeader(const char *CurrentPos);
224   Error readHeader(const RawInstrProf::Header &Header);
225 
swap(IntT Int)226   template <class IntT> IntT swap(IntT Int) const {
227     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
228   }
229 
getDataEndianness()230   support::endianness getDataEndianness() const {
231     support::endianness HostEndian = getHostEndianness();
232     if (!ShouldSwapBytes)
233       return HostEndian;
234     if (HostEndian == support::little)
235       return support::big;
236     else
237       return support::little;
238   }
239 
getNumPaddingBytes(uint64_t SizeInBytes)240   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
241     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
242   }
243 
244   Error readName(NamedInstrProfRecord &Record);
245   Error readFuncHash(NamedInstrProfRecord &Record);
246   Error readRawCounts(InstrProfRecord &Record);
247   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()248   bool atEnd() const { return Data == DataEnd; }
249 
advanceData()250   void advanceData() {
251     Data++;
252     ValueDataStart += CurValueDataSize;
253   }
254 
getNextHeaderPos()255   const char *getNextHeaderPos() const {
256       assert(atEnd());
257       return (const char *)ValueDataStart;
258   }
259 
getCounter(IntPtrT CounterPtr)260   const uint64_t *getCounter(IntPtrT CounterPtr) const {
261     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
262     return CountersStart + Offset;
263   }
264 
getName(uint64_t NameRef)265   StringRef getName(uint64_t NameRef) const {
266     return Symtab->getFuncName(swap(NameRef));
267   }
268 };
269 
270 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
271 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
272 
273 namespace IndexedInstrProf {
274 
275 enum class HashT : uint32_t;
276 
277 } // end namespace IndexedInstrProf
278 
279 /// Trait for lookups into the on-disk hash table for the binary instrprof
280 /// format.
281 class InstrProfLookupTrait {
282   std::vector<NamedInstrProfRecord> DataBuffer;
283   IndexedInstrProf::HashT HashType;
284   unsigned FormatVersion;
285   // Endianness of the input value profile data.
286   // It should be LE by default, but can be changed
287   // for testing purpose.
288   support::endianness ValueProfDataEndianness = support::little;
289 
290 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)291   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
292       : HashType(HashType), FormatVersion(FormatVersion) {}
293 
294   using data_type = ArrayRef<NamedInstrProfRecord>;
295 
296   using internal_key_type = StringRef;
297   using external_key_type = StringRef;
298   using hash_value_type = uint64_t;
299   using offset_type = uint64_t;
300 
EqualKey(StringRef A,StringRef B)301   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)302   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)303   static StringRef GetExternalKey(StringRef K) { return K; }
304 
305   hash_value_type ComputeHash(StringRef K);
306 
307   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)308   ReadKeyDataLength(const unsigned char *&D) {
309     using namespace support;
310 
311     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
312     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
313     return std::make_pair(KeyLen, DataLen);
314   }
315 
ReadKey(const unsigned char * D,offset_type N)316   StringRef ReadKey(const unsigned char *D, offset_type N) {
317     return StringRef((const char *)D, N);
318   }
319 
320   bool readValueProfilingData(const unsigned char *&D,
321                               const unsigned char *const End);
322   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
323 
324   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)325   void setValueProfDataEndianness(support::endianness Endianness) {
326     ValueProfDataEndianness = Endianness;
327   }
328 };
329 
330 struct InstrProfReaderIndexBase {
331   virtual ~InstrProfReaderIndexBase() = default;
332 
333   // Read all the profile records with the same key pointed to the current
334   // iterator.
335   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
336 
337   // Read all the profile records with the key equal to FuncName
338   virtual Error getRecords(StringRef FuncName,
339                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
340   virtual void advanceToNextKey() = 0;
341   virtual bool atEnd() const = 0;
342   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
343   virtual uint64_t getVersion() const = 0;
344   virtual bool isIRLevelProfile() const = 0;
345   virtual Error populateSymtab(InstrProfSymtab &) = 0;
346 };
347 
348 using OnDiskHashTableImplV3 =
349     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
350 
351 template <typename HashTableImpl>
352 class InstrProfReaderItaniumRemapper;
353 
354 template <typename HashTableImpl>
355 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
356 private:
357   std::unique_ptr<HashTableImpl> HashTable;
358   typename HashTableImpl::data_iterator RecordIterator;
359   uint64_t FormatVersion;
360 
361   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
362 
363 public:
364   InstrProfReaderIndex(const unsigned char *Buckets,
365                        const unsigned char *const Payload,
366                        const unsigned char *const Base,
367                        IndexedInstrProf::HashT HashType, uint64_t Version);
368   ~InstrProfReaderIndex() override = default;
369 
370   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
371   Error getRecords(StringRef FuncName,
372                    ArrayRef<NamedInstrProfRecord> &Data) override;
advanceToNextKey()373   void advanceToNextKey() override { RecordIterator++; }
374 
atEnd()375   bool atEnd() const override {
376     return RecordIterator == HashTable->data_end();
377   }
378 
setValueProfDataEndianness(support::endianness Endianness)379   void setValueProfDataEndianness(support::endianness Endianness) override {
380     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
381   }
382 
getVersion()383   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
384 
isIRLevelProfile()385   bool isIRLevelProfile() const override {
386     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
387   }
388 
populateSymtab(InstrProfSymtab & Symtab)389   Error populateSymtab(InstrProfSymtab &Symtab) override {
390     return Symtab.create(HashTable->keys());
391   }
392 };
393 
394 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
395 class InstrProfReaderRemapper {
396 public:
~InstrProfReaderRemapper()397   virtual ~InstrProfReaderRemapper() {}
populateRemappings()398   virtual Error populateRemappings() { return Error::success(); }
399   virtual Error getRecords(StringRef FuncName,
400                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
401 };
402 
403 /// Reader for the indexed binary instrprof format.
404 class IndexedInstrProfReader : public InstrProfReader {
405 private:
406   /// The profile data file contents.
407   std::unique_ptr<MemoryBuffer> DataBuffer;
408   /// The profile remapping file contents.
409   std::unique_ptr<MemoryBuffer> RemappingBuffer;
410   /// The index into the profile data.
411   std::unique_ptr<InstrProfReaderIndexBase> Index;
412   /// The profile remapping file contents.
413   std::unique_ptr<InstrProfReaderRemapper> Remapper;
414   /// Profile summary data.
415   std::unique_ptr<ProfileSummary> Summary;
416   // Index to the current record in the record array.
417   unsigned RecordIndex;
418 
419   // Read the profile summary. Return a pointer pointing to one byte past the
420   // end of the summary data if it exists or the input \c Cur.
421   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
422                                    const unsigned char *Cur);
423 
424 public:
425   IndexedInstrProfReader(
426       std::unique_ptr<MemoryBuffer> DataBuffer,
427       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
DataBuffer(std::move (DataBuffer))428       : DataBuffer(std::move(DataBuffer)),
429         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
430   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
431   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
432 
433   /// Return the profile version.
getVersion()434   uint64_t getVersion() const { return Index->getVersion(); }
isIRLevelProfile()435   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
436 
437   /// Return true if the given buffer is in an indexed instrprof format.
438   static bool hasFormat(const MemoryBuffer &DataBuffer);
439 
440   /// Read the file header.
441   Error readHeader() override;
442   /// Read a single record.
443   Error readNextRecord(NamedInstrProfRecord &Record) override;
444 
445   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
446   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
447                                                uint64_t FuncHash);
448 
449   /// Fill Counts with the profile data for the given function name.
450   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
451                           std::vector<uint64_t> &Counts);
452 
453   /// Return the maximum of all known function counts.
getMaximumFunctionCount()454   uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
455 
456   /// Factory method to create an indexed reader.
457   static Expected<std::unique_ptr<IndexedInstrProfReader>>
458   create(const Twine &Path, const Twine &RemappingPath = "");
459 
460   static Expected<std::unique_ptr<IndexedInstrProfReader>>
461   create(std::unique_ptr<MemoryBuffer> Buffer,
462          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
463 
464   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)465   void setValueProfDataEndianness(support::endianness Endianness) {
466     Index->setValueProfDataEndianness(Endianness);
467   }
468 
469   // See description in the base class. This interface is designed
470   // to be used by llvm-profdata (for dumping). Avoid using this when
471   // the client is the compiler.
472   InstrProfSymtab &getSymtab() override;
getSummary()473   ProfileSummary &getSummary() { return *(Summary.get()); }
474 };
475 
476 } // end namespace llvm
477 
478 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
479