1 //=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains support for reading profiling data for clang's 11 // instrumentation based PGO and coverage. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ProfileData/InstrProfReader.h" 16 #include "InstrProfIndexed.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include <cassert> 19 20 using namespace llvm; 21 22 static ErrorOr<std::unique_ptr<MemoryBuffer>> 23 setupMemoryBuffer(std::string Path) { 24 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 25 MemoryBuffer::getFileOrSTDIN(Path); 26 if (std::error_code EC = BufferOrErr.getError()) 27 return EC; 28 return std::move(BufferOrErr.get()); 29 } 30 31 static std::error_code initializeReader(InstrProfReader &Reader) { 32 return Reader.readHeader(); 33 } 34 35 ErrorOr<std::unique_ptr<InstrProfReader>> 36 InstrProfReader::create(std::string Path) { 37 // Set up the buffer to read. 38 auto BufferOrError = setupMemoryBuffer(Path); 39 if (std::error_code EC = BufferOrError.getError()) 40 return EC; 41 return InstrProfReader::create(std::move(BufferOrError.get())); 42 } 43 44 ErrorOr<std::unique_ptr<InstrProfReader>> 45 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 46 // Sanity check the buffer. 47 if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) 48 return instrprof_error::too_large; 49 50 std::unique_ptr<InstrProfReader> Result; 51 // Create the reader. 52 if (IndexedInstrProfReader::hasFormat(*Buffer)) 53 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 54 else if (RawInstrProfReader64::hasFormat(*Buffer)) 55 Result.reset(new RawInstrProfReader64(std::move(Buffer))); 56 else if (RawInstrProfReader32::hasFormat(*Buffer)) 57 Result.reset(new RawInstrProfReader32(std::move(Buffer))); 58 else 59 Result.reset(new TextInstrProfReader(std::move(Buffer))); 60 61 // Initialize the reader and return the result. 62 if (std::error_code EC = initializeReader(*Result)) 63 return EC; 64 65 return std::move(Result); 66 } 67 68 ErrorOr<std::unique_ptr<IndexedInstrProfReader>> 69 IndexedInstrProfReader::create(std::string Path) { 70 // Set up the buffer to read. 71 auto BufferOrError = setupMemoryBuffer(Path); 72 if (std::error_code EC = BufferOrError.getError()) 73 return EC; 74 return IndexedInstrProfReader::create(std::move(BufferOrError.get())); 75 } 76 77 78 ErrorOr<std::unique_ptr<IndexedInstrProfReader>> 79 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 80 // Sanity check the buffer. 81 if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) 82 return instrprof_error::too_large; 83 84 // Create the reader. 85 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 86 return instrprof_error::bad_magic; 87 auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer)); 88 89 // Initialize the reader and return the result. 90 if (std::error_code EC = initializeReader(*Result)) 91 return EC; 92 93 return std::move(Result); 94 } 95 96 void InstrProfIterator::Increment() { 97 if (Reader->readNextRecord(Record)) 98 *this = InstrProfIterator(); 99 } 100 101 std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { 102 // Skip empty lines and comments. 103 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 104 ++Line; 105 // If we hit EOF while looking for a name, we're done. 106 if (Line.is_at_end()) 107 return error(instrprof_error::eof); 108 109 // Read the function name. 110 Record.Name = *Line++; 111 112 // Read the function hash. 113 if (Line.is_at_end()) 114 return error(instrprof_error::truncated); 115 if ((Line++)->getAsInteger(0, Record.Hash)) 116 return error(instrprof_error::malformed); 117 118 // Read the number of counters. 119 uint64_t NumCounters; 120 if (Line.is_at_end()) 121 return error(instrprof_error::truncated); 122 if ((Line++)->getAsInteger(10, NumCounters)) 123 return error(instrprof_error::malformed); 124 if (NumCounters == 0) 125 return error(instrprof_error::malformed); 126 127 // Read each counter and fill our internal storage with the values. 128 Record.Counts.clear(); 129 Record.Counts.reserve(NumCounters); 130 for (uint64_t I = 0; I < NumCounters; ++I) { 131 if (Line.is_at_end()) 132 return error(instrprof_error::truncated); 133 uint64_t Count; 134 if ((Line++)->getAsInteger(10, Count)) 135 return error(instrprof_error::malformed); 136 Record.Counts.push_back(Count); 137 } 138 139 return success(); 140 } 141 142 template <class IntPtrT> 143 static uint64_t getRawMagic(); 144 145 template <> 146 uint64_t getRawMagic<uint64_t>() { 147 return 148 uint64_t(255) << 56 | 149 uint64_t('l') << 48 | 150 uint64_t('p') << 40 | 151 uint64_t('r') << 32 | 152 uint64_t('o') << 24 | 153 uint64_t('f') << 16 | 154 uint64_t('r') << 8 | 155 uint64_t(129); 156 } 157 158 template <> 159 uint64_t getRawMagic<uint32_t>() { 160 return 161 uint64_t(255) << 56 | 162 uint64_t('l') << 48 | 163 uint64_t('p') << 40 | 164 uint64_t('r') << 32 | 165 uint64_t('o') << 24 | 166 uint64_t('f') << 16 | 167 uint64_t('R') << 8 | 168 uint64_t(129); 169 } 170 171 template <class IntPtrT> 172 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 173 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 174 return false; 175 uint64_t Magic = 176 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 177 return getRawMagic<IntPtrT>() == Magic || 178 sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic; 179 } 180 181 template <class IntPtrT> 182 std::error_code RawInstrProfReader<IntPtrT>::readHeader() { 183 if (!hasFormat(*DataBuffer)) 184 return error(instrprof_error::bad_magic); 185 if (DataBuffer->getBufferSize() < sizeof(RawHeader)) 186 return error(instrprof_error::bad_header); 187 auto *Header = 188 reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart()); 189 ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>(); 190 return readHeader(*Header); 191 } 192 193 template <class IntPtrT> 194 std::error_code 195 RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 196 const char *End = DataBuffer->getBufferEnd(); 197 // Skip zero padding between profiles. 198 while (CurrentPos != End && *CurrentPos == 0) 199 ++CurrentPos; 200 // If there's nothing left, we're done. 201 if (CurrentPos == End) 202 return instrprof_error::eof; 203 // If there isn't enough space for another header, this is probably just 204 // garbage at the end of the file. 205 if (CurrentPos + sizeof(RawHeader) > End) 206 return instrprof_error::malformed; 207 // The writer ensures each profile is padded to start at an aligned address. 208 if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>()) 209 return instrprof_error::malformed; 210 // The magic should have the same byte order as in the previous header. 211 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 212 if (Magic != swap(getRawMagic<IntPtrT>())) 213 return instrprof_error::bad_magic; 214 215 // There's another profile to read, so we need to process the header. 216 auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos); 217 return readHeader(*Header); 218 } 219 220 static uint64_t getRawVersion() { 221 return 1; 222 } 223 224 template <class IntPtrT> 225 std::error_code 226 RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { 227 if (swap(Header.Version) != getRawVersion()) 228 return error(instrprof_error::unsupported_version); 229 230 CountersDelta = swap(Header.CountersDelta); 231 NamesDelta = swap(Header.NamesDelta); 232 auto DataSize = swap(Header.DataSize); 233 auto CountersSize = swap(Header.CountersSize); 234 auto NamesSize = swap(Header.NamesSize); 235 236 ptrdiff_t DataOffset = sizeof(RawHeader); 237 ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; 238 ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; 239 size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; 240 241 auto *Start = reinterpret_cast<const char *>(&Header); 242 if (Start + ProfileSize > DataBuffer->getBufferEnd()) 243 return error(instrprof_error::bad_header); 244 245 Data = reinterpret_cast<const ProfileData *>(Start + DataOffset); 246 DataEnd = Data + DataSize; 247 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 248 NamesStart = Start + NamesOffset; 249 ProfileEnd = Start + ProfileSize; 250 251 return success(); 252 } 253 254 template <class IntPtrT> 255 std::error_code 256 RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { 257 if (Data == DataEnd) 258 if (std::error_code EC = readNextHeader(ProfileEnd)) 259 return EC; 260 261 // Get the raw data. 262 StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); 263 uint32_t NumCounters = swap(Data->NumCounters); 264 if (NumCounters == 0) 265 return error(instrprof_error::malformed); 266 auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); 267 268 // Check bounds. 269 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 270 if (RawName.data() < NamesStart || 271 RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || 272 RawCounts.data() < CountersStart || 273 RawCounts.data() + RawCounts.size() > NamesStartAsCounter) 274 return error(instrprof_error::malformed); 275 276 // Store the data in Record, byte-swapping as necessary. 277 Record.Hash = swap(Data->FuncHash); 278 Record.Name = RawName; 279 if (ShouldSwapBytes) { 280 Record.Counts.clear(); 281 Record.Counts.reserve(RawCounts.size()); 282 for (uint64_t Count : RawCounts) 283 Record.Counts.push_back(swap(Count)); 284 } else 285 Record.Counts = RawCounts; 286 287 // Iterate. 288 ++Data; 289 return success(); 290 } 291 292 namespace llvm { 293 template class RawInstrProfReader<uint32_t>; 294 template class RawInstrProfReader<uint64_t>; 295 } 296 297 InstrProfLookupTrait::hash_value_type 298 InstrProfLookupTrait::ComputeHash(StringRef K) { 299 return IndexedInstrProf::ComputeHash(HashType, K); 300 } 301 302 typedef InstrProfLookupTrait::data_type data_type; 303 typedef InstrProfLookupTrait::offset_type offset_type; 304 305 bool InstrProfLookupTrait::ReadValueProfilingData( 306 const unsigned char *&D, const unsigned char *const End) { 307 308 using namespace support; 309 // Read number of value kinds with value sites. 310 if (D + sizeof(uint64_t) > End) 311 return false; 312 uint64_t ValueKindCount = endian::readNext<uint64_t, little, unaligned>(D); 313 314 for (uint32_t Kind = 0; Kind < ValueKindCount; ++Kind) { 315 316 // Read value kind and number of value sites for kind. 317 if (D + 2 * sizeof(uint64_t) > End) 318 return false; 319 uint64_t ValueKind = endian::readNext<uint64_t, little, unaligned>(D); 320 uint64_t ValueSiteCount = endian::readNext<uint64_t, little, unaligned>(D); 321 322 std::vector<InstrProfValueSiteRecord> &ValueSites = 323 DataBuffer.back().getValueSitesForKind(ValueKind); 324 ValueSites.reserve(ValueSiteCount); 325 for (uint64_t VSite = 0; VSite < ValueSiteCount; ++VSite) { 326 // Read number of value data pairs at value site. 327 if (D + sizeof(uint64_t) > End) 328 return false; 329 uint64_t ValueDataCount = 330 endian::readNext<uint64_t, little, unaligned>(D); 331 332 // Check if there are as many ValueDataPairs as ValueDataCount in memory. 333 if (D + (ValueDataCount << 1) * sizeof(uint64_t) > End) 334 return false; 335 336 InstrProfValueSiteRecord VSiteRecord; 337 for (uint64_t VCount = 0; VCount < ValueDataCount; ++VCount) { 338 uint64_t Value = endian::readNext<uint64_t, little, unaligned>(D); 339 uint64_t NumTaken = endian::readNext<uint64_t, little, unaligned>(D); 340 switch (ValueKind) { 341 case IPVK_IndirectCallTarget: { 342 auto Result = 343 std::lower_bound(HashKeys.begin(), HashKeys.end(), Value, 344 [](const std::pair<uint64_t, const char *> &LHS, 345 uint64_t RHS) { return LHS.first < RHS; }); 346 assert(Result != HashKeys.end() && 347 "Hash does not match any known keys\n"); 348 Value = (uint64_t)Result->second; 349 break; 350 } 351 } 352 VSiteRecord.ValueData.push_back(std::make_pair(Value, NumTaken)); 353 } 354 ValueSites.push_back(std::move(VSiteRecord)); 355 } 356 } 357 return true; 358 } 359 360 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 361 offset_type N) { 362 // Check if the data is corrupt. If so, don't try to read it. 363 if (N % sizeof(uint64_t)) 364 return data_type(); 365 366 DataBuffer.clear(); 367 std::vector<uint64_t> CounterBuffer; 368 369 using namespace support; 370 const unsigned char *End = D + N; 371 while (D < End) { 372 // Read hash 373 if (D + sizeof(uint64_t) >= End) 374 return data_type(); 375 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 376 377 // Initialize number of counters for FormatVersion == 1 378 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 379 // If format version is different then read number of counters 380 if (FormatVersion != 1) { 381 if (D + sizeof(uint64_t) > End) 382 return data_type(); 383 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 384 } 385 // Read counter values 386 if (D + CountsSize * sizeof(uint64_t) > End) 387 return data_type(); 388 389 CounterBuffer.clear(); 390 CounterBuffer.reserve(CountsSize); 391 for (uint64_t J = 0; J < CountsSize; ++J) 392 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 393 394 DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer))); 395 396 // Read value profiling data 397 if (FormatVersion > 2 && !ReadValueProfilingData(D, End)) { 398 DataBuffer.clear(); 399 return data_type(); 400 } 401 } 402 return DataBuffer; 403 } 404 405 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 406 if (DataBuffer.getBufferSize() < 8) 407 return false; 408 using namespace support; 409 uint64_t Magic = 410 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 411 return Magic == IndexedInstrProf::Magic; 412 } 413 414 std::error_code IndexedInstrProfReader::readHeader() { 415 const unsigned char *Start = 416 (const unsigned char *)DataBuffer->getBufferStart(); 417 const unsigned char *Cur = Start; 418 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 419 return error(instrprof_error::truncated); 420 421 using namespace support; 422 423 // Check the magic number. 424 uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur); 425 if (Magic != IndexedInstrProf::Magic) 426 return error(instrprof_error::bad_magic); 427 428 // Read the version. 429 FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur); 430 if (FormatVersion > IndexedInstrProf::Version) 431 return error(instrprof_error::unsupported_version); 432 433 // Read the maximal function count. 434 MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur); 435 436 // Read the hash type and start offset. 437 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 438 endian::readNext<uint64_t, little, unaligned>(Cur)); 439 if (HashType > IndexedInstrProf::HashT::Last) 440 return error(instrprof_error::unsupported_hash_type); 441 uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur); 442 443 // The rest of the file is an on disk hash table. 444 Index.reset(InstrProfReaderIndex::Create( 445 Start + HashOffset, Cur, Start, 446 InstrProfLookupTrait(HashType, FormatVersion))); 447 448 // Form the map of hash values to const char* keys in profiling data. 449 std::vector<std::pair<uint64_t, const char *>> HashKeys; 450 for (auto Key : Index->keys()) { 451 const char *KeyTableRef = StringTable.insertString(Key); 452 HashKeys.push_back(std::make_pair(ComputeHash(HashType, Key), KeyTableRef)); 453 } 454 std::sort(HashKeys.begin(), HashKeys.end(), less_first()); 455 HashKeys.erase(std::unique(HashKeys.begin(), HashKeys.end()), HashKeys.end()); 456 // Set the hash key map for the InstrLookupTrait 457 Index->getInfoObj().setHashKeys(std::move(HashKeys)); 458 // Set up our iterator for readNextRecord. 459 RecordIterator = Index->data_begin(); 460 461 return success(); 462 } 463 464 std::error_code IndexedInstrProfReader::getFunctionCounts( 465 StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) { 466 auto Iter = Index->find(FuncName); 467 if (Iter == Index->end()) 468 return error(instrprof_error::unknown_function); 469 470 // Found it. Look for counters with the right hash. 471 ArrayRef<InstrProfRecord> Data = (*Iter); 472 if (Data.empty()) 473 return error(instrprof_error::malformed); 474 475 for (unsigned I = 0, E = Data.size(); I < E; ++I) { 476 // Check for a match and fill the vector if there is one. 477 if (Data[I].Hash == FuncHash) { 478 Counts = Data[I].Counts; 479 return success(); 480 } 481 } 482 return error(instrprof_error::hash_mismatch); 483 } 484 485 std::error_code 486 IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { 487 // Are we out of records? 488 if (RecordIterator == Index->data_end()) 489 return error(instrprof_error::eof); 490 491 if ((*RecordIterator).empty()) 492 return error(instrprof_error::malformed); 493 494 static unsigned RecordIndex = 0; 495 ArrayRef<InstrProfRecord> Data = (*RecordIterator); 496 Record = Data[RecordIndex++]; 497 if (RecordIndex >= Data.size()) { 498 ++RecordIterator; 499 RecordIndex = 0; 500 } 501 return success(); 502 } 503