1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path, 56 const InstrProfCorrelator *Correlator) { 57 // Set up the buffer to read. 58 auto BufferOrError = setupMemoryBuffer(Path); 59 if (Error E = BufferOrError.takeError()) 60 return std::move(E); 61 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 62 } 63 64 Expected<std::unique_ptr<InstrProfReader>> 65 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 66 const InstrProfCorrelator *Correlator) { 67 // Sanity check the buffer. 68 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 69 return make_error<InstrProfError>(instrprof_error::too_large); 70 71 if (Buffer->getBufferSize() == 0) 72 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 73 74 std::unique_ptr<InstrProfReader> Result; 75 // Create the reader. 76 if (IndexedInstrProfReader::hasFormat(*Buffer)) 77 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 78 else if (RawInstrProfReader64::hasFormat(*Buffer)) 79 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 80 else if (RawInstrProfReader32::hasFormat(*Buffer)) 81 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 82 else if (TextInstrProfReader::hasFormat(*Buffer)) 83 Result.reset(new TextInstrProfReader(std::move(Buffer))); 84 else 85 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 86 87 // Initialize the reader and return the result. 88 if (Error E = initializeReader(*Result)) 89 return std::move(E); 90 91 return std::move(Result); 92 } 93 94 Expected<std::unique_ptr<IndexedInstrProfReader>> 95 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 96 // Set up the buffer to read. 97 auto BufferOrError = setupMemoryBuffer(Path); 98 if (Error E = BufferOrError.takeError()) 99 return std::move(E); 100 101 // Set up the remapping buffer if requested. 102 std::unique_ptr<MemoryBuffer> RemappingBuffer; 103 std::string RemappingPathStr = RemappingPath.str(); 104 if (!RemappingPathStr.empty()) { 105 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 106 if (Error E = RemappingBufferOrError.takeError()) 107 return std::move(E); 108 RemappingBuffer = std::move(RemappingBufferOrError.get()); 109 } 110 111 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 112 std::move(RemappingBuffer)); 113 } 114 115 Expected<std::unique_ptr<IndexedInstrProfReader>> 116 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 117 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 118 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 119 return make_error<InstrProfError>(instrprof_error::too_large); 120 121 // Create the reader. 122 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 123 return make_error<InstrProfError>(instrprof_error::bad_magic); 124 auto Result = std::make_unique<IndexedInstrProfReader>( 125 std::move(Buffer), std::move(RemappingBuffer)); 126 127 // Initialize the reader and return the result. 128 if (Error E = initializeReader(*Result)) 129 return std::move(E); 130 131 return std::move(Result); 132 } 133 134 void InstrProfIterator::Increment() { 135 if (auto E = Reader->readNextRecord(Record)) { 136 // Handle errors in the reader. 137 InstrProfError::take(std::move(E)); 138 *this = InstrProfIterator(); 139 } 140 } 141 142 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 143 // Verify that this really looks like plain ASCII text by checking a 144 // 'reasonable' number of characters (up to profile magic size). 145 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 146 StringRef buffer = Buffer.getBufferStart(); 147 return count == 0 || 148 std::all_of(buffer.begin(), buffer.begin() + count, 149 [](char c) { return isPrint(c) || isSpace(c); }); 150 } 151 152 // Read the profile variant flag from the header: ":FE" means this is a FE 153 // generated profile. ":IR" means this is an IR level profile. Other strings 154 // with a leading ':' will be reported an error format. 155 Error TextInstrProfReader::readHeader() { 156 Symtab.reset(new InstrProfSymtab()); 157 158 while (Line->startswith(":")) { 159 StringRef Str = Line->substr(1); 160 if (Str.equals_insensitive("ir")) 161 ProfileKind |= InstrProfKind::IR; 162 else if (Str.equals_insensitive("fe")) 163 ProfileKind |= InstrProfKind::FE; 164 else if (Str.equals_insensitive("csir")) { 165 ProfileKind |= InstrProfKind::IR; 166 ProfileKind |= InstrProfKind::CS; 167 } else if (Str.equals_insensitive("entry_first")) 168 ProfileKind |= InstrProfKind::BB; 169 else if (Str.equals_insensitive("not_entry_first")) 170 ProfileKind &= ~InstrProfKind::BB; 171 else 172 return error(instrprof_error::bad_header); 173 ++Line; 174 } 175 return success(); 176 } 177 178 Error 179 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 180 181 #define CHECK_LINE_END(Line) \ 182 if (Line.is_at_end()) \ 183 return error(instrprof_error::truncated); 184 #define READ_NUM(Str, Dst) \ 185 if ((Str).getAsInteger(10, (Dst))) \ 186 return error(instrprof_error::malformed); 187 #define VP_READ_ADVANCE(Val) \ 188 CHECK_LINE_END(Line); \ 189 uint32_t Val; \ 190 READ_NUM((*Line), (Val)); \ 191 Line++; 192 193 if (Line.is_at_end()) 194 return success(); 195 196 uint32_t NumValueKinds; 197 if (Line->getAsInteger(10, NumValueKinds)) { 198 // No value profile data 199 return success(); 200 } 201 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 202 return error(instrprof_error::malformed, 203 "number of value kinds is invalid"); 204 Line++; 205 206 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 207 VP_READ_ADVANCE(ValueKind); 208 if (ValueKind > IPVK_Last) 209 return error(instrprof_error::malformed, "value kind is invalid"); 210 ; 211 VP_READ_ADVANCE(NumValueSites); 212 if (!NumValueSites) 213 continue; 214 215 Record.reserveSites(VK, NumValueSites); 216 for (uint32_t S = 0; S < NumValueSites; S++) { 217 VP_READ_ADVANCE(NumValueData); 218 219 std::vector<InstrProfValueData> CurrentValues; 220 for (uint32_t V = 0; V < NumValueData; V++) { 221 CHECK_LINE_END(Line); 222 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 223 uint64_t TakenCount, Value; 224 if (ValueKind == IPVK_IndirectCallTarget) { 225 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 226 Value = 0; 227 } else { 228 if (Error E = Symtab->addFuncName(VD.first)) 229 return E; 230 Value = IndexedInstrProf::ComputeHash(VD.first); 231 } 232 } else { 233 READ_NUM(VD.first, Value); 234 } 235 READ_NUM(VD.second, TakenCount); 236 CurrentValues.push_back({Value, TakenCount}); 237 Line++; 238 } 239 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 240 nullptr); 241 } 242 } 243 return success(); 244 245 #undef CHECK_LINE_END 246 #undef READ_NUM 247 #undef VP_READ_ADVANCE 248 } 249 250 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 251 // Skip empty lines and comments. 252 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 253 ++Line; 254 // If we hit EOF while looking for a name, we're done. 255 if (Line.is_at_end()) { 256 return error(instrprof_error::eof); 257 } 258 259 // Read the function name. 260 Record.Name = *Line++; 261 if (Error E = Symtab->addFuncName(Record.Name)) 262 return error(std::move(E)); 263 264 // Read the function hash. 265 if (Line.is_at_end()) 266 return error(instrprof_error::truncated); 267 if ((Line++)->getAsInteger(0, Record.Hash)) 268 return error(instrprof_error::malformed, 269 "function hash is not a valid integer"); 270 271 // Read the number of counters. 272 uint64_t NumCounters; 273 if (Line.is_at_end()) 274 return error(instrprof_error::truncated); 275 if ((Line++)->getAsInteger(10, NumCounters)) 276 return error(instrprof_error::malformed, 277 "number of counters is not a valid integer"); 278 if (NumCounters == 0) 279 return error(instrprof_error::malformed, "number of counters is zero"); 280 281 // Read each counter and fill our internal storage with the values. 282 Record.Clear(); 283 Record.Counts.reserve(NumCounters); 284 for (uint64_t I = 0; I < NumCounters; ++I) { 285 if (Line.is_at_end()) 286 return error(instrprof_error::truncated); 287 uint64_t Count; 288 if ((Line++)->getAsInteger(10, Count)) 289 return error(instrprof_error::malformed, "count is invalid"); 290 Record.Counts.push_back(Count); 291 } 292 293 // Check if value profile data exists and read it if so. 294 if (Error E = readValueProfileData(Record)) 295 return error(std::move(E)); 296 297 return success(); 298 } 299 300 template <class IntPtrT> 301 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 302 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 303 return false; 304 uint64_t Magic = 305 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 306 return RawInstrProf::getMagic<IntPtrT>() == Magic || 307 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 308 } 309 310 template <class IntPtrT> 311 Error RawInstrProfReader<IntPtrT>::readHeader() { 312 if (!hasFormat(*DataBuffer)) 313 return error(instrprof_error::bad_magic); 314 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 315 return error(instrprof_error::bad_header); 316 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 317 DataBuffer->getBufferStart()); 318 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 319 return readHeader(*Header); 320 } 321 322 template <class IntPtrT> 323 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 324 const char *End = DataBuffer->getBufferEnd(); 325 // Skip zero padding between profiles. 326 while (CurrentPos != End && *CurrentPos == 0) 327 ++CurrentPos; 328 // If there's nothing left, we're done. 329 if (CurrentPos == End) 330 return make_error<InstrProfError>(instrprof_error::eof); 331 // If there isn't enough space for another header, this is probably just 332 // garbage at the end of the file. 333 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 334 return make_error<InstrProfError>(instrprof_error::malformed, 335 "not enough space for another header"); 336 // The writer ensures each profile is padded to start at an aligned address. 337 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 338 return make_error<InstrProfError>(instrprof_error::malformed, 339 "insufficient padding"); 340 // The magic should have the same byte order as in the previous header. 341 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 342 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 343 return make_error<InstrProfError>(instrprof_error::bad_magic); 344 345 // There's another profile to read, so we need to process the header. 346 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 347 return readHeader(*Header); 348 } 349 350 template <class IntPtrT> 351 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 352 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 353 return error(std::move(E)); 354 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 355 const IntPtrT FPtr = swap(I->FunctionPointer); 356 if (!FPtr) 357 continue; 358 Symtab.mapAddress(FPtr, I->NameRef); 359 } 360 return success(); 361 } 362 363 template <class IntPtrT> 364 Error RawInstrProfReader<IntPtrT>::readHeader( 365 const RawInstrProf::Header &Header) { 366 Version = swap(Header.Version); 367 if (GET_VERSION(Version) != RawInstrProf::Version) 368 return error(instrprof_error::unsupported_version); 369 if (useDebugInfoCorrelate() && !Correlator) 370 return error(instrprof_error::missing_debug_info_for_correlation); 371 if (!useDebugInfoCorrelate() && Correlator) 372 return error(instrprof_error::unexpected_debug_info_for_correlation); 373 374 BinaryIdsSize = swap(Header.BinaryIdsSize); 375 if (BinaryIdsSize % sizeof(uint64_t)) 376 return error(instrprof_error::bad_header); 377 378 CountersDelta = swap(Header.CountersDelta); 379 NamesDelta = swap(Header.NamesDelta); 380 auto NumData = swap(Header.DataSize); 381 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 382 auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); 383 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 384 auto NamesSize = swap(Header.NamesSize); 385 ValueKindLast = swap(Header.ValueKindLast); 386 387 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 388 auto PaddingSize = getNumPaddingBytes(NamesSize); 389 390 // Profile data starts after profile header and binary ids if exist. 391 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 392 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 393 ptrdiff_t NamesOffset = 394 CountersOffset + CountersSize + PaddingBytesAfterCounters; 395 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 396 397 auto *Start = reinterpret_cast<const char *>(&Header); 398 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 399 return error(instrprof_error::bad_header); 400 401 if (Correlator) { 402 // These sizes in the raw file are zero because we constructed them in the 403 // Correlator. 404 assert(DataSize == 0 && NamesSize == 0); 405 assert(CountersDelta == 0 && NamesDelta == 0); 406 Data = Correlator->getDataPointer(); 407 DataEnd = Data + Correlator->getDataSize(); 408 NamesStart = Correlator->getNamesPointer(); 409 NamesEnd = NamesStart + Correlator->getNamesSize(); 410 } else { 411 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 412 Start + DataOffset); 413 DataEnd = Data + NumData; 414 NamesStart = Start + NamesOffset; 415 NamesEnd = NamesStart + NamesSize; 416 } 417 418 // Binary ids start just after the header. 419 BinaryIdsStart = 420 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 421 CountersStart = Start + CountersOffset; 422 CountersEnd = CountersStart + CountersSize; 423 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 424 425 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 426 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 427 return error(instrprof_error::bad_header); 428 429 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 430 if (Error E = createSymtab(*NewSymtab.get())) 431 return E; 432 433 Symtab = std::move(NewSymtab); 434 return success(); 435 } 436 437 template <class IntPtrT> 438 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 439 Record.Name = getName(Data->NameRef); 440 return success(); 441 } 442 443 template <class IntPtrT> 444 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 445 Record.Hash = swap(Data->FuncHash); 446 return success(); 447 } 448 449 template <class IntPtrT> 450 Error RawInstrProfReader<IntPtrT>::readRawCounts( 451 InstrProfRecord &Record) { 452 uint32_t NumCounters = swap(Data->NumCounters); 453 if (NumCounters == 0) 454 return error(instrprof_error::malformed, "number of counters is zero"); 455 456 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 457 if (CounterBaseOffset < 0) 458 return error( 459 instrprof_error::malformed, 460 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 461 462 if (CounterBaseOffset >= CountersEnd - CountersStart) 463 return error(instrprof_error::malformed, 464 ("counter offset " + Twine(CounterBaseOffset) + 465 " is greater than the maximum counter offset " + 466 Twine(CountersEnd - CountersStart - 1)) 467 .str()); 468 469 uint64_t MaxNumCounters = 470 (CountersEnd - (CountersStart + CounterBaseOffset)) / 471 getCounterTypeSize(); 472 if (NumCounters > MaxNumCounters) 473 return error(instrprof_error::malformed, 474 ("number of counters " + Twine(NumCounters) + 475 " is greater than the maximum number of counters " + 476 Twine(MaxNumCounters)) 477 .str()); 478 479 Record.Counts.clear(); 480 Record.Counts.reserve(NumCounters); 481 for (uint32_t I = 0; I < NumCounters; I++) { 482 const auto *CounterValue = reinterpret_cast<const uint64_t *>( 483 CountersStart + CounterBaseOffset + I * getCounterTypeSize()); 484 Record.Counts.push_back(swap(*CounterValue)); 485 } 486 487 return success(); 488 } 489 490 template <class IntPtrT> 491 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 492 InstrProfRecord &Record) { 493 Record.clearValueData(); 494 CurValueDataSize = 0; 495 // Need to match the logic in value profile dumper code in compiler-rt: 496 uint32_t NumValueKinds = 0; 497 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 498 NumValueKinds += (Data->NumValueSites[I] != 0); 499 500 if (!NumValueKinds) 501 return success(); 502 503 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 504 ValueProfData::getValueProfData( 505 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 506 getDataEndianness()); 507 508 if (Error E = VDataPtrOrErr.takeError()) 509 return E; 510 511 // Note that besides deserialization, this also performs the conversion for 512 // indirect call targets. The function pointers from the raw profile are 513 // remapped into function name hashes. 514 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 515 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 516 return success(); 517 } 518 519 template <class IntPtrT> 520 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 521 if (atEnd()) 522 // At this point, ValueDataStart field points to the next header. 523 if (Error E = readNextHeader(getNextHeaderPos())) 524 return error(std::move(E)); 525 526 // Read name ad set it in Record. 527 if (Error E = readName(Record)) 528 return error(std::move(E)); 529 530 // Read FuncHash and set it in Record. 531 if (Error E = readFuncHash(Record)) 532 return error(std::move(E)); 533 534 // Read raw counts and set Record. 535 if (Error E = readRawCounts(Record)) 536 return error(std::move(E)); 537 538 // Read value data and set Record. 539 if (Error E = readValueProfilingData(Record)) 540 return error(std::move(E)); 541 542 // Iterate. 543 advanceData(); 544 return success(); 545 } 546 547 static size_t RoundUp(size_t size, size_t align) { 548 return (size + align - 1) & ~(align - 1); 549 } 550 551 template <class IntPtrT> 552 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 553 if (BinaryIdsSize == 0) 554 return success(); 555 556 OS << "Binary IDs: \n"; 557 const uint8_t *BI = BinaryIdsStart; 558 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 559 while (BI < BIEnd) { 560 size_t Remaining = BIEnd - BI; 561 562 // There should be enough left to read the binary ID size field. 563 if (Remaining < sizeof(uint64_t)) 564 return make_error<InstrProfError>( 565 instrprof_error::malformed, 566 "not enough data to read binary id length"); 567 568 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 569 570 // There should be enough left to read the binary ID size field, and the 571 // binary ID. 572 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 573 return make_error<InstrProfError>( 574 instrprof_error::malformed, "not enough data to read binary id data"); 575 576 // Increment by binary id length data type size. 577 BI += sizeof(BinaryIdLen); 578 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 579 return make_error<InstrProfError>( 580 instrprof_error::malformed, 581 "binary id that is read is bigger than buffer size"); 582 583 for (uint64_t I = 0; I < BinaryIdLen; I++) 584 OS << format("%02x", BI[I]); 585 OS << "\n"; 586 587 // Increment by binary id data length, rounded to the next 8 bytes. This 588 // accounts for the zero-padding after each build ID. 589 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 590 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 591 return make_error<InstrProfError>(instrprof_error::malformed); 592 } 593 594 return success(); 595 } 596 597 namespace llvm { 598 599 template class RawInstrProfReader<uint32_t>; 600 template class RawInstrProfReader<uint64_t>; 601 602 } // end namespace llvm 603 604 InstrProfLookupTrait::hash_value_type 605 InstrProfLookupTrait::ComputeHash(StringRef K) { 606 return IndexedInstrProf::ComputeHash(HashType, K); 607 } 608 609 using data_type = InstrProfLookupTrait::data_type; 610 using offset_type = InstrProfLookupTrait::offset_type; 611 612 bool InstrProfLookupTrait::readValueProfilingData( 613 const unsigned char *&D, const unsigned char *const End) { 614 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 615 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 616 617 if (VDataPtrOrErr.takeError()) 618 return false; 619 620 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 621 D += VDataPtrOrErr.get()->TotalSize; 622 623 return true; 624 } 625 626 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 627 offset_type N) { 628 using namespace support; 629 630 // Check if the data is corrupt. If so, don't try to read it. 631 if (N % sizeof(uint64_t)) 632 return data_type(); 633 634 DataBuffer.clear(); 635 std::vector<uint64_t> CounterBuffer; 636 637 const unsigned char *End = D + N; 638 while (D < End) { 639 // Read hash. 640 if (D + sizeof(uint64_t) >= End) 641 return data_type(); 642 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 643 644 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 645 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 646 // If format version is different then read the number of counters. 647 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 648 if (D + sizeof(uint64_t) > End) 649 return data_type(); 650 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 651 } 652 // Read counter values. 653 if (D + CountsSize * sizeof(uint64_t) > End) 654 return data_type(); 655 656 CounterBuffer.clear(); 657 CounterBuffer.reserve(CountsSize); 658 for (uint64_t J = 0; J < CountsSize; ++J) 659 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 660 661 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 662 663 // Read value profiling data. 664 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 665 !readValueProfilingData(D, End)) { 666 DataBuffer.clear(); 667 return data_type(); 668 } 669 } 670 return DataBuffer; 671 } 672 673 template <typename HashTableImpl> 674 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 675 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 676 auto Iter = HashTable->find(FuncName); 677 if (Iter == HashTable->end()) 678 return make_error<InstrProfError>(instrprof_error::unknown_function); 679 680 Data = (*Iter); 681 if (Data.empty()) 682 return make_error<InstrProfError>(instrprof_error::malformed, 683 "profile data is empty"); 684 685 return Error::success(); 686 } 687 688 template <typename HashTableImpl> 689 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 690 ArrayRef<NamedInstrProfRecord> &Data) { 691 if (atEnd()) 692 return make_error<InstrProfError>(instrprof_error::eof); 693 694 Data = *RecordIterator; 695 696 if (Data.empty()) 697 return make_error<InstrProfError>(instrprof_error::malformed, 698 "profile data is empty"); 699 700 return Error::success(); 701 } 702 703 template <typename HashTableImpl> 704 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 705 const unsigned char *Buckets, const unsigned char *const Payload, 706 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 707 uint64_t Version) { 708 FormatVersion = Version; 709 HashTable.reset(HashTableImpl::Create( 710 Buckets, Payload, Base, 711 typename HashTableImpl::InfoType(HashType, Version))); 712 RecordIterator = HashTable->data_begin(); 713 } 714 715 namespace { 716 /// A remapper that does not apply any remappings. 717 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 718 InstrProfReaderIndexBase &Underlying; 719 720 public: 721 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 722 : Underlying(Underlying) {} 723 724 Error getRecords(StringRef FuncName, 725 ArrayRef<NamedInstrProfRecord> &Data) override { 726 return Underlying.getRecords(FuncName, Data); 727 } 728 }; 729 } // namespace 730 731 /// A remapper that applies remappings based on a symbol remapping file. 732 template <typename HashTableImpl> 733 class llvm::InstrProfReaderItaniumRemapper 734 : public InstrProfReaderRemapper { 735 public: 736 InstrProfReaderItaniumRemapper( 737 std::unique_ptr<MemoryBuffer> RemapBuffer, 738 InstrProfReaderIndex<HashTableImpl> &Underlying) 739 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 740 } 741 742 /// Extract the original function name from a PGO function name. 743 static StringRef extractName(StringRef Name) { 744 // We can have multiple :-separated pieces; there can be pieces both 745 // before and after the mangled name. Find the first part that starts 746 // with '_Z'; we'll assume that's the mangled name we want. 747 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 748 while (true) { 749 Parts = Parts.second.split(':'); 750 if (Parts.first.startswith("_Z")) 751 return Parts.first; 752 if (Parts.second.empty()) 753 return Name; 754 } 755 } 756 757 /// Given a mangled name extracted from a PGO function name, and a new 758 /// form for that mangled name, reconstitute the name. 759 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 760 StringRef Replacement, 761 SmallVectorImpl<char> &Out) { 762 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 763 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 764 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 765 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 766 } 767 768 Error populateRemappings() override { 769 if (Error E = Remappings.read(*RemapBuffer)) 770 return E; 771 for (StringRef Name : Underlying.HashTable->keys()) { 772 StringRef RealName = extractName(Name); 773 if (auto Key = Remappings.insert(RealName)) { 774 // FIXME: We could theoretically map the same equivalence class to 775 // multiple names in the profile data. If that happens, we should 776 // return NamedInstrProfRecords from all of them. 777 MappedNames.insert({Key, RealName}); 778 } 779 } 780 return Error::success(); 781 } 782 783 Error getRecords(StringRef FuncName, 784 ArrayRef<NamedInstrProfRecord> &Data) override { 785 StringRef RealName = extractName(FuncName); 786 if (auto Key = Remappings.lookup(RealName)) { 787 StringRef Remapped = MappedNames.lookup(Key); 788 if (!Remapped.empty()) { 789 if (RealName.begin() == FuncName.begin() && 790 RealName.end() == FuncName.end()) 791 FuncName = Remapped; 792 else { 793 // Try rebuilding the name from the given remapping. 794 SmallString<256> Reconstituted; 795 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 796 Error E = Underlying.getRecords(Reconstituted, Data); 797 if (!E) 798 return E; 799 800 // If we failed because the name doesn't exist, fall back to asking 801 // about the original name. 802 if (Error Unhandled = handleErrors( 803 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 804 return Err->get() == instrprof_error::unknown_function 805 ? Error::success() 806 : Error(std::move(Err)); 807 })) 808 return Unhandled; 809 } 810 } 811 } 812 return Underlying.getRecords(FuncName, Data); 813 } 814 815 private: 816 /// The memory buffer containing the remapping configuration. Remappings 817 /// holds pointers into this buffer. 818 std::unique_ptr<MemoryBuffer> RemapBuffer; 819 820 /// The mangling remapper. 821 SymbolRemappingReader Remappings; 822 823 /// Mapping from mangled name keys to the name used for the key in the 824 /// profile data. 825 /// FIXME: Can we store a location within the on-disk hash table instead of 826 /// redoing lookup? 827 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 828 829 /// The real profile data reader. 830 InstrProfReaderIndex<HashTableImpl> &Underlying; 831 }; 832 833 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 834 using namespace support; 835 836 if (DataBuffer.getBufferSize() < 8) 837 return false; 838 uint64_t Magic = 839 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 840 // Verify that it's magical. 841 return Magic == IndexedInstrProf::Magic; 842 } 843 844 const unsigned char * 845 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 846 const unsigned char *Cur, bool UseCS) { 847 using namespace IndexedInstrProf; 848 using namespace support; 849 850 if (Version >= IndexedInstrProf::Version4) { 851 const IndexedInstrProf::Summary *SummaryInLE = 852 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 853 uint64_t NFields = 854 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 855 uint64_t NEntries = 856 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 857 uint32_t SummarySize = 858 IndexedInstrProf::Summary::getSize(NFields, NEntries); 859 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 860 IndexedInstrProf::allocSummary(SummarySize); 861 862 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 863 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 864 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 865 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 866 867 SummaryEntryVector DetailedSummary; 868 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 869 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 870 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 871 Ent.NumBlocks); 872 } 873 std::unique_ptr<llvm::ProfileSummary> &Summary = 874 UseCS ? this->CS_Summary : this->Summary; 875 876 // initialize InstrProfSummary using the SummaryData from disk. 877 Summary = std::make_unique<ProfileSummary>( 878 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 879 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 880 SummaryData->get(Summary::MaxBlockCount), 881 SummaryData->get(Summary::MaxInternalBlockCount), 882 SummaryData->get(Summary::MaxFunctionCount), 883 SummaryData->get(Summary::TotalNumBlocks), 884 SummaryData->get(Summary::TotalNumFunctions)); 885 return Cur + SummarySize; 886 } else { 887 // The older versions do not support a profile summary. This just computes 888 // an empty summary, which will not result in accurate hot/cold detection. 889 // We would need to call addRecord for all NamedInstrProfRecords to get the 890 // correct summary. However, this version is old (prior to early 2016) and 891 // has not been supporting an accurate summary for several years. 892 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 893 Summary = Builder.getSummary(); 894 return Cur; 895 } 896 } 897 898 Error IndexedInstrProfReader::readHeader() { 899 using namespace support; 900 901 const unsigned char *Start = 902 (const unsigned char *)DataBuffer->getBufferStart(); 903 const unsigned char *Cur = Start; 904 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 905 return error(instrprof_error::truncated); 906 907 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 908 Cur += sizeof(IndexedInstrProf::Header); 909 910 // Check the magic number. 911 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 912 if (Magic != IndexedInstrProf::Magic) 913 return error(instrprof_error::bad_magic); 914 915 // Read the version. 916 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 917 if (GET_VERSION(FormatVersion) > 918 IndexedInstrProf::ProfVersion::CurrentVersion) 919 return error(instrprof_error::unsupported_version); 920 921 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 922 /* UseCS */ false); 923 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 924 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 925 /* UseCS */ true); 926 927 // Read the hash type and start offset. 928 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 929 endian::byte_swap<uint64_t, little>(Header->HashType)); 930 if (HashType > IndexedInstrProf::HashT::Last) 931 return error(instrprof_error::unsupported_hash_type); 932 933 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 934 935 // The rest of the file is an on disk hash table. 936 auto IndexPtr = 937 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 938 Start + HashOffset, Cur, Start, HashType, FormatVersion); 939 940 // Load the remapping table now if requested. 941 if (RemappingBuffer) { 942 Remapper = std::make_unique< 943 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 944 std::move(RemappingBuffer), *IndexPtr); 945 if (Error E = Remapper->populateRemappings()) 946 return E; 947 } else { 948 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 949 } 950 Index = std::move(IndexPtr); 951 952 return success(); 953 } 954 955 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 956 if (Symtab.get()) 957 return *Symtab.get(); 958 959 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 960 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 961 consumeError(error(InstrProfError::take(std::move(E)))); 962 } 963 964 Symtab = std::move(NewSymtab); 965 return *Symtab.get(); 966 } 967 968 Expected<InstrProfRecord> 969 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 970 uint64_t FuncHash) { 971 ArrayRef<NamedInstrProfRecord> Data; 972 Error Err = Remapper->getRecords(FuncName, Data); 973 if (Err) 974 return std::move(Err); 975 // Found it. Look for counters with the right hash. 976 for (const NamedInstrProfRecord &I : Data) { 977 // Check for a match and fill the vector if there is one. 978 if (I.Hash == FuncHash) 979 return std::move(I); 980 } 981 return error(instrprof_error::hash_mismatch); 982 } 983 984 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 985 uint64_t FuncHash, 986 std::vector<uint64_t> &Counts) { 987 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 988 if (Error E = Record.takeError()) 989 return error(std::move(E)); 990 991 Counts = Record.get().Counts; 992 return success(); 993 } 994 995 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 996 ArrayRef<NamedInstrProfRecord> Data; 997 998 Error E = Index->getRecords(Data); 999 if (E) 1000 return error(std::move(E)); 1001 1002 Record = Data[RecordIndex++]; 1003 if (RecordIndex >= Data.size()) { 1004 Index->advanceToNextKey(); 1005 RecordIndex = 0; 1006 } 1007 return success(); 1008 } 1009 1010 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1011 uint64_t NumFuncs = 0; 1012 for (const auto &Func : *this) { 1013 if (isIRLevelProfile()) { 1014 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1015 if (FuncIsCS != IsCS) 1016 continue; 1017 } 1018 Func.accumulateCounts(Sum); 1019 ++NumFuncs; 1020 } 1021 Sum.NumEntries = NumFuncs; 1022 } 1023