1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 // Extracts the variant information from the top 8 bits in the version and 42 // returns an enum specifying the variants present. 43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 44 InstrProfKind ProfileKind = InstrProfKind::Unknown; 45 if (Version & VARIANT_MASK_IR_PROF) { 46 ProfileKind |= InstrProfKind::IR; 47 } 48 if (Version & VARIANT_MASK_CSIR_PROF) { 49 ProfileKind |= InstrProfKind::CS; 50 } 51 if (Version & VARIANT_MASK_INSTR_ENTRY) { 52 ProfileKind |= InstrProfKind::BB; 53 } 54 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 55 ProfileKind |= InstrProfKind::SingleByteCoverage; 56 } 57 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 58 ProfileKind |= InstrProfKind::FunctionEntryOnly; 59 } 60 return ProfileKind; 61 } 62 63 static Expected<std::unique_ptr<MemoryBuffer>> 64 setupMemoryBuffer(const Twine &Path) { 65 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 66 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 67 if (std::error_code EC = BufferOrErr.getError()) 68 return errorCodeToError(EC); 69 return std::move(BufferOrErr.get()); 70 } 71 72 static Error initializeReader(InstrProfReader &Reader) { 73 return Reader.readHeader(); 74 } 75 76 Expected<std::unique_ptr<InstrProfReader>> 77 InstrProfReader::create(const Twine &Path, 78 const InstrProfCorrelator *Correlator) { 79 // Set up the buffer to read. 80 auto BufferOrError = setupMemoryBuffer(Path); 81 if (Error E = BufferOrError.takeError()) 82 return std::move(E); 83 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 84 } 85 86 Expected<std::unique_ptr<InstrProfReader>> 87 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 88 const InstrProfCorrelator *Correlator) { 89 // Sanity check the buffer. 90 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 91 return make_error<InstrProfError>(instrprof_error::too_large); 92 93 if (Buffer->getBufferSize() == 0) 94 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 95 96 std::unique_ptr<InstrProfReader> Result; 97 // Create the reader. 98 if (IndexedInstrProfReader::hasFormat(*Buffer)) 99 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 100 else if (RawInstrProfReader64::hasFormat(*Buffer)) 101 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 102 else if (RawInstrProfReader32::hasFormat(*Buffer)) 103 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 104 else if (TextInstrProfReader::hasFormat(*Buffer)) 105 Result.reset(new TextInstrProfReader(std::move(Buffer))); 106 else 107 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 108 109 // Initialize the reader and return the result. 110 if (Error E = initializeReader(*Result)) 111 return std::move(E); 112 113 return std::move(Result); 114 } 115 116 Expected<std::unique_ptr<IndexedInstrProfReader>> 117 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 118 // Set up the buffer to read. 119 auto BufferOrError = setupMemoryBuffer(Path); 120 if (Error E = BufferOrError.takeError()) 121 return std::move(E); 122 123 // Set up the remapping buffer if requested. 124 std::unique_ptr<MemoryBuffer> RemappingBuffer; 125 std::string RemappingPathStr = RemappingPath.str(); 126 if (!RemappingPathStr.empty()) { 127 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 128 if (Error E = RemappingBufferOrError.takeError()) 129 return std::move(E); 130 RemappingBuffer = std::move(RemappingBufferOrError.get()); 131 } 132 133 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 134 std::move(RemappingBuffer)); 135 } 136 137 Expected<std::unique_ptr<IndexedInstrProfReader>> 138 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 139 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 140 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 141 return make_error<InstrProfError>(instrprof_error::too_large); 142 143 // Create the reader. 144 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 145 return make_error<InstrProfError>(instrprof_error::bad_magic); 146 auto Result = std::make_unique<IndexedInstrProfReader>( 147 std::move(Buffer), std::move(RemappingBuffer)); 148 149 // Initialize the reader and return the result. 150 if (Error E = initializeReader(*Result)) 151 return std::move(E); 152 153 return std::move(Result); 154 } 155 156 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 157 // Verify that this really looks like plain ASCII text by checking a 158 // 'reasonable' number of characters (up to profile magic size). 159 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 160 StringRef buffer = Buffer.getBufferStart(); 161 return count == 0 || 162 std::all_of(buffer.begin(), buffer.begin() + count, 163 [](char c) { return isPrint(c) || isSpace(c); }); 164 } 165 166 // Read the profile variant flag from the header: ":FE" means this is a FE 167 // generated profile. ":IR" means this is an IR level profile. Other strings 168 // with a leading ':' will be reported an error format. 169 Error TextInstrProfReader::readHeader() { 170 Symtab.reset(new InstrProfSymtab()); 171 172 while (Line->startswith(":")) { 173 StringRef Str = Line->substr(1); 174 if (Str.equals_insensitive("ir")) 175 ProfileKind |= InstrProfKind::IR; 176 else if (Str.equals_insensitive("fe")) 177 ProfileKind |= InstrProfKind::FE; 178 else if (Str.equals_insensitive("csir")) { 179 ProfileKind |= InstrProfKind::IR; 180 ProfileKind |= InstrProfKind::CS; 181 } else if (Str.equals_insensitive("entry_first")) 182 ProfileKind |= InstrProfKind::BB; 183 else if (Str.equals_insensitive("not_entry_first")) 184 ProfileKind &= ~InstrProfKind::BB; 185 else 186 return error(instrprof_error::bad_header); 187 ++Line; 188 } 189 return success(); 190 } 191 192 Error 193 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 194 195 #define CHECK_LINE_END(Line) \ 196 if (Line.is_at_end()) \ 197 return error(instrprof_error::truncated); 198 #define READ_NUM(Str, Dst) \ 199 if ((Str).getAsInteger(10, (Dst))) \ 200 return error(instrprof_error::malformed); 201 #define VP_READ_ADVANCE(Val) \ 202 CHECK_LINE_END(Line); \ 203 uint32_t Val; \ 204 READ_NUM((*Line), (Val)); \ 205 Line++; 206 207 if (Line.is_at_end()) 208 return success(); 209 210 uint32_t NumValueKinds; 211 if (Line->getAsInteger(10, NumValueKinds)) { 212 // No value profile data 213 return success(); 214 } 215 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 216 return error(instrprof_error::malformed, 217 "number of value kinds is invalid"); 218 Line++; 219 220 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 221 VP_READ_ADVANCE(ValueKind); 222 if (ValueKind > IPVK_Last) 223 return error(instrprof_error::malformed, "value kind is invalid"); 224 ; 225 VP_READ_ADVANCE(NumValueSites); 226 if (!NumValueSites) 227 continue; 228 229 Record.reserveSites(VK, NumValueSites); 230 for (uint32_t S = 0; S < NumValueSites; S++) { 231 VP_READ_ADVANCE(NumValueData); 232 233 std::vector<InstrProfValueData> CurrentValues; 234 for (uint32_t V = 0; V < NumValueData; V++) { 235 CHECK_LINE_END(Line); 236 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 237 uint64_t TakenCount, Value; 238 if (ValueKind == IPVK_IndirectCallTarget) { 239 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 240 Value = 0; 241 } else { 242 if (Error E = Symtab->addFuncName(VD.first)) 243 return E; 244 Value = IndexedInstrProf::ComputeHash(VD.first); 245 } 246 } else { 247 READ_NUM(VD.first, Value); 248 } 249 READ_NUM(VD.second, TakenCount); 250 CurrentValues.push_back({Value, TakenCount}); 251 Line++; 252 } 253 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 254 nullptr); 255 } 256 } 257 return success(); 258 259 #undef CHECK_LINE_END 260 #undef READ_NUM 261 #undef VP_READ_ADVANCE 262 } 263 264 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 265 // Skip empty lines and comments. 266 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 267 ++Line; 268 // If we hit EOF while looking for a name, we're done. 269 if (Line.is_at_end()) { 270 return error(instrprof_error::eof); 271 } 272 273 // Read the function name. 274 Record.Name = *Line++; 275 if (Error E = Symtab->addFuncName(Record.Name)) 276 return error(std::move(E)); 277 278 // Read the function hash. 279 if (Line.is_at_end()) 280 return error(instrprof_error::truncated); 281 if ((Line++)->getAsInteger(0, Record.Hash)) 282 return error(instrprof_error::malformed, 283 "function hash is not a valid integer"); 284 285 // Read the number of counters. 286 uint64_t NumCounters; 287 if (Line.is_at_end()) 288 return error(instrprof_error::truncated); 289 if ((Line++)->getAsInteger(10, NumCounters)) 290 return error(instrprof_error::malformed, 291 "number of counters is not a valid integer"); 292 if (NumCounters == 0) 293 return error(instrprof_error::malformed, "number of counters is zero"); 294 295 // Read each counter and fill our internal storage with the values. 296 Record.Clear(); 297 Record.Counts.reserve(NumCounters); 298 for (uint64_t I = 0; I < NumCounters; ++I) { 299 if (Line.is_at_end()) 300 return error(instrprof_error::truncated); 301 uint64_t Count; 302 if ((Line++)->getAsInteger(10, Count)) 303 return error(instrprof_error::malformed, "count is invalid"); 304 Record.Counts.push_back(Count); 305 } 306 307 // Check if value profile data exists and read it if so. 308 if (Error E = readValueProfileData(Record)) 309 return error(std::move(E)); 310 311 return success(); 312 } 313 314 template <class IntPtrT> 315 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 316 return getProfileKindFromVersion(Version); 317 } 318 319 template <class IntPtrT> 320 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 321 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 322 return false; 323 uint64_t Magic = 324 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 325 return RawInstrProf::getMagic<IntPtrT>() == Magic || 326 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 327 } 328 329 template <class IntPtrT> 330 Error RawInstrProfReader<IntPtrT>::readHeader() { 331 if (!hasFormat(*DataBuffer)) 332 return error(instrprof_error::bad_magic); 333 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 334 return error(instrprof_error::bad_header); 335 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 336 DataBuffer->getBufferStart()); 337 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 338 return readHeader(*Header); 339 } 340 341 template <class IntPtrT> 342 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 343 const char *End = DataBuffer->getBufferEnd(); 344 // Skip zero padding between profiles. 345 while (CurrentPos != End && *CurrentPos == 0) 346 ++CurrentPos; 347 // If there's nothing left, we're done. 348 if (CurrentPos == End) 349 return make_error<InstrProfError>(instrprof_error::eof); 350 // If there isn't enough space for another header, this is probably just 351 // garbage at the end of the file. 352 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 353 return make_error<InstrProfError>(instrprof_error::malformed, 354 "not enough space for another header"); 355 // The writer ensures each profile is padded to start at an aligned address. 356 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 357 return make_error<InstrProfError>(instrprof_error::malformed, 358 "insufficient padding"); 359 // The magic should have the same byte order as in the previous header. 360 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 361 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 362 return make_error<InstrProfError>(instrprof_error::bad_magic); 363 364 // There's another profile to read, so we need to process the header. 365 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 366 return readHeader(*Header); 367 } 368 369 template <class IntPtrT> 370 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 371 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 372 return error(std::move(E)); 373 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 374 const IntPtrT FPtr = swap(I->FunctionPointer); 375 if (!FPtr) 376 continue; 377 Symtab.mapAddress(FPtr, I->NameRef); 378 } 379 return success(); 380 } 381 382 template <class IntPtrT> 383 Error RawInstrProfReader<IntPtrT>::readHeader( 384 const RawInstrProf::Header &Header) { 385 Version = swap(Header.Version); 386 if (GET_VERSION(Version) != RawInstrProf::Version) 387 return error(instrprof_error::unsupported_version); 388 if (useDebugInfoCorrelate() && !Correlator) 389 return error(instrprof_error::missing_debug_info_for_correlation); 390 if (!useDebugInfoCorrelate() && Correlator) 391 return error(instrprof_error::unexpected_debug_info_for_correlation); 392 393 BinaryIdsSize = swap(Header.BinaryIdsSize); 394 if (BinaryIdsSize % sizeof(uint64_t)) 395 return error(instrprof_error::bad_header); 396 397 CountersDelta = swap(Header.CountersDelta); 398 NamesDelta = swap(Header.NamesDelta); 399 auto NumData = swap(Header.DataSize); 400 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 401 auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); 402 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 403 auto NamesSize = swap(Header.NamesSize); 404 ValueKindLast = swap(Header.ValueKindLast); 405 406 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 407 auto PaddingSize = getNumPaddingBytes(NamesSize); 408 409 // Profile data starts after profile header and binary ids if exist. 410 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 411 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 412 ptrdiff_t NamesOffset = 413 CountersOffset + CountersSize + PaddingBytesAfterCounters; 414 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 415 416 auto *Start = reinterpret_cast<const char *>(&Header); 417 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 418 return error(instrprof_error::bad_header); 419 420 if (Correlator) { 421 // These sizes in the raw file are zero because we constructed them in the 422 // Correlator. 423 assert(DataSize == 0 && NamesSize == 0); 424 assert(CountersDelta == 0 && NamesDelta == 0); 425 Data = Correlator->getDataPointer(); 426 DataEnd = Data + Correlator->getDataSize(); 427 NamesStart = Correlator->getNamesPointer(); 428 NamesEnd = NamesStart + Correlator->getNamesSize(); 429 } else { 430 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 431 Start + DataOffset); 432 DataEnd = Data + NumData; 433 NamesStart = Start + NamesOffset; 434 NamesEnd = NamesStart + NamesSize; 435 } 436 437 // Binary ids start just after the header. 438 BinaryIdsStart = 439 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 440 CountersStart = Start + CountersOffset; 441 CountersEnd = CountersStart + CountersSize; 442 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 443 444 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 445 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 446 return error(instrprof_error::bad_header); 447 448 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 449 if (Error E = createSymtab(*NewSymtab.get())) 450 return E; 451 452 Symtab = std::move(NewSymtab); 453 return success(); 454 } 455 456 template <class IntPtrT> 457 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 458 Record.Name = getName(Data->NameRef); 459 return success(); 460 } 461 462 template <class IntPtrT> 463 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 464 Record.Hash = swap(Data->FuncHash); 465 return success(); 466 } 467 468 template <class IntPtrT> 469 Error RawInstrProfReader<IntPtrT>::readRawCounts( 470 InstrProfRecord &Record) { 471 uint32_t NumCounters = swap(Data->NumCounters); 472 if (NumCounters == 0) 473 return error(instrprof_error::malformed, "number of counters is zero"); 474 475 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 476 if (CounterBaseOffset < 0) 477 return error( 478 instrprof_error::malformed, 479 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 480 481 if (CounterBaseOffset >= CountersEnd - CountersStart) 482 return error(instrprof_error::malformed, 483 ("counter offset " + Twine(CounterBaseOffset) + 484 " is greater than the maximum counter offset " + 485 Twine(CountersEnd - CountersStart - 1)) 486 .str()); 487 488 uint64_t MaxNumCounters = 489 (CountersEnd - (CountersStart + CounterBaseOffset)) / 490 getCounterTypeSize(); 491 if (NumCounters > MaxNumCounters) 492 return error(instrprof_error::malformed, 493 ("number of counters " + Twine(NumCounters) + 494 " is greater than the maximum number of counters " + 495 Twine(MaxNumCounters)) 496 .str()); 497 498 Record.Counts.clear(); 499 Record.Counts.reserve(NumCounters); 500 for (uint32_t I = 0; I < NumCounters; I++) { 501 const char *Ptr = 502 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 503 if (hasSingleByteCoverage()) { 504 // A value of zero signifies the block is covered. 505 Record.Counts.push_back(*Ptr == 0 ? 1 : 0); 506 } else { 507 const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr); 508 Record.Counts.push_back(swap(*CounterValue)); 509 } 510 } 511 512 return success(); 513 } 514 515 template <class IntPtrT> 516 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 517 InstrProfRecord &Record) { 518 Record.clearValueData(); 519 CurValueDataSize = 0; 520 // Need to match the logic in value profile dumper code in compiler-rt: 521 uint32_t NumValueKinds = 0; 522 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 523 NumValueKinds += (Data->NumValueSites[I] != 0); 524 525 if (!NumValueKinds) 526 return success(); 527 528 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 529 ValueProfData::getValueProfData( 530 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 531 getDataEndianness()); 532 533 if (Error E = VDataPtrOrErr.takeError()) 534 return E; 535 536 // Note that besides deserialization, this also performs the conversion for 537 // indirect call targets. The function pointers from the raw profile are 538 // remapped into function name hashes. 539 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 540 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 541 return success(); 542 } 543 544 template <class IntPtrT> 545 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 546 if (atEnd()) 547 // At this point, ValueDataStart field points to the next header. 548 if (Error E = readNextHeader(getNextHeaderPos())) 549 return error(std::move(E)); 550 551 // Read name ad set it in Record. 552 if (Error E = readName(Record)) 553 return error(std::move(E)); 554 555 // Read FuncHash and set it in Record. 556 if (Error E = readFuncHash(Record)) 557 return error(std::move(E)); 558 559 // Read raw counts and set Record. 560 if (Error E = readRawCounts(Record)) 561 return error(std::move(E)); 562 563 // Read value data and set Record. 564 if (Error E = readValueProfilingData(Record)) 565 return error(std::move(E)); 566 567 // Iterate. 568 advanceData(); 569 return success(); 570 } 571 572 static size_t RoundUp(size_t size, size_t align) { 573 return (size + align - 1) & ~(align - 1); 574 } 575 576 template <class IntPtrT> 577 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 578 if (BinaryIdsSize == 0) 579 return success(); 580 581 OS << "Binary IDs: \n"; 582 const uint8_t *BI = BinaryIdsStart; 583 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 584 while (BI < BIEnd) { 585 size_t Remaining = BIEnd - BI; 586 587 // There should be enough left to read the binary ID size field. 588 if (Remaining < sizeof(uint64_t)) 589 return make_error<InstrProfError>( 590 instrprof_error::malformed, 591 "not enough data to read binary id length"); 592 593 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 594 595 // There should be enough left to read the binary ID size field, and the 596 // binary ID. 597 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 598 return make_error<InstrProfError>( 599 instrprof_error::malformed, "not enough data to read binary id data"); 600 601 // Increment by binary id length data type size. 602 BI += sizeof(BinaryIdLen); 603 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 604 return make_error<InstrProfError>( 605 instrprof_error::malformed, 606 "binary id that is read is bigger than buffer size"); 607 608 for (uint64_t I = 0; I < BinaryIdLen; I++) 609 OS << format("%02x", BI[I]); 610 OS << "\n"; 611 612 // Increment by binary id data length, rounded to the next 8 bytes. This 613 // accounts for the zero-padding after each build ID. 614 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 615 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 616 return make_error<InstrProfError>(instrprof_error::malformed); 617 } 618 619 return success(); 620 } 621 622 namespace llvm { 623 624 template class RawInstrProfReader<uint32_t>; 625 template class RawInstrProfReader<uint64_t>; 626 627 } // end namespace llvm 628 629 InstrProfLookupTrait::hash_value_type 630 InstrProfLookupTrait::ComputeHash(StringRef K) { 631 return IndexedInstrProf::ComputeHash(HashType, K); 632 } 633 634 using data_type = InstrProfLookupTrait::data_type; 635 using offset_type = InstrProfLookupTrait::offset_type; 636 637 bool InstrProfLookupTrait::readValueProfilingData( 638 const unsigned char *&D, const unsigned char *const End) { 639 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 640 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 641 642 if (VDataPtrOrErr.takeError()) 643 return false; 644 645 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 646 D += VDataPtrOrErr.get()->TotalSize; 647 648 return true; 649 } 650 651 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 652 offset_type N) { 653 using namespace support; 654 655 // Check if the data is corrupt. If so, don't try to read it. 656 if (N % sizeof(uint64_t)) 657 return data_type(); 658 659 DataBuffer.clear(); 660 std::vector<uint64_t> CounterBuffer; 661 662 const unsigned char *End = D + N; 663 while (D < End) { 664 // Read hash. 665 if (D + sizeof(uint64_t) >= End) 666 return data_type(); 667 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 668 669 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 670 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 671 // If format version is different then read the number of counters. 672 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 673 if (D + sizeof(uint64_t) > End) 674 return data_type(); 675 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 676 } 677 // Read counter values. 678 if (D + CountsSize * sizeof(uint64_t) > End) 679 return data_type(); 680 681 CounterBuffer.clear(); 682 CounterBuffer.reserve(CountsSize); 683 for (uint64_t J = 0; J < CountsSize; ++J) 684 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 685 686 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 687 688 // Read value profiling data. 689 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 690 !readValueProfilingData(D, End)) { 691 DataBuffer.clear(); 692 return data_type(); 693 } 694 } 695 return DataBuffer; 696 } 697 698 template <typename HashTableImpl> 699 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 700 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 701 auto Iter = HashTable->find(FuncName); 702 if (Iter == HashTable->end()) 703 return make_error<InstrProfError>(instrprof_error::unknown_function); 704 705 Data = (*Iter); 706 if (Data.empty()) 707 return make_error<InstrProfError>(instrprof_error::malformed, 708 "profile data is empty"); 709 710 return Error::success(); 711 } 712 713 template <typename HashTableImpl> 714 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 715 ArrayRef<NamedInstrProfRecord> &Data) { 716 if (atEnd()) 717 return make_error<InstrProfError>(instrprof_error::eof); 718 719 Data = *RecordIterator; 720 721 if (Data.empty()) 722 return make_error<InstrProfError>(instrprof_error::malformed, 723 "profile data is empty"); 724 725 return Error::success(); 726 } 727 728 template <typename HashTableImpl> 729 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 730 const unsigned char *Buckets, const unsigned char *const Payload, 731 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 732 uint64_t Version) { 733 FormatVersion = Version; 734 HashTable.reset(HashTableImpl::Create( 735 Buckets, Payload, Base, 736 typename HashTableImpl::InfoType(HashType, Version))); 737 RecordIterator = HashTable->data_begin(); 738 } 739 740 template <typename HashTableImpl> 741 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 742 return getProfileKindFromVersion(FormatVersion); 743 } 744 745 namespace { 746 /// A remapper that does not apply any remappings. 747 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 748 InstrProfReaderIndexBase &Underlying; 749 750 public: 751 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 752 : Underlying(Underlying) {} 753 754 Error getRecords(StringRef FuncName, 755 ArrayRef<NamedInstrProfRecord> &Data) override { 756 return Underlying.getRecords(FuncName, Data); 757 } 758 }; 759 } // namespace 760 761 /// A remapper that applies remappings based on a symbol remapping file. 762 template <typename HashTableImpl> 763 class llvm::InstrProfReaderItaniumRemapper 764 : public InstrProfReaderRemapper { 765 public: 766 InstrProfReaderItaniumRemapper( 767 std::unique_ptr<MemoryBuffer> RemapBuffer, 768 InstrProfReaderIndex<HashTableImpl> &Underlying) 769 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 770 } 771 772 /// Extract the original function name from a PGO function name. 773 static StringRef extractName(StringRef Name) { 774 // We can have multiple :-separated pieces; there can be pieces both 775 // before and after the mangled name. Find the first part that starts 776 // with '_Z'; we'll assume that's the mangled name we want. 777 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 778 while (true) { 779 Parts = Parts.second.split(':'); 780 if (Parts.first.startswith("_Z")) 781 return Parts.first; 782 if (Parts.second.empty()) 783 return Name; 784 } 785 } 786 787 /// Given a mangled name extracted from a PGO function name, and a new 788 /// form for that mangled name, reconstitute the name. 789 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 790 StringRef Replacement, 791 SmallVectorImpl<char> &Out) { 792 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 793 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 794 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 795 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 796 } 797 798 Error populateRemappings() override { 799 if (Error E = Remappings.read(*RemapBuffer)) 800 return E; 801 for (StringRef Name : Underlying.HashTable->keys()) { 802 StringRef RealName = extractName(Name); 803 if (auto Key = Remappings.insert(RealName)) { 804 // FIXME: We could theoretically map the same equivalence class to 805 // multiple names in the profile data. If that happens, we should 806 // return NamedInstrProfRecords from all of them. 807 MappedNames.insert({Key, RealName}); 808 } 809 } 810 return Error::success(); 811 } 812 813 Error getRecords(StringRef FuncName, 814 ArrayRef<NamedInstrProfRecord> &Data) override { 815 StringRef RealName = extractName(FuncName); 816 if (auto Key = Remappings.lookup(RealName)) { 817 StringRef Remapped = MappedNames.lookup(Key); 818 if (!Remapped.empty()) { 819 if (RealName.begin() == FuncName.begin() && 820 RealName.end() == FuncName.end()) 821 FuncName = Remapped; 822 else { 823 // Try rebuilding the name from the given remapping. 824 SmallString<256> Reconstituted; 825 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 826 Error E = Underlying.getRecords(Reconstituted, Data); 827 if (!E) 828 return E; 829 830 // If we failed because the name doesn't exist, fall back to asking 831 // about the original name. 832 if (Error Unhandled = handleErrors( 833 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 834 return Err->get() == instrprof_error::unknown_function 835 ? Error::success() 836 : Error(std::move(Err)); 837 })) 838 return Unhandled; 839 } 840 } 841 } 842 return Underlying.getRecords(FuncName, Data); 843 } 844 845 private: 846 /// The memory buffer containing the remapping configuration. Remappings 847 /// holds pointers into this buffer. 848 std::unique_ptr<MemoryBuffer> RemapBuffer; 849 850 /// The mangling remapper. 851 SymbolRemappingReader Remappings; 852 853 /// Mapping from mangled name keys to the name used for the key in the 854 /// profile data. 855 /// FIXME: Can we store a location within the on-disk hash table instead of 856 /// redoing lookup? 857 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 858 859 /// The real profile data reader. 860 InstrProfReaderIndex<HashTableImpl> &Underlying; 861 }; 862 863 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 864 using namespace support; 865 866 if (DataBuffer.getBufferSize() < 8) 867 return false; 868 uint64_t Magic = 869 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 870 // Verify that it's magical. 871 return Magic == IndexedInstrProf::Magic; 872 } 873 874 const unsigned char * 875 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 876 const unsigned char *Cur, bool UseCS) { 877 using namespace IndexedInstrProf; 878 using namespace support; 879 880 if (Version >= IndexedInstrProf::Version4) { 881 const IndexedInstrProf::Summary *SummaryInLE = 882 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 883 uint64_t NFields = 884 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 885 uint64_t NEntries = 886 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 887 uint32_t SummarySize = 888 IndexedInstrProf::Summary::getSize(NFields, NEntries); 889 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 890 IndexedInstrProf::allocSummary(SummarySize); 891 892 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 893 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 894 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 895 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 896 897 SummaryEntryVector DetailedSummary; 898 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 899 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 900 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 901 Ent.NumBlocks); 902 } 903 std::unique_ptr<llvm::ProfileSummary> &Summary = 904 UseCS ? this->CS_Summary : this->Summary; 905 906 // initialize InstrProfSummary using the SummaryData from disk. 907 Summary = std::make_unique<ProfileSummary>( 908 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 909 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 910 SummaryData->get(Summary::MaxBlockCount), 911 SummaryData->get(Summary::MaxInternalBlockCount), 912 SummaryData->get(Summary::MaxFunctionCount), 913 SummaryData->get(Summary::TotalNumBlocks), 914 SummaryData->get(Summary::TotalNumFunctions)); 915 return Cur + SummarySize; 916 } else { 917 // The older versions do not support a profile summary. This just computes 918 // an empty summary, which will not result in accurate hot/cold detection. 919 // We would need to call addRecord for all NamedInstrProfRecords to get the 920 // correct summary. However, this version is old (prior to early 2016) and 921 // has not been supporting an accurate summary for several years. 922 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 923 Summary = Builder.getSummary(); 924 return Cur; 925 } 926 } 927 928 Error IndexedInstrProfReader::readHeader() { 929 using namespace support; 930 931 const unsigned char *Start = 932 (const unsigned char *)DataBuffer->getBufferStart(); 933 const unsigned char *Cur = Start; 934 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 935 return error(instrprof_error::truncated); 936 937 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 938 Cur += sizeof(IndexedInstrProf::Header); 939 940 // Check the magic number. 941 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 942 if (Magic != IndexedInstrProf::Magic) 943 return error(instrprof_error::bad_magic); 944 945 // Read the version. 946 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 947 if (GET_VERSION(FormatVersion) > 948 IndexedInstrProf::ProfVersion::CurrentVersion) 949 return error(instrprof_error::unsupported_version); 950 951 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 952 /* UseCS */ false); 953 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 954 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 955 /* UseCS */ true); 956 957 // Read the hash type and start offset. 958 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 959 endian::byte_swap<uint64_t, little>(Header->HashType)); 960 if (HashType > IndexedInstrProf::HashT::Last) 961 return error(instrprof_error::unsupported_hash_type); 962 963 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 964 965 // The rest of the file is an on disk hash table. 966 auto IndexPtr = 967 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 968 Start + HashOffset, Cur, Start, HashType, FormatVersion); 969 970 // Load the remapping table now if requested. 971 if (RemappingBuffer) { 972 Remapper = std::make_unique< 973 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 974 std::move(RemappingBuffer), *IndexPtr); 975 if (Error E = Remapper->populateRemappings()) 976 return E; 977 } else { 978 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 979 } 980 Index = std::move(IndexPtr); 981 982 return success(); 983 } 984 985 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 986 if (Symtab.get()) 987 return *Symtab.get(); 988 989 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 990 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 991 consumeError(error(InstrProfError::take(std::move(E)))); 992 } 993 994 Symtab = std::move(NewSymtab); 995 return *Symtab.get(); 996 } 997 998 Expected<InstrProfRecord> 999 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 1000 uint64_t FuncHash) { 1001 ArrayRef<NamedInstrProfRecord> Data; 1002 Error Err = Remapper->getRecords(FuncName, Data); 1003 if (Err) 1004 return std::move(Err); 1005 // Found it. Look for counters with the right hash. 1006 for (const NamedInstrProfRecord &I : Data) { 1007 // Check for a match and fill the vector if there is one. 1008 if (I.Hash == FuncHash) 1009 return std::move(I); 1010 } 1011 return error(instrprof_error::hash_mismatch); 1012 } 1013 1014 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1015 uint64_t FuncHash, 1016 std::vector<uint64_t> &Counts) { 1017 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1018 if (Error E = Record.takeError()) 1019 return error(std::move(E)); 1020 1021 Counts = Record.get().Counts; 1022 return success(); 1023 } 1024 1025 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1026 ArrayRef<NamedInstrProfRecord> Data; 1027 1028 Error E = Index->getRecords(Data); 1029 if (E) 1030 return error(std::move(E)); 1031 1032 Record = Data[RecordIndex++]; 1033 if (RecordIndex >= Data.size()) { 1034 Index->advanceToNextKey(); 1035 RecordIndex = 0; 1036 } 1037 return success(); 1038 } 1039 1040 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1041 uint64_t NumFuncs = 0; 1042 for (const auto &Func : *this) { 1043 if (isIRLevelProfile()) { 1044 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1045 if (FuncIsCS != IsCS) 1046 continue; 1047 } 1048 Func.accumulateCounts(Sum); 1049 ++NumFuncs; 1050 } 1051 Sum.NumEntries = NumFuncs; 1052 } 1053