1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/MemProf.h" 23 #include "llvm/ProfileData/ProfileCommon.h" 24 #include "llvm/ProfileData/RawMemProfReader.h" 25 #include "llvm/Support/Endian.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/ErrorOr.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 #include "llvm/Support/SwapByteOrder.h" 30 #include "llvm/Support/SymbolRemappingReader.h" 31 #include <algorithm> 32 #include <cctype> 33 #include <cstddef> 34 #include <cstdint> 35 #include <limits> 36 #include <memory> 37 #include <system_error> 38 #include <utility> 39 #include <vector> 40 41 using namespace llvm; 42 43 // Extracts the variant information from the top 8 bits in the version and 44 // returns an enum specifying the variants present. 45 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 46 InstrProfKind ProfileKind = InstrProfKind::Unknown; 47 if (Version & VARIANT_MASK_IR_PROF) { 48 ProfileKind |= InstrProfKind::IR; 49 } 50 if (Version & VARIANT_MASK_CSIR_PROF) { 51 ProfileKind |= InstrProfKind::CS; 52 } 53 if (Version & VARIANT_MASK_INSTR_ENTRY) { 54 ProfileKind |= InstrProfKind::BB; 55 } 56 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 57 ProfileKind |= InstrProfKind::SingleByteCoverage; 58 } 59 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 60 ProfileKind |= InstrProfKind::FunctionEntryOnly; 61 } 62 if (Version & VARIANT_MASK_MEMPROF) { 63 ProfileKind |= InstrProfKind::MemProf; 64 } 65 return ProfileKind; 66 } 67 68 static Expected<std::unique_ptr<MemoryBuffer>> 69 setupMemoryBuffer(const Twine &Path) { 70 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 71 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 72 if (std::error_code EC = BufferOrErr.getError()) 73 return errorCodeToError(EC); 74 return std::move(BufferOrErr.get()); 75 } 76 77 static Error initializeReader(InstrProfReader &Reader) { 78 return Reader.readHeader(); 79 } 80 81 Expected<std::unique_ptr<InstrProfReader>> 82 InstrProfReader::create(const Twine &Path, 83 const InstrProfCorrelator *Correlator) { 84 // Set up the buffer to read. 85 auto BufferOrError = setupMemoryBuffer(Path); 86 if (Error E = BufferOrError.takeError()) 87 return std::move(E); 88 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 89 } 90 91 Expected<std::unique_ptr<InstrProfReader>> 92 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 93 const InstrProfCorrelator *Correlator) { 94 // Sanity check the buffer. 95 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 96 return make_error<InstrProfError>(instrprof_error::too_large); 97 98 if (Buffer->getBufferSize() == 0) 99 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 100 101 std::unique_ptr<InstrProfReader> Result; 102 // Create the reader. 103 if (IndexedInstrProfReader::hasFormat(*Buffer)) 104 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 105 else if (RawInstrProfReader64::hasFormat(*Buffer)) 106 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 107 else if (RawInstrProfReader32::hasFormat(*Buffer)) 108 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 109 else if (TextInstrProfReader::hasFormat(*Buffer)) 110 Result.reset(new TextInstrProfReader(std::move(Buffer))); 111 else 112 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 113 114 // Initialize the reader and return the result. 115 if (Error E = initializeReader(*Result)) 116 return std::move(E); 117 118 return std::move(Result); 119 } 120 121 Expected<std::unique_ptr<IndexedInstrProfReader>> 122 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 123 // Set up the buffer to read. 124 auto BufferOrError = setupMemoryBuffer(Path); 125 if (Error E = BufferOrError.takeError()) 126 return std::move(E); 127 128 // Set up the remapping buffer if requested. 129 std::unique_ptr<MemoryBuffer> RemappingBuffer; 130 std::string RemappingPathStr = RemappingPath.str(); 131 if (!RemappingPathStr.empty()) { 132 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 133 if (Error E = RemappingBufferOrError.takeError()) 134 return std::move(E); 135 RemappingBuffer = std::move(RemappingBufferOrError.get()); 136 } 137 138 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 139 std::move(RemappingBuffer)); 140 } 141 142 Expected<std::unique_ptr<IndexedInstrProfReader>> 143 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 144 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 145 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 146 return make_error<InstrProfError>(instrprof_error::too_large); 147 148 // Create the reader. 149 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 150 return make_error<InstrProfError>(instrprof_error::bad_magic); 151 auto Result = std::make_unique<IndexedInstrProfReader>( 152 std::move(Buffer), std::move(RemappingBuffer)); 153 154 // Initialize the reader and return the result. 155 if (Error E = initializeReader(*Result)) 156 return std::move(E); 157 158 return std::move(Result); 159 } 160 161 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 162 // Verify that this really looks like plain ASCII text by checking a 163 // 'reasonable' number of characters (up to profile magic size). 164 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 165 StringRef buffer = Buffer.getBufferStart(); 166 return count == 0 || 167 std::all_of(buffer.begin(), buffer.begin() + count, 168 [](char c) { return isPrint(c) || isSpace(c); }); 169 } 170 171 // Read the profile variant flag from the header: ":FE" means this is a FE 172 // generated profile. ":IR" means this is an IR level profile. Other strings 173 // with a leading ':' will be reported an error format. 174 Error TextInstrProfReader::readHeader() { 175 Symtab.reset(new InstrProfSymtab()); 176 177 while (Line->startswith(":")) { 178 StringRef Str = Line->substr(1); 179 if (Str.equals_insensitive("ir")) 180 ProfileKind |= InstrProfKind::IR; 181 else if (Str.equals_insensitive("fe")) 182 ProfileKind |= InstrProfKind::FE; 183 else if (Str.equals_insensitive("csir")) { 184 ProfileKind |= InstrProfKind::IR; 185 ProfileKind |= InstrProfKind::CS; 186 } else if (Str.equals_insensitive("entry_first")) 187 ProfileKind |= InstrProfKind::BB; 188 else if (Str.equals_insensitive("not_entry_first")) 189 ProfileKind &= ~InstrProfKind::BB; 190 else 191 return error(instrprof_error::bad_header); 192 ++Line; 193 } 194 return success(); 195 } 196 197 Error 198 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 199 200 #define CHECK_LINE_END(Line) \ 201 if (Line.is_at_end()) \ 202 return error(instrprof_error::truncated); 203 #define READ_NUM(Str, Dst) \ 204 if ((Str).getAsInteger(10, (Dst))) \ 205 return error(instrprof_error::malformed); 206 #define VP_READ_ADVANCE(Val) \ 207 CHECK_LINE_END(Line); \ 208 uint32_t Val; \ 209 READ_NUM((*Line), (Val)); \ 210 Line++; 211 212 if (Line.is_at_end()) 213 return success(); 214 215 uint32_t NumValueKinds; 216 if (Line->getAsInteger(10, NumValueKinds)) { 217 // No value profile data 218 return success(); 219 } 220 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 221 return error(instrprof_error::malformed, 222 "number of value kinds is invalid"); 223 Line++; 224 225 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 226 VP_READ_ADVANCE(ValueKind); 227 if (ValueKind > IPVK_Last) 228 return error(instrprof_error::malformed, "value kind is invalid"); 229 ; 230 VP_READ_ADVANCE(NumValueSites); 231 if (!NumValueSites) 232 continue; 233 234 Record.reserveSites(VK, NumValueSites); 235 for (uint32_t S = 0; S < NumValueSites; S++) { 236 VP_READ_ADVANCE(NumValueData); 237 238 std::vector<InstrProfValueData> CurrentValues; 239 for (uint32_t V = 0; V < NumValueData; V++) { 240 CHECK_LINE_END(Line); 241 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 242 uint64_t TakenCount, Value; 243 if (ValueKind == IPVK_IndirectCallTarget) { 244 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 245 Value = 0; 246 } else { 247 if (Error E = Symtab->addFuncName(VD.first)) 248 return E; 249 Value = IndexedInstrProf::ComputeHash(VD.first); 250 } 251 } else { 252 READ_NUM(VD.first, Value); 253 } 254 READ_NUM(VD.second, TakenCount); 255 CurrentValues.push_back({Value, TakenCount}); 256 Line++; 257 } 258 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 259 nullptr); 260 } 261 } 262 return success(); 263 264 #undef CHECK_LINE_END 265 #undef READ_NUM 266 #undef VP_READ_ADVANCE 267 } 268 269 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 270 // Skip empty lines and comments. 271 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 272 ++Line; 273 // If we hit EOF while looking for a name, we're done. 274 if (Line.is_at_end()) { 275 return error(instrprof_error::eof); 276 } 277 278 // Read the function name. 279 Record.Name = *Line++; 280 if (Error E = Symtab->addFuncName(Record.Name)) 281 return error(std::move(E)); 282 283 // Read the function hash. 284 if (Line.is_at_end()) 285 return error(instrprof_error::truncated); 286 if ((Line++)->getAsInteger(0, Record.Hash)) 287 return error(instrprof_error::malformed, 288 "function hash is not a valid integer"); 289 290 // Read the number of counters. 291 uint64_t NumCounters; 292 if (Line.is_at_end()) 293 return error(instrprof_error::truncated); 294 if ((Line++)->getAsInteger(10, NumCounters)) 295 return error(instrprof_error::malformed, 296 "number of counters is not a valid integer"); 297 if (NumCounters == 0) 298 return error(instrprof_error::malformed, "number of counters is zero"); 299 300 // Read each counter and fill our internal storage with the values. 301 Record.Clear(); 302 Record.Counts.reserve(NumCounters); 303 for (uint64_t I = 0; I < NumCounters; ++I) { 304 if (Line.is_at_end()) 305 return error(instrprof_error::truncated); 306 uint64_t Count; 307 if ((Line++)->getAsInteger(10, Count)) 308 return error(instrprof_error::malformed, "count is invalid"); 309 Record.Counts.push_back(Count); 310 } 311 312 // Check if value profile data exists and read it if so. 313 if (Error E = readValueProfileData(Record)) 314 return error(std::move(E)); 315 316 return success(); 317 } 318 319 template <class IntPtrT> 320 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 321 return getProfileKindFromVersion(Version); 322 } 323 324 template <class IntPtrT> 325 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 326 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 327 return false; 328 uint64_t Magic = 329 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 330 return RawInstrProf::getMagic<IntPtrT>() == Magic || 331 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 332 } 333 334 template <class IntPtrT> 335 Error RawInstrProfReader<IntPtrT>::readHeader() { 336 if (!hasFormat(*DataBuffer)) 337 return error(instrprof_error::bad_magic); 338 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 339 return error(instrprof_error::bad_header); 340 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 341 DataBuffer->getBufferStart()); 342 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 343 return readHeader(*Header); 344 } 345 346 template <class IntPtrT> 347 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 348 const char *End = DataBuffer->getBufferEnd(); 349 // Skip zero padding between profiles. 350 while (CurrentPos != End && *CurrentPos == 0) 351 ++CurrentPos; 352 // If there's nothing left, we're done. 353 if (CurrentPos == End) 354 return make_error<InstrProfError>(instrprof_error::eof); 355 // If there isn't enough space for another header, this is probably just 356 // garbage at the end of the file. 357 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 358 return make_error<InstrProfError>(instrprof_error::malformed, 359 "not enough space for another header"); 360 // The writer ensures each profile is padded to start at an aligned address. 361 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 362 return make_error<InstrProfError>(instrprof_error::malformed, 363 "insufficient padding"); 364 // The magic should have the same byte order as in the previous header. 365 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 366 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 367 return make_error<InstrProfError>(instrprof_error::bad_magic); 368 369 // There's another profile to read, so we need to process the header. 370 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 371 return readHeader(*Header); 372 } 373 374 template <class IntPtrT> 375 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 376 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 377 return error(std::move(E)); 378 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 379 const IntPtrT FPtr = swap(I->FunctionPointer); 380 if (!FPtr) 381 continue; 382 Symtab.mapAddress(FPtr, I->NameRef); 383 } 384 return success(); 385 } 386 387 template <class IntPtrT> 388 Error RawInstrProfReader<IntPtrT>::readHeader( 389 const RawInstrProf::Header &Header) { 390 Version = swap(Header.Version); 391 if (GET_VERSION(Version) != RawInstrProf::Version) 392 return error(instrprof_error::unsupported_version); 393 if (useDebugInfoCorrelate() && !Correlator) 394 return error(instrprof_error::missing_debug_info_for_correlation); 395 if (!useDebugInfoCorrelate() && Correlator) 396 return error(instrprof_error::unexpected_debug_info_for_correlation); 397 398 BinaryIdsSize = swap(Header.BinaryIdsSize); 399 if (BinaryIdsSize % sizeof(uint64_t)) 400 return error(instrprof_error::bad_header); 401 402 CountersDelta = swap(Header.CountersDelta); 403 NamesDelta = swap(Header.NamesDelta); 404 auto NumData = swap(Header.DataSize); 405 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 406 auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); 407 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 408 auto NamesSize = swap(Header.NamesSize); 409 ValueKindLast = swap(Header.ValueKindLast); 410 411 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 412 auto PaddingSize = getNumPaddingBytes(NamesSize); 413 414 // Profile data starts after profile header and binary ids if exist. 415 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 416 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 417 ptrdiff_t NamesOffset = 418 CountersOffset + CountersSize + PaddingBytesAfterCounters; 419 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 420 421 auto *Start = reinterpret_cast<const char *>(&Header); 422 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 423 return error(instrprof_error::bad_header); 424 425 if (Correlator) { 426 // These sizes in the raw file are zero because we constructed them in the 427 // Correlator. 428 assert(DataSize == 0 && NamesSize == 0); 429 assert(CountersDelta == 0 && NamesDelta == 0); 430 Data = Correlator->getDataPointer(); 431 DataEnd = Data + Correlator->getDataSize(); 432 NamesStart = Correlator->getNamesPointer(); 433 NamesEnd = NamesStart + Correlator->getNamesSize(); 434 } else { 435 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 436 Start + DataOffset); 437 DataEnd = Data + NumData; 438 NamesStart = Start + NamesOffset; 439 NamesEnd = NamesStart + NamesSize; 440 } 441 442 // Binary ids start just after the header. 443 BinaryIdsStart = 444 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 445 CountersStart = Start + CountersOffset; 446 CountersEnd = CountersStart + CountersSize; 447 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 448 449 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 450 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 451 return error(instrprof_error::bad_header); 452 453 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 454 if (Error E = createSymtab(*NewSymtab.get())) 455 return E; 456 457 Symtab = std::move(NewSymtab); 458 return success(); 459 } 460 461 template <class IntPtrT> 462 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 463 Record.Name = getName(Data->NameRef); 464 return success(); 465 } 466 467 template <class IntPtrT> 468 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 469 Record.Hash = swap(Data->FuncHash); 470 return success(); 471 } 472 473 template <class IntPtrT> 474 Error RawInstrProfReader<IntPtrT>::readRawCounts( 475 InstrProfRecord &Record) { 476 uint32_t NumCounters = swap(Data->NumCounters); 477 if (NumCounters == 0) 478 return error(instrprof_error::malformed, "number of counters is zero"); 479 480 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 481 if (CounterBaseOffset < 0) 482 return error( 483 instrprof_error::malformed, 484 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 485 486 if (CounterBaseOffset >= CountersEnd - CountersStart) 487 return error(instrprof_error::malformed, 488 ("counter offset " + Twine(CounterBaseOffset) + 489 " is greater than the maximum counter offset " + 490 Twine(CountersEnd - CountersStart - 1)) 491 .str()); 492 493 uint64_t MaxNumCounters = 494 (CountersEnd - (CountersStart + CounterBaseOffset)) / 495 getCounterTypeSize(); 496 if (NumCounters > MaxNumCounters) 497 return error(instrprof_error::malformed, 498 ("number of counters " + Twine(NumCounters) + 499 " is greater than the maximum number of counters " + 500 Twine(MaxNumCounters)) 501 .str()); 502 503 Record.Counts.clear(); 504 Record.Counts.reserve(NumCounters); 505 for (uint32_t I = 0; I < NumCounters; I++) { 506 const char *Ptr = 507 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 508 if (hasSingleByteCoverage()) { 509 // A value of zero signifies the block is covered. 510 Record.Counts.push_back(*Ptr == 0 ? 1 : 0); 511 } else { 512 const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr); 513 Record.Counts.push_back(swap(*CounterValue)); 514 } 515 } 516 517 return success(); 518 } 519 520 template <class IntPtrT> 521 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 522 InstrProfRecord &Record) { 523 Record.clearValueData(); 524 CurValueDataSize = 0; 525 // Need to match the logic in value profile dumper code in compiler-rt: 526 uint32_t NumValueKinds = 0; 527 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 528 NumValueKinds += (Data->NumValueSites[I] != 0); 529 530 if (!NumValueKinds) 531 return success(); 532 533 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 534 ValueProfData::getValueProfData( 535 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 536 getDataEndianness()); 537 538 if (Error E = VDataPtrOrErr.takeError()) 539 return E; 540 541 // Note that besides deserialization, this also performs the conversion for 542 // indirect call targets. The function pointers from the raw profile are 543 // remapped into function name hashes. 544 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 545 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 546 return success(); 547 } 548 549 template <class IntPtrT> 550 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 551 if (atEnd()) 552 // At this point, ValueDataStart field points to the next header. 553 if (Error E = readNextHeader(getNextHeaderPos())) 554 return error(std::move(E)); 555 556 // Read name ad set it in Record. 557 if (Error E = readName(Record)) 558 return error(std::move(E)); 559 560 // Read FuncHash and set it in Record. 561 if (Error E = readFuncHash(Record)) 562 return error(std::move(E)); 563 564 // Read raw counts and set Record. 565 if (Error E = readRawCounts(Record)) 566 return error(std::move(E)); 567 568 // Read value data and set Record. 569 if (Error E = readValueProfilingData(Record)) 570 return error(std::move(E)); 571 572 // Iterate. 573 advanceData(); 574 return success(); 575 } 576 577 static size_t RoundUp(size_t size, size_t align) { 578 return (size + align - 1) & ~(align - 1); 579 } 580 581 template <class IntPtrT> 582 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 583 if (BinaryIdsSize == 0) 584 return success(); 585 586 OS << "Binary IDs: \n"; 587 const uint8_t *BI = BinaryIdsStart; 588 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 589 while (BI < BIEnd) { 590 size_t Remaining = BIEnd - BI; 591 592 // There should be enough left to read the binary ID size field. 593 if (Remaining < sizeof(uint64_t)) 594 return make_error<InstrProfError>( 595 instrprof_error::malformed, 596 "not enough data to read binary id length"); 597 598 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 599 600 // There should be enough left to read the binary ID size field, and the 601 // binary ID. 602 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 603 return make_error<InstrProfError>( 604 instrprof_error::malformed, "not enough data to read binary id data"); 605 606 // Increment by binary id length data type size. 607 BI += sizeof(BinaryIdLen); 608 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 609 return make_error<InstrProfError>( 610 instrprof_error::malformed, 611 "binary id that is read is bigger than buffer size"); 612 613 for (uint64_t I = 0; I < BinaryIdLen; I++) 614 OS << format("%02x", BI[I]); 615 OS << "\n"; 616 617 // Increment by binary id data length, rounded to the next 8 bytes. This 618 // accounts for the zero-padding after each build ID. 619 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 620 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 621 return make_error<InstrProfError>(instrprof_error::malformed); 622 } 623 624 return success(); 625 } 626 627 namespace llvm { 628 629 template class RawInstrProfReader<uint32_t>; 630 template class RawInstrProfReader<uint64_t>; 631 632 } // end namespace llvm 633 634 InstrProfLookupTrait::hash_value_type 635 InstrProfLookupTrait::ComputeHash(StringRef K) { 636 return IndexedInstrProf::ComputeHash(HashType, K); 637 } 638 639 using data_type = InstrProfLookupTrait::data_type; 640 using offset_type = InstrProfLookupTrait::offset_type; 641 642 bool InstrProfLookupTrait::readValueProfilingData( 643 const unsigned char *&D, const unsigned char *const End) { 644 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 645 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 646 647 if (VDataPtrOrErr.takeError()) 648 return false; 649 650 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 651 D += VDataPtrOrErr.get()->TotalSize; 652 653 return true; 654 } 655 656 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 657 offset_type N) { 658 using namespace support; 659 660 // Check if the data is corrupt. If so, don't try to read it. 661 if (N % sizeof(uint64_t)) 662 return data_type(); 663 664 DataBuffer.clear(); 665 std::vector<uint64_t> CounterBuffer; 666 667 const unsigned char *End = D + N; 668 while (D < End) { 669 // Read hash. 670 if (D + sizeof(uint64_t) >= End) 671 return data_type(); 672 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 673 674 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 675 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 676 // If format version is different then read the number of counters. 677 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 678 if (D + sizeof(uint64_t) > End) 679 return data_type(); 680 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 681 } 682 // Read counter values. 683 if (D + CountsSize * sizeof(uint64_t) > End) 684 return data_type(); 685 686 CounterBuffer.clear(); 687 CounterBuffer.reserve(CountsSize); 688 for (uint64_t J = 0; J < CountsSize; ++J) 689 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 690 691 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 692 693 // Read value profiling data. 694 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 695 !readValueProfilingData(D, End)) { 696 DataBuffer.clear(); 697 return data_type(); 698 } 699 } 700 return DataBuffer; 701 } 702 703 template <typename HashTableImpl> 704 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 705 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 706 auto Iter = HashTable->find(FuncName); 707 if (Iter == HashTable->end()) 708 return make_error<InstrProfError>(instrprof_error::unknown_function); 709 710 Data = (*Iter); 711 if (Data.empty()) 712 return make_error<InstrProfError>(instrprof_error::malformed, 713 "profile data is empty"); 714 715 return Error::success(); 716 } 717 718 template <typename HashTableImpl> 719 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 720 ArrayRef<NamedInstrProfRecord> &Data) { 721 if (atEnd()) 722 return make_error<InstrProfError>(instrprof_error::eof); 723 724 Data = *RecordIterator; 725 726 if (Data.empty()) 727 return make_error<InstrProfError>(instrprof_error::malformed, 728 "profile data is empty"); 729 730 return Error::success(); 731 } 732 733 template <typename HashTableImpl> 734 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 735 const unsigned char *Buckets, const unsigned char *const Payload, 736 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 737 uint64_t Version) { 738 FormatVersion = Version; 739 HashTable.reset(HashTableImpl::Create( 740 Buckets, Payload, Base, 741 typename HashTableImpl::InfoType(HashType, Version))); 742 RecordIterator = HashTable->data_begin(); 743 } 744 745 template <typename HashTableImpl> 746 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 747 return getProfileKindFromVersion(FormatVersion); 748 } 749 750 namespace { 751 /// A remapper that does not apply any remappings. 752 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 753 InstrProfReaderIndexBase &Underlying; 754 755 public: 756 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 757 : Underlying(Underlying) {} 758 759 Error getRecords(StringRef FuncName, 760 ArrayRef<NamedInstrProfRecord> &Data) override { 761 return Underlying.getRecords(FuncName, Data); 762 } 763 }; 764 } // namespace 765 766 /// A remapper that applies remappings based on a symbol remapping file. 767 template <typename HashTableImpl> 768 class llvm::InstrProfReaderItaniumRemapper 769 : public InstrProfReaderRemapper { 770 public: 771 InstrProfReaderItaniumRemapper( 772 std::unique_ptr<MemoryBuffer> RemapBuffer, 773 InstrProfReaderIndex<HashTableImpl> &Underlying) 774 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 775 } 776 777 /// Extract the original function name from a PGO function name. 778 static StringRef extractName(StringRef Name) { 779 // We can have multiple :-separated pieces; there can be pieces both 780 // before and after the mangled name. Find the first part that starts 781 // with '_Z'; we'll assume that's the mangled name we want. 782 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 783 while (true) { 784 Parts = Parts.second.split(':'); 785 if (Parts.first.startswith("_Z")) 786 return Parts.first; 787 if (Parts.second.empty()) 788 return Name; 789 } 790 } 791 792 /// Given a mangled name extracted from a PGO function name, and a new 793 /// form for that mangled name, reconstitute the name. 794 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 795 StringRef Replacement, 796 SmallVectorImpl<char> &Out) { 797 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 798 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 799 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 800 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 801 } 802 803 Error populateRemappings() override { 804 if (Error E = Remappings.read(*RemapBuffer)) 805 return E; 806 for (StringRef Name : Underlying.HashTable->keys()) { 807 StringRef RealName = extractName(Name); 808 if (auto Key = Remappings.insert(RealName)) { 809 // FIXME: We could theoretically map the same equivalence class to 810 // multiple names in the profile data. If that happens, we should 811 // return NamedInstrProfRecords from all of them. 812 MappedNames.insert({Key, RealName}); 813 } 814 } 815 return Error::success(); 816 } 817 818 Error getRecords(StringRef FuncName, 819 ArrayRef<NamedInstrProfRecord> &Data) override { 820 StringRef RealName = extractName(FuncName); 821 if (auto Key = Remappings.lookup(RealName)) { 822 StringRef Remapped = MappedNames.lookup(Key); 823 if (!Remapped.empty()) { 824 if (RealName.begin() == FuncName.begin() && 825 RealName.end() == FuncName.end()) 826 FuncName = Remapped; 827 else { 828 // Try rebuilding the name from the given remapping. 829 SmallString<256> Reconstituted; 830 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 831 Error E = Underlying.getRecords(Reconstituted, Data); 832 if (!E) 833 return E; 834 835 // If we failed because the name doesn't exist, fall back to asking 836 // about the original name. 837 if (Error Unhandled = handleErrors( 838 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 839 return Err->get() == instrprof_error::unknown_function 840 ? Error::success() 841 : Error(std::move(Err)); 842 })) 843 return Unhandled; 844 } 845 } 846 } 847 return Underlying.getRecords(FuncName, Data); 848 } 849 850 private: 851 /// The memory buffer containing the remapping configuration. Remappings 852 /// holds pointers into this buffer. 853 std::unique_ptr<MemoryBuffer> RemapBuffer; 854 855 /// The mangling remapper. 856 SymbolRemappingReader Remappings; 857 858 /// Mapping from mangled name keys to the name used for the key in the 859 /// profile data. 860 /// FIXME: Can we store a location within the on-disk hash table instead of 861 /// redoing lookup? 862 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 863 864 /// The real profile data reader. 865 InstrProfReaderIndex<HashTableImpl> &Underlying; 866 }; 867 868 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 869 using namespace support; 870 871 if (DataBuffer.getBufferSize() < 8) 872 return false; 873 uint64_t Magic = 874 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 875 // Verify that it's magical. 876 return Magic == IndexedInstrProf::Magic; 877 } 878 879 const unsigned char * 880 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 881 const unsigned char *Cur, bool UseCS) { 882 using namespace IndexedInstrProf; 883 using namespace support; 884 885 if (Version >= IndexedInstrProf::Version4) { 886 const IndexedInstrProf::Summary *SummaryInLE = 887 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 888 uint64_t NFields = 889 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 890 uint64_t NEntries = 891 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 892 uint32_t SummarySize = 893 IndexedInstrProf::Summary::getSize(NFields, NEntries); 894 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 895 IndexedInstrProf::allocSummary(SummarySize); 896 897 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 898 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 899 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 900 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 901 902 SummaryEntryVector DetailedSummary; 903 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 904 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 905 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 906 Ent.NumBlocks); 907 } 908 std::unique_ptr<llvm::ProfileSummary> &Summary = 909 UseCS ? this->CS_Summary : this->Summary; 910 911 // initialize InstrProfSummary using the SummaryData from disk. 912 Summary = std::make_unique<ProfileSummary>( 913 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 914 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 915 SummaryData->get(Summary::MaxBlockCount), 916 SummaryData->get(Summary::MaxInternalBlockCount), 917 SummaryData->get(Summary::MaxFunctionCount), 918 SummaryData->get(Summary::TotalNumBlocks), 919 SummaryData->get(Summary::TotalNumFunctions)); 920 return Cur + SummarySize; 921 } else { 922 // The older versions do not support a profile summary. This just computes 923 // an empty summary, which will not result in accurate hot/cold detection. 924 // We would need to call addRecord for all NamedInstrProfRecords to get the 925 // correct summary. However, this version is old (prior to early 2016) and 926 // has not been supporting an accurate summary for several years. 927 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 928 Summary = Builder.getSummary(); 929 return Cur; 930 } 931 } 932 933 Error IndexedInstrProfReader::readHeader() { 934 using namespace support; 935 936 const unsigned char *Start = 937 (const unsigned char *)DataBuffer->getBufferStart(); 938 const unsigned char *Cur = Start; 939 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 940 return error(instrprof_error::truncated); 941 942 auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start); 943 if (!HeaderOr) 944 return HeaderOr.takeError(); 945 946 const IndexedInstrProf::Header *Header = &HeaderOr.get(); 947 Cur += Header->size(); 948 949 Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 950 /* UseCS */ false); 951 if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) 952 Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 953 /* UseCS */ true); 954 955 // Read the hash type and start offset. 956 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 957 endian::byte_swap<uint64_t, little>(Header->HashType)); 958 if (HashType > IndexedInstrProf::HashT::Last) 959 return error(instrprof_error::unsupported_hash_type); 960 961 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 962 963 // The hash table with profile counts comes next. 964 auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 965 Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); 966 967 // The MemProfOffset field in the header is only valid when the format version 968 // is higher than 8 (when it was introduced). 969 if (GET_VERSION(Header->Version) >= 8 && 970 Header->Version & VARIANT_MASK_MEMPROF) { 971 uint64_t MemProfOffset = 972 endian::byte_swap<uint64_t, little>(Header->MemProfOffset); 973 974 const unsigned char *Ptr = Start + MemProfOffset; 975 // The value returned from Generator.Emit. 976 const uint64_t TableOffset = 977 support::endian::readNext<uint64_t, little, unaligned>(Ptr); 978 979 // Read the schema. 980 auto SchemaOr = memprof::readMemProfSchema(Ptr); 981 if (!SchemaOr) 982 return SchemaOr.takeError(); 983 Schema = SchemaOr.get(); 984 985 // Now initialize the table reader with a pointer into data buffer. 986 MemProfTable.reset(MemProfHashTable::Create( 987 /*Buckets=*/Start + TableOffset, 988 /*Payload=*/Ptr, 989 /*Base=*/Start, memprof::MemProfRecordLookupTrait(Schema))); 990 } 991 992 // Load the remapping table now if requested. 993 if (RemappingBuffer) { 994 Remapper = std::make_unique< 995 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 996 std::move(RemappingBuffer), *IndexPtr); 997 if (Error E = Remapper->populateRemappings()) 998 return E; 999 } else { 1000 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 1001 } 1002 Index = std::move(IndexPtr); 1003 1004 return success(); 1005 } 1006 1007 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 1008 if (Symtab.get()) 1009 return *Symtab.get(); 1010 1011 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 1012 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 1013 consumeError(error(InstrProfError::take(std::move(E)))); 1014 } 1015 1016 Symtab = std::move(NewSymtab); 1017 return *Symtab.get(); 1018 } 1019 1020 Expected<InstrProfRecord> 1021 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 1022 uint64_t FuncHash) { 1023 ArrayRef<NamedInstrProfRecord> Data; 1024 Error Err = Remapper->getRecords(FuncName, Data); 1025 if (Err) 1026 return std::move(Err); 1027 // Found it. Look for counters with the right hash. 1028 for (const NamedInstrProfRecord &I : Data) { 1029 // Check for a match and fill the vector if there is one. 1030 if (I.Hash == FuncHash) 1031 return std::move(I); 1032 } 1033 return error(instrprof_error::hash_mismatch); 1034 } 1035 1036 Expected<ArrayRef<memprof::MemProfRecord>> 1037 IndexedInstrProfReader::getMemProfRecord(uint64_t FuncNameHash) { 1038 auto Iter = MemProfTable->find(FuncNameHash); 1039 if (Iter == MemProfTable->end()) 1040 // TODO: Add memprof specific errors. 1041 return make_error<InstrProfError>(instrprof_error::hash_mismatch, 1042 "memprof record not found for hash " + 1043 Twine(FuncNameHash)); 1044 return *Iter; 1045 } 1046 1047 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1048 uint64_t FuncHash, 1049 std::vector<uint64_t> &Counts) { 1050 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1051 if (Error E = Record.takeError()) 1052 return error(std::move(E)); 1053 1054 Counts = Record.get().Counts; 1055 return success(); 1056 } 1057 1058 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1059 ArrayRef<NamedInstrProfRecord> Data; 1060 1061 Error E = Index->getRecords(Data); 1062 if (E) 1063 return error(std::move(E)); 1064 1065 Record = Data[RecordIndex++]; 1066 if (RecordIndex >= Data.size()) { 1067 Index->advanceToNextKey(); 1068 RecordIndex = 0; 1069 } 1070 return success(); 1071 } 1072 1073 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1074 uint64_t NumFuncs = 0; 1075 for (const auto &Func : *this) { 1076 if (isIRLevelProfile()) { 1077 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1078 if (FuncIsCS != IsCS) 1079 continue; 1080 } 1081 Func.accumulateCounts(Sum); 1082 ++NumFuncs; 1083 } 1084 Sum.NumEntries = NumFuncs; 1085 } 1086