1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path) { 56 // Set up the buffer to read. 57 auto BufferOrError = setupMemoryBuffer(Path); 58 if (Error E = BufferOrError.takeError()) 59 return std::move(E); 60 return InstrProfReader::create(std::move(BufferOrError.get())); 61 } 62 63 Expected<std::unique_ptr<InstrProfReader>> 64 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 65 // Sanity check the buffer. 66 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 67 return make_error<InstrProfError>(instrprof_error::too_large); 68 69 if (Buffer->getBufferSize() == 0) 70 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 71 72 std::unique_ptr<InstrProfReader> Result; 73 // Create the reader. 74 if (IndexedInstrProfReader::hasFormat(*Buffer)) 75 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 76 else if (RawInstrProfReader64::hasFormat(*Buffer)) 77 Result.reset(new RawInstrProfReader64(std::move(Buffer))); 78 else if (RawInstrProfReader32::hasFormat(*Buffer)) 79 Result.reset(new RawInstrProfReader32(std::move(Buffer))); 80 else if (TextInstrProfReader::hasFormat(*Buffer)) 81 Result.reset(new TextInstrProfReader(std::move(Buffer))); 82 else 83 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 84 85 // Initialize the reader and return the result. 86 if (Error E = initializeReader(*Result)) 87 return std::move(E); 88 89 return std::move(Result); 90 } 91 92 Expected<std::unique_ptr<IndexedInstrProfReader>> 93 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 94 // Set up the buffer to read. 95 auto BufferOrError = setupMemoryBuffer(Path); 96 if (Error E = BufferOrError.takeError()) 97 return std::move(E); 98 99 // Set up the remapping buffer if requested. 100 std::unique_ptr<MemoryBuffer> RemappingBuffer; 101 std::string RemappingPathStr = RemappingPath.str(); 102 if (!RemappingPathStr.empty()) { 103 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 104 if (Error E = RemappingBufferOrError.takeError()) 105 return std::move(E); 106 RemappingBuffer = std::move(RemappingBufferOrError.get()); 107 } 108 109 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 110 std::move(RemappingBuffer)); 111 } 112 113 Expected<std::unique_ptr<IndexedInstrProfReader>> 114 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 115 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 116 // Sanity check the buffer. 117 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 118 return make_error<InstrProfError>(instrprof_error::too_large); 119 120 // Create the reader. 121 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 122 return make_error<InstrProfError>(instrprof_error::bad_magic); 123 auto Result = std::make_unique<IndexedInstrProfReader>( 124 std::move(Buffer), std::move(RemappingBuffer)); 125 126 // Initialize the reader and return the result. 127 if (Error E = initializeReader(*Result)) 128 return std::move(E); 129 130 return std::move(Result); 131 } 132 133 void InstrProfIterator::Increment() { 134 if (auto E = Reader->readNextRecord(Record)) { 135 // Handle errors in the reader. 136 InstrProfError::take(std::move(E)); 137 *this = InstrProfIterator(); 138 } 139 } 140 141 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 142 // Verify that this really looks like plain ASCII text by checking a 143 // 'reasonable' number of characters (up to profile magic size). 144 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 145 StringRef buffer = Buffer.getBufferStart(); 146 return count == 0 || 147 std::all_of(buffer.begin(), buffer.begin() + count, 148 [](char c) { return isPrint(c) || isSpace(c); }); 149 } 150 151 // Read the profile variant flag from the header: ":FE" means this is a FE 152 // generated profile. ":IR" means this is an IR level profile. Other strings 153 // with a leading ':' will be reported an error format. 154 Error TextInstrProfReader::readHeader() { 155 Symtab.reset(new InstrProfSymtab()); 156 bool IsIRInstr = false; 157 bool IsEntryFirst = false; 158 bool IsCS = false; 159 160 while (Line->startswith(":")) { 161 StringRef Str = Line->substr(1); 162 if (Str.equals_insensitive("ir")) 163 IsIRInstr = true; 164 else if (Str.equals_insensitive("fe")) 165 IsIRInstr = false; 166 else if (Str.equals_insensitive("csir")) { 167 IsIRInstr = true; 168 IsCS = true; 169 } else if (Str.equals_insensitive("entry_first")) 170 IsEntryFirst = true; 171 else if (Str.equals_insensitive("not_entry_first")) 172 IsEntryFirst = false; 173 else 174 return error(instrprof_error::bad_header); 175 ++Line; 176 } 177 IsIRLevelProfile = IsIRInstr; 178 InstrEntryBBEnabled = IsEntryFirst; 179 HasCSIRLevelProfile = IsCS; 180 return success(); 181 } 182 183 Error 184 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 185 186 #define CHECK_LINE_END(Line) \ 187 if (Line.is_at_end()) \ 188 return error(instrprof_error::truncated); 189 #define READ_NUM(Str, Dst) \ 190 if ((Str).getAsInteger(10, (Dst))) \ 191 return error(instrprof_error::malformed); 192 #define VP_READ_ADVANCE(Val) \ 193 CHECK_LINE_END(Line); \ 194 uint32_t Val; \ 195 READ_NUM((*Line), (Val)); \ 196 Line++; 197 198 if (Line.is_at_end()) 199 return success(); 200 201 uint32_t NumValueKinds; 202 if (Line->getAsInteger(10, NumValueKinds)) { 203 // No value profile data 204 return success(); 205 } 206 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 207 return error(instrprof_error::malformed); 208 Line++; 209 210 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 211 VP_READ_ADVANCE(ValueKind); 212 if (ValueKind > IPVK_Last) 213 return error(instrprof_error::malformed); 214 VP_READ_ADVANCE(NumValueSites); 215 if (!NumValueSites) 216 continue; 217 218 Record.reserveSites(VK, NumValueSites); 219 for (uint32_t S = 0; S < NumValueSites; S++) { 220 VP_READ_ADVANCE(NumValueData); 221 222 std::vector<InstrProfValueData> CurrentValues; 223 for (uint32_t V = 0; V < NumValueData; V++) { 224 CHECK_LINE_END(Line); 225 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 226 uint64_t TakenCount, Value; 227 if (ValueKind == IPVK_IndirectCallTarget) { 228 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 229 Value = 0; 230 } else { 231 if (Error E = Symtab->addFuncName(VD.first)) 232 return E; 233 Value = IndexedInstrProf::ComputeHash(VD.first); 234 } 235 } else { 236 READ_NUM(VD.first, Value); 237 } 238 READ_NUM(VD.second, TakenCount); 239 CurrentValues.push_back({Value, TakenCount}); 240 Line++; 241 } 242 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 243 nullptr); 244 } 245 } 246 return success(); 247 248 #undef CHECK_LINE_END 249 #undef READ_NUM 250 #undef VP_READ_ADVANCE 251 } 252 253 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 254 // Skip empty lines and comments. 255 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 256 ++Line; 257 // If we hit EOF while looking for a name, we're done. 258 if (Line.is_at_end()) { 259 return error(instrprof_error::eof); 260 } 261 262 // Read the function name. 263 Record.Name = *Line++; 264 if (Error E = Symtab->addFuncName(Record.Name)) 265 return error(std::move(E)); 266 267 // Read the function hash. 268 if (Line.is_at_end()) 269 return error(instrprof_error::truncated); 270 if ((Line++)->getAsInteger(0, Record.Hash)) 271 return error(instrprof_error::malformed); 272 273 // Read the number of counters. 274 uint64_t NumCounters; 275 if (Line.is_at_end()) 276 return error(instrprof_error::truncated); 277 if ((Line++)->getAsInteger(10, NumCounters)) 278 return error(instrprof_error::malformed); 279 if (NumCounters == 0) 280 return error(instrprof_error::malformed); 281 282 // Read each counter and fill our internal storage with the values. 283 Record.Clear(); 284 Record.Counts.reserve(NumCounters); 285 for (uint64_t I = 0; I < NumCounters; ++I) { 286 if (Line.is_at_end()) 287 return error(instrprof_error::truncated); 288 uint64_t Count; 289 if ((Line++)->getAsInteger(10, Count)) 290 return error(instrprof_error::malformed); 291 Record.Counts.push_back(Count); 292 } 293 294 // Check if value profile data exists and read it if so. 295 if (Error E = readValueProfileData(Record)) 296 return error(std::move(E)); 297 298 return success(); 299 } 300 301 template <class IntPtrT> 302 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 303 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 304 return false; 305 uint64_t Magic = 306 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 307 return RawInstrProf::getMagic<IntPtrT>() == Magic || 308 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 309 } 310 311 template <class IntPtrT> 312 Error RawInstrProfReader<IntPtrT>::readHeader() { 313 if (!hasFormat(*DataBuffer)) 314 return error(instrprof_error::bad_magic); 315 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 316 return error(instrprof_error::bad_header); 317 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 318 DataBuffer->getBufferStart()); 319 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 320 return readHeader(*Header); 321 } 322 323 template <class IntPtrT> 324 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 325 const char *End = DataBuffer->getBufferEnd(); 326 // Skip zero padding between profiles. 327 while (CurrentPos != End && *CurrentPos == 0) 328 ++CurrentPos; 329 // If there's nothing left, we're done. 330 if (CurrentPos == End) 331 return make_error<InstrProfError>(instrprof_error::eof); 332 // If there isn't enough space for another header, this is probably just 333 // garbage at the end of the file. 334 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 335 return make_error<InstrProfError>(instrprof_error::malformed); 336 // The writer ensures each profile is padded to start at an aligned address. 337 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 338 return make_error<InstrProfError>(instrprof_error::malformed); 339 // The magic should have the same byte order as in the previous header. 340 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 341 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 342 return make_error<InstrProfError>(instrprof_error::bad_magic); 343 344 // There's another profile to read, so we need to process the header. 345 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 346 return readHeader(*Header); 347 } 348 349 template <class IntPtrT> 350 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 351 if (Error E = Symtab.create(StringRef(NamesStart, NamesSize))) 352 return error(std::move(E)); 353 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 354 const IntPtrT FPtr = swap(I->FunctionPointer); 355 if (!FPtr) 356 continue; 357 Symtab.mapAddress(FPtr, I->NameRef); 358 } 359 return success(); 360 } 361 362 template <class IntPtrT> 363 Error RawInstrProfReader<IntPtrT>::readHeader( 364 const RawInstrProf::Header &Header) { 365 Version = swap(Header.Version); 366 if (GET_VERSION(Version) != RawInstrProf::Version) 367 return error(instrprof_error::unsupported_version); 368 369 BinaryIdsSize = swap(Header.BinaryIdsSize); 370 CountersDelta = swap(Header.CountersDelta); 371 NamesDelta = swap(Header.NamesDelta); 372 auto DataSize = swap(Header.DataSize); 373 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 374 auto CountersSize = swap(Header.CountersSize); 375 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 376 NamesSize = swap(Header.NamesSize); 377 ValueKindLast = swap(Header.ValueKindLast); 378 379 auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); 380 auto PaddingSize = getNumPaddingBytes(NamesSize); 381 382 // Profile data starts after profile header and binary ids if exist. 383 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 384 ptrdiff_t CountersOffset = 385 DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters; 386 ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) + 387 PaddingBytesAfterCounters; 388 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 389 390 auto *Start = reinterpret_cast<const char *>(&Header); 391 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 392 return error(instrprof_error::bad_header); 393 394 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 395 Start + DataOffset); 396 DataEnd = Data + DataSize; 397 398 // Binary ids start just after the header. 399 BinaryIdsStart = 400 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 401 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 402 NamesStart = Start + NamesOffset; 403 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 404 405 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 406 if (Error E = createSymtab(*NewSymtab.get())) 407 return E; 408 409 Symtab = std::move(NewSymtab); 410 return success(); 411 } 412 413 template <class IntPtrT> 414 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 415 Record.Name = getName(Data->NameRef); 416 return success(); 417 } 418 419 template <class IntPtrT> 420 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 421 Record.Hash = swap(Data->FuncHash); 422 return success(); 423 } 424 425 template <class IntPtrT> 426 Error RawInstrProfReader<IntPtrT>::readRawCounts( 427 InstrProfRecord &Record) { 428 uint32_t NumCounters = swap(Data->NumCounters); 429 IntPtrT CounterPtr = Data->CounterPtr; 430 if (NumCounters == 0) 431 return error(instrprof_error::malformed); 432 433 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 434 ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; 435 436 // Check bounds. Note that the counter pointer embedded in the data record 437 // may itself be corrupt. 438 if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters) 439 return error(instrprof_error::malformed); 440 441 // We need to compute the in-buffer counter offset from the in-memory address 442 // distance. The initial CountersDelta is the in-memory address difference 443 // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr - 444 // CountersDelta computes the offset into the in-buffer counter section. 445 // 446 // CountersDelta decreases as we advance to the next data record. 447 ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); 448 CountersDelta -= sizeof(*Data); 449 if (CounterOffset < 0 || CounterOffset > MaxNumCounters || 450 ((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters) 451 return error(instrprof_error::malformed); 452 453 auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); 454 455 if (ShouldSwapBytes) { 456 Record.Counts.clear(); 457 Record.Counts.reserve(RawCounts.size()); 458 for (uint64_t Count : RawCounts) 459 Record.Counts.push_back(swap(Count)); 460 } else 461 Record.Counts = RawCounts; 462 463 return success(); 464 } 465 466 template <class IntPtrT> 467 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 468 InstrProfRecord &Record) { 469 Record.clearValueData(); 470 CurValueDataSize = 0; 471 // Need to match the logic in value profile dumper code in compiler-rt: 472 uint32_t NumValueKinds = 0; 473 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 474 NumValueKinds += (Data->NumValueSites[I] != 0); 475 476 if (!NumValueKinds) 477 return success(); 478 479 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 480 ValueProfData::getValueProfData( 481 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 482 getDataEndianness()); 483 484 if (Error E = VDataPtrOrErr.takeError()) 485 return E; 486 487 // Note that besides deserialization, this also performs the conversion for 488 // indirect call targets. The function pointers from the raw profile are 489 // remapped into function name hashes. 490 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 491 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 492 return success(); 493 } 494 495 template <class IntPtrT> 496 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 497 if (atEnd()) 498 // At this point, ValueDataStart field points to the next header. 499 if (Error E = readNextHeader(getNextHeaderPos())) 500 return error(std::move(E)); 501 502 // Read name ad set it in Record. 503 if (Error E = readName(Record)) 504 return error(std::move(E)); 505 506 // Read FuncHash and set it in Record. 507 if (Error E = readFuncHash(Record)) 508 return error(std::move(E)); 509 510 // Read raw counts and set Record. 511 if (Error E = readRawCounts(Record)) 512 return error(std::move(E)); 513 514 // Read value data and set Record. 515 if (Error E = readValueProfilingData(Record)) 516 return error(std::move(E)); 517 518 // Iterate. 519 advanceData(); 520 return success(); 521 } 522 523 template <class IntPtrT> 524 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 525 if (BinaryIdsSize == 0) 526 return success(); 527 528 OS << "Binary IDs: \n"; 529 const uint8_t *BI = BinaryIdsStart; 530 while (BI < BinaryIdsStart + BinaryIdsSize) { 531 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 532 // Increment by binary id length data type size. 533 BI += sizeof(BinaryIdLen); 534 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 535 return make_error<InstrProfError>(instrprof_error::malformed); 536 537 for (uint64_t I = 0; I < BinaryIdLen; I++) 538 OS << format("%02x", BI[I]); 539 OS << "\n"; 540 541 // Increment by binary id data length. 542 BI += BinaryIdLen; 543 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 544 return make_error<InstrProfError>(instrprof_error::malformed); 545 } 546 547 return success(); 548 } 549 550 namespace llvm { 551 552 template class RawInstrProfReader<uint32_t>; 553 template class RawInstrProfReader<uint64_t>; 554 555 } // end namespace llvm 556 557 InstrProfLookupTrait::hash_value_type 558 InstrProfLookupTrait::ComputeHash(StringRef K) { 559 return IndexedInstrProf::ComputeHash(HashType, K); 560 } 561 562 using data_type = InstrProfLookupTrait::data_type; 563 using offset_type = InstrProfLookupTrait::offset_type; 564 565 bool InstrProfLookupTrait::readValueProfilingData( 566 const unsigned char *&D, const unsigned char *const End) { 567 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 568 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 569 570 if (VDataPtrOrErr.takeError()) 571 return false; 572 573 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 574 D += VDataPtrOrErr.get()->TotalSize; 575 576 return true; 577 } 578 579 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 580 offset_type N) { 581 using namespace support; 582 583 // Check if the data is corrupt. If so, don't try to read it. 584 if (N % sizeof(uint64_t)) 585 return data_type(); 586 587 DataBuffer.clear(); 588 std::vector<uint64_t> CounterBuffer; 589 590 const unsigned char *End = D + N; 591 while (D < End) { 592 // Read hash. 593 if (D + sizeof(uint64_t) >= End) 594 return data_type(); 595 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 596 597 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 598 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 599 // If format version is different then read the number of counters. 600 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 601 if (D + sizeof(uint64_t) > End) 602 return data_type(); 603 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 604 } 605 // Read counter values. 606 if (D + CountsSize * sizeof(uint64_t) > End) 607 return data_type(); 608 609 CounterBuffer.clear(); 610 CounterBuffer.reserve(CountsSize); 611 for (uint64_t J = 0; J < CountsSize; ++J) 612 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 613 614 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 615 616 // Read value profiling data. 617 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 618 !readValueProfilingData(D, End)) { 619 DataBuffer.clear(); 620 return data_type(); 621 } 622 } 623 return DataBuffer; 624 } 625 626 template <typename HashTableImpl> 627 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 628 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 629 auto Iter = HashTable->find(FuncName); 630 if (Iter == HashTable->end()) 631 return make_error<InstrProfError>(instrprof_error::unknown_function); 632 633 Data = (*Iter); 634 if (Data.empty()) 635 return make_error<InstrProfError>(instrprof_error::malformed); 636 637 return Error::success(); 638 } 639 640 template <typename HashTableImpl> 641 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 642 ArrayRef<NamedInstrProfRecord> &Data) { 643 if (atEnd()) 644 return make_error<InstrProfError>(instrprof_error::eof); 645 646 Data = *RecordIterator; 647 648 if (Data.empty()) 649 return make_error<InstrProfError>(instrprof_error::malformed); 650 651 return Error::success(); 652 } 653 654 template <typename HashTableImpl> 655 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 656 const unsigned char *Buckets, const unsigned char *const Payload, 657 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 658 uint64_t Version) { 659 FormatVersion = Version; 660 HashTable.reset(HashTableImpl::Create( 661 Buckets, Payload, Base, 662 typename HashTableImpl::InfoType(HashType, Version))); 663 RecordIterator = HashTable->data_begin(); 664 } 665 666 namespace { 667 /// A remapper that does not apply any remappings. 668 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 669 InstrProfReaderIndexBase &Underlying; 670 671 public: 672 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 673 : Underlying(Underlying) {} 674 675 Error getRecords(StringRef FuncName, 676 ArrayRef<NamedInstrProfRecord> &Data) override { 677 return Underlying.getRecords(FuncName, Data); 678 } 679 }; 680 } 681 682 /// A remapper that applies remappings based on a symbol remapping file. 683 template <typename HashTableImpl> 684 class llvm::InstrProfReaderItaniumRemapper 685 : public InstrProfReaderRemapper { 686 public: 687 InstrProfReaderItaniumRemapper( 688 std::unique_ptr<MemoryBuffer> RemapBuffer, 689 InstrProfReaderIndex<HashTableImpl> &Underlying) 690 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 691 } 692 693 /// Extract the original function name from a PGO function name. 694 static StringRef extractName(StringRef Name) { 695 // We can have multiple :-separated pieces; there can be pieces both 696 // before and after the mangled name. Find the first part that starts 697 // with '_Z'; we'll assume that's the mangled name we want. 698 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 699 while (true) { 700 Parts = Parts.second.split(':'); 701 if (Parts.first.startswith("_Z")) 702 return Parts.first; 703 if (Parts.second.empty()) 704 return Name; 705 } 706 } 707 708 /// Given a mangled name extracted from a PGO function name, and a new 709 /// form for that mangled name, reconstitute the name. 710 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 711 StringRef Replacement, 712 SmallVectorImpl<char> &Out) { 713 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 714 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 715 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 716 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 717 } 718 719 Error populateRemappings() override { 720 if (Error E = Remappings.read(*RemapBuffer)) 721 return E; 722 for (StringRef Name : Underlying.HashTable->keys()) { 723 StringRef RealName = extractName(Name); 724 if (auto Key = Remappings.insert(RealName)) { 725 // FIXME: We could theoretically map the same equivalence class to 726 // multiple names in the profile data. If that happens, we should 727 // return NamedInstrProfRecords from all of them. 728 MappedNames.insert({Key, RealName}); 729 } 730 } 731 return Error::success(); 732 } 733 734 Error getRecords(StringRef FuncName, 735 ArrayRef<NamedInstrProfRecord> &Data) override { 736 StringRef RealName = extractName(FuncName); 737 if (auto Key = Remappings.lookup(RealName)) { 738 StringRef Remapped = MappedNames.lookup(Key); 739 if (!Remapped.empty()) { 740 if (RealName.begin() == FuncName.begin() && 741 RealName.end() == FuncName.end()) 742 FuncName = Remapped; 743 else { 744 // Try rebuilding the name from the given remapping. 745 SmallString<256> Reconstituted; 746 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 747 Error E = Underlying.getRecords(Reconstituted, Data); 748 if (!E) 749 return E; 750 751 // If we failed because the name doesn't exist, fall back to asking 752 // about the original name. 753 if (Error Unhandled = handleErrors( 754 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 755 return Err->get() == instrprof_error::unknown_function 756 ? Error::success() 757 : Error(std::move(Err)); 758 })) 759 return Unhandled; 760 } 761 } 762 } 763 return Underlying.getRecords(FuncName, Data); 764 } 765 766 private: 767 /// The memory buffer containing the remapping configuration. Remappings 768 /// holds pointers into this buffer. 769 std::unique_ptr<MemoryBuffer> RemapBuffer; 770 771 /// The mangling remapper. 772 SymbolRemappingReader Remappings; 773 774 /// Mapping from mangled name keys to the name used for the key in the 775 /// profile data. 776 /// FIXME: Can we store a location within the on-disk hash table instead of 777 /// redoing lookup? 778 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 779 780 /// The real profile data reader. 781 InstrProfReaderIndex<HashTableImpl> &Underlying; 782 }; 783 784 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 785 using namespace support; 786 787 if (DataBuffer.getBufferSize() < 8) 788 return false; 789 uint64_t Magic = 790 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 791 // Verify that it's magical. 792 return Magic == IndexedInstrProf::Magic; 793 } 794 795 const unsigned char * 796 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 797 const unsigned char *Cur, bool UseCS) { 798 using namespace IndexedInstrProf; 799 using namespace support; 800 801 if (Version >= IndexedInstrProf::Version4) { 802 const IndexedInstrProf::Summary *SummaryInLE = 803 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 804 uint64_t NFields = 805 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 806 uint64_t NEntries = 807 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 808 uint32_t SummarySize = 809 IndexedInstrProf::Summary::getSize(NFields, NEntries); 810 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 811 IndexedInstrProf::allocSummary(SummarySize); 812 813 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 814 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 815 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 816 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 817 818 SummaryEntryVector DetailedSummary; 819 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 820 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 821 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 822 Ent.NumBlocks); 823 } 824 std::unique_ptr<llvm::ProfileSummary> &Summary = 825 UseCS ? this->CS_Summary : this->Summary; 826 827 // initialize InstrProfSummary using the SummaryData from disk. 828 Summary = std::make_unique<ProfileSummary>( 829 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 830 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 831 SummaryData->get(Summary::MaxBlockCount), 832 SummaryData->get(Summary::MaxInternalBlockCount), 833 SummaryData->get(Summary::MaxFunctionCount), 834 SummaryData->get(Summary::TotalNumBlocks), 835 SummaryData->get(Summary::TotalNumFunctions)); 836 return Cur + SummarySize; 837 } else { 838 // The older versions do not support a profile summary. This just computes 839 // an empty summary, which will not result in accurate hot/cold detection. 840 // We would need to call addRecord for all NamedInstrProfRecords to get the 841 // correct summary. However, this version is old (prior to early 2016) and 842 // has not been supporting an accurate summary for several years. 843 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 844 Summary = Builder.getSummary(); 845 return Cur; 846 } 847 } 848 849 Error IndexedInstrProfReader::readHeader() { 850 using namespace support; 851 852 const unsigned char *Start = 853 (const unsigned char *)DataBuffer->getBufferStart(); 854 const unsigned char *Cur = Start; 855 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 856 return error(instrprof_error::truncated); 857 858 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 859 Cur += sizeof(IndexedInstrProf::Header); 860 861 // Check the magic number. 862 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 863 if (Magic != IndexedInstrProf::Magic) 864 return error(instrprof_error::bad_magic); 865 866 // Read the version. 867 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 868 if (GET_VERSION(FormatVersion) > 869 IndexedInstrProf::ProfVersion::CurrentVersion) 870 return error(instrprof_error::unsupported_version); 871 872 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 873 /* UseCS */ false); 874 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 875 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 876 /* UseCS */ true); 877 878 // Read the hash type and start offset. 879 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 880 endian::byte_swap<uint64_t, little>(Header->HashType)); 881 if (HashType > IndexedInstrProf::HashT::Last) 882 return error(instrprof_error::unsupported_hash_type); 883 884 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 885 886 // The rest of the file is an on disk hash table. 887 auto IndexPtr = 888 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 889 Start + HashOffset, Cur, Start, HashType, FormatVersion); 890 891 // Load the remapping table now if requested. 892 if (RemappingBuffer) { 893 Remapper = std::make_unique< 894 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 895 std::move(RemappingBuffer), *IndexPtr); 896 if (Error E = Remapper->populateRemappings()) 897 return E; 898 } else { 899 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 900 } 901 Index = std::move(IndexPtr); 902 903 return success(); 904 } 905 906 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 907 if (Symtab.get()) 908 return *Symtab.get(); 909 910 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 911 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 912 consumeError(error(InstrProfError::take(std::move(E)))); 913 } 914 915 Symtab = std::move(NewSymtab); 916 return *Symtab.get(); 917 } 918 919 Expected<InstrProfRecord> 920 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 921 uint64_t FuncHash) { 922 ArrayRef<NamedInstrProfRecord> Data; 923 Error Err = Remapper->getRecords(FuncName, Data); 924 if (Err) 925 return std::move(Err); 926 // Found it. Look for counters with the right hash. 927 for (unsigned I = 0, E = Data.size(); I < E; ++I) { 928 // Check for a match and fill the vector if there is one. 929 if (Data[I].Hash == FuncHash) { 930 return std::move(Data[I]); 931 } 932 } 933 return error(instrprof_error::hash_mismatch); 934 } 935 936 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 937 uint64_t FuncHash, 938 std::vector<uint64_t> &Counts) { 939 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 940 if (Error E = Record.takeError()) 941 return error(std::move(E)); 942 943 Counts = Record.get().Counts; 944 return success(); 945 } 946 947 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 948 ArrayRef<NamedInstrProfRecord> Data; 949 950 Error E = Index->getRecords(Data); 951 if (E) 952 return error(std::move(E)); 953 954 Record = Data[RecordIndex++]; 955 if (RecordIndex >= Data.size()) { 956 Index->advanceToNextKey(); 957 RecordIndex = 0; 958 } 959 return success(); 960 } 961 962 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 963 uint64_t NumFuncs = 0; 964 for (const auto &Func : *this) { 965 if (isIRLevelProfile()) { 966 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 967 if (FuncIsCS != IsCS) 968 continue; 969 } 970 Func.accumulateCounts(Sum); 971 ++NumFuncs; 972 } 973 Sum.NumEntries = NumFuncs; 974 } 975