1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the class that reads LLVM sample profiles. It 10 // supports three file formats: text, binary and gcov. 11 // 12 // The textual representation is useful for debugging and testing purposes. The 13 // binary representation is more compact, resulting in smaller file sizes. 14 // 15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation 16 // tool (https://github.com/google/autofdo) 17 // 18 // All three encodings can be used interchangeably as an input sample profile. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/ProfileData/SampleProfReader.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/IR/ProfileSummary.h" 27 #include "llvm/ProfileData/ProfileCommon.h" 28 #include "llvm/ProfileData/SampleProf.h" 29 #include "llvm/Support/Compression.h" 30 #include "llvm/Support/ErrorOr.h" 31 #include "llvm/Support/LEB128.h" 32 #include "llvm/Support/LineIterator.h" 33 #include "llvm/Support/MD5.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <algorithm> 37 #include <cstddef> 38 #include <cstdint> 39 #include <limits> 40 #include <memory> 41 #include <set> 42 #include <system_error> 43 #include <vector> 44 45 using namespace llvm; 46 using namespace sampleprof; 47 48 /// Dump the function profile for \p FName. 49 /// 50 /// \param FName Name of the function to print. 51 /// \param OS Stream to emit the output to. 52 void SampleProfileReader::dumpFunctionProfile(StringRef FName, 53 raw_ostream &OS) { 54 OS << "Function: " << FName << ": " << Profiles[FName]; 55 } 56 57 /// Dump all the function profiles found on stream \p OS. 58 void SampleProfileReader::dump(raw_ostream &OS) { 59 for (const auto &I : Profiles) 60 dumpFunctionProfile(I.getKey(), OS); 61 } 62 63 /// Parse \p Input as function head. 64 /// 65 /// Parse one line of \p Input, and update function name in \p FName, 66 /// function's total sample count in \p NumSamples, function's entry 67 /// count in \p NumHeadSamples. 68 /// 69 /// \returns true if parsing is successful. 70 static bool ParseHead(const StringRef &Input, StringRef &FName, 71 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 72 if (Input[0] == ' ') 73 return false; 74 size_t n2 = Input.rfind(':'); 75 size_t n1 = Input.rfind(':', n2 - 1); 76 FName = Input.substr(0, n1); 77 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 78 return false; 79 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 80 return false; 81 return true; 82 } 83 84 /// Returns true if line offset \p L is legal (only has 16 bits). 85 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 86 87 /// Parse \p Input that contains metadata. 88 /// Possible metadata: 89 /// - CFG Checksum information: 90 /// !CFGChecksum: 12345 91 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 92 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) { 93 if (!Input.startswith("!CFGChecksum:")) 94 return false; 95 96 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 97 return !CFGInfo.getAsInteger(10, FunctionHash); 98 } 99 100 enum class LineType { 101 CallSiteProfile, 102 BodyProfile, 103 Metadata, 104 }; 105 106 /// Parse \p Input as line sample. 107 /// 108 /// \param Input input line. 109 /// \param LineTy Type of this line. 110 /// \param Depth the depth of the inline stack. 111 /// \param NumSamples total samples of the line/inlined callsite. 112 /// \param LineOffset line offset to the start of the function. 113 /// \param Discriminator discriminator of the line. 114 /// \param TargetCountMap map from indirect call target to count. 115 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 116 /// 117 /// returns true if parsing is successful. 118 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 119 uint64_t &NumSamples, uint32_t &LineOffset, 120 uint32_t &Discriminator, StringRef &CalleeName, 121 DenseMap<StringRef, uint64_t> &TargetCountMap, 122 uint64_t &FunctionHash) { 123 for (Depth = 0; Input[Depth] == ' '; Depth++) 124 ; 125 if (Depth == 0) 126 return false; 127 128 if (Depth == 1 && Input[Depth] == '!') { 129 LineTy = LineType::Metadata; 130 return parseMetadata(Input.substr(Depth), FunctionHash); 131 } 132 133 size_t n1 = Input.find(':'); 134 StringRef Loc = Input.substr(Depth, n1 - Depth); 135 size_t n2 = Loc.find('.'); 136 if (n2 == StringRef::npos) { 137 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 138 return false; 139 Discriminator = 0; 140 } else { 141 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 142 return false; 143 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 144 return false; 145 } 146 147 StringRef Rest = Input.substr(n1 + 2); 148 if (isDigit(Rest[0])) { 149 LineTy = LineType::BodyProfile; 150 size_t n3 = Rest.find(' '); 151 if (n3 == StringRef::npos) { 152 if (Rest.getAsInteger(10, NumSamples)) 153 return false; 154 } else { 155 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 156 return false; 157 } 158 // Find call targets and their sample counts. 159 // Note: In some cases, there are symbols in the profile which are not 160 // mangled. To accommodate such cases, use colon + integer pairs as the 161 // anchor points. 162 // An example: 163 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 164 // ":1000" and ":437" are used as anchor points so the string above will 165 // be interpreted as 166 // target: _M_construct<char *> 167 // count: 1000 168 // target: string_view<std::allocator<char> > 169 // count: 437 170 while (n3 != StringRef::npos) { 171 n3 += Rest.substr(n3).find_first_not_of(' '); 172 Rest = Rest.substr(n3); 173 n3 = Rest.find_first_of(':'); 174 if (n3 == StringRef::npos || n3 == 0) 175 return false; 176 177 StringRef Target; 178 uint64_t count, n4; 179 while (true) { 180 // Get the segment after the current colon. 181 StringRef AfterColon = Rest.substr(n3 + 1); 182 // Get the target symbol before the current colon. 183 Target = Rest.substr(0, n3); 184 // Check if the word after the current colon is an integer. 185 n4 = AfterColon.find_first_of(' '); 186 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 187 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 188 if (!WordAfterColon.getAsInteger(10, count)) 189 break; 190 191 // Try to find the next colon. 192 uint64_t n5 = AfterColon.find_first_of(':'); 193 if (n5 == StringRef::npos) 194 return false; 195 n3 += n5 + 1; 196 } 197 198 // An anchor point is found. Save the {target, count} pair 199 TargetCountMap[Target] = count; 200 if (n4 == Rest.size()) 201 break; 202 // Change n3 to the next blank space after colon + integer pair. 203 n3 = n4; 204 } 205 } else { 206 LineTy = LineType::CallSiteProfile; 207 size_t n3 = Rest.find_last_of(':'); 208 CalleeName = Rest.substr(0, n3); 209 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 210 return false; 211 } 212 return true; 213 } 214 215 /// Load samples from a text file. 216 /// 217 /// See the documentation at the top of the file for an explanation of 218 /// the expected format. 219 /// 220 /// \returns true if the file was loaded successfully, false otherwise. 221 std::error_code SampleProfileReaderText::readImpl() { 222 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 223 sampleprof_error Result = sampleprof_error::success; 224 225 InlineCallStack InlineStack; 226 uint32_t ProbeProfileCount = 0; 227 228 // SeenMetadata tracks whether we have processed metadata for the current 229 // top-level function profile. 230 bool SeenMetadata = false; 231 232 for (; !LineIt.is_at_eof(); ++LineIt) { 233 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') 234 continue; 235 // Read the header of each function. 236 // 237 // Note that for function identifiers we are actually expecting 238 // mangled names, but we may not always get them. This happens when 239 // the compiler decides not to emit the function (e.g., it was inlined 240 // and removed). In this case, the binary will not have the linkage 241 // name for the function, so the profiler will emit the function's 242 // unmangled name, which may contain characters like ':' and '>' in its 243 // name (member functions, templates, etc). 244 // 245 // The only requirement we place on the identifier, then, is that it 246 // should not begin with a number. 247 if ((*LineIt)[0] != ' ') { 248 uint64_t NumSamples, NumHeadSamples; 249 StringRef FName; 250 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 251 reportError(LineIt.line_number(), 252 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 253 return sampleprof_error::malformed; 254 } 255 SeenMetadata = false; 256 SampleContext FContext(FName); 257 if (FContext.hasContext()) 258 ++CSProfileCount; 259 Profiles[FContext] = FunctionSamples(); 260 FunctionSamples &FProfile = Profiles[FContext]; 261 FProfile.setName(FContext.getNameWithoutContext()); 262 FProfile.setContext(FContext); 263 MergeResult(Result, FProfile.addTotalSamples(NumSamples)); 264 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); 265 InlineStack.clear(); 266 InlineStack.push_back(&FProfile); 267 } else { 268 uint64_t NumSamples; 269 StringRef FName; 270 DenseMap<StringRef, uint64_t> TargetCountMap; 271 uint32_t Depth, LineOffset, Discriminator; 272 LineType LineTy; 273 uint64_t FunctionHash; 274 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 275 Discriminator, FName, TargetCountMap, FunctionHash)) { 276 reportError(LineIt.line_number(), 277 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 278 *LineIt); 279 return sampleprof_error::malformed; 280 } 281 if (SeenMetadata && LineTy != LineType::Metadata) { 282 // Metadata must be put at the end of a function profile. 283 reportError(LineIt.line_number(), 284 "Found non-metadata after metadata: " + *LineIt); 285 return sampleprof_error::malformed; 286 } 287 while (InlineStack.size() > Depth) { 288 InlineStack.pop_back(); 289 } 290 switch (LineTy) { 291 case LineType::CallSiteProfile: { 292 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 293 LineLocation(LineOffset, Discriminator))[std::string(FName)]; 294 FSamples.setName(FName); 295 MergeResult(Result, FSamples.addTotalSamples(NumSamples)); 296 InlineStack.push_back(&FSamples); 297 break; 298 } 299 case LineType::BodyProfile: { 300 while (InlineStack.size() > Depth) { 301 InlineStack.pop_back(); 302 } 303 FunctionSamples &FProfile = *InlineStack.back(); 304 for (const auto &name_count : TargetCountMap) { 305 MergeResult(Result, FProfile.addCalledTargetSamples( 306 LineOffset, Discriminator, name_count.first, 307 name_count.second)); 308 } 309 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, 310 NumSamples)); 311 break; 312 } 313 case LineType::Metadata: { 314 FunctionSamples &FProfile = *InlineStack.back(); 315 FProfile.setFunctionHash(FunctionHash); 316 ++ProbeProfileCount; 317 SeenMetadata = true; 318 break; 319 } 320 } 321 } 322 } 323 324 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 325 "Cannot have both context-sensitive and regular profile"); 326 ProfileIsCS = (CSProfileCount > 0); 327 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && 328 "Cannot have both probe-based profiles and regular profiles"); 329 ProfileIsProbeBased = (ProbeProfileCount > 0); 330 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 331 FunctionSamples::ProfileIsCS = ProfileIsCS; 332 333 if (Result == sampleprof_error::success) 334 computeSummary(); 335 336 return Result; 337 } 338 339 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 340 bool result = false; 341 342 // Check that the first non-comment line is a valid function header. 343 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 344 if (!LineIt.is_at_eof()) { 345 if ((*LineIt)[0] != ' ') { 346 uint64_t NumSamples, NumHeadSamples; 347 StringRef FName; 348 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 349 } 350 } 351 352 return result; 353 } 354 355 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 356 unsigned NumBytesRead = 0; 357 std::error_code EC; 358 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 359 360 if (Val > std::numeric_limits<T>::max()) 361 EC = sampleprof_error::malformed; 362 else if (Data + NumBytesRead > End) 363 EC = sampleprof_error::truncated; 364 else 365 EC = sampleprof_error::success; 366 367 if (EC) { 368 reportError(0, EC.message()); 369 return EC; 370 } 371 372 Data += NumBytesRead; 373 return static_cast<T>(Val); 374 } 375 376 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 377 std::error_code EC; 378 StringRef Str(reinterpret_cast<const char *>(Data)); 379 if (Data + Str.size() + 1 > End) { 380 EC = sampleprof_error::truncated; 381 reportError(0, EC.message()); 382 return EC; 383 } 384 385 Data += Str.size() + 1; 386 return Str; 387 } 388 389 template <typename T> 390 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 391 std::error_code EC; 392 393 if (Data + sizeof(T) > End) { 394 EC = sampleprof_error::truncated; 395 reportError(0, EC.message()); 396 return EC; 397 } 398 399 using namespace support; 400 T Val = endian::readNext<T, little, unaligned>(Data); 401 return Val; 402 } 403 404 template <typename T> 405 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 406 std::error_code EC; 407 auto Idx = readNumber<uint32_t>(); 408 if (std::error_code EC = Idx.getError()) 409 return EC; 410 if (*Idx >= Table.size()) 411 return sampleprof_error::truncated_name_table; 412 return *Idx; 413 } 414 415 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() { 416 auto Idx = readStringIndex(NameTable); 417 if (std::error_code EC = Idx.getError()) 418 return EC; 419 420 return NameTable[*Idx]; 421 } 422 423 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() { 424 if (!FixedLengthMD5) 425 return SampleProfileReaderBinary::readStringFromTable(); 426 427 // read NameTable index. 428 auto Idx = readStringIndex(NameTable); 429 if (std::error_code EC = Idx.getError()) 430 return EC; 431 432 // Check whether the name to be accessed has been accessed before, 433 // if not, read it from memory directly. 434 StringRef &SR = NameTable[*Idx]; 435 if (SR.empty()) { 436 const uint8_t *SavedData = Data; 437 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); 438 auto FID = readUnencodedNumber<uint64_t>(); 439 if (std::error_code EC = FID.getError()) 440 return EC; 441 // Save the string converted from uint64_t in MD5StringBuf. All the 442 // references to the name are all StringRefs refering to the string 443 // in MD5StringBuf. 444 MD5StringBuf->push_back(std::to_string(*FID)); 445 SR = MD5StringBuf->back(); 446 Data = SavedData; 447 } 448 return SR; 449 } 450 451 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() { 452 auto Idx = readStringIndex(NameTable); 453 if (std::error_code EC = Idx.getError()) 454 return EC; 455 456 return StringRef(NameTable[*Idx]); 457 } 458 459 std::error_code 460 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 461 auto NumSamples = readNumber<uint64_t>(); 462 if (std::error_code EC = NumSamples.getError()) 463 return EC; 464 FProfile.addTotalSamples(*NumSamples); 465 466 // Read the samples in the body. 467 auto NumRecords = readNumber<uint32_t>(); 468 if (std::error_code EC = NumRecords.getError()) 469 return EC; 470 471 for (uint32_t I = 0; I < *NumRecords; ++I) { 472 auto LineOffset = readNumber<uint64_t>(); 473 if (std::error_code EC = LineOffset.getError()) 474 return EC; 475 476 if (!isOffsetLegal(*LineOffset)) { 477 return std::error_code(); 478 } 479 480 auto Discriminator = readNumber<uint64_t>(); 481 if (std::error_code EC = Discriminator.getError()) 482 return EC; 483 484 auto NumSamples = readNumber<uint64_t>(); 485 if (std::error_code EC = NumSamples.getError()) 486 return EC; 487 488 auto NumCalls = readNumber<uint32_t>(); 489 if (std::error_code EC = NumCalls.getError()) 490 return EC; 491 492 for (uint32_t J = 0; J < *NumCalls; ++J) { 493 auto CalledFunction(readStringFromTable()); 494 if (std::error_code EC = CalledFunction.getError()) 495 return EC; 496 497 auto CalledFunctionSamples = readNumber<uint64_t>(); 498 if (std::error_code EC = CalledFunctionSamples.getError()) 499 return EC; 500 501 FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, 502 *CalledFunction, *CalledFunctionSamples); 503 } 504 505 FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); 506 } 507 508 // Read all the samples for inlined function calls. 509 auto NumCallsites = readNumber<uint32_t>(); 510 if (std::error_code EC = NumCallsites.getError()) 511 return EC; 512 513 for (uint32_t J = 0; J < *NumCallsites; ++J) { 514 auto LineOffset = readNumber<uint64_t>(); 515 if (std::error_code EC = LineOffset.getError()) 516 return EC; 517 518 auto Discriminator = readNumber<uint64_t>(); 519 if (std::error_code EC = Discriminator.getError()) 520 return EC; 521 522 auto FName(readStringFromTable()); 523 if (std::error_code EC = FName.getError()) 524 return EC; 525 526 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 527 LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; 528 CalleeProfile.setName(*FName); 529 if (std::error_code EC = readProfile(CalleeProfile)) 530 return EC; 531 } 532 533 return sampleprof_error::success; 534 } 535 536 std::error_code 537 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 538 Data = Start; 539 auto NumHeadSamples = readNumber<uint64_t>(); 540 if (std::error_code EC = NumHeadSamples.getError()) 541 return EC; 542 543 auto FName(readStringFromTable()); 544 if (std::error_code EC = FName.getError()) 545 return EC; 546 547 SampleContext FContext(*FName); 548 Profiles[FContext] = FunctionSamples(); 549 FunctionSamples &FProfile = Profiles[FContext]; 550 FProfile.setName(FContext.getNameWithoutContext()); 551 FProfile.setContext(FContext); 552 FProfile.addHeadSamples(*NumHeadSamples); 553 554 if (FContext.hasContext()) 555 CSProfileCount++; 556 557 if (std::error_code EC = readProfile(FProfile)) 558 return EC; 559 return sampleprof_error::success; 560 } 561 562 std::error_code SampleProfileReaderBinary::readImpl() { 563 while (!at_eof()) { 564 if (std::error_code EC = readFuncProfile(Data)) 565 return EC; 566 } 567 568 return sampleprof_error::success; 569 } 570 571 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 572 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 573 Data = Start; 574 End = Start + Size; 575 switch (Entry.Type) { 576 case SecProfSummary: 577 if (std::error_code EC = readSummary()) 578 return EC; 579 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 580 Summary->setPartialProfile(true); 581 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 582 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 583 break; 584 case SecNameTable: { 585 FixedLengthMD5 = 586 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 587 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 588 assert((!FixedLengthMD5 || UseMD5) && 589 "If FixedLengthMD5 is true, UseMD5 has to be true"); 590 FunctionSamples::HasUniqSuffix = 591 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 592 if (std::error_code EC = readNameTableSec(UseMD5)) 593 return EC; 594 break; 595 } 596 case SecLBRProfile: 597 if (std::error_code EC = readFuncProfiles()) 598 return EC; 599 break; 600 case SecFuncOffsetTable: 601 if (std::error_code EC = readFuncOffsetTable()) 602 return EC; 603 break; 604 case SecFuncMetadata: 605 ProfileIsProbeBased = 606 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 607 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 608 if (std::error_code EC = readFuncMetadata()) 609 return EC; 610 break; 611 case SecProfileSymbolList: 612 if (std::error_code EC = readProfileSymbolList()) 613 return EC; 614 break; 615 default: 616 if (std::error_code EC = readCustomSection(Entry)) 617 return EC; 618 break; 619 } 620 return sampleprof_error::success; 621 } 622 623 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 624 if (!M) 625 return false; 626 FuncsToUse.clear(); 627 for (auto &F : *M) 628 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 629 return true; 630 } 631 632 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 633 // If there are more than one FuncOffsetTable, the profile read associated 634 // with previous FuncOffsetTable has to be done before next FuncOffsetTable 635 // is read. 636 FuncOffsetTable.clear(); 637 638 auto Size = readNumber<uint64_t>(); 639 if (std::error_code EC = Size.getError()) 640 return EC; 641 642 FuncOffsetTable.reserve(*Size); 643 for (uint32_t I = 0; I < *Size; ++I) { 644 auto FName(readStringFromTable()); 645 if (std::error_code EC = FName.getError()) 646 return EC; 647 648 auto Offset = readNumber<uint64_t>(); 649 if (std::error_code EC = Offset.getError()) 650 return EC; 651 652 FuncOffsetTable[*FName] = *Offset; 653 } 654 return sampleprof_error::success; 655 } 656 657 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 658 // Collect functions used by current module if the Reader has been 659 // given a module. 660 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 661 // which will query FunctionSamples::HasUniqSuffix, so it has to be 662 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 663 // NameTable section is read. 664 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 665 666 // When LoadFuncsToBeUsed is false, load all the function profiles. 667 const uint8_t *Start = Data; 668 if (!LoadFuncsToBeUsed) { 669 while (Data < End) { 670 if (std::error_code EC = readFuncProfile(Data)) 671 return EC; 672 } 673 assert(Data == End && "More data is read than expected"); 674 } else { 675 // Load function profiles on demand. 676 if (Remapper) { 677 for (auto Name : FuncsToUse) { 678 Remapper->insert(Name); 679 } 680 } 681 682 if (useMD5()) { 683 for (auto Name : FuncsToUse) { 684 auto GUID = std::to_string(MD5Hash(Name)); 685 auto iter = FuncOffsetTable.find(StringRef(GUID)); 686 if (iter == FuncOffsetTable.end()) 687 continue; 688 const uint8_t *FuncProfileAddr = Start + iter->second; 689 assert(FuncProfileAddr < End && "out of LBRProfile section"); 690 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 691 return EC; 692 } 693 } else if (FunctionSamples::ProfileIsCS) { 694 // Compute the ordered set of names, so we can 695 // get all context profiles under a subtree by 696 // iterating through the ordered names. 697 struct Comparer { 698 // Ignore the closing ']' when ordering context 699 bool operator()(const StringRef &L, const StringRef &R) const { 700 return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1); 701 } 702 }; 703 std::set<StringRef, Comparer> OrderedNames; 704 for (auto Name : FuncOffsetTable) { 705 OrderedNames.insert(Name.first); 706 } 707 708 // For each function in current module, load all 709 // context profiles for the function. 710 for (auto NameOffset : FuncOffsetTable) { 711 StringRef ContextName = NameOffset.first; 712 SampleContext FContext(ContextName); 713 auto FuncName = FContext.getNameWithoutContext(); 714 if (!FuncsToUse.count(FuncName) && 715 (!Remapper || !Remapper->exist(FuncName))) 716 continue; 717 718 // For each context profile we need, try to load 719 // all context profile in the subtree. This can 720 // help profile guided importing for ThinLTO. 721 auto It = OrderedNames.find(ContextName); 722 while (It != OrderedNames.end() && 723 It->startswith(ContextName.substr(0, ContextName.size() - 1))) { 724 const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; 725 assert(FuncProfileAddr < End && "out of LBRProfile section"); 726 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 727 return EC; 728 // Remove loaded context profile so we won't 729 // load it repeatedly. 730 It = OrderedNames.erase(It); 731 } 732 } 733 } else { 734 for (auto NameOffset : FuncOffsetTable) { 735 SampleContext FContext(NameOffset.first); 736 auto FuncName = FContext.getNameWithoutContext(); 737 if (!FuncsToUse.count(FuncName) && 738 (!Remapper || !Remapper->exist(FuncName))) 739 continue; 740 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 741 assert(FuncProfileAddr < End && "out of LBRProfile section"); 742 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 743 return EC; 744 } 745 } 746 Data = End; 747 } 748 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 749 "Cannot have both context-sensitive and regular profile"); 750 assert(ProfileIsCS == (CSProfileCount > 0) && 751 "Section flag should be consistent with actual profile"); 752 return sampleprof_error::success; 753 } 754 755 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 756 if (!ProfSymList) 757 ProfSymList = std::make_unique<ProfileSymbolList>(); 758 759 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 760 return EC; 761 762 Data = End; 763 return sampleprof_error::success; 764 } 765 766 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 767 const uint8_t *SecStart, const uint64_t SecSize, 768 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 769 Data = SecStart; 770 End = SecStart + SecSize; 771 auto DecompressSize = readNumber<uint64_t>(); 772 if (std::error_code EC = DecompressSize.getError()) 773 return EC; 774 DecompressBufSize = *DecompressSize; 775 776 auto CompressSize = readNumber<uint64_t>(); 777 if (std::error_code EC = CompressSize.getError()) 778 return EC; 779 780 if (!llvm::zlib::isAvailable()) 781 return sampleprof_error::zlib_unavailable; 782 783 StringRef CompressedStrings(reinterpret_cast<const char *>(Data), 784 *CompressSize); 785 char *Buffer = Allocator.Allocate<char>(DecompressBufSize); 786 size_t UCSize = DecompressBufSize; 787 llvm::Error E = 788 zlib::uncompress(CompressedStrings, Buffer, UCSize); 789 if (E) 790 return sampleprof_error::uncompress_failed; 791 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 792 return sampleprof_error::success; 793 } 794 795 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 796 const uint8_t *BufStart = 797 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 798 799 for (auto &Entry : SecHdrTable) { 800 // Skip empty section. 801 if (!Entry.Size) 802 continue; 803 804 // Skip sections without context when SkipFlatProf is true. 805 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 806 continue; 807 808 const uint8_t *SecStart = BufStart + Entry.Offset; 809 uint64_t SecSize = Entry.Size; 810 811 // If the section is compressed, decompress it into a buffer 812 // DecompressBuf before reading the actual data. The pointee of 813 // 'Data' will be changed to buffer hold by DecompressBuf 814 // temporarily when reading the actual data. 815 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 816 if (isCompressed) { 817 const uint8_t *DecompressBuf; 818 uint64_t DecompressBufSize; 819 if (std::error_code EC = decompressSection( 820 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 821 return EC; 822 SecStart = DecompressBuf; 823 SecSize = DecompressBufSize; 824 } 825 826 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 827 return EC; 828 if (Data != SecStart + SecSize) 829 return sampleprof_error::malformed; 830 831 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 832 if (isCompressed) { 833 Data = BufStart + Entry.Offset; 834 End = BufStart + Buffer->getBufferSize(); 835 } 836 } 837 838 return sampleprof_error::success; 839 } 840 841 std::error_code SampleProfileReaderCompactBinary::readImpl() { 842 // Collect functions used by current module if the Reader has been 843 // given a module. 844 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 845 846 std::vector<uint64_t> OffsetsToUse; 847 if (!LoadFuncsToBeUsed) { 848 // load all the function profiles. 849 for (auto FuncEntry : FuncOffsetTable) { 850 OffsetsToUse.push_back(FuncEntry.second); 851 } 852 } else { 853 // load function profiles on demand. 854 for (auto Name : FuncsToUse) { 855 auto GUID = std::to_string(MD5Hash(Name)); 856 auto iter = FuncOffsetTable.find(StringRef(GUID)); 857 if (iter == FuncOffsetTable.end()) 858 continue; 859 OffsetsToUse.push_back(iter->second); 860 } 861 } 862 863 for (auto Offset : OffsetsToUse) { 864 const uint8_t *SavedData = Data; 865 if (std::error_code EC = readFuncProfile( 866 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 867 Offset)) 868 return EC; 869 Data = SavedData; 870 } 871 return sampleprof_error::success; 872 } 873 874 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 875 if (Magic == SPMagic()) 876 return sampleprof_error::success; 877 return sampleprof_error::bad_magic; 878 } 879 880 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 881 if (Magic == SPMagic(SPF_Ext_Binary)) 882 return sampleprof_error::success; 883 return sampleprof_error::bad_magic; 884 } 885 886 std::error_code 887 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { 888 if (Magic == SPMagic(SPF_Compact_Binary)) 889 return sampleprof_error::success; 890 return sampleprof_error::bad_magic; 891 } 892 893 std::error_code SampleProfileReaderBinary::readNameTable() { 894 auto Size = readNumber<uint32_t>(); 895 if (std::error_code EC = Size.getError()) 896 return EC; 897 NameTable.reserve(*Size + NameTable.size()); 898 for (uint32_t I = 0; I < *Size; ++I) { 899 auto Name(readString()); 900 if (std::error_code EC = Name.getError()) 901 return EC; 902 NameTable.push_back(*Name); 903 } 904 905 return sampleprof_error::success; 906 } 907 908 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() { 909 auto Size = readNumber<uint64_t>(); 910 if (std::error_code EC = Size.getError()) 911 return EC; 912 MD5StringBuf = std::make_unique<std::vector<std::string>>(); 913 MD5StringBuf->reserve(*Size); 914 if (FixedLengthMD5) { 915 // Preallocate and initialize NameTable so we can check whether a name 916 // index has been read before by checking whether the element in the 917 // NameTable is empty, meanwhile readStringIndex can do the boundary 918 // check using the size of NameTable. 919 NameTable.resize(*Size + NameTable.size()); 920 921 MD5NameMemStart = Data; 922 Data = Data + (*Size) * sizeof(uint64_t); 923 return sampleprof_error::success; 924 } 925 NameTable.reserve(*Size); 926 for (uint32_t I = 0; I < *Size; ++I) { 927 auto FID = readNumber<uint64_t>(); 928 if (std::error_code EC = FID.getError()) 929 return EC; 930 MD5StringBuf->push_back(std::to_string(*FID)); 931 // NameTable is a vector of StringRef. Here it is pushing back a 932 // StringRef initialized with the last string in MD5stringBuf. 933 NameTable.push_back(MD5StringBuf->back()); 934 } 935 return sampleprof_error::success; 936 } 937 938 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { 939 if (IsMD5) 940 return readMD5NameTable(); 941 return SampleProfileReaderBinary::readNameTable(); 942 } 943 944 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { 945 if (!ProfileIsProbeBased) 946 return sampleprof_error::success; 947 while (Data < End) { 948 auto FName(readStringFromTable()); 949 if (std::error_code EC = FName.getError()) 950 return EC; 951 952 auto Checksum = readNumber<uint64_t>(); 953 if (std::error_code EC = Checksum.getError()) 954 return EC; 955 956 SampleContext FContext(*FName); 957 // No need to load metadata for profiles that are not loaded in the current 958 // module. 959 if (Profiles.count(FContext)) 960 Profiles[FContext].setFunctionHash(*Checksum); 961 } 962 963 assert(Data == End && "More data is read than expected"); 964 return sampleprof_error::success; 965 } 966 967 std::error_code SampleProfileReaderCompactBinary::readNameTable() { 968 auto Size = readNumber<uint64_t>(); 969 if (std::error_code EC = Size.getError()) 970 return EC; 971 NameTable.reserve(*Size); 972 for (uint32_t I = 0; I < *Size; ++I) { 973 auto FID = readNumber<uint64_t>(); 974 if (std::error_code EC = FID.getError()) 975 return EC; 976 NameTable.push_back(std::to_string(*FID)); 977 } 978 return sampleprof_error::success; 979 } 980 981 std::error_code 982 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) { 983 SecHdrTableEntry Entry; 984 auto Type = readUnencodedNumber<uint64_t>(); 985 if (std::error_code EC = Type.getError()) 986 return EC; 987 Entry.Type = static_cast<SecType>(*Type); 988 989 auto Flags = readUnencodedNumber<uint64_t>(); 990 if (std::error_code EC = Flags.getError()) 991 return EC; 992 Entry.Flags = *Flags; 993 994 auto Offset = readUnencodedNumber<uint64_t>(); 995 if (std::error_code EC = Offset.getError()) 996 return EC; 997 Entry.Offset = *Offset; 998 999 auto Size = readUnencodedNumber<uint64_t>(); 1000 if (std::error_code EC = Size.getError()) 1001 return EC; 1002 Entry.Size = *Size; 1003 1004 Entry.LayoutIndex = Idx; 1005 SecHdrTable.push_back(std::move(Entry)); 1006 return sampleprof_error::success; 1007 } 1008 1009 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1010 auto EntryNum = readUnencodedNumber<uint64_t>(); 1011 if (std::error_code EC = EntryNum.getError()) 1012 return EC; 1013 1014 for (uint32_t i = 0; i < (*EntryNum); i++) 1015 if (std::error_code EC = readSecHdrTableEntry(i)) 1016 return EC; 1017 1018 return sampleprof_error::success; 1019 } 1020 1021 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1022 const uint8_t *BufStart = 1023 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1024 Data = BufStart; 1025 End = BufStart + Buffer->getBufferSize(); 1026 1027 if (std::error_code EC = readMagicIdent()) 1028 return EC; 1029 1030 if (std::error_code EC = readSecHdrTable()) 1031 return EC; 1032 1033 return sampleprof_error::success; 1034 } 1035 1036 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1037 uint64_t Size = 0; 1038 for (auto &Entry : SecHdrTable) { 1039 if (Entry.Type == Type) 1040 Size += Entry.Size; 1041 } 1042 return Size; 1043 } 1044 1045 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1046 // Sections in SecHdrTable is not necessarily in the same order as 1047 // sections in the profile because section like FuncOffsetTable needs 1048 // to be written after section LBRProfile but needs to be read before 1049 // section LBRProfile, so we cannot simply use the last entry in 1050 // SecHdrTable to calculate the file size. 1051 uint64_t FileSize = 0; 1052 for (auto &Entry : SecHdrTable) { 1053 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1054 } 1055 return FileSize; 1056 } 1057 1058 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1059 std::string Flags; 1060 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1061 Flags.append("{compressed,"); 1062 else 1063 Flags.append("{"); 1064 1065 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1066 Flags.append("flat,"); 1067 1068 switch (Entry.Type) { 1069 case SecNameTable: 1070 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1071 Flags.append("fixlenmd5,"); 1072 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1073 Flags.append("md5,"); 1074 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1075 Flags.append("uniq,"); 1076 break; 1077 case SecProfSummary: 1078 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1079 Flags.append("partial,"); 1080 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1081 Flags.append("context,"); 1082 break; 1083 default: 1084 break; 1085 } 1086 char &last = Flags.back(); 1087 if (last == ',') 1088 last = '}'; 1089 else 1090 Flags.append("}"); 1091 return Flags; 1092 } 1093 1094 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1095 uint64_t TotalSecsSize = 0; 1096 for (auto &Entry : SecHdrTable) { 1097 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1098 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1099 << "\n"; 1100 ; 1101 TotalSecsSize += Entry.Size; 1102 } 1103 uint64_t HeaderSize = SecHdrTable.front().Offset; 1104 assert(HeaderSize + TotalSecsSize == getFileSize() && 1105 "Size of 'header + sections' doesn't match the total size of profile"); 1106 1107 OS << "Header Size: " << HeaderSize << "\n"; 1108 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1109 OS << "File Size: " << getFileSize() << "\n"; 1110 return true; 1111 } 1112 1113 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1114 // Read and check the magic identifier. 1115 auto Magic = readNumber<uint64_t>(); 1116 if (std::error_code EC = Magic.getError()) 1117 return EC; 1118 else if (std::error_code EC = verifySPMagic(*Magic)) 1119 return EC; 1120 1121 // Read the version number. 1122 auto Version = readNumber<uint64_t>(); 1123 if (std::error_code EC = Version.getError()) 1124 return EC; 1125 else if (*Version != SPVersion()) 1126 return sampleprof_error::unsupported_version; 1127 1128 return sampleprof_error::success; 1129 } 1130 1131 std::error_code SampleProfileReaderBinary::readHeader() { 1132 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1133 End = Data + Buffer->getBufferSize(); 1134 1135 if (std::error_code EC = readMagicIdent()) 1136 return EC; 1137 1138 if (std::error_code EC = readSummary()) 1139 return EC; 1140 1141 if (std::error_code EC = readNameTable()) 1142 return EC; 1143 return sampleprof_error::success; 1144 } 1145 1146 std::error_code SampleProfileReaderCompactBinary::readHeader() { 1147 SampleProfileReaderBinary::readHeader(); 1148 if (std::error_code EC = readFuncOffsetTable()) 1149 return EC; 1150 return sampleprof_error::success; 1151 } 1152 1153 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { 1154 auto TableOffset = readUnencodedNumber<uint64_t>(); 1155 if (std::error_code EC = TableOffset.getError()) 1156 return EC; 1157 1158 const uint8_t *SavedData = Data; 1159 const uint8_t *TableStart = 1160 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 1161 *TableOffset; 1162 Data = TableStart; 1163 1164 auto Size = readNumber<uint64_t>(); 1165 if (std::error_code EC = Size.getError()) 1166 return EC; 1167 1168 FuncOffsetTable.reserve(*Size); 1169 for (uint32_t I = 0; I < *Size; ++I) { 1170 auto FName(readStringFromTable()); 1171 if (std::error_code EC = FName.getError()) 1172 return EC; 1173 1174 auto Offset = readNumber<uint64_t>(); 1175 if (std::error_code EC = Offset.getError()) 1176 return EC; 1177 1178 FuncOffsetTable[*FName] = *Offset; 1179 } 1180 End = TableStart; 1181 Data = SavedData; 1182 return sampleprof_error::success; 1183 } 1184 1185 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() { 1186 if (!M) 1187 return false; 1188 FuncsToUse.clear(); 1189 for (auto &F : *M) 1190 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 1191 return true; 1192 } 1193 1194 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1195 std::vector<ProfileSummaryEntry> &Entries) { 1196 auto Cutoff = readNumber<uint64_t>(); 1197 if (std::error_code EC = Cutoff.getError()) 1198 return EC; 1199 1200 auto MinBlockCount = readNumber<uint64_t>(); 1201 if (std::error_code EC = MinBlockCount.getError()) 1202 return EC; 1203 1204 auto NumBlocks = readNumber<uint64_t>(); 1205 if (std::error_code EC = NumBlocks.getError()) 1206 return EC; 1207 1208 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1209 return sampleprof_error::success; 1210 } 1211 1212 std::error_code SampleProfileReaderBinary::readSummary() { 1213 auto TotalCount = readNumber<uint64_t>(); 1214 if (std::error_code EC = TotalCount.getError()) 1215 return EC; 1216 1217 auto MaxBlockCount = readNumber<uint64_t>(); 1218 if (std::error_code EC = MaxBlockCount.getError()) 1219 return EC; 1220 1221 auto MaxFunctionCount = readNumber<uint64_t>(); 1222 if (std::error_code EC = MaxFunctionCount.getError()) 1223 return EC; 1224 1225 auto NumBlocks = readNumber<uint64_t>(); 1226 if (std::error_code EC = NumBlocks.getError()) 1227 return EC; 1228 1229 auto NumFunctions = readNumber<uint64_t>(); 1230 if (std::error_code EC = NumFunctions.getError()) 1231 return EC; 1232 1233 auto NumSummaryEntries = readNumber<uint64_t>(); 1234 if (std::error_code EC = NumSummaryEntries.getError()) 1235 return EC; 1236 1237 std::vector<ProfileSummaryEntry> Entries; 1238 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1239 std::error_code EC = readSummaryEntry(Entries); 1240 if (EC != sampleprof_error::success) 1241 return EC; 1242 } 1243 Summary = std::make_unique<ProfileSummary>( 1244 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1245 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1246 1247 return sampleprof_error::success; 1248 } 1249 1250 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1251 const uint8_t *Data = 1252 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1253 uint64_t Magic = decodeULEB128(Data); 1254 return Magic == SPMagic(); 1255 } 1256 1257 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1258 const uint8_t *Data = 1259 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1260 uint64_t Magic = decodeULEB128(Data); 1261 return Magic == SPMagic(SPF_Ext_Binary); 1262 } 1263 1264 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { 1265 const uint8_t *Data = 1266 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1267 uint64_t Magic = decodeULEB128(Data); 1268 return Magic == SPMagic(SPF_Compact_Binary); 1269 } 1270 1271 std::error_code SampleProfileReaderGCC::skipNextWord() { 1272 uint32_t dummy; 1273 if (!GcovBuffer.readInt(dummy)) 1274 return sampleprof_error::truncated; 1275 return sampleprof_error::success; 1276 } 1277 1278 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1279 if (sizeof(T) <= sizeof(uint32_t)) { 1280 uint32_t Val; 1281 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1282 return static_cast<T>(Val); 1283 } else if (sizeof(T) <= sizeof(uint64_t)) { 1284 uint64_t Val; 1285 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1286 return static_cast<T>(Val); 1287 } 1288 1289 std::error_code EC = sampleprof_error::malformed; 1290 reportError(0, EC.message()); 1291 return EC; 1292 } 1293 1294 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1295 StringRef Str; 1296 if (!GcovBuffer.readString(Str)) 1297 return sampleprof_error::truncated; 1298 return Str; 1299 } 1300 1301 std::error_code SampleProfileReaderGCC::readHeader() { 1302 // Read the magic identifier. 1303 if (!GcovBuffer.readGCDAFormat()) 1304 return sampleprof_error::unrecognized_format; 1305 1306 // Read the version number. Note - the GCC reader does not validate this 1307 // version, but the profile creator generates v704. 1308 GCOV::GCOVVersion version; 1309 if (!GcovBuffer.readGCOVVersion(version)) 1310 return sampleprof_error::unrecognized_format; 1311 1312 if (version != GCOV::V407) 1313 return sampleprof_error::unsupported_version; 1314 1315 // Skip the empty integer. 1316 if (std::error_code EC = skipNextWord()) 1317 return EC; 1318 1319 return sampleprof_error::success; 1320 } 1321 1322 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1323 uint32_t Tag; 1324 if (!GcovBuffer.readInt(Tag)) 1325 return sampleprof_error::truncated; 1326 1327 if (Tag != Expected) 1328 return sampleprof_error::malformed; 1329 1330 if (std::error_code EC = skipNextWord()) 1331 return EC; 1332 1333 return sampleprof_error::success; 1334 } 1335 1336 std::error_code SampleProfileReaderGCC::readNameTable() { 1337 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1338 return EC; 1339 1340 uint32_t Size; 1341 if (!GcovBuffer.readInt(Size)) 1342 return sampleprof_error::truncated; 1343 1344 for (uint32_t I = 0; I < Size; ++I) { 1345 StringRef Str; 1346 if (!GcovBuffer.readString(Str)) 1347 return sampleprof_error::truncated; 1348 Names.push_back(std::string(Str)); 1349 } 1350 1351 return sampleprof_error::success; 1352 } 1353 1354 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1355 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1356 return EC; 1357 1358 uint32_t NumFunctions; 1359 if (!GcovBuffer.readInt(NumFunctions)) 1360 return sampleprof_error::truncated; 1361 1362 InlineCallStack Stack; 1363 for (uint32_t I = 0; I < NumFunctions; ++I) 1364 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1365 return EC; 1366 1367 computeSummary(); 1368 return sampleprof_error::success; 1369 } 1370 1371 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1372 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1373 uint64_t HeadCount = 0; 1374 if (InlineStack.size() == 0) 1375 if (!GcovBuffer.readInt64(HeadCount)) 1376 return sampleprof_error::truncated; 1377 1378 uint32_t NameIdx; 1379 if (!GcovBuffer.readInt(NameIdx)) 1380 return sampleprof_error::truncated; 1381 1382 StringRef Name(Names[NameIdx]); 1383 1384 uint32_t NumPosCounts; 1385 if (!GcovBuffer.readInt(NumPosCounts)) 1386 return sampleprof_error::truncated; 1387 1388 uint32_t NumCallsites; 1389 if (!GcovBuffer.readInt(NumCallsites)) 1390 return sampleprof_error::truncated; 1391 1392 FunctionSamples *FProfile = nullptr; 1393 if (InlineStack.size() == 0) { 1394 // If this is a top function that we have already processed, do not 1395 // update its profile again. This happens in the presence of 1396 // function aliases. Since these aliases share the same function 1397 // body, there will be identical replicated profiles for the 1398 // original function. In this case, we simply not bother updating 1399 // the profile of the original function. 1400 FProfile = &Profiles[Name]; 1401 FProfile->addHeadSamples(HeadCount); 1402 if (FProfile->getTotalSamples() > 0) 1403 Update = false; 1404 } else { 1405 // Otherwise, we are reading an inlined instance. The top of the 1406 // inline stack contains the profile of the caller. Insert this 1407 // callee in the caller's CallsiteMap. 1408 FunctionSamples *CallerProfile = InlineStack.front(); 1409 uint32_t LineOffset = Offset >> 16; 1410 uint32_t Discriminator = Offset & 0xffff; 1411 FProfile = &CallerProfile->functionSamplesAt( 1412 LineLocation(LineOffset, Discriminator))[std::string(Name)]; 1413 } 1414 FProfile->setName(Name); 1415 1416 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1417 uint32_t Offset; 1418 if (!GcovBuffer.readInt(Offset)) 1419 return sampleprof_error::truncated; 1420 1421 uint32_t NumTargets; 1422 if (!GcovBuffer.readInt(NumTargets)) 1423 return sampleprof_error::truncated; 1424 1425 uint64_t Count; 1426 if (!GcovBuffer.readInt64(Count)) 1427 return sampleprof_error::truncated; 1428 1429 // The line location is encoded in the offset as: 1430 // high 16 bits: line offset to the start of the function. 1431 // low 16 bits: discriminator. 1432 uint32_t LineOffset = Offset >> 16; 1433 uint32_t Discriminator = Offset & 0xffff; 1434 1435 InlineCallStack NewStack; 1436 NewStack.push_back(FProfile); 1437 llvm::append_range(NewStack, InlineStack); 1438 if (Update) { 1439 // Walk up the inline stack, adding the samples on this line to 1440 // the total sample count of the callers in the chain. 1441 for (auto CallerProfile : NewStack) 1442 CallerProfile->addTotalSamples(Count); 1443 1444 // Update the body samples for the current profile. 1445 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1446 } 1447 1448 // Process the list of functions called at an indirect call site. 1449 // These are all the targets that a function pointer (or virtual 1450 // function) resolved at runtime. 1451 for (uint32_t J = 0; J < NumTargets; J++) { 1452 uint32_t HistVal; 1453 if (!GcovBuffer.readInt(HistVal)) 1454 return sampleprof_error::truncated; 1455 1456 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1457 return sampleprof_error::malformed; 1458 1459 uint64_t TargetIdx; 1460 if (!GcovBuffer.readInt64(TargetIdx)) 1461 return sampleprof_error::truncated; 1462 StringRef TargetName(Names[TargetIdx]); 1463 1464 uint64_t TargetCount; 1465 if (!GcovBuffer.readInt64(TargetCount)) 1466 return sampleprof_error::truncated; 1467 1468 if (Update) 1469 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1470 TargetName, TargetCount); 1471 } 1472 } 1473 1474 // Process all the inlined callers into the current function. These 1475 // are all the callsites that were inlined into this function. 1476 for (uint32_t I = 0; I < NumCallsites; I++) { 1477 // The offset is encoded as: 1478 // high 16 bits: line offset to the start of the function. 1479 // low 16 bits: discriminator. 1480 uint32_t Offset; 1481 if (!GcovBuffer.readInt(Offset)) 1482 return sampleprof_error::truncated; 1483 InlineCallStack NewStack; 1484 NewStack.push_back(FProfile); 1485 llvm::append_range(NewStack, InlineStack); 1486 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1487 return EC; 1488 } 1489 1490 return sampleprof_error::success; 1491 } 1492 1493 /// Read a GCC AutoFDO profile. 1494 /// 1495 /// This format is generated by the Linux Perf conversion tool at 1496 /// https://github.com/google/autofdo. 1497 std::error_code SampleProfileReaderGCC::readImpl() { 1498 // Read the string table. 1499 if (std::error_code EC = readNameTable()) 1500 return EC; 1501 1502 // Read the source profile. 1503 if (std::error_code EC = readFunctionProfiles()) 1504 return EC; 1505 1506 return sampleprof_error::success; 1507 } 1508 1509 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1510 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1511 return Magic == "adcg*704"; 1512 } 1513 1514 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1515 // If the reader uses MD5 to represent string, we can't remap it because 1516 // we don't know what the original function names were. 1517 if (Reader.useMD5()) { 1518 Ctx.diagnose(DiagnosticInfoSampleProfile( 1519 Reader.getBuffer()->getBufferIdentifier(), 1520 "Profile data remapping cannot be applied to profile data " 1521 "in compact format (original mangled names are not available).", 1522 DS_Warning)); 1523 return; 1524 } 1525 1526 // CSSPGO-TODO: Remapper is not yet supported. 1527 // We will need to remap the entire context string. 1528 assert(Remappings && "should be initialized while creating remapper"); 1529 for (auto &Sample : Reader.getProfiles()) { 1530 DenseSet<StringRef> NamesInSample; 1531 Sample.second.findAllNames(NamesInSample); 1532 for (auto &Name : NamesInSample) 1533 if (auto Key = Remappings->insert(Name)) 1534 NameMap.insert({Key, Name}); 1535 } 1536 1537 RemappingApplied = true; 1538 } 1539 1540 Optional<StringRef> 1541 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1542 if (auto Key = Remappings->lookup(Fname)) 1543 return NameMap.lookup(Key); 1544 return None; 1545 } 1546 1547 /// Prepare a memory buffer for the contents of \p Filename. 1548 /// 1549 /// \returns an error code indicating the status of the buffer. 1550 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1551 setupMemoryBuffer(const Twine &Filename) { 1552 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename); 1553 if (std::error_code EC = BufferOrErr.getError()) 1554 return EC; 1555 auto Buffer = std::move(BufferOrErr.get()); 1556 1557 // Sanity check the file. 1558 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max()) 1559 return sampleprof_error::too_large; 1560 1561 return std::move(Buffer); 1562 } 1563 1564 /// Create a sample profile reader based on the format of the input file. 1565 /// 1566 /// \param Filename The file to open. 1567 /// 1568 /// \param C The LLVM context to use to emit diagnostics. 1569 /// 1570 /// \param RemapFilename The file used for profile remapping. 1571 /// 1572 /// \returns an error code indicating the status of the created reader. 1573 ErrorOr<std::unique_ptr<SampleProfileReader>> 1574 SampleProfileReader::create(const std::string Filename, LLVMContext &C, 1575 const std::string RemapFilename) { 1576 auto BufferOrError = setupMemoryBuffer(Filename); 1577 if (std::error_code EC = BufferOrError.getError()) 1578 return EC; 1579 return create(BufferOrError.get(), C, RemapFilename); 1580 } 1581 1582 /// Create a sample profile remapper from the given input, to remap the 1583 /// function names in the given profile data. 1584 /// 1585 /// \param Filename The file to open. 1586 /// 1587 /// \param Reader The profile reader the remapper is going to be applied to. 1588 /// 1589 /// \param C The LLVM context to use to emit diagnostics. 1590 /// 1591 /// \returns an error code indicating the status of the created reader. 1592 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1593 SampleProfileReaderItaniumRemapper::create(const std::string Filename, 1594 SampleProfileReader &Reader, 1595 LLVMContext &C) { 1596 auto BufferOrError = setupMemoryBuffer(Filename); 1597 if (std::error_code EC = BufferOrError.getError()) 1598 return EC; 1599 return create(BufferOrError.get(), Reader, C); 1600 } 1601 1602 /// Create a sample profile remapper from the given input, to remap the 1603 /// function names in the given profile data. 1604 /// 1605 /// \param B The memory buffer to create the reader from (assumes ownership). 1606 /// 1607 /// \param C The LLVM context to use to emit diagnostics. 1608 /// 1609 /// \param Reader The profile reader the remapper is going to be applied to. 1610 /// 1611 /// \returns an error code indicating the status of the created reader. 1612 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1613 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1614 SampleProfileReader &Reader, 1615 LLVMContext &C) { 1616 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1617 if (Error E = Remappings->read(*B.get())) { 1618 handleAllErrors( 1619 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1620 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1621 ParseError.getLineNum(), 1622 ParseError.getMessage())); 1623 }); 1624 return sampleprof_error::malformed; 1625 } 1626 1627 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1628 std::move(B), std::move(Remappings), Reader); 1629 } 1630 1631 /// Create a sample profile reader based on the format of the input data. 1632 /// 1633 /// \param B The memory buffer to create the reader from (assumes ownership). 1634 /// 1635 /// \param C The LLVM context to use to emit diagnostics. 1636 /// 1637 /// \param RemapFilename The file used for profile remapping. 1638 /// 1639 /// \returns an error code indicating the status of the created reader. 1640 ErrorOr<std::unique_ptr<SampleProfileReader>> 1641 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1642 const std::string RemapFilename) { 1643 std::unique_ptr<SampleProfileReader> Reader; 1644 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1645 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1646 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1647 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1648 else if (SampleProfileReaderCompactBinary::hasFormat(*B)) 1649 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); 1650 else if (SampleProfileReaderGCC::hasFormat(*B)) 1651 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1652 else if (SampleProfileReaderText::hasFormat(*B)) 1653 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1654 else 1655 return sampleprof_error::unrecognized_format; 1656 1657 if (!RemapFilename.empty()) { 1658 auto ReaderOrErr = 1659 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); 1660 if (std::error_code EC = ReaderOrErr.getError()) { 1661 std::string Msg = "Could not create remapper: " + EC.message(); 1662 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1663 return EC; 1664 } 1665 Reader->Remapper = std::move(ReaderOrErr.get()); 1666 } 1667 1668 FunctionSamples::Format = Reader->getFormat(); 1669 if (std::error_code EC = Reader->readHeader()) { 1670 return EC; 1671 } 1672 1673 return std::move(Reader); 1674 } 1675 1676 // For text and GCC file formats, we compute the summary after reading the 1677 // profile. Binary format has the profile summary in its header. 1678 void SampleProfileReader::computeSummary() { 1679 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1680 Summary = Builder.computeSummaryForProfiles(Profiles); 1681 } 1682