1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the class that reads LLVM sample profiles. It 10 // supports three file formats: text, binary and gcov. 11 // 12 // The textual representation is useful for debugging and testing purposes. The 13 // binary representation is more compact, resulting in smaller file sizes. 14 // 15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation 16 // tool (https://github.com/google/autofdo) 17 // 18 // All three encodings can be used interchangeably as an input sample profile. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/ProfileData/SampleProfReader.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/IR/ProfileSummary.h" 27 #include "llvm/ProfileData/ProfileCommon.h" 28 #include "llvm/ProfileData/SampleProf.h" 29 #include "llvm/Support/Compression.h" 30 #include "llvm/Support/ErrorOr.h" 31 #include "llvm/Support/LEB128.h" 32 #include "llvm/Support/LineIterator.h" 33 #include "llvm/Support/MD5.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <algorithm> 37 #include <cstddef> 38 #include <cstdint> 39 #include <limits> 40 #include <memory> 41 #include <set> 42 #include <system_error> 43 #include <vector> 44 45 using namespace llvm; 46 using namespace sampleprof; 47 48 /// Dump the function profile for \p FName. 49 /// 50 /// \param FName Name of the function to print. 51 /// \param OS Stream to emit the output to. 52 void SampleProfileReader::dumpFunctionProfile(StringRef FName, 53 raw_ostream &OS) { 54 OS << "Function: " << FName << ": " << Profiles[FName]; 55 } 56 57 /// Dump all the function profiles found on stream \p OS. 58 void SampleProfileReader::dump(raw_ostream &OS) { 59 for (const auto &I : Profiles) 60 dumpFunctionProfile(I.getKey(), OS); 61 } 62 63 /// Parse \p Input as function head. 64 /// 65 /// Parse one line of \p Input, and update function name in \p FName, 66 /// function's total sample count in \p NumSamples, function's entry 67 /// count in \p NumHeadSamples. 68 /// 69 /// \returns true if parsing is successful. 70 static bool ParseHead(const StringRef &Input, StringRef &FName, 71 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 72 if (Input[0] == ' ') 73 return false; 74 size_t n2 = Input.rfind(':'); 75 size_t n1 = Input.rfind(':', n2 - 1); 76 FName = Input.substr(0, n1); 77 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 78 return false; 79 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 80 return false; 81 return true; 82 } 83 84 /// Returns true if line offset \p L is legal (only has 16 bits). 85 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 86 87 /// Parse \p Input that contains metadata. 88 /// Possible metadata: 89 /// - CFG Checksum information: 90 /// !CFGChecksum: 12345 91 /// - CFG Checksum information: 92 /// !Attributes: 1 93 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 94 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, 95 uint32_t &Attributes) { 96 if (Input.startswith("!CFGChecksum:")) { 97 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 98 return !CFGInfo.getAsInteger(10, FunctionHash); 99 } 100 101 if (Input.startswith("!Attributes:")) { 102 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); 103 return !Attrib.getAsInteger(10, Attributes); 104 } 105 106 return false; 107 } 108 109 enum class LineType { 110 CallSiteProfile, 111 BodyProfile, 112 Metadata, 113 }; 114 115 /// Parse \p Input as line sample. 116 /// 117 /// \param Input input line. 118 /// \param LineTy Type of this line. 119 /// \param Depth the depth of the inline stack. 120 /// \param NumSamples total samples of the line/inlined callsite. 121 /// \param LineOffset line offset to the start of the function. 122 /// \param Discriminator discriminator of the line. 123 /// \param TargetCountMap map from indirect call target to count. 124 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 125 /// 126 /// returns true if parsing is successful. 127 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 128 uint64_t &NumSamples, uint32_t &LineOffset, 129 uint32_t &Discriminator, StringRef &CalleeName, 130 DenseMap<StringRef, uint64_t> &TargetCountMap, 131 uint64_t &FunctionHash, uint32_t &Attributes) { 132 for (Depth = 0; Input[Depth] == ' '; Depth++) 133 ; 134 if (Depth == 0) 135 return false; 136 137 if (Depth == 1 && Input[Depth] == '!') { 138 LineTy = LineType::Metadata; 139 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); 140 } 141 142 size_t n1 = Input.find(':'); 143 StringRef Loc = Input.substr(Depth, n1 - Depth); 144 size_t n2 = Loc.find('.'); 145 if (n2 == StringRef::npos) { 146 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 147 return false; 148 Discriminator = 0; 149 } else { 150 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 151 return false; 152 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 153 return false; 154 } 155 156 StringRef Rest = Input.substr(n1 + 2); 157 if (isDigit(Rest[0])) { 158 LineTy = LineType::BodyProfile; 159 size_t n3 = Rest.find(' '); 160 if (n3 == StringRef::npos) { 161 if (Rest.getAsInteger(10, NumSamples)) 162 return false; 163 } else { 164 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 165 return false; 166 } 167 // Find call targets and their sample counts. 168 // Note: In some cases, there are symbols in the profile which are not 169 // mangled. To accommodate such cases, use colon + integer pairs as the 170 // anchor points. 171 // An example: 172 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 173 // ":1000" and ":437" are used as anchor points so the string above will 174 // be interpreted as 175 // target: _M_construct<char *> 176 // count: 1000 177 // target: string_view<std::allocator<char> > 178 // count: 437 179 while (n3 != StringRef::npos) { 180 n3 += Rest.substr(n3).find_first_not_of(' '); 181 Rest = Rest.substr(n3); 182 n3 = Rest.find_first_of(':'); 183 if (n3 == StringRef::npos || n3 == 0) 184 return false; 185 186 StringRef Target; 187 uint64_t count, n4; 188 while (true) { 189 // Get the segment after the current colon. 190 StringRef AfterColon = Rest.substr(n3 + 1); 191 // Get the target symbol before the current colon. 192 Target = Rest.substr(0, n3); 193 // Check if the word after the current colon is an integer. 194 n4 = AfterColon.find_first_of(' '); 195 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 196 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 197 if (!WordAfterColon.getAsInteger(10, count)) 198 break; 199 200 // Try to find the next colon. 201 uint64_t n5 = AfterColon.find_first_of(':'); 202 if (n5 == StringRef::npos) 203 return false; 204 n3 += n5 + 1; 205 } 206 207 // An anchor point is found. Save the {target, count} pair 208 TargetCountMap[Target] = count; 209 if (n4 == Rest.size()) 210 break; 211 // Change n3 to the next blank space after colon + integer pair. 212 n3 = n4; 213 } 214 } else { 215 LineTy = LineType::CallSiteProfile; 216 size_t n3 = Rest.find_last_of(':'); 217 CalleeName = Rest.substr(0, n3); 218 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 219 return false; 220 } 221 return true; 222 } 223 224 /// Load samples from a text file. 225 /// 226 /// See the documentation at the top of the file for an explanation of 227 /// the expected format. 228 /// 229 /// \returns true if the file was loaded successfully, false otherwise. 230 std::error_code SampleProfileReaderText::readImpl() { 231 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 232 sampleprof_error Result = sampleprof_error::success; 233 234 InlineCallStack InlineStack; 235 uint32_t ProbeProfileCount = 0; 236 237 // SeenMetadata tracks whether we have processed metadata for the current 238 // top-level function profile. 239 bool SeenMetadata = false; 240 241 for (; !LineIt.is_at_eof(); ++LineIt) { 242 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') 243 continue; 244 // Read the header of each function. 245 // 246 // Note that for function identifiers we are actually expecting 247 // mangled names, but we may not always get them. This happens when 248 // the compiler decides not to emit the function (e.g., it was inlined 249 // and removed). In this case, the binary will not have the linkage 250 // name for the function, so the profiler will emit the function's 251 // unmangled name, which may contain characters like ':' and '>' in its 252 // name (member functions, templates, etc). 253 // 254 // The only requirement we place on the identifier, then, is that it 255 // should not begin with a number. 256 if ((*LineIt)[0] != ' ') { 257 uint64_t NumSamples, NumHeadSamples; 258 StringRef FName; 259 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 260 reportError(LineIt.line_number(), 261 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 262 return sampleprof_error::malformed; 263 } 264 SeenMetadata = false; 265 SampleContext FContext(FName); 266 if (FContext.hasContext()) 267 ++CSProfileCount; 268 Profiles[FContext] = FunctionSamples(); 269 FunctionSamples &FProfile = Profiles[FContext]; 270 FProfile.setName(FContext.getNameWithoutContext()); 271 FProfile.setContext(FContext); 272 MergeResult(Result, FProfile.addTotalSamples(NumSamples)); 273 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); 274 InlineStack.clear(); 275 InlineStack.push_back(&FProfile); 276 } else { 277 uint64_t NumSamples; 278 StringRef FName; 279 DenseMap<StringRef, uint64_t> TargetCountMap; 280 uint32_t Depth, LineOffset, Discriminator; 281 LineType LineTy; 282 uint64_t FunctionHash = 0; 283 uint32_t Attributes = 0; 284 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 285 Discriminator, FName, TargetCountMap, FunctionHash, 286 Attributes)) { 287 reportError(LineIt.line_number(), 288 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 289 *LineIt); 290 return sampleprof_error::malformed; 291 } 292 if (SeenMetadata && LineTy != LineType::Metadata) { 293 // Metadata must be put at the end of a function profile. 294 reportError(LineIt.line_number(), 295 "Found non-metadata after metadata: " + *LineIt); 296 return sampleprof_error::malformed; 297 } 298 while (InlineStack.size() > Depth) { 299 InlineStack.pop_back(); 300 } 301 switch (LineTy) { 302 case LineType::CallSiteProfile: { 303 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 304 LineLocation(LineOffset, Discriminator))[std::string(FName)]; 305 FSamples.setName(FName); 306 MergeResult(Result, FSamples.addTotalSamples(NumSamples)); 307 InlineStack.push_back(&FSamples); 308 break; 309 } 310 case LineType::BodyProfile: { 311 while (InlineStack.size() > Depth) { 312 InlineStack.pop_back(); 313 } 314 FunctionSamples &FProfile = *InlineStack.back(); 315 for (const auto &name_count : TargetCountMap) { 316 MergeResult(Result, FProfile.addCalledTargetSamples( 317 LineOffset, Discriminator, name_count.first, 318 name_count.second)); 319 } 320 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, 321 NumSamples)); 322 break; 323 } 324 case LineType::Metadata: { 325 FunctionSamples &FProfile = *InlineStack.back(); 326 if (FunctionHash) { 327 FProfile.setFunctionHash(FunctionHash); 328 ++ProbeProfileCount; 329 } 330 if (Attributes) 331 FProfile.getContext().setAllAttributes(Attributes); 332 SeenMetadata = true; 333 break; 334 } 335 } 336 } 337 } 338 339 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 340 "Cannot have both context-sensitive and regular profile"); 341 ProfileIsCS = (CSProfileCount > 0); 342 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && 343 "Cannot have both probe-based profiles and regular profiles"); 344 ProfileIsProbeBased = (ProbeProfileCount > 0); 345 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 346 FunctionSamples::ProfileIsCS = ProfileIsCS; 347 348 if (Result == sampleprof_error::success) 349 computeSummary(); 350 351 return Result; 352 } 353 354 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 355 bool result = false; 356 357 // Check that the first non-comment line is a valid function header. 358 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 359 if (!LineIt.is_at_eof()) { 360 if ((*LineIt)[0] != ' ') { 361 uint64_t NumSamples, NumHeadSamples; 362 StringRef FName; 363 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 364 } 365 } 366 367 return result; 368 } 369 370 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 371 unsigned NumBytesRead = 0; 372 std::error_code EC; 373 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 374 375 if (Val > std::numeric_limits<T>::max()) 376 EC = sampleprof_error::malformed; 377 else if (Data + NumBytesRead > End) 378 EC = sampleprof_error::truncated; 379 else 380 EC = sampleprof_error::success; 381 382 if (EC) { 383 reportError(0, EC.message()); 384 return EC; 385 } 386 387 Data += NumBytesRead; 388 return static_cast<T>(Val); 389 } 390 391 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 392 std::error_code EC; 393 StringRef Str(reinterpret_cast<const char *>(Data)); 394 if (Data + Str.size() + 1 > End) { 395 EC = sampleprof_error::truncated; 396 reportError(0, EC.message()); 397 return EC; 398 } 399 400 Data += Str.size() + 1; 401 return Str; 402 } 403 404 template <typename T> 405 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 406 std::error_code EC; 407 408 if (Data + sizeof(T) > End) { 409 EC = sampleprof_error::truncated; 410 reportError(0, EC.message()); 411 return EC; 412 } 413 414 using namespace support; 415 T Val = endian::readNext<T, little, unaligned>(Data); 416 return Val; 417 } 418 419 template <typename T> 420 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 421 std::error_code EC; 422 auto Idx = readNumber<uint32_t>(); 423 if (std::error_code EC = Idx.getError()) 424 return EC; 425 if (*Idx >= Table.size()) 426 return sampleprof_error::truncated_name_table; 427 return *Idx; 428 } 429 430 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() { 431 auto Idx = readStringIndex(NameTable); 432 if (std::error_code EC = Idx.getError()) 433 return EC; 434 435 return NameTable[*Idx]; 436 } 437 438 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() { 439 if (!FixedLengthMD5) 440 return SampleProfileReaderBinary::readStringFromTable(); 441 442 // read NameTable index. 443 auto Idx = readStringIndex(NameTable); 444 if (std::error_code EC = Idx.getError()) 445 return EC; 446 447 // Check whether the name to be accessed has been accessed before, 448 // if not, read it from memory directly. 449 StringRef &SR = NameTable[*Idx]; 450 if (SR.empty()) { 451 const uint8_t *SavedData = Data; 452 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); 453 auto FID = readUnencodedNumber<uint64_t>(); 454 if (std::error_code EC = FID.getError()) 455 return EC; 456 // Save the string converted from uint64_t in MD5StringBuf. All the 457 // references to the name are all StringRefs refering to the string 458 // in MD5StringBuf. 459 MD5StringBuf->push_back(std::to_string(*FID)); 460 SR = MD5StringBuf->back(); 461 Data = SavedData; 462 } 463 return SR; 464 } 465 466 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() { 467 auto Idx = readStringIndex(NameTable); 468 if (std::error_code EC = Idx.getError()) 469 return EC; 470 471 return StringRef(NameTable[*Idx]); 472 } 473 474 std::error_code 475 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 476 auto NumSamples = readNumber<uint64_t>(); 477 if (std::error_code EC = NumSamples.getError()) 478 return EC; 479 FProfile.addTotalSamples(*NumSamples); 480 481 // Read the samples in the body. 482 auto NumRecords = readNumber<uint32_t>(); 483 if (std::error_code EC = NumRecords.getError()) 484 return EC; 485 486 for (uint32_t I = 0; I < *NumRecords; ++I) { 487 auto LineOffset = readNumber<uint64_t>(); 488 if (std::error_code EC = LineOffset.getError()) 489 return EC; 490 491 if (!isOffsetLegal(*LineOffset)) { 492 return std::error_code(); 493 } 494 495 auto Discriminator = readNumber<uint64_t>(); 496 if (std::error_code EC = Discriminator.getError()) 497 return EC; 498 499 auto NumSamples = readNumber<uint64_t>(); 500 if (std::error_code EC = NumSamples.getError()) 501 return EC; 502 503 auto NumCalls = readNumber<uint32_t>(); 504 if (std::error_code EC = NumCalls.getError()) 505 return EC; 506 507 for (uint32_t J = 0; J < *NumCalls; ++J) { 508 auto CalledFunction(readStringFromTable()); 509 if (std::error_code EC = CalledFunction.getError()) 510 return EC; 511 512 auto CalledFunctionSamples = readNumber<uint64_t>(); 513 if (std::error_code EC = CalledFunctionSamples.getError()) 514 return EC; 515 516 FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, 517 *CalledFunction, *CalledFunctionSamples); 518 } 519 520 FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); 521 } 522 523 // Read all the samples for inlined function calls. 524 auto NumCallsites = readNumber<uint32_t>(); 525 if (std::error_code EC = NumCallsites.getError()) 526 return EC; 527 528 for (uint32_t J = 0; J < *NumCallsites; ++J) { 529 auto LineOffset = readNumber<uint64_t>(); 530 if (std::error_code EC = LineOffset.getError()) 531 return EC; 532 533 auto Discriminator = readNumber<uint64_t>(); 534 if (std::error_code EC = Discriminator.getError()) 535 return EC; 536 537 auto FName(readStringFromTable()); 538 if (std::error_code EC = FName.getError()) 539 return EC; 540 541 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 542 LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; 543 CalleeProfile.setName(*FName); 544 if (std::error_code EC = readProfile(CalleeProfile)) 545 return EC; 546 } 547 548 return sampleprof_error::success; 549 } 550 551 std::error_code 552 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 553 Data = Start; 554 auto NumHeadSamples = readNumber<uint64_t>(); 555 if (std::error_code EC = NumHeadSamples.getError()) 556 return EC; 557 558 auto FName(readStringFromTable()); 559 if (std::error_code EC = FName.getError()) 560 return EC; 561 562 SampleContext FContext(*FName); 563 Profiles[FContext] = FunctionSamples(); 564 FunctionSamples &FProfile = Profiles[FContext]; 565 FProfile.setName(FContext.getNameWithoutContext()); 566 FProfile.setContext(FContext); 567 FProfile.addHeadSamples(*NumHeadSamples); 568 569 if (FContext.hasContext()) 570 CSProfileCount++; 571 572 if (std::error_code EC = readProfile(FProfile)) 573 return EC; 574 return sampleprof_error::success; 575 } 576 577 std::error_code SampleProfileReaderBinary::readImpl() { 578 while (!at_eof()) { 579 if (std::error_code EC = readFuncProfile(Data)) 580 return EC; 581 } 582 583 return sampleprof_error::success; 584 } 585 586 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 587 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 588 Data = Start; 589 End = Start + Size; 590 switch (Entry.Type) { 591 case SecProfSummary: 592 if (std::error_code EC = readSummary()) 593 return EC; 594 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 595 Summary->setPartialProfile(true); 596 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 597 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 598 break; 599 case SecNameTable: { 600 FixedLengthMD5 = 601 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 602 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 603 assert((!FixedLengthMD5 || UseMD5) && 604 "If FixedLengthMD5 is true, UseMD5 has to be true"); 605 FunctionSamples::HasUniqSuffix = 606 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 607 if (std::error_code EC = readNameTableSec(UseMD5)) 608 return EC; 609 break; 610 } 611 case SecLBRProfile: 612 if (std::error_code EC = readFuncProfiles()) 613 return EC; 614 break; 615 case SecFuncOffsetTable: 616 if (std::error_code EC = readFuncOffsetTable()) 617 return EC; 618 break; 619 case SecFuncMetadata: { 620 ProfileIsProbeBased = 621 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 622 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 623 bool HasAttribute = 624 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); 625 if (std::error_code EC = readFuncMetadata(HasAttribute)) 626 return EC; 627 break; 628 } 629 case SecProfileSymbolList: 630 if (std::error_code EC = readProfileSymbolList()) 631 return EC; 632 break; 633 default: 634 if (std::error_code EC = readCustomSection(Entry)) 635 return EC; 636 break; 637 } 638 return sampleprof_error::success; 639 } 640 641 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 642 if (!M) 643 return false; 644 FuncsToUse.clear(); 645 for (auto &F : *M) 646 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 647 return true; 648 } 649 650 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 651 // If there are more than one FuncOffsetTable, the profile read associated 652 // with previous FuncOffsetTable has to be done before next FuncOffsetTable 653 // is read. 654 FuncOffsetTable.clear(); 655 656 auto Size = readNumber<uint64_t>(); 657 if (std::error_code EC = Size.getError()) 658 return EC; 659 660 FuncOffsetTable.reserve(*Size); 661 for (uint32_t I = 0; I < *Size; ++I) { 662 auto FName(readStringFromTable()); 663 if (std::error_code EC = FName.getError()) 664 return EC; 665 666 auto Offset = readNumber<uint64_t>(); 667 if (std::error_code EC = Offset.getError()) 668 return EC; 669 670 FuncOffsetTable[*FName] = *Offset; 671 } 672 return sampleprof_error::success; 673 } 674 675 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 676 // Collect functions used by current module if the Reader has been 677 // given a module. 678 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 679 // which will query FunctionSamples::HasUniqSuffix, so it has to be 680 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 681 // NameTable section is read. 682 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 683 684 // When LoadFuncsToBeUsed is false, load all the function profiles. 685 const uint8_t *Start = Data; 686 if (!LoadFuncsToBeUsed) { 687 while (Data < End) { 688 if (std::error_code EC = readFuncProfile(Data)) 689 return EC; 690 } 691 assert(Data == End && "More data is read than expected"); 692 } else { 693 // Load function profiles on demand. 694 if (Remapper) { 695 for (auto Name : FuncsToUse) { 696 Remapper->insert(Name); 697 } 698 } 699 700 if (useMD5()) { 701 for (auto Name : FuncsToUse) { 702 auto GUID = std::to_string(MD5Hash(Name)); 703 auto iter = FuncOffsetTable.find(StringRef(GUID)); 704 if (iter == FuncOffsetTable.end()) 705 continue; 706 const uint8_t *FuncProfileAddr = Start + iter->second; 707 assert(FuncProfileAddr < End && "out of LBRProfile section"); 708 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 709 return EC; 710 } 711 } else if (FunctionSamples::ProfileIsCS) { 712 // Compute the ordered set of names, so we can 713 // get all context profiles under a subtree by 714 // iterating through the ordered names. 715 struct Comparer { 716 // Ignore the closing ']' when ordering context 717 bool operator()(const StringRef &L, const StringRef &R) const { 718 return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1); 719 } 720 }; 721 std::set<StringRef, Comparer> OrderedNames; 722 for (auto Name : FuncOffsetTable) { 723 OrderedNames.insert(Name.first); 724 } 725 726 // For each function in current module, load all 727 // context profiles for the function. 728 for (auto NameOffset : FuncOffsetTable) { 729 StringRef ContextName = NameOffset.first; 730 SampleContext FContext(ContextName); 731 auto FuncName = FContext.getNameWithoutContext(); 732 if (!FuncsToUse.count(FuncName) && 733 (!Remapper || !Remapper->exist(FuncName))) 734 continue; 735 736 // For each context profile we need, try to load 737 // all context profile in the subtree. This can 738 // help profile guided importing for ThinLTO. 739 auto It = OrderedNames.find(ContextName); 740 while (It != OrderedNames.end() && 741 It->startswith(ContextName.substr(0, ContextName.size() - 1))) { 742 const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; 743 assert(FuncProfileAddr < End && "out of LBRProfile section"); 744 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 745 return EC; 746 // Remove loaded context profile so we won't 747 // load it repeatedly. 748 It = OrderedNames.erase(It); 749 } 750 } 751 } else { 752 for (auto NameOffset : FuncOffsetTable) { 753 SampleContext FContext(NameOffset.first); 754 auto FuncName = FContext.getNameWithoutContext(); 755 if (!FuncsToUse.count(FuncName) && 756 (!Remapper || !Remapper->exist(FuncName))) 757 continue; 758 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 759 assert(FuncProfileAddr < End && "out of LBRProfile section"); 760 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 761 return EC; 762 } 763 } 764 Data = End; 765 } 766 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 767 "Cannot have both context-sensitive and regular profile"); 768 assert(ProfileIsCS == (CSProfileCount > 0) && 769 "Section flag should be consistent with actual profile"); 770 return sampleprof_error::success; 771 } 772 773 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 774 if (!ProfSymList) 775 ProfSymList = std::make_unique<ProfileSymbolList>(); 776 777 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 778 return EC; 779 780 Data = End; 781 return sampleprof_error::success; 782 } 783 784 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 785 const uint8_t *SecStart, const uint64_t SecSize, 786 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 787 Data = SecStart; 788 End = SecStart + SecSize; 789 auto DecompressSize = readNumber<uint64_t>(); 790 if (std::error_code EC = DecompressSize.getError()) 791 return EC; 792 DecompressBufSize = *DecompressSize; 793 794 auto CompressSize = readNumber<uint64_t>(); 795 if (std::error_code EC = CompressSize.getError()) 796 return EC; 797 798 if (!llvm::zlib::isAvailable()) 799 return sampleprof_error::zlib_unavailable; 800 801 StringRef CompressedStrings(reinterpret_cast<const char *>(Data), 802 *CompressSize); 803 char *Buffer = Allocator.Allocate<char>(DecompressBufSize); 804 size_t UCSize = DecompressBufSize; 805 llvm::Error E = 806 zlib::uncompress(CompressedStrings, Buffer, UCSize); 807 if (E) 808 return sampleprof_error::uncompress_failed; 809 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 810 return sampleprof_error::success; 811 } 812 813 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 814 const uint8_t *BufStart = 815 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 816 817 for (auto &Entry : SecHdrTable) { 818 // Skip empty section. 819 if (!Entry.Size) 820 continue; 821 822 // Skip sections without context when SkipFlatProf is true. 823 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 824 continue; 825 826 const uint8_t *SecStart = BufStart + Entry.Offset; 827 uint64_t SecSize = Entry.Size; 828 829 // If the section is compressed, decompress it into a buffer 830 // DecompressBuf before reading the actual data. The pointee of 831 // 'Data' will be changed to buffer hold by DecompressBuf 832 // temporarily when reading the actual data. 833 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 834 if (isCompressed) { 835 const uint8_t *DecompressBuf; 836 uint64_t DecompressBufSize; 837 if (std::error_code EC = decompressSection( 838 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 839 return EC; 840 SecStart = DecompressBuf; 841 SecSize = DecompressBufSize; 842 } 843 844 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 845 return EC; 846 if (Data != SecStart + SecSize) 847 return sampleprof_error::malformed; 848 849 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 850 if (isCompressed) { 851 Data = BufStart + Entry.Offset; 852 End = BufStart + Buffer->getBufferSize(); 853 } 854 } 855 856 return sampleprof_error::success; 857 } 858 859 std::error_code SampleProfileReaderCompactBinary::readImpl() { 860 // Collect functions used by current module if the Reader has been 861 // given a module. 862 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 863 864 std::vector<uint64_t> OffsetsToUse; 865 if (!LoadFuncsToBeUsed) { 866 // load all the function profiles. 867 for (auto FuncEntry : FuncOffsetTable) { 868 OffsetsToUse.push_back(FuncEntry.second); 869 } 870 } else { 871 // load function profiles on demand. 872 for (auto Name : FuncsToUse) { 873 auto GUID = std::to_string(MD5Hash(Name)); 874 auto iter = FuncOffsetTable.find(StringRef(GUID)); 875 if (iter == FuncOffsetTable.end()) 876 continue; 877 OffsetsToUse.push_back(iter->second); 878 } 879 } 880 881 for (auto Offset : OffsetsToUse) { 882 const uint8_t *SavedData = Data; 883 if (std::error_code EC = readFuncProfile( 884 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 885 Offset)) 886 return EC; 887 Data = SavedData; 888 } 889 return sampleprof_error::success; 890 } 891 892 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 893 if (Magic == SPMagic()) 894 return sampleprof_error::success; 895 return sampleprof_error::bad_magic; 896 } 897 898 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 899 if (Magic == SPMagic(SPF_Ext_Binary)) 900 return sampleprof_error::success; 901 return sampleprof_error::bad_magic; 902 } 903 904 std::error_code 905 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { 906 if (Magic == SPMagic(SPF_Compact_Binary)) 907 return sampleprof_error::success; 908 return sampleprof_error::bad_magic; 909 } 910 911 std::error_code SampleProfileReaderBinary::readNameTable() { 912 auto Size = readNumber<uint32_t>(); 913 if (std::error_code EC = Size.getError()) 914 return EC; 915 NameTable.reserve(*Size + NameTable.size()); 916 for (uint32_t I = 0; I < *Size; ++I) { 917 auto Name(readString()); 918 if (std::error_code EC = Name.getError()) 919 return EC; 920 NameTable.push_back(*Name); 921 } 922 923 return sampleprof_error::success; 924 } 925 926 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() { 927 auto Size = readNumber<uint64_t>(); 928 if (std::error_code EC = Size.getError()) 929 return EC; 930 MD5StringBuf = std::make_unique<std::vector<std::string>>(); 931 MD5StringBuf->reserve(*Size); 932 if (FixedLengthMD5) { 933 // Preallocate and initialize NameTable so we can check whether a name 934 // index has been read before by checking whether the element in the 935 // NameTable is empty, meanwhile readStringIndex can do the boundary 936 // check using the size of NameTable. 937 NameTable.resize(*Size + NameTable.size()); 938 939 MD5NameMemStart = Data; 940 Data = Data + (*Size) * sizeof(uint64_t); 941 return sampleprof_error::success; 942 } 943 NameTable.reserve(*Size); 944 for (uint32_t I = 0; I < *Size; ++I) { 945 auto FID = readNumber<uint64_t>(); 946 if (std::error_code EC = FID.getError()) 947 return EC; 948 MD5StringBuf->push_back(std::to_string(*FID)); 949 // NameTable is a vector of StringRef. Here it is pushing back a 950 // StringRef initialized with the last string in MD5stringBuf. 951 NameTable.push_back(MD5StringBuf->back()); 952 } 953 return sampleprof_error::success; 954 } 955 956 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { 957 if (IsMD5) 958 return readMD5NameTable(); 959 return SampleProfileReaderBinary::readNameTable(); 960 } 961 962 std::error_code 963 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { 964 while (Data < End) { 965 auto FName(readStringFromTable()); 966 if (std::error_code EC = FName.getError()) 967 return EC; 968 969 SampleContext FContext(*FName); 970 bool ProfileInMap = Profiles.count(FContext); 971 972 if (ProfileIsProbeBased) { 973 auto Checksum = readNumber<uint64_t>(); 974 if (std::error_code EC = Checksum.getError()) 975 return EC; 976 if (ProfileInMap) 977 Profiles[FContext].setFunctionHash(*Checksum); 978 } 979 980 if (ProfileHasAttribute) { 981 auto Attributes = readNumber<uint32_t>(); 982 if (std::error_code EC = Attributes.getError()) 983 return EC; 984 if (ProfileInMap) 985 Profiles[FContext].getContext().setAllAttributes(*Attributes); 986 } 987 } 988 989 assert(Data == End && "More data is read than expected"); 990 return sampleprof_error::success; 991 } 992 993 std::error_code SampleProfileReaderCompactBinary::readNameTable() { 994 auto Size = readNumber<uint64_t>(); 995 if (std::error_code EC = Size.getError()) 996 return EC; 997 NameTable.reserve(*Size); 998 for (uint32_t I = 0; I < *Size; ++I) { 999 auto FID = readNumber<uint64_t>(); 1000 if (std::error_code EC = FID.getError()) 1001 return EC; 1002 NameTable.push_back(std::to_string(*FID)); 1003 } 1004 return sampleprof_error::success; 1005 } 1006 1007 std::error_code 1008 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) { 1009 SecHdrTableEntry Entry; 1010 auto Type = readUnencodedNumber<uint64_t>(); 1011 if (std::error_code EC = Type.getError()) 1012 return EC; 1013 Entry.Type = static_cast<SecType>(*Type); 1014 1015 auto Flags = readUnencodedNumber<uint64_t>(); 1016 if (std::error_code EC = Flags.getError()) 1017 return EC; 1018 Entry.Flags = *Flags; 1019 1020 auto Offset = readUnencodedNumber<uint64_t>(); 1021 if (std::error_code EC = Offset.getError()) 1022 return EC; 1023 Entry.Offset = *Offset; 1024 1025 auto Size = readUnencodedNumber<uint64_t>(); 1026 if (std::error_code EC = Size.getError()) 1027 return EC; 1028 Entry.Size = *Size; 1029 1030 Entry.LayoutIndex = Idx; 1031 SecHdrTable.push_back(std::move(Entry)); 1032 return sampleprof_error::success; 1033 } 1034 1035 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1036 auto EntryNum = readUnencodedNumber<uint64_t>(); 1037 if (std::error_code EC = EntryNum.getError()) 1038 return EC; 1039 1040 for (uint32_t i = 0; i < (*EntryNum); i++) 1041 if (std::error_code EC = readSecHdrTableEntry(i)) 1042 return EC; 1043 1044 return sampleprof_error::success; 1045 } 1046 1047 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1048 const uint8_t *BufStart = 1049 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1050 Data = BufStart; 1051 End = BufStart + Buffer->getBufferSize(); 1052 1053 if (std::error_code EC = readMagicIdent()) 1054 return EC; 1055 1056 if (std::error_code EC = readSecHdrTable()) 1057 return EC; 1058 1059 return sampleprof_error::success; 1060 } 1061 1062 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1063 uint64_t Size = 0; 1064 for (auto &Entry : SecHdrTable) { 1065 if (Entry.Type == Type) 1066 Size += Entry.Size; 1067 } 1068 return Size; 1069 } 1070 1071 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1072 // Sections in SecHdrTable is not necessarily in the same order as 1073 // sections in the profile because section like FuncOffsetTable needs 1074 // to be written after section LBRProfile but needs to be read before 1075 // section LBRProfile, so we cannot simply use the last entry in 1076 // SecHdrTable to calculate the file size. 1077 uint64_t FileSize = 0; 1078 for (auto &Entry : SecHdrTable) { 1079 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1080 } 1081 return FileSize; 1082 } 1083 1084 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1085 std::string Flags; 1086 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1087 Flags.append("{compressed,"); 1088 else 1089 Flags.append("{"); 1090 1091 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1092 Flags.append("flat,"); 1093 1094 switch (Entry.Type) { 1095 case SecNameTable: 1096 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1097 Flags.append("fixlenmd5,"); 1098 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1099 Flags.append("md5,"); 1100 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1101 Flags.append("uniq,"); 1102 break; 1103 case SecProfSummary: 1104 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1105 Flags.append("partial,"); 1106 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1107 Flags.append("context,"); 1108 break; 1109 default: 1110 break; 1111 } 1112 char &last = Flags.back(); 1113 if (last == ',') 1114 last = '}'; 1115 else 1116 Flags.append("}"); 1117 return Flags; 1118 } 1119 1120 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1121 uint64_t TotalSecsSize = 0; 1122 for (auto &Entry : SecHdrTable) { 1123 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1124 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1125 << "\n"; 1126 ; 1127 TotalSecsSize += Entry.Size; 1128 } 1129 uint64_t HeaderSize = SecHdrTable.front().Offset; 1130 assert(HeaderSize + TotalSecsSize == getFileSize() && 1131 "Size of 'header + sections' doesn't match the total size of profile"); 1132 1133 OS << "Header Size: " << HeaderSize << "\n"; 1134 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1135 OS << "File Size: " << getFileSize() << "\n"; 1136 return true; 1137 } 1138 1139 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1140 // Read and check the magic identifier. 1141 auto Magic = readNumber<uint64_t>(); 1142 if (std::error_code EC = Magic.getError()) 1143 return EC; 1144 else if (std::error_code EC = verifySPMagic(*Magic)) 1145 return EC; 1146 1147 // Read the version number. 1148 auto Version = readNumber<uint64_t>(); 1149 if (std::error_code EC = Version.getError()) 1150 return EC; 1151 else if (*Version != SPVersion()) 1152 return sampleprof_error::unsupported_version; 1153 1154 return sampleprof_error::success; 1155 } 1156 1157 std::error_code SampleProfileReaderBinary::readHeader() { 1158 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1159 End = Data + Buffer->getBufferSize(); 1160 1161 if (std::error_code EC = readMagicIdent()) 1162 return EC; 1163 1164 if (std::error_code EC = readSummary()) 1165 return EC; 1166 1167 if (std::error_code EC = readNameTable()) 1168 return EC; 1169 return sampleprof_error::success; 1170 } 1171 1172 std::error_code SampleProfileReaderCompactBinary::readHeader() { 1173 SampleProfileReaderBinary::readHeader(); 1174 if (std::error_code EC = readFuncOffsetTable()) 1175 return EC; 1176 return sampleprof_error::success; 1177 } 1178 1179 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { 1180 auto TableOffset = readUnencodedNumber<uint64_t>(); 1181 if (std::error_code EC = TableOffset.getError()) 1182 return EC; 1183 1184 const uint8_t *SavedData = Data; 1185 const uint8_t *TableStart = 1186 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 1187 *TableOffset; 1188 Data = TableStart; 1189 1190 auto Size = readNumber<uint64_t>(); 1191 if (std::error_code EC = Size.getError()) 1192 return EC; 1193 1194 FuncOffsetTable.reserve(*Size); 1195 for (uint32_t I = 0; I < *Size; ++I) { 1196 auto FName(readStringFromTable()); 1197 if (std::error_code EC = FName.getError()) 1198 return EC; 1199 1200 auto Offset = readNumber<uint64_t>(); 1201 if (std::error_code EC = Offset.getError()) 1202 return EC; 1203 1204 FuncOffsetTable[*FName] = *Offset; 1205 } 1206 End = TableStart; 1207 Data = SavedData; 1208 return sampleprof_error::success; 1209 } 1210 1211 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() { 1212 if (!M) 1213 return false; 1214 FuncsToUse.clear(); 1215 for (auto &F : *M) 1216 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 1217 return true; 1218 } 1219 1220 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1221 std::vector<ProfileSummaryEntry> &Entries) { 1222 auto Cutoff = readNumber<uint64_t>(); 1223 if (std::error_code EC = Cutoff.getError()) 1224 return EC; 1225 1226 auto MinBlockCount = readNumber<uint64_t>(); 1227 if (std::error_code EC = MinBlockCount.getError()) 1228 return EC; 1229 1230 auto NumBlocks = readNumber<uint64_t>(); 1231 if (std::error_code EC = NumBlocks.getError()) 1232 return EC; 1233 1234 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1235 return sampleprof_error::success; 1236 } 1237 1238 std::error_code SampleProfileReaderBinary::readSummary() { 1239 auto TotalCount = readNumber<uint64_t>(); 1240 if (std::error_code EC = TotalCount.getError()) 1241 return EC; 1242 1243 auto MaxBlockCount = readNumber<uint64_t>(); 1244 if (std::error_code EC = MaxBlockCount.getError()) 1245 return EC; 1246 1247 auto MaxFunctionCount = readNumber<uint64_t>(); 1248 if (std::error_code EC = MaxFunctionCount.getError()) 1249 return EC; 1250 1251 auto NumBlocks = readNumber<uint64_t>(); 1252 if (std::error_code EC = NumBlocks.getError()) 1253 return EC; 1254 1255 auto NumFunctions = readNumber<uint64_t>(); 1256 if (std::error_code EC = NumFunctions.getError()) 1257 return EC; 1258 1259 auto NumSummaryEntries = readNumber<uint64_t>(); 1260 if (std::error_code EC = NumSummaryEntries.getError()) 1261 return EC; 1262 1263 std::vector<ProfileSummaryEntry> Entries; 1264 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1265 std::error_code EC = readSummaryEntry(Entries); 1266 if (EC != sampleprof_error::success) 1267 return EC; 1268 } 1269 Summary = std::make_unique<ProfileSummary>( 1270 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1271 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1272 1273 return sampleprof_error::success; 1274 } 1275 1276 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1277 const uint8_t *Data = 1278 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1279 uint64_t Magic = decodeULEB128(Data); 1280 return Magic == SPMagic(); 1281 } 1282 1283 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1284 const uint8_t *Data = 1285 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1286 uint64_t Magic = decodeULEB128(Data); 1287 return Magic == SPMagic(SPF_Ext_Binary); 1288 } 1289 1290 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { 1291 const uint8_t *Data = 1292 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1293 uint64_t Magic = decodeULEB128(Data); 1294 return Magic == SPMagic(SPF_Compact_Binary); 1295 } 1296 1297 std::error_code SampleProfileReaderGCC::skipNextWord() { 1298 uint32_t dummy; 1299 if (!GcovBuffer.readInt(dummy)) 1300 return sampleprof_error::truncated; 1301 return sampleprof_error::success; 1302 } 1303 1304 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1305 if (sizeof(T) <= sizeof(uint32_t)) { 1306 uint32_t Val; 1307 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1308 return static_cast<T>(Val); 1309 } else if (sizeof(T) <= sizeof(uint64_t)) { 1310 uint64_t Val; 1311 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1312 return static_cast<T>(Val); 1313 } 1314 1315 std::error_code EC = sampleprof_error::malformed; 1316 reportError(0, EC.message()); 1317 return EC; 1318 } 1319 1320 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1321 StringRef Str; 1322 if (!GcovBuffer.readString(Str)) 1323 return sampleprof_error::truncated; 1324 return Str; 1325 } 1326 1327 std::error_code SampleProfileReaderGCC::readHeader() { 1328 // Read the magic identifier. 1329 if (!GcovBuffer.readGCDAFormat()) 1330 return sampleprof_error::unrecognized_format; 1331 1332 // Read the version number. Note - the GCC reader does not validate this 1333 // version, but the profile creator generates v704. 1334 GCOV::GCOVVersion version; 1335 if (!GcovBuffer.readGCOVVersion(version)) 1336 return sampleprof_error::unrecognized_format; 1337 1338 if (version != GCOV::V407) 1339 return sampleprof_error::unsupported_version; 1340 1341 // Skip the empty integer. 1342 if (std::error_code EC = skipNextWord()) 1343 return EC; 1344 1345 return sampleprof_error::success; 1346 } 1347 1348 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1349 uint32_t Tag; 1350 if (!GcovBuffer.readInt(Tag)) 1351 return sampleprof_error::truncated; 1352 1353 if (Tag != Expected) 1354 return sampleprof_error::malformed; 1355 1356 if (std::error_code EC = skipNextWord()) 1357 return EC; 1358 1359 return sampleprof_error::success; 1360 } 1361 1362 std::error_code SampleProfileReaderGCC::readNameTable() { 1363 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1364 return EC; 1365 1366 uint32_t Size; 1367 if (!GcovBuffer.readInt(Size)) 1368 return sampleprof_error::truncated; 1369 1370 for (uint32_t I = 0; I < Size; ++I) { 1371 StringRef Str; 1372 if (!GcovBuffer.readString(Str)) 1373 return sampleprof_error::truncated; 1374 Names.push_back(std::string(Str)); 1375 } 1376 1377 return sampleprof_error::success; 1378 } 1379 1380 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1381 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1382 return EC; 1383 1384 uint32_t NumFunctions; 1385 if (!GcovBuffer.readInt(NumFunctions)) 1386 return sampleprof_error::truncated; 1387 1388 InlineCallStack Stack; 1389 for (uint32_t I = 0; I < NumFunctions; ++I) 1390 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1391 return EC; 1392 1393 computeSummary(); 1394 return sampleprof_error::success; 1395 } 1396 1397 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1398 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1399 uint64_t HeadCount = 0; 1400 if (InlineStack.size() == 0) 1401 if (!GcovBuffer.readInt64(HeadCount)) 1402 return sampleprof_error::truncated; 1403 1404 uint32_t NameIdx; 1405 if (!GcovBuffer.readInt(NameIdx)) 1406 return sampleprof_error::truncated; 1407 1408 StringRef Name(Names[NameIdx]); 1409 1410 uint32_t NumPosCounts; 1411 if (!GcovBuffer.readInt(NumPosCounts)) 1412 return sampleprof_error::truncated; 1413 1414 uint32_t NumCallsites; 1415 if (!GcovBuffer.readInt(NumCallsites)) 1416 return sampleprof_error::truncated; 1417 1418 FunctionSamples *FProfile = nullptr; 1419 if (InlineStack.size() == 0) { 1420 // If this is a top function that we have already processed, do not 1421 // update its profile again. This happens in the presence of 1422 // function aliases. Since these aliases share the same function 1423 // body, there will be identical replicated profiles for the 1424 // original function. In this case, we simply not bother updating 1425 // the profile of the original function. 1426 FProfile = &Profiles[Name]; 1427 FProfile->addHeadSamples(HeadCount); 1428 if (FProfile->getTotalSamples() > 0) 1429 Update = false; 1430 } else { 1431 // Otherwise, we are reading an inlined instance. The top of the 1432 // inline stack contains the profile of the caller. Insert this 1433 // callee in the caller's CallsiteMap. 1434 FunctionSamples *CallerProfile = InlineStack.front(); 1435 uint32_t LineOffset = Offset >> 16; 1436 uint32_t Discriminator = Offset & 0xffff; 1437 FProfile = &CallerProfile->functionSamplesAt( 1438 LineLocation(LineOffset, Discriminator))[std::string(Name)]; 1439 } 1440 FProfile->setName(Name); 1441 1442 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1443 uint32_t Offset; 1444 if (!GcovBuffer.readInt(Offset)) 1445 return sampleprof_error::truncated; 1446 1447 uint32_t NumTargets; 1448 if (!GcovBuffer.readInt(NumTargets)) 1449 return sampleprof_error::truncated; 1450 1451 uint64_t Count; 1452 if (!GcovBuffer.readInt64(Count)) 1453 return sampleprof_error::truncated; 1454 1455 // The line location is encoded in the offset as: 1456 // high 16 bits: line offset to the start of the function. 1457 // low 16 bits: discriminator. 1458 uint32_t LineOffset = Offset >> 16; 1459 uint32_t Discriminator = Offset & 0xffff; 1460 1461 InlineCallStack NewStack; 1462 NewStack.push_back(FProfile); 1463 llvm::append_range(NewStack, InlineStack); 1464 if (Update) { 1465 // Walk up the inline stack, adding the samples on this line to 1466 // the total sample count of the callers in the chain. 1467 for (auto CallerProfile : NewStack) 1468 CallerProfile->addTotalSamples(Count); 1469 1470 // Update the body samples for the current profile. 1471 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1472 } 1473 1474 // Process the list of functions called at an indirect call site. 1475 // These are all the targets that a function pointer (or virtual 1476 // function) resolved at runtime. 1477 for (uint32_t J = 0; J < NumTargets; J++) { 1478 uint32_t HistVal; 1479 if (!GcovBuffer.readInt(HistVal)) 1480 return sampleprof_error::truncated; 1481 1482 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1483 return sampleprof_error::malformed; 1484 1485 uint64_t TargetIdx; 1486 if (!GcovBuffer.readInt64(TargetIdx)) 1487 return sampleprof_error::truncated; 1488 StringRef TargetName(Names[TargetIdx]); 1489 1490 uint64_t TargetCount; 1491 if (!GcovBuffer.readInt64(TargetCount)) 1492 return sampleprof_error::truncated; 1493 1494 if (Update) 1495 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1496 TargetName, TargetCount); 1497 } 1498 } 1499 1500 // Process all the inlined callers into the current function. These 1501 // are all the callsites that were inlined into this function. 1502 for (uint32_t I = 0; I < NumCallsites; I++) { 1503 // The offset is encoded as: 1504 // high 16 bits: line offset to the start of the function. 1505 // low 16 bits: discriminator. 1506 uint32_t Offset; 1507 if (!GcovBuffer.readInt(Offset)) 1508 return sampleprof_error::truncated; 1509 InlineCallStack NewStack; 1510 NewStack.push_back(FProfile); 1511 llvm::append_range(NewStack, InlineStack); 1512 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1513 return EC; 1514 } 1515 1516 return sampleprof_error::success; 1517 } 1518 1519 /// Read a GCC AutoFDO profile. 1520 /// 1521 /// This format is generated by the Linux Perf conversion tool at 1522 /// https://github.com/google/autofdo. 1523 std::error_code SampleProfileReaderGCC::readImpl() { 1524 // Read the string table. 1525 if (std::error_code EC = readNameTable()) 1526 return EC; 1527 1528 // Read the source profile. 1529 if (std::error_code EC = readFunctionProfiles()) 1530 return EC; 1531 1532 return sampleprof_error::success; 1533 } 1534 1535 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1536 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1537 return Magic == "adcg*704"; 1538 } 1539 1540 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1541 // If the reader uses MD5 to represent string, we can't remap it because 1542 // we don't know what the original function names were. 1543 if (Reader.useMD5()) { 1544 Ctx.diagnose(DiagnosticInfoSampleProfile( 1545 Reader.getBuffer()->getBufferIdentifier(), 1546 "Profile data remapping cannot be applied to profile data " 1547 "in compact format (original mangled names are not available).", 1548 DS_Warning)); 1549 return; 1550 } 1551 1552 // CSSPGO-TODO: Remapper is not yet supported. 1553 // We will need to remap the entire context string. 1554 assert(Remappings && "should be initialized while creating remapper"); 1555 for (auto &Sample : Reader.getProfiles()) { 1556 DenseSet<StringRef> NamesInSample; 1557 Sample.second.findAllNames(NamesInSample); 1558 for (auto &Name : NamesInSample) 1559 if (auto Key = Remappings->insert(Name)) 1560 NameMap.insert({Key, Name}); 1561 } 1562 1563 RemappingApplied = true; 1564 } 1565 1566 Optional<StringRef> 1567 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1568 if (auto Key = Remappings->lookup(Fname)) 1569 return NameMap.lookup(Key); 1570 return None; 1571 } 1572 1573 /// Prepare a memory buffer for the contents of \p Filename. 1574 /// 1575 /// \returns an error code indicating the status of the buffer. 1576 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1577 setupMemoryBuffer(const Twine &Filename) { 1578 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); 1579 if (std::error_code EC = BufferOrErr.getError()) 1580 return EC; 1581 auto Buffer = std::move(BufferOrErr.get()); 1582 1583 // Sanity check the file. 1584 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max()) 1585 return sampleprof_error::too_large; 1586 1587 return std::move(Buffer); 1588 } 1589 1590 /// Create a sample profile reader based on the format of the input file. 1591 /// 1592 /// \param Filename The file to open. 1593 /// 1594 /// \param C The LLVM context to use to emit diagnostics. 1595 /// 1596 /// \param RemapFilename The file used for profile remapping. 1597 /// 1598 /// \returns an error code indicating the status of the created reader. 1599 ErrorOr<std::unique_ptr<SampleProfileReader>> 1600 SampleProfileReader::create(const std::string Filename, LLVMContext &C, 1601 const std::string RemapFilename) { 1602 auto BufferOrError = setupMemoryBuffer(Filename); 1603 if (std::error_code EC = BufferOrError.getError()) 1604 return EC; 1605 return create(BufferOrError.get(), C, RemapFilename); 1606 } 1607 1608 /// Create a sample profile remapper from the given input, to remap the 1609 /// function names in the given profile data. 1610 /// 1611 /// \param Filename The file to open. 1612 /// 1613 /// \param Reader The profile reader the remapper is going to be applied to. 1614 /// 1615 /// \param C The LLVM context to use to emit diagnostics. 1616 /// 1617 /// \returns an error code indicating the status of the created reader. 1618 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1619 SampleProfileReaderItaniumRemapper::create(const std::string Filename, 1620 SampleProfileReader &Reader, 1621 LLVMContext &C) { 1622 auto BufferOrError = setupMemoryBuffer(Filename); 1623 if (std::error_code EC = BufferOrError.getError()) 1624 return EC; 1625 return create(BufferOrError.get(), Reader, C); 1626 } 1627 1628 /// Create a sample profile remapper from the given input, to remap the 1629 /// function names in the given profile data. 1630 /// 1631 /// \param B The memory buffer to create the reader from (assumes ownership). 1632 /// 1633 /// \param C The LLVM context to use to emit diagnostics. 1634 /// 1635 /// \param Reader The profile reader the remapper is going to be applied to. 1636 /// 1637 /// \returns an error code indicating the status of the created reader. 1638 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1639 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1640 SampleProfileReader &Reader, 1641 LLVMContext &C) { 1642 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1643 if (Error E = Remappings->read(*B.get())) { 1644 handleAllErrors( 1645 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1646 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1647 ParseError.getLineNum(), 1648 ParseError.getMessage())); 1649 }); 1650 return sampleprof_error::malformed; 1651 } 1652 1653 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1654 std::move(B), std::move(Remappings), Reader); 1655 } 1656 1657 /// Create a sample profile reader based on the format of the input data. 1658 /// 1659 /// \param B The memory buffer to create the reader from (assumes ownership). 1660 /// 1661 /// \param C The LLVM context to use to emit diagnostics. 1662 /// 1663 /// \param RemapFilename The file used for profile remapping. 1664 /// 1665 /// \returns an error code indicating the status of the created reader. 1666 ErrorOr<std::unique_ptr<SampleProfileReader>> 1667 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1668 const std::string RemapFilename) { 1669 std::unique_ptr<SampleProfileReader> Reader; 1670 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1671 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1672 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1673 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1674 else if (SampleProfileReaderCompactBinary::hasFormat(*B)) 1675 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); 1676 else if (SampleProfileReaderGCC::hasFormat(*B)) 1677 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1678 else if (SampleProfileReaderText::hasFormat(*B)) 1679 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1680 else 1681 return sampleprof_error::unrecognized_format; 1682 1683 if (!RemapFilename.empty()) { 1684 auto ReaderOrErr = 1685 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); 1686 if (std::error_code EC = ReaderOrErr.getError()) { 1687 std::string Msg = "Could not create remapper: " + EC.message(); 1688 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1689 return EC; 1690 } 1691 Reader->Remapper = std::move(ReaderOrErr.get()); 1692 } 1693 1694 FunctionSamples::Format = Reader->getFormat(); 1695 if (std::error_code EC = Reader->readHeader()) { 1696 return EC; 1697 } 1698 1699 return std::move(Reader); 1700 } 1701 1702 // For text and GCC file formats, we compute the summary after reading the 1703 // profile. Binary format has the profile summary in its header. 1704 void SampleProfileReader::computeSummary() { 1705 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1706 Summary = Builder.computeSummaryForProfiles(Profiles); 1707 } 1708