1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the class that reads LLVM sample profiles. It 10 // supports three file formats: text, binary and gcov. 11 // 12 // The textual representation is useful for debugging and testing purposes. The 13 // binary representation is more compact, resulting in smaller file sizes. 14 // 15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation 16 // tool (https://github.com/google/autofdo) 17 // 18 // All three encodings can be used interchangeably as an input sample profile. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/ProfileData/SampleProfReader.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/IR/ProfileSummary.h" 27 #include "llvm/ProfileData/ProfileCommon.h" 28 #include "llvm/ProfileData/SampleProf.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Compression.h" 31 #include "llvm/Support/ErrorOr.h" 32 #include "llvm/Support/LEB128.h" 33 #include "llvm/Support/LineIterator.h" 34 #include "llvm/Support/MD5.h" 35 #include "llvm/Support/MemoryBuffer.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include <algorithm> 38 #include <cstddef> 39 #include <cstdint> 40 #include <limits> 41 #include <memory> 42 #include <set> 43 #include <system_error> 44 #include <vector> 45 46 using namespace llvm; 47 using namespace sampleprof; 48 49 #define DEBUG_TYPE "samplepgo-reader" 50 51 // This internal option specifies if the profile uses FS discriminators. 52 // It only applies to text, binary and compact binary format profiles. 53 // For ext-binary format profiles, the flag is set in the summary. 54 static cl::opt<bool> ProfileIsFSDisciminator( 55 "profile-isfs", cl::Hidden, cl::init(false), 56 cl::desc("Profile uses flow sensitive discriminators")); 57 58 /// Dump the function profile for \p FName. 59 /// 60 /// \param FName Name of the function to print. 61 /// \param OS Stream to emit the output to. 62 void SampleProfileReader::dumpFunctionProfile(StringRef FName, 63 raw_ostream &OS) { 64 OS << "Function: " << FName << ": " << Profiles[FName]; 65 } 66 67 /// Dump all the function profiles found on stream \p OS. 68 void SampleProfileReader::dump(raw_ostream &OS) { 69 std::vector<NameFunctionSamples> V; 70 sortFuncProfiles(Profiles, V); 71 for (const auto &I : V) 72 dumpFunctionProfile(I.first, OS); 73 } 74 75 /// Parse \p Input as function head. 76 /// 77 /// Parse one line of \p Input, and update function name in \p FName, 78 /// function's total sample count in \p NumSamples, function's entry 79 /// count in \p NumHeadSamples. 80 /// 81 /// \returns true if parsing is successful. 82 static bool ParseHead(const StringRef &Input, StringRef &FName, 83 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 84 if (Input[0] == ' ') 85 return false; 86 size_t n2 = Input.rfind(':'); 87 size_t n1 = Input.rfind(':', n2 - 1); 88 FName = Input.substr(0, n1); 89 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 90 return false; 91 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 92 return false; 93 return true; 94 } 95 96 /// Returns true if line offset \p L is legal (only has 16 bits). 97 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 98 99 /// Parse \p Input that contains metadata. 100 /// Possible metadata: 101 /// - CFG Checksum information: 102 /// !CFGChecksum: 12345 103 /// - CFG Checksum information: 104 /// !Attributes: 1 105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 106 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, 107 uint32_t &Attributes) { 108 if (Input.startswith("!CFGChecksum:")) { 109 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 110 return !CFGInfo.getAsInteger(10, FunctionHash); 111 } 112 113 if (Input.startswith("!Attributes:")) { 114 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); 115 return !Attrib.getAsInteger(10, Attributes); 116 } 117 118 return false; 119 } 120 121 enum class LineType { 122 CallSiteProfile, 123 BodyProfile, 124 Metadata, 125 }; 126 127 /// Parse \p Input as line sample. 128 /// 129 /// \param Input input line. 130 /// \param LineTy Type of this line. 131 /// \param Depth the depth of the inline stack. 132 /// \param NumSamples total samples of the line/inlined callsite. 133 /// \param LineOffset line offset to the start of the function. 134 /// \param Discriminator discriminator of the line. 135 /// \param TargetCountMap map from indirect call target to count. 136 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 137 /// 138 /// returns true if parsing is successful. 139 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 140 uint64_t &NumSamples, uint32_t &LineOffset, 141 uint32_t &Discriminator, StringRef &CalleeName, 142 DenseMap<StringRef, uint64_t> &TargetCountMap, 143 uint64_t &FunctionHash, uint32_t &Attributes) { 144 for (Depth = 0; Input[Depth] == ' '; Depth++) 145 ; 146 if (Depth == 0) 147 return false; 148 149 if (Depth == 1 && Input[Depth] == '!') { 150 LineTy = LineType::Metadata; 151 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); 152 } 153 154 size_t n1 = Input.find(':'); 155 StringRef Loc = Input.substr(Depth, n1 - Depth); 156 size_t n2 = Loc.find('.'); 157 if (n2 == StringRef::npos) { 158 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 159 return false; 160 Discriminator = 0; 161 } else { 162 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 163 return false; 164 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 165 return false; 166 } 167 168 StringRef Rest = Input.substr(n1 + 2); 169 if (isDigit(Rest[0])) { 170 LineTy = LineType::BodyProfile; 171 size_t n3 = Rest.find(' '); 172 if (n3 == StringRef::npos) { 173 if (Rest.getAsInteger(10, NumSamples)) 174 return false; 175 } else { 176 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 177 return false; 178 } 179 // Find call targets and their sample counts. 180 // Note: In some cases, there are symbols in the profile which are not 181 // mangled. To accommodate such cases, use colon + integer pairs as the 182 // anchor points. 183 // An example: 184 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 185 // ":1000" and ":437" are used as anchor points so the string above will 186 // be interpreted as 187 // target: _M_construct<char *> 188 // count: 1000 189 // target: string_view<std::allocator<char> > 190 // count: 437 191 while (n3 != StringRef::npos) { 192 n3 += Rest.substr(n3).find_first_not_of(' '); 193 Rest = Rest.substr(n3); 194 n3 = Rest.find_first_of(':'); 195 if (n3 == StringRef::npos || n3 == 0) 196 return false; 197 198 StringRef Target; 199 uint64_t count, n4; 200 while (true) { 201 // Get the segment after the current colon. 202 StringRef AfterColon = Rest.substr(n3 + 1); 203 // Get the target symbol before the current colon. 204 Target = Rest.substr(0, n3); 205 // Check if the word after the current colon is an integer. 206 n4 = AfterColon.find_first_of(' '); 207 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 208 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 209 if (!WordAfterColon.getAsInteger(10, count)) 210 break; 211 212 // Try to find the next colon. 213 uint64_t n5 = AfterColon.find_first_of(':'); 214 if (n5 == StringRef::npos) 215 return false; 216 n3 += n5 + 1; 217 } 218 219 // An anchor point is found. Save the {target, count} pair 220 TargetCountMap[Target] = count; 221 if (n4 == Rest.size()) 222 break; 223 // Change n3 to the next blank space after colon + integer pair. 224 n3 = n4; 225 } 226 } else { 227 LineTy = LineType::CallSiteProfile; 228 size_t n3 = Rest.find_last_of(':'); 229 CalleeName = Rest.substr(0, n3); 230 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 231 return false; 232 } 233 return true; 234 } 235 236 /// Load samples from a text file. 237 /// 238 /// See the documentation at the top of the file for an explanation of 239 /// the expected format. 240 /// 241 /// \returns true if the file was loaded successfully, false otherwise. 242 std::error_code SampleProfileReaderText::readImpl() { 243 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 244 sampleprof_error Result = sampleprof_error::success; 245 246 InlineCallStack InlineStack; 247 uint32_t ProbeProfileCount = 0; 248 249 // SeenMetadata tracks whether we have processed metadata for the current 250 // top-level function profile. 251 bool SeenMetadata = false; 252 253 ProfileIsFS = ProfileIsFSDisciminator; 254 FunctionSamples::ProfileIsFS = ProfileIsFS; 255 for (; !LineIt.is_at_eof(); ++LineIt) { 256 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') 257 continue; 258 // Read the header of each function. 259 // 260 // Note that for function identifiers we are actually expecting 261 // mangled names, but we may not always get them. This happens when 262 // the compiler decides not to emit the function (e.g., it was inlined 263 // and removed). In this case, the binary will not have the linkage 264 // name for the function, so the profiler will emit the function's 265 // unmangled name, which may contain characters like ':' and '>' in its 266 // name (member functions, templates, etc). 267 // 268 // The only requirement we place on the identifier, then, is that it 269 // should not begin with a number. 270 if ((*LineIt)[0] != ' ') { 271 uint64_t NumSamples, NumHeadSamples; 272 StringRef FName; 273 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 274 reportError(LineIt.line_number(), 275 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 276 return sampleprof_error::malformed; 277 } 278 SeenMetadata = false; 279 SampleContext FContext(FName); 280 if (FContext.hasContext()) 281 ++CSProfileCount; 282 Profiles[FContext] = FunctionSamples(); 283 FunctionSamples &FProfile = Profiles[FContext]; 284 FProfile.setName(FContext.getNameWithoutContext()); 285 FProfile.setContext(FContext); 286 MergeResult(Result, FProfile.addTotalSamples(NumSamples)); 287 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); 288 InlineStack.clear(); 289 InlineStack.push_back(&FProfile); 290 } else { 291 uint64_t NumSamples; 292 StringRef FName; 293 DenseMap<StringRef, uint64_t> TargetCountMap; 294 uint32_t Depth, LineOffset, Discriminator; 295 LineType LineTy; 296 uint64_t FunctionHash = 0; 297 uint32_t Attributes = 0; 298 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 299 Discriminator, FName, TargetCountMap, FunctionHash, 300 Attributes)) { 301 reportError(LineIt.line_number(), 302 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 303 *LineIt); 304 return sampleprof_error::malformed; 305 } 306 if (SeenMetadata && LineTy != LineType::Metadata) { 307 // Metadata must be put at the end of a function profile. 308 reportError(LineIt.line_number(), 309 "Found non-metadata after metadata: " + *LineIt); 310 return sampleprof_error::malformed; 311 } 312 313 // Here we handle FS discriminators. 314 Discriminator &= getDiscriminatorMask(); 315 316 while (InlineStack.size() > Depth) { 317 InlineStack.pop_back(); 318 } 319 switch (LineTy) { 320 case LineType::CallSiteProfile: { 321 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 322 LineLocation(LineOffset, Discriminator))[std::string(FName)]; 323 FSamples.setName(FName); 324 MergeResult(Result, FSamples.addTotalSamples(NumSamples)); 325 InlineStack.push_back(&FSamples); 326 break; 327 } 328 case LineType::BodyProfile: { 329 while (InlineStack.size() > Depth) { 330 InlineStack.pop_back(); 331 } 332 FunctionSamples &FProfile = *InlineStack.back(); 333 for (const auto &name_count : TargetCountMap) { 334 MergeResult(Result, FProfile.addCalledTargetSamples( 335 LineOffset, Discriminator, name_count.first, 336 name_count.second)); 337 } 338 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, 339 NumSamples)); 340 break; 341 } 342 case LineType::Metadata: { 343 FunctionSamples &FProfile = *InlineStack.back(); 344 if (FunctionHash) { 345 FProfile.setFunctionHash(FunctionHash); 346 ++ProbeProfileCount; 347 } 348 if (Attributes) 349 FProfile.getContext().setAllAttributes(Attributes); 350 SeenMetadata = true; 351 break; 352 } 353 } 354 } 355 } 356 357 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 358 "Cannot have both context-sensitive and regular profile"); 359 ProfileIsCS = (CSProfileCount > 0); 360 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && 361 "Cannot have both probe-based profiles and regular profiles"); 362 ProfileIsProbeBased = (ProbeProfileCount > 0); 363 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 364 FunctionSamples::ProfileIsCS = ProfileIsCS; 365 366 if (Result == sampleprof_error::success) 367 computeSummary(); 368 369 return Result; 370 } 371 372 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 373 bool result = false; 374 375 // Check that the first non-comment line is a valid function header. 376 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 377 if (!LineIt.is_at_eof()) { 378 if ((*LineIt)[0] != ' ') { 379 uint64_t NumSamples, NumHeadSamples; 380 StringRef FName; 381 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 382 } 383 } 384 385 return result; 386 } 387 388 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 389 unsigned NumBytesRead = 0; 390 std::error_code EC; 391 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 392 393 if (Val > std::numeric_limits<T>::max()) 394 EC = sampleprof_error::malformed; 395 else if (Data + NumBytesRead > End) 396 EC = sampleprof_error::truncated; 397 else 398 EC = sampleprof_error::success; 399 400 if (EC) { 401 reportError(0, EC.message()); 402 return EC; 403 } 404 405 Data += NumBytesRead; 406 return static_cast<T>(Val); 407 } 408 409 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 410 std::error_code EC; 411 StringRef Str(reinterpret_cast<const char *>(Data)); 412 if (Data + Str.size() + 1 > End) { 413 EC = sampleprof_error::truncated; 414 reportError(0, EC.message()); 415 return EC; 416 } 417 418 Data += Str.size() + 1; 419 return Str; 420 } 421 422 template <typename T> 423 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 424 std::error_code EC; 425 426 if (Data + sizeof(T) > End) { 427 EC = sampleprof_error::truncated; 428 reportError(0, EC.message()); 429 return EC; 430 } 431 432 using namespace support; 433 T Val = endian::readNext<T, little, unaligned>(Data); 434 return Val; 435 } 436 437 template <typename T> 438 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 439 std::error_code EC; 440 auto Idx = readNumber<uint32_t>(); 441 if (std::error_code EC = Idx.getError()) 442 return EC; 443 if (*Idx >= Table.size()) 444 return sampleprof_error::truncated_name_table; 445 return *Idx; 446 } 447 448 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() { 449 auto Idx = readStringIndex(NameTable); 450 if (std::error_code EC = Idx.getError()) 451 return EC; 452 453 return NameTable[*Idx]; 454 } 455 456 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() { 457 if (!FixedLengthMD5) 458 return SampleProfileReaderBinary::readStringFromTable(); 459 460 // read NameTable index. 461 auto Idx = readStringIndex(NameTable); 462 if (std::error_code EC = Idx.getError()) 463 return EC; 464 465 // Check whether the name to be accessed has been accessed before, 466 // if not, read it from memory directly. 467 StringRef &SR = NameTable[*Idx]; 468 if (SR.empty()) { 469 const uint8_t *SavedData = Data; 470 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); 471 auto FID = readUnencodedNumber<uint64_t>(); 472 if (std::error_code EC = FID.getError()) 473 return EC; 474 // Save the string converted from uint64_t in MD5StringBuf. All the 475 // references to the name are all StringRefs refering to the string 476 // in MD5StringBuf. 477 MD5StringBuf->push_back(std::to_string(*FID)); 478 SR = MD5StringBuf->back(); 479 Data = SavedData; 480 } 481 return SR; 482 } 483 484 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() { 485 auto Idx = readStringIndex(NameTable); 486 if (std::error_code EC = Idx.getError()) 487 return EC; 488 489 return StringRef(NameTable[*Idx]); 490 } 491 492 std::error_code 493 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 494 auto NumSamples = readNumber<uint64_t>(); 495 if (std::error_code EC = NumSamples.getError()) 496 return EC; 497 FProfile.addTotalSamples(*NumSamples); 498 499 // Read the samples in the body. 500 auto NumRecords = readNumber<uint32_t>(); 501 if (std::error_code EC = NumRecords.getError()) 502 return EC; 503 504 for (uint32_t I = 0; I < *NumRecords; ++I) { 505 auto LineOffset = readNumber<uint64_t>(); 506 if (std::error_code EC = LineOffset.getError()) 507 return EC; 508 509 if (!isOffsetLegal(*LineOffset)) { 510 return std::error_code(); 511 } 512 513 auto Discriminator = readNumber<uint64_t>(); 514 if (std::error_code EC = Discriminator.getError()) 515 return EC; 516 517 auto NumSamples = readNumber<uint64_t>(); 518 if (std::error_code EC = NumSamples.getError()) 519 return EC; 520 521 auto NumCalls = readNumber<uint32_t>(); 522 if (std::error_code EC = NumCalls.getError()) 523 return EC; 524 525 // Here we handle FS discriminators: 526 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 527 528 for (uint32_t J = 0; J < *NumCalls; ++J) { 529 auto CalledFunction(readStringFromTable()); 530 if (std::error_code EC = CalledFunction.getError()) 531 return EC; 532 533 auto CalledFunctionSamples = readNumber<uint64_t>(); 534 if (std::error_code EC = CalledFunctionSamples.getError()) 535 return EC; 536 537 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, 538 *CalledFunction, *CalledFunctionSamples); 539 } 540 541 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); 542 } 543 544 // Read all the samples for inlined function calls. 545 auto NumCallsites = readNumber<uint32_t>(); 546 if (std::error_code EC = NumCallsites.getError()) 547 return EC; 548 549 for (uint32_t J = 0; J < *NumCallsites; ++J) { 550 auto LineOffset = readNumber<uint64_t>(); 551 if (std::error_code EC = LineOffset.getError()) 552 return EC; 553 554 auto Discriminator = readNumber<uint64_t>(); 555 if (std::error_code EC = Discriminator.getError()) 556 return EC; 557 558 auto FName(readStringFromTable()); 559 if (std::error_code EC = FName.getError()) 560 return EC; 561 562 // Here we handle FS discriminators: 563 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 564 565 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 566 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; 567 CalleeProfile.setName(*FName); 568 if (std::error_code EC = readProfile(CalleeProfile)) 569 return EC; 570 } 571 572 return sampleprof_error::success; 573 } 574 575 std::error_code 576 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 577 Data = Start; 578 auto NumHeadSamples = readNumber<uint64_t>(); 579 if (std::error_code EC = NumHeadSamples.getError()) 580 return EC; 581 582 auto FName(readStringFromTable()); 583 if (std::error_code EC = FName.getError()) 584 return EC; 585 586 SampleContext FContext(*FName); 587 Profiles[FContext] = FunctionSamples(); 588 FunctionSamples &FProfile = Profiles[FContext]; 589 FProfile.setName(FContext.getNameWithoutContext()); 590 FProfile.setContext(FContext); 591 FProfile.addHeadSamples(*NumHeadSamples); 592 593 if (FContext.hasContext()) 594 CSProfileCount++; 595 596 if (std::error_code EC = readProfile(FProfile)) 597 return EC; 598 return sampleprof_error::success; 599 } 600 601 std::error_code SampleProfileReaderBinary::readImpl() { 602 ProfileIsFS = ProfileIsFSDisciminator; 603 FunctionSamples::ProfileIsFS = ProfileIsFS; 604 while (!at_eof()) { 605 if (std::error_code EC = readFuncProfile(Data)) 606 return EC; 607 } 608 609 return sampleprof_error::success; 610 } 611 612 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 613 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 614 Data = Start; 615 End = Start + Size; 616 switch (Entry.Type) { 617 case SecProfSummary: 618 if (std::error_code EC = readSummary()) 619 return EC; 620 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 621 Summary->setPartialProfile(true); 622 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 623 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 624 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 625 FunctionSamples::ProfileIsFS = ProfileIsFS = true; 626 break; 627 case SecNameTable: { 628 FixedLengthMD5 = 629 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 630 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 631 assert((!FixedLengthMD5 || UseMD5) && 632 "If FixedLengthMD5 is true, UseMD5 has to be true"); 633 FunctionSamples::HasUniqSuffix = 634 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 635 if (std::error_code EC = readNameTableSec(UseMD5)) 636 return EC; 637 break; 638 } 639 case SecLBRProfile: 640 if (std::error_code EC = readFuncProfiles()) 641 return EC; 642 break; 643 case SecFuncOffsetTable: 644 if (std::error_code EC = readFuncOffsetTable()) 645 return EC; 646 break; 647 case SecFuncMetadata: { 648 ProfileIsProbeBased = 649 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 650 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 651 bool HasAttribute = 652 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); 653 if (std::error_code EC = readFuncMetadata(HasAttribute)) 654 return EC; 655 break; 656 } 657 case SecProfileSymbolList: 658 if (std::error_code EC = readProfileSymbolList()) 659 return EC; 660 break; 661 default: 662 if (std::error_code EC = readCustomSection(Entry)) 663 return EC; 664 break; 665 } 666 return sampleprof_error::success; 667 } 668 669 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 670 if (!M) 671 return false; 672 FuncsToUse.clear(); 673 for (auto &F : *M) 674 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 675 return true; 676 } 677 678 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 679 // If there are more than one FuncOffsetTable, the profile read associated 680 // with previous FuncOffsetTable has to be done before next FuncOffsetTable 681 // is read. 682 FuncOffsetTable.clear(); 683 684 auto Size = readNumber<uint64_t>(); 685 if (std::error_code EC = Size.getError()) 686 return EC; 687 688 FuncOffsetTable.reserve(*Size); 689 for (uint32_t I = 0; I < *Size; ++I) { 690 auto FName(readStringFromTable()); 691 if (std::error_code EC = FName.getError()) 692 return EC; 693 694 auto Offset = readNumber<uint64_t>(); 695 if (std::error_code EC = Offset.getError()) 696 return EC; 697 698 FuncOffsetTable[*FName] = *Offset; 699 } 700 return sampleprof_error::success; 701 } 702 703 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 704 // Collect functions used by current module if the Reader has been 705 // given a module. 706 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 707 // which will query FunctionSamples::HasUniqSuffix, so it has to be 708 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 709 // NameTable section is read. 710 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 711 712 // When LoadFuncsToBeUsed is false, load all the function profiles. 713 const uint8_t *Start = Data; 714 if (!LoadFuncsToBeUsed) { 715 while (Data < End) { 716 if (std::error_code EC = readFuncProfile(Data)) 717 return EC; 718 } 719 assert(Data == End && "More data is read than expected"); 720 } else { 721 // Load function profiles on demand. 722 if (Remapper) { 723 for (auto Name : FuncsToUse) { 724 Remapper->insert(Name); 725 } 726 } 727 728 if (useMD5()) { 729 for (auto Name : FuncsToUse) { 730 auto GUID = std::to_string(MD5Hash(Name)); 731 auto iter = FuncOffsetTable.find(StringRef(GUID)); 732 if (iter == FuncOffsetTable.end()) 733 continue; 734 const uint8_t *FuncProfileAddr = Start + iter->second; 735 assert(FuncProfileAddr < End && "out of LBRProfile section"); 736 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 737 return EC; 738 } 739 } else if (FunctionSamples::ProfileIsCS) { 740 // Compute the ordered set of names, so we can 741 // get all context profiles under a subtree by 742 // iterating through the ordered names. 743 struct Comparer { 744 // Ignore the closing ']' when ordering context 745 bool operator()(const StringRef &L, const StringRef &R) const { 746 return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1); 747 } 748 }; 749 std::set<StringRef, Comparer> OrderedNames; 750 for (auto Name : FuncOffsetTable) { 751 OrderedNames.insert(Name.first); 752 } 753 754 // For each function in current module, load all 755 // context profiles for the function. 756 for (auto NameOffset : FuncOffsetTable) { 757 StringRef ContextName = NameOffset.first; 758 SampleContext FContext(ContextName); 759 auto FuncName = FContext.getNameWithoutContext(); 760 if (!FuncsToUse.count(FuncName) && 761 (!Remapper || !Remapper->exist(FuncName))) 762 continue; 763 764 // For each context profile we need, try to load 765 // all context profile in the subtree. This can 766 // help profile guided importing for ThinLTO. 767 auto It = OrderedNames.find(ContextName); 768 while (It != OrderedNames.end() && 769 It->startswith(ContextName.substr(0, ContextName.size() - 1))) { 770 const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; 771 assert(FuncProfileAddr < End && "out of LBRProfile section"); 772 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 773 return EC; 774 // Remove loaded context profile so we won't 775 // load it repeatedly. 776 It = OrderedNames.erase(It); 777 } 778 } 779 } else { 780 for (auto NameOffset : FuncOffsetTable) { 781 SampleContext FContext(NameOffset.first); 782 auto FuncName = FContext.getNameWithoutContext(); 783 if (!FuncsToUse.count(FuncName) && 784 (!Remapper || !Remapper->exist(FuncName))) 785 continue; 786 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 787 assert(FuncProfileAddr < End && "out of LBRProfile section"); 788 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 789 return EC; 790 } 791 } 792 Data = End; 793 } 794 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 795 "Cannot have both context-sensitive and regular profile"); 796 assert((!CSProfileCount || ProfileIsCS) && 797 "Section flag should be consistent with actual profile"); 798 return sampleprof_error::success; 799 } 800 801 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 802 if (!ProfSymList) 803 ProfSymList = std::make_unique<ProfileSymbolList>(); 804 805 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 806 return EC; 807 808 Data = End; 809 return sampleprof_error::success; 810 } 811 812 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 813 const uint8_t *SecStart, const uint64_t SecSize, 814 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 815 Data = SecStart; 816 End = SecStart + SecSize; 817 auto DecompressSize = readNumber<uint64_t>(); 818 if (std::error_code EC = DecompressSize.getError()) 819 return EC; 820 DecompressBufSize = *DecompressSize; 821 822 auto CompressSize = readNumber<uint64_t>(); 823 if (std::error_code EC = CompressSize.getError()) 824 return EC; 825 826 if (!llvm::zlib::isAvailable()) 827 return sampleprof_error::zlib_unavailable; 828 829 StringRef CompressedStrings(reinterpret_cast<const char *>(Data), 830 *CompressSize); 831 char *Buffer = Allocator.Allocate<char>(DecompressBufSize); 832 size_t UCSize = DecompressBufSize; 833 llvm::Error E = 834 zlib::uncompress(CompressedStrings, Buffer, UCSize); 835 if (E) 836 return sampleprof_error::uncompress_failed; 837 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 838 return sampleprof_error::success; 839 } 840 841 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 842 const uint8_t *BufStart = 843 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 844 845 for (auto &Entry : SecHdrTable) { 846 // Skip empty section. 847 if (!Entry.Size) 848 continue; 849 850 // Skip sections without context when SkipFlatProf is true. 851 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 852 continue; 853 854 const uint8_t *SecStart = BufStart + Entry.Offset; 855 uint64_t SecSize = Entry.Size; 856 857 // If the section is compressed, decompress it into a buffer 858 // DecompressBuf before reading the actual data. The pointee of 859 // 'Data' will be changed to buffer hold by DecompressBuf 860 // temporarily when reading the actual data. 861 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 862 if (isCompressed) { 863 const uint8_t *DecompressBuf; 864 uint64_t DecompressBufSize; 865 if (std::error_code EC = decompressSection( 866 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 867 return EC; 868 SecStart = DecompressBuf; 869 SecSize = DecompressBufSize; 870 } 871 872 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 873 return EC; 874 if (Data != SecStart + SecSize) 875 return sampleprof_error::malformed; 876 877 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 878 if (isCompressed) { 879 Data = BufStart + Entry.Offset; 880 End = BufStart + Buffer->getBufferSize(); 881 } 882 } 883 884 return sampleprof_error::success; 885 } 886 887 std::error_code SampleProfileReaderCompactBinary::readImpl() { 888 // Collect functions used by current module if the Reader has been 889 // given a module. 890 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 891 ProfileIsFS = ProfileIsFSDisciminator; 892 FunctionSamples::ProfileIsFS = ProfileIsFS; 893 std::vector<uint64_t> OffsetsToUse; 894 if (!LoadFuncsToBeUsed) { 895 // load all the function profiles. 896 for (auto FuncEntry : FuncOffsetTable) { 897 OffsetsToUse.push_back(FuncEntry.second); 898 } 899 } else { 900 // load function profiles on demand. 901 for (auto Name : FuncsToUse) { 902 auto GUID = std::to_string(MD5Hash(Name)); 903 auto iter = FuncOffsetTable.find(StringRef(GUID)); 904 if (iter == FuncOffsetTable.end()) 905 continue; 906 OffsetsToUse.push_back(iter->second); 907 } 908 } 909 910 for (auto Offset : OffsetsToUse) { 911 const uint8_t *SavedData = Data; 912 if (std::error_code EC = readFuncProfile( 913 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 914 Offset)) 915 return EC; 916 Data = SavedData; 917 } 918 return sampleprof_error::success; 919 } 920 921 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 922 if (Magic == SPMagic()) 923 return sampleprof_error::success; 924 return sampleprof_error::bad_magic; 925 } 926 927 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 928 if (Magic == SPMagic(SPF_Ext_Binary)) 929 return sampleprof_error::success; 930 return sampleprof_error::bad_magic; 931 } 932 933 std::error_code 934 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { 935 if (Magic == SPMagic(SPF_Compact_Binary)) 936 return sampleprof_error::success; 937 return sampleprof_error::bad_magic; 938 } 939 940 std::error_code SampleProfileReaderBinary::readNameTable() { 941 auto Size = readNumber<uint32_t>(); 942 if (std::error_code EC = Size.getError()) 943 return EC; 944 NameTable.reserve(*Size + NameTable.size()); 945 for (uint32_t I = 0; I < *Size; ++I) { 946 auto Name(readString()); 947 if (std::error_code EC = Name.getError()) 948 return EC; 949 NameTable.push_back(*Name); 950 } 951 952 return sampleprof_error::success; 953 } 954 955 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() { 956 auto Size = readNumber<uint64_t>(); 957 if (std::error_code EC = Size.getError()) 958 return EC; 959 MD5StringBuf = std::make_unique<std::vector<std::string>>(); 960 MD5StringBuf->reserve(*Size); 961 if (FixedLengthMD5) { 962 // Preallocate and initialize NameTable so we can check whether a name 963 // index has been read before by checking whether the element in the 964 // NameTable is empty, meanwhile readStringIndex can do the boundary 965 // check using the size of NameTable. 966 NameTable.resize(*Size + NameTable.size()); 967 968 MD5NameMemStart = Data; 969 Data = Data + (*Size) * sizeof(uint64_t); 970 return sampleprof_error::success; 971 } 972 NameTable.reserve(*Size); 973 for (uint32_t I = 0; I < *Size; ++I) { 974 auto FID = readNumber<uint64_t>(); 975 if (std::error_code EC = FID.getError()) 976 return EC; 977 MD5StringBuf->push_back(std::to_string(*FID)); 978 // NameTable is a vector of StringRef. Here it is pushing back a 979 // StringRef initialized with the last string in MD5stringBuf. 980 NameTable.push_back(MD5StringBuf->back()); 981 } 982 return sampleprof_error::success; 983 } 984 985 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { 986 if (IsMD5) 987 return readMD5NameTable(); 988 return SampleProfileReaderBinary::readNameTable(); 989 } 990 991 std::error_code 992 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { 993 while (Data < End) { 994 auto FName(readStringFromTable()); 995 if (std::error_code EC = FName.getError()) 996 return EC; 997 998 SampleContext FContext(*FName); 999 bool ProfileInMap = Profiles.count(FContext); 1000 1001 if (ProfileIsProbeBased) { 1002 auto Checksum = readNumber<uint64_t>(); 1003 if (std::error_code EC = Checksum.getError()) 1004 return EC; 1005 if (ProfileInMap) 1006 Profiles[FContext].setFunctionHash(*Checksum); 1007 } 1008 1009 if (ProfileHasAttribute) { 1010 auto Attributes = readNumber<uint32_t>(); 1011 if (std::error_code EC = Attributes.getError()) 1012 return EC; 1013 if (ProfileInMap) 1014 Profiles[FContext].getContext().setAllAttributes(*Attributes); 1015 } 1016 } 1017 1018 assert(Data == End && "More data is read than expected"); 1019 return sampleprof_error::success; 1020 } 1021 1022 std::error_code SampleProfileReaderCompactBinary::readNameTable() { 1023 auto Size = readNumber<uint64_t>(); 1024 if (std::error_code EC = Size.getError()) 1025 return EC; 1026 NameTable.reserve(*Size); 1027 for (uint32_t I = 0; I < *Size; ++I) { 1028 auto FID = readNumber<uint64_t>(); 1029 if (std::error_code EC = FID.getError()) 1030 return EC; 1031 NameTable.push_back(std::to_string(*FID)); 1032 } 1033 return sampleprof_error::success; 1034 } 1035 1036 std::error_code 1037 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) { 1038 SecHdrTableEntry Entry; 1039 auto Type = readUnencodedNumber<uint64_t>(); 1040 if (std::error_code EC = Type.getError()) 1041 return EC; 1042 Entry.Type = static_cast<SecType>(*Type); 1043 1044 auto Flags = readUnencodedNumber<uint64_t>(); 1045 if (std::error_code EC = Flags.getError()) 1046 return EC; 1047 Entry.Flags = *Flags; 1048 1049 auto Offset = readUnencodedNumber<uint64_t>(); 1050 if (std::error_code EC = Offset.getError()) 1051 return EC; 1052 Entry.Offset = *Offset; 1053 1054 auto Size = readUnencodedNumber<uint64_t>(); 1055 if (std::error_code EC = Size.getError()) 1056 return EC; 1057 Entry.Size = *Size; 1058 1059 Entry.LayoutIndex = Idx; 1060 SecHdrTable.push_back(std::move(Entry)); 1061 return sampleprof_error::success; 1062 } 1063 1064 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1065 auto EntryNum = readUnencodedNumber<uint64_t>(); 1066 if (std::error_code EC = EntryNum.getError()) 1067 return EC; 1068 1069 for (uint32_t i = 0; i < (*EntryNum); i++) 1070 if (std::error_code EC = readSecHdrTableEntry(i)) 1071 return EC; 1072 1073 return sampleprof_error::success; 1074 } 1075 1076 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1077 const uint8_t *BufStart = 1078 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1079 Data = BufStart; 1080 End = BufStart + Buffer->getBufferSize(); 1081 1082 if (std::error_code EC = readMagicIdent()) 1083 return EC; 1084 1085 if (std::error_code EC = readSecHdrTable()) 1086 return EC; 1087 1088 return sampleprof_error::success; 1089 } 1090 1091 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1092 uint64_t Size = 0; 1093 for (auto &Entry : SecHdrTable) { 1094 if (Entry.Type == Type) 1095 Size += Entry.Size; 1096 } 1097 return Size; 1098 } 1099 1100 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1101 // Sections in SecHdrTable is not necessarily in the same order as 1102 // sections in the profile because section like FuncOffsetTable needs 1103 // to be written after section LBRProfile but needs to be read before 1104 // section LBRProfile, so we cannot simply use the last entry in 1105 // SecHdrTable to calculate the file size. 1106 uint64_t FileSize = 0; 1107 for (auto &Entry : SecHdrTable) { 1108 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1109 } 1110 return FileSize; 1111 } 1112 1113 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1114 std::string Flags; 1115 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1116 Flags.append("{compressed,"); 1117 else 1118 Flags.append("{"); 1119 1120 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1121 Flags.append("flat,"); 1122 1123 switch (Entry.Type) { 1124 case SecNameTable: 1125 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1126 Flags.append("fixlenmd5,"); 1127 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1128 Flags.append("md5,"); 1129 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1130 Flags.append("uniq,"); 1131 break; 1132 case SecProfSummary: 1133 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1134 Flags.append("partial,"); 1135 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1136 Flags.append("context,"); 1137 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 1138 Flags.append("fs-discriminator,"); 1139 break; 1140 default: 1141 break; 1142 } 1143 char &last = Flags.back(); 1144 if (last == ',') 1145 last = '}'; 1146 else 1147 Flags.append("}"); 1148 return Flags; 1149 } 1150 1151 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1152 uint64_t TotalSecsSize = 0; 1153 for (auto &Entry : SecHdrTable) { 1154 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1155 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1156 << "\n"; 1157 ; 1158 TotalSecsSize += Entry.Size; 1159 } 1160 uint64_t HeaderSize = SecHdrTable.front().Offset; 1161 assert(HeaderSize + TotalSecsSize == getFileSize() && 1162 "Size of 'header + sections' doesn't match the total size of profile"); 1163 1164 OS << "Header Size: " << HeaderSize << "\n"; 1165 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1166 OS << "File Size: " << getFileSize() << "\n"; 1167 return true; 1168 } 1169 1170 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1171 // Read and check the magic identifier. 1172 auto Magic = readNumber<uint64_t>(); 1173 if (std::error_code EC = Magic.getError()) 1174 return EC; 1175 else if (std::error_code EC = verifySPMagic(*Magic)) 1176 return EC; 1177 1178 // Read the version number. 1179 auto Version = readNumber<uint64_t>(); 1180 if (std::error_code EC = Version.getError()) 1181 return EC; 1182 else if (*Version != SPVersion()) 1183 return sampleprof_error::unsupported_version; 1184 1185 return sampleprof_error::success; 1186 } 1187 1188 std::error_code SampleProfileReaderBinary::readHeader() { 1189 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1190 End = Data + Buffer->getBufferSize(); 1191 1192 if (std::error_code EC = readMagicIdent()) 1193 return EC; 1194 1195 if (std::error_code EC = readSummary()) 1196 return EC; 1197 1198 if (std::error_code EC = readNameTable()) 1199 return EC; 1200 return sampleprof_error::success; 1201 } 1202 1203 std::error_code SampleProfileReaderCompactBinary::readHeader() { 1204 SampleProfileReaderBinary::readHeader(); 1205 if (std::error_code EC = readFuncOffsetTable()) 1206 return EC; 1207 return sampleprof_error::success; 1208 } 1209 1210 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { 1211 auto TableOffset = readUnencodedNumber<uint64_t>(); 1212 if (std::error_code EC = TableOffset.getError()) 1213 return EC; 1214 1215 const uint8_t *SavedData = Data; 1216 const uint8_t *TableStart = 1217 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + 1218 *TableOffset; 1219 Data = TableStart; 1220 1221 auto Size = readNumber<uint64_t>(); 1222 if (std::error_code EC = Size.getError()) 1223 return EC; 1224 1225 FuncOffsetTable.reserve(*Size); 1226 for (uint32_t I = 0; I < *Size; ++I) { 1227 auto FName(readStringFromTable()); 1228 if (std::error_code EC = FName.getError()) 1229 return EC; 1230 1231 auto Offset = readNumber<uint64_t>(); 1232 if (std::error_code EC = Offset.getError()) 1233 return EC; 1234 1235 FuncOffsetTable[*FName] = *Offset; 1236 } 1237 End = TableStart; 1238 Data = SavedData; 1239 return sampleprof_error::success; 1240 } 1241 1242 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() { 1243 if (!M) 1244 return false; 1245 FuncsToUse.clear(); 1246 for (auto &F : *M) 1247 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 1248 return true; 1249 } 1250 1251 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1252 std::vector<ProfileSummaryEntry> &Entries) { 1253 auto Cutoff = readNumber<uint64_t>(); 1254 if (std::error_code EC = Cutoff.getError()) 1255 return EC; 1256 1257 auto MinBlockCount = readNumber<uint64_t>(); 1258 if (std::error_code EC = MinBlockCount.getError()) 1259 return EC; 1260 1261 auto NumBlocks = readNumber<uint64_t>(); 1262 if (std::error_code EC = NumBlocks.getError()) 1263 return EC; 1264 1265 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1266 return sampleprof_error::success; 1267 } 1268 1269 std::error_code SampleProfileReaderBinary::readSummary() { 1270 auto TotalCount = readNumber<uint64_t>(); 1271 if (std::error_code EC = TotalCount.getError()) 1272 return EC; 1273 1274 auto MaxBlockCount = readNumber<uint64_t>(); 1275 if (std::error_code EC = MaxBlockCount.getError()) 1276 return EC; 1277 1278 auto MaxFunctionCount = readNumber<uint64_t>(); 1279 if (std::error_code EC = MaxFunctionCount.getError()) 1280 return EC; 1281 1282 auto NumBlocks = readNumber<uint64_t>(); 1283 if (std::error_code EC = NumBlocks.getError()) 1284 return EC; 1285 1286 auto NumFunctions = readNumber<uint64_t>(); 1287 if (std::error_code EC = NumFunctions.getError()) 1288 return EC; 1289 1290 auto NumSummaryEntries = readNumber<uint64_t>(); 1291 if (std::error_code EC = NumSummaryEntries.getError()) 1292 return EC; 1293 1294 std::vector<ProfileSummaryEntry> Entries; 1295 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1296 std::error_code EC = readSummaryEntry(Entries); 1297 if (EC != sampleprof_error::success) 1298 return EC; 1299 } 1300 Summary = std::make_unique<ProfileSummary>( 1301 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1302 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1303 1304 return sampleprof_error::success; 1305 } 1306 1307 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1308 const uint8_t *Data = 1309 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1310 uint64_t Magic = decodeULEB128(Data); 1311 return Magic == SPMagic(); 1312 } 1313 1314 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1315 const uint8_t *Data = 1316 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1317 uint64_t Magic = decodeULEB128(Data); 1318 return Magic == SPMagic(SPF_Ext_Binary); 1319 } 1320 1321 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { 1322 const uint8_t *Data = 1323 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1324 uint64_t Magic = decodeULEB128(Data); 1325 return Magic == SPMagic(SPF_Compact_Binary); 1326 } 1327 1328 std::error_code SampleProfileReaderGCC::skipNextWord() { 1329 uint32_t dummy; 1330 if (!GcovBuffer.readInt(dummy)) 1331 return sampleprof_error::truncated; 1332 return sampleprof_error::success; 1333 } 1334 1335 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1336 if (sizeof(T) <= sizeof(uint32_t)) { 1337 uint32_t Val; 1338 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1339 return static_cast<T>(Val); 1340 } else if (sizeof(T) <= sizeof(uint64_t)) { 1341 uint64_t Val; 1342 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1343 return static_cast<T>(Val); 1344 } 1345 1346 std::error_code EC = sampleprof_error::malformed; 1347 reportError(0, EC.message()); 1348 return EC; 1349 } 1350 1351 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1352 StringRef Str; 1353 if (!GcovBuffer.readString(Str)) 1354 return sampleprof_error::truncated; 1355 return Str; 1356 } 1357 1358 std::error_code SampleProfileReaderGCC::readHeader() { 1359 // Read the magic identifier. 1360 if (!GcovBuffer.readGCDAFormat()) 1361 return sampleprof_error::unrecognized_format; 1362 1363 // Read the version number. Note - the GCC reader does not validate this 1364 // version, but the profile creator generates v704. 1365 GCOV::GCOVVersion version; 1366 if (!GcovBuffer.readGCOVVersion(version)) 1367 return sampleprof_error::unrecognized_format; 1368 1369 if (version != GCOV::V407) 1370 return sampleprof_error::unsupported_version; 1371 1372 // Skip the empty integer. 1373 if (std::error_code EC = skipNextWord()) 1374 return EC; 1375 1376 return sampleprof_error::success; 1377 } 1378 1379 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1380 uint32_t Tag; 1381 if (!GcovBuffer.readInt(Tag)) 1382 return sampleprof_error::truncated; 1383 1384 if (Tag != Expected) 1385 return sampleprof_error::malformed; 1386 1387 if (std::error_code EC = skipNextWord()) 1388 return EC; 1389 1390 return sampleprof_error::success; 1391 } 1392 1393 std::error_code SampleProfileReaderGCC::readNameTable() { 1394 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1395 return EC; 1396 1397 uint32_t Size; 1398 if (!GcovBuffer.readInt(Size)) 1399 return sampleprof_error::truncated; 1400 1401 for (uint32_t I = 0; I < Size; ++I) { 1402 StringRef Str; 1403 if (!GcovBuffer.readString(Str)) 1404 return sampleprof_error::truncated; 1405 Names.push_back(std::string(Str)); 1406 } 1407 1408 return sampleprof_error::success; 1409 } 1410 1411 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1412 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1413 return EC; 1414 1415 uint32_t NumFunctions; 1416 if (!GcovBuffer.readInt(NumFunctions)) 1417 return sampleprof_error::truncated; 1418 1419 InlineCallStack Stack; 1420 for (uint32_t I = 0; I < NumFunctions; ++I) 1421 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1422 return EC; 1423 1424 computeSummary(); 1425 return sampleprof_error::success; 1426 } 1427 1428 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1429 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1430 uint64_t HeadCount = 0; 1431 if (InlineStack.size() == 0) 1432 if (!GcovBuffer.readInt64(HeadCount)) 1433 return sampleprof_error::truncated; 1434 1435 uint32_t NameIdx; 1436 if (!GcovBuffer.readInt(NameIdx)) 1437 return sampleprof_error::truncated; 1438 1439 StringRef Name(Names[NameIdx]); 1440 1441 uint32_t NumPosCounts; 1442 if (!GcovBuffer.readInt(NumPosCounts)) 1443 return sampleprof_error::truncated; 1444 1445 uint32_t NumCallsites; 1446 if (!GcovBuffer.readInt(NumCallsites)) 1447 return sampleprof_error::truncated; 1448 1449 FunctionSamples *FProfile = nullptr; 1450 if (InlineStack.size() == 0) { 1451 // If this is a top function that we have already processed, do not 1452 // update its profile again. This happens in the presence of 1453 // function aliases. Since these aliases share the same function 1454 // body, there will be identical replicated profiles for the 1455 // original function. In this case, we simply not bother updating 1456 // the profile of the original function. 1457 FProfile = &Profiles[Name]; 1458 FProfile->addHeadSamples(HeadCount); 1459 if (FProfile->getTotalSamples() > 0) 1460 Update = false; 1461 } else { 1462 // Otherwise, we are reading an inlined instance. The top of the 1463 // inline stack contains the profile of the caller. Insert this 1464 // callee in the caller's CallsiteMap. 1465 FunctionSamples *CallerProfile = InlineStack.front(); 1466 uint32_t LineOffset = Offset >> 16; 1467 uint32_t Discriminator = Offset & 0xffff; 1468 FProfile = &CallerProfile->functionSamplesAt( 1469 LineLocation(LineOffset, Discriminator))[std::string(Name)]; 1470 } 1471 FProfile->setName(Name); 1472 1473 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1474 uint32_t Offset; 1475 if (!GcovBuffer.readInt(Offset)) 1476 return sampleprof_error::truncated; 1477 1478 uint32_t NumTargets; 1479 if (!GcovBuffer.readInt(NumTargets)) 1480 return sampleprof_error::truncated; 1481 1482 uint64_t Count; 1483 if (!GcovBuffer.readInt64(Count)) 1484 return sampleprof_error::truncated; 1485 1486 // The line location is encoded in the offset as: 1487 // high 16 bits: line offset to the start of the function. 1488 // low 16 bits: discriminator. 1489 uint32_t LineOffset = Offset >> 16; 1490 uint32_t Discriminator = Offset & 0xffff; 1491 1492 InlineCallStack NewStack; 1493 NewStack.push_back(FProfile); 1494 llvm::append_range(NewStack, InlineStack); 1495 if (Update) { 1496 // Walk up the inline stack, adding the samples on this line to 1497 // the total sample count of the callers in the chain. 1498 for (auto CallerProfile : NewStack) 1499 CallerProfile->addTotalSamples(Count); 1500 1501 // Update the body samples for the current profile. 1502 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1503 } 1504 1505 // Process the list of functions called at an indirect call site. 1506 // These are all the targets that a function pointer (or virtual 1507 // function) resolved at runtime. 1508 for (uint32_t J = 0; J < NumTargets; J++) { 1509 uint32_t HistVal; 1510 if (!GcovBuffer.readInt(HistVal)) 1511 return sampleprof_error::truncated; 1512 1513 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1514 return sampleprof_error::malformed; 1515 1516 uint64_t TargetIdx; 1517 if (!GcovBuffer.readInt64(TargetIdx)) 1518 return sampleprof_error::truncated; 1519 StringRef TargetName(Names[TargetIdx]); 1520 1521 uint64_t TargetCount; 1522 if (!GcovBuffer.readInt64(TargetCount)) 1523 return sampleprof_error::truncated; 1524 1525 if (Update) 1526 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1527 TargetName, TargetCount); 1528 } 1529 } 1530 1531 // Process all the inlined callers into the current function. These 1532 // are all the callsites that were inlined into this function. 1533 for (uint32_t I = 0; I < NumCallsites; I++) { 1534 // The offset is encoded as: 1535 // high 16 bits: line offset to the start of the function. 1536 // low 16 bits: discriminator. 1537 uint32_t Offset; 1538 if (!GcovBuffer.readInt(Offset)) 1539 return sampleprof_error::truncated; 1540 InlineCallStack NewStack; 1541 NewStack.push_back(FProfile); 1542 llvm::append_range(NewStack, InlineStack); 1543 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1544 return EC; 1545 } 1546 1547 return sampleprof_error::success; 1548 } 1549 1550 /// Read a GCC AutoFDO profile. 1551 /// 1552 /// This format is generated by the Linux Perf conversion tool at 1553 /// https://github.com/google/autofdo. 1554 std::error_code SampleProfileReaderGCC::readImpl() { 1555 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); 1556 // Read the string table. 1557 if (std::error_code EC = readNameTable()) 1558 return EC; 1559 1560 // Read the source profile. 1561 if (std::error_code EC = readFunctionProfiles()) 1562 return EC; 1563 1564 return sampleprof_error::success; 1565 } 1566 1567 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1568 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1569 return Magic == "adcg*704"; 1570 } 1571 1572 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1573 // If the reader uses MD5 to represent string, we can't remap it because 1574 // we don't know what the original function names were. 1575 if (Reader.useMD5()) { 1576 Ctx.diagnose(DiagnosticInfoSampleProfile( 1577 Reader.getBuffer()->getBufferIdentifier(), 1578 "Profile data remapping cannot be applied to profile data " 1579 "in compact format (original mangled names are not available).", 1580 DS_Warning)); 1581 return; 1582 } 1583 1584 // CSSPGO-TODO: Remapper is not yet supported. 1585 // We will need to remap the entire context string. 1586 assert(Remappings && "should be initialized while creating remapper"); 1587 for (auto &Sample : Reader.getProfiles()) { 1588 DenseSet<StringRef> NamesInSample; 1589 Sample.second.findAllNames(NamesInSample); 1590 for (auto &Name : NamesInSample) 1591 if (auto Key = Remappings->insert(Name)) 1592 NameMap.insert({Key, Name}); 1593 } 1594 1595 RemappingApplied = true; 1596 } 1597 1598 Optional<StringRef> 1599 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1600 if (auto Key = Remappings->lookup(Fname)) 1601 return NameMap.lookup(Key); 1602 return None; 1603 } 1604 1605 /// Prepare a memory buffer for the contents of \p Filename. 1606 /// 1607 /// \returns an error code indicating the status of the buffer. 1608 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1609 setupMemoryBuffer(const Twine &Filename) { 1610 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); 1611 if (std::error_code EC = BufferOrErr.getError()) 1612 return EC; 1613 auto Buffer = std::move(BufferOrErr.get()); 1614 1615 // Sanity check the file. 1616 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max()) 1617 return sampleprof_error::too_large; 1618 1619 return std::move(Buffer); 1620 } 1621 1622 /// Create a sample profile reader based on the format of the input file. 1623 /// 1624 /// \param Filename The file to open. 1625 /// 1626 /// \param C The LLVM context to use to emit diagnostics. 1627 /// 1628 /// \param P The FSDiscriminatorPass. 1629 /// 1630 /// \param RemapFilename The file used for profile remapping. 1631 /// 1632 /// \returns an error code indicating the status of the created reader. 1633 ErrorOr<std::unique_ptr<SampleProfileReader>> 1634 SampleProfileReader::create(const std::string Filename, LLVMContext &C, 1635 FSDiscriminatorPass P, 1636 const std::string RemapFilename) { 1637 auto BufferOrError = setupMemoryBuffer(Filename); 1638 if (std::error_code EC = BufferOrError.getError()) 1639 return EC; 1640 return create(BufferOrError.get(), C, P, RemapFilename); 1641 } 1642 1643 /// Create a sample profile remapper from the given input, to remap the 1644 /// function names in the given profile data. 1645 /// 1646 /// \param Filename The file to open. 1647 /// 1648 /// \param Reader The profile reader the remapper is going to be applied to. 1649 /// 1650 /// \param C The LLVM context to use to emit diagnostics. 1651 /// 1652 /// \returns an error code indicating the status of the created reader. 1653 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1654 SampleProfileReaderItaniumRemapper::create(const std::string Filename, 1655 SampleProfileReader &Reader, 1656 LLVMContext &C) { 1657 auto BufferOrError = setupMemoryBuffer(Filename); 1658 if (std::error_code EC = BufferOrError.getError()) 1659 return EC; 1660 return create(BufferOrError.get(), Reader, C); 1661 } 1662 1663 /// Create a sample profile remapper from the given input, to remap the 1664 /// function names in the given profile data. 1665 /// 1666 /// \param B The memory buffer to create the reader from (assumes ownership). 1667 /// 1668 /// \param C The LLVM context to use to emit diagnostics. 1669 /// 1670 /// \param Reader The profile reader the remapper is going to be applied to. 1671 /// 1672 /// \returns an error code indicating the status of the created reader. 1673 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1674 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1675 SampleProfileReader &Reader, 1676 LLVMContext &C) { 1677 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1678 if (Error E = Remappings->read(*B.get())) { 1679 handleAllErrors( 1680 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1681 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1682 ParseError.getLineNum(), 1683 ParseError.getMessage())); 1684 }); 1685 return sampleprof_error::malformed; 1686 } 1687 1688 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1689 std::move(B), std::move(Remappings), Reader); 1690 } 1691 1692 /// Create a sample profile reader based on the format of the input data. 1693 /// 1694 /// \param B The memory buffer to create the reader from (assumes ownership). 1695 /// 1696 /// \param C The LLVM context to use to emit diagnostics. 1697 /// 1698 /// \param P The FSDiscriminatorPass. 1699 /// 1700 /// \param RemapFilename The file used for profile remapping. 1701 /// 1702 /// \returns an error code indicating the status of the created reader. 1703 ErrorOr<std::unique_ptr<SampleProfileReader>> 1704 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1705 FSDiscriminatorPass P, 1706 const std::string RemapFilename) { 1707 std::unique_ptr<SampleProfileReader> Reader; 1708 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1709 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1710 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1711 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1712 else if (SampleProfileReaderCompactBinary::hasFormat(*B)) 1713 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); 1714 else if (SampleProfileReaderGCC::hasFormat(*B)) 1715 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1716 else if (SampleProfileReaderText::hasFormat(*B)) 1717 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1718 else 1719 return sampleprof_error::unrecognized_format; 1720 1721 if (!RemapFilename.empty()) { 1722 auto ReaderOrErr = 1723 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); 1724 if (std::error_code EC = ReaderOrErr.getError()) { 1725 std::string Msg = "Could not create remapper: " + EC.message(); 1726 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1727 return EC; 1728 } 1729 Reader->Remapper = std::move(ReaderOrErr.get()); 1730 } 1731 1732 FunctionSamples::Format = Reader->getFormat(); 1733 if (std::error_code EC = Reader->readHeader()) { 1734 return EC; 1735 } 1736 1737 Reader->setDiscriminatorMaskedBitFrom(P); 1738 1739 return std::move(Reader); 1740 } 1741 1742 // For text and GCC file formats, we compute the summary after reading the 1743 // profile. Binary format has the profile summary in its header. 1744 void SampleProfileReader::computeSummary() { 1745 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1746 Summary = Builder.computeSummaryForProfiles(Profiles); 1747 } 1748