1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/LineIterator.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <cstddef>
40 #include <cstdint>
41 #include <limits>
42 #include <memory>
43 #include <system_error>
44 #include <vector>
45
46 using namespace llvm;
47 using namespace sampleprof;
48
49 #define DEBUG_TYPE "samplepgo-reader"
50
51 // This internal option specifies whether the profile uses FS (flow
52 // sensitive) discriminators. It only applies to text, binary and compact binary format profiles.
53 // For ext-binary format profiles, the flag is set in the summary section.
// NOTE: the identifier is spelled "Disciminator" (sic). The readers in this
// file refer to it by that exact name, so the spelling must be kept.
54 static cl::opt<bool> ProfileIsFSDisciminator(
55 "profile-isfs", cl::Hidden, cl::init(false),
56 cl::desc("Profile uses flow sensitive discriminators"));
57
58 /// Dump the function profile for \p FName.
59 ///
60 /// \param FContext Name + context of the function to print.
61 /// \param OS Stream to emit the output to.
dumpFunctionProfile(SampleContext FContext,raw_ostream & OS)62 void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
63 raw_ostream &OS) {
64 OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
65 }
66
67 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)68 void SampleProfileReader::dump(raw_ostream &OS) {
69 std::vector<NameFunctionSamples> V;
70 sortFuncProfiles(Profiles, V);
71 for (const auto &I : V)
72 dumpFunctionProfile(I.first, OS);
73 }
74
75 /// Parse \p Input as function head.
76 ///
77 /// Parse one line of \p Input, and update function name in \p FName,
78 /// function's total sample count in \p NumSamples, function's entry
79 /// count in \p NumHeadSamples.
80 ///
81 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)82 static bool ParseHead(const StringRef &Input, StringRef &FName,
83 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
84 if (Input[0] == ' ')
85 return false;
86 size_t n2 = Input.rfind(':');
87 size_t n1 = Input.rfind(':', n2 - 1);
88 FName = Input.substr(0, n1);
89 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
90 return false;
91 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
92 return false;
93 return true;
94 }
95
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is 64 bits wide so that offsets decoded as uint64_t are
/// checked in full instead of being truncated to 32 bits before the test.
static bool isOffsetLegal(uint64_t L) { return (L & 0xffff) == L; }
98
99 /// Parse \p Input that contains metadata.
100 /// Possible metadata:
101 /// - CFG Checksum information:
102 /// !CFGChecksum: 12345
103 /// - CFG Checksum information:
104 /// !Attributes: 1
105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)106 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
107 uint32_t &Attributes) {
108 if (Input.startswith("!CFGChecksum:")) {
109 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
110 return !CFGInfo.getAsInteger(10, FunctionHash);
111 }
112
113 if (Input.startswith("!Attributes:")) {
114 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
115 return !Attrib.getAsInteger(10, Attributes);
116 }
117
118 return false;
119 }
120
/// Kind of an indented line in a text profile, as classified by ParseLine.
enum class LineType {
  /// `offset[.discriminator]: callee:count` - the text after the location
  /// does not start with a digit.
  CallSiteProfile,
  /// `offset[.discriminator]: count [target:count]*` - the text after the
  /// location starts with a digit.
  BodyProfile,
  /// A line whose first non-blank character is '!'.
  Metadata,
};
126
127 /// Parse \p Input as line sample.
128 ///
129 /// \param Input input line.
130 /// \param LineTy Type of this line.
131 /// \param Depth the depth of the inline stack.
132 /// \param NumSamples total samples of the line/inlined callsite.
133 /// \param LineOffset line offset to the start of the function.
134 /// \param Discriminator discriminator of the line.
135 /// \param TargetCountMap map from indirect call target to count.
136 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
137 ///
138 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)139 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
140 uint64_t &NumSamples, uint32_t &LineOffset,
141 uint32_t &Discriminator, StringRef &CalleeName,
142 DenseMap<StringRef, uint64_t> &TargetCountMap,
143 uint64_t &FunctionHash, uint32_t &Attributes) {
144 for (Depth = 0; Input[Depth] == ' '; Depth++)
145 ;
146 if (Depth == 0)
147 return false;
148
149 if (Input[Depth] == '!') {
150 LineTy = LineType::Metadata;
151 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
152 }
153
154 size_t n1 = Input.find(':');
155 StringRef Loc = Input.substr(Depth, n1 - Depth);
156 size_t n2 = Loc.find('.');
157 if (n2 == StringRef::npos) {
158 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
159 return false;
160 Discriminator = 0;
161 } else {
162 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
163 return false;
164 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
165 return false;
166 }
167
168 StringRef Rest = Input.substr(n1 + 2);
169 if (isDigit(Rest[0])) {
170 LineTy = LineType::BodyProfile;
171 size_t n3 = Rest.find(' ');
172 if (n3 == StringRef::npos) {
173 if (Rest.getAsInteger(10, NumSamples))
174 return false;
175 } else {
176 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
177 return false;
178 }
179 // Find call targets and their sample counts.
180 // Note: In some cases, there are symbols in the profile which are not
181 // mangled. To accommodate such cases, use colon + integer pairs as the
182 // anchor points.
183 // An example:
184 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
185 // ":1000" and ":437" are used as anchor points so the string above will
186 // be interpreted as
187 // target: _M_construct<char *>
188 // count: 1000
189 // target: string_view<std::allocator<char> >
190 // count: 437
191 while (n3 != StringRef::npos) {
192 n3 += Rest.substr(n3).find_first_not_of(' ');
193 Rest = Rest.substr(n3);
194 n3 = Rest.find_first_of(':');
195 if (n3 == StringRef::npos || n3 == 0)
196 return false;
197
198 StringRef Target;
199 uint64_t count, n4;
200 while (true) {
201 // Get the segment after the current colon.
202 StringRef AfterColon = Rest.substr(n3 + 1);
203 // Get the target symbol before the current colon.
204 Target = Rest.substr(0, n3);
205 // Check if the word after the current colon is an integer.
206 n4 = AfterColon.find_first_of(' ');
207 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
208 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
209 if (!WordAfterColon.getAsInteger(10, count))
210 break;
211
212 // Try to find the next colon.
213 uint64_t n5 = AfterColon.find_first_of(':');
214 if (n5 == StringRef::npos)
215 return false;
216 n3 += n5 + 1;
217 }
218
219 // An anchor point is found. Save the {target, count} pair
220 TargetCountMap[Target] = count;
221 if (n4 == Rest.size())
222 break;
223 // Change n3 to the next blank space after colon + integer pair.
224 n3 = n4;
225 }
226 } else {
227 LineTy = LineType::CallSiteProfile;
228 size_t n3 = Rest.find_last_of(':');
229 CalleeName = Rest.substr(0, n3);
230 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
231 return false;
232 }
233 return true;
234 }
235
236 /// Load samples from a text file.
237 ///
238 /// See the documentation at the top of the file for an explanation of
239 /// the expected format.
240 ///
241 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()242 std::error_code SampleProfileReaderText::readImpl() {
243 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
244 sampleprof_error Result = sampleprof_error::success;
245
246 InlineCallStack InlineStack;
247 uint32_t TopLevelProbeProfileCount = 0;
248
249 // DepthMetadata tracks whether we have processed metadata for the current
250 // top-level or nested function profile.
251 uint32_t DepthMetadata = 0;
252
253 ProfileIsFS = ProfileIsFSDisciminator;
254 FunctionSamples::ProfileIsFS = ProfileIsFS;
255 for (; !LineIt.is_at_eof(); ++LineIt) {
256 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
257 continue;
258 // Read the header of each function.
259 //
260 // Note that for function identifiers we are actually expecting
261 // mangled names, but we may not always get them. This happens when
262 // the compiler decides not to emit the function (e.g., it was inlined
263 // and removed). In this case, the binary will not have the linkage
264 // name for the function, so the profiler will emit the function's
265 // unmangled name, which may contain characters like ':' and '>' in its
266 // name (member functions, templates, etc).
267 //
268 // The only requirement we place on the identifier, then, is that it
269 // should not begin with a number.
270 if ((*LineIt)[0] != ' ') {
271 uint64_t NumSamples, NumHeadSamples;
272 StringRef FName;
273 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
274 reportError(LineIt.line_number(),
275 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
276 return sampleprof_error::malformed;
277 }
278 DepthMetadata = 0;
279 SampleContext FContext(FName, CSNameTable);
280 if (FContext.hasContext())
281 ++CSProfileCount;
282 Profiles[FContext] = FunctionSamples();
283 FunctionSamples &FProfile = Profiles[FContext];
284 FProfile.setContext(FContext);
285 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
286 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
287 InlineStack.clear();
288 InlineStack.push_back(&FProfile);
289 } else {
290 uint64_t NumSamples;
291 StringRef FName;
292 DenseMap<StringRef, uint64_t> TargetCountMap;
293 uint32_t Depth, LineOffset, Discriminator;
294 LineType LineTy;
295 uint64_t FunctionHash = 0;
296 uint32_t Attributes = 0;
297 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
298 Discriminator, FName, TargetCountMap, FunctionHash,
299 Attributes)) {
300 reportError(LineIt.line_number(),
301 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
302 *LineIt);
303 return sampleprof_error::malformed;
304 }
305 if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
306 // Metadata must be put at the end of a function profile.
307 reportError(LineIt.line_number(),
308 "Found non-metadata after metadata: " + *LineIt);
309 return sampleprof_error::malformed;
310 }
311
312 // Here we handle FS discriminators.
313 Discriminator &= getDiscriminatorMask();
314
315 while (InlineStack.size() > Depth) {
316 InlineStack.pop_back();
317 }
318 switch (LineTy) {
319 case LineType::CallSiteProfile: {
320 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
321 LineLocation(LineOffset, Discriminator))[std::string(FName)];
322 FSamples.setName(FName);
323 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
324 InlineStack.push_back(&FSamples);
325 DepthMetadata = 0;
326 break;
327 }
328 case LineType::BodyProfile: {
329 while (InlineStack.size() > Depth) {
330 InlineStack.pop_back();
331 }
332 FunctionSamples &FProfile = *InlineStack.back();
333 for (const auto &name_count : TargetCountMap) {
334 MergeResult(Result, FProfile.addCalledTargetSamples(
335 LineOffset, Discriminator, name_count.first,
336 name_count.second));
337 }
338 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
339 NumSamples));
340 break;
341 }
342 case LineType::Metadata: {
343 FunctionSamples &FProfile = *InlineStack.back();
344 if (FunctionHash) {
345 FProfile.setFunctionHash(FunctionHash);
346 if (Depth == 1)
347 ++TopLevelProbeProfileCount;
348 }
349 FProfile.getContext().setAllAttributes(Attributes);
350 if (Attributes & (uint32_t)ContextShouldBeInlined)
351 ProfileIsPreInlined = true;
352 DepthMetadata = Depth;
353 break;
354 }
355 }
356 }
357 }
358
359 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
360 "Cannot have both context-sensitive and regular profile");
361 ProfileIsCS = (CSProfileCount > 0);
362 assert((TopLevelProbeProfileCount == 0 ||
363 TopLevelProbeProfileCount == Profiles.size()) &&
364 "Cannot have both probe-based profiles and regular profiles");
365 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
366 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
367 FunctionSamples::ProfileIsCS = ProfileIsCS;
368 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
369
370 if (Result == sampleprof_error::success)
371 computeSummary();
372
373 return Result;
374 }
375
hasFormat(const MemoryBuffer & Buffer)376 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
377 bool result = false;
378
379 // Check that the first non-comment line is a valid function header.
380 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
381 if (!LineIt.is_at_eof()) {
382 if ((*LineIt)[0] != ' ') {
383 uint64_t NumSamples, NumHeadSamples;
384 StringRef FName;
385 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
386 }
387 }
388
389 return result;
390 }
391
readNumber()392 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
393 unsigned NumBytesRead = 0;
394 std::error_code EC;
395 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
396
397 if (Val > std::numeric_limits<T>::max())
398 EC = sampleprof_error::malformed;
399 else if (Data + NumBytesRead > End)
400 EC = sampleprof_error::truncated;
401 else
402 EC = sampleprof_error::success;
403
404 if (EC) {
405 reportError(0, EC.message());
406 return EC;
407 }
408
409 Data += NumBytesRead;
410 return static_cast<T>(Val);
411 }
412
readString()413 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
414 std::error_code EC;
415 StringRef Str(reinterpret_cast<const char *>(Data));
416 if (Data + Str.size() + 1 > End) {
417 EC = sampleprof_error::truncated;
418 reportError(0, EC.message());
419 return EC;
420 }
421
422 Data += Str.size() + 1;
423 return Str;
424 }
425
426 template <typename T>
readUnencodedNumber()427 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
428 std::error_code EC;
429
430 if (Data + sizeof(T) > End) {
431 EC = sampleprof_error::truncated;
432 reportError(0, EC.message());
433 return EC;
434 }
435
436 using namespace support;
437 T Val = endian::readNext<T, little, unaligned>(Data);
438 return Val;
439 }
440
441 template <typename T>
readStringIndex(T & Table)442 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
443 std::error_code EC;
444 auto Idx = readNumber<uint32_t>();
445 if (std::error_code EC = Idx.getError())
446 return EC;
447 if (*Idx >= Table.size())
448 return sampleprof_error::truncated_name_table;
449 return *Idx;
450 }
451
readStringFromTable()452 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
453 auto Idx = readStringIndex(NameTable);
454 if (std::error_code EC = Idx.getError())
455 return EC;
456
457 return NameTable[*Idx];
458 }
459
readSampleContextFromTable()460 ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
461 auto FName(readStringFromTable());
462 if (std::error_code EC = FName.getError())
463 return EC;
464 return SampleContext(*FName);
465 }
466
readStringFromTable()467 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
468 if (!FixedLengthMD5)
469 return SampleProfileReaderBinary::readStringFromTable();
470
471 // read NameTable index.
472 auto Idx = readStringIndex(NameTable);
473 if (std::error_code EC = Idx.getError())
474 return EC;
475
476 // Check whether the name to be accessed has been accessed before,
477 // if not, read it from memory directly.
478 StringRef &SR = NameTable[*Idx];
479 if (SR.empty()) {
480 const uint8_t *SavedData = Data;
481 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
482 auto FID = readUnencodedNumber<uint64_t>();
483 if (std::error_code EC = FID.getError())
484 return EC;
485 // Save the string converted from uint64_t in MD5StringBuf. All the
486 // references to the name are all StringRefs refering to the string
487 // in MD5StringBuf.
488 MD5StringBuf->push_back(std::to_string(*FID));
489 SR = MD5StringBuf->back();
490 Data = SavedData;
491 }
492 return SR;
493 }
494
readStringFromTable()495 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
496 auto Idx = readStringIndex(NameTable);
497 if (std::error_code EC = Idx.getError())
498 return EC;
499
500 return StringRef(NameTable[*Idx]);
501 }
502
503 std::error_code
readProfile(FunctionSamples & FProfile)504 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
505 auto NumSamples = readNumber<uint64_t>();
506 if (std::error_code EC = NumSamples.getError())
507 return EC;
508 FProfile.addTotalSamples(*NumSamples);
509
510 // Read the samples in the body.
511 auto NumRecords = readNumber<uint32_t>();
512 if (std::error_code EC = NumRecords.getError())
513 return EC;
514
515 for (uint32_t I = 0; I < *NumRecords; ++I) {
516 auto LineOffset = readNumber<uint64_t>();
517 if (std::error_code EC = LineOffset.getError())
518 return EC;
519
520 if (!isOffsetLegal(*LineOffset)) {
521 return std::error_code();
522 }
523
524 auto Discriminator = readNumber<uint64_t>();
525 if (std::error_code EC = Discriminator.getError())
526 return EC;
527
528 auto NumSamples = readNumber<uint64_t>();
529 if (std::error_code EC = NumSamples.getError())
530 return EC;
531
532 auto NumCalls = readNumber<uint32_t>();
533 if (std::error_code EC = NumCalls.getError())
534 return EC;
535
536 // Here we handle FS discriminators:
537 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
538
539 for (uint32_t J = 0; J < *NumCalls; ++J) {
540 auto CalledFunction(readStringFromTable());
541 if (std::error_code EC = CalledFunction.getError())
542 return EC;
543
544 auto CalledFunctionSamples = readNumber<uint64_t>();
545 if (std::error_code EC = CalledFunctionSamples.getError())
546 return EC;
547
548 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
549 *CalledFunction, *CalledFunctionSamples);
550 }
551
552 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
553 }
554
555 // Read all the samples for inlined function calls.
556 auto NumCallsites = readNumber<uint32_t>();
557 if (std::error_code EC = NumCallsites.getError())
558 return EC;
559
560 for (uint32_t J = 0; J < *NumCallsites; ++J) {
561 auto LineOffset = readNumber<uint64_t>();
562 if (std::error_code EC = LineOffset.getError())
563 return EC;
564
565 auto Discriminator = readNumber<uint64_t>();
566 if (std::error_code EC = Discriminator.getError())
567 return EC;
568
569 auto FName(readStringFromTable());
570 if (std::error_code EC = FName.getError())
571 return EC;
572
573 // Here we handle FS discriminators:
574 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
575
576 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
577 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
578 CalleeProfile.setName(*FName);
579 if (std::error_code EC = readProfile(CalleeProfile))
580 return EC;
581 }
582
583 return sampleprof_error::success;
584 }
585
586 std::error_code
readFuncProfile(const uint8_t * Start)587 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
588 Data = Start;
589 auto NumHeadSamples = readNumber<uint64_t>();
590 if (std::error_code EC = NumHeadSamples.getError())
591 return EC;
592
593 ErrorOr<SampleContext> FContext(readSampleContextFromTable());
594 if (std::error_code EC = FContext.getError())
595 return EC;
596
597 Profiles[*FContext] = FunctionSamples();
598 FunctionSamples &FProfile = Profiles[*FContext];
599 FProfile.setContext(*FContext);
600 FProfile.addHeadSamples(*NumHeadSamples);
601
602 if (FContext->hasContext())
603 CSProfileCount++;
604
605 if (std::error_code EC = readProfile(FProfile))
606 return EC;
607 return sampleprof_error::success;
608 }
609
readImpl()610 std::error_code SampleProfileReaderBinary::readImpl() {
611 ProfileIsFS = ProfileIsFSDisciminator;
612 FunctionSamples::ProfileIsFS = ProfileIsFS;
613 while (!at_eof()) {
614 if (std::error_code EC = readFuncProfile(Data))
615 return EC;
616 }
617
618 return sampleprof_error::success;
619 }
620
621 ErrorOr<SampleContextFrames>
readContextFromTable()622 SampleProfileReaderExtBinaryBase::readContextFromTable() {
623 auto ContextIdx = readNumber<uint32_t>();
624 if (std::error_code EC = ContextIdx.getError())
625 return EC;
626 if (*ContextIdx >= CSNameTable->size())
627 return sampleprof_error::truncated_name_table;
628 return (*CSNameTable)[*ContextIdx];
629 }
630
631 ErrorOr<SampleContext>
readSampleContextFromTable()632 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
633 if (ProfileIsCS) {
634 auto FContext(readContextFromTable());
635 if (std::error_code EC = FContext.getError())
636 return EC;
637 return SampleContext(*FContext);
638 } else {
639 auto FName(readStringFromTable());
640 if (std::error_code EC = FName.getError())
641 return EC;
642 return SampleContext(*FName);
643 }
644 }
645
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)646 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
647 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
648 Data = Start;
649 End = Start + Size;
650 switch (Entry.Type) {
651 case SecProfSummary:
652 if (std::error_code EC = readSummary())
653 return EC;
654 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
655 Summary->setPartialProfile(true);
656 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
657 FunctionSamples::ProfileIsCS = ProfileIsCS = true;
658 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
659 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
660 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
661 FunctionSamples::ProfileIsFS = ProfileIsFS = true;
662 break;
663 case SecNameTable: {
664 FixedLengthMD5 =
665 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
666 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
667 assert((!FixedLengthMD5 || UseMD5) &&
668 "If FixedLengthMD5 is true, UseMD5 has to be true");
669 FunctionSamples::HasUniqSuffix =
670 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
671 if (std::error_code EC = readNameTableSec(UseMD5))
672 return EC;
673 break;
674 }
675 case SecCSNameTable: {
676 if (std::error_code EC = readCSNameTableSec())
677 return EC;
678 break;
679 }
680 case SecLBRProfile:
681 if (std::error_code EC = readFuncProfiles())
682 return EC;
683 break;
684 case SecFuncOffsetTable:
685 FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
686 if (std::error_code EC = readFuncOffsetTable())
687 return EC;
688 break;
689 case SecFuncMetadata: {
690 ProfileIsProbeBased =
691 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
692 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
693 bool HasAttribute =
694 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
695 if (std::error_code EC = readFuncMetadata(HasAttribute))
696 return EC;
697 break;
698 }
699 case SecProfileSymbolList:
700 if (std::error_code EC = readProfileSymbolList())
701 return EC;
702 break;
703 default:
704 if (std::error_code EC = readCustomSection(Entry))
705 return EC;
706 break;
707 }
708 return sampleprof_error::success;
709 }
710
collectFuncsFromModule()711 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
712 if (!M)
713 return false;
714 FuncsToUse.clear();
715 for (auto &F : *M)
716 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
717 return true;
718 }
719
readFuncOffsetTable()720 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
721 // If there are more than one FuncOffsetTable, the profile read associated
722 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
723 // is read.
724 FuncOffsetTable.clear();
725
726 auto Size = readNumber<uint64_t>();
727 if (std::error_code EC = Size.getError())
728 return EC;
729
730 FuncOffsetTable.reserve(*Size);
731
732 if (FuncOffsetsOrdered) {
733 OrderedFuncOffsets =
734 std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
735 OrderedFuncOffsets->reserve(*Size);
736 }
737
738 for (uint32_t I = 0; I < *Size; ++I) {
739 auto FContext(readSampleContextFromTable());
740 if (std::error_code EC = FContext.getError())
741 return EC;
742
743 auto Offset = readNumber<uint64_t>();
744 if (std::error_code EC = Offset.getError())
745 return EC;
746
747 FuncOffsetTable[*FContext] = *Offset;
748 if (FuncOffsetsOrdered)
749 OrderedFuncOffsets->emplace_back(*FContext, *Offset);
750 }
751
752 return sampleprof_error::success;
753 }
754
readFuncProfiles()755 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
756 // Collect functions used by current module if the Reader has been
757 // given a module.
758 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
759 // which will query FunctionSamples::HasUniqSuffix, so it has to be
760 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
761 // NameTable section is read.
762 bool LoadFuncsToBeUsed = collectFuncsFromModule();
763
764 // When LoadFuncsToBeUsed is false, load all the function profiles.
765 const uint8_t *Start = Data;
766 if (!LoadFuncsToBeUsed) {
767 while (Data < End) {
768 if (std::error_code EC = readFuncProfile(Data))
769 return EC;
770 }
771 assert(Data == End && "More data is read than expected");
772 } else {
773 // Load function profiles on demand.
774 if (Remapper) {
775 for (auto Name : FuncsToUse) {
776 Remapper->insert(Name);
777 }
778 }
779
780 if (ProfileIsCS) {
781 DenseSet<uint64_t> FuncGuidsToUse;
782 if (useMD5()) {
783 for (auto Name : FuncsToUse)
784 FuncGuidsToUse.insert(Function::getGUID(Name));
785 }
786
787 // For each function in current module, load all context profiles for
788 // the function as well as their callee contexts which can help profile
789 // guided importing for ThinLTO. This can be achieved by walking
790 // through an ordered context container, where contexts are laid out
791 // as if they were walked in preorder of a context trie. While
792 // traversing the trie, a link to the highest common ancestor node is
793 // kept so that all of its decendants will be loaded.
794 assert(OrderedFuncOffsets.get() &&
795 "func offset table should always be sorted in CS profile");
796 const SampleContext *CommonContext = nullptr;
797 for (const auto &NameOffset : *OrderedFuncOffsets) {
798 const auto &FContext = NameOffset.first;
799 auto FName = FContext.getName();
800 // For function in the current module, keep its farthest ancestor
801 // context. This can be used to load itself and its child and
802 // sibling contexts.
803 if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
804 (!useMD5() && (FuncsToUse.count(FName) ||
805 (Remapper && Remapper->exist(FName))))) {
806 if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
807 CommonContext = &FContext;
808 }
809
810 if (CommonContext == &FContext ||
811 (CommonContext && CommonContext->IsPrefixOf(FContext))) {
812 // Load profile for the current context which originated from
813 // the common ancestor.
814 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
815 assert(FuncProfileAddr < End && "out of LBRProfile section");
816 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
817 return EC;
818 }
819 }
820 } else {
821 if (useMD5()) {
822 for (auto Name : FuncsToUse) {
823 auto GUID = std::to_string(MD5Hash(Name));
824 auto iter = FuncOffsetTable.find(StringRef(GUID));
825 if (iter == FuncOffsetTable.end())
826 continue;
827 const uint8_t *FuncProfileAddr = Start + iter->second;
828 assert(FuncProfileAddr < End && "out of LBRProfile section");
829 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
830 return EC;
831 }
832 } else {
833 for (auto NameOffset : FuncOffsetTable) {
834 SampleContext FContext(NameOffset.first);
835 auto FuncName = FContext.getName();
836 if (!FuncsToUse.count(FuncName) &&
837 (!Remapper || !Remapper->exist(FuncName)))
838 continue;
839 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
840 assert(FuncProfileAddr < End && "out of LBRProfile section");
841 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
842 return EC;
843 }
844 }
845 }
846 Data = End;
847 }
848 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
849 "Cannot have both context-sensitive and regular profile");
850 assert((!CSProfileCount || ProfileIsCS) &&
851 "Section flag should be consistent with actual profile");
852 return sampleprof_error::success;
853 }
854
readProfileSymbolList()855 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
856 if (!ProfSymList)
857 ProfSymList = std::make_unique<ProfileSymbolList>();
858
859 if (std::error_code EC = ProfSymList->read(Data, End - Data))
860 return EC;
861
862 Data = End;
863 return sampleprof_error::success;
864 }
865
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)866 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
867 const uint8_t *SecStart, const uint64_t SecSize,
868 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
869 Data = SecStart;
870 End = SecStart + SecSize;
871 auto DecompressSize = readNumber<uint64_t>();
872 if (std::error_code EC = DecompressSize.getError())
873 return EC;
874 DecompressBufSize = *DecompressSize;
875
876 auto CompressSize = readNumber<uint64_t>();
877 if (std::error_code EC = CompressSize.getError())
878 return EC;
879
880 if (!llvm::compression::zlib::isAvailable())
881 return sampleprof_error::zlib_unavailable;
882
883 uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
884 size_t UCSize = DecompressBufSize;
885 llvm::Error E = compression::zlib::uncompress(
886 makeArrayRef(Data, *CompressSize), Buffer, UCSize);
887 if (E)
888 return sampleprof_error::uncompress_failed;
889 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
890 return sampleprof_error::success;
891 }
892
readImpl()893 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
894 const uint8_t *BufStart =
895 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
896
897 for (auto &Entry : SecHdrTable) {
898 // Skip empty section.
899 if (!Entry.Size)
900 continue;
901
902 // Skip sections without context when SkipFlatProf is true.
903 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
904 continue;
905
906 const uint8_t *SecStart = BufStart + Entry.Offset;
907 uint64_t SecSize = Entry.Size;
908
909 // If the section is compressed, decompress it into a buffer
910 // DecompressBuf before reading the actual data. The pointee of
911 // 'Data' will be changed to buffer hold by DecompressBuf
912 // temporarily when reading the actual data.
913 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
914 if (isCompressed) {
915 const uint8_t *DecompressBuf;
916 uint64_t DecompressBufSize;
917 if (std::error_code EC = decompressSection(
918 SecStart, SecSize, DecompressBuf, DecompressBufSize))
919 return EC;
920 SecStart = DecompressBuf;
921 SecSize = DecompressBufSize;
922 }
923
924 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
925 return EC;
926 if (Data != SecStart + SecSize)
927 return sampleprof_error::malformed;
928
929 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
930 if (isCompressed) {
931 Data = BufStart + Entry.Offset;
932 End = BufStart + Buffer->getBufferSize();
933 }
934 }
935
936 return sampleprof_error::success;
937 }
938
readImpl()939 std::error_code SampleProfileReaderCompactBinary::readImpl() {
940 // Collect functions used by current module if the Reader has been
941 // given a module.
942 bool LoadFuncsToBeUsed = collectFuncsFromModule();
943 ProfileIsFS = ProfileIsFSDisciminator;
944 FunctionSamples::ProfileIsFS = ProfileIsFS;
945 std::vector<uint64_t> OffsetsToUse;
946 if (!LoadFuncsToBeUsed) {
947 // load all the function profiles.
948 for (auto FuncEntry : FuncOffsetTable) {
949 OffsetsToUse.push_back(FuncEntry.second);
950 }
951 } else {
952 // load function profiles on demand.
953 for (auto Name : FuncsToUse) {
954 auto GUID = std::to_string(MD5Hash(Name));
955 auto iter = FuncOffsetTable.find(StringRef(GUID));
956 if (iter == FuncOffsetTable.end())
957 continue;
958 OffsetsToUse.push_back(iter->second);
959 }
960 }
961
962 for (auto Offset : OffsetsToUse) {
963 const uint8_t *SavedData = Data;
964 if (std::error_code EC = readFuncProfile(
965 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
966 Offset))
967 return EC;
968 Data = SavedData;
969 }
970 return sampleprof_error::success;
971 }
972
verifySPMagic(uint64_t Magic)973 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
974 if (Magic == SPMagic())
975 return sampleprof_error::success;
976 return sampleprof_error::bad_magic;
977 }
978
verifySPMagic(uint64_t Magic)979 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
980 if (Magic == SPMagic(SPF_Ext_Binary))
981 return sampleprof_error::success;
982 return sampleprof_error::bad_magic;
983 }
984
985 std::error_code
verifySPMagic(uint64_t Magic)986 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
987 if (Magic == SPMagic(SPF_Compact_Binary))
988 return sampleprof_error::success;
989 return sampleprof_error::bad_magic;
990 }
991
readNameTable()992 std::error_code SampleProfileReaderBinary::readNameTable() {
993 auto Size = readNumber<uint32_t>();
994 if (std::error_code EC = Size.getError())
995 return EC;
996 NameTable.reserve(*Size + NameTable.size());
997 for (uint32_t I = 0; I < *Size; ++I) {
998 auto Name(readString());
999 if (std::error_code EC = Name.getError())
1000 return EC;
1001 NameTable.push_back(*Name);
1002 }
1003
1004 return sampleprof_error::success;
1005 }
1006
readMD5NameTable()1007 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1008 auto Size = readNumber<uint64_t>();
1009 if (std::error_code EC = Size.getError())
1010 return EC;
1011 MD5StringBuf = std::make_unique<std::vector<std::string>>();
1012 MD5StringBuf->reserve(*Size);
1013 if (FixedLengthMD5) {
1014 // Preallocate and initialize NameTable so we can check whether a name
1015 // index has been read before by checking whether the element in the
1016 // NameTable is empty, meanwhile readStringIndex can do the boundary
1017 // check using the size of NameTable.
1018 NameTable.resize(*Size + NameTable.size());
1019
1020 MD5NameMemStart = Data;
1021 Data = Data + (*Size) * sizeof(uint64_t);
1022 return sampleprof_error::success;
1023 }
1024 NameTable.reserve(*Size);
1025 for (uint32_t I = 0; I < *Size; ++I) {
1026 auto FID = readNumber<uint64_t>();
1027 if (std::error_code EC = FID.getError())
1028 return EC;
1029 MD5StringBuf->push_back(std::to_string(*FID));
1030 // NameTable is a vector of StringRef. Here it is pushing back a
1031 // StringRef initialized with the last string in MD5stringBuf.
1032 NameTable.push_back(MD5StringBuf->back());
1033 }
1034 return sampleprof_error::success;
1035 }
1036
readNameTableSec(bool IsMD5)1037 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
1038 if (IsMD5)
1039 return readMD5NameTable();
1040 return SampleProfileReaderBinary::readNameTable();
1041 }
1042
1043 // Read in the CS name table section, which basically contains a list of context
1044 // vectors. Each element of a context vector, aka a frame, refers to the
1045 // underlying raw function names that are stored in the name table, as well as
1046 // a callsite identifier that only makes sense for non-leaf frames.
readCSNameTableSec()1047 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1048 auto Size = readNumber<uint32_t>();
1049 if (std::error_code EC = Size.getError())
1050 return EC;
1051
1052 std::vector<SampleContextFrameVector> *PNameVec =
1053 new std::vector<SampleContextFrameVector>();
1054 PNameVec->reserve(*Size);
1055 for (uint32_t I = 0; I < *Size; ++I) {
1056 PNameVec->emplace_back(SampleContextFrameVector());
1057 auto ContextSize = readNumber<uint32_t>();
1058 if (std::error_code EC = ContextSize.getError())
1059 return EC;
1060 for (uint32_t J = 0; J < *ContextSize; ++J) {
1061 auto FName(readStringFromTable());
1062 if (std::error_code EC = FName.getError())
1063 return EC;
1064 auto LineOffset = readNumber<uint64_t>();
1065 if (std::error_code EC = LineOffset.getError())
1066 return EC;
1067
1068 if (!isOffsetLegal(*LineOffset))
1069 return std::error_code();
1070
1071 auto Discriminator = readNumber<uint64_t>();
1072 if (std::error_code EC = Discriminator.getError())
1073 return EC;
1074
1075 PNameVec->back().emplace_back(
1076 FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1077 }
1078 }
1079
1080 // From this point the underlying object of CSNameTable should be immutable.
1081 CSNameTable.reset(PNameVec);
1082 return sampleprof_error::success;
1083 }
1084
1085 std::error_code
1086
readFuncMetadata(bool ProfileHasAttribute,FunctionSamples * FProfile)1087 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1088 FunctionSamples *FProfile) {
1089 if (Data < End) {
1090 if (ProfileIsProbeBased) {
1091 auto Checksum = readNumber<uint64_t>();
1092 if (std::error_code EC = Checksum.getError())
1093 return EC;
1094 if (FProfile)
1095 FProfile->setFunctionHash(*Checksum);
1096 }
1097
1098 if (ProfileHasAttribute) {
1099 auto Attributes = readNumber<uint32_t>();
1100 if (std::error_code EC = Attributes.getError())
1101 return EC;
1102 if (FProfile)
1103 FProfile->getContext().setAllAttributes(*Attributes);
1104 }
1105
1106 if (!ProfileIsCS) {
1107 // Read all the attributes for inlined function calls.
1108 auto NumCallsites = readNumber<uint32_t>();
1109 if (std::error_code EC = NumCallsites.getError())
1110 return EC;
1111
1112 for (uint32_t J = 0; J < *NumCallsites; ++J) {
1113 auto LineOffset = readNumber<uint64_t>();
1114 if (std::error_code EC = LineOffset.getError())
1115 return EC;
1116
1117 auto Discriminator = readNumber<uint64_t>();
1118 if (std::error_code EC = Discriminator.getError())
1119 return EC;
1120
1121 auto FContext(readSampleContextFromTable());
1122 if (std::error_code EC = FContext.getError())
1123 return EC;
1124
1125 FunctionSamples *CalleeProfile = nullptr;
1126 if (FProfile) {
1127 CalleeProfile = const_cast<FunctionSamples *>(
1128 &FProfile->functionSamplesAt(LineLocation(
1129 *LineOffset,
1130 *Discriminator))[std::string(FContext.get().getName())]);
1131 }
1132 if (std::error_code EC =
1133 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1134 return EC;
1135 }
1136 }
1137 }
1138
1139 return sampleprof_error::success;
1140 }
1141
1142 std::error_code
readFuncMetadata(bool ProfileHasAttribute)1143 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1144 while (Data < End) {
1145 auto FContext(readSampleContextFromTable());
1146 if (std::error_code EC = FContext.getError())
1147 return EC;
1148 FunctionSamples *FProfile = nullptr;
1149 auto It = Profiles.find(*FContext);
1150 if (It != Profiles.end())
1151 FProfile = &It->second;
1152
1153 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1154 return EC;
1155 }
1156
1157 assert(Data == End && "More data is read than expected");
1158 return sampleprof_error::success;
1159 }
1160
readNameTable()1161 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1162 auto Size = readNumber<uint64_t>();
1163 if (std::error_code EC = Size.getError())
1164 return EC;
1165 NameTable.reserve(*Size);
1166 for (uint32_t I = 0; I < *Size; ++I) {
1167 auto FID = readNumber<uint64_t>();
1168 if (std::error_code EC = FID.getError())
1169 return EC;
1170 NameTable.push_back(std::to_string(*FID));
1171 }
1172 return sampleprof_error::success;
1173 }
1174
1175 std::error_code
readSecHdrTableEntry(uint32_t Idx)1176 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1177 SecHdrTableEntry Entry;
1178 auto Type = readUnencodedNumber<uint64_t>();
1179 if (std::error_code EC = Type.getError())
1180 return EC;
1181 Entry.Type = static_cast<SecType>(*Type);
1182
1183 auto Flags = readUnencodedNumber<uint64_t>();
1184 if (std::error_code EC = Flags.getError())
1185 return EC;
1186 Entry.Flags = *Flags;
1187
1188 auto Offset = readUnencodedNumber<uint64_t>();
1189 if (std::error_code EC = Offset.getError())
1190 return EC;
1191 Entry.Offset = *Offset;
1192
1193 auto Size = readUnencodedNumber<uint64_t>();
1194 if (std::error_code EC = Size.getError())
1195 return EC;
1196 Entry.Size = *Size;
1197
1198 Entry.LayoutIndex = Idx;
1199 SecHdrTable.push_back(std::move(Entry));
1200 return sampleprof_error::success;
1201 }
1202
readSecHdrTable()1203 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1204 auto EntryNum = readUnencodedNumber<uint64_t>();
1205 if (std::error_code EC = EntryNum.getError())
1206 return EC;
1207
1208 for (uint32_t i = 0; i < (*EntryNum); i++)
1209 if (std::error_code EC = readSecHdrTableEntry(i))
1210 return EC;
1211
1212 return sampleprof_error::success;
1213 }
1214
readHeader()1215 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1216 const uint8_t *BufStart =
1217 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1218 Data = BufStart;
1219 End = BufStart + Buffer->getBufferSize();
1220
1221 if (std::error_code EC = readMagicIdent())
1222 return EC;
1223
1224 if (std::error_code EC = readSecHdrTable())
1225 return EC;
1226
1227 return sampleprof_error::success;
1228 }
1229
getSectionSize(SecType Type)1230 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1231 uint64_t Size = 0;
1232 for (auto &Entry : SecHdrTable) {
1233 if (Entry.Type == Type)
1234 Size += Entry.Size;
1235 }
1236 return Size;
1237 }
1238
getFileSize()1239 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1240 // Sections in SecHdrTable is not necessarily in the same order as
1241 // sections in the profile because section like FuncOffsetTable needs
1242 // to be written after section LBRProfile but needs to be read before
1243 // section LBRProfile, so we cannot simply use the last entry in
1244 // SecHdrTable to calculate the file size.
1245 uint64_t FileSize = 0;
1246 for (auto &Entry : SecHdrTable) {
1247 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1248 }
1249 return FileSize;
1250 }
1251
getSecFlagsStr(const SecHdrTableEntry & Entry)1252 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1253 std::string Flags;
1254 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1255 Flags.append("{compressed,");
1256 else
1257 Flags.append("{");
1258
1259 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1260 Flags.append("flat,");
1261
1262 switch (Entry.Type) {
1263 case SecNameTable:
1264 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1265 Flags.append("fixlenmd5,");
1266 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1267 Flags.append("md5,");
1268 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1269 Flags.append("uniq,");
1270 break;
1271 case SecProfSummary:
1272 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1273 Flags.append("partial,");
1274 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1275 Flags.append("context,");
1276 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
1277 Flags.append("preInlined,");
1278 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1279 Flags.append("fs-discriminator,");
1280 break;
1281 case SecFuncOffsetTable:
1282 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1283 Flags.append("ordered,");
1284 break;
1285 case SecFuncMetadata:
1286 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1287 Flags.append("probe,");
1288 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1289 Flags.append("attr,");
1290 break;
1291 default:
1292 break;
1293 }
1294 char &last = Flags.back();
1295 if (last == ',')
1296 last = '}';
1297 else
1298 Flags.append("}");
1299 return Flags;
1300 }
1301
dumpSectionInfo(raw_ostream & OS)1302 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1303 uint64_t TotalSecsSize = 0;
1304 for (auto &Entry : SecHdrTable) {
1305 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1306 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1307 << "\n";
1308 ;
1309 TotalSecsSize += Entry.Size;
1310 }
1311 uint64_t HeaderSize = SecHdrTable.front().Offset;
1312 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1313 "Size of 'header + sections' doesn't match the total size of profile");
1314
1315 OS << "Header Size: " << HeaderSize << "\n";
1316 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1317 OS << "File Size: " << getFileSize() << "\n";
1318 return true;
1319 }
1320
readMagicIdent()1321 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1322 // Read and check the magic identifier.
1323 auto Magic = readNumber<uint64_t>();
1324 if (std::error_code EC = Magic.getError())
1325 return EC;
1326 else if (std::error_code EC = verifySPMagic(*Magic))
1327 return EC;
1328
1329 // Read the version number.
1330 auto Version = readNumber<uint64_t>();
1331 if (std::error_code EC = Version.getError())
1332 return EC;
1333 else if (*Version != SPVersion())
1334 return sampleprof_error::unsupported_version;
1335
1336 return sampleprof_error::success;
1337 }
1338
readHeader()1339 std::error_code SampleProfileReaderBinary::readHeader() {
1340 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1341 End = Data + Buffer->getBufferSize();
1342
1343 if (std::error_code EC = readMagicIdent())
1344 return EC;
1345
1346 if (std::error_code EC = readSummary())
1347 return EC;
1348
1349 if (std::error_code EC = readNameTable())
1350 return EC;
1351 return sampleprof_error::success;
1352 }
1353
readHeader()1354 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1355 SampleProfileReaderBinary::readHeader();
1356 if (std::error_code EC = readFuncOffsetTable())
1357 return EC;
1358 return sampleprof_error::success;
1359 }
1360
readFuncOffsetTable()1361 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1362 auto TableOffset = readUnencodedNumber<uint64_t>();
1363 if (std::error_code EC = TableOffset.getError())
1364 return EC;
1365
1366 const uint8_t *SavedData = Data;
1367 const uint8_t *TableStart =
1368 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1369 *TableOffset;
1370 Data = TableStart;
1371
1372 auto Size = readNumber<uint64_t>();
1373 if (std::error_code EC = Size.getError())
1374 return EC;
1375
1376 FuncOffsetTable.reserve(*Size);
1377 for (uint32_t I = 0; I < *Size; ++I) {
1378 auto FName(readStringFromTable());
1379 if (std::error_code EC = FName.getError())
1380 return EC;
1381
1382 auto Offset = readNumber<uint64_t>();
1383 if (std::error_code EC = Offset.getError())
1384 return EC;
1385
1386 FuncOffsetTable[*FName] = *Offset;
1387 }
1388 End = TableStart;
1389 Data = SavedData;
1390 return sampleprof_error::success;
1391 }
1392
collectFuncsFromModule()1393 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1394 if (!M)
1395 return false;
1396 FuncsToUse.clear();
1397 for (auto &F : *M)
1398 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1399 return true;
1400 }
1401
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1402 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1403 std::vector<ProfileSummaryEntry> &Entries) {
1404 auto Cutoff = readNumber<uint64_t>();
1405 if (std::error_code EC = Cutoff.getError())
1406 return EC;
1407
1408 auto MinBlockCount = readNumber<uint64_t>();
1409 if (std::error_code EC = MinBlockCount.getError())
1410 return EC;
1411
1412 auto NumBlocks = readNumber<uint64_t>();
1413 if (std::error_code EC = NumBlocks.getError())
1414 return EC;
1415
1416 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1417 return sampleprof_error::success;
1418 }
1419
readSummary()1420 std::error_code SampleProfileReaderBinary::readSummary() {
1421 auto TotalCount = readNumber<uint64_t>();
1422 if (std::error_code EC = TotalCount.getError())
1423 return EC;
1424
1425 auto MaxBlockCount = readNumber<uint64_t>();
1426 if (std::error_code EC = MaxBlockCount.getError())
1427 return EC;
1428
1429 auto MaxFunctionCount = readNumber<uint64_t>();
1430 if (std::error_code EC = MaxFunctionCount.getError())
1431 return EC;
1432
1433 auto NumBlocks = readNumber<uint64_t>();
1434 if (std::error_code EC = NumBlocks.getError())
1435 return EC;
1436
1437 auto NumFunctions = readNumber<uint64_t>();
1438 if (std::error_code EC = NumFunctions.getError())
1439 return EC;
1440
1441 auto NumSummaryEntries = readNumber<uint64_t>();
1442 if (std::error_code EC = NumSummaryEntries.getError())
1443 return EC;
1444
1445 std::vector<ProfileSummaryEntry> Entries;
1446 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1447 std::error_code EC = readSummaryEntry(Entries);
1448 if (EC != sampleprof_error::success)
1449 return EC;
1450 }
1451 Summary = std::make_unique<ProfileSummary>(
1452 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1453 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1454
1455 return sampleprof_error::success;
1456 }
1457
hasFormat(const MemoryBuffer & Buffer)1458 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1459 const uint8_t *Data =
1460 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1461 uint64_t Magic = decodeULEB128(Data);
1462 return Magic == SPMagic();
1463 }
1464
hasFormat(const MemoryBuffer & Buffer)1465 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1466 const uint8_t *Data =
1467 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1468 uint64_t Magic = decodeULEB128(Data);
1469 return Magic == SPMagic(SPF_Ext_Binary);
1470 }
1471
hasFormat(const MemoryBuffer & Buffer)1472 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1473 const uint8_t *Data =
1474 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1475 uint64_t Magic = decodeULEB128(Data);
1476 return Magic == SPMagic(SPF_Compact_Binary);
1477 }
1478
skipNextWord()1479 std::error_code SampleProfileReaderGCC::skipNextWord() {
1480 uint32_t dummy;
1481 if (!GcovBuffer.readInt(dummy))
1482 return sampleprof_error::truncated;
1483 return sampleprof_error::success;
1484 }
1485
readNumber()1486 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1487 if (sizeof(T) <= sizeof(uint32_t)) {
1488 uint32_t Val;
1489 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1490 return static_cast<T>(Val);
1491 } else if (sizeof(T) <= sizeof(uint64_t)) {
1492 uint64_t Val;
1493 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1494 return static_cast<T>(Val);
1495 }
1496
1497 std::error_code EC = sampleprof_error::malformed;
1498 reportError(0, EC.message());
1499 return EC;
1500 }
1501
readString()1502 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1503 StringRef Str;
1504 if (!GcovBuffer.readString(Str))
1505 return sampleprof_error::truncated;
1506 return Str;
1507 }
1508
readHeader()1509 std::error_code SampleProfileReaderGCC::readHeader() {
1510 // Read the magic identifier.
1511 if (!GcovBuffer.readGCDAFormat())
1512 return sampleprof_error::unrecognized_format;
1513
1514 // Read the version number. Note - the GCC reader does not validate this
1515 // version, but the profile creator generates v704.
1516 GCOV::GCOVVersion version;
1517 if (!GcovBuffer.readGCOVVersion(version))
1518 return sampleprof_error::unrecognized_format;
1519
1520 if (version != GCOV::V407)
1521 return sampleprof_error::unsupported_version;
1522
1523 // Skip the empty integer.
1524 if (std::error_code EC = skipNextWord())
1525 return EC;
1526
1527 return sampleprof_error::success;
1528 }
1529
readSectionTag(uint32_t Expected)1530 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1531 uint32_t Tag;
1532 if (!GcovBuffer.readInt(Tag))
1533 return sampleprof_error::truncated;
1534
1535 if (Tag != Expected)
1536 return sampleprof_error::malformed;
1537
1538 if (std::error_code EC = skipNextWord())
1539 return EC;
1540
1541 return sampleprof_error::success;
1542 }
1543
readNameTable()1544 std::error_code SampleProfileReaderGCC::readNameTable() {
1545 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1546 return EC;
1547
1548 uint32_t Size;
1549 if (!GcovBuffer.readInt(Size))
1550 return sampleprof_error::truncated;
1551
1552 for (uint32_t I = 0; I < Size; ++I) {
1553 StringRef Str;
1554 if (!GcovBuffer.readString(Str))
1555 return sampleprof_error::truncated;
1556 Names.push_back(std::string(Str));
1557 }
1558
1559 return sampleprof_error::success;
1560 }
1561
readFunctionProfiles()1562 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1563 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1564 return EC;
1565
1566 uint32_t NumFunctions;
1567 if (!GcovBuffer.readInt(NumFunctions))
1568 return sampleprof_error::truncated;
1569
1570 InlineCallStack Stack;
1571 for (uint32_t I = 0; I < NumFunctions; ++I)
1572 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1573 return EC;
1574
1575 computeSummary();
1576 return sampleprof_error::success;
1577 }
1578
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1579 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1580 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1581 uint64_t HeadCount = 0;
1582 if (InlineStack.size() == 0)
1583 if (!GcovBuffer.readInt64(HeadCount))
1584 return sampleprof_error::truncated;
1585
1586 uint32_t NameIdx;
1587 if (!GcovBuffer.readInt(NameIdx))
1588 return sampleprof_error::truncated;
1589
1590 StringRef Name(Names[NameIdx]);
1591
1592 uint32_t NumPosCounts;
1593 if (!GcovBuffer.readInt(NumPosCounts))
1594 return sampleprof_error::truncated;
1595
1596 uint32_t NumCallsites;
1597 if (!GcovBuffer.readInt(NumCallsites))
1598 return sampleprof_error::truncated;
1599
1600 FunctionSamples *FProfile = nullptr;
1601 if (InlineStack.size() == 0) {
1602 // If this is a top function that we have already processed, do not
1603 // update its profile again. This happens in the presence of
1604 // function aliases. Since these aliases share the same function
1605 // body, there will be identical replicated profiles for the
1606 // original function. In this case, we simply not bother updating
1607 // the profile of the original function.
1608 FProfile = &Profiles[Name];
1609 FProfile->addHeadSamples(HeadCount);
1610 if (FProfile->getTotalSamples() > 0)
1611 Update = false;
1612 } else {
1613 // Otherwise, we are reading an inlined instance. The top of the
1614 // inline stack contains the profile of the caller. Insert this
1615 // callee in the caller's CallsiteMap.
1616 FunctionSamples *CallerProfile = InlineStack.front();
1617 uint32_t LineOffset = Offset >> 16;
1618 uint32_t Discriminator = Offset & 0xffff;
1619 FProfile = &CallerProfile->functionSamplesAt(
1620 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1621 }
1622 FProfile->setName(Name);
1623
1624 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1625 uint32_t Offset;
1626 if (!GcovBuffer.readInt(Offset))
1627 return sampleprof_error::truncated;
1628
1629 uint32_t NumTargets;
1630 if (!GcovBuffer.readInt(NumTargets))
1631 return sampleprof_error::truncated;
1632
1633 uint64_t Count;
1634 if (!GcovBuffer.readInt64(Count))
1635 return sampleprof_error::truncated;
1636
1637 // The line location is encoded in the offset as:
1638 // high 16 bits: line offset to the start of the function.
1639 // low 16 bits: discriminator.
1640 uint32_t LineOffset = Offset >> 16;
1641 uint32_t Discriminator = Offset & 0xffff;
1642
1643 InlineCallStack NewStack;
1644 NewStack.push_back(FProfile);
1645 llvm::append_range(NewStack, InlineStack);
1646 if (Update) {
1647 // Walk up the inline stack, adding the samples on this line to
1648 // the total sample count of the callers in the chain.
1649 for (auto CallerProfile : NewStack)
1650 CallerProfile->addTotalSamples(Count);
1651
1652 // Update the body samples for the current profile.
1653 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1654 }
1655
1656 // Process the list of functions called at an indirect call site.
1657 // These are all the targets that a function pointer (or virtual
1658 // function) resolved at runtime.
1659 for (uint32_t J = 0; J < NumTargets; J++) {
1660 uint32_t HistVal;
1661 if (!GcovBuffer.readInt(HistVal))
1662 return sampleprof_error::truncated;
1663
1664 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1665 return sampleprof_error::malformed;
1666
1667 uint64_t TargetIdx;
1668 if (!GcovBuffer.readInt64(TargetIdx))
1669 return sampleprof_error::truncated;
1670 StringRef TargetName(Names[TargetIdx]);
1671
1672 uint64_t TargetCount;
1673 if (!GcovBuffer.readInt64(TargetCount))
1674 return sampleprof_error::truncated;
1675
1676 if (Update)
1677 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1678 TargetName, TargetCount);
1679 }
1680 }
1681
1682 // Process all the inlined callers into the current function. These
1683 // are all the callsites that were inlined into this function.
1684 for (uint32_t I = 0; I < NumCallsites; I++) {
1685 // The offset is encoded as:
1686 // high 16 bits: line offset to the start of the function.
1687 // low 16 bits: discriminator.
1688 uint32_t Offset;
1689 if (!GcovBuffer.readInt(Offset))
1690 return sampleprof_error::truncated;
1691 InlineCallStack NewStack;
1692 NewStack.push_back(FProfile);
1693 llvm::append_range(NewStack, InlineStack);
1694 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1695 return EC;
1696 }
1697
1698 return sampleprof_error::success;
1699 }
1700
1701 /// Read a GCC AutoFDO profile.
1702 ///
1703 /// This format is generated by the Linux Perf conversion tool at
1704 /// https://github.com/google/autofdo.
readImpl()1705 std::error_code SampleProfileReaderGCC::readImpl() {
1706 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1707 // Read the string table.
1708 if (std::error_code EC = readNameTable())
1709 return EC;
1710
1711 // Read the source profile.
1712 if (std::error_code EC = readFunctionProfiles())
1713 return EC;
1714
1715 return sampleprof_error::success;
1716 }
1717
hasFormat(const MemoryBuffer & Buffer)1718 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1719 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1720 return Magic == "adcg*704";
1721 }
1722
applyRemapping(LLVMContext & Ctx)1723 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1724 // If the reader uses MD5 to represent string, we can't remap it because
1725 // we don't know what the original function names were.
1726 if (Reader.useMD5()) {
1727 Ctx.diagnose(DiagnosticInfoSampleProfile(
1728 Reader.getBuffer()->getBufferIdentifier(),
1729 "Profile data remapping cannot be applied to profile data "
1730 "in compact format (original mangled names are not available).",
1731 DS_Warning));
1732 return;
1733 }
1734
1735 // CSSPGO-TODO: Remapper is not yet supported.
1736 // We will need to remap the entire context string.
1737 assert(Remappings && "should be initialized while creating remapper");
1738 for (auto &Sample : Reader.getProfiles()) {
1739 DenseSet<StringRef> NamesInSample;
1740 Sample.second.findAllNames(NamesInSample);
1741 for (auto &Name : NamesInSample)
1742 if (auto Key = Remappings->insert(Name))
1743 NameMap.insert({Key, Name});
1744 }
1745
1746 RemappingApplied = true;
1747 }
1748
1749 Optional<StringRef>
lookUpNameInProfile(StringRef Fname)1750 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1751 if (auto Key = Remappings->lookup(Fname))
1752 return NameMap.lookup(Key);
1753 return None;
1754 }
1755
1756 /// Prepare a memory buffer for the contents of \p Filename.
1757 ///
1758 /// \returns an error code indicating the status of the buffer.
1759 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)1760 setupMemoryBuffer(const Twine &Filename) {
1761 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1762 if (std::error_code EC = BufferOrErr.getError())
1763 return EC;
1764 auto Buffer = std::move(BufferOrErr.get());
1765
1766 // Check the file.
1767 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1768 return sampleprof_error::too_large;
1769
1770 return std::move(Buffer);
1771 }
1772
1773 /// Create a sample profile reader based on the format of the input file.
1774 ///
1775 /// \param Filename The file to open.
1776 ///
1777 /// \param C The LLVM context to use to emit diagnostics.
1778 ///
1779 /// \param P The FSDiscriminatorPass.
1780 ///
1781 /// \param RemapFilename The file used for profile remapping.
1782 ///
1783 /// \returns an error code indicating the status of the created reader.
1784 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,FSDiscriminatorPass P,const std::string RemapFilename)1785 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1786 FSDiscriminatorPass P,
1787 const std::string RemapFilename) {
1788 auto BufferOrError = setupMemoryBuffer(Filename);
1789 if (std::error_code EC = BufferOrError.getError())
1790 return EC;
1791 return create(BufferOrError.get(), C, P, RemapFilename);
1792 }
1793
1794 /// Create a sample profile remapper from the given input, to remap the
1795 /// function names in the given profile data.
1796 ///
1797 /// \param Filename The file to open.
1798 ///
1799 /// \param Reader The profile reader the remapper is going to be applied to.
1800 ///
1801 /// \param C The LLVM context to use to emit diagnostics.
1802 ///
1803 /// \returns an error code indicating the status of the created reader.
1804 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,SampleProfileReader & Reader,LLVMContext & C)1805 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1806 SampleProfileReader &Reader,
1807 LLVMContext &C) {
1808 auto BufferOrError = setupMemoryBuffer(Filename);
1809 if (std::error_code EC = BufferOrError.getError())
1810 return EC;
1811 return create(BufferOrError.get(), Reader, C);
1812 }
1813
1814 /// Create a sample profile remapper from the given input, to remap the
1815 /// function names in the given profile data.
1816 ///
1817 /// \param B The memory buffer to create the reader from (assumes ownership).
1818 ///
1819 /// \param C The LLVM context to use to emit diagnostics.
1820 ///
1821 /// \param Reader The profile reader the remapper is going to be applied to.
1822 ///
1823 /// \returns an error code indicating the status of the created reader.
1824 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1825 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1826 SampleProfileReader &Reader,
1827 LLVMContext &C) {
1828 auto Remappings = std::make_unique<SymbolRemappingReader>();
1829 if (Error E = Remappings->read(*B)) {
1830 handleAllErrors(
1831 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1832 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1833 ParseError.getLineNum(),
1834 ParseError.getMessage()));
1835 });
1836 return sampleprof_error::malformed;
1837 }
1838
1839 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1840 std::move(B), std::move(Remappings), Reader);
1841 }
1842
1843 /// Create a sample profile reader based on the format of the input data.
1844 ///
1845 /// \param B The memory buffer to create the reader from (assumes ownership).
1846 ///
1847 /// \param C The LLVM context to use to emit diagnostics.
1848 ///
1849 /// \param P The FSDiscriminatorPass.
1850 ///
1851 /// \param RemapFilename The file used for profile remapping.
1852 ///
1853 /// \returns an error code indicating the status of the created reader.
1854 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,FSDiscriminatorPass P,const std::string RemapFilename)1855 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1856 FSDiscriminatorPass P,
1857 const std::string RemapFilename) {
1858 std::unique_ptr<SampleProfileReader> Reader;
1859 if (SampleProfileReaderRawBinary::hasFormat(*B))
1860 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1861 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1862 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1863 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1864 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1865 else if (SampleProfileReaderGCC::hasFormat(*B))
1866 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1867 else if (SampleProfileReaderText::hasFormat(*B))
1868 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1869 else
1870 return sampleprof_error::unrecognized_format;
1871
1872 if (!RemapFilename.empty()) {
1873 auto ReaderOrErr =
1874 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1875 if (std::error_code EC = ReaderOrErr.getError()) {
1876 std::string Msg = "Could not create remapper: " + EC.message();
1877 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1878 return EC;
1879 }
1880 Reader->Remapper = std::move(ReaderOrErr.get());
1881 }
1882
1883 if (std::error_code EC = Reader->readHeader()) {
1884 return EC;
1885 }
1886
1887 Reader->setDiscriminatorMaskedBitFrom(P);
1888
1889 return std::move(Reader);
1890 }
1891
1892 // For text and GCC file formats, we compute the summary after reading the
1893 // profile. Binary format has the profile summary in its header.
computeSummary()1894 void SampleProfileReader::computeSummary() {
1895 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1896 Summary = Builder.computeSummaryForProfiles(Profiles);
1897 }
1898