1 //===-- DataReader.cpp - Perf data reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <map>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 Optional<StringRef> getLTOCommonName(const StringRef Name) {
44   size_t LTOSuffixPos = Name.find(".lto_priv.");
45   if (LTOSuffixPos != StringRef::npos) {
46     return Name.substr(0, LTOSuffixPos + 10);
47   } else if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos) {
48     return Name.substr(0, LTOSuffixPos + 11);
49   } else {
50     return NoneType();
51   }
52 }
53 
54 namespace {
55 
56 /// Return true if the function name can change across compilations.
57 bool hasVolatileName(const BinaryFunction &BF) {
58   for (const StringRef Name : BF.getNames()) {
59     if (getLTOCommonName(Name))
60       return true;
61   }
62   return false;
63 }
64 
65 /// Return standard escaped name of the function possibly renamed by BOLT.
66 std::string normalizeName(StringRef NameRef) {
67   // Strip "PG." prefix used for globalized locals.
68   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
69   return getEscapedName(NameRef);
70 }
71 
72 } // anonymous namespace
73 
74 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
75   if (Loc.IsSymbol) {
76     OS << Loc.Name;
77     if (Loc.Offset)
78       OS << "+" << Twine::utohexstr(Loc.Offset);
79   } else {
80     OS << Twine::utohexstr(Loc.Offset);
81   }
82   return OS;
83 }
84 
85 iterator_range<FuncBranchData::ContainerTy::const_iterator>
86 FuncBranchData::getBranchRange(uint64_t From) const {
87   assert(std::is_sorted(Data.begin(), Data.end()));
88   struct Compare {
89     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
90       return BI.From.Offset < Val;
91     }
92     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
93       return Val < BI.From.Offset;
94     }
95   };
96   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
97   return iterator_range<ContainerTy::const_iterator>(Range.first, Range.second);
98 }
99 
100 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
101   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
102   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
103     if (I->From.Name == FBD.Name) {
104       I->From.Name = this->Name;
105       I->From.Offset += Offset;
106     }
107     if (I->To.Name == FBD.Name) {
108       I->To.Name = this->Name;
109       I->To.Offset += Offset;
110     }
111   }
112   std::stable_sort(Data.begin(), Data.end());
113   ExecutionCount += FBD.ExecutionCount;
114   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
115     assert(I->To.Name == FBD.Name);
116     auto NewElmt = EntryData.insert(EntryData.end(), *I);
117     NewElmt->To.Name = this->Name;
118     NewElmt->To.Offset += Offset;
119   }
120 }
121 
122 uint64_t FuncBranchData::getNumExecutedBranches() const {
123   uint64_t ExecutedBranches = 0;
124   for (const BranchInfo &BI : Data) {
125     int64_t BranchCount = BI.Branches;
126     assert(BranchCount >= 0 && "branch execution count should not be negative");
127     ExecutedBranches += BranchCount;
128   }
129   return ExecutedBranches;
130 }
131 
132 void SampleInfo::mergeWith(const SampleInfo &SI) {
133   Hits += SI.Hits;
134 }
135 
136 void SampleInfo::print(raw_ostream &OS) const {
137   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
138      << " " << Hits << "\n";
139 }
140 
141 uint64_t
142 FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
143   assert(std::is_sorted(Data.begin(), Data.end()));
144   struct Compare {
145     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
146       return SI.Loc.Offset < Val;
147     }
148     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
149       return Val < SI.Loc.Offset;
150     }
151   };
152   uint64_t Result = 0;
153   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
154             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
155        I != E; ++I) {
156     Result += I->Hits;
157   }
158   return Result;
159 }
160 
161 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
162   auto Iter = Index.find(Offset);
163   if (Iter == Index.end()) {
164     Data.emplace_back(Location(true, Name, Offset), Count);
165     Index[Offset] = Data.size() - 1;
166     return;
167   }
168   SampleInfo &SI = Data[Iter->second];
169   SI.Hits += Count;
170 }
171 
172 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
173                                      uint64_t Count, uint64_t Mispreds) {
174   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
175   if (Iter == IntraIndex[OffsetFrom].end()) {
176     Data.emplace_back(Location(true, Name, OffsetFrom),
177                       Location(true, Name, OffsetTo), Mispreds, Count);
178     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
179     return;
180   }
181   BranchInfo &BI = Data[Iter->second];
182   BI.Branches += Count;
183   BI.Mispreds += Mispreds;
184 }
185 
186 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
187                                    uint64_t Count, uint64_t Mispreds) {
188   auto Iter = InterIndex[OffsetFrom].find(To);
189   if (Iter == InterIndex[OffsetFrom].end()) {
190     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
191     InterIndex[OffsetFrom][To] = Data.size() - 1;
192     return;
193   }
194   BranchInfo &BI = Data[Iter->second];
195   BI.Branches += Count;
196   BI.Mispreds += Mispreds;
197 }
198 
199 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
200                                     uint64_t Count, uint64_t Mispreds) {
201   auto Iter = EntryIndex[OffsetTo].find(From);
202   if (Iter == EntryIndex[OffsetTo].end()) {
203     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
204                            Count);
205     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
206     return;
207   }
208   BranchInfo &BI = EntryData[Iter->second];
209   BI.Branches += Count;
210   BI.Mispreds += Mispreds;
211 }
212 
213 void BranchInfo::mergeWith(const BranchInfo &BI) {
214   Branches += BI.Branches;
215   Mispreds += BI.Mispreds;
216 }
217 
218 void BranchInfo::print(raw_ostream &OS) const {
219   OS << From.IsSymbol << " " << From.Name << " "
220      << Twine::utohexstr(From.Offset) << " "
221      << To.IsSymbol << " " << To.Name << " "
222      << Twine::utohexstr(To.Offset) << " "
223      << Mispreds << " " << Branches << '\n';
224 }
225 
226 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
227                                                       uint64_t To) const {
228   for (const BranchInfo &I : Data) {
229     if (I.From.Offset == From && I.To.Offset == To &&
230         I.From.Name == I.To.Name)
231       return I;
232   }
233   return make_error_code(llvm::errc::invalid_argument);
234 }
235 
236 ErrorOr<const BranchInfo &>
237 FuncBranchData::getDirectCallBranch(uint64_t From) const {
238   // Commented out because it can be expensive.
239   // assert(std::is_sorted(Data.begin(), Data.end()));
240   struct Compare {
241     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
242       return BI.From.Offset < Val;
243     }
244     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
245       return Val < BI.From.Offset;
246     }
247   };
248   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
249   for (auto I = Range.first; I != Range.second; ++I) {
250     if (I->From.Name != I->To.Name)
251       return *I;
252   }
253   return make_error_code(llvm::errc::invalid_argument);
254 }
255 
256 void MemInfo::print(raw_ostream &OS) const {
257   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
258      << Twine::utohexstr(Offset.Offset) << " "
259      << (Addr.IsSymbol + 3) << " " << Addr.Name << " "
260      << Twine::utohexstr(Addr.Offset) << " "
261      << Count << "\n";
262 }
263 
264 void MemInfo::prettyPrint(raw_ostream &OS) const {
265   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
266 }
267 
268 void FuncMemData::update(const Location &Offset, const Location &Addr) {
269   auto Iter = EventIndex[Offset.Offset].find(Addr);
270   if (Iter == EventIndex[Offset.Offset].end()) {
271     Data.emplace_back(MemInfo(Offset, Addr, 1));
272     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
273     return;
274   }
275   ++Data[Iter->second].Count;
276 }
277 
278 Error DataReader::preprocessProfile(BinaryContext &BC) {
279   if (std::error_code EC = parseInput()) {
280     return errorCodeToError(EC);
281   }
282 
283   if (opts::DumpData) {
284     dump();
285   }
286 
287   if (collectedInBoltedBinary()) {
288     outs() << "BOLT-INFO: profile collection done on a binary already "
289               "processed by BOLT\n";
290   }
291 
292   for (auto &BFI : BC.getBinaryFunctions()) {
293     BinaryFunction &Function = BFI.second;
294     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
295       setMemData(Function, MemData);
296       MemData->Used = true;
297     }
298     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
299       setBranchData(Function, FuncData);
300       Function.ExecutionCount = FuncData->ExecutionCount;
301       FuncData->Used = true;
302     }
303   }
304 
305   for (auto &BFI : BC.getBinaryFunctions()) {
306     BinaryFunction &Function = BFI.second;
307     matchProfileMemData(Function);
308   }
309 
310   return Error::success();
311 }
312 
313 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
314   for (auto &BFI : BC.getBinaryFunctions()) {
315     BinaryFunction &Function = BFI.second;
316     FuncMemData *MemoryData = getMemData(Function);
317     if (!MemoryData)
318       continue;
319 
320     for (MemInfo &MI : MemoryData->Data) {
321       const uint64_t Offset = MI.Offset.Offset;
322       auto II = Function.Instructions.find(Offset);
323       if (II == Function.Instructions.end()) {
324         // Ignore bad instruction address.
325         continue;
326       }
327 
328       auto &MemAccessProfile =
329         BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
330             II->second, "MemoryAccessProfile");
331       BinaryData *BD = nullptr;
332       if (MI.Addr.IsSymbol) {
333         BD = BC.getBinaryDataByName(MI.Addr.Name);
334       }
335       MemAccessProfile.AddressAccessInfo.
336         push_back({BD, MI.Addr.Offset, MI.Count});
337       auto NextII = std::next(II);
338       if (NextII == Function.Instructions.end()) {
339         MemAccessProfile.NextInstrOffset = Function.getSize();
340       } else {
341         MemAccessProfile.NextInstrOffset = II->first;
342       }
343     }
344     Function.HasMemoryProfile = true;
345   }
346 
347   return Error::success();
348 }
349 
350 Error DataReader::readProfile(BinaryContext &BC) {
351   for (auto &BFI : BC.getBinaryFunctions()) {
352     BinaryFunction &Function = BFI.second;
353     readProfile(Function);
354   }
355 
356   uint64_t NumUnused = 0;
357   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
358     if (!FuncData.getValue().Used)
359       ++NumUnused;
360   BC.setNumUnusedProfiledObjects(NumUnused);
361 
362   return Error::success();
363 }
364 
365 std::error_code DataReader::parseInput() {
366   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
367       MemoryBuffer::getFileOrSTDIN(Filename);
368   if (std::error_code EC = MB.getError()) {
369     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
370     return EC;
371   }
372   FileBuf = std::move(MB.get());
373   ParsingBuf = FileBuf->getBuffer();
374   if (std::error_code EC = parse()) {
375     return EC;
376   }
377   if (!ParsingBuf.empty()) {
378     Diag << "WARNING: invalid profile data detected at line " << Line
379          << ". Possibly corrupted profile.\n";
380   }
381 
382   buildLTONameMaps();
383 
384   return std::error_code();
385 }
386 
387 void DataReader::readProfile(BinaryFunction &BF) {
388   if (BF.empty())
389     return;
390 
391   if (!hasLBR()) {
392     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
393     readSampleData(BF);
394     return;
395   }
396 
397   BF.ProfileFlags = BinaryFunction::PF_LBR;
398 
399   // Possibly assign/re-assign branch profile data.
400   matchProfileData(BF);
401 
402   FuncBranchData *FBD = getBranchData(BF);
403   if (!FBD)
404     return;
405 
406   // Assign basic block counts to function entry points. These only include
407   // counts for outside entries.
408   //
409   // There is a slight skew introduced here as branches originated from RETs
410   // may be accounted for in the execution count of an entry block if the last
411   // instruction in a predecessor fall-through block is a call. This situation
412   // should rarely happen because there are few multiple-entry functions.
413   for (const BranchInfo &BI : FBD->EntryData) {
414     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
415     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
416       uint64_t Count = BB->getExecutionCount();
417       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
418         Count = 0;
419       BB->setExecutionCount(Count + BI.Branches);
420     }
421   }
422 
423   uint64_t MismatchedBranches = 0;
424   for (const BranchInfo &BI : FBD->Data) {
425     if (BI.From.Name != BI.To.Name) {
426       continue;
427     }
428 
429     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset,
430                       BI.Branches, BI.Mispreds)) {
431       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
432                         << BI.To.Offset << '\n');
433       ++MismatchedBranches;
434     }
435   }
436 
437   // Convert branch data into annotations.
438   convertBranchData(BF);
439 }
440 
441 void DataReader::matchProfileData(BinaryFunction &BF) {
442   // This functionality is available for LBR-mode only
443   // TODO: Implement evaluateProfileData() for samples, checking whether
444   // sample addresses match instruction addresses in the function
445   if (!hasLBR())
446     return;
447 
448   FuncBranchData *FBD = getBranchData(BF);
449   if (FBD) {
450     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
451     BF.RawBranchCount = FBD->getNumExecutedBranches();
452     if (BF.ProfileMatchRatio == 1.0f) {
453       if (fetchProfileForOtherEntryPoints(BF)) {
454         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
455         BF.ExecutionCount = FBD->ExecutionCount;
456         BF.RawBranchCount = FBD->getNumExecutedBranches();
457       }
458       return;
459     }
460   }
461 
462   // Check if the function name can fluctuate between several compilations
463   // possibly triggered by minor unrelated code changes in the source code
464   // of the input binary.
465   if (!hasVolatileName(BF))
466     return;
467 
468   // Check for a profile that matches with 100% confidence.
469   const std::vector<FuncBranchData *> AllBranchData =
470       getBranchDataForNamesRegex(BF.getNames());
471   for (FuncBranchData *NewBranchData : AllBranchData) {
472     // Prevent functions from sharing the same profile.
473     if (NewBranchData->Used)
474       continue;
475 
476     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
477       continue;
478 
479     if (FBD)
480       FBD->Used = false;
481 
482     // Update function profile data with the new set.
483     setBranchData(BF, NewBranchData);
484     NewBranchData->Used = true;
485     BF.ExecutionCount = NewBranchData->ExecutionCount;
486     BF.ProfileMatchRatio = 1.0f;
487     break;
488   }
489 }
490 
491 void DataReader::matchProfileMemData(BinaryFunction &BF) {
492   const std::vector<FuncMemData *> AllMemData =
493       getMemDataForNamesRegex(BF.getNames());
494   for (FuncMemData *NewMemData : AllMemData) {
495     // Prevent functions from sharing the same profile.
496     if (NewMemData->Used)
497       continue;
498 
499     if (FuncMemData *MD = getMemData(BF))
500       MD->Used = false;
501 
502     // Update function profile data with the new set.
503     setMemData(BF, NewMemData);
504     NewMemData->Used = true;
505     break;
506   }
507 }
508 
509 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
510   BinaryContext &BC = BF.getBinaryContext();
511 
512   FuncBranchData *FBD = getBranchData(BF);
513   if (!FBD)
514     return false;
515 
516   // Check if we are missing profiling data for secondary entry points
517   bool First = true;
518   bool Updated = false;
519   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
520     if (First) {
521       First = false;
522       continue;
523     }
524     if (BB->isEntryPoint()) {
525       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
526       // Look for branch data associated with this entry point
527       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
528         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
529           FBD->appendFrom(*Data, BB->getOffset());
530           Data->Used = true;
531           Updated = true;
532         }
533       }
534     }
535   }
536 
537   return Updated;
538 }
539 
540 float DataReader::evaluateProfileData(BinaryFunction &BF,
541                                       const FuncBranchData &BranchData) const {
542   BinaryContext &BC = BF.getBinaryContext();
543 
544   // Until we define a minimal profile, we consider an empty branch data to be
545   // a valid profile. It could happen to a function without branches when we
546   // still have an EntryData for the execution count.
547   if (BranchData.Data.empty()) {
548     return 1.0f;
549   }
550 
551   uint64_t NumMatchedBranches = 0;
552   for (const BranchInfo &BI : BranchData.Data) {
553     bool IsValid = false;
554     if (BI.From.Name == BI.To.Name) {
555       // Try to record information with 0 count.
556       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
557     } else if (collectedInBoltedBinary()) {
558       // We can't check branch source for collections in bolted binaries because
559       // the source of the branch may be mapped to the first instruction in a BB
560       // instead of the original branch (which may not exist in the source bin).
561       IsValid = true;
562     } else {
563       // The branch has to originate from this function.
564       // Check for calls, tail calls, rets and indirect branches.
565       // When matching profiling info, we did not reach the stage
566       // when we identify tail calls, so they are still represented
567       // by regular branch instructions and we need isBranch() here.
568       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
569       // If it's a prefix - skip it.
570       if (Instr && BC.MIB->isPrefix(*Instr))
571         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
572       if (Instr &&
573           (BC.MIB->isCall(*Instr) ||
574            BC.MIB->isBranch(*Instr) ||
575            BC.MIB->isReturn(*Instr))) {
576         IsValid = true;
577       }
578     }
579 
580     if (IsValid) {
581       ++NumMatchedBranches;
582       continue;
583     }
584 
585     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
586                       << Twine::utohexstr(BI.From.Offset) << " -> ";
587                if (BI.From.Name == BI.To.Name) dbgs()
588                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
589                else dbgs() << "<outbounds>\n";);
590   }
591 
592   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
593   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
594     errs() << "BOLT-WARNING: profile branches match only "
595            << format("%.1f%%", MatchRatio * 100.0f) << " ("
596            << NumMatchedBranches << '/' << BranchData.Data.size()
597            << ") for function " << BF << '\n';
598   }
599 
600   return MatchRatio;
601 }
602 
603 void DataReader::readSampleData(BinaryFunction &BF) {
604   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
605   if (!SampleDataOrErr)
606     return;
607 
608   // Basic samples mode territory (without LBR info)
609   // First step is to assign BB execution count based on samples from perf
610   BF.ProfileMatchRatio = 1.0f;
611   BF.removeTagsFromProfile();
612   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
613   bool NormalizeByCalls = usesEvent("branches");
614   static bool NagUser = true;
615   if (NagUser) {
616     outs()
617         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
618     if (NormalizeByInsnCount) {
619       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
620     } else if (NormalizeByCalls) {
621       outs() << "BOLT-INFO: normalizing samples by branches.\n";
622     }
623     NagUser = false;
624   }
625   uint64_t LastOffset = BF.getSize();
626   uint64_t TotalEntryCount = 0;
627   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
628        I != E; ++I) {
629     uint64_t CurOffset = I->first;
630     // Always work with samples multiplied by 1000 to avoid losing them if we
631     // later need to normalize numbers
632     uint64_t NumSamples =
633         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
634     if (NormalizeByInsnCount && I->second->getNumNonPseudos())
635       NumSamples /= I->second->getNumNonPseudos();
636     else if (NormalizeByCalls) {
637       uint32_t NumCalls = I->second->getNumCalls();
638       NumSamples /= NumCalls + 1;
639     }
640     I->second->setExecutionCount(NumSamples);
641     if (I->second->isEntryPoint())
642       TotalEntryCount += NumSamples;
643     LastOffset = CurOffset;
644   }
645 
646   BF.ExecutionCount = TotalEntryCount;
647 
648   estimateEdgeCounts(BF);
649 }
650 
651 void DataReader::convertBranchData(BinaryFunction &BF) const {
652   BinaryContext &BC = BF.getBinaryContext();
653 
654   if (BF.empty())
655     return;
656 
657   FuncBranchData *FBD = getBranchData(BF);
658   if (!FBD)
659     return;
660 
661   // Profile information for calls.
662   //
663   // There are 3 cases that we annotate differently:
664   //   1) Conditional tail calls that could be mispredicted.
665   //   2) Indirect calls to multiple destinations with mispredictions.
666   //      Before we validate CFG we have to handle indirect branches here too.
667   //   3) Regular direct calls. The count could be different from containing
668   //      basic block count. Keep this data in case we find it useful.
669   //
670   for (BranchInfo &BI : FBD->Data) {
671     // Ignore internal branches.
672     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
673       continue;
674 
675     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
676     if (!Instr ||
677         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
678       continue;
679 
680     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
681       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) {
682         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
683                << Twine::utohexstr(BI.From.Offset)
684                << " in function " << BF << '\n';
685       }
686       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
687       Value += Count;
688     };
689 
690     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
691       IndirectCallSiteProfile &CSP =
692         BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
693             *Instr, "CallProfile");
694       MCSymbol *CalleeSymbol = nullptr;
695       if (BI.To.IsSymbol) {
696         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) {
697           CalleeSymbol = BD->getSymbol();
698         }
699       }
700       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
701     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
702       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
703       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
704     } else {
705       setOrUpdateAnnotation("Count", BI.Branches);
706     }
707   }
708 }
709 
710 bool DataReader::recordBranch(BinaryFunction &BF,
711                               uint64_t From, uint64_t To,
712                               uint64_t Count, uint64_t Mispreds) const {
713   BinaryContext &BC = BF.getBinaryContext();
714 
715   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
716   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
717 
718   if (!FromBB || !ToBB) {
719     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
720     return false;
721   }
722 
723   // Could be bad LBR data; ignore the branch. In the case of data collected
724   // in binaries optimized by BOLT, a source BB may be mapped to two output
725   // BBs as a result of optimizations. In that case, a branch between these
726   // two will be recorded as a branch from A going to A in the source address
727   // space. Keep processing.
728   if (From == To) {
729     return true;
730   }
731 
732   if (FromBB->succ_size() == 0) {
733     // Return from a tail call.
734     return true;
735   }
736 
737   // Very rarely we will see ignored branches. Do a linear check.
738   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
739     if (Branch == std::make_pair(static_cast<uint32_t>(From),
740                                  static_cast<uint32_t>(To)))
741       return true;
742   }
743 
744   if (To != ToBB->getOffset()) {
745     // "To" could be referring to nop instructions in between 2 basic blocks.
746     // While building the CFG we make sure these nops are attributed to the
747     // previous basic block, thus we check if the destination belongs to the
748     // gap past the last instruction.
749     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
750     if (LastInstr) {
751       const uint32_t LastInstrOffset =
752           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Offset");
753 
754       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
755       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) {
756         return true;
757       }
758 
759       if (To <= LastInstrOffset) {
760         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
761                           << " in " << BF << " : " << From << " -> " << To
762                           << '\n');
763         return false;
764       }
765     }
766 
767     // The real destination is the layout successor of the detected ToBB.
768     if (ToBB == BF.BasicBlocksLayout.back())
769       return false;
770     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
771     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
772     ToBB = NextBB;
773   }
774 
775   // If there's no corresponding instruction for 'From', we have probably
776   // discarded it as a FT from __builtin_unreachable.
777   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
778   if (!FromInstruction) {
779     // If the data was collected in a bolted binary, the From addresses may be
780     // translated to the first instruction of the source BB if BOLT inserted
781     // a new branch that did not exist in the source (we can't map it to the
782     // source instruction, so we map it to the first instr of source BB).
783     // We do not keep offsets for random instructions. So the check above will
784     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
785     if (collectedInBoltedBinary()) {
786       if (FromBB->getInputOffset() != From) {
787         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
788                           << BF << '\n');
789         return false;
790       }
791       FromInstruction = nullptr;
792     } else {
793       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
794                         << '\n');
795       return false;
796     }
797   }
798 
799   if (!FromBB->getSuccessor(ToBB->getLabel())) {
800     // Check if this is a recursive call or a return from a recursive call.
801     if (FromInstruction && ToBB->isEntryPoint() &&
802         (BC.MIB->isCall(*FromInstruction) ||
803          BC.MIB->isIndirectBranch(*FromInstruction))) {
804       // Execution count is already accounted for.
805       return true;
806     }
807     // For data collected in a bolted binary, we may have created two output BBs
808     // that map to one original block. Branches between these two blocks will
809     // appear here as one BB jumping to itself, even though it has no loop edges.
810     // Ignore these.
811     if (collectedInBoltedBinary() && FromBB == ToBB)
812       return true;
813 
814     LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
815                       << Twine::utohexstr(From) << " -> "
816                       << Twine::utohexstr(To) << '\n');
817     return false;
818   }
819 
820   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
821   BI.Count += Count;
822   // Only update mispredicted count if it the count was real.
823   if (Count) {
824     BI.MispredictedCount += Mispreds;
825   }
826 
827   return true;
828 }
829 
830 void DataReader::reportError(StringRef ErrorMsg) {
831   Diag << "Error reading BOLT data input file: line " << Line << ", column "
832        << Col << ": " << ErrorMsg << '\n';
833 }
834 
835 bool DataReader::expectAndConsumeFS() {
836   if (ParsingBuf[0] != FieldSeparator) {
837     reportError("expected field separator");
838     return false;
839   }
840   ParsingBuf = ParsingBuf.drop_front(1);
841   Col += 1;
842   return true;
843 }
844 
845 void DataReader::consumeAllRemainingFS() {
846   while (ParsingBuf[0] == FieldSeparator) {
847     ParsingBuf = ParsingBuf.drop_front(1);
848     Col += 1;
849   }
850 }
851 
852 bool DataReader::checkAndConsumeNewLine() {
853   if (ParsingBuf[0] != '\n')
854     return false;
855 
856   ParsingBuf = ParsingBuf.drop_front(1);
857   Col = 0;
858   Line += 1;
859   return true;
860 }
861 
862 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
863   if (EndChar == '\\') {
864     reportError("EndChar could not be backslash");
865     return make_error_code(llvm::errc::io_error);
866   }
867 
868   std::string EndChars(1, EndChar);
869   EndChars.push_back('\\');
870   if (EndNl)
871     EndChars.push_back('\n');
872 
873   size_t StringEnd = 0;
874   do {
875     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
876     if (StringEnd == StringRef::npos ||
877         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
878       reportError("malformed field");
879       return make_error_code(llvm::errc::io_error);
880     }
881 
882     if (ParsingBuf[StringEnd] != '\\')
883       break;
884 
885     StringEnd += 2;
886   } while (1);
887 
888   StringRef Str = ParsingBuf.substr(0, StringEnd);
889 
890   // If EndNl was set and nl was found instead of EndChar, do not consume the
891   // new line.
892   bool EndNlInsteadOfEndChar =
893     ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
894   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
895 
896   ParsingBuf = ParsingBuf.drop_front(End);
897   if (EndChar == '\n') {
898     Col = 0;
899     Line += 1;
900   } else {
901     Col += End;
902   }
903   return Str;
904 }
905 
906 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
907   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
908   if (std::error_code EC = NumStrRes.getError())
909     return EC;
910   StringRef NumStr = NumStrRes.get();
911   int64_t Num;
912   if (NumStr.getAsInteger(10, Num)) {
913     reportError("expected decimal number");
914     Diag << "Found: " << NumStr << "\n";
915     return make_error_code(llvm::errc::io_error);
916   }
917   return Num;
918 }
919 
920 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
921   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
922   if (std::error_code EC = NumStrRes.getError())
923     return EC;
924   StringRef NumStr = NumStrRes.get();
925   uint64_t Num;
926   if (NumStr.getAsInteger(16, Num)) {
927     reportError("expected hexidecimal number");
928     Diag << "Found: " << NumStr << "\n";
929     return make_error_code(llvm::errc::io_error);
930   }
931   return Num;
932 }
933 
934 ErrorOr<Location> DataReader::parseLocation(char EndChar,
935                                             bool EndNl,
936                                             bool ExpectMemLoc) {
937   // Read whether the location of the branch should be DSO or a symbol
938   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
939   // symbol.
940   // The symbol flag is also used to tag memory load events by adding 3 to the
941   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
942   if (!ExpectMemLoc &&
943       ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && ParsingBuf[0] != '2') {
944     reportError("expected 0, 1 or 2");
945     return make_error_code(llvm::errc::io_error);
946   }
947 
948   if (ExpectMemLoc &&
949       ParsingBuf[0] != '3' && ParsingBuf[0] != '4' && ParsingBuf[0] != '5') {
950     reportError("expected 3, 4 or 5");
951     return make_error_code(llvm::errc::io_error);
952   }
953 
954   bool IsSymbol =
955     (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
956     (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
957   ParsingBuf = ParsingBuf.drop_front(1);
958   Col += 1;
959 
960   if (!expectAndConsumeFS())
961     return make_error_code(llvm::errc::io_error);
962   consumeAllRemainingFS();
963 
964   // Read the string containing the symbol or the DSO name
965   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
966   if (std::error_code EC = NameRes.getError())
967     return EC;
968   StringRef Name = NameRes.get();
969   consumeAllRemainingFS();
970 
971   // Read the offset
972   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
973   if (std::error_code EC = Offset.getError())
974     return EC;
975 
976   return Location(IsSymbol, Name, Offset.get());
977 }
978 
979 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
980   ErrorOr<Location> Res = parseLocation(FieldSeparator);
981   if (std::error_code EC = Res.getError())
982     return EC;
983   Location From = Res.get();
984 
985   consumeAllRemainingFS();
986   Res = parseLocation(FieldSeparator);
987   if (std::error_code EC = Res.getError())
988     return EC;
989   Location To = Res.get();
990 
991   consumeAllRemainingFS();
992   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
993   if (std::error_code EC = MRes.getError())
994     return EC;
995   int64_t NumMispreds = MRes.get();
996 
997   consumeAllRemainingFS();
998   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
999   if (std::error_code EC = BRes.getError())
1000     return EC;
1001   int64_t NumBranches = BRes.get();
1002 
1003   consumeAllRemainingFS();
1004   if (!checkAndConsumeNewLine()) {
1005     reportError("expected end of line");
1006     return make_error_code(llvm::errc::io_error);
1007   }
1008 
1009   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
1010 }
1011 
1012 ErrorOr<MemInfo> DataReader::parseMemInfo() {
1013   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
1014   if (std::error_code EC = Res.getError())
1015     return EC;
1016   Location Offset = Res.get();
1017 
1018   consumeAllRemainingFS();
1019   Res = parseMemLocation(FieldSeparator);
1020   if (std::error_code EC = Res.getError())
1021     return EC;
1022   Location Addr = Res.get();
1023 
1024   consumeAllRemainingFS();
1025   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1026   if (std::error_code EC = CountRes.getError())
1027     return EC;
1028 
1029   consumeAllRemainingFS();
1030   if (!checkAndConsumeNewLine()) {
1031     reportError("expected end of line");
1032     return make_error_code(llvm::errc::io_error);
1033   }
1034 
1035   return MemInfo(Offset, Addr, CountRes.get());
1036 }
1037 
1038 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1039   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1040   if (std::error_code EC = Res.getError())
1041     return EC;
1042   Location Address = Res.get();
1043 
1044   consumeAllRemainingFS();
1045   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1046   if (std::error_code EC = BRes.getError())
1047     return EC;
1048   int64_t Occurrences = BRes.get();
1049 
1050   consumeAllRemainingFS();
1051   if (!checkAndConsumeNewLine()) {
1052     reportError("expected end of line");
1053     return make_error_code(llvm::errc::io_error);
1054   }
1055 
1056   return SampleInfo(std::move(Address), Occurrences);
1057 }
1058 
1059 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1060   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1061     return false;
1062   ParsingBuf = ParsingBuf.drop_front(6);
1063   Col += 6;
1064 
1065   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1066     ParsingBuf = ParsingBuf.drop_front(1);
1067 
1068   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1069     ErrorOr<StringRef> EventName = parseString(' ', true);
1070     if (!EventName)
1071       return make_error_code(llvm::errc::io_error);
1072     EventNames.insert(EventName.get());
1073   }
1074 
1075   if (!checkAndConsumeNewLine()) {
1076     reportError("malformed no_lbr line");
1077     return make_error_code(llvm::errc::io_error);
1078   }
1079   return true;
1080 }
1081 
1082 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1083   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1084     return false;
1085   ParsingBuf = ParsingBuf.drop_front(16);
1086   Col += 16;
1087 
1088   if (!checkAndConsumeNewLine()) {
1089     reportError("malformed boltedcollection line");
1090     return make_error_code(llvm::errc::io_error);
1091   }
1092   return true;
1093 }
1094 
1095 
1096 bool DataReader::hasBranchData() {
1097   if (ParsingBuf.size() == 0)
1098     return false;
1099 
1100   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1101     return true;
1102   return false;
1103 }
1104 
1105 bool DataReader::hasMemData() {
1106   if (ParsingBuf.size() == 0)
1107     return false;
1108 
1109   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1110     return true;
1111   return false;
1112 }
1113 
1114 std::error_code DataReader::parseInNoLBRMode() {
1115   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1116     auto I = NamesToSamples.find(Name);
1117     if (I == NamesToSamples.end()) {
1118       bool Success;
1119       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1120           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1121 
1122       assert(Success && "unexpected result of insert");
1123     }
1124     return I;
1125   };
1126 
1127   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1128     auto I = NamesToMemEvents.find(Name);
1129     if (I == NamesToMemEvents.end()) {
1130       bool Success;
1131       std::tie(I, Success) = NamesToMemEvents.insert(
1132           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1133       assert(Success && "unexpected result of insert");
1134     }
1135     return I;
1136   };
1137 
1138   while (hasBranchData()) {
1139     ErrorOr<SampleInfo> Res = parseSampleInfo();
1140     if (std::error_code EC = Res.getError())
1141       return EC;
1142 
1143     SampleInfo SI = Res.get();
1144 
1145     // Ignore samples not involving known locations
1146     if (!SI.Loc.IsSymbol)
1147       continue;
1148 
1149     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1150     I->getValue().Data.emplace_back(std::move(SI));
1151   }
1152 
1153   while (hasMemData()) {
1154     ErrorOr<MemInfo> Res = parseMemInfo();
1155     if (std::error_code EC = Res.getError())
1156       return EC;
1157 
1158     MemInfo MI = Res.get();
1159 
1160     // Ignore memory events not involving known pc.
1161     if (!MI.Offset.IsSymbol)
1162       continue;
1163 
1164     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1165     I->getValue().Data.emplace_back(std::move(MI));
1166   }
1167 
1168   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples) {
1169     std::stable_sort(FuncSamples.second.Data.begin(),
1170                      FuncSamples.second.Data.end());
1171   }
1172 
1173   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1174     std::stable_sort(MemEvents.second.Data.begin(),
1175                      MemEvents.second.Data.end());
1176   }
1177 
1178   return std::error_code();
1179 }
1180 
1181 std::error_code DataReader::parse() {
1182   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1183     auto I = NamesToBranches.find(Name);
1184     if (I == NamesToBranches.end()) {
1185       bool Success;
1186       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1187           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1188                                FuncBranchData::ContainerTy())));
1189       assert(Success && "unexpected result of insert");
1190     }
1191     return I;
1192   };
1193 
1194   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1195     auto I = NamesToMemEvents.find(Name);
1196     if (I == NamesToMemEvents.end()) {
1197       bool Success;
1198       std::tie(I, Success) = NamesToMemEvents.insert(
1199           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1200       assert(Success && "unexpected result of insert");
1201     }
1202     return I;
1203   };
1204 
1205   Col = 0;
1206   Line = 1;
1207   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1208   if (!FlagOrErr)
1209     return FlagOrErr.getError();
1210   NoLBRMode = *FlagOrErr;
1211 
1212   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1213   if (!BATFlagOrErr)
1214     return BATFlagOrErr.getError();
1215   BATMode = *BATFlagOrErr;
1216 
1217   if (!hasBranchData() && !hasMemData()) {
1218     Diag << "ERROR: no valid profile data found\n";
1219     return make_error_code(llvm::errc::io_error);
1220   }
1221 
1222   if (NoLBRMode)
1223     return parseInNoLBRMode();
1224 
1225   while (hasBranchData()) {
1226     ErrorOr<BranchInfo> Res = parseBranchInfo();
1227     if (std::error_code EC = Res.getError())
1228       return EC;
1229 
1230     BranchInfo BI = Res.get();
1231 
1232     // Ignore branches not involving known location.
1233     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1234       continue;
1235 
1236     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1237     I->getValue().Data.emplace_back(std::move(BI));
1238 
1239     // Add entry data for branches to another function or branches
1240     // to entry points (including recursive calls)
1241     if (BI.To.IsSymbol &&
1242         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1243       I = GetOrCreateFuncEntry(BI.To.Name);
1244       I->getValue().EntryData.emplace_back(std::move(BI));
1245     }
1246 
1247     // If destination is the function start - update execution count.
1248     // NB: the data is skewed since we cannot tell tail recursion from
1249     //     branches to the function start.
1250     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1251       I = GetOrCreateFuncEntry(BI.To.Name);
1252       I->getValue().ExecutionCount += BI.Branches;
1253     }
1254   }
1255 
1256   while (hasMemData()) {
1257     ErrorOr<MemInfo> Res = parseMemInfo();
1258     if (std::error_code EC = Res.getError())
1259       return EC;
1260 
1261     MemInfo MI = Res.get();
1262 
1263     // Ignore memory events not involving known pc.
1264     if (!MI.Offset.IsSymbol)
1265       continue;
1266 
1267     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1268     I->getValue().Data.emplace_back(std::move(MI));
1269   }
1270 
1271   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches) {
1272     std::stable_sort(FuncBranches.second.Data.begin(),
1273                      FuncBranches.second.Data.end());
1274   }
1275 
1276   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1277     std::stable_sort(MemEvents.second.Data.begin(),
1278                      MemEvents.second.Data.end());
1279   }
1280 
1281   return std::error_code();
1282 }
1283 
1284 void DataReader::buildLTONameMaps() {
1285   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1286     const StringRef FuncName = FuncData.getKey();
1287     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1288     if (CommonName)
1289       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1290   }
1291 
1292   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1293     const StringRef FuncName = FuncData.getKey();
1294     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1295     if (CommonName)
1296       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1297   }
1298 }
1299 
1300 namespace {
1301 template <typename MapTy>
1302 decltype(MapTy::MapEntryTy::second) *
1303 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1304   // Do a reverse order iteration since the name in profile has a higher chance
1305   // of matching a name at the end of the list.
1306   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1307     auto I = Map.find(normalizeName((*SI)->getName()));
1308     if (I != Map.end())
1309       return &I->getValue();
1310   }
1311   return nullptr;
1312 }
1313 
1314 template <typename MapTy>
1315 decltype(MapTy::MapEntryTy::second) *
1316 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1317   // Do a reverse order iteration since the name in profile has a higher chance
1318   // of matching a name at the end of the list.
1319   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1320     auto I = Map.find(normalizeName(*FI));
1321     if (I != Map.end())
1322       return &I->getValue();
1323   }
1324   return nullptr;
1325 }
1326 
1327 template <typename MapTy>
1328 std::vector<decltype(MapTy::MapEntryTy::second) *>
1329 fetchMapEntriesRegex(
1330   MapTy &Map,
1331   const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>> &LTOCommonNameMap,
1332   const std::vector<StringRef> &FuncNames) {
1333   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1334   // Do a reverse order iteration since the name in profile has a higher chance
1335   // of matching a name at the end of the list.
1336   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1337     std::string Name = normalizeName(*FI);
1338     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1339     if (LTOCommonName) {
1340       auto I = LTOCommonNameMap.find(*LTOCommonName);
1341       if (I != LTOCommonNameMap.end()) {
1342         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1343             I->getValue();
1344         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1345       }
1346     } else {
1347       auto I = Map.find(Name);
1348       if (I != Map.end()) {
1349         return {&I->getValue()};
1350       }
1351     }
1352   }
1353   return AllData;
1354 }
1355 
1356 }
1357 
1358 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1359   if (getBranchData(Function) || getMemData(Function))
1360     return true;
1361 
1362   if (getBranchDataForNames(Function.getNames()) ||
1363       getMemDataForNames(Function.getNames()))
1364     return true;
1365 
1366   if (!hasVolatileName(Function))
1367     return false;
1368 
1369   const std::vector<FuncBranchData *> AllBranchData =
1370       getBranchDataForNamesRegex(Function.getNames());
1371   if (!AllBranchData.empty())
1372     return true;
1373 
1374   const std::vector<FuncMemData *> AllMemData =
1375       getMemDataForNamesRegex(Function.getNames());
1376   if (!AllMemData.empty())
1377     return true;
1378 
1379   return false;
1380 }
1381 
1382 FuncBranchData *
1383 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1384   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1385 }
1386 
1387 FuncBranchData *
1388 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1389   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1390 }
1391 
1392 FuncMemData *
1393 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1394   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1395 }
1396 
1397 FuncSampleData *
1398 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1399   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1400 }
1401 
1402 std::vector<FuncBranchData *>
1403 DataReader::getBranchDataForNamesRegex(
1404     const std::vector<StringRef> &FuncNames) {
1405   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1406 }
1407 
1408 std::vector<FuncMemData *>
1409 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1410   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1411 }
1412 
1413 bool DataReader::hasLocalsWithFileName() const {
1414   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1415     const StringRef &FuncName = Func.getKey();
1416     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1417       return true;
1418   }
1419   return false;
1420 }
1421 
1422 void DataReader::dump() const {
1423   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1424     Diag << Func.getKey() << " branches:\n";
1425     for (const BranchInfo &BI : Func.getValue().Data) {
1426       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1427            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1428     }
1429     Diag << Func.getKey() << " entry points:\n";
1430     for (const BranchInfo &BI : Func.getValue().EntryData) {
1431       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1432            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1433     }
1434   }
1435 
1436   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1437     StringRef Event = I->getKey();
1438     Diag << "Data was collected with event: " << Event << "\n";
1439   }
1440   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1441     Diag << Func.getKey() << " samples:\n";
1442     for (const SampleInfo &SI : Func.getValue().Data) {
1443       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " "
1444            << SI.Hits << "\n";
1445     }
1446   }
1447 
1448   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1449     Diag << "Memory events for " << Func.getValue().Name;
1450     Location LastOffset(0);
1451     for (const MemInfo &MI : Func.getValue().Data) {
1452       if (MI.Offset == LastOffset) {
1453         Diag << ", " << MI.Addr << "/" << MI.Count;
1454       } else {
1455         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1456       }
1457       LastOffset = MI.Offset;
1458     }
1459     Diag << "\n";
1460   }
1461 }
1462 
1463 } // namespace bolt
1464 } // namespace llvm
1465