1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Errc.h"
21 #include <map>
22 
23 #undef  DEBUG_TYPE
24 #define DEBUG_TYPE "bolt-prof"
25 
26 using namespace llvm;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltCategory;
31 extern llvm::cl::opt<unsigned> Verbosity;
32 
33 static cl::opt<bool>
34 DumpData("dump-data",
35   cl::desc("dump parsed bolt data for debugging"),
36   cl::Hidden,
37   cl::cat(BoltCategory));
38 
39 } // namespace opts
40 
41 namespace llvm {
42 namespace bolt {
43 
44 Optional<StringRef> getLTOCommonName(const StringRef Name) {
45   size_t LTOSuffixPos = Name.find(".lto_priv.");
46   if (LTOSuffixPos != StringRef::npos)
47     return Name.substr(0, LTOSuffixPos + 10);
48   if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos)
49     return Name.substr(0, LTOSuffixPos + 11);
50   if ((LTOSuffixPos = Name.find(".llvm.")) != StringRef::npos)
51     return Name.substr(0, LTOSuffixPos + 6);
52   return NoneType();
53 }
54 
55 namespace {
56 
57 /// Return true if the function name can change across compilations.
58 bool hasVolatileName(const BinaryFunction &BF) {
59   for (const StringRef &Name : BF.getNames())
60     if (getLTOCommonName(Name))
61       return true;
62 
63   return false;
64 }
65 
66 /// Return standard escaped name of the function possibly renamed by BOLT.
67 std::string normalizeName(StringRef NameRef) {
68   // Strip "PG." prefix used for globalized locals.
69   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
70   return getEscapedName(NameRef);
71 }
72 
73 } // anonymous namespace
74 
75 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
76   if (Loc.IsSymbol) {
77     OS << Loc.Name;
78     if (Loc.Offset)
79       OS << "+" << Twine::utohexstr(Loc.Offset);
80   } else {
81     OS << Twine::utohexstr(Loc.Offset);
82   }
83   return OS;
84 }
85 
86 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
87   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
88   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
89     if (I->From.Name == FBD.Name) {
90       I->From.Name = this->Name;
91       I->From.Offset += Offset;
92     }
93     if (I->To.Name == FBD.Name) {
94       I->To.Name = this->Name;
95       I->To.Offset += Offset;
96     }
97   }
98   llvm::stable_sort(Data);
99   ExecutionCount += FBD.ExecutionCount;
100   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
101     assert(I->To.Name == FBD.Name);
102     auto NewElmt = EntryData.insert(EntryData.end(), *I);
103     NewElmt->To.Name = this->Name;
104     NewElmt->To.Offset += Offset;
105   }
106 }
107 
108 uint64_t FuncBranchData::getNumExecutedBranches() const {
109   uint64_t ExecutedBranches = 0;
110   for (const BranchInfo &BI : Data) {
111     int64_t BranchCount = BI.Branches;
112     assert(BranchCount >= 0 && "branch execution count should not be negative");
113     ExecutedBranches += BranchCount;
114   }
115   return ExecutedBranches;
116 }
117 
118 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
119 
120 void SampleInfo::print(raw_ostream &OS) const {
121   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
122      << " " << Hits << "\n";
123 }
124 
125 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
126   assert(llvm::is_sorted(Data));
127   struct Compare {
128     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
129       return SI.Loc.Offset < Val;
130     }
131     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
132       return Val < SI.Loc.Offset;
133     }
134   };
135   uint64_t Result = 0;
136   for (auto I = llvm::lower_bound(Data, Start, Compare()),
137             E = llvm::lower_bound(Data, End, Compare());
138        I != E; ++I)
139     Result += I->Hits;
140   return Result;
141 }
142 
143 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
144   auto Iter = Index.find(Offset);
145   if (Iter == Index.end()) {
146     Data.emplace_back(Location(true, Name, Offset), Count);
147     Index[Offset] = Data.size() - 1;
148     return;
149   }
150   SampleInfo &SI = Data[Iter->second];
151   SI.Hits += Count;
152 }
153 
154 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
155                                      uint64_t Count, uint64_t Mispreds) {
156   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
157   if (Iter == IntraIndex[OffsetFrom].end()) {
158     Data.emplace_back(Location(true, Name, OffsetFrom),
159                       Location(true, Name, OffsetTo), Mispreds, Count);
160     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
161     return;
162   }
163   BranchInfo &BI = Data[Iter->second];
164   BI.Branches += Count;
165   BI.Mispreds += Mispreds;
166 }
167 
168 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
169                                    uint64_t Count, uint64_t Mispreds) {
170   auto Iter = InterIndex[OffsetFrom].find(To);
171   if (Iter == InterIndex[OffsetFrom].end()) {
172     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
173     InterIndex[OffsetFrom][To] = Data.size() - 1;
174     return;
175   }
176   BranchInfo &BI = Data[Iter->second];
177   BI.Branches += Count;
178   BI.Mispreds += Mispreds;
179 }
180 
181 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
182                                     uint64_t Count, uint64_t Mispreds) {
183   auto Iter = EntryIndex[OffsetTo].find(From);
184   if (Iter == EntryIndex[OffsetTo].end()) {
185     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
186                            Count);
187     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
188     return;
189   }
190   BranchInfo &BI = EntryData[Iter->second];
191   BI.Branches += Count;
192   BI.Mispreds += Mispreds;
193 }
194 
195 void BranchInfo::mergeWith(const BranchInfo &BI) {
196   Branches += BI.Branches;
197   Mispreds += BI.Mispreds;
198 }
199 
200 void BranchInfo::print(raw_ostream &OS) const {
201   OS << From.IsSymbol << " " << From.Name << " "
202      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
203      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
204      << '\n';
205 }
206 
207 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
208                                                       uint64_t To) const {
209   for (const BranchInfo &I : Data)
210     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
211       return I;
212 
213   return make_error_code(llvm::errc::invalid_argument);
214 }
215 
216 ErrorOr<const BranchInfo &>
217 FuncBranchData::getDirectCallBranch(uint64_t From) const {
218   // Commented out because it can be expensive.
219   // assert(std::is_sorted(Data.begin(), Data.end()));
220   struct Compare {
221     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
222       return BI.From.Offset < Val;
223     }
224     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
225       return Val < BI.From.Offset;
226     }
227   };
228   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
229   for (auto I = Range.first; I != Range.second; ++I)
230     if (I->From.Name != I->To.Name)
231       return *I;
232 
233   return make_error_code(llvm::errc::invalid_argument);
234 }
235 
236 void MemInfo::print(raw_ostream &OS) const {
237   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
238      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
239      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
240      << "\n";
241 }
242 
243 void MemInfo::prettyPrint(raw_ostream &OS) const {
244   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
245 }
246 
247 void FuncMemData::update(const Location &Offset, const Location &Addr) {
248   auto Iter = EventIndex[Offset.Offset].find(Addr);
249   if (Iter == EventIndex[Offset.Offset].end()) {
250     Data.emplace_back(MemInfo(Offset, Addr, 1));
251     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
252     return;
253   }
254   ++Data[Iter->second].Count;
255 }
256 
257 Error DataReader::preprocessProfile(BinaryContext &BC) {
258   if (std::error_code EC = parseInput())
259     return errorCodeToError(EC);
260 
261   if (opts::DumpData)
262     dump();
263 
264   if (collectedInBoltedBinary())
265     outs() << "BOLT-INFO: profile collection done on a binary already "
266               "processed by BOLT\n";
267 
268   for (auto &BFI : BC.getBinaryFunctions()) {
269     BinaryFunction &Function = BFI.second;
270     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
271       setMemData(Function, MemData);
272       MemData->Used = true;
273     }
274     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
275       setBranchData(Function, FuncData);
276       Function.ExecutionCount = FuncData->ExecutionCount;
277       FuncData->Used = true;
278     }
279   }
280 
281   for (auto &BFI : BC.getBinaryFunctions()) {
282     BinaryFunction &Function = BFI.second;
283     matchProfileMemData(Function);
284   }
285 
286   return Error::success();
287 }
288 
289 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
290   for (auto &BFI : BC.getBinaryFunctions()) {
291     BinaryFunction &Function = BFI.second;
292     FuncMemData *MemoryData = getMemData(Function);
293     if (!MemoryData)
294       continue;
295 
296     for (MemInfo &MI : MemoryData->Data) {
297       const uint64_t Offset = MI.Offset.Offset;
298       auto II = Function.Instructions.find(Offset);
299       if (II == Function.Instructions.end()) {
300         // Ignore bad instruction address.
301         continue;
302       }
303 
304       auto &MemAccessProfile =
305           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
306               II->second, "MemoryAccessProfile");
307       BinaryData *BD = nullptr;
308       if (MI.Addr.IsSymbol)
309         BD = BC.getBinaryDataByName(MI.Addr.Name);
310       MemAccessProfile.AddressAccessInfo.push_back(
311           {BD, MI.Addr.Offset, MI.Count});
312       auto NextII = std::next(II);
313       if (NextII == Function.Instructions.end())
314         MemAccessProfile.NextInstrOffset = Function.getSize();
315       else
316         MemAccessProfile.NextInstrOffset = II->first;
317     }
318     Function.HasMemoryProfile = true;
319   }
320 
321   return Error::success();
322 }
323 
324 Error DataReader::readProfile(BinaryContext &BC) {
325   for (auto &BFI : BC.getBinaryFunctions()) {
326     BinaryFunction &Function = BFI.second;
327     readProfile(Function);
328   }
329 
330   uint64_t NumUnused = 0;
331   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
332     if (!FuncData.getValue().Used)
333       ++NumUnused;
334   BC.setNumUnusedProfiledObjects(NumUnused);
335 
336   return Error::success();
337 }
338 
339 std::error_code DataReader::parseInput() {
340   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
341       MemoryBuffer::getFileOrSTDIN(Filename);
342   if (std::error_code EC = MB.getError()) {
343     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
344     return EC;
345   }
346   FileBuf = std::move(MB.get());
347   ParsingBuf = FileBuf->getBuffer();
348   if (std::error_code EC = parse())
349     return EC;
350   if (!ParsingBuf.empty())
351     Diag << "WARNING: invalid profile data detected at line " << Line
352          << ". Possibly corrupted profile.\n";
353 
354   buildLTONameMaps();
355 
356   return std::error_code();
357 }
358 
359 void DataReader::readProfile(BinaryFunction &BF) {
360   if (BF.empty())
361     return;
362 
363   if (!hasLBR()) {
364     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
365     readSampleData(BF);
366     return;
367   }
368 
369   BF.ProfileFlags = BinaryFunction::PF_LBR;
370 
371   // Possibly assign/re-assign branch profile data.
372   matchProfileData(BF);
373 
374   FuncBranchData *FBD = getBranchData(BF);
375   if (!FBD)
376     return;
377 
378   // Assign basic block counts to function entry points. These only include
379   // counts for outside entries.
380   //
381   // There is a slight skew introduced here as branches originated from RETs
382   // may be accounted for in the execution count of an entry block if the last
383   // instruction in a predecessor fall-through block is a call. This situation
384   // should rarely happen because there are few multiple-entry functions.
385   for (const BranchInfo &BI : FBD->EntryData) {
386     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
387     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
388       uint64_t Count = BB->getExecutionCount();
389       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
390         Count = 0;
391       BB->setExecutionCount(Count + BI.Branches);
392     }
393   }
394 
395   for (const BranchInfo &BI : FBD->Data) {
396     if (BI.From.Name != BI.To.Name)
397       continue;
398 
399     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
400                       BI.Mispreds)) {
401       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
402                         << BI.To.Offset << '\n');
403     }
404   }
405 
406   // Convert branch data into annotations.
407   convertBranchData(BF);
408 }
409 
410 void DataReader::matchProfileData(BinaryFunction &BF) {
411   // This functionality is available for LBR-mode only
412   // TODO: Implement evaluateProfileData() for samples, checking whether
413   // sample addresses match instruction addresses in the function
414   if (!hasLBR())
415     return;
416 
417   FuncBranchData *FBD = getBranchData(BF);
418   if (FBD) {
419     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
420     BF.RawBranchCount = FBD->getNumExecutedBranches();
421     if (BF.ProfileMatchRatio == 1.0f) {
422       if (fetchProfileForOtherEntryPoints(BF)) {
423         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
424         BF.ExecutionCount = FBD->ExecutionCount;
425         BF.RawBranchCount = FBD->getNumExecutedBranches();
426       }
427       return;
428     }
429   }
430 
431   // Check if the function name can fluctuate between several compilations
432   // possibly triggered by minor unrelated code changes in the source code
433   // of the input binary.
434   if (!hasVolatileName(BF))
435     return;
436 
437   // Check for a profile that matches with 100% confidence.
438   const std::vector<FuncBranchData *> AllBranchData =
439       getBranchDataForNamesRegex(BF.getNames());
440   for (FuncBranchData *NewBranchData : AllBranchData) {
441     // Prevent functions from sharing the same profile.
442     if (NewBranchData->Used)
443       continue;
444 
445     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
446       continue;
447 
448     if (FBD)
449       FBD->Used = false;
450 
451     // Update function profile data with the new set.
452     setBranchData(BF, NewBranchData);
453     NewBranchData->Used = true;
454     BF.ExecutionCount = NewBranchData->ExecutionCount;
455     BF.ProfileMatchRatio = 1.0f;
456     break;
457   }
458 }
459 
460 void DataReader::matchProfileMemData(BinaryFunction &BF) {
461   const std::vector<FuncMemData *> AllMemData =
462       getMemDataForNamesRegex(BF.getNames());
463   for (FuncMemData *NewMemData : AllMemData) {
464     // Prevent functions from sharing the same profile.
465     if (NewMemData->Used)
466       continue;
467 
468     if (FuncMemData *MD = getMemData(BF))
469       MD->Used = false;
470 
471     // Update function profile data with the new set.
472     setMemData(BF, NewMemData);
473     NewMemData->Used = true;
474     break;
475   }
476 }
477 
478 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
479   BinaryContext &BC = BF.getBinaryContext();
480 
481   FuncBranchData *FBD = getBranchData(BF);
482   if (!FBD)
483     return false;
484 
485   // Check if we are missing profiling data for secondary entry points
486   bool First = true;
487   bool Updated = false;
488   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
489     if (First) {
490       First = false;
491       continue;
492     }
493     if (BB->isEntryPoint()) {
494       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
495       // Look for branch data associated with this entry point
496       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
497         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
498           FBD->appendFrom(*Data, BB->getOffset());
499           Data->Used = true;
500           Updated = true;
501         }
502       }
503     }
504   }
505 
506   return Updated;
507 }
508 
509 float DataReader::evaluateProfileData(BinaryFunction &BF,
510                                       const FuncBranchData &BranchData) const {
511   BinaryContext &BC = BF.getBinaryContext();
512 
513   // Until we define a minimal profile, we consider an empty branch data to be
514   // a valid profile. It could happen to a function without branches when we
515   // still have an EntryData for the execution count.
516   if (BranchData.Data.empty())
517     return 1.0f;
518 
519   uint64_t NumMatchedBranches = 0;
520   for (const BranchInfo &BI : BranchData.Data) {
521     bool IsValid = false;
522     if (BI.From.Name == BI.To.Name) {
523       // Try to record information with 0 count.
524       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
525     } else if (collectedInBoltedBinary()) {
526       // We can't check branch source for collections in bolted binaries because
527       // the source of the branch may be mapped to the first instruction in a BB
528       // instead of the original branch (which may not exist in the source bin).
529       IsValid = true;
530     } else {
531       // The branch has to originate from this function.
532       // Check for calls, tail calls, rets and indirect branches.
533       // When matching profiling info, we did not reach the stage
534       // when we identify tail calls, so they are still represented
535       // by regular branch instructions and we need isBranch() here.
536       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
537       // If it's a prefix - skip it.
538       if (Instr && BC.MIB->isPrefix(*Instr))
539         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
540       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
541                     BC.MIB->isReturn(*Instr)))
542         IsValid = true;
543     }
544 
545     if (IsValid) {
546       ++NumMatchedBranches;
547       continue;
548     }
549 
550     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
551                       << Twine::utohexstr(BI.From.Offset) << " -> ";
552                if (BI.From.Name == BI.To.Name) dbgs()
553                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
554                else dbgs() << "<outbounds>\n";);
555   }
556 
557   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
558   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
559     errs() << "BOLT-WARNING: profile branches match only "
560            << format("%.1f%%", MatchRatio * 100.0f) << " ("
561            << NumMatchedBranches << '/' << BranchData.Data.size()
562            << ") for function " << BF << '\n';
563 
564   return MatchRatio;
565 }
566 
567 void DataReader::readSampleData(BinaryFunction &BF) {
568   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
569   if (!SampleDataOrErr)
570     return;
571 
572   // Basic samples mode territory (without LBR info)
573   // First step is to assign BB execution count based on samples from perf
574   BF.ProfileMatchRatio = 1.0f;
575   BF.removeTagsFromProfile();
576   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
577   bool NormalizeByCalls = usesEvent("branches");
578   static bool NagUser = true;
579   if (NagUser) {
580     outs()
581         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
582     if (NormalizeByInsnCount)
583       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
584     else if (NormalizeByCalls)
585       outs() << "BOLT-INFO: normalizing samples by branches.\n";
586 
587     NagUser = false;
588   }
589   uint64_t LastOffset = BF.getSize();
590   uint64_t TotalEntryCount = 0;
591   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
592        I != E; ++I) {
593     uint64_t CurOffset = I->first;
594     // Always work with samples multiplied by 1000 to avoid losing them if we
595     // later need to normalize numbers
596     uint64_t NumSamples =
597         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
598     if (NormalizeByInsnCount && I->second->getNumNonPseudos()) {
599       NumSamples /= I->second->getNumNonPseudos();
600     } else if (NormalizeByCalls) {
601       uint32_t NumCalls = I->second->getNumCalls();
602       NumSamples /= NumCalls + 1;
603     }
604     I->second->setExecutionCount(NumSamples);
605     if (I->second->isEntryPoint())
606       TotalEntryCount += NumSamples;
607     LastOffset = CurOffset;
608   }
609 
610   BF.ExecutionCount = TotalEntryCount;
611 
612   estimateEdgeCounts(BF);
613 }
614 
615 void DataReader::convertBranchData(BinaryFunction &BF) const {
616   BinaryContext &BC = BF.getBinaryContext();
617 
618   if (BF.empty())
619     return;
620 
621   FuncBranchData *FBD = getBranchData(BF);
622   if (!FBD)
623     return;
624 
625   // Profile information for calls.
626   //
627   // There are 3 cases that we annotate differently:
628   //   1) Conditional tail calls that could be mispredicted.
629   //   2) Indirect calls to multiple destinations with mispredictions.
630   //      Before we validate CFG we have to handle indirect branches here too.
631   //   3) Regular direct calls. The count could be different from containing
632   //      basic block count. Keep this data in case we find it useful.
633   //
634   for (BranchInfo &BI : FBD->Data) {
635     // Ignore internal branches.
636     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
637       continue;
638 
639     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
640     if (!Instr ||
641         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
642       continue;
643 
644     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
645       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
646         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
647                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
648                << '\n';
649       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
650       Value += Count;
651     };
652 
653     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
654       IndirectCallSiteProfile &CSP =
655           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
656               *Instr, "CallProfile");
657       MCSymbol *CalleeSymbol = nullptr;
658       if (BI.To.IsSymbol) {
659         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
660           CalleeSymbol = BD->getSymbol();
661       }
662       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
663     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
664       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
665       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
666     } else {
667       setOrUpdateAnnotation("Count", BI.Branches);
668     }
669   }
670 }
671 
672 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
673                               uint64_t Count, uint64_t Mispreds) const {
674   BinaryContext &BC = BF.getBinaryContext();
675 
676   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
677   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
678 
679   if (!FromBB || !ToBB) {
680     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
681     return false;
682   }
683 
684   // Could be bad LBR data; ignore the branch. In the case of data collected
685   // in binaries optimized by BOLT, a source BB may be mapped to two output
686   // BBs as a result of optimizations. In that case, a branch between these
687   // two will be recorded as a branch from A going to A in the source address
688   // space. Keep processing.
689   if (From == To)
690     return true;
691 
692   // Return from a tail call.
693   if (FromBB->succ_size() == 0)
694     return true;
695 
696   // Very rarely we will see ignored branches. Do a linear check.
697   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
698     if (Branch ==
699         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
700       return true;
701 
702   bool OffsetMatches = !!(To == ToBB->getOffset());
703   if (!OffsetMatches) {
704     // Skip the nops to support old .fdata
705     uint64_t Offset = ToBB->getOffset();
706     for (MCInst &Instr : *ToBB) {
707       if (!BC.MIB->isNoop(Instr))
708         break;
709 
710       Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
711     }
712 
713     if (To == Offset)
714       OffsetMatches = true;
715   }
716 
717   if (!OffsetMatches) {
718     // "To" could be referring to nop instructions in between 2 basic blocks.
719     // While building the CFG we make sure these nops are attributed to the
720     // previous basic block, thus we check if the destination belongs to the
721     // gap past the last instruction.
722     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
723     if (LastInstr) {
724       const uint32_t LastInstrOffset =
725           BC.MIB->getOffsetWithDefault(*LastInstr, 0);
726 
727       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
728       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
729         return true;
730 
731       if (To <= LastInstrOffset) {
732         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
733                           << " in " << BF << " : " << From << " -> " << To
734                           << '\n');
735         return false;
736       }
737     }
738 
739     // The real destination is the layout successor of the detected ToBB.
740     if (ToBB == BF.getLayout().block_back())
741       return false;
742     BinaryBasicBlock *NextBB = BF.getLayout().getBlock(ToBB->getIndex() + 1);
743     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
744     ToBB = NextBB;
745   }
746 
747   // If there's no corresponding instruction for 'From', we have probably
748   // discarded it as a FT from __builtin_unreachable.
749   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
750   if (!FromInstruction) {
751     // If the data was collected in a bolted binary, the From addresses may be
752     // translated to the first instruction of the source BB if BOLT inserted
753     // a new branch that did not exist in the source (we can't map it to the
754     // source instruction, so we map it to the first instr of source BB).
755     // We do not keep offsets for random instructions. So the check above will
756     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
757     if (collectedInBoltedBinary()) {
758       if (FromBB->getInputOffset() != From) {
759         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
760                           << BF << '\n');
761         return false;
762       }
763       FromInstruction = nullptr;
764     } else {
765       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
766                         << '\n');
767       return false;
768     }
769   }
770 
771   if (!FromBB->getSuccessor(ToBB->getLabel())) {
772     // Check if this is a recursive call or a return from a recursive call.
773     if (FromInstruction && ToBB->isEntryPoint() &&
774         (BC.MIB->isCall(*FromInstruction) ||
775          BC.MIB->isIndirectBranch(*FromInstruction))) {
776       // Execution count is already accounted for.
777       return true;
778     }
779     // For data collected in a bolted binary, we may have created two output BBs
780     // that map to one original block. Branches between these two blocks will
781     // appear here as one BB jumping to itself, even though it has no loop
782     // edges. Ignore these.
783     if (collectedInBoltedBinary() && FromBB == ToBB)
784       return true;
785 
786     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
787     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
788         FTSuccessor->getSuccessor(ToBB->getLabel())) {
789       BinaryBasicBlock::BinaryBranchInfo &FTBI =
790           FTSuccessor->getBranchInfo(*ToBB);
791       FTBI.Count += Count;
792       if (Count)
793         FTBI.MispredictedCount += Mispreds;
794       ToBB = FTSuccessor;
795     } else {
796       LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
797                         << Twine::utohexstr(From) << " -> "
798                         << Twine::utohexstr(To) << '\n');
799       return false;
800     }
801   }
802 
803   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
804   BI.Count += Count;
805   // Only update mispredicted count if it the count was real.
806   if (Count) {
807     BI.MispredictedCount += Mispreds;
808   }
809 
810   return true;
811 }
812 
813 void DataReader::reportError(StringRef ErrorMsg) {
814   Diag << "Error reading BOLT data input file: line " << Line << ", column "
815        << Col << ": " << ErrorMsg << '\n';
816 }
817 
818 bool DataReader::expectAndConsumeFS() {
819   if (ParsingBuf[0] != FieldSeparator) {
820     reportError("expected field separator");
821     return false;
822   }
823   ParsingBuf = ParsingBuf.drop_front(1);
824   Col += 1;
825   return true;
826 }
827 
828 void DataReader::consumeAllRemainingFS() {
829   while (ParsingBuf[0] == FieldSeparator) {
830     ParsingBuf = ParsingBuf.drop_front(1);
831     Col += 1;
832   }
833 }
834 
835 bool DataReader::checkAndConsumeNewLine() {
836   if (ParsingBuf[0] != '\n')
837     return false;
838 
839   ParsingBuf = ParsingBuf.drop_front(1);
840   Col = 0;
841   Line += 1;
842   return true;
843 }
844 
845 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
846   if (EndChar == '\\') {
847     reportError("EndChar could not be backslash");
848     return make_error_code(llvm::errc::io_error);
849   }
850 
851   std::string EndChars(1, EndChar);
852   EndChars.push_back('\\');
853   if (EndNl)
854     EndChars.push_back('\n');
855 
856   size_t StringEnd = 0;
857   do {
858     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
859     if (StringEnd == StringRef::npos ||
860         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
861       reportError("malformed field");
862       return make_error_code(llvm::errc::io_error);
863     }
864 
865     if (ParsingBuf[StringEnd] != '\\')
866       break;
867 
868     StringEnd += 2;
869   } while (1);
870 
871   StringRef Str = ParsingBuf.substr(0, StringEnd);
872 
873   // If EndNl was set and nl was found instead of EndChar, do not consume the
874   // new line.
875   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
876   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
877 
878   ParsingBuf = ParsingBuf.drop_front(End);
879   if (EndChar == '\n') {
880     Col = 0;
881     Line += 1;
882   } else {
883     Col += End;
884   }
885   return Str;
886 }
887 
888 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
889   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
890   if (std::error_code EC = NumStrRes.getError())
891     return EC;
892   StringRef NumStr = NumStrRes.get();
893   int64_t Num;
894   if (NumStr.getAsInteger(10, Num)) {
895     reportError("expected decimal number");
896     Diag << "Found: " << NumStr << "\n";
897     return make_error_code(llvm::errc::io_error);
898   }
899   return Num;
900 }
901 
902 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
903   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
904   if (std::error_code EC = NumStrRes.getError())
905     return EC;
906   StringRef NumStr = NumStrRes.get();
907   uint64_t Num;
908   if (NumStr.getAsInteger(16, Num)) {
909     reportError("expected hexidecimal number");
910     Diag << "Found: " << NumStr << "\n";
911     return make_error_code(llvm::errc::io_error);
912   }
913   return Num;
914 }
915 
916 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
917                                             bool ExpectMemLoc) {
918   // Read whether the location of the branch should be DSO or a symbol
919   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
920   // symbol.
921   // The symbol flag is also used to tag memory load events by adding 3 to the
922   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
923   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
924       ParsingBuf[0] != '2') {
925     reportError("expected 0, 1 or 2");
926     return make_error_code(llvm::errc::io_error);
927   }
928 
929   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
930       ParsingBuf[0] != '5') {
931     reportError("expected 3, 4 or 5");
932     return make_error_code(llvm::errc::io_error);
933   }
934 
935   bool IsSymbol =
936       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
937       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
938   ParsingBuf = ParsingBuf.drop_front(1);
939   Col += 1;
940 
941   if (!expectAndConsumeFS())
942     return make_error_code(llvm::errc::io_error);
943   consumeAllRemainingFS();
944 
945   // Read the string containing the symbol or the DSO name
946   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
947   if (std::error_code EC = NameRes.getError())
948     return EC;
949   StringRef Name = NameRes.get();
950   consumeAllRemainingFS();
951 
952   // Read the offset
953   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
954   if (std::error_code EC = Offset.getError())
955     return EC;
956 
957   return Location(IsSymbol, Name, Offset.get());
958 }
959 
960 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
961   ErrorOr<Location> Res = parseLocation(FieldSeparator);
962   if (std::error_code EC = Res.getError())
963     return EC;
964   Location From = Res.get();
965 
966   consumeAllRemainingFS();
967   Res = parseLocation(FieldSeparator);
968   if (std::error_code EC = Res.getError())
969     return EC;
970   Location To = Res.get();
971 
972   consumeAllRemainingFS();
973   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
974   if (std::error_code EC = MRes.getError())
975     return EC;
976   int64_t NumMispreds = MRes.get();
977 
978   consumeAllRemainingFS();
979   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
980   if (std::error_code EC = BRes.getError())
981     return EC;
982   int64_t NumBranches = BRes.get();
983 
984   consumeAllRemainingFS();
985   if (!checkAndConsumeNewLine()) {
986     reportError("expected end of line");
987     return make_error_code(llvm::errc::io_error);
988   }
989 
990   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
991 }
992 
993 ErrorOr<MemInfo> DataReader::parseMemInfo() {
994   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
995   if (std::error_code EC = Res.getError())
996     return EC;
997   Location Offset = Res.get();
998 
999   consumeAllRemainingFS();
1000   Res = parseMemLocation(FieldSeparator);
1001   if (std::error_code EC = Res.getError())
1002     return EC;
1003   Location Addr = Res.get();
1004 
1005   consumeAllRemainingFS();
1006   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1007   if (std::error_code EC = CountRes.getError())
1008     return EC;
1009 
1010   consumeAllRemainingFS();
1011   if (!checkAndConsumeNewLine()) {
1012     reportError("expected end of line");
1013     return make_error_code(llvm::errc::io_error);
1014   }
1015 
1016   return MemInfo(Offset, Addr, CountRes.get());
1017 }
1018 
1019 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1020   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1021   if (std::error_code EC = Res.getError())
1022     return EC;
1023   Location Address = Res.get();
1024 
1025   consumeAllRemainingFS();
1026   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1027   if (std::error_code EC = BRes.getError())
1028     return EC;
1029   int64_t Occurrences = BRes.get();
1030 
1031   consumeAllRemainingFS();
1032   if (!checkAndConsumeNewLine()) {
1033     reportError("expected end of line");
1034     return make_error_code(llvm::errc::io_error);
1035   }
1036 
1037   return SampleInfo(std::move(Address), Occurrences);
1038 }
1039 
1040 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1041   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1042     return false;
1043   ParsingBuf = ParsingBuf.drop_front(6);
1044   Col += 6;
1045 
1046   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1047     ParsingBuf = ParsingBuf.drop_front(1);
1048 
1049   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1050     ErrorOr<StringRef> EventName = parseString(' ', true);
1051     if (!EventName)
1052       return make_error_code(llvm::errc::io_error);
1053     EventNames.insert(EventName.get());
1054   }
1055 
1056   if (!checkAndConsumeNewLine()) {
1057     reportError("malformed no_lbr line");
1058     return make_error_code(llvm::errc::io_error);
1059   }
1060   return true;
1061 }
1062 
1063 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1064   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1065     return false;
1066   ParsingBuf = ParsingBuf.drop_front(16);
1067   Col += 16;
1068 
1069   if (!checkAndConsumeNewLine()) {
1070     reportError("malformed boltedcollection line");
1071     return make_error_code(llvm::errc::io_error);
1072   }
1073   return true;
1074 }
1075 
1076 bool DataReader::hasBranchData() {
1077   if (ParsingBuf.size() == 0)
1078     return false;
1079 
1080   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1081     return true;
1082   return false;
1083 }
1084 
1085 bool DataReader::hasMemData() {
1086   if (ParsingBuf.size() == 0)
1087     return false;
1088 
1089   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1090     return true;
1091   return false;
1092 }
1093 
1094 std::error_code DataReader::parseInNoLBRMode() {
1095   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1096     auto I = NamesToSamples.find(Name);
1097     if (I == NamesToSamples.end()) {
1098       bool Success;
1099       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1100           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1101 
1102       assert(Success && "unexpected result of insert");
1103     }
1104     return I;
1105   };
1106 
1107   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1108     auto I = NamesToMemEvents.find(Name);
1109     if (I == NamesToMemEvents.end()) {
1110       bool Success;
1111       std::tie(I, Success) = NamesToMemEvents.insert(
1112           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1113       assert(Success && "unexpected result of insert");
1114     }
1115     return I;
1116   };
1117 
1118   while (hasBranchData()) {
1119     ErrorOr<SampleInfo> Res = parseSampleInfo();
1120     if (std::error_code EC = Res.getError())
1121       return EC;
1122 
1123     SampleInfo SI = Res.get();
1124 
1125     // Ignore samples not involving known locations
1126     if (!SI.Loc.IsSymbol)
1127       continue;
1128 
1129     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1130     I->getValue().Data.emplace_back(std::move(SI));
1131   }
1132 
1133   while (hasMemData()) {
1134     ErrorOr<MemInfo> Res = parseMemInfo();
1135     if (std::error_code EC = Res.getError())
1136       return EC;
1137 
1138     MemInfo MI = Res.get();
1139 
1140     // Ignore memory events not involving known pc.
1141     if (!MI.Offset.IsSymbol)
1142       continue;
1143 
1144     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1145     I->getValue().Data.emplace_back(std::move(MI));
1146   }
1147 
1148   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples)
1149     llvm::stable_sort(FuncSamples.second.Data);
1150 
1151   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1152     llvm::stable_sort(MemEvents.second.Data);
1153 
1154   return std::error_code();
1155 }
1156 
1157 std::error_code DataReader::parse() {
1158   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1159     auto I = NamesToBranches.find(Name);
1160     if (I == NamesToBranches.end()) {
1161       bool Success;
1162       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1163           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1164                                FuncBranchData::ContainerTy())));
1165       assert(Success && "unexpected result of insert");
1166     }
1167     return I;
1168   };
1169 
1170   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1171     auto I = NamesToMemEvents.find(Name);
1172     if (I == NamesToMemEvents.end()) {
1173       bool Success;
1174       std::tie(I, Success) = NamesToMemEvents.insert(
1175           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1176       assert(Success && "unexpected result of insert");
1177     }
1178     return I;
1179   };
1180 
1181   Col = 0;
1182   Line = 1;
1183   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1184   if (!FlagOrErr)
1185     return FlagOrErr.getError();
1186   NoLBRMode = *FlagOrErr;
1187 
1188   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1189   if (!BATFlagOrErr)
1190     return BATFlagOrErr.getError();
1191   BATMode = *BATFlagOrErr;
1192 
1193   if (!hasBranchData() && !hasMemData()) {
1194     Diag << "ERROR: no valid profile data found\n";
1195     return make_error_code(llvm::errc::io_error);
1196   }
1197 
1198   if (NoLBRMode)
1199     return parseInNoLBRMode();
1200 
1201   while (hasBranchData()) {
1202     ErrorOr<BranchInfo> Res = parseBranchInfo();
1203     if (std::error_code EC = Res.getError())
1204       return EC;
1205 
1206     BranchInfo BI = Res.get();
1207 
1208     // Ignore branches not involving known location.
1209     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1210       continue;
1211 
1212     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1213     I->getValue().Data.emplace_back(std::move(BI));
1214 
1215     // Add entry data for branches to another function or branches
1216     // to entry points (including recursive calls)
1217     if (BI.To.IsSymbol &&
1218         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1219       I = GetOrCreateFuncEntry(BI.To.Name);
1220       I->getValue().EntryData.emplace_back(std::move(BI));
1221     }
1222 
1223     // If destination is the function start - update execution count.
1224     // NB: the data is skewed since we cannot tell tail recursion from
1225     //     branches to the function start.
1226     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1227       I = GetOrCreateFuncEntry(BI.To.Name);
1228       I->getValue().ExecutionCount += BI.Branches;
1229     }
1230   }
1231 
1232   while (hasMemData()) {
1233     ErrorOr<MemInfo> Res = parseMemInfo();
1234     if (std::error_code EC = Res.getError())
1235       return EC;
1236 
1237     MemInfo MI = Res.get();
1238 
1239     // Ignore memory events not involving known pc.
1240     if (!MI.Offset.IsSymbol)
1241       continue;
1242 
1243     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1244     I->getValue().Data.emplace_back(std::move(MI));
1245   }
1246 
1247   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches)
1248     llvm::stable_sort(FuncBranches.second.Data);
1249 
1250   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1251     llvm::stable_sort(MemEvents.second.Data);
1252 
1253   return std::error_code();
1254 }
1255 
1256 void DataReader::buildLTONameMaps() {
1257   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1258     const StringRef FuncName = FuncData.getKey();
1259     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1260     if (CommonName)
1261       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1262   }
1263 
1264   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1265     const StringRef FuncName = FuncData.getKey();
1266     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1267     if (CommonName)
1268       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1269   }
1270 }
1271 
1272 namespace {
1273 template <typename MapTy>
1274 decltype(MapTy::MapEntryTy::second) *
1275 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1276   // Do a reverse order iteration since the name in profile has a higher chance
1277   // of matching a name at the end of the list.
1278   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1279     auto I = Map.find(normalizeName((*SI)->getName()));
1280     if (I != Map.end())
1281       return &I->getValue();
1282   }
1283   return nullptr;
1284 }
1285 
1286 template <typename MapTy>
1287 decltype(MapTy::MapEntryTy::second) *
1288 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1289   // Do a reverse order iteration since the name in profile has a higher chance
1290   // of matching a name at the end of the list.
1291   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1292     auto I = Map.find(normalizeName(*FI));
1293     if (I != Map.end())
1294       return &I->getValue();
1295   }
1296   return nullptr;
1297 }
1298 
1299 template <typename MapTy>
1300 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1301     MapTy &Map,
1302     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1303         &LTOCommonNameMap,
1304     const std::vector<StringRef> &FuncNames) {
1305   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1306   // Do a reverse order iteration since the name in profile has a higher chance
1307   // of matching a name at the end of the list.
1308   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1309     std::string Name = normalizeName(*FI);
1310     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1311     if (LTOCommonName) {
1312       auto I = LTOCommonNameMap.find(*LTOCommonName);
1313       if (I != LTOCommonNameMap.end()) {
1314         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1315             I->getValue();
1316         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1317       }
1318     } else {
1319       auto I = Map.find(Name);
1320       if (I != Map.end())
1321         return {&I->getValue()};
1322     }
1323   }
1324   return AllData;
1325 }
1326 
1327 }
1328 
1329 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1330   if (getBranchData(Function) || getMemData(Function))
1331     return true;
1332 
1333   if (getBranchDataForNames(Function.getNames()) ||
1334       getMemDataForNames(Function.getNames()))
1335     return true;
1336 
1337   if (!hasVolatileName(Function))
1338     return false;
1339 
1340   const std::vector<FuncBranchData *> AllBranchData =
1341       getBranchDataForNamesRegex(Function.getNames());
1342   if (!AllBranchData.empty())
1343     return true;
1344 
1345   const std::vector<FuncMemData *> AllMemData =
1346       getMemDataForNamesRegex(Function.getNames());
1347   if (!AllMemData.empty())
1348     return true;
1349 
1350   return false;
1351 }
1352 
1353 FuncBranchData *
1354 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1355   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1356 }
1357 
1358 FuncBranchData *
1359 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1360   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1361 }
1362 
1363 FuncMemData *
1364 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1365   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1366 }
1367 
1368 FuncSampleData *
1369 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1370   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1371 }
1372 
1373 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1374     const std::vector<StringRef> &FuncNames) {
1375   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1376 }
1377 
1378 std::vector<FuncMemData *>
1379 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1380   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1381 }
1382 
1383 bool DataReader::hasLocalsWithFileName() const {
1384   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1385     const StringRef &FuncName = Func.getKey();
1386     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1387       return true;
1388   }
1389   return false;
1390 }
1391 
1392 void DataReader::dump() const {
1393   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1394     Diag << Func.getKey() << " branches:\n";
1395     for (const BranchInfo &BI : Func.getValue().Data)
1396       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1397            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1398     Diag << Func.getKey() << " entry points:\n";
1399     for (const BranchInfo &BI : Func.getValue().EntryData)
1400       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1401            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1402   }
1403 
1404   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1405     StringRef Event = I->getKey();
1406     Diag << "Data was collected with event: " << Event << "\n";
1407   }
1408   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1409     Diag << Func.getKey() << " samples:\n";
1410     for (const SampleInfo &SI : Func.getValue().Data)
1411       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1412   }
1413 
1414   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1415     Diag << "Memory events for " << Func.getValue().Name;
1416     Location LastOffset(0);
1417     for (const MemInfo &MI : Func.getValue().Data) {
1418       if (MI.Offset == LastOffset)
1419         Diag << ", " << MI.Addr << "/" << MI.Count;
1420       else
1421         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1422       LastOffset = MI.Offset;
1423     }
1424     Diag << "\n";
1425   }
1426 }
1427 
1428 } // namespace bolt
1429 } // namespace llvm
1430