1 //===-- DataReader.cpp - Perf data reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <map>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 Optional<StringRef> getLTOCommonName(const StringRef Name) {
44   size_t LTOSuffixPos = Name.find(".lto_priv.");
45   if (LTOSuffixPos != StringRef::npos) {
46     return Name.substr(0, LTOSuffixPos + 10);
47   } else if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos) {
48     return Name.substr(0, LTOSuffixPos + 11);
49   } else {
50     return NoneType();
51   }
52 }
53 
54 namespace {
55 
56 /// Return true if the function name can change across compilations.
57 bool hasVolatileName(const BinaryFunction &BF) {
58   for (const StringRef Name : BF.getNames()) {
59     if (getLTOCommonName(Name))
60       return true;
61   }
62   return false;
63 }
64 
65 /// Return standard escaped name of the function possibly renamed by BOLT.
66 std::string normalizeName(StringRef NameRef) {
67   // Strip "PG." prefix used for globalized locals.
68   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
69   return getEscapedName(NameRef);
70 }
71 
72 } // anonymous namespace
73 
74 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
75   if (Loc.IsSymbol) {
76     OS << Loc.Name;
77     if (Loc.Offset)
78       OS << "+" << Twine::utohexstr(Loc.Offset);
79   } else {
80     OS << Twine::utohexstr(Loc.Offset);
81   }
82   return OS;
83 }
84 
85 iterator_range<FuncBranchData::ContainerTy::const_iterator>
86 FuncBranchData::getBranchRange(uint64_t From) const {
87   assert(std::is_sorted(Data.begin(), Data.end()));
88   struct Compare {
89     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
90       return BI.From.Offset < Val;
91     }
92     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
93       return Val < BI.From.Offset;
94     }
95   };
96   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
97   return iterator_range<ContainerTy::const_iterator>(Range.first, Range.second);
98 }
99 
100 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
101   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
102   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
103     if (I->From.Name == FBD.Name) {
104       I->From.Name = this->Name;
105       I->From.Offset += Offset;
106     }
107     if (I->To.Name == FBD.Name) {
108       I->To.Name = this->Name;
109       I->To.Offset += Offset;
110     }
111   }
112   std::stable_sort(Data.begin(), Data.end());
113   ExecutionCount += FBD.ExecutionCount;
114   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
115     assert(I->To.Name == FBD.Name);
116     auto NewElmt = EntryData.insert(EntryData.end(), *I);
117     NewElmt->To.Name = this->Name;
118     NewElmt->To.Offset += Offset;
119   }
120 }
121 
122 uint64_t FuncBranchData::getNumExecutedBranches() const {
123   uint64_t ExecutedBranches = 0;
124   for (const BranchInfo &BI : Data) {
125     int64_t BranchCount = BI.Branches;
126     assert(BranchCount >= 0 && "branch execution count should not be negative");
127     ExecutedBranches += BranchCount;
128   }
129   return ExecutedBranches;
130 }
131 
132 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
133 
134 void SampleInfo::print(raw_ostream &OS) const {
135   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
136      << " " << Hits << "\n";
137 }
138 
139 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
140   assert(std::is_sorted(Data.begin(), Data.end()));
141   struct Compare {
142     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
143       return SI.Loc.Offset < Val;
144     }
145     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
146       return Val < SI.Loc.Offset;
147     }
148   };
149   uint64_t Result = 0;
150   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
151             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
152        I != E; ++I) {
153     Result += I->Hits;
154   }
155   return Result;
156 }
157 
158 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
159   auto Iter = Index.find(Offset);
160   if (Iter == Index.end()) {
161     Data.emplace_back(Location(true, Name, Offset), Count);
162     Index[Offset] = Data.size() - 1;
163     return;
164   }
165   SampleInfo &SI = Data[Iter->second];
166   SI.Hits += Count;
167 }
168 
169 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
170                                      uint64_t Count, uint64_t Mispreds) {
171   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
172   if (Iter == IntraIndex[OffsetFrom].end()) {
173     Data.emplace_back(Location(true, Name, OffsetFrom),
174                       Location(true, Name, OffsetTo), Mispreds, Count);
175     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
176     return;
177   }
178   BranchInfo &BI = Data[Iter->second];
179   BI.Branches += Count;
180   BI.Mispreds += Mispreds;
181 }
182 
183 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
184                                    uint64_t Count, uint64_t Mispreds) {
185   auto Iter = InterIndex[OffsetFrom].find(To);
186   if (Iter == InterIndex[OffsetFrom].end()) {
187     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
188     InterIndex[OffsetFrom][To] = Data.size() - 1;
189     return;
190   }
191   BranchInfo &BI = Data[Iter->second];
192   BI.Branches += Count;
193   BI.Mispreds += Mispreds;
194 }
195 
196 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
197                                     uint64_t Count, uint64_t Mispreds) {
198   auto Iter = EntryIndex[OffsetTo].find(From);
199   if (Iter == EntryIndex[OffsetTo].end()) {
200     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
201                            Count);
202     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
203     return;
204   }
205   BranchInfo &BI = EntryData[Iter->second];
206   BI.Branches += Count;
207   BI.Mispreds += Mispreds;
208 }
209 
210 void BranchInfo::mergeWith(const BranchInfo &BI) {
211   Branches += BI.Branches;
212   Mispreds += BI.Mispreds;
213 }
214 
215 void BranchInfo::print(raw_ostream &OS) const {
216   OS << From.IsSymbol << " " << From.Name << " "
217      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
218      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
219      << '\n';
220 }
221 
222 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
223                                                       uint64_t To) const {
224   for (const BranchInfo &I : Data) {
225     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
226       return I;
227   }
228   return make_error_code(llvm::errc::invalid_argument);
229 }
230 
231 ErrorOr<const BranchInfo &>
232 FuncBranchData::getDirectCallBranch(uint64_t From) const {
233   // Commented out because it can be expensive.
234   // assert(std::is_sorted(Data.begin(), Data.end()));
235   struct Compare {
236     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
237       return BI.From.Offset < Val;
238     }
239     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
240       return Val < BI.From.Offset;
241     }
242   };
243   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
244   for (auto I = Range.first; I != Range.second; ++I) {
245     if (I->From.Name != I->To.Name)
246       return *I;
247   }
248   return make_error_code(llvm::errc::invalid_argument);
249 }
250 
251 void MemInfo::print(raw_ostream &OS) const {
252   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
253      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
254      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
255      << "\n";
256 }
257 
258 void MemInfo::prettyPrint(raw_ostream &OS) const {
259   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
260 }
261 
262 void FuncMemData::update(const Location &Offset, const Location &Addr) {
263   auto Iter = EventIndex[Offset.Offset].find(Addr);
264   if (Iter == EventIndex[Offset.Offset].end()) {
265     Data.emplace_back(MemInfo(Offset, Addr, 1));
266     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
267     return;
268   }
269   ++Data[Iter->second].Count;
270 }
271 
272 Error DataReader::preprocessProfile(BinaryContext &BC) {
273   if (std::error_code EC = parseInput()) {
274     return errorCodeToError(EC);
275   }
276 
277   if (opts::DumpData) {
278     dump();
279   }
280 
281   if (collectedInBoltedBinary()) {
282     outs() << "BOLT-INFO: profile collection done on a binary already "
283               "processed by BOLT\n";
284   }
285 
286   for (auto &BFI : BC.getBinaryFunctions()) {
287     BinaryFunction &Function = BFI.second;
288     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
289       setMemData(Function, MemData);
290       MemData->Used = true;
291     }
292     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
293       setBranchData(Function, FuncData);
294       Function.ExecutionCount = FuncData->ExecutionCount;
295       FuncData->Used = true;
296     }
297   }
298 
299   for (auto &BFI : BC.getBinaryFunctions()) {
300     BinaryFunction &Function = BFI.second;
301     matchProfileMemData(Function);
302   }
303 
304   return Error::success();
305 }
306 
307 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
308   for (auto &BFI : BC.getBinaryFunctions()) {
309     BinaryFunction &Function = BFI.second;
310     FuncMemData *MemoryData = getMemData(Function);
311     if (!MemoryData)
312       continue;
313 
314     for (MemInfo &MI : MemoryData->Data) {
315       const uint64_t Offset = MI.Offset.Offset;
316       auto II = Function.Instructions.find(Offset);
317       if (II == Function.Instructions.end()) {
318         // Ignore bad instruction address.
319         continue;
320       }
321 
322       auto &MemAccessProfile =
323           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
324               II->second, "MemoryAccessProfile");
325       BinaryData *BD = nullptr;
326       if (MI.Addr.IsSymbol) {
327         BD = BC.getBinaryDataByName(MI.Addr.Name);
328       }
329       MemAccessProfile.AddressAccessInfo.push_back(
330           {BD, MI.Addr.Offset, MI.Count});
331       auto NextII = std::next(II);
332       if (NextII == Function.Instructions.end()) {
333         MemAccessProfile.NextInstrOffset = Function.getSize();
334       } else {
335         MemAccessProfile.NextInstrOffset = II->first;
336       }
337     }
338     Function.HasMemoryProfile = true;
339   }
340 
341   return Error::success();
342 }
343 
344 Error DataReader::readProfile(BinaryContext &BC) {
345   for (auto &BFI : BC.getBinaryFunctions()) {
346     BinaryFunction &Function = BFI.second;
347     readProfile(Function);
348   }
349 
350   uint64_t NumUnused = 0;
351   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
352     if (!FuncData.getValue().Used)
353       ++NumUnused;
354   BC.setNumUnusedProfiledObjects(NumUnused);
355 
356   return Error::success();
357 }
358 
359 std::error_code DataReader::parseInput() {
360   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
361       MemoryBuffer::getFileOrSTDIN(Filename);
362   if (std::error_code EC = MB.getError()) {
363     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
364     return EC;
365   }
366   FileBuf = std::move(MB.get());
367   ParsingBuf = FileBuf->getBuffer();
368   if (std::error_code EC = parse()) {
369     return EC;
370   }
371   if (!ParsingBuf.empty()) {
372     Diag << "WARNING: invalid profile data detected at line " << Line
373          << ". Possibly corrupted profile.\n";
374   }
375 
376   buildLTONameMaps();
377 
378   return std::error_code();
379 }
380 
381 void DataReader::readProfile(BinaryFunction &BF) {
382   if (BF.empty())
383     return;
384 
385   if (!hasLBR()) {
386     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
387     readSampleData(BF);
388     return;
389   }
390 
391   BF.ProfileFlags = BinaryFunction::PF_LBR;
392 
393   // Possibly assign/re-assign branch profile data.
394   matchProfileData(BF);
395 
396   FuncBranchData *FBD = getBranchData(BF);
397   if (!FBD)
398     return;
399 
400   // Assign basic block counts to function entry points. These only include
401   // counts for outside entries.
402   //
403   // There is a slight skew introduced here as branches originated from RETs
404   // may be accounted for in the execution count of an entry block if the last
405   // instruction in a predecessor fall-through block is a call. This situation
406   // should rarely happen because there are few multiple-entry functions.
407   for (const BranchInfo &BI : FBD->EntryData) {
408     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
409     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
410       uint64_t Count = BB->getExecutionCount();
411       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
412         Count = 0;
413       BB->setExecutionCount(Count + BI.Branches);
414     }
415   }
416 
417   uint64_t MismatchedBranches = 0;
418   for (const BranchInfo &BI : FBD->Data) {
419     if (BI.From.Name != BI.To.Name) {
420       continue;
421     }
422 
423     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
424                       BI.Mispreds)) {
425       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
426                         << BI.To.Offset << '\n');
427       ++MismatchedBranches;
428     }
429   }
430 
431   // Convert branch data into annotations.
432   convertBranchData(BF);
433 }
434 
435 void DataReader::matchProfileData(BinaryFunction &BF) {
436   // This functionality is available for LBR-mode only
437   // TODO: Implement evaluateProfileData() for samples, checking whether
438   // sample addresses match instruction addresses in the function
439   if (!hasLBR())
440     return;
441 
442   FuncBranchData *FBD = getBranchData(BF);
443   if (FBD) {
444     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
445     BF.RawBranchCount = FBD->getNumExecutedBranches();
446     if (BF.ProfileMatchRatio == 1.0f) {
447       if (fetchProfileForOtherEntryPoints(BF)) {
448         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
449         BF.ExecutionCount = FBD->ExecutionCount;
450         BF.RawBranchCount = FBD->getNumExecutedBranches();
451       }
452       return;
453     }
454   }
455 
456   // Check if the function name can fluctuate between several compilations
457   // possibly triggered by minor unrelated code changes in the source code
458   // of the input binary.
459   if (!hasVolatileName(BF))
460     return;
461 
462   // Check for a profile that matches with 100% confidence.
463   const std::vector<FuncBranchData *> AllBranchData =
464       getBranchDataForNamesRegex(BF.getNames());
465   for (FuncBranchData *NewBranchData : AllBranchData) {
466     // Prevent functions from sharing the same profile.
467     if (NewBranchData->Used)
468       continue;
469 
470     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
471       continue;
472 
473     if (FBD)
474       FBD->Used = false;
475 
476     // Update function profile data with the new set.
477     setBranchData(BF, NewBranchData);
478     NewBranchData->Used = true;
479     BF.ExecutionCount = NewBranchData->ExecutionCount;
480     BF.ProfileMatchRatio = 1.0f;
481     break;
482   }
483 }
484 
485 void DataReader::matchProfileMemData(BinaryFunction &BF) {
486   const std::vector<FuncMemData *> AllMemData =
487       getMemDataForNamesRegex(BF.getNames());
488   for (FuncMemData *NewMemData : AllMemData) {
489     // Prevent functions from sharing the same profile.
490     if (NewMemData->Used)
491       continue;
492 
493     if (FuncMemData *MD = getMemData(BF))
494       MD->Used = false;
495 
496     // Update function profile data with the new set.
497     setMemData(BF, NewMemData);
498     NewMemData->Used = true;
499     break;
500   }
501 }
502 
503 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
504   BinaryContext &BC = BF.getBinaryContext();
505 
506   FuncBranchData *FBD = getBranchData(BF);
507   if (!FBD)
508     return false;
509 
510   // Check if we are missing profiling data for secondary entry points
511   bool First = true;
512   bool Updated = false;
513   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
514     if (First) {
515       First = false;
516       continue;
517     }
518     if (BB->isEntryPoint()) {
519       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
520       // Look for branch data associated with this entry point
521       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
522         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
523           FBD->appendFrom(*Data, BB->getOffset());
524           Data->Used = true;
525           Updated = true;
526         }
527       }
528     }
529   }
530 
531   return Updated;
532 }
533 
534 float DataReader::evaluateProfileData(BinaryFunction &BF,
535                                       const FuncBranchData &BranchData) const {
536   BinaryContext &BC = BF.getBinaryContext();
537 
538   // Until we define a minimal profile, we consider an empty branch data to be
539   // a valid profile. It could happen to a function without branches when we
540   // still have an EntryData for the execution count.
541   if (BranchData.Data.empty()) {
542     return 1.0f;
543   }
544 
545   uint64_t NumMatchedBranches = 0;
546   for (const BranchInfo &BI : BranchData.Data) {
547     bool IsValid = false;
548     if (BI.From.Name == BI.To.Name) {
549       // Try to record information with 0 count.
550       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
551     } else if (collectedInBoltedBinary()) {
552       // We can't check branch source for collections in bolted binaries because
553       // the source of the branch may be mapped to the first instruction in a BB
554       // instead of the original branch (which may not exist in the source bin).
555       IsValid = true;
556     } else {
557       // The branch has to originate from this function.
558       // Check for calls, tail calls, rets and indirect branches.
559       // When matching profiling info, we did not reach the stage
560       // when we identify tail calls, so they are still represented
561       // by regular branch instructions and we need isBranch() here.
562       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
563       // If it's a prefix - skip it.
564       if (Instr && BC.MIB->isPrefix(*Instr))
565         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
566       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
567                     BC.MIB->isReturn(*Instr))) {
568         IsValid = true;
569       }
570     }
571 
572     if (IsValid) {
573       ++NumMatchedBranches;
574       continue;
575     }
576 
577     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
578                       << Twine::utohexstr(BI.From.Offset) << " -> ";
579                if (BI.From.Name == BI.To.Name) dbgs()
580                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
581                else dbgs() << "<outbounds>\n";);
582   }
583 
584   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
585   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
586     errs() << "BOLT-WARNING: profile branches match only "
587            << format("%.1f%%", MatchRatio * 100.0f) << " ("
588            << NumMatchedBranches << '/' << BranchData.Data.size()
589            << ") for function " << BF << '\n';
590   }
591 
592   return MatchRatio;
593 }
594 
595 void DataReader::readSampleData(BinaryFunction &BF) {
596   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
597   if (!SampleDataOrErr)
598     return;
599 
600   // Basic samples mode territory (without LBR info)
601   // First step is to assign BB execution count based on samples from perf
602   BF.ProfileMatchRatio = 1.0f;
603   BF.removeTagsFromProfile();
604   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
605   bool NormalizeByCalls = usesEvent("branches");
606   static bool NagUser = true;
607   if (NagUser) {
608     outs()
609         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
610     if (NormalizeByInsnCount) {
611       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
612     } else if (NormalizeByCalls) {
613       outs() << "BOLT-INFO: normalizing samples by branches.\n";
614     }
615     NagUser = false;
616   }
617   uint64_t LastOffset = BF.getSize();
618   uint64_t TotalEntryCount = 0;
619   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
620        I != E; ++I) {
621     uint64_t CurOffset = I->first;
622     // Always work with samples multiplied by 1000 to avoid losing them if we
623     // later need to normalize numbers
624     uint64_t NumSamples =
625         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
626     if (NormalizeByInsnCount && I->second->getNumNonPseudos())
627       NumSamples /= I->second->getNumNonPseudos();
628     else if (NormalizeByCalls) {
629       uint32_t NumCalls = I->second->getNumCalls();
630       NumSamples /= NumCalls + 1;
631     }
632     I->second->setExecutionCount(NumSamples);
633     if (I->second->isEntryPoint())
634       TotalEntryCount += NumSamples;
635     LastOffset = CurOffset;
636   }
637 
638   BF.ExecutionCount = TotalEntryCount;
639 
640   estimateEdgeCounts(BF);
641 }
642 
643 void DataReader::convertBranchData(BinaryFunction &BF) const {
644   BinaryContext &BC = BF.getBinaryContext();
645 
646   if (BF.empty())
647     return;
648 
649   FuncBranchData *FBD = getBranchData(BF);
650   if (!FBD)
651     return;
652 
653   // Profile information for calls.
654   //
655   // There are 3 cases that we annotate differently:
656   //   1) Conditional tail calls that could be mispredicted.
657   //   2) Indirect calls to multiple destinations with mispredictions.
658   //      Before we validate CFG we have to handle indirect branches here too.
659   //   3) Regular direct calls. The count could be different from containing
660   //      basic block count. Keep this data in case we find it useful.
661   //
662   for (BranchInfo &BI : FBD->Data) {
663     // Ignore internal branches.
664     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
665       continue;
666 
667     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
668     if (!Instr ||
669         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
670       continue;
671 
672     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
673       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) {
674         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
675                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
676                << '\n';
677       }
678       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
679       Value += Count;
680     };
681 
682     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
683       IndirectCallSiteProfile &CSP =
684           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
685               *Instr, "CallProfile");
686       MCSymbol *CalleeSymbol = nullptr;
687       if (BI.To.IsSymbol) {
688         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) {
689           CalleeSymbol = BD->getSymbol();
690         }
691       }
692       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
693     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
694       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
695       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
696     } else {
697       setOrUpdateAnnotation("Count", BI.Branches);
698     }
699   }
700 }
701 
702 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
703                               uint64_t Count, uint64_t Mispreds) const {
704   BinaryContext &BC = BF.getBinaryContext();
705 
706   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
707   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
708 
709   if (!FromBB || !ToBB) {
710     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
711     return false;
712   }
713 
714   // Could be bad LBR data; ignore the branch. In the case of data collected
715   // in binaries optimized by BOLT, a source BB may be mapped to two output
716   // BBs as a result of optimizations. In that case, a branch between these
717   // two will be recorded as a branch from A going to A in the source address
718   // space. Keep processing.
719   if (From == To) {
720     return true;
721   }
722 
723   if (FromBB->succ_size() == 0) {
724     // Return from a tail call.
725     return true;
726   }
727 
728   // Very rarely we will see ignored branches. Do a linear check.
729   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
730     if (Branch ==
731         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
732       return true;
733   }
734 
735   if (To != ToBB->getOffset()) {
736     // "To" could be referring to nop instructions in between 2 basic blocks.
737     // While building the CFG we make sure these nops are attributed to the
738     // previous basic block, thus we check if the destination belongs to the
739     // gap past the last instruction.
740     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
741     if (LastInstr) {
742       const uint32_t LastInstrOffset =
743           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Offset");
744 
745       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
746       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) {
747         return true;
748       }
749 
750       if (To <= LastInstrOffset) {
751         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
752                           << " in " << BF << " : " << From << " -> " << To
753                           << '\n');
754         return false;
755       }
756     }
757 
758     // The real destination is the layout successor of the detected ToBB.
759     if (ToBB == BF.BasicBlocksLayout.back())
760       return false;
761     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
762     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
763     ToBB = NextBB;
764   }
765 
766   // If there's no corresponding instruction for 'From', we have probably
767   // discarded it as a FT from __builtin_unreachable.
768   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
769   if (!FromInstruction) {
770     // If the data was collected in a bolted binary, the From addresses may be
771     // translated to the first instruction of the source BB if BOLT inserted
772     // a new branch that did not exist in the source (we can't map it to the
773     // source instruction, so we map it to the first instr of source BB).
774     // We do not keep offsets for random instructions. So the check above will
775     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
776     if (collectedInBoltedBinary()) {
777       if (FromBB->getInputOffset() != From) {
778         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
779                           << BF << '\n');
780         return false;
781       }
782       FromInstruction = nullptr;
783     } else {
784       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
785                         << '\n');
786       return false;
787     }
788   }
789 
790   if (!FromBB->getSuccessor(ToBB->getLabel())) {
791     // Check if this is a recursive call or a return from a recursive call.
792     if (FromInstruction && ToBB->isEntryPoint() &&
793         (BC.MIB->isCall(*FromInstruction) ||
794          BC.MIB->isIndirectBranch(*FromInstruction))) {
795       // Execution count is already accounted for.
796       return true;
797     }
798     // For data collected in a bolted binary, we may have created two output BBs
799     // that map to one original block. Branches between these two blocks will
800     // appear here as one BB jumping to itself, even though it has no loop
801     // edges. Ignore these.
802     if (collectedInBoltedBinary() && FromBB == ToBB)
803       return true;
804 
805     LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
806                       << Twine::utohexstr(From) << " -> "
807                       << Twine::utohexstr(To) << '\n');
808     return false;
809   }
810 
811   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
812   BI.Count += Count;
813   // Only update mispredicted count if it the count was real.
814   if (Count) {
815     BI.MispredictedCount += Mispreds;
816   }
817 
818   return true;
819 }
820 
821 void DataReader::reportError(StringRef ErrorMsg) {
822   Diag << "Error reading BOLT data input file: line " << Line << ", column "
823        << Col << ": " << ErrorMsg << '\n';
824 }
825 
826 bool DataReader::expectAndConsumeFS() {
827   if (ParsingBuf[0] != FieldSeparator) {
828     reportError("expected field separator");
829     return false;
830   }
831   ParsingBuf = ParsingBuf.drop_front(1);
832   Col += 1;
833   return true;
834 }
835 
836 void DataReader::consumeAllRemainingFS() {
837   while (ParsingBuf[0] == FieldSeparator) {
838     ParsingBuf = ParsingBuf.drop_front(1);
839     Col += 1;
840   }
841 }
842 
843 bool DataReader::checkAndConsumeNewLine() {
844   if (ParsingBuf[0] != '\n')
845     return false;
846 
847   ParsingBuf = ParsingBuf.drop_front(1);
848   Col = 0;
849   Line += 1;
850   return true;
851 }
852 
853 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
854   if (EndChar == '\\') {
855     reportError("EndChar could not be backslash");
856     return make_error_code(llvm::errc::io_error);
857   }
858 
859   std::string EndChars(1, EndChar);
860   EndChars.push_back('\\');
861   if (EndNl)
862     EndChars.push_back('\n');
863 
864   size_t StringEnd = 0;
865   do {
866     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
867     if (StringEnd == StringRef::npos ||
868         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
869       reportError("malformed field");
870       return make_error_code(llvm::errc::io_error);
871     }
872 
873     if (ParsingBuf[StringEnd] != '\\')
874       break;
875 
876     StringEnd += 2;
877   } while (1);
878 
879   StringRef Str = ParsingBuf.substr(0, StringEnd);
880 
881   // If EndNl was set and nl was found instead of EndChar, do not consume the
882   // new line.
883   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
884   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
885 
886   ParsingBuf = ParsingBuf.drop_front(End);
887   if (EndChar == '\n') {
888     Col = 0;
889     Line += 1;
890   } else {
891     Col += End;
892   }
893   return Str;
894 }
895 
896 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
897   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
898   if (std::error_code EC = NumStrRes.getError())
899     return EC;
900   StringRef NumStr = NumStrRes.get();
901   int64_t Num;
902   if (NumStr.getAsInteger(10, Num)) {
903     reportError("expected decimal number");
904     Diag << "Found: " << NumStr << "\n";
905     return make_error_code(llvm::errc::io_error);
906   }
907   return Num;
908 }
909 
910 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
911   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
912   if (std::error_code EC = NumStrRes.getError())
913     return EC;
914   StringRef NumStr = NumStrRes.get();
915   uint64_t Num;
916   if (NumStr.getAsInteger(16, Num)) {
917     reportError("expected hexidecimal number");
918     Diag << "Found: " << NumStr << "\n";
919     return make_error_code(llvm::errc::io_error);
920   }
921   return Num;
922 }
923 
924 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
925                                             bool ExpectMemLoc) {
926   // Read whether the location of the branch should be DSO or a symbol
927   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
928   // symbol.
929   // The symbol flag is also used to tag memory load events by adding 3 to the
930   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
931   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
932       ParsingBuf[0] != '2') {
933     reportError("expected 0, 1 or 2");
934     return make_error_code(llvm::errc::io_error);
935   }
936 
937   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
938       ParsingBuf[0] != '5') {
939     reportError("expected 3, 4 or 5");
940     return make_error_code(llvm::errc::io_error);
941   }
942 
943   bool IsSymbol =
944       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
945       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
946   ParsingBuf = ParsingBuf.drop_front(1);
947   Col += 1;
948 
949   if (!expectAndConsumeFS())
950     return make_error_code(llvm::errc::io_error);
951   consumeAllRemainingFS();
952 
953   // Read the string containing the symbol or the DSO name
954   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
955   if (std::error_code EC = NameRes.getError())
956     return EC;
957   StringRef Name = NameRes.get();
958   consumeAllRemainingFS();
959 
960   // Read the offset
961   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
962   if (std::error_code EC = Offset.getError())
963     return EC;
964 
965   return Location(IsSymbol, Name, Offset.get());
966 }
967 
968 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
969   ErrorOr<Location> Res = parseLocation(FieldSeparator);
970   if (std::error_code EC = Res.getError())
971     return EC;
972   Location From = Res.get();
973 
974   consumeAllRemainingFS();
975   Res = parseLocation(FieldSeparator);
976   if (std::error_code EC = Res.getError())
977     return EC;
978   Location To = Res.get();
979 
980   consumeAllRemainingFS();
981   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
982   if (std::error_code EC = MRes.getError())
983     return EC;
984   int64_t NumMispreds = MRes.get();
985 
986   consumeAllRemainingFS();
987   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
988   if (std::error_code EC = BRes.getError())
989     return EC;
990   int64_t NumBranches = BRes.get();
991 
992   consumeAllRemainingFS();
993   if (!checkAndConsumeNewLine()) {
994     reportError("expected end of line");
995     return make_error_code(llvm::errc::io_error);
996   }
997 
998   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
999 }
1000 
1001 ErrorOr<MemInfo> DataReader::parseMemInfo() {
1002   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
1003   if (std::error_code EC = Res.getError())
1004     return EC;
1005   Location Offset = Res.get();
1006 
1007   consumeAllRemainingFS();
1008   Res = parseMemLocation(FieldSeparator);
1009   if (std::error_code EC = Res.getError())
1010     return EC;
1011   Location Addr = Res.get();
1012 
1013   consumeAllRemainingFS();
1014   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1015   if (std::error_code EC = CountRes.getError())
1016     return EC;
1017 
1018   consumeAllRemainingFS();
1019   if (!checkAndConsumeNewLine()) {
1020     reportError("expected end of line");
1021     return make_error_code(llvm::errc::io_error);
1022   }
1023 
1024   return MemInfo(Offset, Addr, CountRes.get());
1025 }
1026 
1027 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1028   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1029   if (std::error_code EC = Res.getError())
1030     return EC;
1031   Location Address = Res.get();
1032 
1033   consumeAllRemainingFS();
1034   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1035   if (std::error_code EC = BRes.getError())
1036     return EC;
1037   int64_t Occurrences = BRes.get();
1038 
1039   consumeAllRemainingFS();
1040   if (!checkAndConsumeNewLine()) {
1041     reportError("expected end of line");
1042     return make_error_code(llvm::errc::io_error);
1043   }
1044 
1045   return SampleInfo(std::move(Address), Occurrences);
1046 }
1047 
1048 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1049   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1050     return false;
1051   ParsingBuf = ParsingBuf.drop_front(6);
1052   Col += 6;
1053 
1054   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1055     ParsingBuf = ParsingBuf.drop_front(1);
1056 
1057   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1058     ErrorOr<StringRef> EventName = parseString(' ', true);
1059     if (!EventName)
1060       return make_error_code(llvm::errc::io_error);
1061     EventNames.insert(EventName.get());
1062   }
1063 
1064   if (!checkAndConsumeNewLine()) {
1065     reportError("malformed no_lbr line");
1066     return make_error_code(llvm::errc::io_error);
1067   }
1068   return true;
1069 }
1070 
1071 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1072   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1073     return false;
1074   ParsingBuf = ParsingBuf.drop_front(16);
1075   Col += 16;
1076 
1077   if (!checkAndConsumeNewLine()) {
1078     reportError("malformed boltedcollection line");
1079     return make_error_code(llvm::errc::io_error);
1080   }
1081   return true;
1082 }
1083 
1084 bool DataReader::hasBranchData() {
1085   if (ParsingBuf.size() == 0)
1086     return false;
1087 
1088   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1089     return true;
1090   return false;
1091 }
1092 
1093 bool DataReader::hasMemData() {
1094   if (ParsingBuf.size() == 0)
1095     return false;
1096 
1097   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1098     return true;
1099   return false;
1100 }
1101 
1102 std::error_code DataReader::parseInNoLBRMode() {
1103   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1104     auto I = NamesToSamples.find(Name);
1105     if (I == NamesToSamples.end()) {
1106       bool Success;
1107       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1108           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1109 
1110       assert(Success && "unexpected result of insert");
1111     }
1112     return I;
1113   };
1114 
1115   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1116     auto I = NamesToMemEvents.find(Name);
1117     if (I == NamesToMemEvents.end()) {
1118       bool Success;
1119       std::tie(I, Success) = NamesToMemEvents.insert(
1120           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1121       assert(Success && "unexpected result of insert");
1122     }
1123     return I;
1124   };
1125 
1126   while (hasBranchData()) {
1127     ErrorOr<SampleInfo> Res = parseSampleInfo();
1128     if (std::error_code EC = Res.getError())
1129       return EC;
1130 
1131     SampleInfo SI = Res.get();
1132 
1133     // Ignore samples not involving known locations
1134     if (!SI.Loc.IsSymbol)
1135       continue;
1136 
1137     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1138     I->getValue().Data.emplace_back(std::move(SI));
1139   }
1140 
1141   while (hasMemData()) {
1142     ErrorOr<MemInfo> Res = parseMemInfo();
1143     if (std::error_code EC = Res.getError())
1144       return EC;
1145 
1146     MemInfo MI = Res.get();
1147 
1148     // Ignore memory events not involving known pc.
1149     if (!MI.Offset.IsSymbol)
1150       continue;
1151 
1152     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1153     I->getValue().Data.emplace_back(std::move(MI));
1154   }
1155 
1156   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples) {
1157     std::stable_sort(FuncSamples.second.Data.begin(),
1158                      FuncSamples.second.Data.end());
1159   }
1160 
1161   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1162     std::stable_sort(MemEvents.second.Data.begin(),
1163                      MemEvents.second.Data.end());
1164   }
1165 
1166   return std::error_code();
1167 }
1168 
1169 std::error_code DataReader::parse() {
1170   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1171     auto I = NamesToBranches.find(Name);
1172     if (I == NamesToBranches.end()) {
1173       bool Success;
1174       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1175           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1176                                FuncBranchData::ContainerTy())));
1177       assert(Success && "unexpected result of insert");
1178     }
1179     return I;
1180   };
1181 
1182   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1183     auto I = NamesToMemEvents.find(Name);
1184     if (I == NamesToMemEvents.end()) {
1185       bool Success;
1186       std::tie(I, Success) = NamesToMemEvents.insert(
1187           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1188       assert(Success && "unexpected result of insert");
1189     }
1190     return I;
1191   };
1192 
1193   Col = 0;
1194   Line = 1;
1195   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1196   if (!FlagOrErr)
1197     return FlagOrErr.getError();
1198   NoLBRMode = *FlagOrErr;
1199 
1200   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1201   if (!BATFlagOrErr)
1202     return BATFlagOrErr.getError();
1203   BATMode = *BATFlagOrErr;
1204 
1205   if (!hasBranchData() && !hasMemData()) {
1206     Diag << "ERROR: no valid profile data found\n";
1207     return make_error_code(llvm::errc::io_error);
1208   }
1209 
1210   if (NoLBRMode)
1211     return parseInNoLBRMode();
1212 
1213   while (hasBranchData()) {
1214     ErrorOr<BranchInfo> Res = parseBranchInfo();
1215     if (std::error_code EC = Res.getError())
1216       return EC;
1217 
1218     BranchInfo BI = Res.get();
1219 
1220     // Ignore branches not involving known location.
1221     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1222       continue;
1223 
1224     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1225     I->getValue().Data.emplace_back(std::move(BI));
1226 
1227     // Add entry data for branches to another function or branches
1228     // to entry points (including recursive calls)
1229     if (BI.To.IsSymbol &&
1230         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1231       I = GetOrCreateFuncEntry(BI.To.Name);
1232       I->getValue().EntryData.emplace_back(std::move(BI));
1233     }
1234 
1235     // If destination is the function start - update execution count.
1236     // NB: the data is skewed since we cannot tell tail recursion from
1237     //     branches to the function start.
1238     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1239       I = GetOrCreateFuncEntry(BI.To.Name);
1240       I->getValue().ExecutionCount += BI.Branches;
1241     }
1242   }
1243 
1244   while (hasMemData()) {
1245     ErrorOr<MemInfo> Res = parseMemInfo();
1246     if (std::error_code EC = Res.getError())
1247       return EC;
1248 
1249     MemInfo MI = Res.get();
1250 
1251     // Ignore memory events not involving known pc.
1252     if (!MI.Offset.IsSymbol)
1253       continue;
1254 
1255     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1256     I->getValue().Data.emplace_back(std::move(MI));
1257   }
1258 
1259   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches) {
1260     std::stable_sort(FuncBranches.second.Data.begin(),
1261                      FuncBranches.second.Data.end());
1262   }
1263 
1264   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1265     std::stable_sort(MemEvents.second.Data.begin(),
1266                      MemEvents.second.Data.end());
1267   }
1268 
1269   return std::error_code();
1270 }
1271 
1272 void DataReader::buildLTONameMaps() {
1273   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1274     const StringRef FuncName = FuncData.getKey();
1275     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1276     if (CommonName)
1277       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1278   }
1279 
1280   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1281     const StringRef FuncName = FuncData.getKey();
1282     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1283     if (CommonName)
1284       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1285   }
1286 }
1287 
1288 namespace {
1289 template <typename MapTy>
1290 decltype(MapTy::MapEntryTy::second) *
1291 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1292   // Do a reverse order iteration since the name in profile has a higher chance
1293   // of matching a name at the end of the list.
1294   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1295     auto I = Map.find(normalizeName((*SI)->getName()));
1296     if (I != Map.end())
1297       return &I->getValue();
1298   }
1299   return nullptr;
1300 }
1301 
1302 template <typename MapTy>
1303 decltype(MapTy::MapEntryTy::second) *
1304 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1305   // Do a reverse order iteration since the name in profile has a higher chance
1306   // of matching a name at the end of the list.
1307   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1308     auto I = Map.find(normalizeName(*FI));
1309     if (I != Map.end())
1310       return &I->getValue();
1311   }
1312   return nullptr;
1313 }
1314 
1315 template <typename MapTy>
1316 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1317     MapTy &Map,
1318     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1319         &LTOCommonNameMap,
1320     const std::vector<StringRef> &FuncNames) {
1321   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1322   // Do a reverse order iteration since the name in profile has a higher chance
1323   // of matching a name at the end of the list.
1324   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1325     std::string Name = normalizeName(*FI);
1326     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1327     if (LTOCommonName) {
1328       auto I = LTOCommonNameMap.find(*LTOCommonName);
1329       if (I != LTOCommonNameMap.end()) {
1330         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1331             I->getValue();
1332         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1333       }
1334     } else {
1335       auto I = Map.find(Name);
1336       if (I != Map.end()) {
1337         return {&I->getValue()};
1338       }
1339     }
1340   }
1341   return AllData;
1342 }
1343 
1344 }
1345 
1346 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1347   if (getBranchData(Function) || getMemData(Function))
1348     return true;
1349 
1350   if (getBranchDataForNames(Function.getNames()) ||
1351       getMemDataForNames(Function.getNames()))
1352     return true;
1353 
1354   if (!hasVolatileName(Function))
1355     return false;
1356 
1357   const std::vector<FuncBranchData *> AllBranchData =
1358       getBranchDataForNamesRegex(Function.getNames());
1359   if (!AllBranchData.empty())
1360     return true;
1361 
1362   const std::vector<FuncMemData *> AllMemData =
1363       getMemDataForNamesRegex(Function.getNames());
1364   if (!AllMemData.empty())
1365     return true;
1366 
1367   return false;
1368 }
1369 
1370 FuncBranchData *
1371 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1372   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1373 }
1374 
1375 FuncBranchData *
1376 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1377   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1378 }
1379 
1380 FuncMemData *
1381 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1382   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1383 }
1384 
1385 FuncSampleData *
1386 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1387   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1388 }
1389 
1390 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1391     const std::vector<StringRef> &FuncNames) {
1392   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1393 }
1394 
1395 std::vector<FuncMemData *>
1396 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1397   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1398 }
1399 
1400 bool DataReader::hasLocalsWithFileName() const {
1401   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1402     const StringRef &FuncName = Func.getKey();
1403     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1404       return true;
1405   }
1406   return false;
1407 }
1408 
1409 void DataReader::dump() const {
1410   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1411     Diag << Func.getKey() << " branches:\n";
1412     for (const BranchInfo &BI : Func.getValue().Data) {
1413       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1414            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1415     }
1416     Diag << Func.getKey() << " entry points:\n";
1417     for (const BranchInfo &BI : Func.getValue().EntryData) {
1418       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1419            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1420     }
1421   }
1422 
1423   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1424     StringRef Event = I->getKey();
1425     Diag << "Data was collected with event: " << Event << "\n";
1426   }
1427   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1428     Diag << Func.getKey() << " samples:\n";
1429     for (const SampleInfo &SI : Func.getValue().Data) {
1430       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1431     }
1432   }
1433 
1434   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1435     Diag << "Memory events for " << Func.getValue().Name;
1436     Location LastOffset(0);
1437     for (const MemInfo &MI : Func.getValue().Data) {
1438       if (MI.Offset == LastOffset) {
1439         Diag << ", " << MI.Addr << "/" << MI.Count;
1440       } else {
1441         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1442       }
1443       LastOffset = MI.Offset;
1444     }
1445     Diag << "\n";
1446   }
1447 }
1448 
1449 } // namespace bolt
1450 } // namespace llvm
1451