1 //===-- DataReader.cpp - Perf data reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <map>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 Optional<StringRef> getLTOCommonName(const StringRef Name) {
44   size_t LTOSuffixPos = Name.find(".lto_priv.");
45   if (LTOSuffixPos != StringRef::npos) {
46     return Name.substr(0, LTOSuffixPos + 10);
47   } else if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos) {
48     return Name.substr(0, LTOSuffixPos + 11);
49   } else {
50     return NoneType();
51   }
52 }
53 
54 namespace {
55 
56 /// Return true if the function name can change across compilations.
57 bool hasVolatileName(const BinaryFunction &BF) {
58   for (const StringRef Name : BF.getNames()) {
59     if (getLTOCommonName(Name))
60       return true;
61   }
62   return false;
63 }
64 
65 /// Return standard escaped name of the function possibly renamed by BOLT.
66 std::string normalizeName(StringRef NameRef) {
67   // Strip "PG." prefix used for globalized locals.
68   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
69   return getEscapedName(NameRef);
70 }
71 
72 } // anonymous namespace
73 
74 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
75   if (Loc.IsSymbol) {
76     OS << Loc.Name;
77     if (Loc.Offset)
78       OS << "+" << Twine::utohexstr(Loc.Offset);
79   } else {
80     OS << Twine::utohexstr(Loc.Offset);
81   }
82   return OS;
83 }
84 
85 iterator_range<FuncBranchData::ContainerTy::const_iterator>
86 FuncBranchData::getBranchRange(uint64_t From) const {
87   assert(std::is_sorted(Data.begin(), Data.end()));
88   struct Compare {
89     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
90       return BI.From.Offset < Val;
91     }
92     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
93       return Val < BI.From.Offset;
94     }
95   };
96   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
97   return iterator_range<ContainerTy::const_iterator>(Range.first, Range.second);
98 }
99 
100 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
101   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
102   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
103     if (I->From.Name == FBD.Name) {
104       I->From.Name = this->Name;
105       I->From.Offset += Offset;
106     }
107     if (I->To.Name == FBD.Name) {
108       I->To.Name = this->Name;
109       I->To.Offset += Offset;
110     }
111   }
112   std::stable_sort(Data.begin(), Data.end());
113   ExecutionCount += FBD.ExecutionCount;
114   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
115     assert(I->To.Name == FBD.Name);
116     auto NewElmt = EntryData.insert(EntryData.end(), *I);
117     NewElmt->To.Name = this->Name;
118     NewElmt->To.Offset += Offset;
119   }
120 }
121 
122 uint64_t FuncBranchData::getNumExecutedBranches() const {
123   uint64_t ExecutedBranches = 0;
124   for (const BranchInfo &BI : Data) {
125     int64_t BranchCount = BI.Branches;
126     assert(BranchCount >= 0 && "branch execution count should not be negative");
127     ExecutedBranches += BranchCount;
128   }
129   return ExecutedBranches;
130 }
131 
132 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
133 
134 void SampleInfo::print(raw_ostream &OS) const {
135   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
136      << " " << Hits << "\n";
137 }
138 
139 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
140   assert(std::is_sorted(Data.begin(), Data.end()));
141   struct Compare {
142     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
143       return SI.Loc.Offset < Val;
144     }
145     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
146       return Val < SI.Loc.Offset;
147     }
148   };
149   uint64_t Result = 0;
150   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
151             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
152        I != E; ++I) {
153     Result += I->Hits;
154   }
155   return Result;
156 }
157 
158 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
159   auto Iter = Index.find(Offset);
160   if (Iter == Index.end()) {
161     Data.emplace_back(Location(true, Name, Offset), Count);
162     Index[Offset] = Data.size() - 1;
163     return;
164   }
165   SampleInfo &SI = Data[Iter->second];
166   SI.Hits += Count;
167 }
168 
169 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
170                                      uint64_t Count, uint64_t Mispreds) {
171   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
172   if (Iter == IntraIndex[OffsetFrom].end()) {
173     Data.emplace_back(Location(true, Name, OffsetFrom),
174                       Location(true, Name, OffsetTo), Mispreds, Count);
175     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
176     return;
177   }
178   BranchInfo &BI = Data[Iter->second];
179   BI.Branches += Count;
180   BI.Mispreds += Mispreds;
181 }
182 
183 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
184                                    uint64_t Count, uint64_t Mispreds) {
185   auto Iter = InterIndex[OffsetFrom].find(To);
186   if (Iter == InterIndex[OffsetFrom].end()) {
187     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
188     InterIndex[OffsetFrom][To] = Data.size() - 1;
189     return;
190   }
191   BranchInfo &BI = Data[Iter->second];
192   BI.Branches += Count;
193   BI.Mispreds += Mispreds;
194 }
195 
196 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
197                                     uint64_t Count, uint64_t Mispreds) {
198   auto Iter = EntryIndex[OffsetTo].find(From);
199   if (Iter == EntryIndex[OffsetTo].end()) {
200     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
201                            Count);
202     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
203     return;
204   }
205   BranchInfo &BI = EntryData[Iter->second];
206   BI.Branches += Count;
207   BI.Mispreds += Mispreds;
208 }
209 
210 void BranchInfo::mergeWith(const BranchInfo &BI) {
211   Branches += BI.Branches;
212   Mispreds += BI.Mispreds;
213 }
214 
215 void BranchInfo::print(raw_ostream &OS) const {
216   OS << From.IsSymbol << " " << From.Name << " "
217      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
218      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
219      << '\n';
220 }
221 
222 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
223                                                       uint64_t To) const {
224   for (const BranchInfo &I : Data) {
225     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
226       return I;
227   }
228   return make_error_code(llvm::errc::invalid_argument);
229 }
230 
231 ErrorOr<const BranchInfo &>
232 FuncBranchData::getDirectCallBranch(uint64_t From) const {
233   // Commented out because it can be expensive.
234   // assert(std::is_sorted(Data.begin(), Data.end()));
235   struct Compare {
236     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
237       return BI.From.Offset < Val;
238     }
239     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
240       return Val < BI.From.Offset;
241     }
242   };
243   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
244   for (auto I = Range.first; I != Range.second; ++I) {
245     if (I->From.Name != I->To.Name)
246       return *I;
247   }
248   return make_error_code(llvm::errc::invalid_argument);
249 }
250 
251 void MemInfo::print(raw_ostream &OS) const {
252   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
253      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
254      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
255      << "\n";
256 }
257 
258 void MemInfo::prettyPrint(raw_ostream &OS) const {
259   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
260 }
261 
262 void FuncMemData::update(const Location &Offset, const Location &Addr) {
263   auto Iter = EventIndex[Offset.Offset].find(Addr);
264   if (Iter == EventIndex[Offset.Offset].end()) {
265     Data.emplace_back(MemInfo(Offset, Addr, 1));
266     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
267     return;
268   }
269   ++Data[Iter->second].Count;
270 }
271 
272 Error DataReader::preprocessProfile(BinaryContext &BC) {
273   if (std::error_code EC = parseInput()) {
274     return errorCodeToError(EC);
275   }
276 
277   if (opts::DumpData) {
278     dump();
279   }
280 
281   if (collectedInBoltedBinary()) {
282     outs() << "BOLT-INFO: profile collection done on a binary already "
283               "processed by BOLT\n";
284   }
285 
286   for (auto &BFI : BC.getBinaryFunctions()) {
287     BinaryFunction &Function = BFI.second;
288     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
289       setMemData(Function, MemData);
290       MemData->Used = true;
291     }
292     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
293       setBranchData(Function, FuncData);
294       Function.ExecutionCount = FuncData->ExecutionCount;
295       FuncData->Used = true;
296     }
297   }
298 
299   for (auto &BFI : BC.getBinaryFunctions()) {
300     BinaryFunction &Function = BFI.second;
301     matchProfileMemData(Function);
302   }
303 
304   return Error::success();
305 }
306 
307 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
308   for (auto &BFI : BC.getBinaryFunctions()) {
309     BinaryFunction &Function = BFI.second;
310     FuncMemData *MemoryData = getMemData(Function);
311     if (!MemoryData)
312       continue;
313 
314     for (MemInfo &MI : MemoryData->Data) {
315       const uint64_t Offset = MI.Offset.Offset;
316       auto II = Function.Instructions.find(Offset);
317       if (II == Function.Instructions.end()) {
318         // Ignore bad instruction address.
319         continue;
320       }
321 
322       auto &MemAccessProfile =
323           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
324               II->second, "MemoryAccessProfile");
325       BinaryData *BD = nullptr;
326       if (MI.Addr.IsSymbol) {
327         BD = BC.getBinaryDataByName(MI.Addr.Name);
328       }
329       MemAccessProfile.AddressAccessInfo.push_back(
330           {BD, MI.Addr.Offset, MI.Count});
331       auto NextII = std::next(II);
332       if (NextII == Function.Instructions.end()) {
333         MemAccessProfile.NextInstrOffset = Function.getSize();
334       } else {
335         MemAccessProfile.NextInstrOffset = II->first;
336       }
337     }
338     Function.HasMemoryProfile = true;
339   }
340 
341   return Error::success();
342 }
343 
344 Error DataReader::readProfile(BinaryContext &BC) {
345   for (auto &BFI : BC.getBinaryFunctions()) {
346     BinaryFunction &Function = BFI.second;
347     readProfile(Function);
348   }
349 
350   uint64_t NumUnused = 0;
351   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
352     if (!FuncData.getValue().Used)
353       ++NumUnused;
354   BC.setNumUnusedProfiledObjects(NumUnused);
355 
356   return Error::success();
357 }
358 
359 std::error_code DataReader::parseInput() {
360   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
361       MemoryBuffer::getFileOrSTDIN(Filename);
362   if (std::error_code EC = MB.getError()) {
363     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
364     return EC;
365   }
366   FileBuf = std::move(MB.get());
367   ParsingBuf = FileBuf->getBuffer();
368   if (std::error_code EC = parse()) {
369     return EC;
370   }
371   if (!ParsingBuf.empty()) {
372     Diag << "WARNING: invalid profile data detected at line " << Line
373          << ". Possibly corrupted profile.\n";
374   }
375 
376   buildLTONameMaps();
377 
378   return std::error_code();
379 }
380 
381 void DataReader::readProfile(BinaryFunction &BF) {
382   if (BF.empty())
383     return;
384 
385   if (!hasLBR()) {
386     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
387     readSampleData(BF);
388     return;
389   }
390 
391   BF.ProfileFlags = BinaryFunction::PF_LBR;
392 
393   // Possibly assign/re-assign branch profile data.
394   matchProfileData(BF);
395 
396   FuncBranchData *FBD = getBranchData(BF);
397   if (!FBD)
398     return;
399 
400   // Assign basic block counts to function entry points. These only include
401   // counts for outside entries.
402   //
403   // There is a slight skew introduced here as branches originated from RETs
404   // may be accounted for in the execution count of an entry block if the last
405   // instruction in a predecessor fall-through block is a call. This situation
406   // should rarely happen because there are few multiple-entry functions.
407   for (const BranchInfo &BI : FBD->EntryData) {
408     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
409     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
410       uint64_t Count = BB->getExecutionCount();
411       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
412         Count = 0;
413       BB->setExecutionCount(Count + BI.Branches);
414     }
415   }
416 
417   uint64_t MismatchedBranches = 0;
418   for (const BranchInfo &BI : FBD->Data) {
419     if (BI.From.Name != BI.To.Name) {
420       continue;
421     }
422 
423     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
424                       BI.Mispreds)) {
425       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
426                         << BI.To.Offset << '\n');
427       ++MismatchedBranches;
428     }
429   }
430 
431   // Convert branch data into annotations.
432   convertBranchData(BF);
433 }
434 
435 void DataReader::matchProfileData(BinaryFunction &BF) {
436   // This functionality is available for LBR-mode only
437   // TODO: Implement evaluateProfileData() for samples, checking whether
438   // sample addresses match instruction addresses in the function
439   if (!hasLBR())
440     return;
441 
442   FuncBranchData *FBD = getBranchData(BF);
443   if (FBD) {
444     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
445     BF.RawBranchCount = FBD->getNumExecutedBranches();
446     if (BF.ProfileMatchRatio == 1.0f) {
447       if (fetchProfileForOtherEntryPoints(BF)) {
448         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
449         BF.ExecutionCount = FBD->ExecutionCount;
450         BF.RawBranchCount = FBD->getNumExecutedBranches();
451       }
452       return;
453     }
454   }
455 
456   // Check if the function name can fluctuate between several compilations
457   // possibly triggered by minor unrelated code changes in the source code
458   // of the input binary.
459   if (!hasVolatileName(BF))
460     return;
461 
462   // Check for a profile that matches with 100% confidence.
463   const std::vector<FuncBranchData *> AllBranchData =
464       getBranchDataForNamesRegex(BF.getNames());
465   for (FuncBranchData *NewBranchData : AllBranchData) {
466     // Prevent functions from sharing the same profile.
467     if (NewBranchData->Used)
468       continue;
469 
470     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
471       continue;
472 
473     if (FBD)
474       FBD->Used = false;
475 
476     // Update function profile data with the new set.
477     setBranchData(BF, NewBranchData);
478     NewBranchData->Used = true;
479     BF.ExecutionCount = NewBranchData->ExecutionCount;
480     BF.ProfileMatchRatio = 1.0f;
481     break;
482   }
483 }
484 
485 void DataReader::matchProfileMemData(BinaryFunction &BF) {
486   const std::vector<FuncMemData *> AllMemData =
487       getMemDataForNamesRegex(BF.getNames());
488   for (FuncMemData *NewMemData : AllMemData) {
489     // Prevent functions from sharing the same profile.
490     if (NewMemData->Used)
491       continue;
492 
493     if (FuncMemData *MD = getMemData(BF))
494       MD->Used = false;
495 
496     // Update function profile data with the new set.
497     setMemData(BF, NewMemData);
498     NewMemData->Used = true;
499     break;
500   }
501 }
502 
503 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
504   BinaryContext &BC = BF.getBinaryContext();
505 
506   FuncBranchData *FBD = getBranchData(BF);
507   if (!FBD)
508     return false;
509 
510   // Check if we are missing profiling data for secondary entry points
511   bool First = true;
512   bool Updated = false;
513   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
514     if (First) {
515       First = false;
516       continue;
517     }
518     if (BB->isEntryPoint()) {
519       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
520       // Look for branch data associated with this entry point
521       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
522         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
523           FBD->appendFrom(*Data, BB->getOffset());
524           Data->Used = true;
525           Updated = true;
526         }
527       }
528     }
529   }
530 
531   return Updated;
532 }
533 
534 float DataReader::evaluateProfileData(BinaryFunction &BF,
535                                       const FuncBranchData &BranchData) const {
536   BinaryContext &BC = BF.getBinaryContext();
537 
538   // Until we define a minimal profile, we consider an empty branch data to be
539   // a valid profile. It could happen to a function without branches when we
540   // still have an EntryData for the execution count.
541   if (BranchData.Data.empty()) {
542     return 1.0f;
543   }
544 
545   uint64_t NumMatchedBranches = 0;
546   for (const BranchInfo &BI : BranchData.Data) {
547     bool IsValid = false;
548     if (BI.From.Name == BI.To.Name) {
549       // Try to record information with 0 count.
550       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
551     } else if (collectedInBoltedBinary()) {
552       // We can't check branch source for collections in bolted binaries because
553       // the source of the branch may be mapped to the first instruction in a BB
554       // instead of the original branch (which may not exist in the source bin).
555       IsValid = true;
556     } else {
557       // The branch has to originate from this function.
558       // Check for calls, tail calls, rets and indirect branches.
559       // When matching profiling info, we did not reach the stage
560       // when we identify tail calls, so they are still represented
561       // by regular branch instructions and we need isBranch() here.
562       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
563       // If it's a prefix - skip it.
564       if (Instr && BC.MIB->isPrefix(*Instr))
565         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
566       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
567                     BC.MIB->isReturn(*Instr))) {
568         IsValid = true;
569       }
570     }
571 
572     if (IsValid) {
573       ++NumMatchedBranches;
574       continue;
575     }
576 
577     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
578                       << Twine::utohexstr(BI.From.Offset) << " -> ";
579                if (BI.From.Name == BI.To.Name) dbgs()
580                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
581                else dbgs() << "<outbounds>\n";);
582   }
583 
584   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
585   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
586     errs() << "BOLT-WARNING: profile branches match only "
587            << format("%.1f%%", MatchRatio * 100.0f) << " ("
588            << NumMatchedBranches << '/' << BranchData.Data.size()
589            << ") for function " << BF << '\n';
590   }
591 
592   return MatchRatio;
593 }
594 
595 void DataReader::readSampleData(BinaryFunction &BF) {
596   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
597   if (!SampleDataOrErr)
598     return;
599 
600   // Basic samples mode territory (without LBR info)
601   // First step is to assign BB execution count based on samples from perf
602   BF.ProfileMatchRatio = 1.0f;
603   BF.removeTagsFromProfile();
604   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
605   bool NormalizeByCalls = usesEvent("branches");
606   static bool NagUser = true;
607   if (NagUser) {
608     outs()
609         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
610     if (NormalizeByInsnCount) {
611       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
612     } else if (NormalizeByCalls) {
613       outs() << "BOLT-INFO: normalizing samples by branches.\n";
614     }
615     NagUser = false;
616   }
617   uint64_t LastOffset = BF.getSize();
618   uint64_t TotalEntryCount = 0;
619   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
620        I != E; ++I) {
621     uint64_t CurOffset = I->first;
622     // Always work with samples multiplied by 1000 to avoid losing them if we
623     // later need to normalize numbers
624     uint64_t NumSamples =
625         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
626     if (NormalizeByInsnCount && I->second->getNumNonPseudos())
627       NumSamples /= I->second->getNumNonPseudos();
628     else if (NormalizeByCalls) {
629       uint32_t NumCalls = I->second->getNumCalls();
630       NumSamples /= NumCalls + 1;
631     }
632     I->second->setExecutionCount(NumSamples);
633     if (I->second->isEntryPoint())
634       TotalEntryCount += NumSamples;
635     LastOffset = CurOffset;
636   }
637 
638   BF.ExecutionCount = TotalEntryCount;
639 
640   estimateEdgeCounts(BF);
641 }
642 
643 void DataReader::convertBranchData(BinaryFunction &BF) const {
644   BinaryContext &BC = BF.getBinaryContext();
645 
646   if (BF.empty())
647     return;
648 
649   FuncBranchData *FBD = getBranchData(BF);
650   if (!FBD)
651     return;
652 
653   // Profile information for calls.
654   //
655   // There are 3 cases that we annotate differently:
656   //   1) Conditional tail calls that could be mispredicted.
657   //   2) Indirect calls to multiple destinations with mispredictions.
658   //      Before we validate CFG we have to handle indirect branches here too.
659   //   3) Regular direct calls. The count could be different from containing
660   //      basic block count. Keep this data in case we find it useful.
661   //
662   for (BranchInfo &BI : FBD->Data) {
663     // Ignore internal branches.
664     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
665       continue;
666 
667     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
668     if (!Instr ||
669         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
670       continue;
671 
672     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
673       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) {
674         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
675                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
676                << '\n';
677       }
678       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
679       Value += Count;
680     };
681 
682     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
683       IndirectCallSiteProfile &CSP =
684           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
685               *Instr, "CallProfile");
686       MCSymbol *CalleeSymbol = nullptr;
687       if (BI.To.IsSymbol) {
688         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) {
689           CalleeSymbol = BD->getSymbol();
690         }
691       }
692       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
693     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
694       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
695       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
696     } else {
697       setOrUpdateAnnotation("Count", BI.Branches);
698     }
699   }
700 }
701 
702 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
703                               uint64_t Count, uint64_t Mispreds) const {
704   BinaryContext &BC = BF.getBinaryContext();
705 
706   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
707   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
708 
709   if (!FromBB || !ToBB) {
710     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
711     return false;
712   }
713 
714   // Could be bad LBR data; ignore the branch. In the case of data collected
715   // in binaries optimized by BOLT, a source BB may be mapped to two output
716   // BBs as a result of optimizations. In that case, a branch between these
717   // two will be recorded as a branch from A going to A in the source address
718   // space. Keep processing.
719   if (From == To) {
720     return true;
721   }
722 
723   // Return from a tail call.
724   if (FromBB->succ_size() == 0)
725     return true;
726 
727   // Very rarely we will see ignored branches. Do a linear check.
728   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
729     if (Branch ==
730         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
731       return true;
732   }
733 
734   bool OffsetMatches = !!(To == ToBB->getOffset());
735   if (!OffsetMatches) {
736     // Skip the nops to support old .fdata
737     uint64_t Offset = ToBB->getOffset();
738     for (MCInst &Instr : *ToBB) {
739       if (!BC.MIB->isNoop(Instr))
740         break;
741 
742       Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
743     }
744 
745     if (To == Offset)
746       OffsetMatches = true;
747   }
748 
749   if (!OffsetMatches) {
750     // "To" could be referring to nop instructions in between 2 basic blocks.
751     // While building the CFG we make sure these nops are attributed to the
752     // previous basic block, thus we check if the destination belongs to the
753     // gap past the last instruction.
754     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
755     if (LastInstr) {
756       const uint32_t LastInstrOffset =
757           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Offset");
758 
759       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
760       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) {
761         return true;
762       }
763 
764       if (To <= LastInstrOffset) {
765         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
766                           << " in " << BF << " : " << From << " -> " << To
767                           << '\n');
768         return false;
769       }
770     }
771 
772     // The real destination is the layout successor of the detected ToBB.
773     if (ToBB == BF.BasicBlocksLayout.back())
774       return false;
775     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
776     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
777     ToBB = NextBB;
778   }
779 
780   // If there's no corresponding instruction for 'From', we have probably
781   // discarded it as a FT from __builtin_unreachable.
782   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
783   if (!FromInstruction) {
784     // If the data was collected in a bolted binary, the From addresses may be
785     // translated to the first instruction of the source BB if BOLT inserted
786     // a new branch that did not exist in the source (we can't map it to the
787     // source instruction, so we map it to the first instr of source BB).
788     // We do not keep offsets for random instructions. So the check above will
789     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
790     if (collectedInBoltedBinary()) {
791       if (FromBB->getInputOffset() != From) {
792         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
793                           << BF << '\n');
794         return false;
795       }
796       FromInstruction = nullptr;
797     } else {
798       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
799                         << '\n');
800       return false;
801     }
802   }
803 
804   if (!FromBB->getSuccessor(ToBB->getLabel())) {
805     // Check if this is a recursive call or a return from a recursive call.
806     if (FromInstruction && ToBB->isEntryPoint() &&
807         (BC.MIB->isCall(*FromInstruction) ||
808          BC.MIB->isIndirectBranch(*FromInstruction))) {
809       // Execution count is already accounted for.
810       return true;
811     }
812     // For data collected in a bolted binary, we may have created two output BBs
813     // that map to one original block. Branches between these two blocks will
814     // appear here as one BB jumping to itself, even though it has no loop
815     // edges. Ignore these.
816     if (collectedInBoltedBinary() && FromBB == ToBB)
817       return true;
818 
819     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
820     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
821         FTSuccessor->getSuccessor(ToBB->getLabel())) {
822       BinaryBasicBlock::BinaryBranchInfo &FTBI =
823           FTSuccessor->getBranchInfo(*ToBB);
824       FTBI.Count += Count;
825       if (Count)
826         FTBI.MispredictedCount += Mispreds;
827       ToBB = FTSuccessor;
828     } else {
829       LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
830                         << Twine::utohexstr(From) << " -> "
831                         << Twine::utohexstr(To) << '\n');
832       return false;
833     }
834   }
835 
836   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
837   BI.Count += Count;
838   // Only update mispredicted count if it the count was real.
839   if (Count) {
840     BI.MispredictedCount += Mispreds;
841   }
842 
843   return true;
844 }
845 
846 void DataReader::reportError(StringRef ErrorMsg) {
847   Diag << "Error reading BOLT data input file: line " << Line << ", column "
848        << Col << ": " << ErrorMsg << '\n';
849 }
850 
851 bool DataReader::expectAndConsumeFS() {
852   if (ParsingBuf[0] != FieldSeparator) {
853     reportError("expected field separator");
854     return false;
855   }
856   ParsingBuf = ParsingBuf.drop_front(1);
857   Col += 1;
858   return true;
859 }
860 
861 void DataReader::consumeAllRemainingFS() {
862   while (ParsingBuf[0] == FieldSeparator) {
863     ParsingBuf = ParsingBuf.drop_front(1);
864     Col += 1;
865   }
866 }
867 
868 bool DataReader::checkAndConsumeNewLine() {
869   if (ParsingBuf[0] != '\n')
870     return false;
871 
872   ParsingBuf = ParsingBuf.drop_front(1);
873   Col = 0;
874   Line += 1;
875   return true;
876 }
877 
878 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
879   if (EndChar == '\\') {
880     reportError("EndChar could not be backslash");
881     return make_error_code(llvm::errc::io_error);
882   }
883 
884   std::string EndChars(1, EndChar);
885   EndChars.push_back('\\');
886   if (EndNl)
887     EndChars.push_back('\n');
888 
889   size_t StringEnd = 0;
890   do {
891     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
892     if (StringEnd == StringRef::npos ||
893         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
894       reportError("malformed field");
895       return make_error_code(llvm::errc::io_error);
896     }
897 
898     if (ParsingBuf[StringEnd] != '\\')
899       break;
900 
901     StringEnd += 2;
902   } while (1);
903 
904   StringRef Str = ParsingBuf.substr(0, StringEnd);
905 
906   // If EndNl was set and nl was found instead of EndChar, do not consume the
907   // new line.
908   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
909   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
910 
911   ParsingBuf = ParsingBuf.drop_front(End);
912   if (EndChar == '\n') {
913     Col = 0;
914     Line += 1;
915   } else {
916     Col += End;
917   }
918   return Str;
919 }
920 
921 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
922   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
923   if (std::error_code EC = NumStrRes.getError())
924     return EC;
925   StringRef NumStr = NumStrRes.get();
926   int64_t Num;
927   if (NumStr.getAsInteger(10, Num)) {
928     reportError("expected decimal number");
929     Diag << "Found: " << NumStr << "\n";
930     return make_error_code(llvm::errc::io_error);
931   }
932   return Num;
933 }
934 
935 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
936   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
937   if (std::error_code EC = NumStrRes.getError())
938     return EC;
939   StringRef NumStr = NumStrRes.get();
940   uint64_t Num;
941   if (NumStr.getAsInteger(16, Num)) {
942     reportError("expected hexidecimal number");
943     Diag << "Found: " << NumStr << "\n";
944     return make_error_code(llvm::errc::io_error);
945   }
946   return Num;
947 }
948 
949 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
950                                             bool ExpectMemLoc) {
951   // Read whether the location of the branch should be DSO or a symbol
952   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
953   // symbol.
954   // The symbol flag is also used to tag memory load events by adding 3 to the
955   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
956   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
957       ParsingBuf[0] != '2') {
958     reportError("expected 0, 1 or 2");
959     return make_error_code(llvm::errc::io_error);
960   }
961 
962   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
963       ParsingBuf[0] != '5') {
964     reportError("expected 3, 4 or 5");
965     return make_error_code(llvm::errc::io_error);
966   }
967 
968   bool IsSymbol =
969       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
970       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
971   ParsingBuf = ParsingBuf.drop_front(1);
972   Col += 1;
973 
974   if (!expectAndConsumeFS())
975     return make_error_code(llvm::errc::io_error);
976   consumeAllRemainingFS();
977 
978   // Read the string containing the symbol or the DSO name
979   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
980   if (std::error_code EC = NameRes.getError())
981     return EC;
982   StringRef Name = NameRes.get();
983   consumeAllRemainingFS();
984 
985   // Read the offset
986   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
987   if (std::error_code EC = Offset.getError())
988     return EC;
989 
990   return Location(IsSymbol, Name, Offset.get());
991 }
992 
993 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
994   ErrorOr<Location> Res = parseLocation(FieldSeparator);
995   if (std::error_code EC = Res.getError())
996     return EC;
997   Location From = Res.get();
998 
999   consumeAllRemainingFS();
1000   Res = parseLocation(FieldSeparator);
1001   if (std::error_code EC = Res.getError())
1002     return EC;
1003   Location To = Res.get();
1004 
1005   consumeAllRemainingFS();
1006   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
1007   if (std::error_code EC = MRes.getError())
1008     return EC;
1009   int64_t NumMispreds = MRes.get();
1010 
1011   consumeAllRemainingFS();
1012   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1013   if (std::error_code EC = BRes.getError())
1014     return EC;
1015   int64_t NumBranches = BRes.get();
1016 
1017   consumeAllRemainingFS();
1018   if (!checkAndConsumeNewLine()) {
1019     reportError("expected end of line");
1020     return make_error_code(llvm::errc::io_error);
1021   }
1022 
1023   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
1024 }
1025 
1026 ErrorOr<MemInfo> DataReader::parseMemInfo() {
1027   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
1028   if (std::error_code EC = Res.getError())
1029     return EC;
1030   Location Offset = Res.get();
1031 
1032   consumeAllRemainingFS();
1033   Res = parseMemLocation(FieldSeparator);
1034   if (std::error_code EC = Res.getError())
1035     return EC;
1036   Location Addr = Res.get();
1037 
1038   consumeAllRemainingFS();
1039   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1040   if (std::error_code EC = CountRes.getError())
1041     return EC;
1042 
1043   consumeAllRemainingFS();
1044   if (!checkAndConsumeNewLine()) {
1045     reportError("expected end of line");
1046     return make_error_code(llvm::errc::io_error);
1047   }
1048 
1049   return MemInfo(Offset, Addr, CountRes.get());
1050 }
1051 
1052 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1053   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1054   if (std::error_code EC = Res.getError())
1055     return EC;
1056   Location Address = Res.get();
1057 
1058   consumeAllRemainingFS();
1059   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1060   if (std::error_code EC = BRes.getError())
1061     return EC;
1062   int64_t Occurrences = BRes.get();
1063 
1064   consumeAllRemainingFS();
1065   if (!checkAndConsumeNewLine()) {
1066     reportError("expected end of line");
1067     return make_error_code(llvm::errc::io_error);
1068   }
1069 
1070   return SampleInfo(std::move(Address), Occurrences);
1071 }
1072 
1073 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1074   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1075     return false;
1076   ParsingBuf = ParsingBuf.drop_front(6);
1077   Col += 6;
1078 
1079   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1080     ParsingBuf = ParsingBuf.drop_front(1);
1081 
1082   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1083     ErrorOr<StringRef> EventName = parseString(' ', true);
1084     if (!EventName)
1085       return make_error_code(llvm::errc::io_error);
1086     EventNames.insert(EventName.get());
1087   }
1088 
1089   if (!checkAndConsumeNewLine()) {
1090     reportError("malformed no_lbr line");
1091     return make_error_code(llvm::errc::io_error);
1092   }
1093   return true;
1094 }
1095 
1096 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1097   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1098     return false;
1099   ParsingBuf = ParsingBuf.drop_front(16);
1100   Col += 16;
1101 
1102   if (!checkAndConsumeNewLine()) {
1103     reportError("malformed boltedcollection line");
1104     return make_error_code(llvm::errc::io_error);
1105   }
1106   return true;
1107 }
1108 
1109 bool DataReader::hasBranchData() {
1110   if (ParsingBuf.size() == 0)
1111     return false;
1112 
1113   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1114     return true;
1115   return false;
1116 }
1117 
1118 bool DataReader::hasMemData() {
1119   if (ParsingBuf.size() == 0)
1120     return false;
1121 
1122   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1123     return true;
1124   return false;
1125 }
1126 
1127 std::error_code DataReader::parseInNoLBRMode() {
1128   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1129     auto I = NamesToSamples.find(Name);
1130     if (I == NamesToSamples.end()) {
1131       bool Success;
1132       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1133           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1134 
1135       assert(Success && "unexpected result of insert");
1136     }
1137     return I;
1138   };
1139 
1140   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1141     auto I = NamesToMemEvents.find(Name);
1142     if (I == NamesToMemEvents.end()) {
1143       bool Success;
1144       std::tie(I, Success) = NamesToMemEvents.insert(
1145           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1146       assert(Success && "unexpected result of insert");
1147     }
1148     return I;
1149   };
1150 
1151   while (hasBranchData()) {
1152     ErrorOr<SampleInfo> Res = parseSampleInfo();
1153     if (std::error_code EC = Res.getError())
1154       return EC;
1155 
1156     SampleInfo SI = Res.get();
1157 
1158     // Ignore samples not involving known locations
1159     if (!SI.Loc.IsSymbol)
1160       continue;
1161 
1162     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1163     I->getValue().Data.emplace_back(std::move(SI));
1164   }
1165 
1166   while (hasMemData()) {
1167     ErrorOr<MemInfo> Res = parseMemInfo();
1168     if (std::error_code EC = Res.getError())
1169       return EC;
1170 
1171     MemInfo MI = Res.get();
1172 
1173     // Ignore memory events not involving known pc.
1174     if (!MI.Offset.IsSymbol)
1175       continue;
1176 
1177     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1178     I->getValue().Data.emplace_back(std::move(MI));
1179   }
1180 
1181   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples) {
1182     std::stable_sort(FuncSamples.second.Data.begin(),
1183                      FuncSamples.second.Data.end());
1184   }
1185 
1186   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1187     std::stable_sort(MemEvents.second.Data.begin(),
1188                      MemEvents.second.Data.end());
1189   }
1190 
1191   return std::error_code();
1192 }
1193 
1194 std::error_code DataReader::parse() {
1195   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1196     auto I = NamesToBranches.find(Name);
1197     if (I == NamesToBranches.end()) {
1198       bool Success;
1199       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1200           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1201                                FuncBranchData::ContainerTy())));
1202       assert(Success && "unexpected result of insert");
1203     }
1204     return I;
1205   };
1206 
1207   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1208     auto I = NamesToMemEvents.find(Name);
1209     if (I == NamesToMemEvents.end()) {
1210       bool Success;
1211       std::tie(I, Success) = NamesToMemEvents.insert(
1212           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1213       assert(Success && "unexpected result of insert");
1214     }
1215     return I;
1216   };
1217 
1218   Col = 0;
1219   Line = 1;
1220   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1221   if (!FlagOrErr)
1222     return FlagOrErr.getError();
1223   NoLBRMode = *FlagOrErr;
1224 
1225   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1226   if (!BATFlagOrErr)
1227     return BATFlagOrErr.getError();
1228   BATMode = *BATFlagOrErr;
1229 
1230   if (!hasBranchData() && !hasMemData()) {
1231     Diag << "ERROR: no valid profile data found\n";
1232     return make_error_code(llvm::errc::io_error);
1233   }
1234 
1235   if (NoLBRMode)
1236     return parseInNoLBRMode();
1237 
1238   while (hasBranchData()) {
1239     ErrorOr<BranchInfo> Res = parseBranchInfo();
1240     if (std::error_code EC = Res.getError())
1241       return EC;
1242 
1243     BranchInfo BI = Res.get();
1244 
1245     // Ignore branches not involving known location.
1246     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1247       continue;
1248 
1249     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1250     I->getValue().Data.emplace_back(std::move(BI));
1251 
1252     // Add entry data for branches to another function or branches
1253     // to entry points (including recursive calls)
1254     if (BI.To.IsSymbol &&
1255         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1256       I = GetOrCreateFuncEntry(BI.To.Name);
1257       I->getValue().EntryData.emplace_back(std::move(BI));
1258     }
1259 
1260     // If destination is the function start - update execution count.
1261     // NB: the data is skewed since we cannot tell tail recursion from
1262     //     branches to the function start.
1263     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1264       I = GetOrCreateFuncEntry(BI.To.Name);
1265       I->getValue().ExecutionCount += BI.Branches;
1266     }
1267   }
1268 
1269   while (hasMemData()) {
1270     ErrorOr<MemInfo> Res = parseMemInfo();
1271     if (std::error_code EC = Res.getError())
1272       return EC;
1273 
1274     MemInfo MI = Res.get();
1275 
1276     // Ignore memory events not involving known pc.
1277     if (!MI.Offset.IsSymbol)
1278       continue;
1279 
1280     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1281     I->getValue().Data.emplace_back(std::move(MI));
1282   }
1283 
1284   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches) {
1285     std::stable_sort(FuncBranches.second.Data.begin(),
1286                      FuncBranches.second.Data.end());
1287   }
1288 
1289   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1290     std::stable_sort(MemEvents.second.Data.begin(),
1291                      MemEvents.second.Data.end());
1292   }
1293 
1294   return std::error_code();
1295 }
1296 
1297 void DataReader::buildLTONameMaps() {
1298   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1299     const StringRef FuncName = FuncData.getKey();
1300     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1301     if (CommonName)
1302       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1303   }
1304 
1305   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1306     const StringRef FuncName = FuncData.getKey();
1307     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1308     if (CommonName)
1309       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1310   }
1311 }
1312 
1313 namespace {
1314 template <typename MapTy>
1315 decltype(MapTy::MapEntryTy::second) *
1316 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1317   // Do a reverse order iteration since the name in profile has a higher chance
1318   // of matching a name at the end of the list.
1319   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1320     auto I = Map.find(normalizeName((*SI)->getName()));
1321     if (I != Map.end())
1322       return &I->getValue();
1323   }
1324   return nullptr;
1325 }
1326 
1327 template <typename MapTy>
1328 decltype(MapTy::MapEntryTy::second) *
1329 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1330   // Do a reverse order iteration since the name in profile has a higher chance
1331   // of matching a name at the end of the list.
1332   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1333     auto I = Map.find(normalizeName(*FI));
1334     if (I != Map.end())
1335       return &I->getValue();
1336   }
1337   return nullptr;
1338 }
1339 
1340 template <typename MapTy>
1341 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1342     MapTy &Map,
1343     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1344         &LTOCommonNameMap,
1345     const std::vector<StringRef> &FuncNames) {
1346   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1347   // Do a reverse order iteration since the name in profile has a higher chance
1348   // of matching a name at the end of the list.
1349   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1350     std::string Name = normalizeName(*FI);
1351     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1352     if (LTOCommonName) {
1353       auto I = LTOCommonNameMap.find(*LTOCommonName);
1354       if (I != LTOCommonNameMap.end()) {
1355         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1356             I->getValue();
1357         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1358       }
1359     } else {
1360       auto I = Map.find(Name);
1361       if (I != Map.end()) {
1362         return {&I->getValue()};
1363       }
1364     }
1365   }
1366   return AllData;
1367 }
1368 
1369 }
1370 
1371 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1372   if (getBranchData(Function) || getMemData(Function))
1373     return true;
1374 
1375   if (getBranchDataForNames(Function.getNames()) ||
1376       getMemDataForNames(Function.getNames()))
1377     return true;
1378 
1379   if (!hasVolatileName(Function))
1380     return false;
1381 
1382   const std::vector<FuncBranchData *> AllBranchData =
1383       getBranchDataForNamesRegex(Function.getNames());
1384   if (!AllBranchData.empty())
1385     return true;
1386 
1387   const std::vector<FuncMemData *> AllMemData =
1388       getMemDataForNamesRegex(Function.getNames());
1389   if (!AllMemData.empty())
1390     return true;
1391 
1392   return false;
1393 }
1394 
1395 FuncBranchData *
1396 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1397   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1398 }
1399 
1400 FuncBranchData *
1401 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1402   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1403 }
1404 
1405 FuncMemData *
1406 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1407   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1408 }
1409 
1410 FuncSampleData *
1411 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1412   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1413 }
1414 
1415 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1416     const std::vector<StringRef> &FuncNames) {
1417   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1418 }
1419 
1420 std::vector<FuncMemData *>
1421 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1422   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1423 }
1424 
1425 bool DataReader::hasLocalsWithFileName() const {
1426   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1427     const StringRef &FuncName = Func.getKey();
1428     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1429       return true;
1430   }
1431   return false;
1432 }
1433 
1434 void DataReader::dump() const {
1435   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1436     Diag << Func.getKey() << " branches:\n";
1437     for (const BranchInfo &BI : Func.getValue().Data) {
1438       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1439            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1440     }
1441     Diag << Func.getKey() << " entry points:\n";
1442     for (const BranchInfo &BI : Func.getValue().EntryData) {
1443       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1444            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1445     }
1446   }
1447 
1448   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1449     StringRef Event = I->getKey();
1450     Diag << "Data was collected with event: " << Event << "\n";
1451   }
1452   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1453     Diag << Func.getKey() << " samples:\n";
1454     for (const SampleInfo &SI : Func.getValue().Data) {
1455       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1456     }
1457   }
1458 
1459   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1460     Diag << "Memory events for " << Func.getValue().Name;
1461     Location LastOffset(0);
1462     for (const MemInfo &MI : Func.getValue().Data) {
1463       if (MI.Offset == LastOffset) {
1464         Diag << ", " << MI.Addr << "/" << MI.Count;
1465       } else {
1466         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1467       }
1468       LastOffset = MI.Offset;
1469     }
1470     Diag << "\n";
1471   }
1472 }
1473 
1474 } // namespace bolt
1475 } // namespace llvm
1476