1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <map>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 Optional<StringRef> getLTOCommonName(const StringRef Name) {
44   size_t LTOSuffixPos = Name.find(".lto_priv.");
45   if (LTOSuffixPos != StringRef::npos) {
46     return Name.substr(0, LTOSuffixPos + 10);
47   } else if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos) {
48     return Name.substr(0, LTOSuffixPos + 11);
49   } else {
50     return NoneType();
51   }
52 }
53 
54 namespace {
55 
56 /// Return true if the function name can change across compilations.
57 bool hasVolatileName(const BinaryFunction &BF) {
58   for (const StringRef Name : BF.getNames()) {
59     if (getLTOCommonName(Name))
60       return true;
61   }
62   return false;
63 }
64 
65 /// Return standard escaped name of the function possibly renamed by BOLT.
66 std::string normalizeName(StringRef NameRef) {
67   // Strip "PG." prefix used for globalized locals.
68   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
69   return getEscapedName(NameRef);
70 }
71 
72 } // anonymous namespace
73 
74 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
75   if (Loc.IsSymbol) {
76     OS << Loc.Name;
77     if (Loc.Offset)
78       OS << "+" << Twine::utohexstr(Loc.Offset);
79   } else {
80     OS << Twine::utohexstr(Loc.Offset);
81   }
82   return OS;
83 }
84 
85 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
86   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
87   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
88     if (I->From.Name == FBD.Name) {
89       I->From.Name = this->Name;
90       I->From.Offset += Offset;
91     }
92     if (I->To.Name == FBD.Name) {
93       I->To.Name = this->Name;
94       I->To.Offset += Offset;
95     }
96   }
97   std::stable_sort(Data.begin(), Data.end());
98   ExecutionCount += FBD.ExecutionCount;
99   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
100     assert(I->To.Name == FBD.Name);
101     auto NewElmt = EntryData.insert(EntryData.end(), *I);
102     NewElmt->To.Name = this->Name;
103     NewElmt->To.Offset += Offset;
104   }
105 }
106 
107 uint64_t FuncBranchData::getNumExecutedBranches() const {
108   uint64_t ExecutedBranches = 0;
109   for (const BranchInfo &BI : Data) {
110     int64_t BranchCount = BI.Branches;
111     assert(BranchCount >= 0 && "branch execution count should not be negative");
112     ExecutedBranches += BranchCount;
113   }
114   return ExecutedBranches;
115 }
116 
117 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
118 
119 void SampleInfo::print(raw_ostream &OS) const {
120   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
121      << " " << Hits << "\n";
122 }
123 
124 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
125   assert(std::is_sorted(Data.begin(), Data.end()));
126   struct Compare {
127     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
128       return SI.Loc.Offset < Val;
129     }
130     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
131       return Val < SI.Loc.Offset;
132     }
133   };
134   uint64_t Result = 0;
135   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
136             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
137        I != E; ++I) {
138     Result += I->Hits;
139   }
140   return Result;
141 }
142 
143 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
144   auto Iter = Index.find(Offset);
145   if (Iter == Index.end()) {
146     Data.emplace_back(Location(true, Name, Offset), Count);
147     Index[Offset] = Data.size() - 1;
148     return;
149   }
150   SampleInfo &SI = Data[Iter->second];
151   SI.Hits += Count;
152 }
153 
154 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
155                                      uint64_t Count, uint64_t Mispreds) {
156   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
157   if (Iter == IntraIndex[OffsetFrom].end()) {
158     Data.emplace_back(Location(true, Name, OffsetFrom),
159                       Location(true, Name, OffsetTo), Mispreds, Count);
160     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
161     return;
162   }
163   BranchInfo &BI = Data[Iter->second];
164   BI.Branches += Count;
165   BI.Mispreds += Mispreds;
166 }
167 
168 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
169                                    uint64_t Count, uint64_t Mispreds) {
170   auto Iter = InterIndex[OffsetFrom].find(To);
171   if (Iter == InterIndex[OffsetFrom].end()) {
172     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
173     InterIndex[OffsetFrom][To] = Data.size() - 1;
174     return;
175   }
176   BranchInfo &BI = Data[Iter->second];
177   BI.Branches += Count;
178   BI.Mispreds += Mispreds;
179 }
180 
181 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
182                                     uint64_t Count, uint64_t Mispreds) {
183   auto Iter = EntryIndex[OffsetTo].find(From);
184   if (Iter == EntryIndex[OffsetTo].end()) {
185     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
186                            Count);
187     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
188     return;
189   }
190   BranchInfo &BI = EntryData[Iter->second];
191   BI.Branches += Count;
192   BI.Mispreds += Mispreds;
193 }
194 
195 void BranchInfo::mergeWith(const BranchInfo &BI) {
196   Branches += BI.Branches;
197   Mispreds += BI.Mispreds;
198 }
199 
200 void BranchInfo::print(raw_ostream &OS) const {
201   OS << From.IsSymbol << " " << From.Name << " "
202      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
203      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
204      << '\n';
205 }
206 
207 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
208                                                       uint64_t To) const {
209   for (const BranchInfo &I : Data) {
210     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
211       return I;
212   }
213   return make_error_code(llvm::errc::invalid_argument);
214 }
215 
216 ErrorOr<const BranchInfo &>
217 FuncBranchData::getDirectCallBranch(uint64_t From) const {
218   // Commented out because it can be expensive.
219   // assert(std::is_sorted(Data.begin(), Data.end()));
220   struct Compare {
221     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
222       return BI.From.Offset < Val;
223     }
224     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
225       return Val < BI.From.Offset;
226     }
227   };
228   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
229   for (auto I = Range.first; I != Range.second; ++I) {
230     if (I->From.Name != I->To.Name)
231       return *I;
232   }
233   return make_error_code(llvm::errc::invalid_argument);
234 }
235 
236 void MemInfo::print(raw_ostream &OS) const {
237   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
238      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
239      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
240      << "\n";
241 }
242 
243 void MemInfo::prettyPrint(raw_ostream &OS) const {
244   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
245 }
246 
247 void FuncMemData::update(const Location &Offset, const Location &Addr) {
248   auto Iter = EventIndex[Offset.Offset].find(Addr);
249   if (Iter == EventIndex[Offset.Offset].end()) {
250     Data.emplace_back(MemInfo(Offset, Addr, 1));
251     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
252     return;
253   }
254   ++Data[Iter->second].Count;
255 }
256 
257 Error DataReader::preprocessProfile(BinaryContext &BC) {
258   if (std::error_code EC = parseInput()) {
259     return errorCodeToError(EC);
260   }
261 
262   if (opts::DumpData) {
263     dump();
264   }
265 
266   if (collectedInBoltedBinary()) {
267     outs() << "BOLT-INFO: profile collection done on a binary already "
268               "processed by BOLT\n";
269   }
270 
271   for (auto &BFI : BC.getBinaryFunctions()) {
272     BinaryFunction &Function = BFI.second;
273     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
274       setMemData(Function, MemData);
275       MemData->Used = true;
276     }
277     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
278       setBranchData(Function, FuncData);
279       Function.ExecutionCount = FuncData->ExecutionCount;
280       FuncData->Used = true;
281     }
282   }
283 
284   for (auto &BFI : BC.getBinaryFunctions()) {
285     BinaryFunction &Function = BFI.second;
286     matchProfileMemData(Function);
287   }
288 
289   return Error::success();
290 }
291 
292 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
293   for (auto &BFI : BC.getBinaryFunctions()) {
294     BinaryFunction &Function = BFI.second;
295     FuncMemData *MemoryData = getMemData(Function);
296     if (!MemoryData)
297       continue;
298 
299     for (MemInfo &MI : MemoryData->Data) {
300       const uint64_t Offset = MI.Offset.Offset;
301       auto II = Function.Instructions.find(Offset);
302       if (II == Function.Instructions.end()) {
303         // Ignore bad instruction address.
304         continue;
305       }
306 
307       auto &MemAccessProfile =
308           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
309               II->second, "MemoryAccessProfile");
310       BinaryData *BD = nullptr;
311       if (MI.Addr.IsSymbol) {
312         BD = BC.getBinaryDataByName(MI.Addr.Name);
313       }
314       MemAccessProfile.AddressAccessInfo.push_back(
315           {BD, MI.Addr.Offset, MI.Count});
316       auto NextII = std::next(II);
317       if (NextII == Function.Instructions.end()) {
318         MemAccessProfile.NextInstrOffset = Function.getSize();
319       } else {
320         MemAccessProfile.NextInstrOffset = II->first;
321       }
322     }
323     Function.HasMemoryProfile = true;
324   }
325 
326   return Error::success();
327 }
328 
329 Error DataReader::readProfile(BinaryContext &BC) {
330   for (auto &BFI : BC.getBinaryFunctions()) {
331     BinaryFunction &Function = BFI.second;
332     readProfile(Function);
333   }
334 
335   uint64_t NumUnused = 0;
336   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
337     if (!FuncData.getValue().Used)
338       ++NumUnused;
339   BC.setNumUnusedProfiledObjects(NumUnused);
340 
341   return Error::success();
342 }
343 
344 std::error_code DataReader::parseInput() {
345   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
346       MemoryBuffer::getFileOrSTDIN(Filename);
347   if (std::error_code EC = MB.getError()) {
348     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
349     return EC;
350   }
351   FileBuf = std::move(MB.get());
352   ParsingBuf = FileBuf->getBuffer();
353   if (std::error_code EC = parse()) {
354     return EC;
355   }
356   if (!ParsingBuf.empty()) {
357     Diag << "WARNING: invalid profile data detected at line " << Line
358          << ". Possibly corrupted profile.\n";
359   }
360 
361   buildLTONameMaps();
362 
363   return std::error_code();
364 }
365 
366 void DataReader::readProfile(BinaryFunction &BF) {
367   if (BF.empty())
368     return;
369 
370   if (!hasLBR()) {
371     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
372     readSampleData(BF);
373     return;
374   }
375 
376   BF.ProfileFlags = BinaryFunction::PF_LBR;
377 
378   // Possibly assign/re-assign branch profile data.
379   matchProfileData(BF);
380 
381   FuncBranchData *FBD = getBranchData(BF);
382   if (!FBD)
383     return;
384 
385   // Assign basic block counts to function entry points. These only include
386   // counts for outside entries.
387   //
388   // There is a slight skew introduced here as branches originated from RETs
389   // may be accounted for in the execution count of an entry block if the last
390   // instruction in a predecessor fall-through block is a call. This situation
391   // should rarely happen because there are few multiple-entry functions.
392   for (const BranchInfo &BI : FBD->EntryData) {
393     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
394     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
395       uint64_t Count = BB->getExecutionCount();
396       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
397         Count = 0;
398       BB->setExecutionCount(Count + BI.Branches);
399     }
400   }
401 
402   uint64_t MismatchedBranches = 0;
403   for (const BranchInfo &BI : FBD->Data) {
404     if (BI.From.Name != BI.To.Name) {
405       continue;
406     }
407 
408     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
409                       BI.Mispreds)) {
410       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
411                         << BI.To.Offset << '\n');
412       ++MismatchedBranches;
413     }
414   }
415 
416   // Convert branch data into annotations.
417   convertBranchData(BF);
418 }
419 
420 void DataReader::matchProfileData(BinaryFunction &BF) {
421   // This functionality is available for LBR-mode only
422   // TODO: Implement evaluateProfileData() for samples, checking whether
423   // sample addresses match instruction addresses in the function
424   if (!hasLBR())
425     return;
426 
427   FuncBranchData *FBD = getBranchData(BF);
428   if (FBD) {
429     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
430     BF.RawBranchCount = FBD->getNumExecutedBranches();
431     if (BF.ProfileMatchRatio == 1.0f) {
432       if (fetchProfileForOtherEntryPoints(BF)) {
433         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
434         BF.ExecutionCount = FBD->ExecutionCount;
435         BF.RawBranchCount = FBD->getNumExecutedBranches();
436       }
437       return;
438     }
439   }
440 
441   // Check if the function name can fluctuate between several compilations
442   // possibly triggered by minor unrelated code changes in the source code
443   // of the input binary.
444   if (!hasVolatileName(BF))
445     return;
446 
447   // Check for a profile that matches with 100% confidence.
448   const std::vector<FuncBranchData *> AllBranchData =
449       getBranchDataForNamesRegex(BF.getNames());
450   for (FuncBranchData *NewBranchData : AllBranchData) {
451     // Prevent functions from sharing the same profile.
452     if (NewBranchData->Used)
453       continue;
454 
455     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
456       continue;
457 
458     if (FBD)
459       FBD->Used = false;
460 
461     // Update function profile data with the new set.
462     setBranchData(BF, NewBranchData);
463     NewBranchData->Used = true;
464     BF.ExecutionCount = NewBranchData->ExecutionCount;
465     BF.ProfileMatchRatio = 1.0f;
466     break;
467   }
468 }
469 
470 void DataReader::matchProfileMemData(BinaryFunction &BF) {
471   const std::vector<FuncMemData *> AllMemData =
472       getMemDataForNamesRegex(BF.getNames());
473   for (FuncMemData *NewMemData : AllMemData) {
474     // Prevent functions from sharing the same profile.
475     if (NewMemData->Used)
476       continue;
477 
478     if (FuncMemData *MD = getMemData(BF))
479       MD->Used = false;
480 
481     // Update function profile data with the new set.
482     setMemData(BF, NewMemData);
483     NewMemData->Used = true;
484     break;
485   }
486 }
487 
488 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
489   BinaryContext &BC = BF.getBinaryContext();
490 
491   FuncBranchData *FBD = getBranchData(BF);
492   if (!FBD)
493     return false;
494 
495   // Check if we are missing profiling data for secondary entry points
496   bool First = true;
497   bool Updated = false;
498   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
499     if (First) {
500       First = false;
501       continue;
502     }
503     if (BB->isEntryPoint()) {
504       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
505       // Look for branch data associated with this entry point
506       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
507         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
508           FBD->appendFrom(*Data, BB->getOffset());
509           Data->Used = true;
510           Updated = true;
511         }
512       }
513     }
514   }
515 
516   return Updated;
517 }
518 
519 float DataReader::evaluateProfileData(BinaryFunction &BF,
520                                       const FuncBranchData &BranchData) const {
521   BinaryContext &BC = BF.getBinaryContext();
522 
523   // Until we define a minimal profile, we consider an empty branch data to be
524   // a valid profile. It could happen to a function without branches when we
525   // still have an EntryData for the execution count.
526   if (BranchData.Data.empty()) {
527     return 1.0f;
528   }
529 
530   uint64_t NumMatchedBranches = 0;
531   for (const BranchInfo &BI : BranchData.Data) {
532     bool IsValid = false;
533     if (BI.From.Name == BI.To.Name) {
534       // Try to record information with 0 count.
535       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
536     } else if (collectedInBoltedBinary()) {
537       // We can't check branch source for collections in bolted binaries because
538       // the source of the branch may be mapped to the first instruction in a BB
539       // instead of the original branch (which may not exist in the source bin).
540       IsValid = true;
541     } else {
542       // The branch has to originate from this function.
543       // Check for calls, tail calls, rets and indirect branches.
544       // When matching profiling info, we did not reach the stage
545       // when we identify tail calls, so they are still represented
546       // by regular branch instructions and we need isBranch() here.
547       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
548       // If it's a prefix - skip it.
549       if (Instr && BC.MIB->isPrefix(*Instr))
550         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
551       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
552                     BC.MIB->isReturn(*Instr))) {
553         IsValid = true;
554       }
555     }
556 
557     if (IsValid) {
558       ++NumMatchedBranches;
559       continue;
560     }
561 
562     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
563                       << Twine::utohexstr(BI.From.Offset) << " -> ";
564                if (BI.From.Name == BI.To.Name) dbgs()
565                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
566                else dbgs() << "<outbounds>\n";);
567   }
568 
569   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
570   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
571     errs() << "BOLT-WARNING: profile branches match only "
572            << format("%.1f%%", MatchRatio * 100.0f) << " ("
573            << NumMatchedBranches << '/' << BranchData.Data.size()
574            << ") for function " << BF << '\n';
575   }
576 
577   return MatchRatio;
578 }
579 
580 void DataReader::readSampleData(BinaryFunction &BF) {
581   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
582   if (!SampleDataOrErr)
583     return;
584 
585   // Basic samples mode territory (without LBR info)
586   // First step is to assign BB execution count based on samples from perf
587   BF.ProfileMatchRatio = 1.0f;
588   BF.removeTagsFromProfile();
589   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
590   bool NormalizeByCalls = usesEvent("branches");
591   static bool NagUser = true;
592   if (NagUser) {
593     outs()
594         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
595     if (NormalizeByInsnCount) {
596       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
597     } else if (NormalizeByCalls) {
598       outs() << "BOLT-INFO: normalizing samples by branches.\n";
599     }
600     NagUser = false;
601   }
602   uint64_t LastOffset = BF.getSize();
603   uint64_t TotalEntryCount = 0;
604   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
605        I != E; ++I) {
606     uint64_t CurOffset = I->first;
607     // Always work with samples multiplied by 1000 to avoid losing them if we
608     // later need to normalize numbers
609     uint64_t NumSamples =
610         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
611     if (NormalizeByInsnCount && I->second->getNumNonPseudos())
612       NumSamples /= I->second->getNumNonPseudos();
613     else if (NormalizeByCalls) {
614       uint32_t NumCalls = I->second->getNumCalls();
615       NumSamples /= NumCalls + 1;
616     }
617     I->second->setExecutionCount(NumSamples);
618     if (I->second->isEntryPoint())
619       TotalEntryCount += NumSamples;
620     LastOffset = CurOffset;
621   }
622 
623   BF.ExecutionCount = TotalEntryCount;
624 
625   estimateEdgeCounts(BF);
626 }
627 
628 void DataReader::convertBranchData(BinaryFunction &BF) const {
629   BinaryContext &BC = BF.getBinaryContext();
630 
631   if (BF.empty())
632     return;
633 
634   FuncBranchData *FBD = getBranchData(BF);
635   if (!FBD)
636     return;
637 
638   // Profile information for calls.
639   //
640   // There are 3 cases that we annotate differently:
641   //   1) Conditional tail calls that could be mispredicted.
642   //   2) Indirect calls to multiple destinations with mispredictions.
643   //      Before we validate CFG we have to handle indirect branches here too.
644   //   3) Regular direct calls. The count could be different from containing
645   //      basic block count. Keep this data in case we find it useful.
646   //
647   for (BranchInfo &BI : FBD->Data) {
648     // Ignore internal branches.
649     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
650       continue;
651 
652     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
653     if (!Instr ||
654         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
655       continue;
656 
657     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
658       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) {
659         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
660                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
661                << '\n';
662       }
663       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
664       Value += Count;
665     };
666 
667     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
668       IndirectCallSiteProfile &CSP =
669           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
670               *Instr, "CallProfile");
671       MCSymbol *CalleeSymbol = nullptr;
672       if (BI.To.IsSymbol) {
673         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) {
674           CalleeSymbol = BD->getSymbol();
675         }
676       }
677       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
678     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
679       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
680       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
681     } else {
682       setOrUpdateAnnotation("Count", BI.Branches);
683     }
684   }
685 }
686 
687 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
688                               uint64_t Count, uint64_t Mispreds) const {
689   BinaryContext &BC = BF.getBinaryContext();
690 
691   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
692   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
693 
694   if (!FromBB || !ToBB) {
695     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
696     return false;
697   }
698 
699   // Could be bad LBR data; ignore the branch. In the case of data collected
700   // in binaries optimized by BOLT, a source BB may be mapped to two output
701   // BBs as a result of optimizations. In that case, a branch between these
702   // two will be recorded as a branch from A going to A in the source address
703   // space. Keep processing.
704   if (From == To) {
705     return true;
706   }
707 
708   // Return from a tail call.
709   if (FromBB->succ_size() == 0)
710     return true;
711 
712   // Very rarely we will see ignored branches. Do a linear check.
713   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) {
714     if (Branch ==
715         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
716       return true;
717   }
718 
719   bool OffsetMatches = !!(To == ToBB->getOffset());
720   if (!OffsetMatches) {
721     // Skip the nops to support old .fdata
722     uint64_t Offset = ToBB->getOffset();
723     for (MCInst &Instr : *ToBB) {
724       if (!BC.MIB->isNoop(Instr))
725         break;
726 
727       Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
728     }
729 
730     if (To == Offset)
731       OffsetMatches = true;
732   }
733 
734   if (!OffsetMatches) {
735     // "To" could be referring to nop instructions in between 2 basic blocks.
736     // While building the CFG we make sure these nops are attributed to the
737     // previous basic block, thus we check if the destination belongs to the
738     // gap past the last instruction.
739     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
740     if (LastInstr) {
741       const uint32_t LastInstrOffset =
742           BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Offset");
743 
744       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
745       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) {
746         return true;
747       }
748 
749       if (To <= LastInstrOffset) {
750         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
751                           << " in " << BF << " : " << From << " -> " << To
752                           << '\n');
753         return false;
754       }
755     }
756 
757     // The real destination is the layout successor of the detected ToBB.
758     if (ToBB == BF.BasicBlocksLayout.back())
759       return false;
760     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
761     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
762     ToBB = NextBB;
763   }
764 
765   // If there's no corresponding instruction for 'From', we have probably
766   // discarded it as a FT from __builtin_unreachable.
767   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
768   if (!FromInstruction) {
769     // If the data was collected in a bolted binary, the From addresses may be
770     // translated to the first instruction of the source BB if BOLT inserted
771     // a new branch that did not exist in the source (we can't map it to the
772     // source instruction, so we map it to the first instr of source BB).
773     // We do not keep offsets for random instructions. So the check above will
774     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
775     if (collectedInBoltedBinary()) {
776       if (FromBB->getInputOffset() != From) {
777         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
778                           << BF << '\n');
779         return false;
780       }
781       FromInstruction = nullptr;
782     } else {
783       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
784                         << '\n');
785       return false;
786     }
787   }
788 
789   if (!FromBB->getSuccessor(ToBB->getLabel())) {
790     // Check if this is a recursive call or a return from a recursive call.
791     if (FromInstruction && ToBB->isEntryPoint() &&
792         (BC.MIB->isCall(*FromInstruction) ||
793          BC.MIB->isIndirectBranch(*FromInstruction))) {
794       // Execution count is already accounted for.
795       return true;
796     }
797     // For data collected in a bolted binary, we may have created two output BBs
798     // that map to one original block. Branches between these two blocks will
799     // appear here as one BB jumping to itself, even though it has no loop
800     // edges. Ignore these.
801     if (collectedInBoltedBinary() && FromBB == ToBB)
802       return true;
803 
804     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
805     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
806         FTSuccessor->getSuccessor(ToBB->getLabel())) {
807       BinaryBasicBlock::BinaryBranchInfo &FTBI =
808           FTSuccessor->getBranchInfo(*ToBB);
809       FTBI.Count += Count;
810       if (Count)
811         FTBI.MispredictedCount += Mispreds;
812       ToBB = FTSuccessor;
813     } else {
814       LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
815                         << Twine::utohexstr(From) << " -> "
816                         << Twine::utohexstr(To) << '\n');
817       return false;
818     }
819   }
820 
821   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
822   BI.Count += Count;
823   // Only update mispredicted count if it the count was real.
824   if (Count) {
825     BI.MispredictedCount += Mispreds;
826   }
827 
828   return true;
829 }
830 
831 void DataReader::reportError(StringRef ErrorMsg) {
832   Diag << "Error reading BOLT data input file: line " << Line << ", column "
833        << Col << ": " << ErrorMsg << '\n';
834 }
835 
836 bool DataReader::expectAndConsumeFS() {
837   if (ParsingBuf[0] != FieldSeparator) {
838     reportError("expected field separator");
839     return false;
840   }
841   ParsingBuf = ParsingBuf.drop_front(1);
842   Col += 1;
843   return true;
844 }
845 
846 void DataReader::consumeAllRemainingFS() {
847   while (ParsingBuf[0] == FieldSeparator) {
848     ParsingBuf = ParsingBuf.drop_front(1);
849     Col += 1;
850   }
851 }
852 
853 bool DataReader::checkAndConsumeNewLine() {
854   if (ParsingBuf[0] != '\n')
855     return false;
856 
857   ParsingBuf = ParsingBuf.drop_front(1);
858   Col = 0;
859   Line += 1;
860   return true;
861 }
862 
863 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
864   if (EndChar == '\\') {
865     reportError("EndChar could not be backslash");
866     return make_error_code(llvm::errc::io_error);
867   }
868 
869   std::string EndChars(1, EndChar);
870   EndChars.push_back('\\');
871   if (EndNl)
872     EndChars.push_back('\n');
873 
874   size_t StringEnd = 0;
875   do {
876     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
877     if (StringEnd == StringRef::npos ||
878         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
879       reportError("malformed field");
880       return make_error_code(llvm::errc::io_error);
881     }
882 
883     if (ParsingBuf[StringEnd] != '\\')
884       break;
885 
886     StringEnd += 2;
887   } while (1);
888 
889   StringRef Str = ParsingBuf.substr(0, StringEnd);
890 
891   // If EndNl was set and nl was found instead of EndChar, do not consume the
892   // new line.
893   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
894   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
895 
896   ParsingBuf = ParsingBuf.drop_front(End);
897   if (EndChar == '\n') {
898     Col = 0;
899     Line += 1;
900   } else {
901     Col += End;
902   }
903   return Str;
904 }
905 
906 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
907   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
908   if (std::error_code EC = NumStrRes.getError())
909     return EC;
910   StringRef NumStr = NumStrRes.get();
911   int64_t Num;
912   if (NumStr.getAsInteger(10, Num)) {
913     reportError("expected decimal number");
914     Diag << "Found: " << NumStr << "\n";
915     return make_error_code(llvm::errc::io_error);
916   }
917   return Num;
918 }
919 
920 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
921   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
922   if (std::error_code EC = NumStrRes.getError())
923     return EC;
924   StringRef NumStr = NumStrRes.get();
925   uint64_t Num;
926   if (NumStr.getAsInteger(16, Num)) {
927     reportError("expected hexidecimal number");
928     Diag << "Found: " << NumStr << "\n";
929     return make_error_code(llvm::errc::io_error);
930   }
931   return Num;
932 }
933 
934 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
935                                             bool ExpectMemLoc) {
936   // Read whether the location of the branch should be DSO or a symbol
937   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
938   // symbol.
939   // The symbol flag is also used to tag memory load events by adding 3 to the
940   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
941   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
942       ParsingBuf[0] != '2') {
943     reportError("expected 0, 1 or 2");
944     return make_error_code(llvm::errc::io_error);
945   }
946 
947   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
948       ParsingBuf[0] != '5') {
949     reportError("expected 3, 4 or 5");
950     return make_error_code(llvm::errc::io_error);
951   }
952 
953   bool IsSymbol =
954       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
955       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
956   ParsingBuf = ParsingBuf.drop_front(1);
957   Col += 1;
958 
959   if (!expectAndConsumeFS())
960     return make_error_code(llvm::errc::io_error);
961   consumeAllRemainingFS();
962 
963   // Read the string containing the symbol or the DSO name
964   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
965   if (std::error_code EC = NameRes.getError())
966     return EC;
967   StringRef Name = NameRes.get();
968   consumeAllRemainingFS();
969 
970   // Read the offset
971   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
972   if (std::error_code EC = Offset.getError())
973     return EC;
974 
975   return Location(IsSymbol, Name, Offset.get());
976 }
977 
978 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
979   ErrorOr<Location> Res = parseLocation(FieldSeparator);
980   if (std::error_code EC = Res.getError())
981     return EC;
982   Location From = Res.get();
983 
984   consumeAllRemainingFS();
985   Res = parseLocation(FieldSeparator);
986   if (std::error_code EC = Res.getError())
987     return EC;
988   Location To = Res.get();
989 
990   consumeAllRemainingFS();
991   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
992   if (std::error_code EC = MRes.getError())
993     return EC;
994   int64_t NumMispreds = MRes.get();
995 
996   consumeAllRemainingFS();
997   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
998   if (std::error_code EC = BRes.getError())
999     return EC;
1000   int64_t NumBranches = BRes.get();
1001 
1002   consumeAllRemainingFS();
1003   if (!checkAndConsumeNewLine()) {
1004     reportError("expected end of line");
1005     return make_error_code(llvm::errc::io_error);
1006   }
1007 
1008   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
1009 }
1010 
1011 ErrorOr<MemInfo> DataReader::parseMemInfo() {
1012   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
1013   if (std::error_code EC = Res.getError())
1014     return EC;
1015   Location Offset = Res.get();
1016 
1017   consumeAllRemainingFS();
1018   Res = parseMemLocation(FieldSeparator);
1019   if (std::error_code EC = Res.getError())
1020     return EC;
1021   Location Addr = Res.get();
1022 
1023   consumeAllRemainingFS();
1024   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1025   if (std::error_code EC = CountRes.getError())
1026     return EC;
1027 
1028   consumeAllRemainingFS();
1029   if (!checkAndConsumeNewLine()) {
1030     reportError("expected end of line");
1031     return make_error_code(llvm::errc::io_error);
1032   }
1033 
1034   return MemInfo(Offset, Addr, CountRes.get());
1035 }
1036 
1037 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1038   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1039   if (std::error_code EC = Res.getError())
1040     return EC;
1041   Location Address = Res.get();
1042 
1043   consumeAllRemainingFS();
1044   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1045   if (std::error_code EC = BRes.getError())
1046     return EC;
1047   int64_t Occurrences = BRes.get();
1048 
1049   consumeAllRemainingFS();
1050   if (!checkAndConsumeNewLine()) {
1051     reportError("expected end of line");
1052     return make_error_code(llvm::errc::io_error);
1053   }
1054 
1055   return SampleInfo(std::move(Address), Occurrences);
1056 }
1057 
1058 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1059   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1060     return false;
1061   ParsingBuf = ParsingBuf.drop_front(6);
1062   Col += 6;
1063 
1064   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1065     ParsingBuf = ParsingBuf.drop_front(1);
1066 
1067   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1068     ErrorOr<StringRef> EventName = parseString(' ', true);
1069     if (!EventName)
1070       return make_error_code(llvm::errc::io_error);
1071     EventNames.insert(EventName.get());
1072   }
1073 
1074   if (!checkAndConsumeNewLine()) {
1075     reportError("malformed no_lbr line");
1076     return make_error_code(llvm::errc::io_error);
1077   }
1078   return true;
1079 }
1080 
1081 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1082   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1083     return false;
1084   ParsingBuf = ParsingBuf.drop_front(16);
1085   Col += 16;
1086 
1087   if (!checkAndConsumeNewLine()) {
1088     reportError("malformed boltedcollection line");
1089     return make_error_code(llvm::errc::io_error);
1090   }
1091   return true;
1092 }
1093 
1094 bool DataReader::hasBranchData() {
1095   if (ParsingBuf.size() == 0)
1096     return false;
1097 
1098   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1099     return true;
1100   return false;
1101 }
1102 
1103 bool DataReader::hasMemData() {
1104   if (ParsingBuf.size() == 0)
1105     return false;
1106 
1107   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1108     return true;
1109   return false;
1110 }
1111 
1112 std::error_code DataReader::parseInNoLBRMode() {
1113   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1114     auto I = NamesToSamples.find(Name);
1115     if (I == NamesToSamples.end()) {
1116       bool Success;
1117       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1118           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1119 
1120       assert(Success && "unexpected result of insert");
1121     }
1122     return I;
1123   };
1124 
1125   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1126     auto I = NamesToMemEvents.find(Name);
1127     if (I == NamesToMemEvents.end()) {
1128       bool Success;
1129       std::tie(I, Success) = NamesToMemEvents.insert(
1130           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1131       assert(Success && "unexpected result of insert");
1132     }
1133     return I;
1134   };
1135 
1136   while (hasBranchData()) {
1137     ErrorOr<SampleInfo> Res = parseSampleInfo();
1138     if (std::error_code EC = Res.getError())
1139       return EC;
1140 
1141     SampleInfo SI = Res.get();
1142 
1143     // Ignore samples not involving known locations
1144     if (!SI.Loc.IsSymbol)
1145       continue;
1146 
1147     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1148     I->getValue().Data.emplace_back(std::move(SI));
1149   }
1150 
1151   while (hasMemData()) {
1152     ErrorOr<MemInfo> Res = parseMemInfo();
1153     if (std::error_code EC = Res.getError())
1154       return EC;
1155 
1156     MemInfo MI = Res.get();
1157 
1158     // Ignore memory events not involving known pc.
1159     if (!MI.Offset.IsSymbol)
1160       continue;
1161 
1162     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1163     I->getValue().Data.emplace_back(std::move(MI));
1164   }
1165 
1166   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples) {
1167     std::stable_sort(FuncSamples.second.Data.begin(),
1168                      FuncSamples.second.Data.end());
1169   }
1170 
1171   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1172     std::stable_sort(MemEvents.second.Data.begin(),
1173                      MemEvents.second.Data.end());
1174   }
1175 
1176   return std::error_code();
1177 }
1178 
1179 std::error_code DataReader::parse() {
1180   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1181     auto I = NamesToBranches.find(Name);
1182     if (I == NamesToBranches.end()) {
1183       bool Success;
1184       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1185           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1186                                FuncBranchData::ContainerTy())));
1187       assert(Success && "unexpected result of insert");
1188     }
1189     return I;
1190   };
1191 
1192   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1193     auto I = NamesToMemEvents.find(Name);
1194     if (I == NamesToMemEvents.end()) {
1195       bool Success;
1196       std::tie(I, Success) = NamesToMemEvents.insert(
1197           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1198       assert(Success && "unexpected result of insert");
1199     }
1200     return I;
1201   };
1202 
1203   Col = 0;
1204   Line = 1;
1205   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1206   if (!FlagOrErr)
1207     return FlagOrErr.getError();
1208   NoLBRMode = *FlagOrErr;
1209 
1210   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1211   if (!BATFlagOrErr)
1212     return BATFlagOrErr.getError();
1213   BATMode = *BATFlagOrErr;
1214 
1215   if (!hasBranchData() && !hasMemData()) {
1216     Diag << "ERROR: no valid profile data found\n";
1217     return make_error_code(llvm::errc::io_error);
1218   }
1219 
1220   if (NoLBRMode)
1221     return parseInNoLBRMode();
1222 
1223   while (hasBranchData()) {
1224     ErrorOr<BranchInfo> Res = parseBranchInfo();
1225     if (std::error_code EC = Res.getError())
1226       return EC;
1227 
1228     BranchInfo BI = Res.get();
1229 
1230     // Ignore branches not involving known location.
1231     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1232       continue;
1233 
1234     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1235     I->getValue().Data.emplace_back(std::move(BI));
1236 
1237     // Add entry data for branches to another function or branches
1238     // to entry points (including recursive calls)
1239     if (BI.To.IsSymbol &&
1240         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1241       I = GetOrCreateFuncEntry(BI.To.Name);
1242       I->getValue().EntryData.emplace_back(std::move(BI));
1243     }
1244 
1245     // If destination is the function start - update execution count.
1246     // NB: the data is skewed since we cannot tell tail recursion from
1247     //     branches to the function start.
1248     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1249       I = GetOrCreateFuncEntry(BI.To.Name);
1250       I->getValue().ExecutionCount += BI.Branches;
1251     }
1252   }
1253 
1254   while (hasMemData()) {
1255     ErrorOr<MemInfo> Res = parseMemInfo();
1256     if (std::error_code EC = Res.getError())
1257       return EC;
1258 
1259     MemInfo MI = Res.get();
1260 
1261     // Ignore memory events not involving known pc.
1262     if (!MI.Offset.IsSymbol)
1263       continue;
1264 
1265     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1266     I->getValue().Data.emplace_back(std::move(MI));
1267   }
1268 
1269   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches) {
1270     std::stable_sort(FuncBranches.second.Data.begin(),
1271                      FuncBranches.second.Data.end());
1272   }
1273 
1274   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents) {
1275     std::stable_sort(MemEvents.second.Data.begin(),
1276                      MemEvents.second.Data.end());
1277   }
1278 
1279   return std::error_code();
1280 }
1281 
1282 void DataReader::buildLTONameMaps() {
1283   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1284     const StringRef FuncName = FuncData.getKey();
1285     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1286     if (CommonName)
1287       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1288   }
1289 
1290   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1291     const StringRef FuncName = FuncData.getKey();
1292     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1293     if (CommonName)
1294       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1295   }
1296 }
1297 
1298 namespace {
1299 template <typename MapTy>
1300 decltype(MapTy::MapEntryTy::second) *
1301 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1302   // Do a reverse order iteration since the name in profile has a higher chance
1303   // of matching a name at the end of the list.
1304   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1305     auto I = Map.find(normalizeName((*SI)->getName()));
1306     if (I != Map.end())
1307       return &I->getValue();
1308   }
1309   return nullptr;
1310 }
1311 
1312 template <typename MapTy>
1313 decltype(MapTy::MapEntryTy::second) *
1314 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1315   // Do a reverse order iteration since the name in profile has a higher chance
1316   // of matching a name at the end of the list.
1317   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1318     auto I = Map.find(normalizeName(*FI));
1319     if (I != Map.end())
1320       return &I->getValue();
1321   }
1322   return nullptr;
1323 }
1324 
1325 template <typename MapTy>
1326 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1327     MapTy &Map,
1328     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1329         &LTOCommonNameMap,
1330     const std::vector<StringRef> &FuncNames) {
1331   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1332   // Do a reverse order iteration since the name in profile has a higher chance
1333   // of matching a name at the end of the list.
1334   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1335     std::string Name = normalizeName(*FI);
1336     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1337     if (LTOCommonName) {
1338       auto I = LTOCommonNameMap.find(*LTOCommonName);
1339       if (I != LTOCommonNameMap.end()) {
1340         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1341             I->getValue();
1342         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1343       }
1344     } else {
1345       auto I = Map.find(Name);
1346       if (I != Map.end()) {
1347         return {&I->getValue()};
1348       }
1349     }
1350   }
1351   return AllData;
1352 }
1353 
1354 }
1355 
1356 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1357   if (getBranchData(Function) || getMemData(Function))
1358     return true;
1359 
1360   if (getBranchDataForNames(Function.getNames()) ||
1361       getMemDataForNames(Function.getNames()))
1362     return true;
1363 
1364   if (!hasVolatileName(Function))
1365     return false;
1366 
1367   const std::vector<FuncBranchData *> AllBranchData =
1368       getBranchDataForNamesRegex(Function.getNames());
1369   if (!AllBranchData.empty())
1370     return true;
1371 
1372   const std::vector<FuncMemData *> AllMemData =
1373       getMemDataForNamesRegex(Function.getNames());
1374   if (!AllMemData.empty())
1375     return true;
1376 
1377   return false;
1378 }
1379 
1380 FuncBranchData *
1381 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1382   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1383 }
1384 
1385 FuncBranchData *
1386 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1387   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1388 }
1389 
1390 FuncMemData *
1391 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1392   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1393 }
1394 
1395 FuncSampleData *
1396 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1397   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1398 }
1399 
1400 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1401     const std::vector<StringRef> &FuncNames) {
1402   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1403 }
1404 
1405 std::vector<FuncMemData *>
1406 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1407   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1408 }
1409 
1410 bool DataReader::hasLocalsWithFileName() const {
1411   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1412     const StringRef &FuncName = Func.getKey();
1413     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1414       return true;
1415   }
1416   return false;
1417 }
1418 
1419 void DataReader::dump() const {
1420   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1421     Diag << Func.getKey() << " branches:\n";
1422     for (const BranchInfo &BI : Func.getValue().Data) {
1423       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1424            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1425     }
1426     Diag << Func.getKey() << " entry points:\n";
1427     for (const BranchInfo &BI : Func.getValue().EntryData) {
1428       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1429            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1430     }
1431   }
1432 
1433   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1434     StringRef Event = I->getKey();
1435     Diag << "Data was collected with event: " << Event << "\n";
1436   }
1437   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1438     Diag << Func.getKey() << " samples:\n";
1439     for (const SampleInfo &SI : Func.getValue().Data) {
1440       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1441     }
1442   }
1443 
1444   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1445     Diag << "Memory events for " << Func.getValue().Name;
1446     Location LastOffset(0);
1447     for (const MemInfo &MI : Func.getValue().Data) {
1448       if (MI.Offset == LastOffset) {
1449         Diag << ", " << MI.Addr << "/" << MI.Count;
1450       } else {
1451         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1452       }
1453       LastOffset = MI.Offset;
1454     }
1455     Diag << "\n";
1456   }
1457 }
1458 
1459 } // namespace bolt
1460 } // namespace llvm
1461