1 //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions reads profile data written by the perf2bolt
10 // utility and stores it in memory for llvm-bolt consumption.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataReader.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <map>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "bolt-prof"
24 
25 using namespace llvm;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 extern llvm::cl::opt<unsigned> Verbosity;
31 
32 static cl::opt<bool>
33 DumpData("dump-data",
34   cl::desc("dump parsed bolt data for debugging"),
35   cl::Hidden,
36   cl::cat(BoltCategory));
37 
38 } // namespace opts
39 
40 namespace llvm {
41 namespace bolt {
42 
43 Optional<StringRef> getLTOCommonName(const StringRef Name) {
44   size_t LTOSuffixPos = Name.find(".lto_priv.");
45   if (LTOSuffixPos != StringRef::npos)
46     return Name.substr(0, LTOSuffixPos + 10);
47   if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos)
48     return Name.substr(0, LTOSuffixPos + 11);
49   return NoneType();
50 }
51 
52 namespace {
53 
54 /// Return true if the function name can change across compilations.
55 bool hasVolatileName(const BinaryFunction &BF) {
56   for (const StringRef Name : BF.getNames())
57     if (getLTOCommonName(Name))
58       return true;
59 
60   return false;
61 }
62 
63 /// Return standard escaped name of the function possibly renamed by BOLT.
64 std::string normalizeName(StringRef NameRef) {
65   // Strip "PG." prefix used for globalized locals.
66   NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
67   return getEscapedName(NameRef);
68 }
69 
70 } // anonymous namespace
71 
72 raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
73   if (Loc.IsSymbol) {
74     OS << Loc.Name;
75     if (Loc.Offset)
76       OS << "+" << Twine::utohexstr(Loc.Offset);
77   } else {
78     OS << Twine::utohexstr(Loc.Offset);
79   }
80   return OS;
81 }
82 
83 void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
84   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
85   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
86     if (I->From.Name == FBD.Name) {
87       I->From.Name = this->Name;
88       I->From.Offset += Offset;
89     }
90     if (I->To.Name == FBD.Name) {
91       I->To.Name = this->Name;
92       I->To.Offset += Offset;
93     }
94   }
95   std::stable_sort(Data.begin(), Data.end());
96   ExecutionCount += FBD.ExecutionCount;
97   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
98     assert(I->To.Name == FBD.Name);
99     auto NewElmt = EntryData.insert(EntryData.end(), *I);
100     NewElmt->To.Name = this->Name;
101     NewElmt->To.Offset += Offset;
102   }
103 }
104 
105 uint64_t FuncBranchData::getNumExecutedBranches() const {
106   uint64_t ExecutedBranches = 0;
107   for (const BranchInfo &BI : Data) {
108     int64_t BranchCount = BI.Branches;
109     assert(BranchCount >= 0 && "branch execution count should not be negative");
110     ExecutedBranches += BranchCount;
111   }
112   return ExecutedBranches;
113 }
114 
115 void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }
116 
117 void SampleInfo::print(raw_ostream &OS) const {
118   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
119      << " " << Hits << "\n";
120 }
121 
122 uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
123   assert(std::is_sorted(Data.begin(), Data.end()));
124   struct Compare {
125     bool operator()(const SampleInfo &SI, const uint64_t Val) const {
126       return SI.Loc.Offset < Val;
127     }
128     bool operator()(const uint64_t Val, const SampleInfo &SI) const {
129       return Val < SI.Loc.Offset;
130     }
131   };
132   uint64_t Result = 0;
133   for (auto I = std::lower_bound(Data.begin(), Data.end(), Start, Compare()),
134             E = std::lower_bound(Data.begin(), Data.end(), End, Compare());
135        I != E; ++I)
136     Result += I->Hits;
137   return Result;
138 }
139 
140 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
141   auto Iter = Index.find(Offset);
142   if (Iter == Index.end()) {
143     Data.emplace_back(Location(true, Name, Offset), Count);
144     Index[Offset] = Data.size() - 1;
145     return;
146   }
147   SampleInfo &SI = Data[Iter->second];
148   SI.Hits += Count;
149 }
150 
151 void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
152                                      uint64_t Count, uint64_t Mispreds) {
153   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
154   if (Iter == IntraIndex[OffsetFrom].end()) {
155     Data.emplace_back(Location(true, Name, OffsetFrom),
156                       Location(true, Name, OffsetTo), Mispreds, Count);
157     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
158     return;
159   }
160   BranchInfo &BI = Data[Iter->second];
161   BI.Branches += Count;
162   BI.Mispreds += Mispreds;
163 }
164 
165 void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
166                                    uint64_t Count, uint64_t Mispreds) {
167   auto Iter = InterIndex[OffsetFrom].find(To);
168   if (Iter == InterIndex[OffsetFrom].end()) {
169     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
170     InterIndex[OffsetFrom][To] = Data.size() - 1;
171     return;
172   }
173   BranchInfo &BI = Data[Iter->second];
174   BI.Branches += Count;
175   BI.Mispreds += Mispreds;
176 }
177 
178 void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
179                                     uint64_t Count, uint64_t Mispreds) {
180   auto Iter = EntryIndex[OffsetTo].find(From);
181   if (Iter == EntryIndex[OffsetTo].end()) {
182     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
183                            Count);
184     EntryIndex[OffsetTo][From] = EntryData.size() - 1;
185     return;
186   }
187   BranchInfo &BI = EntryData[Iter->second];
188   BI.Branches += Count;
189   BI.Mispreds += Mispreds;
190 }
191 
192 void BranchInfo::mergeWith(const BranchInfo &BI) {
193   Branches += BI.Branches;
194   Mispreds += BI.Mispreds;
195 }
196 
197 void BranchInfo::print(raw_ostream &OS) const {
198   OS << From.IsSymbol << " " << From.Name << " "
199      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
200      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
201      << '\n';
202 }
203 
204 ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
205                                                       uint64_t To) const {
206   for (const BranchInfo &I : Data)
207     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
208       return I;
209 
210   return make_error_code(llvm::errc::invalid_argument);
211 }
212 
213 ErrorOr<const BranchInfo &>
214 FuncBranchData::getDirectCallBranch(uint64_t From) const {
215   // Commented out because it can be expensive.
216   // assert(std::is_sorted(Data.begin(), Data.end()));
217   struct Compare {
218     bool operator()(const BranchInfo &BI, const uint64_t Val) const {
219       return BI.From.Offset < Val;
220     }
221     bool operator()(const uint64_t Val, const BranchInfo &BI) const {
222       return Val < BI.From.Offset;
223     }
224   };
225   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
226   for (auto I = Range.first; I != Range.second; ++I)
227     if (I->From.Name != I->To.Name)
228       return *I;
229 
230   return make_error_code(llvm::errc::invalid_argument);
231 }
232 
233 void MemInfo::print(raw_ostream &OS) const {
234   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
235      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
236      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
237      << "\n";
238 }
239 
240 void MemInfo::prettyPrint(raw_ostream &OS) const {
241   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
242 }
243 
244 void FuncMemData::update(const Location &Offset, const Location &Addr) {
245   auto Iter = EventIndex[Offset.Offset].find(Addr);
246   if (Iter == EventIndex[Offset.Offset].end()) {
247     Data.emplace_back(MemInfo(Offset, Addr, 1));
248     EventIndex[Offset.Offset][Addr] = Data.size() - 1;
249     return;
250   }
251   ++Data[Iter->second].Count;
252 }
253 
254 Error DataReader::preprocessProfile(BinaryContext &BC) {
255   if (std::error_code EC = parseInput())
256     return errorCodeToError(EC);
257 
258   if (opts::DumpData)
259     dump();
260 
261   if (collectedInBoltedBinary())
262     outs() << "BOLT-INFO: profile collection done on a binary already "
263               "processed by BOLT\n";
264 
265   for (auto &BFI : BC.getBinaryFunctions()) {
266     BinaryFunction &Function = BFI.second;
267     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
268       setMemData(Function, MemData);
269       MemData->Used = true;
270     }
271     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
272       setBranchData(Function, FuncData);
273       Function.ExecutionCount = FuncData->ExecutionCount;
274       FuncData->Used = true;
275     }
276   }
277 
278   for (auto &BFI : BC.getBinaryFunctions()) {
279     BinaryFunction &Function = BFI.second;
280     matchProfileMemData(Function);
281   }
282 
283   return Error::success();
284 }
285 
286 Error DataReader::readProfilePreCFG(BinaryContext &BC) {
287   for (auto &BFI : BC.getBinaryFunctions()) {
288     BinaryFunction &Function = BFI.second;
289     FuncMemData *MemoryData = getMemData(Function);
290     if (!MemoryData)
291       continue;
292 
293     for (MemInfo &MI : MemoryData->Data) {
294       const uint64_t Offset = MI.Offset.Offset;
295       auto II = Function.Instructions.find(Offset);
296       if (II == Function.Instructions.end()) {
297         // Ignore bad instruction address.
298         continue;
299       }
300 
301       auto &MemAccessProfile =
302           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
303               II->second, "MemoryAccessProfile");
304       BinaryData *BD = nullptr;
305       if (MI.Addr.IsSymbol)
306         BD = BC.getBinaryDataByName(MI.Addr.Name);
307       MemAccessProfile.AddressAccessInfo.push_back(
308           {BD, MI.Addr.Offset, MI.Count});
309       auto NextII = std::next(II);
310       if (NextII == Function.Instructions.end())
311         MemAccessProfile.NextInstrOffset = Function.getSize();
312       else
313         MemAccessProfile.NextInstrOffset = II->first;
314     }
315     Function.HasMemoryProfile = true;
316   }
317 
318   return Error::success();
319 }
320 
321 Error DataReader::readProfile(BinaryContext &BC) {
322   for (auto &BFI : BC.getBinaryFunctions()) {
323     BinaryFunction &Function = BFI.second;
324     readProfile(Function);
325   }
326 
327   uint64_t NumUnused = 0;
328   for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
329     if (!FuncData.getValue().Used)
330       ++NumUnused;
331   BC.setNumUnusedProfiledObjects(NumUnused);
332 
333   return Error::success();
334 }
335 
336 std::error_code DataReader::parseInput() {
337   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
338       MemoryBuffer::getFileOrSTDIN(Filename);
339   if (std::error_code EC = MB.getError()) {
340     Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
341     return EC;
342   }
343   FileBuf = std::move(MB.get());
344   ParsingBuf = FileBuf->getBuffer();
345   if (std::error_code EC = parse())
346     return EC;
347   if (!ParsingBuf.empty())
348     Diag << "WARNING: invalid profile data detected at line " << Line
349          << ". Possibly corrupted profile.\n";
350 
351   buildLTONameMaps();
352 
353   return std::error_code();
354 }
355 
356 void DataReader::readProfile(BinaryFunction &BF) {
357   if (BF.empty())
358     return;
359 
360   if (!hasLBR()) {
361     BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
362     readSampleData(BF);
363     return;
364   }
365 
366   BF.ProfileFlags = BinaryFunction::PF_LBR;
367 
368   // Possibly assign/re-assign branch profile data.
369   matchProfileData(BF);
370 
371   FuncBranchData *FBD = getBranchData(BF);
372   if (!FBD)
373     return;
374 
375   // Assign basic block counts to function entry points. These only include
376   // counts for outside entries.
377   //
378   // There is a slight skew introduced here as branches originated from RETs
379   // may be accounted for in the execution count of an entry block if the last
380   // instruction in a predecessor fall-through block is a call. This situation
381   // should rarely happen because there are few multiple-entry functions.
382   for (const BranchInfo &BI : FBD->EntryData) {
383     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
384     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
385       uint64_t Count = BB->getExecutionCount();
386       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
387         Count = 0;
388       BB->setExecutionCount(Count + BI.Branches);
389     }
390   }
391 
392   uint64_t MismatchedBranches = 0;
393   for (const BranchInfo &BI : FBD->Data) {
394     if (BI.From.Name != BI.To.Name)
395       continue;
396 
397     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
398                       BI.Mispreds)) {
399       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
400                         << BI.To.Offset << '\n');
401       ++MismatchedBranches;
402     }
403   }
404 
405   // Convert branch data into annotations.
406   convertBranchData(BF);
407 }
408 
409 void DataReader::matchProfileData(BinaryFunction &BF) {
410   // This functionality is available for LBR-mode only
411   // TODO: Implement evaluateProfileData() for samples, checking whether
412   // sample addresses match instruction addresses in the function
413   if (!hasLBR())
414     return;
415 
416   FuncBranchData *FBD = getBranchData(BF);
417   if (FBD) {
418     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
419     BF.RawBranchCount = FBD->getNumExecutedBranches();
420     if (BF.ProfileMatchRatio == 1.0f) {
421       if (fetchProfileForOtherEntryPoints(BF)) {
422         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
423         BF.ExecutionCount = FBD->ExecutionCount;
424         BF.RawBranchCount = FBD->getNumExecutedBranches();
425       }
426       return;
427     }
428   }
429 
430   // Check if the function name can fluctuate between several compilations
431   // possibly triggered by minor unrelated code changes in the source code
432   // of the input binary.
433   if (!hasVolatileName(BF))
434     return;
435 
436   // Check for a profile that matches with 100% confidence.
437   const std::vector<FuncBranchData *> AllBranchData =
438       getBranchDataForNamesRegex(BF.getNames());
439   for (FuncBranchData *NewBranchData : AllBranchData) {
440     // Prevent functions from sharing the same profile.
441     if (NewBranchData->Used)
442       continue;
443 
444     if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
445       continue;
446 
447     if (FBD)
448       FBD->Used = false;
449 
450     // Update function profile data with the new set.
451     setBranchData(BF, NewBranchData);
452     NewBranchData->Used = true;
453     BF.ExecutionCount = NewBranchData->ExecutionCount;
454     BF.ProfileMatchRatio = 1.0f;
455     break;
456   }
457 }
458 
459 void DataReader::matchProfileMemData(BinaryFunction &BF) {
460   const std::vector<FuncMemData *> AllMemData =
461       getMemDataForNamesRegex(BF.getNames());
462   for (FuncMemData *NewMemData : AllMemData) {
463     // Prevent functions from sharing the same profile.
464     if (NewMemData->Used)
465       continue;
466 
467     if (FuncMemData *MD = getMemData(BF))
468       MD->Used = false;
469 
470     // Update function profile data with the new set.
471     setMemData(BF, NewMemData);
472     NewMemData->Used = true;
473     break;
474   }
475 }
476 
477 bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
478   BinaryContext &BC = BF.getBinaryContext();
479 
480   FuncBranchData *FBD = getBranchData(BF);
481   if (!FBD)
482     return false;
483 
484   // Check if we are missing profiling data for secondary entry points
485   bool First = true;
486   bool Updated = false;
487   for (BinaryBasicBlock *BB : BF.BasicBlocks) {
488     if (First) {
489       First = false;
490       continue;
491     }
492     if (BB->isEntryPoint()) {
493       uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
494       // Look for branch data associated with this entry point
495       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
496         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
497           FBD->appendFrom(*Data, BB->getOffset());
498           Data->Used = true;
499           Updated = true;
500         }
501       }
502     }
503   }
504 
505   return Updated;
506 }
507 
508 float DataReader::evaluateProfileData(BinaryFunction &BF,
509                                       const FuncBranchData &BranchData) const {
510   BinaryContext &BC = BF.getBinaryContext();
511 
512   // Until we define a minimal profile, we consider an empty branch data to be
513   // a valid profile. It could happen to a function without branches when we
514   // still have an EntryData for the execution count.
515   if (BranchData.Data.empty())
516     return 1.0f;
517 
518   uint64_t NumMatchedBranches = 0;
519   for (const BranchInfo &BI : BranchData.Data) {
520     bool IsValid = false;
521     if (BI.From.Name == BI.To.Name) {
522       // Try to record information with 0 count.
523       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
524     } else if (collectedInBoltedBinary()) {
525       // We can't check branch source for collections in bolted binaries because
526       // the source of the branch may be mapped to the first instruction in a BB
527       // instead of the original branch (which may not exist in the source bin).
528       IsValid = true;
529     } else {
530       // The branch has to originate from this function.
531       // Check for calls, tail calls, rets and indirect branches.
532       // When matching profiling info, we did not reach the stage
533       // when we identify tail calls, so they are still represented
534       // by regular branch instructions and we need isBranch() here.
535       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
536       // If it's a prefix - skip it.
537       if (Instr && BC.MIB->isPrefix(*Instr))
538         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
539       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
540                     BC.MIB->isReturn(*Instr)))
541         IsValid = true;
542     }
543 
544     if (IsValid) {
545       ++NumMatchedBranches;
546       continue;
547     }
548 
549     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
550                       << Twine::utohexstr(BI.From.Offset) << " -> ";
551                if (BI.From.Name == BI.To.Name) dbgs()
552                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
553                else dbgs() << "<outbounds>\n";);
554   }
555 
556   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
557   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
558     errs() << "BOLT-WARNING: profile branches match only "
559            << format("%.1f%%", MatchRatio * 100.0f) << " ("
560            << NumMatchedBranches << '/' << BranchData.Data.size()
561            << ") for function " << BF << '\n';
562 
563   return MatchRatio;
564 }
565 
566 void DataReader::readSampleData(BinaryFunction &BF) {
567   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
568   if (!SampleDataOrErr)
569     return;
570 
571   // Basic samples mode territory (without LBR info)
572   // First step is to assign BB execution count based on samples from perf
573   BF.ProfileMatchRatio = 1.0f;
574   BF.removeTagsFromProfile();
575   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
576   bool NormalizeByCalls = usesEvent("branches");
577   static bool NagUser = true;
578   if (NagUser) {
579     outs()
580         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
581     if (NormalizeByInsnCount)
582       outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
583     else if (NormalizeByCalls)
584       outs() << "BOLT-INFO: normalizing samples by branches.\n";
585 
586     NagUser = false;
587   }
588   uint64_t LastOffset = BF.getSize();
589   uint64_t TotalEntryCount = 0;
590   for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
591        I != E; ++I) {
592     uint64_t CurOffset = I->first;
593     // Always work with samples multiplied by 1000 to avoid losing them if we
594     // later need to normalize numbers
595     uint64_t NumSamples =
596         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
597     if (NormalizeByInsnCount && I->second->getNumNonPseudos()) {
598       NumSamples /= I->second->getNumNonPseudos();
599     } else if (NormalizeByCalls) {
600       uint32_t NumCalls = I->second->getNumCalls();
601       NumSamples /= NumCalls + 1;
602     }
603     I->second->setExecutionCount(NumSamples);
604     if (I->second->isEntryPoint())
605       TotalEntryCount += NumSamples;
606     LastOffset = CurOffset;
607   }
608 
609   BF.ExecutionCount = TotalEntryCount;
610 
611   estimateEdgeCounts(BF);
612 }
613 
614 void DataReader::convertBranchData(BinaryFunction &BF) const {
615   BinaryContext &BC = BF.getBinaryContext();
616 
617   if (BF.empty())
618     return;
619 
620   FuncBranchData *FBD = getBranchData(BF);
621   if (!FBD)
622     return;
623 
624   // Profile information for calls.
625   //
626   // There are 3 cases that we annotate differently:
627   //   1) Conditional tail calls that could be mispredicted.
628   //   2) Indirect calls to multiple destinations with mispredictions.
629   //      Before we validate CFG we have to handle indirect branches here too.
630   //   3) Regular direct calls. The count could be different from containing
631   //      basic block count. Keep this data in case we find it useful.
632   //
633   for (BranchInfo &BI : FBD->Data) {
634     // Ignore internal branches.
635     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
636       continue;
637 
638     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
639     if (!Instr ||
640         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
641       continue;
642 
643     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
644       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
645         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
646                << Twine::utohexstr(BI.From.Offset) << " in function " << BF
647                << '\n';
648       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
649       Value += Count;
650     };
651 
652     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
653       IndirectCallSiteProfile &CSP =
654           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
655               *Instr, "CallProfile");
656       MCSymbol *CalleeSymbol = nullptr;
657       if (BI.To.IsSymbol) {
658         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
659           CalleeSymbol = BD->getSymbol();
660       }
661       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
662     } else if (BC.MIB->getConditionalTailCall(*Instr)) {
663       setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
664       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
665     } else {
666       setOrUpdateAnnotation("Count", BI.Branches);
667     }
668   }
669 }
670 
671 bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
672                               uint64_t Count, uint64_t Mispreds) const {
673   BinaryContext &BC = BF.getBinaryContext();
674 
675   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
676   BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
677 
678   if (!FromBB || !ToBB) {
679     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
680     return false;
681   }
682 
683   // Could be bad LBR data; ignore the branch. In the case of data collected
684   // in binaries optimized by BOLT, a source BB may be mapped to two output
685   // BBs as a result of optimizations. In that case, a branch between these
686   // two will be recorded as a branch from A going to A in the source address
687   // space. Keep processing.
688   if (From == To)
689     return true;
690 
691   // Return from a tail call.
692   if (FromBB->succ_size() == 0)
693     return true;
694 
695   // Very rarely we will see ignored branches. Do a linear check.
696   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
697     if (Branch ==
698         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
699       return true;
700 
701   bool OffsetMatches = !!(To == ToBB->getOffset());
702   if (!OffsetMatches) {
703     // Skip the nops to support old .fdata
704     uint64_t Offset = ToBB->getOffset();
705     for (MCInst &Instr : *ToBB) {
706       if (!BC.MIB->isNoop(Instr))
707         break;
708 
709       Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
710     }
711 
712     if (To == Offset)
713       OffsetMatches = true;
714   }
715 
716   if (!OffsetMatches) {
717     // "To" could be referring to nop instructions in between 2 basic blocks.
718     // While building the CFG we make sure these nops are attributed to the
719     // previous basic block, thus we check if the destination belongs to the
720     // gap past the last instruction.
721     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
722     if (LastInstr) {
723       const uint32_t LastInstrOffset =
724           BC.MIB->getOffsetWithDefault(*LastInstr, 0);
725 
726       // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
727       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
728         return true;
729 
730       if (To <= LastInstrOffset) {
731         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
732                           << " in " << BF << " : " << From << " -> " << To
733                           << '\n');
734         return false;
735       }
736     }
737 
738     // The real destination is the layout successor of the detected ToBB.
739     if (ToBB == BF.BasicBlocksLayout.back())
740       return false;
741     BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
742     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
743     ToBB = NextBB;
744   }
745 
746   // If there's no corresponding instruction for 'From', we have probably
747   // discarded it as a FT from __builtin_unreachable.
748   MCInst *FromInstruction = BF.getInstructionAtOffset(From);
749   if (!FromInstruction) {
750     // If the data was collected in a bolted binary, the From addresses may be
751     // translated to the first instruction of the source BB if BOLT inserted
752     // a new branch that did not exist in the source (we can't map it to the
753     // source instruction, so we map it to the first instr of source BB).
754     // We do not keep offsets for random instructions. So the check above will
755     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
756     if (collectedInBoltedBinary()) {
757       if (FromBB->getInputOffset() != From) {
758         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
759                           << BF << '\n');
760         return false;
761       }
762       FromInstruction = nullptr;
763     } else {
764       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
765                         << '\n');
766       return false;
767     }
768   }
769 
770   if (!FromBB->getSuccessor(ToBB->getLabel())) {
771     // Check if this is a recursive call or a return from a recursive call.
772     if (FromInstruction && ToBB->isEntryPoint() &&
773         (BC.MIB->isCall(*FromInstruction) ||
774          BC.MIB->isIndirectBranch(*FromInstruction))) {
775       // Execution count is already accounted for.
776       return true;
777     }
778     // For data collected in a bolted binary, we may have created two output BBs
779     // that map to one original block. Branches between these two blocks will
780     // appear here as one BB jumping to itself, even though it has no loop
781     // edges. Ignore these.
782     if (collectedInBoltedBinary() && FromBB == ToBB)
783       return true;
784 
785     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
786     if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
787         FTSuccessor->getSuccessor(ToBB->getLabel())) {
788       BinaryBasicBlock::BinaryBranchInfo &FTBI =
789           FTSuccessor->getBranchInfo(*ToBB);
790       FTBI.Count += Count;
791       if (Count)
792         FTBI.MispredictedCount += Mispreds;
793       ToBB = FTSuccessor;
794     } else {
795       LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
796                         << Twine::utohexstr(From) << " -> "
797                         << Twine::utohexstr(To) << '\n');
798       return false;
799     }
800   }
801 
802   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
803   BI.Count += Count;
804   // Only update mispredicted count if it the count was real.
805   if (Count) {
806     BI.MispredictedCount += Mispreds;
807   }
808 
809   return true;
810 }
811 
812 void DataReader::reportError(StringRef ErrorMsg) {
813   Diag << "Error reading BOLT data input file: line " << Line << ", column "
814        << Col << ": " << ErrorMsg << '\n';
815 }
816 
817 bool DataReader::expectAndConsumeFS() {
818   if (ParsingBuf[0] != FieldSeparator) {
819     reportError("expected field separator");
820     return false;
821   }
822   ParsingBuf = ParsingBuf.drop_front(1);
823   Col += 1;
824   return true;
825 }
826 
827 void DataReader::consumeAllRemainingFS() {
828   while (ParsingBuf[0] == FieldSeparator) {
829     ParsingBuf = ParsingBuf.drop_front(1);
830     Col += 1;
831   }
832 }
833 
834 bool DataReader::checkAndConsumeNewLine() {
835   if (ParsingBuf[0] != '\n')
836     return false;
837 
838   ParsingBuf = ParsingBuf.drop_front(1);
839   Col = 0;
840   Line += 1;
841   return true;
842 }
843 
844 ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
845   if (EndChar == '\\') {
846     reportError("EndChar could not be backslash");
847     return make_error_code(llvm::errc::io_error);
848   }
849 
850   std::string EndChars(1, EndChar);
851   EndChars.push_back('\\');
852   if (EndNl)
853     EndChars.push_back('\n');
854 
855   size_t StringEnd = 0;
856   do {
857     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
858     if (StringEnd == StringRef::npos ||
859         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
860       reportError("malformed field");
861       return make_error_code(llvm::errc::io_error);
862     }
863 
864     if (ParsingBuf[StringEnd] != '\\')
865       break;
866 
867     StringEnd += 2;
868   } while (1);
869 
870   StringRef Str = ParsingBuf.substr(0, StringEnd);
871 
872   // If EndNl was set and nl was found instead of EndChar, do not consume the
873   // new line.
874   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
875   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;
876 
877   ParsingBuf = ParsingBuf.drop_front(End);
878   if (EndChar == '\n') {
879     Col = 0;
880     Line += 1;
881   } else {
882     Col += End;
883   }
884   return Str;
885 }
886 
887 ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
888   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
889   if (std::error_code EC = NumStrRes.getError())
890     return EC;
891   StringRef NumStr = NumStrRes.get();
892   int64_t Num;
893   if (NumStr.getAsInteger(10, Num)) {
894     reportError("expected decimal number");
895     Diag << "Found: " << NumStr << "\n";
896     return make_error_code(llvm::errc::io_error);
897   }
898   return Num;
899 }
900 
901 ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
902   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
903   if (std::error_code EC = NumStrRes.getError())
904     return EC;
905   StringRef NumStr = NumStrRes.get();
906   uint64_t Num;
907   if (NumStr.getAsInteger(16, Num)) {
908     reportError("expected hexidecimal number");
909     Diag << "Found: " << NumStr << "\n";
910     return make_error_code(llvm::errc::io_error);
911   }
912   return Num;
913 }
914 
915 ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
916                                             bool ExpectMemLoc) {
917   // Read whether the location of the branch should be DSO or a symbol
918   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
919   // symbol.
920   // The symbol flag is also used to tag memory load events by adding 3 to the
921   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
922   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
923       ParsingBuf[0] != '2') {
924     reportError("expected 0, 1 or 2");
925     return make_error_code(llvm::errc::io_error);
926   }
927 
928   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
929       ParsingBuf[0] != '5') {
930     reportError("expected 3, 4 or 5");
931     return make_error_code(llvm::errc::io_error);
932   }
933 
934   bool IsSymbol =
935       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
936       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
937   ParsingBuf = ParsingBuf.drop_front(1);
938   Col += 1;
939 
940   if (!expectAndConsumeFS())
941     return make_error_code(llvm::errc::io_error);
942   consumeAllRemainingFS();
943 
944   // Read the string containing the symbol or the DSO name
945   ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
946   if (std::error_code EC = NameRes.getError())
947     return EC;
948   StringRef Name = NameRes.get();
949   consumeAllRemainingFS();
950 
951   // Read the offset
952   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
953   if (std::error_code EC = Offset.getError())
954     return EC;
955 
956   return Location(IsSymbol, Name, Offset.get());
957 }
958 
959 ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
960   ErrorOr<Location> Res = parseLocation(FieldSeparator);
961   if (std::error_code EC = Res.getError())
962     return EC;
963   Location From = Res.get();
964 
965   consumeAllRemainingFS();
966   Res = parseLocation(FieldSeparator);
967   if (std::error_code EC = Res.getError())
968     return EC;
969   Location To = Res.get();
970 
971   consumeAllRemainingFS();
972   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
973   if (std::error_code EC = MRes.getError())
974     return EC;
975   int64_t NumMispreds = MRes.get();
976 
977   consumeAllRemainingFS();
978   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
979   if (std::error_code EC = BRes.getError())
980     return EC;
981   int64_t NumBranches = BRes.get();
982 
983   consumeAllRemainingFS();
984   if (!checkAndConsumeNewLine()) {
985     reportError("expected end of line");
986     return make_error_code(llvm::errc::io_error);
987   }
988 
989   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
990 }
991 
992 ErrorOr<MemInfo> DataReader::parseMemInfo() {
993   ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
994   if (std::error_code EC = Res.getError())
995     return EC;
996   Location Offset = Res.get();
997 
998   consumeAllRemainingFS();
999   Res = parseMemLocation(FieldSeparator);
1000   if (std::error_code EC = Res.getError())
1001     return EC;
1002   Location Addr = Res.get();
1003 
1004   consumeAllRemainingFS();
1005   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
1006   if (std::error_code EC = CountRes.getError())
1007     return EC;
1008 
1009   consumeAllRemainingFS();
1010   if (!checkAndConsumeNewLine()) {
1011     reportError("expected end of line");
1012     return make_error_code(llvm::errc::io_error);
1013   }
1014 
1015   return MemInfo(Offset, Addr, CountRes.get());
1016 }
1017 
1018 ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
1019   ErrorOr<Location> Res = parseLocation(FieldSeparator);
1020   if (std::error_code EC = Res.getError())
1021     return EC;
1022   Location Address = Res.get();
1023 
1024   consumeAllRemainingFS();
1025   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
1026   if (std::error_code EC = BRes.getError())
1027     return EC;
1028   int64_t Occurrences = BRes.get();
1029 
1030   consumeAllRemainingFS();
1031   if (!checkAndConsumeNewLine()) {
1032     reportError("expected end of line");
1033     return make_error_code(llvm::errc::io_error);
1034   }
1035 
1036   return SampleInfo(std::move(Address), Occurrences);
1037 }
1038 
1039 ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
1040   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
1041     return false;
1042   ParsingBuf = ParsingBuf.drop_front(6);
1043   Col += 6;
1044 
1045   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
1046     ParsingBuf = ParsingBuf.drop_front(1);
1047 
1048   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
1049     ErrorOr<StringRef> EventName = parseString(' ', true);
1050     if (!EventName)
1051       return make_error_code(llvm::errc::io_error);
1052     EventNames.insert(EventName.get());
1053   }
1054 
1055   if (!checkAndConsumeNewLine()) {
1056     reportError("malformed no_lbr line");
1057     return make_error_code(llvm::errc::io_error);
1058   }
1059   return true;
1060 }
1061 
1062 ErrorOr<bool> DataReader::maybeParseBATFlag() {
1063   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
1064     return false;
1065   ParsingBuf = ParsingBuf.drop_front(16);
1066   Col += 16;
1067 
1068   if (!checkAndConsumeNewLine()) {
1069     reportError("malformed boltedcollection line");
1070     return make_error_code(llvm::errc::io_error);
1071   }
1072   return true;
1073 }
1074 
1075 bool DataReader::hasBranchData() {
1076   if (ParsingBuf.size() == 0)
1077     return false;
1078 
1079   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
1080     return true;
1081   return false;
1082 }
1083 
1084 bool DataReader::hasMemData() {
1085   if (ParsingBuf.size() == 0)
1086     return false;
1087 
1088   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
1089     return true;
1090   return false;
1091 }
1092 
1093 std::error_code DataReader::parseInNoLBRMode() {
1094   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1095     auto I = NamesToSamples.find(Name);
1096     if (I == NamesToSamples.end()) {
1097       bool Success;
1098       std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
1099           Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));
1100 
1101       assert(Success && "unexpected result of insert");
1102     }
1103     return I;
1104   };
1105 
1106   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1107     auto I = NamesToMemEvents.find(Name);
1108     if (I == NamesToMemEvents.end()) {
1109       bool Success;
1110       std::tie(I, Success) = NamesToMemEvents.insert(
1111           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1112       assert(Success && "unexpected result of insert");
1113     }
1114     return I;
1115   };
1116 
1117   while (hasBranchData()) {
1118     ErrorOr<SampleInfo> Res = parseSampleInfo();
1119     if (std::error_code EC = Res.getError())
1120       return EC;
1121 
1122     SampleInfo SI = Res.get();
1123 
1124     // Ignore samples not involving known locations
1125     if (!SI.Loc.IsSymbol)
1126       continue;
1127 
1128     StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
1129     I->getValue().Data.emplace_back(std::move(SI));
1130   }
1131 
1132   while (hasMemData()) {
1133     ErrorOr<MemInfo> Res = parseMemInfo();
1134     if (std::error_code EC = Res.getError())
1135       return EC;
1136 
1137     MemInfo MI = Res.get();
1138 
1139     // Ignore memory events not involving known pc.
1140     if (!MI.Offset.IsSymbol)
1141       continue;
1142 
1143     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1144     I->getValue().Data.emplace_back(std::move(MI));
1145   }
1146 
1147   for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples)
1148     std::stable_sort(FuncSamples.second.Data.begin(),
1149                      FuncSamples.second.Data.end());
1150 
1151   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1152     std::stable_sort(MemEvents.second.Data.begin(),
1153                      MemEvents.second.Data.end());
1154 
1155   return std::error_code();
1156 }
1157 
1158 std::error_code DataReader::parse() {
1159   auto GetOrCreateFuncEntry = [&](StringRef Name) {
1160     auto I = NamesToBranches.find(Name);
1161     if (I == NamesToBranches.end()) {
1162       bool Success;
1163       std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
1164           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
1165                                FuncBranchData::ContainerTy())));
1166       assert(Success && "unexpected result of insert");
1167     }
1168     return I;
1169   };
1170 
1171   auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
1172     auto I = NamesToMemEvents.find(Name);
1173     if (I == NamesToMemEvents.end()) {
1174       bool Success;
1175       std::tie(I, Success) = NamesToMemEvents.insert(
1176           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
1177       assert(Success && "unexpected result of insert");
1178     }
1179     return I;
1180   };
1181 
1182   Col = 0;
1183   Line = 1;
1184   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
1185   if (!FlagOrErr)
1186     return FlagOrErr.getError();
1187   NoLBRMode = *FlagOrErr;
1188 
1189   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
1190   if (!BATFlagOrErr)
1191     return BATFlagOrErr.getError();
1192   BATMode = *BATFlagOrErr;
1193 
1194   if (!hasBranchData() && !hasMemData()) {
1195     Diag << "ERROR: no valid profile data found\n";
1196     return make_error_code(llvm::errc::io_error);
1197   }
1198 
1199   if (NoLBRMode)
1200     return parseInNoLBRMode();
1201 
1202   while (hasBranchData()) {
1203     ErrorOr<BranchInfo> Res = parseBranchInfo();
1204     if (std::error_code EC = Res.getError())
1205       return EC;
1206 
1207     BranchInfo BI = Res.get();
1208 
1209     // Ignore branches not involving known location.
1210     if (!BI.From.IsSymbol && !BI.To.IsSymbol)
1211       continue;
1212 
1213     StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
1214     I->getValue().Data.emplace_back(std::move(BI));
1215 
1216     // Add entry data for branches to another function or branches
1217     // to entry points (including recursive calls)
1218     if (BI.To.IsSymbol &&
1219         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
1220       I = GetOrCreateFuncEntry(BI.To.Name);
1221       I->getValue().EntryData.emplace_back(std::move(BI));
1222     }
1223 
1224     // If destination is the function start - update execution count.
1225     // NB: the data is skewed since we cannot tell tail recursion from
1226     //     branches to the function start.
1227     if (BI.To.IsSymbol && BI.To.Offset == 0) {
1228       I = GetOrCreateFuncEntry(BI.To.Name);
1229       I->getValue().ExecutionCount += BI.Branches;
1230     }
1231   }
1232 
1233   while (hasMemData()) {
1234     ErrorOr<MemInfo> Res = parseMemInfo();
1235     if (std::error_code EC = Res.getError())
1236       return EC;
1237 
1238     MemInfo MI = Res.get();
1239 
1240     // Ignore memory events not involving known pc.
1241     if (!MI.Offset.IsSymbol)
1242       continue;
1243 
1244     StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
1245     I->getValue().Data.emplace_back(std::move(MI));
1246   }
1247 
1248   for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches)
1249     std::stable_sort(FuncBranches.second.Data.begin(),
1250                      FuncBranches.second.Data.end());
1251 
1252   for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
1253     std::stable_sort(MemEvents.second.Data.begin(),
1254                      MemEvents.second.Data.end());
1255 
1256   return std::error_code();
1257 }
1258 
1259 void DataReader::buildLTONameMaps() {
1260   for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
1261     const StringRef FuncName = FuncData.getKey();
1262     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1263     if (CommonName)
1264       LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
1265   }
1266 
1267   for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
1268     const StringRef FuncName = FuncData.getKey();
1269     const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
1270     if (CommonName)
1271       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
1272   }
1273 }
1274 
1275 namespace {
1276 template <typename MapTy>
1277 decltype(MapTy::MapEntryTy::second) *
1278 fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
1279   // Do a reverse order iteration since the name in profile has a higher chance
1280   // of matching a name at the end of the list.
1281   for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
1282     auto I = Map.find(normalizeName((*SI)->getName()));
1283     if (I != Map.end())
1284       return &I->getValue();
1285   }
1286   return nullptr;
1287 }
1288 
1289 template <typename MapTy>
1290 decltype(MapTy::MapEntryTy::second) *
1291 fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
1292   // Do a reverse order iteration since the name in profile has a higher chance
1293   // of matching a name at the end of the list.
1294   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1295     auto I = Map.find(normalizeName(*FI));
1296     if (I != Map.end())
1297       return &I->getValue();
1298   }
1299   return nullptr;
1300 }
1301 
1302 template <typename MapTy>
1303 std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
1304     MapTy &Map,
1305     const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
1306         &LTOCommonNameMap,
1307     const std::vector<StringRef> &FuncNames) {
1308   std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
1309   // Do a reverse order iteration since the name in profile has a higher chance
1310   // of matching a name at the end of the list.
1311   for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
1312     std::string Name = normalizeName(*FI);
1313     const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
1314     if (LTOCommonName) {
1315       auto I = LTOCommonNameMap.find(*LTOCommonName);
1316       if (I != LTOCommonNameMap.end()) {
1317         const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
1318             I->getValue();
1319         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
1320       }
1321     } else {
1322       auto I = Map.find(Name);
1323       if (I != Map.end())
1324         return {&I->getValue()};
1325     }
1326   }
1327   return AllData;
1328 }
1329 
1330 }
1331 
1332 bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
1333   if (getBranchData(Function) || getMemData(Function))
1334     return true;
1335 
1336   if (getBranchDataForNames(Function.getNames()) ||
1337       getMemDataForNames(Function.getNames()))
1338     return true;
1339 
1340   if (!hasVolatileName(Function))
1341     return false;
1342 
1343   const std::vector<FuncBranchData *> AllBranchData =
1344       getBranchDataForNamesRegex(Function.getNames());
1345   if (!AllBranchData.empty())
1346     return true;
1347 
1348   const std::vector<FuncMemData *> AllMemData =
1349       getMemDataForNamesRegex(Function.getNames());
1350   if (!AllMemData.empty())
1351     return true;
1352 
1353   return false;
1354 }
1355 
1356 FuncBranchData *
1357 DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
1358   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
1359 }
1360 
1361 FuncBranchData *
1362 DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
1363   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
1364 }
1365 
1366 FuncMemData *
1367 DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
1368   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
1369 }
1370 
1371 FuncSampleData *
1372 DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
1373   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
1374 }
1375 
1376 std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
1377     const std::vector<StringRef> &FuncNames) {
1378   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
1379 }
1380 
1381 std::vector<FuncMemData *>
1382 DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
1383   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
1384 }
1385 
1386 bool DataReader::hasLocalsWithFileName() const {
1387   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1388     const StringRef &FuncName = Func.getKey();
1389     if (FuncName.count('/') == 2 && FuncName[0] != '/')
1390       return true;
1391   }
1392   return false;
1393 }
1394 
1395 void DataReader::dump() const {
1396   for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
1397     Diag << Func.getKey() << " branches:\n";
1398     for (const BranchInfo &BI : Func.getValue().Data)
1399       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1400            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1401     Diag << Func.getKey() << " entry points:\n";
1402     for (const BranchInfo &BI : Func.getValue().EntryData)
1403       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
1404            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
1405   }
1406 
1407   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
1408     StringRef Event = I->getKey();
1409     Diag << "Data was collected with event: " << Event << "\n";
1410   }
1411   for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
1412     Diag << Func.getKey() << " samples:\n";
1413     for (const SampleInfo &SI : Func.getValue().Data)
1414       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
1415   }
1416 
1417   for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
1418     Diag << "Memory events for " << Func.getValue().Name;
1419     Location LastOffset(0);
1420     for (const MemInfo &MI : Func.getValue().Data) {
1421       if (MI.Offset == LastOffset)
1422         Diag << ", " << MI.Addr << "/" << MI.Count;
1423       else
1424         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
1425       LastOffset = MI.Offset;
1426     }
1427     Diag << "\n";
1428   }
1429 }
1430 
1431 } // namespace bolt
1432 } // namespace llvm
1433