1 //===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Profile/YAMLProfileReader.h"
10 #include "bolt/Core/BinaryBasicBlock.h"
11 #include "bolt/Core/BinaryFunction.h"
12 #include "bolt/Passes/MCF.h"
13 #include "bolt/Profile/ProfileYAMLMapping.h"
14 #include "bolt/Utils/Utils.h"
15 #include "llvm/Support/CommandLine.h"
16 
17 using namespace llvm;
18 
19 namespace opts {
20 
21 extern cl::opt<unsigned> Verbosity;
22 extern cl::OptionCategory BoltOptCategory;
23 
24 static llvm::cl::opt<bool>
25 IgnoreHash("profile-ignore-hash",
26   cl::desc("ignore hash while reading function profile"),
27   cl::init(false),
28   cl::ZeroOrMore,
29   cl::Hidden,
30   cl::cat(BoltOptCategory));
31 
32 }
33 
34 namespace llvm {
35 namespace bolt {
36 
37 bool YAMLProfileReader::isYAML(const StringRef Filename) {
38   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
39       MemoryBuffer::getFileOrSTDIN(Filename);
40   if (std::error_code EC = MB.getError())
41     report_error(Filename, EC);
42   StringRef Buffer = MB.get()->getBuffer();
43   if (Buffer.startswith("---\n"))
44     return true;
45   return false;
46 }
47 
48 void YAMLProfileReader::buildNameMaps(
49     std::map<uint64_t, BinaryFunction> &Functions) {
50   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
51     StringRef Name = YamlBF.Name;
52     const size_t Pos = Name.find("(*");
53     if (Pos != StringRef::npos)
54       Name = Name.substr(0, Pos);
55     ProfileNameToProfile[Name] = &YamlBF;
56     if (const Optional<StringRef> CommonName = getLTOCommonName(Name))
57       LTOCommonNameMap[*CommonName].push_back(&YamlBF);
58   }
59   for (auto &BFI : Functions) {
60     const BinaryFunction &Function = BFI.second;
61     for (StringRef Name : Function.getNames())
62       if (const Optional<StringRef> CommonName = getLTOCommonName(Name))
63         LTOCommonNameFunctionMap[*CommonName].insert(&Function);
64   }
65 }
66 
67 bool YAMLProfileReader::hasLocalsWithFileName() const {
68   for (const StringMapEntry<yaml::bolt::BinaryFunctionProfile *> &KV :
69        ProfileNameToProfile) {
70     const StringRef &FuncName = KV.getKey();
71     if (FuncName.count('/') == 2 && FuncName[0] != '/')
72       return true;
73   }
74   return false;
75 }
76 
77 bool YAMLProfileReader::parseFunctionProfile(
78     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
79   BinaryContext &BC = BF.getBinaryContext();
80 
81   bool ProfileMatched = true;
82   uint64_t MismatchedBlocks = 0;
83   uint64_t MismatchedCalls = 0;
84   uint64_t MismatchedEdges = 0;
85 
86   uint64_t FunctionExecutionCount = 0;
87 
88   BF.setExecutionCount(YamlBF.ExecCount);
89 
90   if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) {
91     if (opts::Verbosity >= 1)
92       errs() << "BOLT-WARNING: function hash mismatch\n";
93     ProfileMatched = false;
94   }
95 
96   if (YamlBF.NumBasicBlocks != BF.size()) {
97     if (opts::Verbosity >= 1)
98       errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
99     ProfileMatched = false;
100   }
101 
102   BinaryFunction::BasicBlockOrderType DFSOrder = BF.dfs();
103 
104   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
105     if (YamlBB.Index >= DFSOrder.size()) {
106       if (opts::Verbosity >= 2)
107         errs() << "BOLT-WARNING: index " << YamlBB.Index
108                << " is out of bounds\n";
109       ++MismatchedBlocks;
110       continue;
111     }
112 
113     BinaryBasicBlock &BB = *DFSOrder[YamlBB.Index];
114 
115     // Basic samples profile (without LBR) does not have branches information
116     // and needs a special processing.
117     if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
118       if (!YamlBB.EventCount) {
119         BB.setExecutionCount(0);
120         continue;
121       }
122       uint64_t NumSamples = YamlBB.EventCount * 1000;
123       if (NormalizeByInsnCount && BB.getNumNonPseudos())
124         NumSamples /= BB.getNumNonPseudos();
125       else if (NormalizeByCalls)
126         NumSamples /= BB.getNumCalls() + 1;
127 
128       BB.setExecutionCount(NumSamples);
129       if (BB.isEntryPoint())
130         FunctionExecutionCount += NumSamples;
131       continue;
132     }
133 
134     BB.setExecutionCount(YamlBB.ExecCount);
135 
136     for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
137       BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
138                                    ? YamlProfileToFunction[YamlCSI.DestId]
139                                    : nullptr;
140       bool IsFunction = Callee ? true : false;
141       MCSymbol *CalleeSymbol = nullptr;
142       if (IsFunction)
143         CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
144 
145       BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count,
146                                         YamlCSI.Mispreds, YamlCSI.Offset);
147 
148       if (YamlCSI.Offset >= BB.getOriginalSize()) {
149         if (opts::Verbosity >= 2)
150           errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
151                  << " out of bounds in block " << BB.getName() << '\n';
152         ++MismatchedCalls;
153         continue;
154       }
155 
156       MCInst *Instr =
157           BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
158       if (!Instr) {
159         if (opts::Verbosity >= 2)
160           errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
161                  << " in block " << BB.getName() << '\n';
162         ++MismatchedCalls;
163         continue;
164       }
165       if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
166         if (opts::Verbosity >= 2)
167           errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
168                  << " in block " << BB.getName() << '\n';
169         ++MismatchedCalls;
170         continue;
171       }
172 
173       auto setAnnotation = [&](StringRef Name, uint64_t Count) {
174         if (BC.MIB->hasAnnotation(*Instr, Name)) {
175           if (opts::Verbosity >= 1)
176             errs() << "BOLT-WARNING: ignoring duplicate " << Name
177                    << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
178                    << " in function " << BF << '\n';
179           return;
180         }
181         BC.MIB->addAnnotation(*Instr, Name, Count);
182       };
183 
184       if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
185         auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
186             *Instr, "CallProfile");
187         CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
188       } else if (BC.MIB->getConditionalTailCall(*Instr)) {
189         setAnnotation("CTCTakenCount", YamlCSI.Count);
190         setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
191       } else {
192         setAnnotation("Count", YamlCSI.Count);
193       }
194     }
195 
196     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
197       if (YamlSI.Index >= DFSOrder.size()) {
198         if (opts::Verbosity >= 1)
199           errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
200         ++MismatchedEdges;
201         continue;
202       }
203 
204       BinaryBasicBlock &SuccessorBB = *DFSOrder[YamlSI.Index];
205       if (!BB.getSuccessor(SuccessorBB.getLabel())) {
206         if (opts::Verbosity >= 1)
207           errs() << "BOLT-WARNING: no successor for block " << BB.getName()
208                  << " that matches index " << YamlSI.Index << " or block "
209                  << SuccessorBB.getName() << '\n';
210         ++MismatchedEdges;
211         continue;
212       }
213 
214       BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(SuccessorBB);
215       BI.Count += YamlSI.Count;
216       BI.MispredictedCount += YamlSI.Mispreds;
217     }
218   }
219 
220   // If basic block profile wasn't read it should be 0.
221   for (BinaryBasicBlock &BB : BF)
222     if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
223       BB.setExecutionCount(0);
224 
225   if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
226     BF.setExecutionCount(FunctionExecutionCount);
227     estimateEdgeCounts(BF);
228   }
229 
230   ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
231 
232   if (ProfileMatched)
233     BF.markProfiled(YamlBP.Header.Flags);
234 
235   if (!ProfileMatched && opts::Verbosity >= 1)
236     errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
237            << MismatchedCalls << " calls, and " << MismatchedEdges
238            << " edges in profile did not match function " << BF << '\n';
239 
240   return ProfileMatched;
241 }
242 
243 Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
244   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
245       MemoryBuffer::getFileOrSTDIN(Filename);
246   if (std::error_code EC = MB.getError()) {
247     errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
248     return errorCodeToError(EC);
249   }
250   yaml::Input YamlInput(MB.get()->getBuffer());
251 
252   // Consume YAML file.
253   YamlInput >> YamlBP;
254   if (YamlInput.error()) {
255     errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
256            << " : " << YamlInput.error().message() << '\n';
257     return errorCodeToError(YamlInput.error());
258   }
259 
260   // Sanity check.
261   if (YamlBP.Header.Version != 1)
262     return make_error<StringError>(
263         Twine("cannot read profile : unsupported version"),
264         inconvertibleErrorCode());
265 
266   if (YamlBP.Header.EventNames.find(',') != StringRef::npos)
267     return make_error<StringError>(
268         Twine("multiple events in profile are not supported"),
269         inconvertibleErrorCode());
270 
271   // Match profile to function based on a function name.
272   buildNameMaps(BC.getBinaryFunctions());
273 
274   // Preliminary assign function execution count.
275   for (auto &KV : BC.getBinaryFunctions()) {
276     BinaryFunction &BF = KV.second;
277     for (StringRef Name : BF.getNames()) {
278       auto PI = ProfileNameToProfile.find(Name);
279       if (PI != ProfileNameToProfile.end()) {
280         yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue();
281         BF.setExecutionCount(YamlBF.ExecCount);
282         break;
283       }
284     }
285   }
286 
287   return Error::success();
288 }
289 
290 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
291   for (StringRef Name : BF.getNames()) {
292     if (ProfileNameToProfile.find(Name) != ProfileNameToProfile.end())
293       return true;
294     if (const Optional<StringRef> CommonName = getLTOCommonName(Name)) {
295       if (LTOCommonNameMap.find(*CommonName) != LTOCommonNameMap.end())
296         return true;
297     }
298   }
299 
300   return false;
301 }
302 
303 Error YAMLProfileReader::readProfile(BinaryContext &BC) {
304   YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);
305 
306   auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
307                            BinaryFunction &BF) {
308     if (opts::IgnoreHash && Profile.NumBasicBlocks == BF.size())
309       return true;
310     if (!opts::IgnoreHash &&
311         Profile.Hash == static_cast<uint64_t>(BF.getHash()))
312       return true;
313     return false;
314   };
315 
316   // We have to do 2 passes since LTO introduces an ambiguity in function
317   // names. The first pass assigns profiles that match 100% by name and
318   // by hash. The second pass allows name ambiguity for LTO private functions.
319   for (auto &BFI : BC.getBinaryFunctions()) {
320     BinaryFunction &Function = BFI.second;
321 
322     // Clear function call count that may have been set while pre-processing
323     // the profile.
324     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
325 
326     // Recompute hash once per function.
327     if (!opts::IgnoreHash)
328       Function.computeHash(/*UseDFS=*/true);
329 
330     for (StringRef FunctionName : Function.getNames()) {
331       auto PI = ProfileNameToProfile.find(FunctionName);
332       if (PI == ProfileNameToProfile.end())
333         continue;
334 
335       yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue();
336       if (profileMatches(YamlBF, Function))
337         matchProfileToFunction(YamlBF, Function);
338     }
339   }
340 
341   for (auto &BFI : BC.getBinaryFunctions()) {
342     BinaryFunction &Function = BFI.second;
343 
344     if (ProfiledFunctions.count(&Function))
345       continue;
346 
347     for (StringRef FunctionName : Function.getNames()) {
348       const Optional<StringRef> CommonName = getLTOCommonName(FunctionName);
349       if (CommonName) {
350         auto I = LTOCommonNameMap.find(*CommonName);
351         if (I == LTOCommonNameMap.end())
352           continue;
353 
354         bool ProfileMatched = false;
355         std::vector<yaml::bolt::BinaryFunctionProfile *> &LTOProfiles =
356             I->getValue();
357         for (yaml::bolt::BinaryFunctionProfile *YamlBF : LTOProfiles) {
358           if (YamlBF->Used)
359             continue;
360           if ((ProfileMatched = profileMatches(*YamlBF, Function))) {
361             matchProfileToFunction(*YamlBF, Function);
362             break;
363           }
364         }
365         if (ProfileMatched)
366           break;
367 
368         // If there's only one function with a given name, try to
369         // match it partially.
370         if (LTOProfiles.size() == 1 &&
371             LTOCommonNameFunctionMap[*CommonName].size() == 1 &&
372             !LTOProfiles.front()->Used) {
373           matchProfileToFunction(*LTOProfiles.front(), Function);
374           break;
375         }
376       } else {
377         auto PI = ProfileNameToProfile.find(FunctionName);
378         if (PI == ProfileNameToProfile.end())
379           continue;
380 
381         yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue();
382         if (!YamlBF.Used) {
383           matchProfileToFunction(YamlBF, Function);
384           break;
385         }
386       }
387     }
388   }
389 
390   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
391     if (!YamlBF.Used && opts::Verbosity >= 1)
392       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
393              << '\n';
394 
395   // Set for parseFunctionProfile().
396   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
397   NormalizeByCalls = usesEvent("branches");
398 
399   uint64_t NumUnused = 0;
400   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
401     if (YamlBF.Id >= YamlProfileToFunction.size()) {
402       // Such profile was ignored.
403       ++NumUnused;
404       continue;
405     }
406     if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
407       parseFunctionProfile(*BF, YamlBF);
408     else
409       ++NumUnused;
410   }
411 
412   BC.setNumUnusedProfiledObjects(NumUnused);
413 
414   return Error::success();
415 }
416 
417 bool YAMLProfileReader::usesEvent(StringRef Name) const {
418   return YamlBP.Header.EventNames.find(std::string(Name)) != StringRef::npos;
419 }
420 
421 } // end namespace bolt
422 } // end namespace llvm
423