1 //===- bolt/Passes/IndirectCallPromotion.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IndirectCallPromotion class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/IndirectCallPromotion.h"
14 #include "bolt/Passes/BinaryFunctionCallGraph.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Passes/Inliner.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 #define DEBUG_TYPE "ICP"
20 #define DEBUG_VERBOSE(Level, X)                                                \
21   if (opts::Verbosity >= (Level)) {                                            \
22     X;                                                                         \
23   }
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltOptCategory;
31 
32 extern cl::opt<IndirectCallPromotionType> ICP;
33 extern cl::opt<unsigned> Verbosity;
34 extern cl::opt<unsigned> ExecutionCountThreshold;
35 
36 static cl::opt<unsigned> ICPJTRemainingPercentThreshold(
37     "icp-jt-remaining-percent-threshold",
38     cl::desc("The percentage threshold against remaining unpromoted indirect "
39              "call count for the promotion for jump tables"),
40     cl::init(30), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
41 
42 static cl::opt<unsigned> ICPJTTotalPercentThreshold(
43     "icp-jt-total-percent-threshold",
44     cl::desc(
45         "The percentage threshold against total count for the promotion for "
46         "jump tables"),
47     cl::init(5), cl::Hidden, cl::cat(BoltOptCategory));
48 
49 static cl::opt<unsigned> ICPCallsRemainingPercentThreshold(
50     "icp-calls-remaining-percent-threshold",
51     cl::desc("The percentage threshold against remaining unpromoted indirect "
52              "call count for the promotion for calls"),
53     cl::init(50), cl::Hidden, cl::cat(BoltOptCategory));
54 
55 static cl::opt<unsigned> ICPCallsTotalPercentThreshold(
56     "icp-calls-total-percent-threshold",
57     cl::desc(
58         "The percentage threshold against total count for the promotion for "
59         "calls"),
60     cl::init(30), cl::Hidden, cl::cat(BoltOptCategory));
61 
62 static cl::opt<unsigned> ICPMispredictThreshold(
63     "indirect-call-promotion-mispredict-threshold",
64     cl::desc("misprediction threshold for skipping ICP on an "
65              "indirect call"),
66     cl::init(0), cl::cat(BoltOptCategory));
67 
68 static cl::opt<bool> ICPUseMispredicts(
69     "indirect-call-promotion-use-mispredicts",
70     cl::desc("use misprediction frequency for determining whether or not ICP "
71              "should be applied at a callsite.  The "
72              "-indirect-call-promotion-mispredict-threshold value will be used "
73              "by this heuristic"),
74     cl::cat(BoltOptCategory));
75 
76 static cl::opt<unsigned>
77     ICPTopN("indirect-call-promotion-topn",
78             cl::desc("limit number of targets to consider when doing indirect "
79                      "call promotion. 0 = no limit"),
80             cl::init(3), cl::cat(BoltOptCategory));
81 
82 static cl::opt<unsigned> ICPCallsTopN(
83     "indirect-call-promotion-calls-topn",
84     cl::desc("limit number of targets to consider when doing indirect "
85              "call promotion on calls. 0 = no limit"),
86     cl::init(0), cl::cat(BoltOptCategory));
87 
88 static cl::opt<unsigned> ICPJumpTablesTopN(
89     "indirect-call-promotion-jump-tables-topn",
90     cl::desc("limit number of targets to consider when doing indirect "
91              "call promotion on jump tables. 0 = no limit"),
92     cl::init(0), cl::cat(BoltOptCategory));
93 
94 static cl::opt<bool> EliminateLoads(
95     "icp-eliminate-loads",
96     cl::desc("enable load elimination using memory profiling data when "
97              "performing ICP"),
98     cl::init(true), cl::cat(BoltOptCategory));
99 
100 static cl::opt<unsigned> ICPTopCallsites(
101     "icp-top-callsites",
102     cl::desc("optimize hottest calls until at least this percentage of all "
103              "indirect calls frequency is covered. 0 = all callsites"),
104     cl::init(99), cl::Hidden, cl::cat(BoltOptCategory));
105 
106 static cl::list<std::string>
107     ICPFuncsList("icp-funcs", cl::CommaSeparated,
108                  cl::desc("list of functions to enable ICP for"),
109                  cl::value_desc("func1,func2,func3,..."), cl::Hidden,
110                  cl::cat(BoltOptCategory));
111 
112 static cl::opt<bool>
113     ICPOldCodeSequence("icp-old-code-sequence",
114                        cl::desc("use old code sequence for promoted calls"),
115                        cl::Hidden, cl::cat(BoltOptCategory));
116 
117 static cl::opt<bool> ICPJumpTablesByTarget(
118     "icp-jump-tables-targets",
119     cl::desc(
120         "for jump tables, optimize indirect jmp targets instead of indices"),
121     cl::Hidden, cl::cat(BoltOptCategory));
122 
123 static cl::opt<bool> ICPPeelForInline(
124     "icp-inline", cl::desc("only promote call targets eligible for inlining"),
125     cl::Hidden, cl::cat(BoltOptCategory));
126 
127 } // namespace opts
128 
129 static bool verifyProfile(std::map<uint64_t, BinaryFunction> &BFs) {
130   bool IsValid = true;
131   for (auto &BFI : BFs) {
132     BinaryFunction &BF = BFI.second;
133     if (!BF.isSimple())
134       continue;
135     for (BinaryBasicBlock *BB : BF.layout()) {
136       auto BI = BB->branch_info_begin();
137       for (BinaryBasicBlock *SuccBB : BB->successors()) {
138         if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && BI->Count > 0) {
139           if (BB->getKnownExecutionCount() == 0 ||
140               SuccBB->getKnownExecutionCount() == 0) {
141             errs() << "BOLT-WARNING: profile verification failed after ICP for "
142                       "function "
143                    << BF << '\n';
144             IsValid = false;
145           }
146         }
147         ++BI;
148       }
149     }
150   }
151   return IsValid;
152 }
153 
154 namespace llvm {
155 namespace bolt {
156 
157 IndirectCallPromotion::Callsite::Callsite(BinaryFunction &BF,
158                                           const IndirectCallProfile &ICP)
159     : From(BF.getSymbol()), To(ICP.Offset), Mispreds(ICP.Mispreds),
160       Branches(ICP.Count) {
161   if (ICP.Symbol) {
162     To.Sym = ICP.Symbol;
163     To.Addr = 0;
164   }
165 }
166 
167 void IndirectCallPromotion::printDecision(
168     llvm::raw_ostream &OS,
169     std::vector<IndirectCallPromotion::Callsite> &Targets, unsigned N) const {
170   uint64_t TotalCount = 0;
171   uint64_t TotalMispreds = 0;
172   for (const Callsite &S : Targets) {
173     TotalCount += S.Branches;
174     TotalMispreds += S.Mispreds;
175   }
176   if (!TotalCount)
177     TotalCount = 1;
178   if (!TotalMispreds)
179     TotalMispreds = 1;
180 
181   OS << "BOLT-INFO: ICP decision for call site with " << Targets.size()
182      << " targets, Count = " << TotalCount << ", Mispreds = " << TotalMispreds
183      << "\n";
184 
185   size_t I = 0;
186   for (const Callsite &S : Targets) {
187     OS << "Count = " << S.Branches << ", "
188        << format("%.1f", (100.0 * S.Branches) / TotalCount) << ", "
189        << "Mispreds = " << S.Mispreds << ", "
190        << format("%.1f", (100.0 * S.Mispreds) / TotalMispreds);
191     if (I < N)
192       OS << " * to be optimized *";
193     if (!S.JTIndices.empty()) {
194       OS << " Indices:";
195       for (const uint64_t Idx : S.JTIndices)
196         OS << " " << Idx;
197     }
198     OS << "\n";
199     I += S.JTIndices.empty() ? 1 : S.JTIndices.size();
200   }
201 }
202 
203 // Get list of targets for a given call sorted by most frequently
204 // called first.
205 std::vector<IndirectCallPromotion::Callsite>
206 IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
207                                       const MCInst &Inst) const {
208   BinaryFunction &BF = *BB.getFunction();
209   const BinaryContext &BC = BF.getBinaryContext();
210   std::vector<Callsite> Targets;
211 
212   if (const JumpTable *JT = BF.getJumpTable(Inst)) {
213     // Don't support PIC jump tables for now
214     if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_PIC)
215       return Targets;
216     const Location From(BF.getSymbol());
217     const std::pair<size_t, size_t> Range =
218         JT->getEntriesForAddress(BC.MIB->getJumpTable(Inst));
219     assert(JT->Counts.empty() || JT->Counts.size() >= Range.second);
220     JumpTable::JumpInfo DefaultJI;
221     const JumpTable::JumpInfo *JI =
222         JT->Counts.empty() ? &DefaultJI : &JT->Counts[Range.first];
223     const size_t JIAdj = JT->Counts.empty() ? 0 : 1;
224     assert(JT->Type == JumpTable::JTT_PIC ||
225            JT->EntrySize == BC.AsmInfo->getCodePointerSize());
226     for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
227       MCSymbol *Entry = JT->Entries[I];
228       assert(BF.getBasicBlockForLabel(Entry) ||
229              Entry == BF.getFunctionEndLabel() ||
230              Entry == BF.getFunctionColdEndLabel());
231       if (Entry == BF.getFunctionEndLabel() ||
232           Entry == BF.getFunctionColdEndLabel())
233         continue;
234       const Location To(Entry);
235       const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(Entry);
236       Targets.emplace_back(From, To, BI.MispredictedCount, BI.Count,
237                            I - Range.first);
238     }
239 
240     // Sort by symbol then addr.
241     llvm::sort(Targets, [](const Callsite &A, const Callsite &B) {
242       if (A.To.Sym && B.To.Sym)
243         return A.To.Sym < B.To.Sym;
244       else if (A.To.Sym && !B.To.Sym)
245         return true;
246       else if (!A.To.Sym && B.To.Sym)
247         return false;
248       else
249         return A.To.Addr < B.To.Addr;
250     });
251 
252     // Targets may contain multiple entries to the same target, but using
253     // different indices. Their profile will report the same number of branches
254     // for different indices if the target is the same. That's because we don't
255     // profile the index value, but only the target via LBR.
256     auto First = Targets.begin();
257     auto Last = Targets.end();
258     auto Result = First;
259     while (++First != Last) {
260       Callsite &A = *Result;
261       const Callsite &B = *First;
262       if (A.To.Sym && B.To.Sym && A.To.Sym == B.To.Sym)
263         A.JTIndices.insert(A.JTIndices.end(), B.JTIndices.begin(),
264                            B.JTIndices.end());
265       else
266         *(++Result) = *First;
267     }
268     ++Result;
269 
270     LLVM_DEBUG(if (Targets.end() - Result > 0) {
271       dbgs() << "BOLT-INFO: ICP: " << (Targets.end() - Result)
272              << " duplicate targets removed\n";
273     });
274 
275     Targets.erase(Result, Targets.end());
276   } else {
277     // Don't try to optimize PC relative indirect calls.
278     if (Inst.getOperand(0).isReg() &&
279         Inst.getOperand(0).getReg() == BC.MRI->getProgramCounter())
280       return Targets;
281 
282     const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
283         Inst, "CallProfile");
284     if (ICSP) {
285       for (const IndirectCallProfile &CSP : ICSP.get()) {
286         Callsite Site(BF, CSP);
287         if (Site.isValid())
288           Targets.emplace_back(std::move(Site));
289       }
290     }
291   }
292 
293   // Sort by target count, number of indices in case of jump table, and
294   // mispredicts. We prioritize targets with high count, small number of indices
295   // and high mispredicts. Break ties by selecting targets with lower addresses.
296   llvm::stable_sort(Targets, [](const Callsite &A, const Callsite &B) {
297     if (A.Branches != B.Branches)
298       return A.Branches > B.Branches;
299     if (A.JTIndices.size() != B.JTIndices.size())
300       return A.JTIndices.size() < B.JTIndices.size();
301     if (A.Mispreds != B.Mispreds)
302       return A.Mispreds > B.Mispreds;
303     return A.To.Addr < B.To.Addr;
304   });
305 
306   // Remove non-symbol targets
307   llvm::erase_if(Targets, [](const Callsite &CS) { return !CS.To.Sym; });
308 
309   LLVM_DEBUG(if (BF.getJumpTable(Inst)) {
310     uint64_t TotalCount = 0;
311     uint64_t TotalMispreds = 0;
312     for (const Callsite &S : Targets) {
313       TotalCount += S.Branches;
314       TotalMispreds += S.Mispreds;
315     }
316     if (!TotalCount)
317       TotalCount = 1;
318     if (!TotalMispreds)
319       TotalMispreds = 1;
320 
321     dbgs() << "BOLT-INFO: ICP: jump table size = " << Targets.size()
322            << ", Count = " << TotalCount << ", Mispreds = " << TotalMispreds
323            << "\n";
324 
325     size_t I = 0;
326     for (const Callsite &S : Targets) {
327       dbgs() << "Count[" << I << "] = " << S.Branches << ", "
328              << format("%.1f", (100.0 * S.Branches) / TotalCount) << ", "
329              << "Mispreds[" << I << "] = " << S.Mispreds << ", "
330              << format("%.1f", (100.0 * S.Mispreds) / TotalMispreds) << "\n";
331       ++I;
332     }
333   });
334 
335   return Targets;
336 }
337 
338 IndirectCallPromotion::JumpTableInfoType
339 IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
340                                                    MCInst &CallInst,
341                                                    MCInst *&TargetFetchInst,
342                                                    const JumpTable *JT) const {
343   assert(JT && "Can't get jump table addrs for non-jump tables.");
344 
345   BinaryFunction &Function = *BB.getFunction();
346   BinaryContext &BC = Function.getBinaryContext();
347 
348   if (!Function.hasMemoryProfile() || !opts::EliminateLoads)
349     return JumpTableInfoType();
350 
351   JumpTableInfoType HotTargets;
352   MCInst *MemLocInstr;
353   MCInst *PCRelBaseOut;
354   unsigned BaseReg, IndexReg;
355   int64_t DispValue;
356   const MCExpr *DispExpr;
357   MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
358   const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
359       CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
360       MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
361 
362   assert(MemLocInstr && "There should always be a load for jump tables");
363   if (!MemLocInstr)
364     return JumpTableInfoType();
365 
366   LLVM_DEBUG({
367     dbgs() << "BOLT-INFO: ICP attempting to find memory profiling data for "
368            << "jump table in " << Function << " at @ "
369            << (&CallInst - &BB.front()) << "\n"
370            << "BOLT-INFO: ICP target fetch instructions:\n";
371     BC.printInstruction(dbgs(), *MemLocInstr, 0, &Function);
372     if (MemLocInstr != &CallInst)
373       BC.printInstruction(dbgs(), CallInst, 0, &Function);
374   });
375 
376   DEBUG_VERBOSE(1, {
377     dbgs() << "Jmp info: Type = " << (unsigned)Type << ", "
378            << "BaseReg = " << BC.MRI->getName(BaseReg) << ", "
379            << "IndexReg = " << BC.MRI->getName(IndexReg) << ", "
380            << "DispValue = " << Twine::utohexstr(DispValue) << ", "
381            << "DispExpr = " << DispExpr << ", "
382            << "MemLocInstr = ";
383     BC.printInstruction(dbgs(), *MemLocInstr, 0, &Function);
384     dbgs() << "\n";
385   });
386 
387   ++TotalIndexBasedCandidates;
388 
389   auto ErrorOrMemAccesssProfile =
390       BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(*MemLocInstr,
391                                                       "MemoryAccessProfile");
392   if (!ErrorOrMemAccesssProfile) {
393     DEBUG_VERBOSE(1, dbgs()
394                          << "BOLT-INFO: ICP no memory profiling data found\n");
395     return JumpTableInfoType();
396   }
397   MemoryAccessProfile &MemAccessProfile = ErrorOrMemAccesssProfile.get();
398 
399   uint64_t ArrayStart;
400   if (DispExpr) {
401     ErrorOr<uint64_t> DispValueOrError =
402         BC.getSymbolValue(*BC.MIB->getTargetSymbol(DispExpr));
403     assert(DispValueOrError && "global symbol needs a value");
404     ArrayStart = *DispValueOrError;
405   } else {
406     ArrayStart = static_cast<uint64_t>(DispValue);
407   }
408 
409   if (BaseReg == BC.MRI->getProgramCounter())
410     ArrayStart += Function.getAddress() + MemAccessProfile.NextInstrOffset;
411 
412   // This is a map of [symbol] -> [count, index] and is used to combine indices
413   // into the jump table since there may be multiple addresses that all have the
414   // same entry.
415   std::map<MCSymbol *, std::pair<uint64_t, uint64_t>> HotTargetMap;
416   const std::pair<size_t, size_t> Range = JT->getEntriesForAddress(ArrayStart);
417 
418   for (const AddressAccess &AccessInfo : MemAccessProfile.AddressAccessInfo) {
419     size_t Index;
420     // Mem data occasionally includes nullprs, ignore them.
421     if (!AccessInfo.MemoryObject && !AccessInfo.Offset)
422       continue;
423 
424     if (AccessInfo.Offset % JT->EntrySize != 0) // ignore bogus data
425       return JumpTableInfoType();
426 
427     if (AccessInfo.MemoryObject) {
428       // Deal with bad/stale data
429       if (!AccessInfo.MemoryObject->getName().startswith(
430               "JUMP_TABLE/" + Function.getOneName().str()))
431         return JumpTableInfoType();
432       Index =
433           (AccessInfo.Offset - (ArrayStart - JT->getAddress())) / JT->EntrySize;
434     } else {
435       Index = (AccessInfo.Offset - ArrayStart) / JT->EntrySize;
436     }
437 
438     // If Index is out of range it probably means the memory profiling data is
439     // wrong for this instruction, bail out.
440     if (Index >= Range.second) {
441       LLVM_DEBUG(dbgs() << "BOLT-INFO: Index out of range of " << Range.first
442                         << ", " << Range.second << "\n");
443       return JumpTableInfoType();
444     }
445 
446     // Make sure the hot index points at a legal label corresponding to a BB,
447     // e.g. not the end of function (unreachable) label.
448     if (!Function.getBasicBlockForLabel(JT->Entries[Index + Range.first])) {
449       LLVM_DEBUG({
450         dbgs() << "BOLT-INFO: hot index " << Index << " pointing at bogus "
451                << "label " << JT->Entries[Index + Range.first]->getName()
452                << " in jump table:\n";
453         JT->print(dbgs());
454         dbgs() << "HotTargetMap:\n";
455         for (std::pair<MCSymbol *const, std::pair<uint64_t, uint64_t>> &HT :
456              HotTargetMap)
457           dbgs() << "BOLT-INFO: " << HT.first->getName()
458                  << " = (count=" << HT.second.first
459                  << ", index=" << HT.second.second << ")\n";
460       });
461       return JumpTableInfoType();
462     }
463 
464     std::pair<uint64_t, uint64_t> &HotTarget =
465         HotTargetMap[JT->Entries[Index + Range.first]];
466     HotTarget.first += AccessInfo.Count;
467     HotTarget.second = Index;
468   }
469 
470   llvm::transform(
471       HotTargetMap, std::back_inserter(HotTargets),
472       [](const std::pair<MCSymbol *, std::pair<uint64_t, uint64_t>> &A) {
473         return A.second;
474       });
475 
476   // Sort with highest counts first.
477   llvm::sort(reverse(HotTargets));
478 
479   LLVM_DEBUG({
480     dbgs() << "BOLT-INFO: ICP jump table hot targets:\n";
481     for (const std::pair<uint64_t, uint64_t> &Target : HotTargets)
482       dbgs() << "BOLT-INFO:  Idx = " << Target.second << ", "
483              << "Count = " << Target.first << "\n";
484   });
485 
486   BC.MIB->getOrCreateAnnotationAs<uint16_t>(CallInst, "JTIndexReg") = IndexReg;
487 
488   TargetFetchInst = MemLocInstr;
489 
490   return HotTargets;
491 }
492 
493 IndirectCallPromotion::SymTargetsType
494 IndirectCallPromotion::findCallTargetSymbols(std::vector<Callsite> &Targets,
495                                              size_t &N, BinaryBasicBlock &BB,
496                                              MCInst &CallInst,
497                                              MCInst *&TargetFetchInst) const {
498   const JumpTable *JT = BB.getFunction()->getJumpTable(CallInst);
499   SymTargetsType SymTargets;
500 
501   if (!JT) {
502     for (size_t I = 0; I < N; ++I) {
503       assert(Targets[I].To.Sym && "All ICP targets must be to known symbols");
504       assert(Targets[I].JTIndices.empty() &&
505              "Can't have jump table indices for non-jump tables");
506       SymTargets.emplace_back(Targets[I].To.Sym, 0);
507     }
508     return SymTargets;
509   }
510 
511   // Use memory profile to select hot targets.
512   JumpTableInfoType HotTargets =
513       maybeGetHotJumpTableTargets(BB, CallInst, TargetFetchInst, JT);
514 
515   auto findTargetsIndex = [&](uint64_t JTIndex) {
516     for (size_t I = 0; I < Targets.size(); ++I)
517       if (llvm::is_contained(Targets[I].JTIndices, JTIndex))
518         return I;
519     LLVM_DEBUG(dbgs() << "BOLT-ERROR: Unable to find target index for hot jump "
520                       << " table entry in " << *BB.getFunction() << "\n");
521     llvm_unreachable("Hot indices must be referred to by at least one "
522                      "callsite");
523   };
524 
525   if (!HotTargets.empty()) {
526     if (opts::Verbosity >= 1)
527       for (size_t I = 0; I < HotTargets.size(); ++I)
528         outs() << "BOLT-INFO: HotTarget[" << I << "] = (" << HotTargets[I].first
529                << ", " << HotTargets[I].second << ")\n";
530 
531     // Recompute hottest targets, now discriminating which index is hot
532     // NOTE: This is a tradeoff. On one hand, we get index information. On the
533     // other hand, info coming from the memory profile is much less accurate
534     // than LBRs. So we may actually end up working with more coarse
535     // profile granularity in exchange for information about indices.
536     std::vector<Callsite> NewTargets;
537     std::map<const MCSymbol *, uint32_t> IndicesPerTarget;
538     uint64_t TotalMemAccesses = 0;
539     for (size_t I = 0; I < HotTargets.size(); ++I) {
540       const uint64_t TargetIndex = findTargetsIndex(HotTargets[I].second);
541       ++IndicesPerTarget[Targets[TargetIndex].To.Sym];
542       TotalMemAccesses += HotTargets[I].first;
543     }
544     uint64_t RemainingMemAccesses = TotalMemAccesses;
545     const size_t TopN =
546         opts::ICPJumpTablesTopN ? opts::ICPJumpTablesTopN : opts::ICPTopN;
547     size_t I = 0;
548     for (; I < HotTargets.size(); ++I) {
549       const uint64_t MemAccesses = HotTargets[I].first;
550       if (100 * MemAccesses <
551           TotalMemAccesses * opts::ICPJTTotalPercentThreshold)
552         break;
553       if (100 * MemAccesses <
554           RemainingMemAccesses * opts::ICPJTRemainingPercentThreshold)
555         break;
556       if (TopN && I >= TopN)
557         break;
558       RemainingMemAccesses -= MemAccesses;
559 
560       const uint64_t JTIndex = HotTargets[I].second;
561       Callsite &Target = Targets[findTargetsIndex(JTIndex)];
562 
563       NewTargets.push_back(Target);
564       std::vector<uint64_t>({JTIndex}).swap(NewTargets.back().JTIndices);
565       llvm::erase_value(Target.JTIndices, JTIndex);
566 
567       // Keep fixCFG counts sane if more indices use this same target later
568       assert(IndicesPerTarget[Target.To.Sym] > 0 && "wrong map");
569       NewTargets.back().Branches =
570           Target.Branches / IndicesPerTarget[Target.To.Sym];
571       NewTargets.back().Mispreds =
572           Target.Mispreds / IndicesPerTarget[Target.To.Sym];
573       assert(Target.Branches >= NewTargets.back().Branches);
574       assert(Target.Mispreds >= NewTargets.back().Mispreds);
575       Target.Branches -= NewTargets.back().Branches;
576       Target.Mispreds -= NewTargets.back().Mispreds;
577     }
578     llvm::copy(Targets, std::back_inserter(NewTargets));
579     std::swap(NewTargets, Targets);
580     N = I;
581 
582     if (N == 0 && opts::Verbosity >= 1) {
583       outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " in "
584              << BB.getName() << ": failed to meet thresholds after memory "
585              << "profile data was loaded.\n";
586       return SymTargets;
587     }
588   }
589 
590   for (size_t I = 0, TgtIdx = 0; I < N; ++TgtIdx) {
591     Callsite &Target = Targets[TgtIdx];
592     assert(Target.To.Sym && "All ICP targets must be to known symbols");
593     assert(!Target.JTIndices.empty() && "Jump tables must have indices");
594     for (uint64_t Idx : Target.JTIndices) {
595       SymTargets.emplace_back(Target.To.Sym, Idx);
596       ++I;
597     }
598   }
599 
600   return SymTargets;
601 }
602 
603 IndirectCallPromotion::MethodInfoType IndirectCallPromotion::maybeGetVtableSyms(
604     BinaryBasicBlock &BB, MCInst &Inst,
605     const SymTargetsType &SymTargets) const {
606   BinaryFunction &Function = *BB.getFunction();
607   BinaryContext &BC = Function.getBinaryContext();
608   std::vector<std::pair<MCSymbol *, uint64_t>> VtableSyms;
609   std::vector<MCInst *> MethodFetchInsns;
610   unsigned VtableReg, MethodReg;
611   uint64_t MethodOffset;
612 
613   assert(!Function.getJumpTable(Inst) &&
614          "Can't get vtable addrs for jump tables.");
615 
616   if (!Function.hasMemoryProfile() || !opts::EliminateLoads)
617     return MethodInfoType();
618 
619   MutableArrayRef<MCInst> Insts(&BB.front(), &Inst + 1);
620   if (!BC.MIB->analyzeVirtualMethodCall(Insts.begin(), Insts.end(),
621                                         MethodFetchInsns, VtableReg, MethodReg,
622                                         MethodOffset)) {
623     DEBUG_VERBOSE(
624         1, dbgs() << "BOLT-INFO: ICP unable to analyze method call in "
625                   << Function << " at @ " << (&Inst - &BB.front()) << "\n");
626     return MethodInfoType();
627   }
628 
629   ++TotalMethodLoadEliminationCandidates;
630 
631   DEBUG_VERBOSE(1, {
632     dbgs() << "BOLT-INFO: ICP found virtual method call in " << Function
633            << " at @ " << (&Inst - &BB.front()) << "\n";
634     dbgs() << "BOLT-INFO: ICP method fetch instructions:\n";
635     for (MCInst *Inst : MethodFetchInsns)
636       BC.printInstruction(dbgs(), *Inst, 0, &Function);
637 
638     if (MethodFetchInsns.back() != &Inst)
639       BC.printInstruction(dbgs(), Inst, 0, &Function);
640   });
641 
642   // Try to get value profiling data for the method load instruction.
643   auto ErrorOrMemAccesssProfile =
644       BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(*MethodFetchInsns.back(),
645                                                       "MemoryAccessProfile");
646   if (!ErrorOrMemAccesssProfile) {
647     DEBUG_VERBOSE(1, dbgs()
648                          << "BOLT-INFO: ICP no memory profiling data found\n");
649     return MethodInfoType();
650   }
651   MemoryAccessProfile &MemAccessProfile = ErrorOrMemAccesssProfile.get();
652 
653   // Find the vtable that each method belongs to.
654   std::map<const MCSymbol *, uint64_t> MethodToVtable;
655 
656   for (const AddressAccess &AccessInfo : MemAccessProfile.AddressAccessInfo) {
657     uint64_t Address = AccessInfo.Offset;
658     if (AccessInfo.MemoryObject)
659       Address += AccessInfo.MemoryObject->getAddress();
660 
661     // Ignore bogus data.
662     if (!Address)
663       continue;
664 
665     const uint64_t VtableBase = Address - MethodOffset;
666 
667     DEBUG_VERBOSE(1, dbgs() << "BOLT-INFO: ICP vtable = "
668                             << Twine::utohexstr(VtableBase) << "+"
669                             << MethodOffset << "/" << AccessInfo.Count << "\n");
670 
671     if (ErrorOr<uint64_t> MethodAddr = BC.getPointerAtAddress(Address)) {
672       BinaryData *MethodBD = BC.getBinaryDataAtAddress(MethodAddr.get());
673       if (!MethodBD) // skip unknown methods
674         continue;
675       MCSymbol *MethodSym = MethodBD->getSymbol();
676       MethodToVtable[MethodSym] = VtableBase;
677       DEBUG_VERBOSE(1, {
678         const BinaryFunction *Method = BC.getFunctionForSymbol(MethodSym);
679         dbgs() << "BOLT-INFO: ICP found method = "
680                << Twine::utohexstr(MethodAddr.get()) << "/"
681                << (Method ? Method->getPrintName() : "") << "\n";
682       });
683     }
684   }
685 
686   // Find the vtable for each target symbol.
687   for (size_t I = 0; I < SymTargets.size(); ++I) {
688     auto Itr = MethodToVtable.find(SymTargets[I].first);
689     if (Itr != MethodToVtable.end()) {
690       if (BinaryData *BD = BC.getBinaryDataContainingAddress(Itr->second)) {
691         const uint64_t Addend = Itr->second - BD->getAddress();
692         VtableSyms.emplace_back(BD->getSymbol(), Addend);
693         continue;
694       }
695     }
696     // Give up if we can't find the vtable for a method.
697     DEBUG_VERBOSE(1, dbgs() << "BOLT-INFO: ICP can't find vtable for "
698                             << SymTargets[I].first->getName() << "\n");
699     return MethodInfoType();
700   }
701 
702   // Make sure the vtable reg is not clobbered by the argument passing code
703   if (VtableReg != MethodReg) {
704     for (MCInst *CurInst = MethodFetchInsns.front(); CurInst < &Inst;
705          ++CurInst) {
706       const MCInstrDesc &InstrInfo = BC.MII->get(CurInst->getOpcode());
707       if (InstrInfo.hasDefOfPhysReg(*CurInst, VtableReg, *BC.MRI))
708         return MethodInfoType();
709     }
710   }
711 
712   return MethodInfoType(VtableSyms, MethodFetchInsns);
713 }
714 
715 std::vector<std::unique_ptr<BinaryBasicBlock>>
716 IndirectCallPromotion::rewriteCall(
717     BinaryBasicBlock &IndCallBlock, const MCInst &CallInst,
718     MCPlusBuilder::BlocksVectorTy &&ICPcode,
719     const std::vector<MCInst *> &MethodFetchInsns) const {
720   BinaryFunction &Function = *IndCallBlock.getFunction();
721   MCPlusBuilder *MIB = Function.getBinaryContext().MIB.get();
722 
723   // Create new basic blocks with correct code in each one first.
724   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
725   const bool IsTailCallOrJT =
726       (MIB->isTailCall(CallInst) || Function.getJumpTable(CallInst));
727 
728   // Move instructions from the tail of the original call block
729   // to the merge block.
730 
731   // Remember any pseudo instructions following a tail call.  These
732   // must be preserved and moved to the original block.
733   InstructionListType TailInsts;
734   const MCInst *TailInst = &CallInst;
735   if (IsTailCallOrJT)
736     while (TailInst + 1 < &(*IndCallBlock.end()) &&
737            MIB->isPseudo(*(TailInst + 1)))
738       TailInsts.push_back(*++TailInst);
739 
740   InstructionListType MovedInst = IndCallBlock.splitInstructions(&CallInst);
741   // Link new BBs to the original input offset of the BB where the indirect
742   // call site is, so we can map samples recorded in new BBs back to the
743   // original BB seen in the input binary (if using BAT)
744   const uint32_t OrigOffset = IndCallBlock.getInputOffset();
745 
746   IndCallBlock.eraseInstructions(MethodFetchInsns.begin(),
747                                  MethodFetchInsns.end());
748   if (IndCallBlock.empty() ||
749       (!MethodFetchInsns.empty() && MethodFetchInsns.back() == &CallInst))
750     IndCallBlock.addInstructions(ICPcode.front().second.begin(),
751                                  ICPcode.front().second.end());
752   else
753     IndCallBlock.replaceInstruction(std::prev(IndCallBlock.end()),
754                                     ICPcode.front().second);
755   IndCallBlock.addInstructions(TailInsts.begin(), TailInsts.end());
756 
757   for (auto Itr = ICPcode.begin() + 1; Itr != ICPcode.end(); ++Itr) {
758     MCSymbol *&Sym = Itr->first;
759     InstructionListType &Insts = Itr->second;
760     assert(Sym);
761     std::unique_ptr<BinaryBasicBlock> TBB = Function.createBasicBlock(Sym);
762     TBB->setOffset(OrigOffset);
763     for (MCInst &Inst : Insts) // sanitize new instructions.
764       if (MIB->isCall(Inst))
765         MIB->removeAnnotation(Inst, "CallProfile");
766     TBB->addInstructions(Insts.begin(), Insts.end());
767     NewBBs.emplace_back(std::move(TBB));
768   }
769 
770   // Move tail of instructions from after the original call to
771   // the merge block.
772   if (!IsTailCallOrJT)
773     NewBBs.back()->addInstructions(MovedInst.begin(), MovedInst.end());
774 
775   return NewBBs;
776 }
777 
778 BinaryBasicBlock *
779 IndirectCallPromotion::fixCFG(BinaryBasicBlock &IndCallBlock,
780                               const bool IsTailCall, const bool IsJumpTable,
781                               IndirectCallPromotion::BasicBlocksVector &&NewBBs,
782                               const std::vector<Callsite> &Targets) const {
783   BinaryFunction &Function = *IndCallBlock.getFunction();
784   using BinaryBranchInfo = BinaryBasicBlock::BinaryBranchInfo;
785   BinaryBasicBlock *MergeBlock = nullptr;
786 
787   // Scale indirect call counts to the execution count of the original
788   // basic block containing the indirect call.
789   uint64_t TotalCount = IndCallBlock.getKnownExecutionCount();
790   uint64_t TotalIndirectBranches = 0;
791   for (const Callsite &Target : Targets)
792     TotalIndirectBranches += Target.Branches;
793   if (TotalIndirectBranches == 0)
794     TotalIndirectBranches = 1;
795   BinaryBasicBlock::BranchInfoType BBI;
796   BinaryBasicBlock::BranchInfoType ScaledBBI;
797   for (const Callsite &Target : Targets) {
798     const size_t NumEntries =
799         std::max(static_cast<std::size_t>(1UL), Target.JTIndices.size());
800     for (size_t I = 0; I < NumEntries; ++I) {
801       BBI.push_back(
802           BinaryBranchInfo{(Target.Branches + NumEntries - 1) / NumEntries,
803                            (Target.Mispreds + NumEntries - 1) / NumEntries});
804       ScaledBBI.push_back(
805           BinaryBranchInfo{uint64_t(TotalCount * Target.Branches /
806                                     (NumEntries * TotalIndirectBranches)),
807                            uint64_t(TotalCount * Target.Mispreds /
808                                     (NumEntries * TotalIndirectBranches))});
809     }
810   }
811 
812   if (IsJumpTable) {
813     BinaryBasicBlock *NewIndCallBlock = NewBBs.back().get();
814     IndCallBlock.moveAllSuccessorsTo(NewIndCallBlock);
815 
816     std::vector<MCSymbol *> SymTargets;
817     for (const Callsite &Target : Targets) {
818       const size_t NumEntries =
819           std::max(static_cast<std::size_t>(1UL), Target.JTIndices.size());
820       for (size_t I = 0; I < NumEntries; ++I)
821         SymTargets.push_back(Target.To.Sym);
822     }
823     assert(SymTargets.size() > NewBBs.size() - 1 &&
824            "There must be a target symbol associated with each new BB.");
825 
826     for (uint64_t I = 0; I < NewBBs.size(); ++I) {
827       BinaryBasicBlock *SourceBB = I ? NewBBs[I - 1].get() : &IndCallBlock;
828       SourceBB->setExecutionCount(TotalCount);
829 
830       BinaryBasicBlock *TargetBB =
831           Function.getBasicBlockForLabel(SymTargets[I]);
832       SourceBB->addSuccessor(TargetBB, ScaledBBI[I]); // taken
833 
834       TotalCount -= ScaledBBI[I].Count;
835       SourceBB->addSuccessor(NewBBs[I].get(), TotalCount); // fall-through
836 
837       // Update branch info for the indirect jump.
838       BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
839           NewIndCallBlock->getBranchInfo(*TargetBB);
840       if (BranchInfo.Count > BBI[I].Count)
841         BranchInfo.Count -= BBI[I].Count;
842       else
843         BranchInfo.Count = 0;
844 
845       if (BranchInfo.MispredictedCount > BBI[I].MispredictedCount)
846         BranchInfo.MispredictedCount -= BBI[I].MispredictedCount;
847       else
848         BranchInfo.MispredictedCount = 0;
849     }
850   } else {
851     assert(NewBBs.size() >= 2);
852     assert(NewBBs.size() % 2 == 1 || IndCallBlock.succ_empty());
853     assert(NewBBs.size() % 2 == 1 || IsTailCall);
854 
855     auto ScaledBI = ScaledBBI.begin();
856     auto updateCurrentBranchInfo = [&] {
857       assert(ScaledBI != ScaledBBI.end());
858       TotalCount -= ScaledBI->Count;
859       ++ScaledBI;
860     };
861 
862     if (!IsTailCall) {
863       MergeBlock = NewBBs.back().get();
864       IndCallBlock.moveAllSuccessorsTo(MergeBlock);
865     }
866 
867     // Fix up successors and execution counts.
868     updateCurrentBranchInfo();
869     IndCallBlock.addSuccessor(NewBBs[1].get(), TotalCount);
870     IndCallBlock.addSuccessor(NewBBs[0].get(), ScaledBBI[0]);
871 
872     const size_t Adj = IsTailCall ? 1 : 2;
873     for (size_t I = 0; I < NewBBs.size() - Adj; ++I) {
874       assert(TotalCount <= IndCallBlock.getExecutionCount() ||
875              TotalCount <= uint64_t(TotalIndirectBranches));
876       uint64_t ExecCount = ScaledBBI[(I + 1) / 2].Count;
877       if (I % 2 == 0) {
878         if (MergeBlock)
879           NewBBs[I]->addSuccessor(MergeBlock, ScaledBBI[(I + 1) / 2].Count);
880       } else {
881         assert(I + 2 < NewBBs.size());
882         updateCurrentBranchInfo();
883         NewBBs[I]->addSuccessor(NewBBs[I + 2].get(), TotalCount);
884         NewBBs[I]->addSuccessor(NewBBs[I + 1].get(), ScaledBBI[(I + 1) / 2]);
885         ExecCount += TotalCount;
886       }
887       NewBBs[I]->setExecutionCount(ExecCount);
888     }
889 
890     if (MergeBlock) {
891       // Arrange for the MergeBlock to be the fallthrough for the first
892       // promoted call block.
893       std::unique_ptr<BinaryBasicBlock> MBPtr;
894       std::swap(MBPtr, NewBBs.back());
895       NewBBs.pop_back();
896       NewBBs.emplace(NewBBs.begin() + 1, std::move(MBPtr));
897       // TODO: is COUNT_FALLTHROUGH_EDGE the right thing here?
898       NewBBs.back()->addSuccessor(MergeBlock, TotalCount); // uncond branch
899     }
900   }
901 
902   // Update the execution count.
903   NewBBs.back()->setExecutionCount(TotalCount);
904 
905   // Update BB and BB layout.
906   Function.insertBasicBlocks(&IndCallBlock, std::move(NewBBs));
907   assert(Function.validateCFG());
908 
909   return MergeBlock;
910 }
911 
912 size_t IndirectCallPromotion::canPromoteCallsite(
913     const BinaryBasicBlock &BB, const MCInst &Inst,
914     const std::vector<Callsite> &Targets, uint64_t NumCalls) {
915   BinaryFunction *BF = BB.getFunction();
916   const BinaryContext &BC = BF->getBinaryContext();
917 
918   if (BB.getKnownExecutionCount() < opts::ExecutionCountThreshold)
919     return 0;
920 
921   const bool IsJumpTable = BF->getJumpTable(Inst);
922 
923   auto computeStats = [&](size_t N) {
924     for (size_t I = 0; I < N; ++I)
925       if (IsJumpTable)
926         TotalNumFrequentJmps += Targets[I].Branches;
927       else
928         TotalNumFrequentCalls += Targets[I].Branches;
929   };
930 
931   // If we have no targets (or no calls), skip this callsite.
932   if (Targets.empty() || !NumCalls) {
933     if (opts::Verbosity >= 1) {
934       const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
935       outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx << " in "
936              << BB.getName() << ", calls = " << NumCalls
937              << ", targets empty or NumCalls == 0.\n";
938     }
939     return 0;
940   }
941 
942   size_t TopN = opts::ICPTopN;
943   if (IsJumpTable)
944     TopN = opts::ICPJumpTablesTopN ? opts::ICPJumpTablesTopN : TopN;
945   else
946     TopN = opts::ICPCallsTopN ? opts::ICPCallsTopN : TopN;
947 
948   const size_t TrialN = TopN ? std::min(TopN, Targets.size()) : Targets.size();
949 
950   if (opts::ICPTopCallsites > 0) {
951     if (!BC.MIB->hasAnnotation(Inst, "DoICP"))
952       return 0;
953   }
954 
955   // Pick the top N targets.
956   uint64_t TotalMispredictsTopN = 0;
957   size_t N = 0;
958 
959   if (opts::ICPUseMispredicts &&
960       (!IsJumpTable || opts::ICPJumpTablesByTarget)) {
961     // Count total number of mispredictions for (at most) the top N targets.
962     // We may choose a smaller N (TrialN vs. N) if the frequency threshold
963     // is exceeded by fewer targets.
964     double Threshold = double(opts::ICPMispredictThreshold);
965     for (size_t I = 0; I < TrialN && Threshold > 0; ++I, ++N) {
966       Threshold -= (100.0 * Targets[I].Mispreds) / NumCalls;
967       TotalMispredictsTopN += Targets[I].Mispreds;
968     }
969     computeStats(N);
970 
971     // Compute the misprediction frequency of the top N call targets.  If this
972     // frequency is greater than the threshold, we should try ICP on this
973     // callsite.
974     const double TopNFrequency = (100.0 * TotalMispredictsTopN) / NumCalls;
975     if (TopNFrequency == 0 || TopNFrequency < opts::ICPMispredictThreshold) {
976       if (opts::Verbosity >= 1) {
977         const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
978         outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx
979                << " in " << BB.getName() << ", calls = " << NumCalls
980                << ", top N mis. frequency " << format("%.1f", TopNFrequency)
981                << "% < " << opts::ICPMispredictThreshold << "%\n";
982       }
983       return 0;
984     }
985   } else {
986     size_t MaxTargets = 0;
987 
988     // Count total number of calls for (at most) the top N targets.
989     // We may choose a smaller N (TrialN vs. N) if the frequency threshold
990     // is exceeded by fewer targets.
991     const unsigned TotalThreshold = IsJumpTable
992                                         ? opts::ICPJTTotalPercentThreshold
993                                         : opts::ICPCallsTotalPercentThreshold;
994     const unsigned RemainingThreshold =
995         IsJumpTable ? opts::ICPJTRemainingPercentThreshold
996                     : opts::ICPCallsRemainingPercentThreshold;
997     uint64_t NumRemainingCalls = NumCalls;
998     for (size_t I = 0; I < TrialN; ++I, ++MaxTargets) {
999       if (100 * Targets[I].Branches < NumCalls * TotalThreshold)
1000         break;
1001       if (100 * Targets[I].Branches < NumRemainingCalls * RemainingThreshold)
1002         break;
1003       if (N + (Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size()) >
1004           TrialN)
1005         break;
1006       TotalMispredictsTopN += Targets[I].Mispreds;
1007       NumRemainingCalls -= Targets[I].Branches;
1008       N += Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size();
1009     }
1010     computeStats(MaxTargets);
1011 
1012     // Don't check misprediction frequency for jump tables -- we don't really
1013     // care as long as we are saving loads from the jump table.
1014     if (!IsJumpTable || opts::ICPJumpTablesByTarget) {
1015       // Compute the misprediction frequency of the top N call targets.  If
1016       // this frequency is less than the threshold, we should skip ICP at
1017       // this callsite.
1018       const double TopNMispredictFrequency =
1019           (100.0 * TotalMispredictsTopN) / NumCalls;
1020 
1021       if (TopNMispredictFrequency < opts::ICPMispredictThreshold) {
1022         if (opts::Verbosity >= 1) {
1023           const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
1024           outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx
1025                  << " in " << BB.getName() << ", calls = " << NumCalls
1026                  << ", top N mispredict frequency "
1027                  << format("%.1f", TopNMispredictFrequency) << "% < "
1028                  << opts::ICPMispredictThreshold << "%\n";
1029         }
1030         return 0;
1031       }
1032     }
1033   }
1034 
1035   // Filter by inline-ability of target functions, stop at first target that
1036   // can't be inlined.
1037   if (opts::ICPPeelForInline) {
1038     for (size_t I = 0; I < N; ++I) {
1039       const MCSymbol *TargetSym = Targets[I].To.Sym;
1040       const BinaryFunction *TargetBF = BC.getFunctionForSymbol(TargetSym);
1041       if (!BinaryFunctionPass::shouldOptimize(*TargetBF) ||
1042           getInliningInfo(*TargetBF).Type == InliningType::INL_NONE) {
1043         N = I;
1044         break;
1045       }
1046     }
1047   }
1048 
1049   // Filter functions that can have ICP applied (for debugging)
1050   if (!opts::ICPFuncsList.empty()) {
1051     for (std::string &Name : opts::ICPFuncsList)
1052       if (BF->hasName(Name))
1053         return N;
1054     return 0;
1055   }
1056 
1057   return N;
1058 }
1059 
1060 void IndirectCallPromotion::printCallsiteInfo(
1061     const BinaryBasicBlock &BB, const MCInst &Inst,
1062     const std::vector<Callsite> &Targets, const size_t N,
1063     uint64_t NumCalls) const {
1064   BinaryContext &BC = BB.getFunction()->getBinaryContext();
1065   const bool IsTailCall = BC.MIB->isTailCall(Inst);
1066   const bool IsJumpTable = BB.getFunction()->getJumpTable(Inst);
1067   const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
1068 
1069   outs() << "BOLT-INFO: ICP candidate branch info: " << *BB.getFunction()
1070          << " @ " << InstIdx << " in " << BB.getName()
1071          << " -> calls = " << NumCalls
1072          << (IsTailCall ? " (tail)" : (IsJumpTable ? " (jump table)" : ""))
1073          << "\n";
1074   for (size_t I = 0; I < N; I++) {
1075     const double Frequency = 100.0 * Targets[I].Branches / NumCalls;
1076     const double MisFrequency = 100.0 * Targets[I].Mispreds / NumCalls;
1077     outs() << "BOLT-INFO:   ";
1078     if (Targets[I].To.Sym)
1079       outs() << Targets[I].To.Sym->getName();
1080     else
1081       outs() << Targets[I].To.Addr;
1082     outs() << ", calls = " << Targets[I].Branches
1083            << ", mispreds = " << Targets[I].Mispreds
1084            << ", taken freq = " << format("%.1f", Frequency) << "%"
1085            << ", mis. freq = " << format("%.1f", MisFrequency) << "%";
1086     bool First = true;
1087     for (uint64_t JTIndex : Targets[I].JTIndices) {
1088       outs() << (First ? ", indices = " : ", ") << JTIndex;
1089       First = false;
1090     }
1091     outs() << "\n";
1092   }
1093 
1094   LLVM_DEBUG({
1095     dbgs() << "BOLT-INFO: ICP original call instruction:";
1096     BC.printInstruction(dbgs(), Inst, Targets[0].From.Addr, nullptr, true);
1097   });
1098 }
1099 
1100 void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
1101   if (opts::ICP == ICP_NONE)
1102     return;
1103 
1104   auto &BFs = BC.getBinaryFunctions();
1105 
1106   const bool OptimizeCalls = (opts::ICP == ICP_CALLS || opts::ICP == ICP_ALL);
1107   const bool OptimizeJumpTables =
1108       (opts::ICP == ICP_JUMP_TABLES || opts::ICP == ICP_ALL);
1109 
1110   std::unique_ptr<RegAnalysis> RA;
1111   std::unique_ptr<BinaryFunctionCallGraph> CG;
1112   if (OptimizeJumpTables) {
1113     CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC)));
1114     RA.reset(new RegAnalysis(BC, &BFs, &*CG));
1115   }
1116 
1117   // If icp-top-callsites is enabled, compute the total number of indirect
1118   // calls and then optimize the hottest callsites that contribute to that
1119   // total.
1120   SetVector<BinaryFunction *> Functions;
1121   if (opts::ICPTopCallsites == 0) {
1122     for (auto &KV : BFs)
1123       Functions.insert(&KV.second);
1124   } else {
1125     using IndirectCallsite = std::tuple<uint64_t, MCInst *, BinaryFunction *>;
1126     std::vector<IndirectCallsite> IndirectCalls;
1127     size_t TotalIndirectCalls = 0;
1128 
1129     // Find all the indirect callsites.
1130     for (auto &BFIt : BFs) {
1131       BinaryFunction &Function = BFIt.second;
1132 
1133       if (!Function.isSimple() || Function.isIgnored() ||
1134           !Function.hasProfile())
1135         continue;
1136 
1137       const bool HasLayout = !Function.layout_empty();
1138 
1139       for (BinaryBasicBlock &BB : Function) {
1140         if (HasLayout && Function.isSplit() && BB.isCold())
1141           continue;
1142 
1143         for (MCInst &Inst : BB) {
1144           const bool IsJumpTable = Function.getJumpTable(Inst);
1145           const bool HasIndirectCallProfile =
1146               BC.MIB->hasAnnotation(Inst, "CallProfile");
1147           const bool IsDirectCall =
1148               (BC.MIB->isCall(Inst) && BC.MIB->getTargetSymbol(Inst, 0));
1149 
1150           if (!IsDirectCall &&
1151               ((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) ||
1152                (IsJumpTable && OptimizeJumpTables))) {
1153             uint64_t NumCalls = 0;
1154             for (const Callsite &BInfo : getCallTargets(BB, Inst))
1155               NumCalls += BInfo.Branches;
1156             IndirectCalls.push_back(
1157                 std::make_tuple(NumCalls, &Inst, &Function));
1158             TotalIndirectCalls += NumCalls;
1159           }
1160         }
1161       }
1162     }
1163 
1164     // Sort callsites by execution count.
1165     llvm::sort(reverse(IndirectCalls));
1166 
1167     // Find callsites that contribute to the top "opts::ICPTopCallsites"%
1168     // number of calls.
1169     const float TopPerc = opts::ICPTopCallsites / 100.0f;
1170     int64_t MaxCalls = TotalIndirectCalls * TopPerc;
1171     uint64_t LastFreq = std::numeric_limits<uint64_t>::max();
1172     size_t Num = 0;
1173     for (const IndirectCallsite &IC : IndirectCalls) {
1174       const uint64_t CurFreq = std::get<0>(IC);
1175       // Once we decide to stop, include at least all branches that share the
1176       // same frequency of the last one to avoid non-deterministic behavior
1177       // (e.g. turning on/off ICP depending on the order of functions)
1178       if (MaxCalls <= 0 && CurFreq != LastFreq)
1179         break;
1180       MaxCalls -= CurFreq;
1181       LastFreq = CurFreq;
1182       BC.MIB->addAnnotation(*std::get<1>(IC), "DoICP", true);
1183       Functions.insert(std::get<2>(IC));
1184       ++Num;
1185     }
1186     outs() << "BOLT-INFO: ICP Total indirect calls = " << TotalIndirectCalls
1187            << ", " << Num << " callsites cover " << opts::ICPTopCallsites
1188            << "% of all indirect calls\n";
1189   }
1190 
1191   for (BinaryFunction *FuncPtr : Functions) {
1192     BinaryFunction &Function = *FuncPtr;
1193 
1194     if (!Function.isSimple() || Function.isIgnored() || !Function.hasProfile())
1195       continue;
1196 
1197     const bool HasLayout = !Function.layout_empty();
1198 
1199     // Total number of indirect calls issued from the current Function.
1200     // (a fraction of TotalIndirectCalls)
1201     uint64_t FuncTotalIndirectCalls = 0;
1202     uint64_t FuncTotalIndirectJmps = 0;
1203 
1204     std::vector<BinaryBasicBlock *> BBs;
1205     for (BinaryBasicBlock &BB : Function) {
1206       // Skip indirect calls in cold blocks.
1207       if (!HasLayout || !Function.isSplit() || !BB.isCold())
1208         BBs.push_back(&BB);
1209     }
1210     if (BBs.empty())
1211       continue;
1212 
1213     DataflowInfoManager Info(Function, RA.get(), nullptr);
1214     while (!BBs.empty()) {
1215       BinaryBasicBlock *BB = BBs.back();
1216       BBs.pop_back();
1217 
1218       for (unsigned Idx = 0; Idx < BB->size(); ++Idx) {
1219         MCInst &Inst = BB->getInstructionAtIndex(Idx);
1220         const ptrdiff_t InstIdx = &Inst - &(*BB->begin());
1221         const bool IsTailCall = BC.MIB->isTailCall(Inst);
1222         const bool HasIndirectCallProfile =
1223             BC.MIB->hasAnnotation(Inst, "CallProfile");
1224         const bool IsJumpTable = Function.getJumpTable(Inst);
1225 
1226         if (BC.MIB->isCall(Inst))
1227           TotalCalls += BB->getKnownExecutionCount();
1228 
1229         if (IsJumpTable && !OptimizeJumpTables)
1230           continue;
1231 
1232         if (!IsJumpTable && (!HasIndirectCallProfile || !OptimizeCalls))
1233           continue;
1234 
1235         // Ignore direct calls.
1236         if (BC.MIB->isCall(Inst) && BC.MIB->getTargetSymbol(Inst, 0))
1237           continue;
1238 
1239         assert((BC.MIB->isCall(Inst) || BC.MIB->isIndirectBranch(Inst)) &&
1240                "expected a call or an indirect jump instruction");
1241 
1242         if (IsJumpTable)
1243           ++TotalJumpTableCallsites;
1244         else
1245           ++TotalIndirectCallsites;
1246 
1247         std::vector<Callsite> Targets = getCallTargets(*BB, Inst);
1248 
1249         // Compute the total number of calls from this particular callsite.
1250         uint64_t NumCalls = 0;
1251         for (const Callsite &BInfo : Targets)
1252           NumCalls += BInfo.Branches;
1253         if (!IsJumpTable)
1254           FuncTotalIndirectCalls += NumCalls;
1255         else
1256           FuncTotalIndirectJmps += NumCalls;
1257 
1258         // If FLAGS regs is alive after this jmp site, do not try
1259         // promoting because we will clobber FLAGS.
1260         if (IsJumpTable) {
1261           ErrorOr<const BitVector &> State =
1262               Info.getLivenessAnalysis().getStateBefore(Inst);
1263           if (!State || (State && (*State)[BC.MIB->getFlagsReg()])) {
1264             if (opts::Verbosity >= 1)
1265               outs() << "BOLT-INFO: ICP failed in " << Function << " @ "
1266                      << InstIdx << " in " << BB->getName()
1267                      << ", calls = " << NumCalls
1268                      << (State ? ", cannot clobber flags reg.\n"
1269                                : ", no liveness data available.\n");
1270             continue;
1271           }
1272         }
1273 
1274         // Should this callsite be optimized?  Return the number of targets
1275         // to use when promoting this call.  A value of zero means to skip
1276         // this callsite.
1277         size_t N = canPromoteCallsite(*BB, Inst, Targets, NumCalls);
1278 
1279         // If it is a jump table and it failed to meet our initial threshold,
1280         // proceed to findCallTargetSymbols -- it may reevaluate N if
1281         // memory profile is present
1282         if (!N && !IsJumpTable)
1283           continue;
1284 
1285         if (opts::Verbosity >= 1)
1286           printCallsiteInfo(*BB, Inst, Targets, N, NumCalls);
1287 
1288         // Find MCSymbols or absolute addresses for each call target.
1289         MCInst *TargetFetchInst = nullptr;
1290         const SymTargetsType SymTargets =
1291             findCallTargetSymbols(Targets, N, *BB, Inst, TargetFetchInst);
1292 
1293         // findCallTargetSymbols may have changed N if mem profile is available
1294         // for jump tables
1295         if (!N)
1296           continue;
1297 
1298         LLVM_DEBUG(printDecision(dbgs(), Targets, N));
1299 
1300         // If we can't resolve any of the target symbols, punt on this callsite.
1301         // TODO: can this ever happen?
1302         if (SymTargets.size() < N) {
1303           const size_t LastTarget = SymTargets.size();
1304           if (opts::Verbosity >= 1)
1305             outs() << "BOLT-INFO: ICP failed in " << Function << " @ "
1306                    << InstIdx << " in " << BB->getName()
1307                    << ", calls = " << NumCalls
1308                    << ", ICP failed to find target symbol for "
1309                    << Targets[LastTarget].To.Sym->getName() << "\n";
1310           continue;
1311         }
1312 
1313         MethodInfoType MethodInfo;
1314 
1315         if (!IsJumpTable) {
1316           MethodInfo = maybeGetVtableSyms(*BB, Inst, SymTargets);
1317           TotalMethodLoadsEliminated += MethodInfo.first.empty() ? 0 : 1;
1318           LLVM_DEBUG(dbgs()
1319                      << "BOLT-INFO: ICP "
1320                      << (!MethodInfo.first.empty() ? "found" : "did not find")
1321                      << " vtables for all methods.\n");
1322         } else if (TargetFetchInst) {
1323           ++TotalIndexBasedJumps;
1324           MethodInfo.second.push_back(TargetFetchInst);
1325         }
1326 
1327         // Generate new promoted call code for this callsite.
1328         MCPlusBuilder::BlocksVectorTy ICPcode =
1329             (IsJumpTable && !opts::ICPJumpTablesByTarget)
1330                 ? BC.MIB->jumpTablePromotion(Inst, SymTargets,
1331                                              MethodInfo.second, BC.Ctx.get())
1332                 : BC.MIB->indirectCallPromotion(
1333                       Inst, SymTargets, MethodInfo.first, MethodInfo.second,
1334                       opts::ICPOldCodeSequence, BC.Ctx.get());
1335 
1336         if (ICPcode.empty()) {
1337           if (opts::Verbosity >= 1)
1338             outs() << "BOLT-INFO: ICP failed in " << Function << " @ "
1339                    << InstIdx << " in " << BB->getName()
1340                    << ", calls = " << NumCalls
1341                    << ", unable to generate promoted call code.\n";
1342           continue;
1343         }
1344 
1345         LLVM_DEBUG({
1346           uint64_t Offset = Targets[0].From.Addr;
1347           dbgs() << "BOLT-INFO: ICP indirect call code:\n";
1348           for (const auto &entry : ICPcode) {
1349             const MCSymbol *const &Sym = entry.first;
1350             const InstructionListType &Insts = entry.second;
1351             if (Sym)
1352               dbgs() << Sym->getName() << ":\n";
1353             Offset = BC.printInstructions(dbgs(), Insts.begin(), Insts.end(),
1354                                           Offset);
1355           }
1356           dbgs() << "---------------------------------------------------\n";
1357         });
1358 
1359         // Rewrite the CFG with the newly generated ICP code.
1360         std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs =
1361             rewriteCall(*BB, Inst, std::move(ICPcode), MethodInfo.second);
1362 
1363         // Fix the CFG after inserting the new basic blocks.
1364         BinaryBasicBlock *MergeBlock =
1365             fixCFG(*BB, IsTailCall, IsJumpTable, std::move(NewBBs), Targets);
1366 
1367         // Since the tail of the original block was split off and it may contain
1368         // additional indirect calls, we must add the merge block to the set of
1369         // blocks to process.
1370         if (MergeBlock)
1371           BBs.push_back(MergeBlock);
1372 
1373         if (opts::Verbosity >= 1)
1374           outs() << "BOLT-INFO: ICP succeeded in " << Function << " @ "
1375                  << InstIdx << " in " << BB->getName()
1376                  << " -> calls = " << NumCalls << "\n";
1377 
1378         if (IsJumpTable)
1379           ++TotalOptimizedJumpTableCallsites;
1380         else
1381           ++TotalOptimizedIndirectCallsites;
1382 
1383         Modified.insert(&Function);
1384       }
1385     }
1386     TotalIndirectCalls += FuncTotalIndirectCalls;
1387     TotalIndirectJmps += FuncTotalIndirectJmps;
1388   }
1389 
1390   outs() << "BOLT-INFO: ICP total indirect callsites with profile = "
1391          << TotalIndirectCallsites << "\n"
1392          << "BOLT-INFO: ICP total jump table callsites = "
1393          << TotalJumpTableCallsites << "\n"
1394          << "BOLT-INFO: ICP total number of calls = " << TotalCalls << "\n"
1395          << "BOLT-INFO: ICP percentage of calls that are indirect = "
1396          << format("%.1f", (100.0 * TotalIndirectCalls) / TotalCalls) << "%\n"
1397          << "BOLT-INFO: ICP percentage of indirect calls that can be "
1398             "optimized = "
1399          << format("%.1f", (100.0 * TotalNumFrequentCalls) /
1400                                std::max<size_t>(TotalIndirectCalls, 1))
1401          << "%\n"
1402          << "BOLT-INFO: ICP percentage of indirect callsites that are "
1403             "optimized = "
1404          << format("%.1f", (100.0 * TotalOptimizedIndirectCallsites) /
1405                                std::max<uint64_t>(TotalIndirectCallsites, 1))
1406          << "%\n"
1407          << "BOLT-INFO: ICP number of method load elimination candidates = "
1408          << TotalMethodLoadEliminationCandidates << "\n"
1409          << "BOLT-INFO: ICP percentage of method calls candidates that have "
1410             "loads eliminated = "
1411          << format("%.1f", (100.0 * TotalMethodLoadsEliminated) /
1412                                std::max<uint64_t>(
1413                                    TotalMethodLoadEliminationCandidates, 1))
1414          << "%\n"
1415          << "BOLT-INFO: ICP percentage of indirect branches that are "
1416             "optimized = "
1417          << format("%.1f", (100.0 * TotalNumFrequentJmps) /
1418                                std::max<uint64_t>(TotalIndirectJmps, 1))
1419          << "%\n"
1420          << "BOLT-INFO: ICP percentage of jump table callsites that are "
1421          << "optimized = "
1422          << format("%.1f", (100.0 * TotalOptimizedJumpTableCallsites) /
1423                                std::max<uint64_t>(TotalJumpTableCallsites, 1))
1424          << "%\n"
1425          << "BOLT-INFO: ICP number of jump table callsites that can use hot "
1426          << "indices = " << TotalIndexBasedCandidates << "\n"
1427          << "BOLT-INFO: ICP percentage of jump table callsites that use hot "
1428             "indices = "
1429          << format("%.1f", (100.0 * TotalIndexBasedJumps) /
1430                                std::max<uint64_t>(TotalIndexBasedCandidates, 1))
1431          << "%\n";
1432 
1433   (void)verifyProfile;
1434 #ifndef NDEBUG
1435   verifyProfile(BFs);
1436 #endif
1437 }
1438 
1439 } // namespace bolt
1440 } // namespace llvm
1441