1 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Inliner class used for inlining binary functions.
10 //
11 // The current inliner has a limited callee support
12 // (see Inliner::getInliningInfo() for the most up-to-date details):
13 //
14 //  * No exception handling
15 //  * No jump tables
16 //  * Single entry point
17 //  * CFI update not supported - breaks unwinding
18 //  * Regular Call Sites:
19 //    - only leaf functions (or callees with only tail calls)
20 //      * no invokes (they can't be tail calls)
21 //    - no direct use of %rsp
22 //  * Tail Call Sites:
23 //    - since the stack is unmodified, the regular call limitations are lifted
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "bolt/Passes/Inliner.h"
28 #include "bolt/Core/MCPlus.h"
29 #include "llvm/Support/CommandLine.h"
30 #include <map>
31 
32 #define DEBUG_TYPE "bolt-inliner"
33 
34 using namespace llvm;
35 
36 namespace opts {
37 
38 extern cl::OptionCategory BoltOptCategory;
39 
40 static cl::opt<bool>
41 AdjustProfile("inline-ap",
42   cl::desc("adjust function profile after inlining"),
43   cl::ZeroOrMore,
44   cl::cat(BoltOptCategory));
45 
46 static cl::list<std::string>
47 ForceInlineFunctions("force-inline",
48   cl::CommaSeparated,
49   cl::desc("list of functions to always consider for inlining"),
50   cl::value_desc("func1,func2,func3,..."),
51   cl::Hidden,
52   cl::cat(BoltOptCategory));
53 
54 static cl::opt<bool>
55 InlineAll("inline-all",
56   cl::desc("inline all functions"),
57   cl::init(false),
58   cl::ZeroOrMore,
59   cl::cat(BoltOptCategory));
60 
61 static cl::opt<bool>
62 InlineIgnoreLeafCFI("inline-ignore-leaf-cfi",
63   cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
64   cl::init(true),
65   cl::ZeroOrMore,
66   cl::ReallyHidden,
67   cl::cat(BoltOptCategory));
68 
69 static cl::opt<bool>
70 InlineIgnoreCFI("inline-ignore-cfi",
71   cl::desc("inline functions with CFI programs (can break exception handling)"),
72   cl::init(false),
73   cl::ZeroOrMore,
74   cl::ReallyHidden,
75   cl::cat(BoltOptCategory));
76 
77 static cl::opt<unsigned>
78 InlineLimit("inline-limit",
79   cl::desc("maximum number of call sites to inline"),
80   cl::init(0),
81   cl::ZeroOrMore,
82   cl::Hidden,
83   cl::cat(BoltOptCategory));
84 
85 static cl::opt<unsigned>
86 InlineMaxIters("inline-max-iters",
87   cl::desc("maximum number of inline iterations"),
88   cl::init(3),
89   cl::ZeroOrMore,
90   cl::Hidden,
91   cl::cat(BoltOptCategory));
92 
93 static cl::opt<bool>
94 InlineSmallFunctions("inline-small-functions",
95   cl::desc("inline functions if increase in size is less than defined by "
96            "-inline-small-functions-bytes"),
97   cl::init(false),
98   cl::ZeroOrMore,
99   cl::cat(BoltOptCategory));
100 
101 static cl::opt<unsigned>
102 InlineSmallFunctionsBytes("inline-small-functions-bytes",
103   cl::desc("max number of bytes for the function to be considered small for "
104            "inlining purposes"),
105   cl::init(4),
106   cl::ZeroOrMore,
107   cl::Hidden,
108   cl::cat(BoltOptCategory));
109 
110 static cl::opt<bool>
111 NoInline("no-inline",
112   cl::desc("disable all inlining (overrides other inlining options)"),
113   cl::init(false),
114   cl::ZeroOrMore,
115   cl::cat(BoltOptCategory));
116 
117 /// This function returns true if any of inlining options are specified and the
118 /// inlining pass should be executed. Whenever a new inlining option is added,
119 /// this function should reflect the change.
120 bool inliningEnabled() {
121   return !NoInline &&
122          (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
123 }
124 
125 bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
126   for (std::string &Name : opts::ForceInlineFunctions)
127     if (Function.hasName(Name))
128       return true;
129   return false;
130 }
131 
132 void syncOptions() {
133   if (opts::InlineIgnoreCFI)
134     opts::InlineIgnoreLeafCFI = true;
135 
136   if (opts::InlineAll)
137     opts::InlineSmallFunctions = true;
138 }
139 
140 } // namespace opts
141 
142 namespace llvm {
143 namespace bolt {
144 
145 uint64_t Inliner::SizeOfCallInst;
146 uint64_t Inliner::SizeOfTailCallInst;
147 
148 uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
149   if (SizeOfCallInst)
150     return SizeOfCallInst;
151 
152   MCInst Inst;
153   BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
154   SizeOfCallInst = BC.computeInstructionSize(Inst);
155 
156   return SizeOfCallInst;
157 }
158 
159 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
160   if (SizeOfTailCallInst)
161     return SizeOfTailCallInst;
162 
163   MCInst Inst;
164   BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
165   SizeOfTailCallInst = BC.computeInstructionSize(Inst);
166 
167   return SizeOfTailCallInst;
168 }
169 
170 InliningInfo getInliningInfo(const BinaryFunction &BF) {
171   const BinaryContext &BC = BF.getBinaryContext();
172   bool DirectSP = false;
173   bool HasCFI = false;
174   bool IsLeaf = true;
175 
176   // Perform necessary checks unless the option overrides it.
177   if (!opts::mustConsider(BF)) {
178     if (BF.hasSDTMarker())
179       return INL_NONE;
180 
181     if (BF.hasEHRanges())
182       return INL_NONE;
183 
184     if (BF.isMultiEntry())
185       return INL_NONE;
186 
187     if (BF.hasJumpTables())
188       return INL_NONE;
189 
190     const MCPhysReg SPReg = BC.MIB->getStackPointer();
191     for (const BinaryBasicBlock *BB : BF.layout()) {
192       for (const MCInst &Inst : *BB) {
193         // Tail calls are marked as implicitly using the stack pointer and they
194         // could be inlined.
195         if (BC.MIB->isTailCall(Inst))
196           break;
197 
198         if (BC.MIB->isCFI(Inst)) {
199           HasCFI = true;
200           continue;
201         }
202 
203         if (BC.MIB->isCall(Inst))
204           IsLeaf = false;
205 
206         // Push/pop instructions are straightforward to handle.
207         if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
208           continue;
209 
210         DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
211                     BC.MIB->hasUseOfPhysReg(Inst, SPReg);
212       }
213     }
214   }
215 
216   if (HasCFI) {
217     if (!opts::InlineIgnoreLeafCFI)
218       return INL_NONE;
219 
220     if (!IsLeaf && !opts::InlineIgnoreCFI)
221       return INL_NONE;
222   }
223 
224   InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);
225 
226   size_t Size = BF.estimateSize();
227 
228   Info.SizeAfterInlining = Size;
229   Info.SizeAfterTailCallInlining = Size;
230 
231   // Handle special case of the known size reduction.
232   if (BF.size() == 1) {
233     // For a regular call the last return instruction could be removed
234     // (or converted to a branch).
235     const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
236     if (LastInst && BC.MIB->isReturn(*LastInst) &&
237         !BC.MIB->isTailCall(*LastInst)) {
238       const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
239       assert(Size >= RetInstSize);
240       Info.SizeAfterInlining -= RetInstSize;
241     }
242   }
243 
244   return Info;
245 }
246 
247 void Inliner::findInliningCandidates(BinaryContext &BC) {
248   for (const auto &BFI : BC.getBinaryFunctions()) {
249     const BinaryFunction &Function = BFI.second;
250     if (!shouldOptimize(Function))
251       continue;
252     const InliningInfo InlInfo = getInliningInfo(Function);
253     if (InlInfo.Type != INL_NONE)
254       InliningCandidates[&Function] = InlInfo;
255   }
256 }
257 
258 std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
259 Inliner::inlineCall(BinaryBasicBlock &CallerBB,
260                     BinaryBasicBlock::iterator CallInst,
261                     const BinaryFunction &Callee) {
262   BinaryFunction &CallerFunction = *CallerBB.getFunction();
263   BinaryContext &BC = CallerFunction.getBinaryContext();
264   auto &MIB = *BC.MIB;
265 
266   assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
267   assert(!Callee.isMultiEntry() &&
268          "cannot inline function with multiple entries");
269   assert(!Callee.hasJumpTables() &&
270          "cannot inline function with jump table(s)");
271 
272   // Get information about the call site.
273   const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
274   const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
275   const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
276   const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst);
277 
278   // Split basic block at the call site if there will be more incoming edges
279   // coming from the callee.
280   BinaryBasicBlock *FirstInlinedBB = &CallerBB;
281   if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
282     FirstInlinedBB = CallerBB.splitAt(CallInst);
283     CallInst = FirstInlinedBB->begin();
284   }
285 
286   // Split basic block after the call instruction unless the callee is trivial
287   // (i.e. consists of a single basic block). If necessary, obtain a basic block
288   // for return instructions in the callee to redirect to.
289   BinaryBasicBlock *NextBB = nullptr;
290   if (Callee.size() > 1) {
291     if (std::next(CallInst) != FirstInlinedBB->end())
292       NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
293     else
294       NextBB = FirstInlinedBB->getSuccessor();
295   }
296   if (NextBB)
297     FirstInlinedBB->removeSuccessor(NextBB);
298 
299   // Remove the call instruction.
300   auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);
301 
302   double ProfileRatio = 0;
303   if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
304     ProfileRatio =
305         (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;
306 
307   // Save execution count of the first block as we don't want it to change
308   // later due to profile adjustment rounding errors.
309   const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();
310 
311   // Copy basic blocks and maintain a map from their origin.
312   std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
313   InlinedBBMap[&Callee.front()] = FirstInlinedBB;
314   for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) {
315     BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock(0);
316     InlinedBBMap[&*BBI] = InlinedBB;
317     InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
318     if (Callee.hasValidProfile())
319       InlinedBB->setExecutionCount(BBI->getKnownExecutionCount());
320     else
321       InlinedBB->setExecutionCount(FirstInlinedBBCount);
322   }
323 
324   // Copy over instructions and edges.
325   for (const BinaryBasicBlock &BB : Callee) {
326     BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];
327 
328     if (InlinedBB != FirstInlinedBB)
329       InsertII = InlinedBB->begin();
330 
331     // Copy over instructions making any necessary mods.
332     for (MCInst Inst : BB) {
333       if (MIB.isPseudo(Inst))
334         continue;
335 
336       MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
337 
338       // Fix branch target. Strictly speaking, we don't have to do this as
339       // targets of direct branches will be fixed later and don't matter
340       // in the CFG state. However, disassembly may look misleading, and
341       // hence we do the fixing.
342       if (MIB.isBranch(Inst)) {
343         assert(!MIB.isIndirectBranch(Inst) &&
344                "unexpected indirect branch in callee");
345         const BinaryBasicBlock *TargetBB =
346             Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
347         assert(TargetBB && "cannot find target block in callee");
348         MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
349                                 BC.Ctx.get());
350       }
351 
352       if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
353         InsertII =
354             std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
355         continue;
356       }
357 
358       // Handle special instructions for a non-tail call site.
359       if (!MIB.isCall(Inst)) {
360         // Returns are removed.
361         break;
362       }
363 
364       MIB.convertTailCallToCall(Inst);
365 
366       // Propagate EH-related info to call instructions.
367       if (CSIsInvoke) {
368         MIB.addEHInfo(Inst, *CSEHInfo);
369         if (CSGNUArgsSize >= 0)
370           MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
371       }
372 
373       InsertII =
374           std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
375     }
376 
377     // Add CFG edges to the basic blocks of the inlined instance.
378     std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
379     std::transform(BB.succ_begin(), BB.succ_end(), Successors.begin(),
380                    [&InlinedBBMap](const BinaryBasicBlock *BB) {
381                      return InlinedBBMap.at(BB);
382                    });
383 
384     if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
385       InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
386                                BB.branch_info_begin(), BB.branch_info_end());
387     else
388       InlinedBB->addSuccessors(Successors.begin(), Successors.end());
389 
390     if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
391       // Either it's a return block or the last instruction never returns.
392       InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
393     }
394 
395     // Scale profiling info for blocks and edges after inlining.
396     if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
397       if (opts::AdjustProfile)
398         InlinedBB->adjustExecutionCount(ProfileRatio);
399       else
400         InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
401                                      ProfileRatio);
402     }
403   }
404 
405   // Restore the original execution count of the first inlined basic block.
406   FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);
407 
408   CallerFunction.recomputeLandingPads();
409 
410   if (NextBB)
411     return std::make_pair(NextBB, NextBB->begin());
412 
413   if (Callee.size() == 1)
414     return std::make_pair(FirstInlinedBB, InsertII);
415 
416   return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
417 }
418 
419 bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
420   BinaryContext &BC = Function.getBinaryContext();
421   std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
422                                          Function.layout().end());
423   std::sort(Blocks.begin(), Blocks.end(),
424             [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
425               return BB1->getKnownExecutionCount() >
426                      BB2->getKnownExecutionCount();
427             });
428 
429   bool DidInlining = false;
430   for (BinaryBasicBlock *BB : Blocks) {
431     for (auto InstIt = BB->begin(); InstIt != BB->end();) {
432       MCInst &Inst = *InstIt;
433       if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
434           !Inst.getOperand(0).isExpr()) {
435         ++InstIt;
436         continue;
437       }
438 
439       const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
440       assert(TargetSymbol && "target symbol expected for direct call");
441 
442       // Don't inline calls to a secondary entry point in a target function.
443       uint64_t EntryID = 0;
444       BinaryFunction *TargetFunction =
445           BC.getFunctionForSymbol(TargetSymbol, &EntryID);
446       if (!TargetFunction || EntryID != 0) {
447         ++InstIt;
448         continue;
449       }
450 
451       // Don't do recursive inlining.
452       if (TargetFunction == &Function) {
453         ++InstIt;
454         continue;
455       }
456 
457       auto IInfo = InliningCandidates.find(TargetFunction);
458       if (IInfo == InliningCandidates.end()) {
459         ++InstIt;
460         continue;
461       }
462 
463       const bool IsTailCall = BC.MIB->isTailCall(Inst);
464       if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
465         ++InstIt;
466         continue;
467       }
468 
469       int64_t SizeAfterInlining;
470       if (IsTailCall)
471         SizeAfterInlining =
472             IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
473       else
474         SizeAfterInlining =
475             IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);
476 
477       if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
478         if (!opts::InlineSmallFunctions ||
479             SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
480           ++InstIt;
481           continue;
482         }
483       }
484 
485       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
486                         << " in " << Function << " : " << BB->getName()
487                         << ". Count: " << BB->getKnownExecutionCount()
488                         << ". Size change: " << SizeAfterInlining
489                         << " bytes.\n");
490 
491       std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
492 
493       DidInlining = true;
494       TotalInlinedBytes += SizeAfterInlining;
495 
496       ++NumInlinedCallSites;
497       NumInlinedDynamicCalls += BB->getExecutionCount();
498 
499       // Subtract basic block execution count from the callee execution count.
500       if (opts::AdjustProfile)
501         TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());
502 
503       // Check if the caller inlining status has to be adjusted.
504       if (IInfo->second.Type == INL_TAILCALL) {
505         auto CallerIInfo = InliningCandidates.find(&Function);
506         if (CallerIInfo != InliningCandidates.end() &&
507             CallerIInfo->second.Type == INL_ANY) {
508           LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
509                             << Function << '\n');
510           CallerIInfo->second.Type = INL_TAILCALL;
511         }
512       }
513 
514       if (NumInlinedCallSites == opts::InlineLimit)
515         return true;
516     }
517   }
518 
519   return DidInlining;
520 }
521 
522 void Inliner::runOnFunctions(BinaryContext &BC) {
523   opts::syncOptions();
524 
525   if (!opts::inliningEnabled())
526     return;
527 
528   bool InlinedOnce;
529   unsigned NumIters = 0;
530   do {
531     if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
532       break;
533 
534     InlinedOnce = false;
535 
536     InliningCandidates.clear();
537     findInliningCandidates(BC);
538 
539     std::vector<BinaryFunction *> ConsideredFunctions;
540     for (auto &BFI : BC.getBinaryFunctions()) {
541       BinaryFunction &Function = BFI.second;
542       if (!shouldOptimize(Function))
543         continue;
544       ConsideredFunctions.push_back(&Function);
545     }
546     std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
547               [](const BinaryFunction *A, const BinaryFunction *B) {
548                 return B->getKnownExecutionCount() <
549                        A->getKnownExecutionCount();
550               });
551     for (BinaryFunction *Function : ConsideredFunctions) {
552       if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
553         break;
554 
555       const bool DidInline = inlineCallsInFunction(*Function);
556 
557       if (DidInline)
558         Modified.insert(Function);
559 
560       InlinedOnce |= DidInline;
561     }
562 
563     ++NumIters;
564   } while (InlinedOnce && NumIters < opts::InlineMaxIters);
565 
566   if (NumInlinedCallSites)
567     outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
568            << NumInlinedCallSites << " call sites in " << NumIters
569            << " iteration(s). Change in binary size: " << TotalInlinedBytes
570            << " bytes.\n";
571 }
572 
573 } // namespace bolt
574 } // namespace llvm
575