1 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Inliner class used for inlining binary functions.
10 //
11 // The current inliner has a limited callee support
12 // (see Inliner::getInliningInfo() for the most up-to-date details):
13 //
14 //  * No exception handling
15 //  * No jump tables
16 //  * Single entry point
17 //  * CFI update not supported - breaks unwinding
18 //  * Regular Call Sites:
19 //    - only leaf functions (or callees with only tail calls)
20 //      * no invokes (they can't be tail calls)
21 //    - no direct use of %rsp
22 //  * Tail Call Sites:
23 //    - since the stack is unmodified, the regular call limitations are lifted
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "bolt/Passes/Inliner.h"
28 #include "bolt/Core/MCPlus.h"
29 #include "llvm/Support/CommandLine.h"
30 #include <map>
31 
32 #define DEBUG_TYPE "bolt-inliner"
33 
34 using namespace llvm;
35 
36 namespace opts {
37 
38 extern cl::OptionCategory BoltOptCategory;
39 
// Decrement the callee's execution counts by the inlined call sites' counts
// (see Inliner::inlineCallsInFunction()) and scale inlined block counts via
// adjustExecutionCount() instead of a plain multiply (see Inliner::inlineCall()).
static cl::opt<bool>
    AdjustProfile("inline-ap",
                  cl::desc("adjust function profile after inlining"),
                  cl::cat(BoltOptCategory));

// Functions listed here bypass the legality checks in getInliningInfo() and
// the per-call-site size budget (see opts::mustConsider()).
static cl::list<std::string>
ForceInlineFunctions("force-inline",
  cl::CommaSeparated,
  cl::desc("list of functions to always consider for inlining"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltOptCategory));

// Implies -inline-small-functions (see opts::syncOptions()) and disables the
// size-budget check at call sites.
static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
                               cl::cat(BoltOptCategory));

// On by default; when disabled, any candidate containing CFI is rejected.
static cl::opt<bool> InlineIgnoreLeafCFI(
    "inline-ignore-leaf-cfi",
    cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

// Implies -inline-ignore-leaf-cfi (see opts::syncOptions()).
static cl::opt<bool> InlineIgnoreCFI(
    "inline-ignore-cfi",
    cl::desc(
        "inline functions with CFI programs (can break exception handling)"),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

// A value of 0 (the default) means no limit.
static cl::opt<unsigned>
    InlineLimit("inline-limit",
                cl::desc("maximum number of call sites to inline"), cl::init(0),
                cl::Hidden, cl::cat(BoltOptCategory));

// The pass repeats while it makes progress, up to this many iterations, to
// catch opportunities exposed by earlier rounds of inlining.
static cl::opt<unsigned>
    InlineMaxIters("inline-max-iters",
                   cl::desc("maximum number of inline iterations"), cl::init(3),
                   cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineSmallFunctions(
    "inline-small-functions",
    cl::desc("inline functions if increase in size is less than defined by "
             "-inline-small-functions-bytes"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> InlineSmallFunctionsBytes(
    "inline-small-functions-bytes",
    cl::desc("max number of bytes for the function to be considered small for "
             "inlining purposes"),
    cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

// Master kill switch; checked first in opts::inliningEnabled().
static cl::opt<bool> NoInline(
    "no-inline",
    cl::desc("disable all inlining (overrides other inlining options)"),
    cl::cat(BoltOptCategory));
93 
94 /// This function returns true if any of inlining options are specified and the
95 /// inlining pass should be executed. Whenever a new inlining option is added,
96 /// this function should reflect the change.
inliningEnabled()97 bool inliningEnabled() {
98   return !NoInline &&
99          (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
100 }
101 
mustConsider(const llvm::bolt::BinaryFunction & Function)102 bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
103   for (std::string &Name : opts::ForceInlineFunctions)
104     if (Function.hasName(Name))
105       return true;
106   return false;
107 }
108 
syncOptions()109 void syncOptions() {
110   if (opts::InlineIgnoreCFI)
111     opts::InlineIgnoreLeafCFI = true;
112 
113   if (opts::InlineAll)
114     opts::InlineSmallFunctions = true;
115 }
116 
117 } // namespace opts
118 
119 namespace llvm {
120 namespace bolt {
121 
// Cached encoded sizes of call/tail-call instructions; zero means "not yet
// computed" (filled lazily by getSizeOfCallInst()/getSizeOfTailCallInst()).
uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;
124 
getSizeOfCallInst(const BinaryContext & BC)125 uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
126   if (SizeOfCallInst)
127     return SizeOfCallInst;
128 
129   MCInst Inst;
130   BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
131   SizeOfCallInst = BC.computeInstructionSize(Inst);
132 
133   return SizeOfCallInst;
134 }
135 
getSizeOfTailCallInst(const BinaryContext & BC)136 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
137   if (SizeOfTailCallInst)
138     return SizeOfTailCallInst;
139 
140   MCInst Inst;
141   BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
142   SizeOfTailCallInst = BC.computeInstructionSize(Inst);
143 
144   return SizeOfTailCallInst;
145 }
146 
/// Analyze \p BF as an inlining candidate. Returns INL_NONE when inlining is
/// not possible, otherwise whether the body may be inlined at any call site
/// (INL_ANY) or only at tail-call sites (INL_TAILCALL), together with the
/// estimated size of the inlined body used by the cost model.
InliningInfo getInliningInfo(const BinaryFunction &BF) {
  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false; // Set if the callee directly defines/uses SP.
  bool HasCFI = false;   // Set if the callee contains CFI instructions.
  bool IsLeaf = true;    // Cleared if the callee makes a (non-tail) call.

  // Perform necessary checks unless the option overrides it.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    // Scan every instruction for properties that restrict inlining.
    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock &BB : BF) {
      for (const MCInst &Inst : BB) {
        // Tail calls are marked as implicitly using the stack pointer and they
        // could be inlined.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        // Any other def/use of the stack pointer restricts the callee to
        // tail-call sites, where the stack is left unmodified.
        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  // CFI programs are not updated after inlining, so they are only tolerated
  // per the -inline-ignore-leaf-cfi / -inline-ignore-cfi overrides.
  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle special case of the known size reduction.
  if (BF.size() == 1) {
    // For a regular call the last return instruction could be removed
    // (or converted to a branch).
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}
223 
findInliningCandidates(BinaryContext & BC)224 void Inliner::findInliningCandidates(BinaryContext &BC) {
225   for (const auto &BFI : BC.getBinaryFunctions()) {
226     const BinaryFunction &Function = BFI.second;
227     if (!shouldOptimize(Function))
228       continue;
229     const InliningInfo InlInfo = getInliningInfo(Function);
230     if (InlInfo.Type != INL_NONE)
231       InliningCandidates[&Function] = InlInfo;
232   }
233 }
234 
/// Inline the body of \p Callee at call site \p CallInst in \p CallerBB.
/// The call may be a regular call or a tail call. The callee's basic blocks
/// are copied into the caller, CFG edges are stitched around the copy, and
/// the callee's profile is scaled to the call site's frequency. Returns the
/// block/iterator pair from which the caller's scan should resume.
std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst);

  // Split basic block at the call site if there will be more incoming edges
  // coming from the callee.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split basic block after the call instruction unless the callee is trivial
  // (i.e. consists of a single basic block). If necessary, obtain a basic block
  // for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

  // Ratio of the call site's count to the callee's entry count, used below to
  // scale the callee's profile onto the inlined copy.
  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin.
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
    InlinedBBMap[&*BBI] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BBI->getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    // Note: Inst is deliberately a copy so it can be modified.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

      // Callee annotations are not meaningful in the caller's context.
      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

      // At a tail-call site every instruction is copied verbatim; otherwise
      // calls and returns need the special handling below.
      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      // A tail call in the callee becomes a regular call in the caller.
      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    llvm::transform(BB.successors(), Successors.begin(),
                    [&InlinedBBMap](const BinaryBasicBlock *BB) {
                      return InlinedBBMap.at(BB);
                    });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  // Resume scanning right after the inlined body.
  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}
395 
/// Scan \p Function for direct call sites eligible for inlining and inline
/// them, visiting basic blocks in decreasing execution-count order so the
/// hottest sites are handled first. Returns true if any inlining was done.
bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
                                         Function.getLayout().block_end());
  // Hottest blocks first.
  llvm::sort(
      Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
        return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
      });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
      // Only direct calls with a single symbolic target are candidates.
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      // A callee restricted to tail-call sites (INL_TAILCALL) is skipped at
      // regular call sites.
      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

      // Expected code-size change: inlined body size minus the removed call
      // instruction. May be negative for tiny callees.
      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      // Unless inlining is forced, only proceed when the size increase stays
      // within the -inline-small-functions-bytes budget.
      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      // inlineCall() may split BB; continue scanning right after the body.
      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

      // Stop at the global -inline-limit. NumInlinedCallSites >= 1 here, so
      // this never fires with the default limit of 0 (unlimited).
      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}
497 
runOnFunctions(BinaryContext & BC)498 void Inliner::runOnFunctions(BinaryContext &BC) {
499   opts::syncOptions();
500 
501   if (!opts::inliningEnabled())
502     return;
503 
504   bool InlinedOnce;
505   unsigned NumIters = 0;
506   do {
507     if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
508       break;
509 
510     InlinedOnce = false;
511 
512     InliningCandidates.clear();
513     findInliningCandidates(BC);
514 
515     std::vector<BinaryFunction *> ConsideredFunctions;
516     for (auto &BFI : BC.getBinaryFunctions()) {
517       BinaryFunction &Function = BFI.second;
518       if (!shouldOptimize(Function))
519         continue;
520       ConsideredFunctions.push_back(&Function);
521     }
522     llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
523                                        const BinaryFunction *B) {
524       return B->getKnownExecutionCount() < A->getKnownExecutionCount();
525     });
526     for (BinaryFunction *Function : ConsideredFunctions) {
527       if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
528         break;
529 
530       const bool DidInline = inlineCallsInFunction(*Function);
531 
532       if (DidInline)
533         Modified.insert(Function);
534 
535       InlinedOnce |= DidInline;
536     }
537 
538     ++NumIters;
539   } while (InlinedOnce && NumIters < opts::InlineMaxIters);
540 
541   if (NumInlinedCallSites)
542     outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
543            << NumInlinedCallSites << " call sites in " << NumIters
544            << " iteration(s). Change in binary size: " << TotalInlinedBytes
545            << " bytes.\n";
546 }
547 
548 } // namespace bolt
549 } // namespace llvm
550