//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"
#include <map>

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

static cl::opt<bool>
AdjustProfile("inline-ap",
  cl::desc("adjust function profile after inlining"),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::list<std::string>
ForceInlineFunctions("force-inline",
  cl::CommaSeparated,
  cl::desc("list of functions to always consider for inlining"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineAll("inline-all",
  cl::desc("inline all functions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineIgnoreLeafCFI("inline-ignore-leaf-cfi",
  cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::ReallyHidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineIgnoreCFI("inline-ignore-cfi",
  cl::desc("inline functions with CFI programs (can break exception handling)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::ReallyHidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
InlineLimit("inline-limit",
  cl::desc("maximum number of call sites to inline"),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
InlineMaxIters("inline-max-iters",
  cl::desc("maximum number of inline iterations"),
  cl::init(3),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineSmallFunctions("inline-small-functions",
  cl::desc("inline functions if the increase in size is less than the value "
           "defined by -inline-small-functions-bytes"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
InlineSmallFunctionsBytes("inline-small-functions-bytes",
  cl::desc("max number of bytes for the function to be considered small for "
           "inlining purposes"),
  cl::init(4),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
NoInline("no-inline",
  cl::desc("disable all inlining (overrides other inlining options)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

/// Return true if any of the inlining options are specified and the inlining
/// pass should be executed. Whenever a new inlining option is added, this
/// function should be updated to reflect the change.
bool inliningEnabled() {
  return !NoInline &&
         (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
}

bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
  for (std::string &Name : opts::ForceInlineFunctions)
    if (Function.hasName(Name))
      return true;
  return false;
}

void syncOptions() {
  if (opts::InlineIgnoreCFI)
    opts::InlineIgnoreLeafCFI = true;

  if (opts::InlineAll)
    opts::InlineSmallFunctions = true;
}

} // namespace opts

namespace llvm {
namespace bolt {

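// Sizes (in bytes) of a call and a tail call instruction, computed lazily and
// cached by getSizeOfCallInst() and getSizeOfTailCallInst().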
uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;

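// Return the size of a direct call instruction, computing and caching it on
// the first request.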
uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
  if (SizeOfCallInst)
    return SizeOfCallInst;

  MCInst Inst;
  BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfCallInst = BC.computeInstructionSize(Inst);

  return SizeOfCallInst;
}

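// Return the size of a tail call instruction, computing and caching it on the
// first request.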
uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
  if (SizeOfTailCallInst)
    return SizeOfTailCallInst;

  MCInst Inst;
  BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfTailCallInst = BC.computeInstructionSize(Inst);

  return SizeOfTailCallInst;
}

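// Decide whether and how BF can be inlined: not at all (INL_NONE), only at
// tail call sites (INL_TAILCALL), or at any call site (INL_ANY), together with
// the estimated code size after inlining.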
Inliner::InliningInfo Inliner::getInliningInfo(const BinaryFunction &BF) const {
  if (!shouldOptimize(BF))
    return INL_NONE;

  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false;
  bool HasCFI = false;
  bool IsLeaf = true;

  // Perform necessary checks unless the option overrides it.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock *BB : BF.layout()) {
      for (const MCInst &Inst : *BB) {
        // Tail calls are marked as implicitly using the stack pointer, but
        // they can still be inlined.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

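  // A callee that directly uses the stack pointer can only be inlined at tail
  // call sites, where the stack remains unmodified by the call itself.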
  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle the special case of a known size reduction.
  if (BF.size() == 1) {
    // For a regular call the last return instruction could be removed
    // (or converted to a branch).
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}

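// Scan all functions in the binary and record those that are eligible for
// inlining along with their inlining info.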
void Inliner::findInliningCandidates(BinaryContext &BC) {
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    const InliningInfo InlInfo = getInliningInfo(Function);
    if (InlInfo.Type != INL_NONE)
      InliningCandidates[&Function] = InlInfo;
  }
}

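// Inline a single call site by splicing a copy of Callee's CFG into the caller
// in place of CallInst. Returns the basic block and instruction iterator from
// which the caller should continue processing.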
std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst);

  // Split basic block at the call site if there will be more incoming edges
  // coming from the callee.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split basic block after the call instruction unless the callee is trivial
  // (i.e. consists of a single basic block). If necessary, obtain a basic block
  // for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
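  // Disconnect the fall-through successor: successor edges for the inlined
  // blocks, including edges from the callee's return blocks to NextBB, are
  // added below.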
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin.
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock(0);
    InlinedBBMap[&*BBI] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BBI->getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

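      // At a tail call site, the callee's calls and returns can be copied
      // verbatim since the callee's returns become the caller's returns. For
      // a regular call site, returns are removed and tail calls are converted
      // to regular calls below.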
      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    std::transform(BB.succ_begin(), BB.succ_end(), Successors.begin(),
                   [&InlinedBBMap](const BinaryBasicBlock *BB) {
                     return InlinedBBMap.at(BB);
                   });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}

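// Scan a function for direct calls to inlining candidates and inline the
// profitable ones. Return true if at least one call site was inlined.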
bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
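  // Visit basic blocks in decreasing order of their execution counts so that
  // hot call sites are considered first. This matters primarily when the
  // number of inlined call sites is limited by -inline-limit.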
  std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
                                         Function.layout().end());
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
              return BB1->getKnownExecutionCount() >
                     BB2->getKnownExecutionCount();
            });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
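      // Only consider direct calls with a symbolic target.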
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}

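// Entry point of the pass: repeatedly discover inlining candidates and inline
// call sites across all functions, for up to -inline-max-iters iterations.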
void Inliner::runOnFunctions(BinaryContext &BC) {
  opts::syncOptions();

  if (!opts::inliningEnabled())
    return;

  uint64_t TotalSize = 0;
  for (auto &BFI : BC.getBinaryFunctions())
    TotalSize += BFI.second.getSize();

  bool InlinedOnce;
  unsigned NumIters = 0;
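  // Keep iterating as long as the previous iteration inlined at least one call
  // site, up to the -inline-max-iters limit.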
  do {
    if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
      break;

    InlinedOnce = false;

    InliningCandidates.clear();
    findInliningCandidates(BC);

    std::vector<BinaryFunction *> ConsideredFunctions;
    for (auto &BFI : BC.getBinaryFunctions()) {
      BinaryFunction &Function = BFI.second;
      if (!shouldOptimize(Function))
        continue;
      ConsideredFunctions.push_back(&Function);
    }
    std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
              [](const BinaryFunction *A, const BinaryFunction *B) {
                return B->getKnownExecutionCount() <
                       A->getKnownExecutionCount();
              });
    for (BinaryFunction *Function : ConsideredFunctions) {
      if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
        break;

      const bool DidInline = inlineCallsInFunction(*Function);

      if (DidInline)
        Modified.insert(Function);

      InlinedOnce |= DidInline;
    }

    ++NumIters;
  } while (InlinedOnce && NumIters < opts::InlineMaxIters);

  if (NumInlinedCallSites)
    outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
           << NumInlinedCallSites << " call sites in " << NumIters
           << " iteration(s). Change in binary size: " << TotalInlinedBytes
           << " bytes.\n";
}

} // namespace bolt
} // namespace llvm