1 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the Inliner class used for inlining binary functions. 10 // 11 // The current inliner has a limited callee support 12 // (see Inliner::getInliningInfo() for the most up-to-date details): 13 // 14 // * No exception handling 15 // * No jump tables 16 // * Single entry point 17 // * CFI update not supported - breaks unwinding 18 // * Regular Call Sites: 19 // - only leaf functions (or callees with only tail calls) 20 // * no invokes (they can't be tail calls) 21 // - no direct use of %rsp 22 // * Tail Call Sites: 23 // - since the stack is unmodified, the regular call limitations are lifted 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "bolt/Passes/Inliner.h" 28 #include "bolt/Core/MCPlus.h" 29 #include "llvm/Support/CommandLine.h" 30 #include <map> 31 32 #define DEBUG_TYPE "bolt-inliner" 33 34 using namespace llvm; 35 36 namespace opts { 37 38 extern cl::OptionCategory BoltOptCategory; 39 40 static cl::opt<bool> 41 AdjustProfile("inline-ap", 42 cl::desc("adjust function profile after inlining"), 43 cl::ZeroOrMore, 44 cl::cat(BoltOptCategory)); 45 46 static cl::list<std::string> 47 ForceInlineFunctions("force-inline", 48 cl::CommaSeparated, 49 cl::desc("list of functions to always consider for inlining"), 50 cl::value_desc("func1,func2,func3,..."), 51 cl::Hidden, 52 cl::cat(BoltOptCategory)); 53 54 static cl::opt<bool> 55 InlineAll("inline-all", 56 cl::desc("inline all functions"), 57 cl::init(false), 58 cl::ZeroOrMore, 59 cl::cat(BoltOptCategory)); 60 61 static cl::opt<bool> 62 InlineIgnoreLeafCFI("inline-ignore-leaf-cfi", 63 cl::desc("inline leaf functions with CFI programs (can break unwinding)"), 64 cl::init(true), 65 cl::ZeroOrMore, 66 cl::ReallyHidden, 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<bool> 70 InlineIgnoreCFI("inline-ignore-cfi", 71 cl::desc("inline functions with CFI programs (can break exception handling)"), 72 cl::init(false), 73 cl::ZeroOrMore, 74 cl::ReallyHidden, 75 cl::cat(BoltOptCategory)); 76 77 static cl::opt<unsigned> 78 InlineLimit("inline-limit", 79 cl::desc("maximum number of call sites to inline"), 80 cl::init(0), 81 cl::ZeroOrMore, 82 cl::Hidden, 83 cl::cat(BoltOptCategory)); 84 85 static cl::opt<unsigned> 86 InlineMaxIters("inline-max-iters", 87 cl::desc("maximum number of inline iterations"), 88 cl::init(3), 89 cl::ZeroOrMore, 90 cl::Hidden, 91 cl::cat(BoltOptCategory)); 92 93 static cl::opt<bool> 94 InlineSmallFunctions("inline-small-functions", 95 cl::desc("inline functions if increase in size is less than defined by " 96 "-inline-small-functions-bytes"), 97 cl::init(false), 98 cl::ZeroOrMore, 99 cl::cat(BoltOptCategory)); 100 101 static cl::opt<unsigned> 102 InlineSmallFunctionsBytes("inline-small-functions-bytes", 103 cl::desc("max number of bytes for the function to be considered small for " 104 "inlining purposes"), 105 cl::init(4), 106 cl::ZeroOrMore, 107 cl::Hidden, 108 cl::cat(BoltOptCategory)); 109 110 static cl::opt<bool> 111 NoInline("no-inline", 112 cl::desc("disable all inlining (overrides other inlining options)"), 113 cl::init(false), 114 cl::ZeroOrMore, 115 cl::cat(BoltOptCategory)); 116 117 /// This function returns true if any of inlining options are specified and the 118 /// inlining pass should be executed. Whenever a new inlining option is added, 119 /// this function should reflect the change. 120 bool inliningEnabled() { 121 return !NoInline && 122 (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty()); 123 } 124 125 bool mustConsider(const llvm::bolt::BinaryFunction &Function) { 126 for (std::string &Name : opts::ForceInlineFunctions) 127 if (Function.hasName(Name)) 128 return true; 129 return false; 130 } 131 132 void syncOptions() { 133 if (opts::InlineIgnoreCFI) 134 opts::InlineIgnoreLeafCFI = true; 135 136 if (opts::InlineAll) 137 opts::InlineSmallFunctions = true; 138 } 139 140 } // namespace opts 141 142 namespace llvm { 143 namespace bolt { 144 145 uint64_t Inliner::SizeOfCallInst; 146 uint64_t Inliner::SizeOfTailCallInst; 147 148 uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) { 149 if (SizeOfCallInst) 150 return SizeOfCallInst; 151 152 MCInst Inst; 153 BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get()); 154 SizeOfCallInst = BC.computeInstructionSize(Inst); 155 156 return SizeOfCallInst; 157 } 158 159 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) { 160 if (SizeOfTailCallInst) 161 return SizeOfTailCallInst; 162 163 MCInst Inst; 164 BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get()); 165 SizeOfTailCallInst = BC.computeInstructionSize(Inst); 166 167 return SizeOfTailCallInst; 168 } 169 170 InliningInfo getInliningInfo(const BinaryFunction &BF) { 171 const BinaryContext &BC = BF.getBinaryContext(); 172 bool DirectSP = false; 173 bool HasCFI = false; 174 bool IsLeaf = true; 175 176 // Perform necessary checks unless the option overrides it. 177 if (!opts::mustConsider(BF)) { 178 if (BF.hasSDTMarker()) 179 return INL_NONE; 180 181 if (BF.hasEHRanges()) 182 return INL_NONE; 183 184 if (BF.isMultiEntry()) 185 return INL_NONE; 186 187 if (BF.hasJumpTables()) 188 return INL_NONE; 189 190 const MCPhysReg SPReg = BC.MIB->getStackPointer(); 191 for (const BinaryBasicBlock *BB : BF.layout()) { 192 for (const MCInst &Inst : *BB) { 193 // Tail calls are marked as implicitly using the stack pointer and they 194 // could be inlined. 195 if (BC.MIB->isTailCall(Inst)) 196 break; 197 198 if (BC.MIB->isCFI(Inst)) { 199 HasCFI = true; 200 continue; 201 } 202 203 if (BC.MIB->isCall(Inst)) 204 IsLeaf = false; 205 206 // Push/pop instructions are straightforward to handle. 207 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 208 continue; 209 210 DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) || 211 BC.MIB->hasUseOfPhysReg(Inst, SPReg); 212 } 213 } 214 } 215 216 if (HasCFI) { 217 if (!opts::InlineIgnoreLeafCFI) 218 return INL_NONE; 219 220 if (!IsLeaf && !opts::InlineIgnoreCFI) 221 return INL_NONE; 222 } 223 224 InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY); 225 226 size_t Size = BF.estimateSize(); 227 228 Info.SizeAfterInlining = Size; 229 Info.SizeAfterTailCallInlining = Size; 230 231 // Handle special case of the known size reduction. 232 if (BF.size() == 1) { 233 // For a regular call the last return instruction could be removed 234 // (or converted to a branch). 235 const MCInst *LastInst = BF.back().getLastNonPseudoInstr(); 236 if (LastInst && BC.MIB->isReturn(*LastInst) && 237 !BC.MIB->isTailCall(*LastInst)) { 238 const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst); 239 assert(Size >= RetInstSize); 240 Info.SizeAfterInlining -= RetInstSize; 241 } 242 } 243 244 return Info; 245 } 246 247 void Inliner::findInliningCandidates(BinaryContext &BC) { 248 for (const auto &BFI : BC.getBinaryFunctions()) { 249 const BinaryFunction &Function = BFI.second; 250 if (!shouldOptimize(Function)) 251 continue; 252 const InliningInfo InlInfo = getInliningInfo(Function); 253 if (InlInfo.Type != INL_NONE) 254 InliningCandidates[&Function] = InlInfo; 255 } 256 } 257 258 std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator> 259 Inliner::inlineCall(BinaryBasicBlock &CallerBB, 260 BinaryBasicBlock::iterator CallInst, 261 const BinaryFunction &Callee) { 262 BinaryFunction &CallerFunction = *CallerBB.getFunction(); 263 BinaryContext &BC = CallerFunction.getBinaryContext(); 264 auto &MIB = *BC.MIB; 265 266 assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call"); 267 assert(!Callee.isMultiEntry() && 268 "cannot inline function with multiple entries"); 269 assert(!Callee.hasJumpTables() && 270 "cannot inline function with jump table(s)"); 271 272 // Get information about the call site. 273 const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst); 274 const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst); 275 const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst); 276 const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst); 277 278 // Split basic block at the call site if there will be more incoming edges 279 // coming from the callee. 280 BinaryBasicBlock *FirstInlinedBB = &CallerBB; 281 if (Callee.front().pred_size() && CallInst != CallerBB.begin()) { 282 FirstInlinedBB = CallerBB.splitAt(CallInst); 283 CallInst = FirstInlinedBB->begin(); 284 } 285 286 // Split basic block after the call instruction unless the callee is trivial 287 // (i.e. consists of a single basic block). If necessary, obtain a basic block 288 // for return instructions in the callee to redirect to. 289 BinaryBasicBlock *NextBB = nullptr; 290 if (Callee.size() > 1) { 291 if (std::next(CallInst) != FirstInlinedBB->end()) 292 NextBB = FirstInlinedBB->splitAt(std::next(CallInst)); 293 else 294 NextBB = FirstInlinedBB->getSuccessor(); 295 } 296 if (NextBB) 297 FirstInlinedBB->removeSuccessor(NextBB); 298 299 // Remove the call instruction. 300 auto InsertII = FirstInlinedBB->eraseInstruction(CallInst); 301 302 double ProfileRatio = 0; 303 if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount()) 304 ProfileRatio = 305 (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount; 306 307 // Save execution count of the first block as we don't want it to change 308 // later due to profile adjustment rounding errors. 309 const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount(); 310 311 // Copy basic blocks and maintain a map from their origin. 312 std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap; 313 InlinedBBMap[&Callee.front()] = FirstInlinedBB; 314 for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) { 315 BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock(0); 316 InlinedBBMap[&*BBI] = InlinedBB; 317 InlinedBB->setCFIState(FirstInlinedBB->getCFIState()); 318 if (Callee.hasValidProfile()) 319 InlinedBB->setExecutionCount(BBI->getKnownExecutionCount()); 320 else 321 InlinedBB->setExecutionCount(FirstInlinedBBCount); 322 } 323 324 // Copy over instructions and edges. 325 for (const BinaryBasicBlock &BB : Callee) { 326 BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB]; 327 328 if (InlinedBB != FirstInlinedBB) 329 InsertII = InlinedBB->begin(); 330 331 // Copy over instructions making any necessary mods. 332 for (MCInst Inst : BB) { 333 if (MIB.isPseudo(Inst)) 334 continue; 335 336 MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86()); 337 338 // Fix branch target. Strictly speaking, we don't have to do this as 339 // targets of direct branches will be fixed later and don't matter 340 // in the CFG state. However, disassembly may look misleading, and 341 // hence we do the fixing. 342 if (MIB.isBranch(Inst)) { 343 assert(!MIB.isIndirectBranch(Inst) && 344 "unexpected indirect branch in callee"); 345 const BinaryBasicBlock *TargetBB = 346 Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); 347 assert(TargetBB && "cannot find target block in callee"); 348 MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(), 349 BC.Ctx.get()); 350 } 351 352 if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) { 353 InsertII = 354 std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst))); 355 continue; 356 } 357 358 // Handle special instructions for a non-tail call site. 359 if (!MIB.isCall(Inst)) { 360 // Returns are removed. 361 break; 362 } 363 364 MIB.convertTailCallToCall(Inst); 365 366 // Propagate EH-related info to call instructions. 367 if (CSIsInvoke) { 368 MIB.addEHInfo(Inst, *CSEHInfo); 369 if (CSGNUArgsSize >= 0) 370 MIB.addGnuArgsSize(Inst, CSGNUArgsSize); 371 } 372 373 InsertII = 374 std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst))); 375 } 376 377 // Add CFG edges to the basic blocks of the inlined instance. 378 std::vector<BinaryBasicBlock *> Successors(BB.succ_size()); 379 std::transform(BB.succ_begin(), BB.succ_end(), Successors.begin(), 380 [&InlinedBBMap](const BinaryBasicBlock *BB) { 381 return InlinedBBMap.at(BB); 382 }); 383 384 if (CallerFunction.hasValidProfile() && Callee.hasValidProfile()) 385 InlinedBB->addSuccessors(Successors.begin(), Successors.end(), 386 BB.branch_info_begin(), BB.branch_info_end()); 387 else 388 InlinedBB->addSuccessors(Successors.begin(), Successors.end()); 389 390 if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) { 391 // Either it's a return block or the last instruction never returns. 392 InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount()); 393 } 394 395 // Scale profiling info for blocks and edges after inlining. 396 if (CallerFunction.hasValidProfile() && Callee.size() > 1) { 397 if (opts::AdjustProfile) 398 InlinedBB->adjustExecutionCount(ProfileRatio); 399 else 400 InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() * 401 ProfileRatio); 402 } 403 } 404 405 // Restore the original execution count of the first inlined basic block. 406 FirstInlinedBB->setExecutionCount(FirstInlinedBBCount); 407 408 CallerFunction.recomputeLandingPads(); 409 410 if (NextBB) 411 return std::make_pair(NextBB, NextBB->begin()); 412 413 if (Callee.size() == 1) 414 return std::make_pair(FirstInlinedBB, InsertII); 415 416 return std::make_pair(FirstInlinedBB, FirstInlinedBB->end()); 417 } 418 419 bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { 420 BinaryContext &BC = Function.getBinaryContext(); 421 std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(), 422 Function.layout().end()); 423 std::sort(Blocks.begin(), Blocks.end(), 424 [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) { 425 return BB1->getKnownExecutionCount() > 426 BB2->getKnownExecutionCount(); 427 }); 428 429 bool DidInlining = false; 430 for (BinaryBasicBlock *BB : Blocks) { 431 for (auto InstIt = BB->begin(); InstIt != BB->end();) { 432 MCInst &Inst = *InstIt; 433 if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 || 434 !Inst.getOperand(0).isExpr()) { 435 ++InstIt; 436 continue; 437 } 438 439 const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst); 440 assert(TargetSymbol && "target symbol expected for direct call"); 441 442 // Don't inline calls to a secondary entry point in a target function. 443 uint64_t EntryID = 0; 444 BinaryFunction *TargetFunction = 445 BC.getFunctionForSymbol(TargetSymbol, &EntryID); 446 if (!TargetFunction || EntryID != 0) { 447 ++InstIt; 448 continue; 449 } 450 451 // Don't do recursive inlining. 452 if (TargetFunction == &Function) { 453 ++InstIt; 454 continue; 455 } 456 457 auto IInfo = InliningCandidates.find(TargetFunction); 458 if (IInfo == InliningCandidates.end()) { 459 ++InstIt; 460 continue; 461 } 462 463 const bool IsTailCall = BC.MIB->isTailCall(Inst); 464 if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) { 465 ++InstIt; 466 continue; 467 } 468 469 int64_t SizeAfterInlining; 470 if (IsTailCall) 471 SizeAfterInlining = 472 IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC); 473 else 474 SizeAfterInlining = 475 IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC); 476 477 if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) { 478 if (!opts::InlineSmallFunctions || 479 SizeAfterInlining > opts::InlineSmallFunctionsBytes) { 480 ++InstIt; 481 continue; 482 } 483 } 484 485 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction 486 << " in " << Function << " : " << BB->getName() 487 << ". Count: " << BB->getKnownExecutionCount() 488 << ". Size change: " << SizeAfterInlining 489 << " bytes.\n"); 490 491 std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); 492 493 DidInlining = true; 494 TotalInlinedBytes += SizeAfterInlining; 495 496 ++NumInlinedCallSites; 497 NumInlinedDynamicCalls += BB->getExecutionCount(); 498 499 // Subtract basic block execution count from the callee execution count. 500 if (opts::AdjustProfile) 501 TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount()); 502 503 // Check if the caller inlining status has to be adjusted. 504 if (IInfo->second.Type == INL_TAILCALL) { 505 auto CallerIInfo = InliningCandidates.find(&Function); 506 if (CallerIInfo != InliningCandidates.end() && 507 CallerIInfo->second.Type == INL_ANY) { 508 LLVM_DEBUG(dbgs() << "adjusting inlining status for function " 509 << Function << '\n'); 510 CallerIInfo->second.Type = INL_TAILCALL; 511 } 512 } 513 514 if (NumInlinedCallSites == opts::InlineLimit) 515 return true; 516 } 517 } 518 519 return DidInlining; 520 } 521 522 void Inliner::runOnFunctions(BinaryContext &BC) { 523 opts::syncOptions(); 524 525 if (!opts::inliningEnabled()) 526 return; 527 528 bool InlinedOnce; 529 unsigned NumIters = 0; 530 do { 531 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit) 532 break; 533 534 InlinedOnce = false; 535 536 InliningCandidates.clear(); 537 findInliningCandidates(BC); 538 539 std::vector<BinaryFunction *> ConsideredFunctions; 540 for (auto &BFI : BC.getBinaryFunctions()) { 541 BinaryFunction &Function = BFI.second; 542 if (!shouldOptimize(Function)) 543 continue; 544 ConsideredFunctions.push_back(&Function); 545 } 546 std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(), 547 [](const BinaryFunction *A, const BinaryFunction *B) { 548 return B->getKnownExecutionCount() < 549 A->getKnownExecutionCount(); 550 }); 551 for (BinaryFunction *Function : ConsideredFunctions) { 552 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit) 553 break; 554 555 const bool DidInline = inlineCallsInFunction(*Function); 556 557 if (DidInline) 558 Modified.insert(Function); 559 560 InlinedOnce |= DidInline; 561 } 562 563 ++NumIters; 564 } while (InlinedOnce && NumIters < opts::InlineMaxIters); 565 566 if (NumInlinedCallSites) 567 outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at " 568 << NumInlinedCallSites << " call sites in " << NumIters 569 << " iteration(s). Change in binary size: " << TotalInlinedBytes 570 << " bytes.\n"; 571 } 572 573 } // namespace bolt 574 } // namespace llvm 575