//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has a limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"
#include <map>

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;

// Command-line knobs controlling the inlining pass. Note that some options
// imply others; see opts::syncOptions() for the relationships.
namespace opts {

extern cl::OptionCategory BoltOptCategory;

// When set, subtract inlined-call-site counts from the callee's profile so
// the remaining (non-inlined) callee profile stays consistent.
static cl::opt<bool>
AdjustProfile("inline-ap",
  cl::desc("adjust function profile after inlining"),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

// Functions named here bypass the usual inlining legality checks
// (see Inliner::getInliningInfo()).
static cl::list<std::string>
ForceInlineFunctions("force-inline",
  cl::CommaSeparated,
  cl::desc("list of functions to always consider for inlining"),
  cl::value_desc("func1,func2,func3,..."),
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineAll("inline-all",
  cl::desc("inline all functions"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

// Defaults to true: CFI in a leaf callee is tolerated even though inlined
// CFI is not updated (can break unwinding).
static cl::opt<bool>
InlineIgnoreLeafCFI("inline-ignore-leaf-cfi",
  cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::ReallyHidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineIgnoreCFI("inline-ignore-cfi",
  cl::desc("inline functions with CFI programs (can break exception handling)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::ReallyHidden,
  cl::cat(BoltOptCategory));

// 0 (the default) means "no limit".
static cl::opt<unsigned>
InlineLimit("inline-limit",
  cl::desc("maximum number of call sites to inline"),
  cl::init(0),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
InlineMaxIters("inline-max-iters",
  cl::desc("maximum number of inline iterations"),
  cl::init(3),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InlineSmallFunctions("inline-small-functions",
  cl::desc("inline functions if increase in size is less than defined by "
           "-inline-small-functions-bytes"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

static cl::opt<unsigned>
InlineSmallFunctionsBytes("inline-small-functions-bytes",
  cl::desc("max number of bytes for the function to be considered small for "
           "inlining purposes"),
  cl::init(4),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
NoInline("no-inline",
  cl::desc("disable all inlining (overrides other inlining options)"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

/// This function returns true if any of inlining options are specified and the
/// inlining pass should be executed. Whenever a new inlining option is added,
/// this function should reflect the change.
bool inliningEnabled() {
  return !NoInline &&
         (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
}

/// Return true if \p Function matches one of the names passed via
/// -force-inline. Such functions skip the legality checks in
/// Inliner::getInliningInfo().
bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
  for (std::string &Name : opts::ForceInlineFunctions)
    if (Function.hasName(Name))
      return true;
  return false;
}

/// Propagate implications between inlining options: each stronger option
/// enables its weaker counterpart.
void syncOptions() {
  if (opts::InlineIgnoreCFI)
    opts::InlineIgnoreLeafCFI = true;

  if (opts::InlineAll)
    opts::InlineSmallFunctions = true;
}

} // namespace opts

namespace llvm {
namespace bolt {

// Lazily-computed sizes (in bytes) of a direct call and a tail call
// instruction on the current target. Cached across all Inliner instances.
uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;

/// Return the size of a direct call instruction, computing and caching it on
/// first use by encoding a synthetic call to a temporary symbol.
uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
  if (SizeOfCallInst)
    return SizeOfCallInst;

  MCInst Inst;
  BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfCallInst = BC.computeInstructionSize(Inst);

  return SizeOfCallInst;
}

/// Return the size of a tail call instruction, computing and caching it on
/// first use. See getSizeOfCallInst() above.
uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
  if (SizeOfTailCallInst)
    return SizeOfTailCallInst;

  MCInst Inst;
  BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfTailCallInst = BC.computeInstructionSize(Inst);

  return SizeOfTailCallInst;
}

/// Determine whether and how \p BF may be inlined: not at all (INL_NONE),
/// only at tail-call sites (INL_TAILCALL), or at any call site (INL_ANY).
/// Also estimates the size of an inlined instance of the callee.
Inliner::InliningInfo Inliner::getInliningInfo(const BinaryFunction &BF) const {
  if (!shouldOptimize(BF))
    return INL_NONE;

  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false;
  bool HasCFI = false;
  bool IsLeaf = true;

  // Perform necessary checks unless the option overrides it.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock *BB : BF.layout()) {
      for (const MCInst &Inst : *BB) {
        // Tail calls are marked as implicitly using the stack pointer and they
        // could be inlined.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        // Any other direct def/use of the stack pointer restricts inlining to
        // tail-call sites, where the stack is left unmodified.
        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  // CFI is tolerated only as far as the corresponding options allow; inlined
  // CFI is not updated and can break unwinding.
  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle special case of the known size reduction.
  if (BF.size() == 1) {
    // For a regular call the last return instruction could be removed
    // (or converted to a branch).
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}

/// Populate InliningCandidates with every binary function that
/// getInliningInfo() deems inlinable.
void Inliner::findInliningCandidates(BinaryContext &BC) {
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    const InliningInfo InlInfo = getInliningInfo(Function);
    if (InlInfo.Type != INL_NONE)
      InliningCandidates[&Function] = InlInfo;
  }
}

/// Inline \p Callee at call site \p CallInst inside \p CallerBB, splicing the
/// callee's basic blocks into the caller and updating CFG edges and profile
/// counts. Returns the basic block and instruction iterator immediately
/// following the inlined body, from which scanning can resume.
std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst);

  // Split basic block at the call site if there will be more incoming edges
  // coming from the callee.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split basic block after the call instruction unless the callee is trivial
  // (i.e. consists of a single basic block). If necessary, obtain a basic block
  // for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
  // The fall-through edge is re-added later, from the callee's return blocks.
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin. The callee's
  // entry block maps onto the block containing the (removed) call site.
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock(0);
    InlinedBBMap[&*BBI] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BBI->getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

      // At a tail-call site the callee's calls and returns are copied as-is;
      // everything below only applies to a regular (non-tail) call site.
      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      // A tail call in the callee is no longer in tail position once inlined
      // at a regular call site, so convert it to an ordinary call.
      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    std::transform(BB.succ_begin(), BB.succ_end(), Successors.begin(),
                   [&InlinedBBMap](const BinaryBasicBlock *BB) {
                     return InlinedBBMap.at(BB);
                   });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}

/// Scan \p Function for direct call sites to known inlining candidates and
/// inline the profitable ones, visiting basic blocks in decreasing
/// execution-count order. Returns true if at least one site was inlined.
bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
                                         Function.layout().end());
  // Hottest blocks first so that, under -inline-limit, the most profitable
  // sites are inlined before the budget runs out.
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
              return BB1->getKnownExecutionCount() >
                     BB2->getKnownExecutionCount();
            });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
      // Only direct calls with a symbolic target are candidates.
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      // A callee restricted to tail-call sites cannot go into a regular call.
      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

      // Net size change: inlined body minus the call instruction it replaces.
      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

      // Stop when the user-specified budget is exhausted. With the default
      // -inline-limit=0 this never matches, since the counter was just
      // incremented above.
      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}

/// Pass entry point: repeatedly discover candidates and inline call sites,
/// iterating until a fixed point or until the iteration/site limits are hit.
void Inliner::runOnFunctions(BinaryContext &BC) {
  opts::syncOptions();

  if (!opts::inliningEnabled())
    return;

  uint64_t TotalSize = 0;
  for (auto &BFI : BC.getBinaryFunctions())
    TotalSize += BFI.second.getSize();

  bool InlinedOnce;
  unsigned NumIters = 0;
  do {
    if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
      break;

    InlinedOnce = false;

    // Candidates are recomputed each iteration: functions modified by earlier
    // inlining may change status.
    InliningCandidates.clear();
    findInliningCandidates(BC);

    std::vector<BinaryFunction *> ConsideredFunctions;
    for (auto &BFI : BC.getBinaryFunctions()) {
      BinaryFunction &Function = BFI.second;
      if (!shouldOptimize(Function))
        continue;
      ConsideredFunctions.push_back(&Function);
    }
    // Process hottest callers first.
    std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
              [](const BinaryFunction *A, const BinaryFunction *B) {
                return B->getKnownExecutionCount() <
                       A->getKnownExecutionCount();
              });
    for (BinaryFunction *Function : ConsideredFunctions) {
      if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
        break;

      const bool DidInline = inlineCallsInFunction(*Function);

      if (DidInline)
        Modified.insert(Function);

      InlinedOnce |= DidInline;
    }

    ++NumIters;
  } while (InlinedOnce && NumIters < opts::InlineMaxIters);

  if (NumInlinedCallSites)
    outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
           << NumInlinedCallSites << " call sites in " << NumIters
           << " iteration(s). Change in binary size: " << TotalInlinedBytes
           << " bytes.\n";
}

} // namespace bolt
} // namespace llvm