//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <map>
#include <unordered_map>

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

static cl::opt<bool>
    AdjustProfile("inline-ap",
                  cl::desc("adjust function profile after inlining"),
                  cl::cat(BoltOptCategory));

static cl::list<std::string>
    ForceInlineFunctions("force-inline", cl::CommaSeparated,
                         cl::desc("list of functions to always consider "
                                  "for inlining"),
                         cl::value_desc("func1,func2,func3,..."), cl::Hidden,
                         cl::cat(BoltOptCategory));

static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
                               cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreLeafCFI(
    "inline-ignore-leaf-cfi",
    cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreCFI(
    "inline-ignore-cfi",
    cl::desc(
        "inline functions with CFI programs (can break exception handling)"),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineLimit("inline-limit",
                cl::desc("maximum number of call sites to inline"), cl::init(0),
                cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineMaxIters("inline-max-iters",
                   cl::desc("maximum number of inline iterations"), cl::init(3),
                   cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineSmallFunctions(
    "inline-small-functions",
    cl::desc("inline functions if increase in size is less than defined by "
             "-inline-small-functions-bytes"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> InlineSmallFunctionsBytes(
    "inline-small-functions-bytes",
    cl::desc("max number of bytes for the function to be considered small for "
             "inlining purposes"),
    cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> NoInline(
    "no-inline",
    cl::desc("disable all inlining (overrides other inlining options)"),
    cl::cat(BoltOptCategory));

/// This function returns true if any of the inlining options are specified
/// and the inlining pass should be executed. Whenever a new inlining option
/// is added, this function should reflect the change.
bool inliningEnabled() {
  return !NoInline &&
         (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
}

bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
  for (std::string &Name : opts::ForceInlineFunctions)
    if (Function.hasName(Name))
      return true;
  return false;
}

void syncOptions() {
  if (opts::InlineIgnoreCFI)
    opts::InlineIgnoreLeafCFI = true;

  if (opts::InlineAll)
    opts::InlineSmallFunctions = true;
}

} // namespace opts

namespace llvm {
namespace bolt {

uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;

uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
  if (SizeOfCallInst)
    return SizeOfCallInst;

  MCInst Inst;
  BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfCallInst = BC.computeInstructionSize(Inst);

  return SizeOfCallInst;
}

uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
  if (SizeOfTailCallInst)
    return SizeOfTailCallInst;

  MCInst Inst;
  BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfTailCallInst = BC.computeInstructionSize(Inst);

  return SizeOfTailCallInst;
}

InliningInfo getInliningInfo(const BinaryFunction &BF) {
  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false;
  bool HasCFI = false;
  bool IsLeaf = true;

  // Perform necessary checks unless the option overrides it.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock *BB : BF.layout()) {
      for (const MCInst &Inst : *BB) {
        // Tail calls are marked as implicitly using the stack pointer and
        // they could be inlined.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle special case of the known size reduction.
  if (BF.size() == 1) {
    // For a regular call the last return instruction could be removed
    // (or converted to a branch).
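    // Note that the deduction applies to regular call sites only: at a tail
    // call site the callee's return transfers control back to the original
    // caller and must be preserved.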
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}

void Inliner::findInliningCandidates(BinaryContext &BC) {
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    if (!shouldOptimize(Function))
      continue;
    const InliningInfo InlInfo = getInliningInfo(Function);
    if (InlInfo.Type != INL_NONE)
      InliningCandidates[&Function] = InlInfo;
  }
}

std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const Optional<MCPlus::MCLandingPad> CSEHInfo = BC.MIB->getEHInfo(*CallInst);

  // Split basic block at the call site if there will be more incoming edges
  // coming from the callee.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split basic block after the call instruction unless the callee is trivial
  // (i.e. consists of a single basic block). If necessary, obtain a basic
  // block for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin.
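  // The callee's entry block is merged into the block containing the call
  // site, while every other callee block gets a fresh block in the caller.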
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (auto BBI = std::next(Callee.begin()); BBI != Callee.end(); ++BBI) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock(0);
    InlinedBBMap[&*BBI] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BBI->getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    std::transform(BB.succ_begin(), BB.succ_end(), Successors.begin(),
                   [&InlinedBBMap](const BinaryBasicBlock *BB) {
                     return InlinedBBMap.at(BB);
                   });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
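  // The scaling loop above may have modified it along with the rest of the
  // inlined blocks.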
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}

bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  std::vector<BinaryBasicBlock *> Blocks(Function.layout().begin(),
                                         Function.layout().end());
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
              return BB1->getKnownExecutionCount() >
                     BB2->getKnownExecutionCount();
            });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
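      // A tail-call-only candidate accesses the stack pointer directly, and
      // its inlined body now lives in the caller, so the caller itself can no
      // longer be inlined at regular call sites.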
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}

void Inliner::runOnFunctions(BinaryContext &BC) {
  opts::syncOptions();

  if (!opts::inliningEnabled())
    return;

  bool InlinedOnce;
  unsigned NumIters = 0;
  do {
    if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
      break;

    InlinedOnce = false;

    InliningCandidates.clear();
    findInliningCandidates(BC);

    std::vector<BinaryFunction *> ConsideredFunctions;
    for (auto &BFI : BC.getBinaryFunctions()) {
      BinaryFunction &Function = BFI.second;
      if (!shouldOptimize(Function))
        continue;
      ConsideredFunctions.push_back(&Function);
    }
    std::sort(ConsideredFunctions.begin(), ConsideredFunctions.end(),
              [](const BinaryFunction *A, const BinaryFunction *B) {
                return B->getKnownExecutionCount() <
                       A->getKnownExecutionCount();
              });
    for (BinaryFunction *Function : ConsideredFunctions) {
      if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
        break;

      const bool DidInline = inlineCallsInFunction(*Function);

      if (DidInline)
        Modified.insert(Function);

      InlinedOnce |= DidInline;
    }

    ++NumIters;
  } while (InlinedOnce && NumIters < opts::InlineMaxIters);

  if (NumInlinedCallSites)
    outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
           << NumInlinedCallSites << " call sites in " << NumIters
           << " iteration(s). Change in binary size: " << TotalInlinedBytes
           << " bytes.\n";
}

} // namespace bolt
} // namespace llvm