1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FrameOptimizer.h" 16 #include "bolt/Passes/IdenticalCodeFolding.h" 17 #include "bolt/Passes/IndirectCallPromotion.h" 18 #include "bolt/Passes/Inliner.h" 19 #include "bolt/Passes/Instrumentation.h" 20 #include "bolt/Passes/JTFootprintReduction.h" 21 #include "bolt/Passes/LongJmp.h" 22 #include "bolt/Passes/LoopInversionPass.h" 23 #include "bolt/Passes/PLTCall.h" 24 #include "bolt/Passes/PatchEntries.h" 25 #include "bolt/Passes/RegReAssign.h" 26 #include "bolt/Passes/ReorderData.h" 27 #include "bolt/Passes/ReorderFunctions.h" 28 #include "bolt/Passes/RetpolineInsertion.h" 29 #include "bolt/Passes/SplitFunctions.h" 30 #include "bolt/Passes/StokeInfo.h" 31 #include "bolt/Passes/TailDuplication.h" 32 #include "bolt/Passes/ThreeWayBranch.h" 33 #include "bolt/Passes/ValidateInternalCalls.h" 34 #include "bolt/Passes/VeneerElimination.h" 35 #include "bolt/Utils/CommandLineOpts.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/Timer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <memory> 40 #include <numeric> 41 42 using namespace llvm; 43 44 namespace opts { 45 46 extern cl::opt<bool> PrintAll; 47 extern cl::opt<bool> PrintDynoStats; 48 extern cl::opt<bool> DumpDotAll; 49 extern cl::opt<std::string> AsmDump; 50 extern cl::opt<bolt::PLTCall::OptType> PLT; 51 52 static cl::opt<bool> 53 DynoStatsAll("dyno-stats-all", 54 cl::desc("print dyno stats after each stage"), 55 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 EliminateUnreachable("eliminate-unreachable", 59 cl::desc("eliminate unreachable code"), 60 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 61 62 cl::opt<bool> 63 ICF("icf", 64 cl::desc("fold functions with identical code"), 65 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 66 67 static cl::opt<bool> 68 JTFootprintReductionFlag("jt-footprint-reduction", 69 cl::desc("make jump tables size smaller at the cost of using more " 70 "instructions at jump sites"), 71 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 72 73 cl::opt<bool> 74 NeverPrint("never-print", 75 cl::desc("never print"), 76 cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 77 78 cl::opt<bool> 79 PrintAfterBranchFixup("print-after-branch-fixup", 80 cl::desc("print function after fixing local branches"), 81 cl::Hidden, cl::cat(BoltOptCategory)); 82 83 static cl::opt<bool> 84 PrintAfterLowering("print-after-lowering", 85 cl::desc("print function after instruction lowering"), 86 cl::Hidden, cl::cat(BoltOptCategory)); 87 88 cl::opt<bool> 89 PrintFinalized("print-finalized", 90 cl::desc("print function after CFG is finalized"), 91 cl::Hidden, cl::cat(BoltOptCategory)); 92 93 static cl::opt<bool> 94 PrintFOP("print-fop", 95 cl::desc("print functions after frame optimizer pass"), 96 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> 99 PrintICF("print-icf", 100 cl::desc("print functions after ICF optimization"), 101 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 102 103 static cl::opt<bool> 104 PrintICP("print-icp", 105 cl::desc("print functions after indirect call promotion"), 106 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 107 108 static cl::opt<bool> 109 PrintInline("print-inline", 110 cl::desc("print functions after inlining optimization"), 111 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 112 113 static cl::opt<bool> 114 PrintJTFootprintReduction("print-after-jt-footprint-reduction", 115 cl::desc("print function after jt-footprint-reduction pass"), 116 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 117 118 static cl::opt<bool> 119 PrintLongJmp("print-longjmp", 120 cl::desc("print functions after longjmp pass"), 121 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 122 123 cl::opt<bool> 124 PrintNormalized("print-normalized", 125 cl::desc("print functions after CFG is normalized"), 126 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 127 128 static cl::opt<bool> 129 PrintOptimizeBodyless("print-optimize-bodyless", 130 cl::desc("print functions after bodyless optimization"), 131 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 132 133 static cl::opt<bool> 134 PrintPeepholes("print-peepholes", 135 cl::desc("print functions after peephole optimization"), 136 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 137 138 static cl::opt<bool> 139 PrintPLT("print-plt", 140 cl::desc("print functions after PLT optimization"), 141 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 142 143 static cl::opt<bool> 144 PrintProfileStats("print-profile-stats", 145 cl::desc("print profile quality/bias analysis"), 146 cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory)); 147 148 static cl::opt<bool> 149 PrintRegReAssign("print-regreassign", 150 cl::desc("print functions after regreassign pass"), 151 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 152 153 cl::opt<bool> 154 PrintReordered("print-reordered", 155 cl::desc("print functions after layout optimization"), 156 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 157 158 static cl::opt<bool> 159 PrintReorderedFunctions("print-reordered-functions", 160 cl::desc("print functions after clustering"), 161 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 162 163 static cl::opt<bool> 164 PrintRetpolineInsertion("print-retpoline-insertion", 165 cl::desc("print functions after retpoline insertion pass"), 166 cl::init(false), cl::ZeroOrMore, cl::cat(BoltCategory)); 167 168 static cl::opt<bool> 169 PrintSCTC("print-sctc", 170 cl::desc("print functions after conditional tail call simplification"), 171 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 172 173 static cl::opt<bool> 174 PrintSimplifyROLoads("print-simplify-rodata-loads", 175 cl::desc("print functions after simplification of RO data loads"), 176 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 177 178 static cl::opt<bool> 179 PrintSplit("print-split", 180 cl::desc("print functions after code splitting"), 181 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 182 183 static cl::opt<bool> 184 PrintStoke("print-stoke", 185 cl::desc("print functions after stoke analysis"), 186 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 187 188 static cl::opt<bool> 189 PrintVeneerElimination("print-veneer-elimination", 190 cl::desc("print functions after veneer elimination pass"), 191 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 192 193 static cl::opt<bool> 194 PrintUCE("print-uce", 195 cl::desc("print functions after unreachable code elimination"), 196 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 197 198 static cl::opt<bool> 199 RegReAssign("reg-reassign", 200 cl::desc("reassign registers so as to avoid using REX prefixes in hot code"), 201 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 202 203 static cl::opt<bool> 204 SimplifyConditionalTailCalls("simplify-conditional-tail-calls", 205 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 206 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 207 208 static cl::opt<bool> 209 SimplifyRODataLoads("simplify-rodata-loads", 210 cl::desc("simplify loads from read-only sections by replacing the memory " 211 "operand with the constant found in the corresponding section"), 212 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 213 214 static cl::list<std::string> 215 SpecializeMemcpy1("memcpy1-spec", 216 cl::desc("list of functions with call sites for which to specialize memcpy() " 217 "for size 1"), 218 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 219 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 220 221 static cl::opt<bool> 222 Stoke("stoke", 223 cl::desc("turn on the stoke analysis"), 224 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 225 226 static cl::opt<bool> 227 StringOps("inline-memcpy", 228 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 229 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 230 231 static cl::opt<bool> 232 StripRepRet("strip-rep-ret", 233 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 234 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 235 236 static cl::opt<bool> 237 VerifyCFG("verify-cfg", 238 cl::desc("verify the CFG after every pass"), 239 cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); 240 241 static cl::opt<bool> 242 TailDuplicationFlag("tail-duplication", 243 cl::desc("duplicate unconditional branches that cross a cache line"), 244 cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 245 246 static cl::opt<bool> 247 ThreeWayBranchFlag("three-way-branch", 248 cl::desc("reorder three way branches"), 249 cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 250 251 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 252 cl::desc("fold jcc+mov into cmov"), 253 cl::ZeroOrMore, cl::ReallyHidden, 254 cl::cat(BoltOptCategory)); 255 256 } // namespace opts 257 258 namespace llvm { 259 namespace bolt { 260 261 using namespace opts; 262 263 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 264 const char BinaryFunctionPassManager::TimerGroupDesc[] = 265 "Binary Function Pass Manager"; 266 267 void BinaryFunctionPassManager::runPasses() { 268 auto &BFs = BC.getBinaryFunctions(); 269 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 270 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 271 &OptPassPair = Passes[PassIdx]; 272 if (!OptPassPair.first) 273 continue; 274 275 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 276 std::string PassIdName = 277 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 278 279 if (opts::Verbosity > 0) 280 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 281 282 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 283 TimerGroupDesc, TimeOpts); 284 285 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, 286 Pass->getName(), opts::DynoStatsAll); 287 288 if (opts::VerifyCFG && 289 !std::accumulate( 290 BFs.begin(), BFs.end(), true, 291 [](const bool Valid, 292 const std::pair<const uint64_t, BinaryFunction> &It) { 293 return Valid && It.second.validateCFG(); 294 })) { 295 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 296 << Pass->getName() << "\n"; 297 exit(1); 298 } 299 300 if (opts::Verbosity > 0) 301 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 302 303 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 304 continue; 305 306 const std::string Message = std::string("after ") + Pass->getName(); 307 308 for (auto &It : BFs) { 309 BinaryFunction &Function = It.second; 310 311 if (!Pass->shouldPrint(Function)) 312 continue; 313 314 Function.print(outs(), Message, true); 315 316 if (opts::DumpDotAll) 317 Function.dumpGraphForPass(PassIdName); 318 } 319 } 320 } 321 322 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 323 BinaryFunctionPassManager Manager(BC); 324 325 const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions()); 326 327 Manager.registerPass(std::make_unique<AsmDumpPass>(), 328 opts::AsmDump.getNumOccurrences()); 329 330 if (opts::Instrument) 331 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 332 333 // Here we manage dependencies/order manually, since passes are run in the 334 // order they're registered. 335 336 // Run this pass first to use stats for the original functions. 337 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 338 339 if (opts::PrintProfileStats) 340 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 341 342 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 343 344 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 345 346 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); 347 348 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 349 350 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 351 opts::StripRepRet); 352 353 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 354 opts::ICF); 355 356 if (BC.isAArch64()) 357 Manager.registerPass( 358 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 359 360 Manager.registerPass( 361 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 362 !opts::SpecializeMemcpy1.empty()); 363 364 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 365 opts::StringOps); 366 367 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 368 369 Manager.registerPass( 370 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 371 opts::JTFootprintReductionFlag); 372 373 Manager.registerPass( 374 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 375 opts::SimplifyRODataLoads); 376 377 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 378 opts::RegReAssign); 379 380 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 381 382 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 383 opts::ICF); 384 385 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 386 387 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 388 opts::ThreeWayBranchFlag); 389 390 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 391 392 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 393 opts::EliminateUnreachable); 394 395 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 396 397 Manager.registerPass(std::make_unique<LoopInversionPass>()); 398 399 Manager.registerPass(std::make_unique<TailDuplication>(), 400 opts::TailDuplicationFlag); 401 402 Manager.registerPass(std::make_unique<CMOVConversion>(), 403 opts::CMOVConversionFlag); 404 405 // This pass syncs local branches with CFG. If any of the following 406 // passes breaks the sync - they either need to re-run the pass or 407 // fix branches consistency internally. 408 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 409 410 // This pass should come close to last since it uses the estimated hot 411 // size of a function to determine the order. It should definitely 412 // also happen after any changes to the call graph are made, e.g. inlining. 413 Manager.registerPass( 414 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 415 416 // Print final dyno stats right while CFG and instruction analysis are intact. 417 Manager.registerPass( 418 std::make_unique<DynoStatsPrintPass>( 419 InitialDynoStats, "after all optimizations before SCTC and FOP"), 420 opts::PrintDynoStats || opts::DynoStatsAll); 421 422 // Add the StokeInfo pass, which extract functions for stoke optimization and 423 // get the liveness information for them 424 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 425 426 // This pass introduces conditional jumps into external functions. 427 // Between extending CFG to support this and isolating this pass we chose 428 // the latter. Thus this pass will do double jump removal and unreachable 429 // code elimination if necessary and won't rely on peepholes/UCE for these 430 // optimizations. 431 // More generally this pass should be the last optimization pass that 432 // modifies branches/control flow. This pass is run after function 433 // reordering so that it can tell whether calls are forward/backward 434 // accurately. 435 Manager.registerPass( 436 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 437 opts::SimplifyConditionalTailCalls); 438 439 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 440 441 Manager.registerPass(std::make_unique<AlignerPass>()); 442 443 // Perform reordering on data contained in one or more sections using 444 // memory profiling data. 445 Manager.registerPass(std::make_unique<ReorderData>()); 446 447 if (BC.isAArch64()) { 448 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 449 450 // Tighten branches according to offset differences between branch and 451 // targets. No extra instructions after this pass, otherwise we may have 452 // relocations out of range and crash during linking. 453 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 454 } 455 456 // This pass should always run last.* 457 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 458 459 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 460 // FrameOptimizer move values around and needs to update CFIs. To do this, it 461 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 462 // placed according to the final layout. 463 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 464 465 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 466 467 Manager.registerPass( 468 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 469 470 // Assign each function an output section. 471 Manager.registerPass(std::make_unique<AssignSections>()); 472 473 // Patch original function entries 474 if (BC.HasRelocations) 475 Manager.registerPass(std::make_unique<PatchEntries>()); 476 477 // This pass turns tail calls into jumps which makes them invisible to 478 // function reordering. It's unsafe to use any CFG or instruction analysis 479 // after this point. 480 Manager.registerPass( 481 std::make_unique<InstructionLowering>(PrintAfterLowering)); 482 483 // In non-relocation mode, mark functions that do not fit into their original 484 // space as non-simple if we have to (e.g. for correct debug info update). 485 // NOTE: this pass depends on finalized code. 486 if (!BC.HasRelocations) 487 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 488 489 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 490 491 Manager.runPasses(); 492 } 493 494 } // namespace bolt 495 } // namespace llvm 496