1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/FrameOptimizer.h" 15 #include "bolt/Passes/IdenticalCodeFolding.h" 16 #include "bolt/Passes/IndirectCallPromotion.h" 17 #include "bolt/Passes/Inliner.h" 18 #include "bolt/Passes/Instrumentation.h" 19 #include "bolt/Passes/JTFootprintReduction.h" 20 #include "bolt/Passes/LongJmp.h" 21 #include "bolt/Passes/LoopInversionPass.h" 22 #include "bolt/Passes/PLTCall.h" 23 #include "bolt/Passes/PatchEntries.h" 24 #include "bolt/Passes/RegReAssign.h" 25 #include "bolt/Passes/ReorderData.h" 26 #include "bolt/Passes/ReorderFunctions.h" 27 #include "bolt/Passes/RetpolineInsertion.h" 28 #include "bolt/Passes/SplitFunctions.h" 29 #include "bolt/Passes/StokeInfo.h" 30 #include "bolt/Passes/TailDuplication.h" 31 #include "bolt/Passes/ThreeWayBranch.h" 32 #include "bolt/Passes/ValidateInternalCalls.h" 33 #include "bolt/Passes/VeneerElimination.h" 34 #include "bolt/Utils/CommandLineOpts.h" 35 #include "llvm/Support/FormatVariadic.h" 36 #include "llvm/Support/Timer.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include <memory> 39 #include <numeric> 40 41 using namespace llvm; 42 43 namespace opts { 44 45 extern cl::opt<bool> PrintAll; 46 extern cl::opt<bool> PrintDynoStats; 47 extern cl::opt<bool> DumpDotAll; 48 extern cl::opt<std::string> AsmDump; 49 extern cl::opt<bolt::PLTCall::OptType> PLT; 50 51 static cl::opt<bool> 52 DynoStatsAll("dyno-stats-all", 53 cl::desc("print dyno stats after each stage"), 54 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 EliminateUnreachable("eliminate-unreachable", 58 cl::desc("eliminate unreachable code"), 59 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 60 61 cl::opt<bool> 62 ICF("icf", 63 cl::desc("fold functions with identical code"), 64 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 65 66 static cl::opt<bool> 67 JTFootprintReductionFlag("jt-footprint-reduction", 68 cl::desc("make jump tables size smaller at the cost of using more " 69 "instructions at jump sites"), 70 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 71 72 cl::opt<bool> 73 NeverPrint("never-print", 74 cl::desc("never print"), 75 cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 76 77 cl::opt<bool> 78 PrintAfterBranchFixup("print-after-branch-fixup", 79 cl::desc("print function after fixing local branches"), 80 cl::Hidden, cl::cat(BoltOptCategory)); 81 82 static cl::opt<bool> 83 PrintAfterLowering("print-after-lowering", 84 cl::desc("print function after instruction lowering"), 85 cl::Hidden, cl::cat(BoltOptCategory)); 86 87 cl::opt<bool> 88 PrintFinalized("print-finalized", 89 cl::desc("print function after CFG is finalized"), 90 cl::Hidden, cl::cat(BoltOptCategory)); 91 92 static cl::opt<bool> 93 PrintFOP("print-fop", 94 cl::desc("print functions after frame optimizer pass"), 95 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 96 97 static cl::opt<bool> 98 PrintICF("print-icf", 99 cl::desc("print functions after ICF optimization"), 100 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 101 102 static cl::opt<bool> 103 PrintICP("print-icp", 104 cl::desc("print functions after indirect call promotion"), 105 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 106 107 static cl::opt<bool> 108 PrintInline("print-inline", 109 cl::desc("print functions after inlining optimization"), 110 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 111 112 static cl::opt<bool> 113 PrintJTFootprintReduction("print-after-jt-footprint-reduction", 114 cl::desc("print function after jt-footprint-reduction pass"), 115 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 116 117 static cl::opt<bool> 118 PrintLongJmp("print-longjmp", 119 cl::desc("print functions after longjmp pass"), 120 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 121 122 cl::opt<bool> 123 PrintNormalized("print-normalized", 124 cl::desc("print functions after CFG is normalized"), 125 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 126 127 static cl::opt<bool> 128 PrintOptimizeBodyless("print-optimize-bodyless", 129 cl::desc("print functions after bodyless optimization"), 130 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 131 132 static cl::opt<bool> 133 PrintPeepholes("print-peepholes", 134 cl::desc("print functions after peephole optimization"), 135 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 136 137 static cl::opt<bool> 138 PrintPLT("print-plt", 139 cl::desc("print functions after PLT optimization"), 140 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 141 142 static cl::opt<bool> 143 PrintProfileStats("print-profile-stats", 144 cl::desc("print profile quality/bias analysis"), 145 cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory)); 146 147 static cl::opt<bool> 148 PrintRegReAssign("print-regreassign", 149 cl::desc("print functions after regreassign pass"), 150 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 151 152 cl::opt<bool> 153 PrintReordered("print-reordered", 154 cl::desc("print functions after layout optimization"), 155 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 156 157 static cl::opt<bool> 158 PrintReorderedFunctions("print-reordered-functions", 159 cl::desc("print functions after clustering"), 160 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 161 162 static cl::opt<bool> 163 PrintRetpolineInsertion("print-retpoline-insertion", 164 cl::desc("print functions after retpoline insertion pass"), 165 cl::init(false), cl::ZeroOrMore, cl::cat(BoltCategory)); 166 167 static cl::opt<bool> 168 PrintSCTC("print-sctc", 169 cl::desc("print functions after conditional tail call simplification"), 170 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 171 172 static cl::opt<bool> 173 PrintSimplifyROLoads("print-simplify-rodata-loads", 174 cl::desc("print functions after simplification of RO data loads"), 175 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 176 177 static cl::opt<bool> 178 PrintSplit("print-split", 179 cl::desc("print functions after code splitting"), 180 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 181 182 static cl::opt<bool> 183 PrintStoke("print-stoke", 184 cl::desc("print functions after stoke analysis"), 185 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 186 187 static cl::opt<bool> 188 PrintVeneerElimination("print-veneer-elimination", 189 cl::desc("print functions after veneer elimination pass"), 190 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 191 192 static cl::opt<bool> 193 PrintUCE("print-uce", 194 cl::desc("print functions after unreachable code elimination"), 195 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 196 197 static cl::opt<bool> 198 RegReAssign("reg-reassign", 199 cl::desc("reassign registers so as to avoid using REX prefixes in hot code"), 200 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 201 202 static cl::opt<bool> 203 SimplifyConditionalTailCalls("simplify-conditional-tail-calls", 204 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 205 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 206 207 static cl::opt<bool> 208 SimplifyRODataLoads("simplify-rodata-loads", 209 cl::desc("simplify loads from read-only sections by replacing the memory " 210 "operand with the constant found in the corresponding section"), 211 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 212 213 static cl::list<std::string> 214 SpecializeMemcpy1("memcpy1-spec", 215 cl::desc("list of functions with call sites for which to specialize memcpy() " 216 "for size 1"), 217 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 218 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 219 220 static cl::opt<bool> 221 Stoke("stoke", 222 cl::desc("turn on the stoke analysis"), 223 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 224 225 static cl::opt<bool> 226 StringOps("inline-memcpy", 227 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 228 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 229 230 static cl::opt<bool> 231 StripRepRet("strip-rep-ret", 232 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 233 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 234 235 static cl::opt<bool> 236 VerifyCFG("verify-cfg", 237 cl::desc("verify the CFG after every pass"), 238 cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); 239 240 static cl::opt<bool> 241 TailDuplicationFlag("tail-duplication", 242 cl::desc("duplicate unconditional branches that cross a cache line"), 243 cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 244 245 static cl::opt<bool> 246 ThreeWayBranchFlag("three-way-branch", 247 cl::desc("reorder three way branches"), 248 cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 249 250 } // namespace opts 251 252 namespace llvm { 253 namespace bolt { 254 255 using namespace opts; 256 257 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 258 const char BinaryFunctionPassManager::TimerGroupDesc[] = 259 "Binary Function Pass Manager"; 260 261 void BinaryFunctionPassManager::runPasses() { 262 auto &BFs = BC.getBinaryFunctions(); 263 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 264 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 265 &OptPassPair = Passes[PassIdx]; 266 if (!OptPassPair.first) 267 continue; 268 269 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 270 std::string PassIdName = 271 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 272 273 if (opts::Verbosity > 0) 274 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 275 276 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 277 TimerGroupDesc, TimeOpts); 278 279 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, 280 Pass->getName(), opts::DynoStatsAll); 281 282 if (opts::VerifyCFG && 283 !std::accumulate( 284 BFs.begin(), BFs.end(), true, 285 [](const bool Valid, 286 const std::pair<const uint64_t, BinaryFunction> &It) { 287 return Valid && It.second.validateCFG(); 288 })) { 289 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 290 << Pass->getName() << "\n"; 291 exit(1); 292 } 293 294 if (opts::Verbosity > 0) 295 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 296 297 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 298 continue; 299 300 const std::string Message = std::string("after ") + Pass->getName(); 301 302 for (auto &It : BFs) { 303 BinaryFunction &Function = It.second; 304 305 if (!Pass->shouldPrint(Function)) 306 continue; 307 308 Function.print(outs(), Message, true); 309 310 if (opts::DumpDotAll) 311 Function.dumpGraphForPass(PassIdName); 312 } 313 } 314 } 315 316 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 317 BinaryFunctionPassManager Manager(BC); 318 319 const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions()); 320 321 Manager.registerPass(std::make_unique<AsmDumpPass>(), 322 opts::AsmDump.getNumOccurrences()); 323 324 if (opts::Instrument) 325 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 326 327 // Here we manage dependencies/order manually, since passes are run in the 328 // order they're registered. 329 330 // Run this pass first to use stats for the original functions. 331 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 332 333 if (opts::PrintProfileStats) 334 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 335 336 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 337 338 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 339 340 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); 341 342 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 343 344 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 345 opts::StripRepRet); 346 347 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 348 opts::ICF); 349 350 if (BC.isAArch64()) 351 Manager.registerPass( 352 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 353 354 Manager.registerPass( 355 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 356 !opts::SpecializeMemcpy1.empty()); 357 358 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 359 opts::StringOps); 360 361 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 362 363 Manager.registerPass( 364 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 365 opts::JTFootprintReductionFlag); 366 367 Manager.registerPass( 368 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 369 opts::SimplifyRODataLoads); 370 371 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 372 opts::RegReAssign); 373 374 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 375 376 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 377 opts::ICF); 378 379 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 380 381 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 382 opts::ThreeWayBranchFlag); 383 384 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 385 386 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 387 opts::EliminateUnreachable); 388 389 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 390 391 Manager.registerPass(std::make_unique<LoopInversionPass>()); 392 393 Manager.registerPass(std::make_unique<TailDuplication>(), 394 opts::TailDuplicationFlag); 395 396 // This pass syncs local branches with CFG. If any of the following 397 // passes breaks the sync - they either need to re-run the pass or 398 // fix branches consistency internally. 399 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 400 401 // This pass should come close to last since it uses the estimated hot 402 // size of a function to determine the order. It should definitely 403 // also happen after any changes to the call graph are made, e.g. inlining. 404 Manager.registerPass( 405 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 406 407 // Print final dyno stats right while CFG and instruction analysis are intact. 408 Manager.registerPass( 409 std::make_unique<DynoStatsPrintPass>( 410 InitialDynoStats, "after all optimizations before SCTC and FOP"), 411 opts::PrintDynoStats | opts::DynoStatsAll); 412 413 // Add the StokeInfo pass, which extract functions for stoke optimization and 414 // get the liveness information for them 415 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 416 417 // This pass introduces conditional jumps into external functions. 418 // Between extending CFG to support this and isolating this pass we chose 419 // the latter. Thus this pass will do double jump removal and unreachable 420 // code elimination if necessary and won't rely on peepholes/UCE for these 421 // optimizations. 422 // More generally this pass should be the last optimization pass that 423 // modifies branches/control flow. This pass is run after function 424 // reordering so that it can tell whether calls are forward/backward 425 // accurately. 426 Manager.registerPass( 427 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 428 opts::SimplifyConditionalTailCalls); 429 430 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 431 432 Manager.registerPass(std::make_unique<AlignerPass>()); 433 434 // Perform reordering on data contained in one or more sections using 435 // memory profiling data. 436 Manager.registerPass(std::make_unique<ReorderData>()); 437 438 if (BC.isAArch64()) { 439 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 440 441 // Tighten branches according to offset differences between branch and 442 // targets. No extra instructions after this pass, otherwise we may have 443 // relocations out of range and crash during linking. 444 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 445 } 446 447 // This pass should always run last.* 448 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 449 450 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 451 // FrameOptimizer move values around and needs to update CFIs. To do this, it 452 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 453 // placed according to the final layout. 454 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 455 456 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 457 458 Manager.registerPass( 459 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 460 461 // Assign each function an output section. 462 Manager.registerPass(std::make_unique<AssignSections>()); 463 464 // Patch original function entries 465 if (BC.HasRelocations) 466 Manager.registerPass(std::make_unique<PatchEntries>()); 467 468 // This pass turns tail calls into jumps which makes them invisible to 469 // function reordering. It's unsafe to use any CFG or instruction analysis 470 // after this point. 471 Manager.registerPass( 472 std::make_unique<InstructionLowering>(PrintAfterLowering)); 473 474 // In non-relocation mode, mark functions that do not fit into their original 475 // space as non-simple if we have to (e.g. for correct debug info update). 476 // NOTE: this pass depends on finalized code. 477 if (!BC.HasRelocations) 478 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 479 480 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 481 482 Manager.runPasses(); 483 } 484 485 } // namespace bolt 486 } // namespace llvm 487