1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FrameOptimizer.h" 16 #include "bolt/Passes/IdenticalCodeFolding.h" 17 #include "bolt/Passes/IndirectCallPromotion.h" 18 #include "bolt/Passes/Inliner.h" 19 #include "bolt/Passes/Instrumentation.h" 20 #include "bolt/Passes/JTFootprintReduction.h" 21 #include "bolt/Passes/LongJmp.h" 22 #include "bolt/Passes/LoopInversionPass.h" 23 #include "bolt/Passes/PLTCall.h" 24 #include "bolt/Passes/PatchEntries.h" 25 #include "bolt/Passes/RegReAssign.h" 26 #include "bolt/Passes/ReorderData.h" 27 #include "bolt/Passes/ReorderFunctions.h" 28 #include "bolt/Passes/RetpolineInsertion.h" 29 #include "bolt/Passes/SplitFunctions.h" 30 #include "bolt/Passes/StokeInfo.h" 31 #include "bolt/Passes/TailDuplication.h" 32 #include "bolt/Passes/ThreeWayBranch.h" 33 #include "bolt/Passes/ValidateInternalCalls.h" 34 #include "bolt/Passes/VeneerElimination.h" 35 #include "bolt/Utils/CommandLineOpts.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/Timer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <memory> 40 #include <numeric> 41 42 using namespace llvm; 43 44 namespace opts { 45 46 extern cl::opt<bool> PrintAll; 47 extern cl::opt<bool> PrintDynoStats; 48 extern cl::opt<bool> DumpDotAll; 49 extern cl::opt<std::string> AsmDump; 50 extern cl::opt<bolt::PLTCall::OptType> PLT; 51 52 static cl::opt<bool> 53 DynoStatsAll("dyno-stats-all", 54 cl::desc("print dyno stats after each stage"), 55 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 EliminateUnreachable("eliminate-unreachable", 59 cl::desc("eliminate unreachable code"), cl::init(true), 60 cl::cat(BoltOptCategory)); 61 62 cl::opt<bool> 63 ICF("icf", 64 cl::desc("fold functions with identical code"), 65 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 66 67 static cl::opt<bool> 68 JTFootprintReductionFlag("jt-footprint-reduction", 69 cl::desc("make jump tables size smaller at the cost of using more " 70 "instructions at jump sites"), 71 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 72 73 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 74 cl::ReallyHidden, cl::cat(BoltOptCategory)); 75 76 cl::opt<bool> 77 PrintAfterBranchFixup("print-after-branch-fixup", 78 cl::desc("print function after fixing local branches"), 79 cl::Hidden, cl::cat(BoltOptCategory)); 80 81 static cl::opt<bool> 82 PrintAfterLowering("print-after-lowering", 83 cl::desc("print function after instruction lowering"), 84 cl::Hidden, cl::cat(BoltOptCategory)); 85 86 cl::opt<bool> 87 PrintFinalized("print-finalized", 88 cl::desc("print function after CFG is finalized"), 89 cl::Hidden, cl::cat(BoltOptCategory)); 90 91 static cl::opt<bool> 92 PrintFOP("print-fop", 93 cl::desc("print functions after frame optimizer pass"), 94 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 95 96 static cl::opt<bool> 97 PrintICF("print-icf", 98 cl::desc("print functions after ICF optimization"), 99 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 100 101 static cl::opt<bool> 102 PrintICP("print-icp", 103 cl::desc("print functions after indirect call promotion"), 104 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 105 106 static cl::opt<bool> 107 PrintInline("print-inline", 108 cl::desc("print functions after inlining optimization"), 109 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 110 111 static cl::opt<bool> 112 PrintJTFootprintReduction("print-after-jt-footprint-reduction", 113 cl::desc("print function after jt-footprint-reduction pass"), 114 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 115 116 static cl::opt<bool> 117 PrintLongJmp("print-longjmp", 118 cl::desc("print functions after longjmp pass"), 119 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 120 121 cl::opt<bool> 122 PrintNormalized("print-normalized", 123 cl::desc("print functions after CFG is normalized"), 124 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 125 126 static cl::opt<bool> 127 PrintOptimizeBodyless("print-optimize-bodyless", 128 cl::desc("print functions after bodyless optimization"), 129 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 130 131 static cl::opt<bool> 132 PrintPeepholes("print-peepholes", 133 cl::desc("print functions after peephole optimization"), 134 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 135 136 static cl::opt<bool> 137 PrintPLT("print-plt", 138 cl::desc("print functions after PLT optimization"), 139 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 140 141 static cl::opt<bool> 142 PrintProfileStats("print-profile-stats", 143 cl::desc("print profile quality/bias analysis"), 144 cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory)); 145 146 static cl::opt<bool> 147 PrintRegReAssign("print-regreassign", 148 cl::desc("print functions after regreassign pass"), 149 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 150 151 cl::opt<bool> 152 PrintReordered("print-reordered", 153 cl::desc("print functions after layout optimization"), 154 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 155 156 static cl::opt<bool> 157 PrintReorderedFunctions("print-reordered-functions", 158 cl::desc("print functions after clustering"), 159 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 160 161 static cl::opt<bool> PrintRetpolineInsertion( 162 "print-retpoline-insertion", 163 cl::desc("print functions after retpoline insertion pass"), 164 cl::cat(BoltCategory)); 165 166 static cl::opt<bool> 167 PrintSCTC("print-sctc", 168 cl::desc("print functions after conditional tail call simplification"), 169 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 170 171 static cl::opt<bool> 172 PrintSimplifyROLoads("print-simplify-rodata-loads", 173 cl::desc("print functions after simplification of RO data loads"), 174 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 175 176 static cl::opt<bool> 177 PrintSplit("print-split", 178 cl::desc("print functions after code splitting"), 179 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 180 181 static cl::opt<bool> 182 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 183 cl::cat(BoltOptCategory)); 184 185 static cl::opt<bool> PrintVeneerElimination( 186 "print-veneer-elimination", 187 cl::desc("print functions after veneer elimination pass"), 188 cl::cat(BoltOptCategory)); 189 190 static cl::opt<bool> 191 PrintUCE("print-uce", 192 cl::desc("print functions after unreachable code elimination"), 193 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); 194 195 static cl::opt<bool> RegReAssign( 196 "reg-reassign", 197 cl::desc( 198 "reassign registers so as to avoid using REX prefixes in hot code"), 199 cl::cat(BoltOptCategory)); 200 201 static cl::opt<bool> SimplifyConditionalTailCalls( 202 "simplify-conditional-tail-calls", 203 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 204 cl::init(true), cl::cat(BoltOptCategory)); 205 206 static cl::opt<bool> 207 SimplifyRODataLoads("simplify-rodata-loads", 208 cl::desc("simplify loads from read-only sections by replacing the memory " 209 "operand with the constant found in the corresponding section"), 210 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 211 212 static cl::list<std::string> 213 SpecializeMemcpy1("memcpy1-spec", 214 cl::desc("list of functions with call sites for which to specialize memcpy() " 215 "for size 1"), 216 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 217 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 218 219 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 220 cl::cat(BoltOptCategory)); 221 222 static cl::opt<bool> StringOps( 223 "inline-memcpy", 224 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 225 cl::cat(BoltOptCategory)); 226 227 static cl::opt<bool> StripRepRet( 228 "strip-rep-ret", 229 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 230 cl::init(true), cl::cat(BoltOptCategory)); 231 232 static cl::opt<bool> VerifyCFG("verify-cfg", 233 cl::desc("verify the CFG after every pass"), 234 cl::Hidden, cl::cat(BoltOptCategory)); 235 236 static cl::opt<bool> 237 ThreeWayBranchFlag("three-way-branch", 238 cl::desc("reorder three way branches"), 239 cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); 240 241 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 242 cl::desc("fold jcc+mov into cmov"), 243 cl::ZeroOrMore, cl::ReallyHidden, 244 cl::cat(BoltOptCategory)); 245 246 } // namespace opts 247 248 namespace llvm { 249 namespace bolt { 250 251 using namespace opts; 252 253 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 254 const char BinaryFunctionPassManager::TimerGroupDesc[] = 255 "Binary Function Pass Manager"; 256 257 void BinaryFunctionPassManager::runPasses() { 258 auto &BFs = BC.getBinaryFunctions(); 259 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 260 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 261 &OptPassPair = Passes[PassIdx]; 262 if (!OptPassPair.first) 263 continue; 264 265 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 266 std::string PassIdName = 267 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 268 269 if (opts::Verbosity > 0) 270 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 271 272 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 273 TimerGroupDesc, TimeOpts); 274 275 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, 276 Pass->getName(), opts::DynoStatsAll); 277 278 if (opts::VerifyCFG && 279 !std::accumulate( 280 BFs.begin(), BFs.end(), true, 281 [](const bool Valid, 282 const std::pair<const uint64_t, BinaryFunction> &It) { 283 return Valid && It.second.validateCFG(); 284 })) { 285 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 286 << Pass->getName() << "\n"; 287 exit(1); 288 } 289 290 if (opts::Verbosity > 0) 291 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 292 293 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 294 continue; 295 296 const std::string Message = std::string("after ") + Pass->getName(); 297 298 for (auto &It : BFs) { 299 BinaryFunction &Function = It.second; 300 301 if (!Pass->shouldPrint(Function)) 302 continue; 303 304 Function.print(outs(), Message, true); 305 306 if (opts::DumpDotAll) 307 Function.dumpGraphForPass(PassIdName); 308 } 309 } 310 } 311 312 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 313 BinaryFunctionPassManager Manager(BC); 314 315 const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions()); 316 317 Manager.registerPass(std::make_unique<AsmDumpPass>(), 318 opts::AsmDump.getNumOccurrences()); 319 320 if (opts::Instrument) 321 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 322 323 // Here we manage dependencies/order manually, since passes are run in the 324 // order they're registered. 325 326 // Run this pass first to use stats for the original functions. 327 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 328 329 if (opts::PrintProfileStats) 330 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 331 332 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 333 334 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 335 336 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); 337 338 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 339 340 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 341 opts::StripRepRet); 342 343 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 344 opts::ICF); 345 346 if (BC.isAArch64()) 347 Manager.registerPass( 348 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 349 350 Manager.registerPass( 351 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 352 !opts::SpecializeMemcpy1.empty()); 353 354 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 355 opts::StringOps); 356 357 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 358 359 Manager.registerPass( 360 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 361 opts::JTFootprintReductionFlag); 362 363 Manager.registerPass( 364 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 365 opts::SimplifyRODataLoads); 366 367 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 368 opts::RegReAssign); 369 370 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 371 372 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 373 opts::ICF); 374 375 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 376 377 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 378 opts::ThreeWayBranchFlag); 379 380 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 381 382 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 383 opts::EliminateUnreachable); 384 385 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 386 387 Manager.registerPass(std::make_unique<LoopInversionPass>()); 388 389 Manager.registerPass(std::make_unique<TailDuplication>()); 390 391 Manager.registerPass(std::make_unique<CMOVConversion>(), 392 opts::CMOVConversionFlag); 393 394 // This pass syncs local branches with CFG. If any of the following 395 // passes breaks the sync - they either need to re-run the pass or 396 // fix branches consistency internally. 397 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 398 399 // This pass should come close to last since it uses the estimated hot 400 // size of a function to determine the order. It should definitely 401 // also happen after any changes to the call graph are made, e.g. inlining. 402 Manager.registerPass( 403 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 404 405 // Print final dyno stats right while CFG and instruction analysis are intact. 406 Manager.registerPass( 407 std::make_unique<DynoStatsPrintPass>( 408 InitialDynoStats, "after all optimizations before SCTC and FOP"), 409 opts::PrintDynoStats || opts::DynoStatsAll); 410 411 // Add the StokeInfo pass, which extract functions for stoke optimization and 412 // get the liveness information for them 413 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 414 415 // This pass introduces conditional jumps into external functions. 416 // Between extending CFG to support this and isolating this pass we chose 417 // the latter. Thus this pass will do double jump removal and unreachable 418 // code elimination if necessary and won't rely on peepholes/UCE for these 419 // optimizations. 420 // More generally this pass should be the last optimization pass that 421 // modifies branches/control flow. This pass is run after function 422 // reordering so that it can tell whether calls are forward/backward 423 // accurately. 424 Manager.registerPass( 425 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 426 opts::SimplifyConditionalTailCalls); 427 428 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 429 430 Manager.registerPass(std::make_unique<AlignerPass>()); 431 432 // Perform reordering on data contained in one or more sections using 433 // memory profiling data. 434 Manager.registerPass(std::make_unique<ReorderData>()); 435 436 if (BC.isAArch64()) { 437 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 438 439 // Tighten branches according to offset differences between branch and 440 // targets. No extra instructions after this pass, otherwise we may have 441 // relocations out of range and crash during linking. 442 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 443 } 444 445 // This pass should always run last.* 446 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 447 448 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 449 // FrameOptimizer move values around and needs to update CFIs. To do this, it 450 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 451 // placed according to the final layout. 452 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 453 454 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 455 456 Manager.registerPass( 457 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 458 459 // Assign each function an output section. 460 Manager.registerPass(std::make_unique<AssignSections>()); 461 462 // Patch original function entries 463 if (BC.HasRelocations) 464 Manager.registerPass(std::make_unique<PatchEntries>()); 465 466 // This pass turns tail calls into jumps which makes them invisible to 467 // function reordering. It's unsafe to use any CFG or instruction analysis 468 // after this point. 469 Manager.registerPass( 470 std::make_unique<InstructionLowering>(PrintAfterLowering)); 471 472 // In non-relocation mode, mark functions that do not fit into their original 473 // space as non-simple if we have to (e.g. for correct debug info update). 474 // NOTE: this pass depends on finalized code. 475 if (!BC.HasRelocations) 476 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 477 478 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 479 480 Manager.runPasses(); 481 } 482 483 } // namespace bolt 484 } // namespace llvm 485