1 //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The set of optimization/analysis passes that run on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef BOLT_PASSES_BINARY_PASSES_H 14 #define BOLT_PASSES_BINARY_PASSES_H 15 16 #include "bolt/Core/BinaryContext.h" 17 #include "bolt/Core/BinaryFunction.h" 18 #include "bolt/Core/DynoStats.h" 19 #include "llvm/Support/CommandLine.h" 20 #include <atomic> 21 #include <map> 22 #include <set> 23 #include <string> 24 #include <unordered_set> 25 26 namespace llvm { 27 namespace bolt { 28 29 /// An optimization/analysis pass that runs on functions. 30 class BinaryFunctionPass { 31 protected: 32 bool PrintPass; 33 BinaryFunctionPass(const bool PrintPass)34 explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {} 35 36 /// Control whether a specific function should be skipped during 37 /// optimization. 38 virtual bool shouldOptimize(const BinaryFunction &BF) const; 39 40 public: 41 virtual ~BinaryFunctionPass() = default; 42 43 /// The name of this pass 44 virtual const char *getName() const = 0; 45 46 /// Control whether debug info is printed after this pass is completed. printPass()47 bool printPass() const { return PrintPass; } 48 49 /// Control whether debug info is printed for an individual function after 50 /// this pass is completed (printPass() must have returned true). 51 virtual bool shouldPrint(const BinaryFunction &BF) const; 52 53 /// Execute this pass on the given functions. 54 virtual void runOnFunctions(BinaryContext &BC) = 0; 55 }; 56 57 /// A pass to print program-wide dynostats. 58 class DynoStatsPrintPass : public BinaryFunctionPass { 59 protected: 60 DynoStats PrevDynoStats; 61 std::string Title; 62 63 public: DynoStatsPrintPass(const DynoStats & PrevDynoStats,const char * Title)64 DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title) 65 : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {} 66 getName()67 const char *getName() const override { 68 return "print dyno-stats after optimizations"; 69 } 70 shouldPrint(const BinaryFunction & BF)71 bool shouldPrint(const BinaryFunction &BF) const override { return false; } 72 runOnFunctions(BinaryContext & BC)73 void runOnFunctions(BinaryContext &BC) override { 74 const DynoStats NewDynoStats = 75 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 76 const bool Changed = (NewDynoStats != PrevDynoStats); 77 outs() << "BOLT-INFO: program-wide dynostats " << Title 78 << (Changed ? "" : " (no change)") << ":\n\n" 79 << PrevDynoStats; 80 if (Changed) { 81 outs() << '\n'; 82 NewDynoStats.print(outs(), &PrevDynoStats, BC.InstPrinter.get()); 83 } 84 outs() << '\n'; 85 } 86 }; 87 88 /// The pass normalizes CFG by performing the following transformations: 89 /// * removes empty basic blocks 90 /// * merges duplicate edges and updates jump instructions 91 class NormalizeCFG : public BinaryFunctionPass { 92 std::atomic<uint64_t> NumBlocksRemoved{0}; 93 std::atomic<uint64_t> NumDuplicateEdgesMerged{0}; 94 95 void runOnFunction(BinaryFunction &BF); 96 97 public: NormalizeCFG(const cl::opt<bool> & PrintPass)98 NormalizeCFG(const cl::opt<bool> &PrintPass) 99 : BinaryFunctionPass(PrintPass) {} 100 getName()101 const char *getName() const override { return "normalize CFG"; } 102 103 void runOnFunctions(BinaryContext &) override; 104 }; 105 106 /// Detect and eliminate unreachable basic blocks. We could have those 107 /// filled with nops and they are used for alignment. 108 class EliminateUnreachableBlocks : public BinaryFunctionPass { 109 std::unordered_set<const BinaryFunction *> Modified; 110 std::atomic<unsigned> DeletedBlocks{0}; 111 std::atomic<uint64_t> DeletedBytes{0}; 112 void runOnFunction(BinaryFunction &Function); 113 114 public: EliminateUnreachableBlocks(const cl::opt<bool> & PrintPass)115 EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass) 116 : BinaryFunctionPass(PrintPass) {} 117 getName()118 const char *getName() const override { return "eliminate-unreachable"; } shouldPrint(const BinaryFunction & BF)119 bool shouldPrint(const BinaryFunction &BF) const override { 120 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 121 } 122 void runOnFunctions(BinaryContext &) override; 123 }; 124 125 // Reorder the basic blocks for each function based on hotness. 126 class ReorderBasicBlocks : public BinaryFunctionPass { 127 public: 128 /// Choose which strategy should the block layout heuristic prioritize when 129 /// facing conflicting goals. 130 enum LayoutType : char { 131 /// LT_NONE - do not change layout of basic blocks 132 LT_NONE = 0, /// no reordering 133 /// LT_REVERSE - reverse the order of basic blocks, meant for testing 134 /// purposes. The first basic block is left intact and the rest are 135 /// put in the reverse order. 136 LT_REVERSE, 137 /// LT_OPTIMIZE - optimize layout of basic blocks based on profile. 138 LT_OPTIMIZE, 139 /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis' 140 /// paper (PLDI '90) about block reordering, trying to minimize branch 141 /// mispredictions. 142 LT_OPTIMIZE_BRANCH, 143 /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04) 144 /// that suggests putting frequently executed chains first in the layout. 145 LT_OPTIMIZE_CACHE, 146 // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation. 147 LT_OPTIMIZE_CACHE_PLUS, 148 /// Block reordering guided by the extended TSP metric. 149 LT_OPTIMIZE_EXT_TSP, 150 /// Create clusters and use random order for them. 151 LT_OPTIMIZE_SHUFFLE, 152 }; 153 154 private: 155 void modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, 156 bool MinBranchClusters) const; 157 158 public: ReorderBasicBlocks(const cl::opt<bool> & PrintPass)159 explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass) 160 : BinaryFunctionPass(PrintPass) {} 161 162 bool shouldOptimize(const BinaryFunction &BF) const override; 163 getName()164 const char *getName() const override { return "reorder-blocks"; } 165 bool shouldPrint(const BinaryFunction &BF) const override; 166 void runOnFunctions(BinaryContext &BC) override; 167 }; 168 169 /// Sync local branches with CFG. 170 class FixupBranches : public BinaryFunctionPass { 171 public: FixupBranches(const cl::opt<bool> & PrintPass)172 explicit FixupBranches(const cl::opt<bool> &PrintPass) 173 : BinaryFunctionPass(PrintPass) {} 174 getName()175 const char *getName() const override { return "fix-branches"; } 176 void runOnFunctions(BinaryContext &BC) override; 177 }; 178 179 /// Fix the CFI state and exception handling information after all other 180 /// passes have completed. 181 class FinalizeFunctions : public BinaryFunctionPass { 182 public: FinalizeFunctions(const cl::opt<bool> & PrintPass)183 explicit FinalizeFunctions(const cl::opt<bool> &PrintPass) 184 : BinaryFunctionPass(PrintPass) {} 185 getName()186 const char *getName() const override { return "finalize-functions"; } 187 void runOnFunctions(BinaryContext &BC) override; 188 }; 189 190 /// Perform any necessary adjustments for functions that do not fit into their 191 /// original space in non-relocation mode. 192 class CheckLargeFunctions : public BinaryFunctionPass { 193 public: CheckLargeFunctions(const cl::opt<bool> & PrintPass)194 explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass) 195 : BinaryFunctionPass(PrintPass) {} 196 getName()197 const char *getName() const override { return "check-large-functions"; } 198 199 void runOnFunctions(BinaryContext &BC) override; 200 201 bool shouldOptimize(const BinaryFunction &BF) const override; 202 }; 203 204 /// Convert and remove all BOLT-related annotations before LLVM code emission. 205 class LowerAnnotations : public BinaryFunctionPass { 206 public: LowerAnnotations(const cl::opt<bool> & PrintPass)207 explicit LowerAnnotations(const cl::opt<bool> &PrintPass) 208 : BinaryFunctionPass(PrintPass) {} 209 getName()210 const char *getName() const override { return "lower-annotations"; } 211 void runOnFunctions(BinaryContext &BC) override; 212 }; 213 214 /// An optimization to simplify conditional tail calls by removing 215 /// unnecessary branches. 216 /// 217 /// This optimization considers both of the following cases: 218 /// 219 /// foo: ... 220 /// jcc L1 original 221 /// ... 222 /// L1: jmp bar # TAILJMP 223 /// 224 /// -> 225 /// 226 /// foo: ... 227 /// jcc bar iff jcc L1 is expected 228 /// ... 229 /// 230 /// L1 is unreachable 231 /// 232 /// OR 233 /// 234 /// foo: ... 235 /// jcc L2 236 /// L1: jmp dest # TAILJMP 237 /// L2: ... 238 /// 239 /// -> 240 /// 241 /// foo: jncc dest # TAILJMP 242 /// L2: ... 243 /// 244 /// L1 is unreachable 245 /// 246 /// For this particular case, the first basic block ends with 247 /// a conditional branch and has two successors, one fall-through 248 /// and one for when the condition is true. 249 /// The target of the conditional is a basic block with a single 250 /// unconditional branch (i.e. tail call) to another function. 251 /// We don't care about the contents of the fall-through block. 252 /// We assume that the target of the conditional branch is the 253 /// first successor. 254 class SimplifyConditionalTailCalls : public BinaryFunctionPass { 255 uint64_t NumCandidateTailCalls{0}; 256 uint64_t NumTailCallsPatched{0}; 257 uint64_t CTCExecCount{0}; 258 uint64_t CTCTakenCount{0}; 259 uint64_t NumOrigForwardBranches{0}; 260 uint64_t NumOrigBackwardBranches{0}; 261 uint64_t NumDoubleJumps{0}; 262 uint64_t DeletedBlocks{0}; 263 uint64_t DeletedBytes{0}; 264 std::unordered_set<const BinaryFunction *> Modified; 265 std::set<const BinaryBasicBlock *> BeenOptimized; 266 267 bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, 268 const MCInst &CondBranch, const BinaryBasicBlock *BB, 269 const bool DirectionFlag); 270 271 uint64_t fixTailCalls(BinaryFunction &BF); 272 273 public: SimplifyConditionalTailCalls(const cl::opt<bool> & PrintPass)274 explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass) 275 : BinaryFunctionPass(PrintPass) {} 276 getName()277 const char *getName() const override { 278 return "simplify-conditional-tail-calls"; 279 } shouldPrint(const BinaryFunction & BF)280 bool shouldPrint(const BinaryFunction &BF) const override { 281 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 282 } 283 void runOnFunctions(BinaryContext &BC) override; 284 }; 285 286 /// Convert instructions to the form with the minimum operand width. 287 class ShortenInstructions : public BinaryFunctionPass { 288 uint64_t shortenInstructions(BinaryFunction &Function); 289 290 public: ShortenInstructions(const cl::opt<bool> & PrintPass)291 explicit ShortenInstructions(const cl::opt<bool> &PrintPass) 292 : BinaryFunctionPass(PrintPass) {} 293 getName()294 const char *getName() const override { return "shorten-instructions"; } 295 296 void runOnFunctions(BinaryContext &BC) override; 297 }; 298 299 /// Perform simple peephole optimizations. 300 class Peepholes : public BinaryFunctionPass { 301 public: 302 enum PeepholeOpts : char { 303 PEEP_NONE = 0x0, 304 PEEP_DOUBLE_JUMPS = 0x2, 305 PEEP_TAILCALL_TRAPS = 0x4, 306 PEEP_USELESS_BRANCHES = 0x8, 307 PEEP_ALL = 0xf 308 }; 309 310 private: 311 uint64_t NumDoubleJumps{0}; 312 uint64_t TailCallTraps{0}; 313 uint64_t NumUselessCondBranches{0}; 314 315 /// Add trap instructions immediately after indirect tail calls to prevent 316 /// the processor from decoding instructions immediate following the 317 /// tailcall. 318 void addTailcallTraps(BinaryFunction &Function); 319 320 /// Remove useless duplicate successors. When the conditional 321 /// successor is the same as the unconditional successor, we can 322 /// remove the conditional successor and branch instruction. 323 void removeUselessCondBranches(BinaryFunction &Function); 324 325 public: Peepholes(const cl::opt<bool> & PrintPass)326 explicit Peepholes(const cl::opt<bool> &PrintPass) 327 : BinaryFunctionPass(PrintPass) {} 328 getName()329 const char *getName() const override { return "peepholes"; } 330 void runOnFunctions(BinaryContext &BC) override; 331 }; 332 333 /// An optimization to simplify loads from read-only sections.The pass converts 334 /// load instructions with statically computed target address such as: 335 /// 336 /// mov 0x12f(%rip), %eax 337 /// 338 /// to their counterparts that use immediate operands instead of memory loads: 339 /// 340 /// mov $0x4007dc, %eax 341 /// 342 /// when the target address points somewhere inside a read-only section. 343 /// 344 class SimplifyRODataLoads : public BinaryFunctionPass { 345 uint64_t NumLoadsSimplified{0}; 346 uint64_t NumDynamicLoadsSimplified{0}; 347 uint64_t NumLoadsFound{0}; 348 uint64_t NumDynamicLoadsFound{0}; 349 std::unordered_set<const BinaryFunction *> Modified; 350 351 bool simplifyRODataLoads(BinaryFunction &BF); 352 353 public: SimplifyRODataLoads(const cl::opt<bool> & PrintPass)354 explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass) 355 : BinaryFunctionPass(PrintPass) {} 356 getName()357 const char *getName() const override { return "simplify-read-only-loads"; } shouldPrint(const BinaryFunction & BF)358 bool shouldPrint(const BinaryFunction &BF) const override { 359 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 360 } 361 void runOnFunctions(BinaryContext &BC) override; 362 }; 363 364 /// Assign output sections to all functions. 365 class AssignSections : public BinaryFunctionPass { 366 public: AssignSections()367 explicit AssignSections() : BinaryFunctionPass(false) {} 368 getName()369 const char *getName() const override { return "assign-sections"; } 370 void runOnFunctions(BinaryContext &BC) override; 371 }; 372 373 /// Compute and report to the user the imbalance in flow equations for all 374 /// CFGs, so we can detect bad quality profile. Prints average and standard 375 /// deviation of the absolute differences of outgoing flow minus incoming flow 376 /// for blocks of interest (excluding prologues, epilogues, and BB frequency 377 /// lower than 100). 378 class PrintProfileStats : public BinaryFunctionPass { 379 public: PrintProfileStats(const cl::opt<bool> & PrintPass)380 explicit PrintProfileStats(const cl::opt<bool> &PrintPass) 381 : BinaryFunctionPass(PrintPass) {} 382 getName()383 const char *getName() const override { return "profile-stats"; } shouldPrint(const BinaryFunction &)384 bool shouldPrint(const BinaryFunction &) const override { return false; } 385 void runOnFunctions(BinaryContext &BC) override; 386 }; 387 388 /// Prints a list of the top 100 functions sorted by a set of 389 /// dyno stats categories. 390 class PrintProgramStats : public BinaryFunctionPass { 391 public: PrintProgramStats(const cl::opt<bool> & PrintPass)392 explicit PrintProgramStats(const cl::opt<bool> &PrintPass) 393 : BinaryFunctionPass(PrintPass) {} 394 getName()395 const char *getName() const override { return "print-stats"; } shouldPrint(const BinaryFunction &)396 bool shouldPrint(const BinaryFunction &) const override { return false; } 397 void runOnFunctions(BinaryContext &BC) override; 398 }; 399 400 /// Pass for lowering any instructions that we have raised and that have 401 /// to be lowered. 402 class InstructionLowering : public BinaryFunctionPass { 403 public: InstructionLowering(const cl::opt<bool> & PrintPass)404 explicit InstructionLowering(const cl::opt<bool> &PrintPass) 405 : BinaryFunctionPass(PrintPass) {} 406 getName()407 const char *getName() const override { return "inst-lowering"; } 408 409 void runOnFunctions(BinaryContext &BC) override; 410 }; 411 412 /// Pass for stripping 'repz' from 'repz retq' sequence of instructions. 413 class StripRepRet : public BinaryFunctionPass { 414 public: StripRepRet(const cl::opt<bool> & PrintPass)415 explicit StripRepRet(const cl::opt<bool> &PrintPass) 416 : BinaryFunctionPass(PrintPass) {} 417 getName()418 const char *getName() const override { return "strip-rep-ret"; } 419 420 void runOnFunctions(BinaryContext &BC) override; 421 }; 422 423 /// Pass for inlining calls to memcpy using 'rep movsb' on X86. 424 class InlineMemcpy : public BinaryFunctionPass { 425 public: InlineMemcpy(const cl::opt<bool> & PrintPass)426 explicit InlineMemcpy(const cl::opt<bool> &PrintPass) 427 : BinaryFunctionPass(PrintPass) {} 428 getName()429 const char *getName() const override { return "inline-memcpy"; } 430 431 void runOnFunctions(BinaryContext &BC) override; 432 }; 433 434 /// Pass for specializing memcpy for a size of 1 byte. 435 class SpecializeMemcpy1 : public BinaryFunctionPass { 436 private: 437 std::vector<std::string> Spec; 438 439 /// Return indices of the call sites to optimize. Count starts at 1. 440 /// Returns an empty set for all call sites in the function. 441 std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const; 442 443 public: SpecializeMemcpy1(const cl::opt<bool> & PrintPass,cl::list<std::string> & Spec)444 explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass, 445 cl::list<std::string> &Spec) 446 : BinaryFunctionPass(PrintPass), Spec(Spec) {} 447 448 bool shouldOptimize(const BinaryFunction &BF) const override; 449 getName()450 const char *getName() const override { return "specialize-memcpy"; } 451 452 void runOnFunctions(BinaryContext &BC) override; 453 }; 454 455 /// Pass to remove nops in code 456 class RemoveNops : public BinaryFunctionPass { 457 void runOnFunction(BinaryFunction &Function); 458 459 public: RemoveNops(const cl::opt<bool> & PrintPass)460 explicit RemoveNops(const cl::opt<bool> &PrintPass) 461 : BinaryFunctionPass(PrintPass) {} 462 getName()463 const char *getName() const override { return "remove-nops"; } 464 465 /// Pass entry point 466 void runOnFunctions(BinaryContext &BC) override; 467 }; 468 469 enum FrameOptimizationType : char { 470 FOP_NONE, /// Don't perform FOP. 471 FOP_HOT, /// Perform FOP on hot functions. 472 FOP_ALL /// Perform FOP on all functions. 473 }; 474 475 } // namespace bolt 476 } // namespace llvm 477 478 #endif 479