1 //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The set of optimization/analysis passes that run on BinaryFunctions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_PASSES_BINARY_PASSES_H
14 #define BOLT_PASSES_BINARY_PASSES_H
15 
16 #include "bolt/Core/BinaryContext.h"
17 #include "bolt/Core/BinaryFunction.h"
18 #include "bolt/Core/DynoStats.h"
19 #include "llvm/Support/CommandLine.h"
20 #include <atomic>
21 #include <map>
22 #include <set>
23 #include <string>
24 #include <unordered_set>
25 
26 namespace llvm {
27 namespace bolt {
28 
29 /// An optimization/analysis pass that runs on functions.
30 class BinaryFunctionPass {
31 protected:
32   bool PrintPass;
33 
BinaryFunctionPass(const bool PrintPass)34   explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {}
35 
36   /// Control whether a specific function should be skipped during
37   /// optimization.
38   virtual bool shouldOptimize(const BinaryFunction &BF) const;
39 
40 public:
41   virtual ~BinaryFunctionPass() = default;
42 
43   /// The name of this pass
44   virtual const char *getName() const = 0;
45 
46   /// Control whether debug info is printed after this pass is completed.
printPass()47   bool printPass() const { return PrintPass; }
48 
49   /// Control whether debug info is printed for an individual function after
50   /// this pass is completed (printPass() must have returned true).
51   virtual bool shouldPrint(const BinaryFunction &BF) const;
52 
53   /// Execute this pass on the given functions.
54   virtual void runOnFunctions(BinaryContext &BC) = 0;
55 };
56 
57 /// A pass to print program-wide dynostats.
58 class DynoStatsPrintPass : public BinaryFunctionPass {
59 protected:
60   DynoStats PrevDynoStats;
61   std::string Title;
62 
63 public:
DynoStatsPrintPass(const DynoStats & PrevDynoStats,const char * Title)64   DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
65       : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}
66 
getName()67   const char *getName() const override {
68     return "print dyno-stats after optimizations";
69   }
70 
shouldPrint(const BinaryFunction & BF)71   bool shouldPrint(const BinaryFunction &BF) const override { return false; }
72 
runOnFunctions(BinaryContext & BC)73   void runOnFunctions(BinaryContext &BC) override {
74     const DynoStats NewDynoStats =
75         getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
76     const bool Changed = (NewDynoStats != PrevDynoStats);
77     outs() << "BOLT-INFO: program-wide dynostats " << Title
78            << (Changed ? "" : " (no change)") << ":\n\n"
79            << PrevDynoStats;
80     if (Changed) {
81       outs() << '\n';
82       NewDynoStats.print(outs(), &PrevDynoStats, BC.InstPrinter.get());
83     }
84     outs() << '\n';
85   }
86 };
87 
88 /// The pass normalizes CFG by performing the following transformations:
89 ///   * removes empty basic blocks
90 ///   * merges duplicate edges and updates jump instructions
91 class NormalizeCFG : public BinaryFunctionPass {
92   std::atomic<uint64_t> NumBlocksRemoved{0};
93   std::atomic<uint64_t> NumDuplicateEdgesMerged{0};
94 
95   void runOnFunction(BinaryFunction &BF);
96 
97 public:
NormalizeCFG(const cl::opt<bool> & PrintPass)98   NormalizeCFG(const cl::opt<bool> &PrintPass)
99       : BinaryFunctionPass(PrintPass) {}
100 
getName()101   const char *getName() const override { return "normalize CFG"; }
102 
103   void runOnFunctions(BinaryContext &) override;
104 };
105 
106 /// Detect and eliminate unreachable basic blocks. We could have those
107 /// filled with nops and they are used for alignment.
108 class EliminateUnreachableBlocks : public BinaryFunctionPass {
109   std::unordered_set<const BinaryFunction *> Modified;
110   std::atomic<unsigned> DeletedBlocks{0};
111   std::atomic<uint64_t> DeletedBytes{0};
112   void runOnFunction(BinaryFunction &Function);
113 
114 public:
EliminateUnreachableBlocks(const cl::opt<bool> & PrintPass)115   EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
116       : BinaryFunctionPass(PrintPass) {}
117 
getName()118   const char *getName() const override { return "eliminate-unreachable"; }
shouldPrint(const BinaryFunction & BF)119   bool shouldPrint(const BinaryFunction &BF) const override {
120     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
121   }
122   void runOnFunctions(BinaryContext &) override;
123 };
124 
125 // Reorder the basic blocks for each function based on hotness.
126 class ReorderBasicBlocks : public BinaryFunctionPass {
127 public:
128   /// Choose which strategy should the block layout heuristic prioritize when
129   /// facing conflicting goals.
130   enum LayoutType : char {
131     /// LT_NONE - do not change layout of basic blocks
132     LT_NONE = 0, /// no reordering
133     /// LT_REVERSE - reverse the order of basic blocks, meant for testing
134     /// purposes. The first basic block is left intact and the rest are
135     /// put in the reverse order.
136     LT_REVERSE,
137     /// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
138     LT_OPTIMIZE,
139     /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
140     /// paper (PLDI '90) about block reordering, trying to minimize branch
141     /// mispredictions.
142     LT_OPTIMIZE_BRANCH,
143     /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
144     /// that suggests putting frequently executed chains first in the layout.
145     LT_OPTIMIZE_CACHE,
146     // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation.
147     LT_OPTIMIZE_CACHE_PLUS,
148     /// Block reordering guided by the extended TSP metric.
149     LT_OPTIMIZE_EXT_TSP,
150     /// Create clusters and use random order for them.
151     LT_OPTIMIZE_SHUFFLE,
152   };
153 
154 private:
155   void modifyFunctionLayout(BinaryFunction &Function, LayoutType Type,
156                             bool MinBranchClusters) const;
157 
158 public:
ReorderBasicBlocks(const cl::opt<bool> & PrintPass)159   explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
160       : BinaryFunctionPass(PrintPass) {}
161 
162   bool shouldOptimize(const BinaryFunction &BF) const override;
163 
getName()164   const char *getName() const override { return "reorder-blocks"; }
165   bool shouldPrint(const BinaryFunction &BF) const override;
166   void runOnFunctions(BinaryContext &BC) override;
167 };
168 
169 /// Sync local branches with CFG.
170 class FixupBranches : public BinaryFunctionPass {
171 public:
FixupBranches(const cl::opt<bool> & PrintPass)172   explicit FixupBranches(const cl::opt<bool> &PrintPass)
173       : BinaryFunctionPass(PrintPass) {}
174 
getName()175   const char *getName() const override { return "fix-branches"; }
176   void runOnFunctions(BinaryContext &BC) override;
177 };
178 
179 /// Fix the CFI state and exception handling information after all other
180 /// passes have completed.
181 class FinalizeFunctions : public BinaryFunctionPass {
182 public:
FinalizeFunctions(const cl::opt<bool> & PrintPass)183   explicit FinalizeFunctions(const cl::opt<bool> &PrintPass)
184       : BinaryFunctionPass(PrintPass) {}
185 
getName()186   const char *getName() const override { return "finalize-functions"; }
187   void runOnFunctions(BinaryContext &BC) override;
188 };
189 
190 /// Perform any necessary adjustments for functions that do not fit into their
191 /// original space in non-relocation mode.
192 class CheckLargeFunctions : public BinaryFunctionPass {
193 public:
CheckLargeFunctions(const cl::opt<bool> & PrintPass)194   explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass)
195       : BinaryFunctionPass(PrintPass) {}
196 
getName()197   const char *getName() const override { return "check-large-functions"; }
198 
199   void runOnFunctions(BinaryContext &BC) override;
200 
201   bool shouldOptimize(const BinaryFunction &BF) const override;
202 };
203 
204 /// Convert and remove all BOLT-related annotations before LLVM code emission.
205 class LowerAnnotations : public BinaryFunctionPass {
206 public:
LowerAnnotations(const cl::opt<bool> & PrintPass)207   explicit LowerAnnotations(const cl::opt<bool> &PrintPass)
208       : BinaryFunctionPass(PrintPass) {}
209 
getName()210   const char *getName() const override { return "lower-annotations"; }
211   void runOnFunctions(BinaryContext &BC) override;
212 };
213 
214 /// An optimization to simplify conditional tail calls by removing
215 /// unnecessary branches.
216 ///
217 /// This optimization considers both of the following cases:
218 ///
219 /// foo: ...
220 ///      jcc L1   original
221 ///      ...
222 /// L1:  jmp bar  # TAILJMP
223 ///
224 /// ->
225 ///
226 /// foo: ...
227 ///      jcc bar  iff jcc L1 is expected
228 ///      ...
229 ///
230 /// L1 is unreachable
231 ///
232 /// OR
233 ///
234 /// foo: ...
235 ///      jcc  L2
236 /// L1:  jmp  dest  # TAILJMP
237 /// L2:  ...
238 ///
239 /// ->
240 ///
241 /// foo: jncc dest  # TAILJMP
242 /// L2:  ...
243 ///
244 /// L1 is unreachable
245 ///
246 /// For this particular case, the first basic block ends with
247 /// a conditional branch and has two successors, one fall-through
248 /// and one for when the condition is true.
249 /// The target of the conditional is a basic block with a single
250 /// unconditional branch (i.e. tail call) to another function.
251 /// We don't care about the contents of the fall-through block.
252 /// We assume that the target of the conditional branch is the
253 /// first successor.
254 class SimplifyConditionalTailCalls : public BinaryFunctionPass {
255   uint64_t NumCandidateTailCalls{0};
256   uint64_t NumTailCallsPatched{0};
257   uint64_t CTCExecCount{0};
258   uint64_t CTCTakenCount{0};
259   uint64_t NumOrigForwardBranches{0};
260   uint64_t NumOrigBackwardBranches{0};
261   uint64_t NumDoubleJumps{0};
262   uint64_t DeletedBlocks{0};
263   uint64_t DeletedBytes{0};
264   std::unordered_set<const BinaryFunction *> Modified;
265   std::set<const BinaryBasicBlock *> BeenOptimized;
266 
267   bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
268                            const MCInst &CondBranch, const BinaryBasicBlock *BB,
269                            const bool DirectionFlag);
270 
271   uint64_t fixTailCalls(BinaryFunction &BF);
272 
273 public:
SimplifyConditionalTailCalls(const cl::opt<bool> & PrintPass)274   explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
275       : BinaryFunctionPass(PrintPass) {}
276 
getName()277   const char *getName() const override {
278     return "simplify-conditional-tail-calls";
279   }
shouldPrint(const BinaryFunction & BF)280   bool shouldPrint(const BinaryFunction &BF) const override {
281     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
282   }
283   void runOnFunctions(BinaryContext &BC) override;
284 };
285 
286 /// Convert instructions to the form with the minimum operand width.
287 class ShortenInstructions : public BinaryFunctionPass {
288   uint64_t shortenInstructions(BinaryFunction &Function);
289 
290 public:
ShortenInstructions(const cl::opt<bool> & PrintPass)291   explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
292       : BinaryFunctionPass(PrintPass) {}
293 
getName()294   const char *getName() const override { return "shorten-instructions"; }
295 
296   void runOnFunctions(BinaryContext &BC) override;
297 };
298 
299 /// Perform simple peephole optimizations.
300 class Peepholes : public BinaryFunctionPass {
301 public:
302   enum PeepholeOpts : char {
303     PEEP_NONE = 0x0,
304     PEEP_DOUBLE_JUMPS = 0x2,
305     PEEP_TAILCALL_TRAPS = 0x4,
306     PEEP_USELESS_BRANCHES = 0x8,
307     PEEP_ALL = 0xf
308   };
309 
310 private:
311   uint64_t NumDoubleJumps{0};
312   uint64_t TailCallTraps{0};
313   uint64_t NumUselessCondBranches{0};
314 
315   /// Add trap instructions immediately after indirect tail calls to prevent
316   /// the processor from decoding instructions immediate following the
317   /// tailcall.
318   void addTailcallTraps(BinaryFunction &Function);
319 
320   /// Remove useless duplicate successors.  When the conditional
321   /// successor is the same as the unconditional successor, we can
322   /// remove the conditional successor and branch instruction.
323   void removeUselessCondBranches(BinaryFunction &Function);
324 
325 public:
Peepholes(const cl::opt<bool> & PrintPass)326   explicit Peepholes(const cl::opt<bool> &PrintPass)
327       : BinaryFunctionPass(PrintPass) {}
328 
getName()329   const char *getName() const override { return "peepholes"; }
330   void runOnFunctions(BinaryContext &BC) override;
331 };
332 
333 /// An optimization to simplify loads from read-only sections.The pass converts
334 /// load instructions with statically computed target address such as:
335 ///
336 ///      mov 0x12f(%rip), %eax
337 ///
338 /// to their counterparts that use immediate operands instead of memory loads:
339 ///
340 ///     mov $0x4007dc, %eax
341 ///
342 /// when the target address points somewhere inside a read-only section.
343 ///
344 class SimplifyRODataLoads : public BinaryFunctionPass {
345   uint64_t NumLoadsSimplified{0};
346   uint64_t NumDynamicLoadsSimplified{0};
347   uint64_t NumLoadsFound{0};
348   uint64_t NumDynamicLoadsFound{0};
349   std::unordered_set<const BinaryFunction *> Modified;
350 
351   bool simplifyRODataLoads(BinaryFunction &BF);
352 
353 public:
SimplifyRODataLoads(const cl::opt<bool> & PrintPass)354   explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
355       : BinaryFunctionPass(PrintPass) {}
356 
getName()357   const char *getName() const override { return "simplify-read-only-loads"; }
shouldPrint(const BinaryFunction & BF)358   bool shouldPrint(const BinaryFunction &BF) const override {
359     return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
360   }
361   void runOnFunctions(BinaryContext &BC) override;
362 };
363 
364 /// Assign output sections to all functions.
365 class AssignSections : public BinaryFunctionPass {
366 public:
AssignSections()367   explicit AssignSections() : BinaryFunctionPass(false) {}
368 
getName()369   const char *getName() const override { return "assign-sections"; }
370   void runOnFunctions(BinaryContext &BC) override;
371 };
372 
373 /// Compute and report to the user the imbalance in flow equations for all
374 /// CFGs, so we can detect bad quality profile. Prints average and standard
375 /// deviation of the absolute differences of outgoing flow minus incoming flow
376 /// for blocks of interest (excluding prologues, epilogues, and BB frequency
377 /// lower than 100).
378 class PrintProfileStats : public BinaryFunctionPass {
379 public:
PrintProfileStats(const cl::opt<bool> & PrintPass)380   explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
381       : BinaryFunctionPass(PrintPass) {}
382 
getName()383   const char *getName() const override { return "profile-stats"; }
shouldPrint(const BinaryFunction &)384   bool shouldPrint(const BinaryFunction &) const override { return false; }
385   void runOnFunctions(BinaryContext &BC) override;
386 };
387 
388 /// Prints a list of the top 100 functions sorted by a set of
389 /// dyno stats categories.
390 class PrintProgramStats : public BinaryFunctionPass {
391 public:
PrintProgramStats(const cl::opt<bool> & PrintPass)392   explicit PrintProgramStats(const cl::opt<bool> &PrintPass)
393       : BinaryFunctionPass(PrintPass) {}
394 
getName()395   const char *getName() const override { return "print-stats"; }
shouldPrint(const BinaryFunction &)396   bool shouldPrint(const BinaryFunction &) const override { return false; }
397   void runOnFunctions(BinaryContext &BC) override;
398 };
399 
400 /// Pass for lowering any instructions that we have raised and that have
401 /// to be lowered.
402 class InstructionLowering : public BinaryFunctionPass {
403 public:
InstructionLowering(const cl::opt<bool> & PrintPass)404   explicit InstructionLowering(const cl::opt<bool> &PrintPass)
405       : BinaryFunctionPass(PrintPass) {}
406 
getName()407   const char *getName() const override { return "inst-lowering"; }
408 
409   void runOnFunctions(BinaryContext &BC) override;
410 };
411 
412 /// Pass for stripping 'repz' from 'repz retq' sequence of instructions.
413 class StripRepRet : public BinaryFunctionPass {
414 public:
StripRepRet(const cl::opt<bool> & PrintPass)415   explicit StripRepRet(const cl::opt<bool> &PrintPass)
416       : BinaryFunctionPass(PrintPass) {}
417 
getName()418   const char *getName() const override { return "strip-rep-ret"; }
419 
420   void runOnFunctions(BinaryContext &BC) override;
421 };
422 
423 /// Pass for inlining calls to memcpy using 'rep movsb' on X86.
424 class InlineMemcpy : public BinaryFunctionPass {
425 public:
InlineMemcpy(const cl::opt<bool> & PrintPass)426   explicit InlineMemcpy(const cl::opt<bool> &PrintPass)
427       : BinaryFunctionPass(PrintPass) {}
428 
getName()429   const char *getName() const override { return "inline-memcpy"; }
430 
431   void runOnFunctions(BinaryContext &BC) override;
432 };
433 
434 /// Pass for specializing memcpy for a size of 1 byte.
435 class SpecializeMemcpy1 : public BinaryFunctionPass {
436 private:
437   std::vector<std::string> Spec;
438 
439   /// Return indices of the call sites to optimize. Count starts at 1.
440   /// Returns an empty set for all call sites in the function.
441   std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const;
442 
443 public:
SpecializeMemcpy1(const cl::opt<bool> & PrintPass,cl::list<std::string> & Spec)444   explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass,
445                              cl::list<std::string> &Spec)
446       : BinaryFunctionPass(PrintPass), Spec(Spec) {}
447 
448   bool shouldOptimize(const BinaryFunction &BF) const override;
449 
getName()450   const char *getName() const override { return "specialize-memcpy"; }
451 
452   void runOnFunctions(BinaryContext &BC) override;
453 };
454 
455 /// Pass to remove nops in code
456 class RemoveNops : public BinaryFunctionPass {
457   void runOnFunction(BinaryFunction &Function);
458 
459 public:
RemoveNops(const cl::opt<bool> & PrintPass)460   explicit RemoveNops(const cl::opt<bool> &PrintPass)
461       : BinaryFunctionPass(PrintPass) {}
462 
getName()463   const char *getName() const override { return "remove-nops"; }
464 
465   /// Pass entry point
466   void runOnFunctions(BinaryContext &BC) override;
467 };
468 
469 enum FrameOptimizationType : char {
470   FOP_NONE, /// Don't perform FOP.
471   FOP_HOT,  /// Perform FOP on hot functions.
472   FOP_ALL   /// Perform FOP on all functions.
473 };
474 
475 } // namespace bolt
476 } // namespace llvm
477 
478 #endif
479