1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FrameOptimizer.h"
16 #include "bolt/Passes/IdenticalCodeFolding.h"
17 #include "bolt/Passes/IndirectCallPromotion.h"
18 #include "bolt/Passes/Inliner.h"
19 #include "bolt/Passes/Instrumentation.h"
20 #include "bolt/Passes/JTFootprintReduction.h"
21 #include "bolt/Passes/LongJmp.h"
22 #include "bolt/Passes/LoopInversionPass.h"
23 #include "bolt/Passes/PLTCall.h"
24 #include "bolt/Passes/PatchEntries.h"
25 #include "bolt/Passes/RegReAssign.h"
26 #include "bolt/Passes/ReorderData.h"
27 #include "bolt/Passes/ReorderFunctions.h"
28 #include "bolt/Passes/RetpolineInsertion.h"
29 #include "bolt/Passes/SplitFunctions.h"
30 #include "bolt/Passes/StokeInfo.h"
31 #include "bolt/Passes/TailDuplication.h"
32 #include "bolt/Passes/ThreeWayBranch.h"
33 #include "bolt/Passes/ValidateInternalCalls.h"
34 #include "bolt/Passes/VeneerElimination.h"
35 #include "bolt/Utils/CommandLineOpts.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/Timer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <memory>
40 #include <numeric>
41 
42 using namespace llvm;
43 
44 namespace opts {
45 
46 extern cl::opt<bool> PrintAll;
47 extern cl::opt<bool> PrintDynoStats;
48 extern cl::opt<bool> DumpDotAll;
49 extern cl::opt<std::string> AsmDump;
50 extern cl::opt<bolt::PLTCall::OptType> PLT;
51 
52 static cl::opt<bool>
53 DynoStatsAll("dyno-stats-all",
54   cl::desc("print dyno stats after each stage"),
55   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58 EliminateUnreachable("eliminate-unreachable",
59   cl::desc("eliminate unreachable code"),
60   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
61 
62 cl::opt<bool>
63 ICF("icf",
64   cl::desc("fold functions with identical code"),
65   cl::ZeroOrMore, cl::cat(BoltOptCategory));
66 
67 static cl::opt<bool>
68 JTFootprintReductionFlag("jt-footprint-reduction",
69   cl::desc("make jump tables size smaller at the cost of using more "
70            "instructions at jump sites"),
71   cl::ZeroOrMore, cl::cat(BoltOptCategory));
72 
73 cl::opt<bool>
74 NeverPrint("never-print",
75   cl::desc("never print"),
76   cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
77 
78 cl::opt<bool>
79 PrintAfterBranchFixup("print-after-branch-fixup",
80   cl::desc("print function after fixing local branches"),
81   cl::Hidden, cl::cat(BoltOptCategory));
82 
83 static cl::opt<bool>
84 PrintAfterLowering("print-after-lowering",
85   cl::desc("print function after instruction lowering"),
86   cl::Hidden, cl::cat(BoltOptCategory));
87 
88 cl::opt<bool>
89 PrintFinalized("print-finalized",
90   cl::desc("print function after CFG is finalized"),
91   cl::Hidden, cl::cat(BoltOptCategory));
92 
93 static cl::opt<bool>
94 PrintFOP("print-fop",
95   cl::desc("print functions after frame optimizer pass"),
96   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool>
99 PrintICF("print-icf",
100   cl::desc("print functions after ICF optimization"),
101   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
102 
103 static cl::opt<bool>
104 PrintICP("print-icp",
105   cl::desc("print functions after indirect call promotion"),
106   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
107 
108 static cl::opt<bool>
109 PrintInline("print-inline",
110   cl::desc("print functions after inlining optimization"),
111   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
112 
113 static cl::opt<bool>
114 PrintJTFootprintReduction("print-after-jt-footprint-reduction",
115   cl::desc("print function after jt-footprint-reduction pass"),
116   cl::ZeroOrMore, cl::cat(BoltOptCategory));
117 
118 static cl::opt<bool>
119 PrintLongJmp("print-longjmp",
120   cl::desc("print functions after longjmp pass"),
121   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
122 
123 cl::opt<bool>
124 PrintNormalized("print-normalized",
125   cl::desc("print functions after CFG is normalized"),
126   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
127 
128 static cl::opt<bool>
129 PrintOptimizeBodyless("print-optimize-bodyless",
130   cl::desc("print functions after bodyless optimization"),
131   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
132 
133 static cl::opt<bool>
134 PrintPeepholes("print-peepholes",
135   cl::desc("print functions after peephole optimization"),
136   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
137 
138 static cl::opt<bool>
139 PrintPLT("print-plt",
140   cl::desc("print functions after PLT optimization"),
141   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
142 
143 static cl::opt<bool>
144 PrintProfileStats("print-profile-stats",
145   cl::desc("print profile quality/bias analysis"),
146   cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory));
147 
148 static cl::opt<bool>
149 PrintRegReAssign("print-regreassign",
150   cl::desc("print functions after regreassign pass"),
151   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
152 
153 cl::opt<bool>
154 PrintReordered("print-reordered",
155   cl::desc("print functions after layout optimization"),
156   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
157 
158 static cl::opt<bool>
159 PrintReorderedFunctions("print-reordered-functions",
160   cl::desc("print functions after clustering"),
161   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
162 
163 static cl::opt<bool>
164 PrintRetpolineInsertion("print-retpoline-insertion",
165   cl::desc("print functions after retpoline insertion pass"),
166   cl::init(false), cl::ZeroOrMore, cl::cat(BoltCategory));
167 
168 static cl::opt<bool>
169 PrintSCTC("print-sctc",
170   cl::desc("print functions after conditional tail call simplification"),
171   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
172 
173 static cl::opt<bool>
174 PrintSimplifyROLoads("print-simplify-rodata-loads",
175   cl::desc("print functions after simplification of RO data loads"),
176   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
177 
178 static cl::opt<bool>
179 PrintSplit("print-split",
180   cl::desc("print functions after code splitting"),
181   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
182 
183 static cl::opt<bool>
184 PrintStoke("print-stoke",
185   cl::desc("print functions after stoke analysis"),
186   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
187 
188 static cl::opt<bool>
189 PrintVeneerElimination("print-veneer-elimination",
190   cl::desc("print functions after veneer elimination pass"),
191   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
192 
193 static cl::opt<bool>
194 PrintUCE("print-uce",
195   cl::desc("print functions after unreachable code elimination"),
196   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
197 
198 static cl::opt<bool>
199 RegReAssign("reg-reassign",
200   cl::desc("reassign registers so as to avoid using REX prefixes in hot code"),
201   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
202 
203 static cl::opt<bool>
204 SimplifyConditionalTailCalls("simplify-conditional-tail-calls",
205   cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
206   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
207 
208 static cl::opt<bool>
209 SimplifyRODataLoads("simplify-rodata-loads",
210   cl::desc("simplify loads from read-only sections by replacing the memory "
211            "operand with the constant found in the corresponding section"),
212   cl::ZeroOrMore, cl::cat(BoltOptCategory));
213 
214 static cl::list<std::string>
215 SpecializeMemcpy1("memcpy1-spec",
216   cl::desc("list of functions with call sites for which to specialize memcpy() "
217            "for size 1"),
218   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
219   cl::ZeroOrMore, cl::cat(BoltOptCategory));
220 
221 static cl::opt<bool>
222 Stoke("stoke",
223   cl::desc("turn on the stoke analysis"),
224   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
225 
226 static cl::opt<bool>
227 StringOps("inline-memcpy",
228   cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
229   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
230 
231 static cl::opt<bool>
232 StripRepRet("strip-rep-ret",
233   cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
234   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
235 
236 static cl::opt<bool>
237 VerifyCFG("verify-cfg",
238   cl::desc("verify the CFG after every pass"),
239   cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltOptCategory));
240 
241 static cl::opt<bool>
242 TailDuplicationFlag("tail-duplication",
243   cl::desc("duplicate unconditional branches that cross a cache line"),
244   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
245 
246 static cl::opt<bool>
247 ThreeWayBranchFlag("three-way-branch",
248   cl::desc("reorder three way branches"),
249   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
250 
251 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
252                                         cl::desc("fold jcc+mov into cmov"),
253                                         cl::ZeroOrMore, cl::ReallyHidden,
254                                         cl::cat(BoltOptCategory));
255 
256 } // namespace opts
257 
258 namespace llvm {
259 namespace bolt {
260 
261 using namespace opts;
262 
263 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
264 const char BinaryFunctionPassManager::TimerGroupDesc[] =
265     "Binary Function Pass Manager";
266 
267 void BinaryFunctionPassManager::runPasses() {
268   auto &BFs = BC.getBinaryFunctions();
269   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
270     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
271         &OptPassPair = Passes[PassIdx];
272     if (!OptPassPair.first)
273       continue;
274 
275     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
276     std::string PassIdName =
277         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
278 
279     if (opts::Verbosity > 0)
280       outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
281 
282     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
283                        TimerGroupDesc, TimeOpts);
284 
285     callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
286                       Pass->getName(), opts::DynoStatsAll);
287 
288     if (opts::VerifyCFG &&
289         !std::accumulate(
290             BFs.begin(), BFs.end(), true,
291             [](const bool Valid,
292                const std::pair<const uint64_t, BinaryFunction> &It) {
293               return Valid && It.second.validateCFG();
294             })) {
295       errs() << "BOLT-ERROR: Invalid CFG detected after pass "
296              << Pass->getName() << "\n";
297       exit(1);
298     }
299 
300     if (opts::Verbosity > 0)
301       outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
302 
303     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
304       continue;
305 
306     const std::string Message = std::string("after ") + Pass->getName();
307 
308     for (auto &It : BFs) {
309       BinaryFunction &Function = It.second;
310 
311       if (!Pass->shouldPrint(Function))
312         continue;
313 
314       Function.print(outs(), Message, true);
315 
316       if (opts::DumpDotAll)
317         Function.dumpGraphForPass(PassIdName);
318     }
319   }
320 }
321 
322 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
323   BinaryFunctionPassManager Manager(BC);
324 
325   const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions());
326 
327   Manager.registerPass(std::make_unique<AsmDumpPass>(),
328                        opts::AsmDump.getNumOccurrences());
329 
330   if (opts::Instrument)
331     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
332 
333   // Here we manage dependencies/order manually, since passes are run in the
334   // order they're registered.
335 
336   // Run this pass first to use stats for the original functions.
337   Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
338 
339   if (opts::PrintProfileStats)
340     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
341 
342   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
343 
344   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
345 
346   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
347 
348   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
349 
350   Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
351                        opts::StripRepRet);
352 
353   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
354                        opts::ICF);
355 
356   if (BC.isAArch64())
357     Manager.registerPass(
358         std::make_unique<VeneerElimination>(PrintVeneerElimination));
359 
360   Manager.registerPass(
361       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
362       !opts::SpecializeMemcpy1.empty());
363 
364   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
365                        opts::StringOps);
366 
367   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
368 
369   Manager.registerPass(
370       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
371       opts::JTFootprintReductionFlag);
372 
373   Manager.registerPass(
374       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
375       opts::SimplifyRODataLoads);
376 
377   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
378                        opts::RegReAssign);
379 
380   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
381 
382   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
383                        opts::ICF);
384 
385   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
386 
387   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
388                        opts::ThreeWayBranchFlag);
389 
390   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
391 
392   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
393                        opts::EliminateUnreachable);
394 
395   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
396 
397   Manager.registerPass(std::make_unique<LoopInversionPass>());
398 
399   Manager.registerPass(std::make_unique<TailDuplication>(),
400                        opts::TailDuplicationFlag);
401 
402   Manager.registerPass(std::make_unique<CMOVConversion>(),
403                        opts::CMOVConversionFlag);
404 
405   // This pass syncs local branches with CFG. If any of the following
406   // passes breaks the sync - they either need to re-run the pass or
407   // fix branches consistency internally.
408   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
409 
410   // This pass should come close to last since it uses the estimated hot
411   // size of a function to determine the order.  It should definitely
412   // also happen after any changes to the call graph are made, e.g. inlining.
413   Manager.registerPass(
414       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
415 
416   // Print final dyno stats right while CFG and instruction analysis are intact.
417   Manager.registerPass(
418       std::make_unique<DynoStatsPrintPass>(
419           InitialDynoStats, "after all optimizations before SCTC and FOP"),
420       opts::PrintDynoStats || opts::DynoStatsAll);
421 
422   // Add the StokeInfo pass, which extract functions for stoke optimization and
423   // get the liveness information for them
424   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
425 
426   // This pass introduces conditional jumps into external functions.
427   // Between extending CFG to support this and isolating this pass we chose
428   // the latter. Thus this pass will do double jump removal and unreachable
429   // code elimination if necessary and won't rely on peepholes/UCE for these
430   // optimizations.
431   // More generally this pass should be the last optimization pass that
432   // modifies branches/control flow.  This pass is run after function
433   // reordering so that it can tell whether calls are forward/backward
434   // accurately.
435   Manager.registerPass(
436       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
437       opts::SimplifyConditionalTailCalls);
438 
439   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
440 
441   Manager.registerPass(std::make_unique<AlignerPass>());
442 
443   // Perform reordering on data contained in one or more sections using
444   // memory profiling data.
445   Manager.registerPass(std::make_unique<ReorderData>());
446 
447   if (BC.isAArch64()) {
448     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
449 
450     // Tighten branches according to offset differences between branch and
451     // targets. No extra instructions after this pass, otherwise we may have
452     // relocations out of range and crash during linking.
453     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
454   }
455 
456   // This pass should always run last.*
457   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
458 
459   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
460   // FrameOptimizer move values around and needs to update CFIs. To do this, it
461   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
462   // placed according to the final layout.
463   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
464 
465   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
466 
467   Manager.registerPass(
468       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
469 
470   // Assign each function an output section.
471   Manager.registerPass(std::make_unique<AssignSections>());
472 
473   // Patch original function entries
474   if (BC.HasRelocations)
475     Manager.registerPass(std::make_unique<PatchEntries>());
476 
477   // This pass turns tail calls into jumps which makes them invisible to
478   // function reordering. It's unsafe to use any CFG or instruction analysis
479   // after this point.
480   Manager.registerPass(
481       std::make_unique<InstructionLowering>(PrintAfterLowering));
482 
483   // In non-relocation mode, mark functions that do not fit into their original
484   // space as non-simple if we have to (e.g. for correct debug info update).
485   // NOTE: this pass depends on finalized code.
486   if (!BC.HasRelocations)
487     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
488 
489   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
490 
491   Manager.runPasses();
492 }
493 
494 } // namespace bolt
495 } // namespace llvm
496