1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/FrameOptimizer.h"
15 #include "bolt/Passes/IdenticalCodeFolding.h"
16 #include "bolt/Passes/IndirectCallPromotion.h"
17 #include "bolt/Passes/Inliner.h"
18 #include "bolt/Passes/Instrumentation.h"
19 #include "bolt/Passes/JTFootprintReduction.h"
20 #include "bolt/Passes/LongJmp.h"
21 #include "bolt/Passes/LoopInversionPass.h"
22 #include "bolt/Passes/PLTCall.h"
23 #include "bolt/Passes/PatchEntries.h"
24 #include "bolt/Passes/RegReAssign.h"
25 #include "bolt/Passes/ReorderData.h"
26 #include "bolt/Passes/ReorderFunctions.h"
27 #include "bolt/Passes/RetpolineInsertion.h"
28 #include "bolt/Passes/SplitFunctions.h"
29 #include "bolt/Passes/StokeInfo.h"
30 #include "bolt/Passes/TailDuplication.h"
31 #include "bolt/Passes/ThreeWayBranch.h"
32 #include "bolt/Passes/ValidateInternalCalls.h"
33 #include "bolt/Passes/VeneerElimination.h"
34 #include "bolt/Utils/CommandLineOpts.h"
35 #include "llvm/Support/FormatVariadic.h"
36 #include "llvm/Support/Timer.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <memory>
39 #include <numeric>
40 
41 using namespace llvm;
42 
43 namespace opts {
44 
45 extern cl::opt<bool> PrintAll;
46 extern cl::opt<bool> PrintDynoStats;
47 extern cl::opt<bool> DumpDotAll;
48 extern cl::opt<std::string> AsmDump;
49 extern cl::opt<bolt::PLTCall::OptType> PLT;
50 
51 static cl::opt<bool>
52 DynoStatsAll("dyno-stats-all",
53   cl::desc("print dyno stats after each stage"),
54   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 EliminateUnreachable("eliminate-unreachable",
58   cl::desc("eliminate unreachable code"),
59   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
60 
61 cl::opt<bool>
62 ICF("icf",
63   cl::desc("fold functions with identical code"),
64   cl::ZeroOrMore, cl::cat(BoltOptCategory));
65 
66 static cl::opt<bool>
67 JTFootprintReductionFlag("jt-footprint-reduction",
68   cl::desc("make jump tables size smaller at the cost of using more "
69            "instructions at jump sites"),
70   cl::ZeroOrMore, cl::cat(BoltOptCategory));
71 
72 cl::opt<bool>
73 NeverPrint("never-print",
74   cl::desc("never print"),
75   cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
76 
77 cl::opt<bool>
78 PrintAfterBranchFixup("print-after-branch-fixup",
79   cl::desc("print function after fixing local branches"),
80   cl::Hidden, cl::cat(BoltOptCategory));
81 
82 static cl::opt<bool>
83 PrintAfterLowering("print-after-lowering",
84   cl::desc("print function after instruction lowering"),
85   cl::Hidden, cl::cat(BoltOptCategory));
86 
87 cl::opt<bool>
88 PrintFinalized("print-finalized",
89   cl::desc("print function after CFG is finalized"),
90   cl::Hidden, cl::cat(BoltOptCategory));
91 
92 static cl::opt<bool>
93 PrintFOP("print-fop",
94   cl::desc("print functions after frame optimizer pass"),
95   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
96 
97 static cl::opt<bool>
98 PrintICF("print-icf",
99   cl::desc("print functions after ICF optimization"),
100   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
101 
102 static cl::opt<bool>
103 PrintICP("print-icp",
104   cl::desc("print functions after indirect call promotion"),
105   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
106 
107 static cl::opt<bool>
108 PrintInline("print-inline",
109   cl::desc("print functions after inlining optimization"),
110   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
111 
112 static cl::opt<bool>
113 PrintJTFootprintReduction("print-after-jt-footprint-reduction",
114   cl::desc("print function after jt-footprint-reduction pass"),
115   cl::ZeroOrMore, cl::cat(BoltOptCategory));
116 
117 static cl::opt<bool>
118 PrintLongJmp("print-longjmp",
119   cl::desc("print functions after longjmp pass"),
120   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
121 
122 cl::opt<bool>
123 PrintNormalized("print-normalized",
124   cl::desc("print functions after CFG is normalized"),
125   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
126 
127 static cl::opt<bool>
128 PrintOptimizeBodyless("print-optimize-bodyless",
129   cl::desc("print functions after bodyless optimization"),
130   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
131 
132 static cl::opt<bool>
133 PrintPeepholes("print-peepholes",
134   cl::desc("print functions after peephole optimization"),
135   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
136 
137 static cl::opt<bool>
138 PrintPLT("print-plt",
139   cl::desc("print functions after PLT optimization"),
140   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
141 
142 static cl::opt<bool>
143 PrintProfileStats("print-profile-stats",
144   cl::desc("print profile quality/bias analysis"),
145   cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory));
146 
147 static cl::opt<bool>
148 PrintRegReAssign("print-regreassign",
149   cl::desc("print functions after regreassign pass"),
150   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
151 
152 cl::opt<bool>
153 PrintReordered("print-reordered",
154   cl::desc("print functions after layout optimization"),
155   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
156 
157 static cl::opt<bool>
158 PrintReorderedFunctions("print-reordered-functions",
159   cl::desc("print functions after clustering"),
160   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
161 
162 static cl::opt<bool>
163 PrintRetpolineInsertion("print-retpoline-insertion",
164   cl::desc("print functions after retpoline insertion pass"),
165   cl::init(false), cl::ZeroOrMore, cl::cat(BoltCategory));
166 
167 static cl::opt<bool>
168 PrintSCTC("print-sctc",
169   cl::desc("print functions after conditional tail call simplification"),
170   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
171 
172 static cl::opt<bool>
173 PrintSimplifyROLoads("print-simplify-rodata-loads",
174   cl::desc("print functions after simplification of RO data loads"),
175   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
176 
177 static cl::opt<bool>
178 PrintSplit("print-split",
179   cl::desc("print functions after code splitting"),
180   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
181 
182 static cl::opt<bool>
183 PrintStoke("print-stoke",
184   cl::desc("print functions after stoke analysis"),
185   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
186 
187 static cl::opt<bool>
188 PrintVeneerElimination("print-veneer-elimination",
189   cl::desc("print functions after veneer elimination pass"),
190   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
191 
192 static cl::opt<bool>
193 PrintUCE("print-uce",
194   cl::desc("print functions after unreachable code elimination"),
195   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
196 
197 static cl::opt<bool>
198 RegReAssign("reg-reassign",
199   cl::desc("reassign registers so as to avoid using REX prefixes in hot code"),
200   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
201 
202 static cl::opt<bool>
203 SimplifyConditionalTailCalls("simplify-conditional-tail-calls",
204   cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
205   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
206 
207 static cl::opt<bool>
208 SimplifyRODataLoads("simplify-rodata-loads",
209   cl::desc("simplify loads from read-only sections by replacing the memory "
210            "operand with the constant found in the corresponding section"),
211   cl::ZeroOrMore, cl::cat(BoltOptCategory));
212 
213 static cl::list<std::string>
214 SpecializeMemcpy1("memcpy1-spec",
215   cl::desc("list of functions with call sites for which to specialize memcpy() "
216            "for size 1"),
217   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
218   cl::ZeroOrMore, cl::cat(BoltOptCategory));
219 
220 static cl::opt<bool>
221 Stoke("stoke",
222   cl::desc("turn on the stoke analysis"),
223   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
224 
225 static cl::opt<bool>
226 StringOps("inline-memcpy",
227   cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
228   cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
229 
230 static cl::opt<bool>
231 StripRepRet("strip-rep-ret",
232   cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
233   cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
234 
235 static cl::opt<bool>
236 VerifyCFG("verify-cfg",
237   cl::desc("verify the CFG after every pass"),
238   cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltOptCategory));
239 
240 static cl::opt<bool>
241 TailDuplicationFlag("tail-duplication",
242   cl::desc("duplicate unconditional branches that cross a cache line"),
243   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
244 
245 static cl::opt<bool>
246 ThreeWayBranchFlag("three-way-branch",
247   cl::desc("reorder three way branches"),
248   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
249 
250 } // namespace opts
251 
252 namespace llvm {
253 namespace bolt {
254 
255 using namespace opts;
256 
257 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
258 const char BinaryFunctionPassManager::TimerGroupDesc[] =
259     "Binary Function Pass Manager";
260 
261 void BinaryFunctionPassManager::runPasses() {
262   auto &BFs = BC.getBinaryFunctions();
263   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
264     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
265         &OptPassPair = Passes[PassIdx];
266     if (!OptPassPair.first)
267       continue;
268 
269     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
270     std::string PassIdName =
271         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
272 
273     if (opts::Verbosity > 0)
274       outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
275 
276     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
277                        TimerGroupDesc, TimeOpts);
278 
279     callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
280                       Pass->getName(), opts::DynoStatsAll);
281 
282     if (opts::VerifyCFG &&
283         !std::accumulate(
284             BFs.begin(), BFs.end(), true,
285             [](const bool Valid,
286                const std::pair<const uint64_t, BinaryFunction> &It) {
287               return Valid && It.second.validateCFG();
288             })) {
289       errs() << "BOLT-ERROR: Invalid CFG detected after pass "
290              << Pass->getName() << "\n";
291       exit(1);
292     }
293 
294     if (opts::Verbosity > 0)
295       outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
296 
297     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
298       continue;
299 
300     const std::string Message = std::string("after ") + Pass->getName();
301 
302     for (auto &It : BFs) {
303       BinaryFunction &Function = It.second;
304 
305       if (!Pass->shouldPrint(Function))
306         continue;
307 
308       Function.print(outs(), Message, true);
309 
310       if (opts::DumpDotAll)
311         Function.dumpGraphForPass(PassIdName);
312     }
313   }
314 }
315 
316 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
317   BinaryFunctionPassManager Manager(BC);
318 
319   const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions());
320 
321   Manager.registerPass(std::make_unique<AsmDumpPass>(),
322                        opts::AsmDump.getNumOccurrences());
323 
324   if (opts::Instrument)
325     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
326 
327   // Here we manage dependencies/order manually, since passes are run in the
328   // order they're registered.
329 
330   // Run this pass first to use stats for the original functions.
331   Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
332 
333   if (opts::PrintProfileStats)
334     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
335 
336   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
337 
338   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
339 
340   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
341 
342   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
343 
344   Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
345                        opts::StripRepRet);
346 
347   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
348                        opts::ICF);
349 
350   if (BC.isAArch64())
351     Manager.registerPass(
352         std::make_unique<VeneerElimination>(PrintVeneerElimination));
353 
354   Manager.registerPass(
355       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
356       !opts::SpecializeMemcpy1.empty());
357 
358   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
359                        opts::StringOps);
360 
361   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
362 
363   Manager.registerPass(
364       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
365       opts::JTFootprintReductionFlag);
366 
367   Manager.registerPass(
368       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
369       opts::SimplifyRODataLoads);
370 
371   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
372                        opts::RegReAssign);
373 
374   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
375 
376   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
377                        opts::ICF);
378 
379   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
380 
381   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
382                        opts::ThreeWayBranchFlag);
383 
384   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
385 
386   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
387                        opts::EliminateUnreachable);
388 
389   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
390 
391   Manager.registerPass(std::make_unique<LoopInversionPass>());
392 
393   Manager.registerPass(std::make_unique<TailDuplication>(),
394                        opts::TailDuplicationFlag);
395 
396   // This pass syncs local branches with CFG. If any of the following
397   // passes breaks the sync - they either need to re-run the pass or
398   // fix branches consistency internally.
399   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
400 
401   // This pass should come close to last since it uses the estimated hot
402   // size of a function to determine the order.  It should definitely
403   // also happen after any changes to the call graph are made, e.g. inlining.
404   Manager.registerPass(
405       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
406 
407   // Print final dyno stats right while CFG and instruction analysis are intact.
408   Manager.registerPass(
409       std::make_unique<DynoStatsPrintPass>(
410           InitialDynoStats, "after all optimizations before SCTC and FOP"),
411       opts::PrintDynoStats | opts::DynoStatsAll);
412 
413   // Add the StokeInfo pass, which extract functions for stoke optimization and
414   // get the liveness information for them
415   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
416 
417   // This pass introduces conditional jumps into external functions.
418   // Between extending CFG to support this and isolating this pass we chose
419   // the latter. Thus this pass will do double jump removal and unreachable
420   // code elimination if necessary and won't rely on peepholes/UCE for these
421   // optimizations.
422   // More generally this pass should be the last optimization pass that
423   // modifies branches/control flow.  This pass is run after function
424   // reordering so that it can tell whether calls are forward/backward
425   // accurately.
426   Manager.registerPass(
427       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
428       opts::SimplifyConditionalTailCalls);
429 
430   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
431 
432   Manager.registerPass(std::make_unique<AlignerPass>());
433 
434   // Perform reordering on data contained in one or more sections using
435   // memory profiling data.
436   Manager.registerPass(std::make_unique<ReorderData>());
437 
438   if (BC.isAArch64()) {
439     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
440 
441     // Tighten branches according to offset differences between branch and
442     // targets. No extra instructions after this pass, otherwise we may have
443     // relocations out of range and crash during linking.
444     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
445   }
446 
447   // This pass should always run last.*
448   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
449 
450   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
451   // FrameOptimizer move values around and needs to update CFIs. To do this, it
452   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
453   // placed according to the final layout.
454   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
455 
456   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
457 
458   Manager.registerPass(
459       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
460 
461   // Assign each function an output section.
462   Manager.registerPass(std::make_unique<AssignSections>());
463 
464   // Patch original function entries
465   if (BC.HasRelocations)
466     Manager.registerPass(std::make_unique<PatchEntries>());
467 
468   // This pass turns tail calls into jumps which makes them invisible to
469   // function reordering. It's unsafe to use any CFG or instruction analysis
470   // after this point.
471   Manager.registerPass(
472       std::make_unique<InstructionLowering>(PrintAfterLowering));
473 
474   // In non-relocation mode, mark functions that do not fit into their original
475   // space as non-simple if we have to (e.g. for correct debug info update).
476   // NOTE: this pass depends on finalized code.
477   if (!BC.HasRelocations)
478     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
479 
480   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
481 
482   Manager.runPasses();
483 }
484 
485 } // namespace bolt
486 } // namespace llvm
487