1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FrameOptimizer.h"
16 #include "bolt/Passes/IdenticalCodeFolding.h"
17 #include "bolt/Passes/IndirectCallPromotion.h"
18 #include "bolt/Passes/Inliner.h"
19 #include "bolt/Passes/Instrumentation.h"
20 #include "bolt/Passes/JTFootprintReduction.h"
21 #include "bolt/Passes/LongJmp.h"
22 #include "bolt/Passes/LoopInversionPass.h"
23 #include "bolt/Passes/PLTCall.h"
24 #include "bolt/Passes/PatchEntries.h"
25 #include "bolt/Passes/RegReAssign.h"
26 #include "bolt/Passes/ReorderData.h"
27 #include "bolt/Passes/ReorderFunctions.h"
28 #include "bolt/Passes/RetpolineInsertion.h"
29 #include "bolt/Passes/SplitFunctions.h"
30 #include "bolt/Passes/StokeInfo.h"
31 #include "bolt/Passes/TailDuplication.h"
32 #include "bolt/Passes/ThreeWayBranch.h"
33 #include "bolt/Passes/ValidateInternalCalls.h"
34 #include "bolt/Passes/VeneerElimination.h"
35 #include "bolt/Utils/CommandLineOpts.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/Timer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <memory>
40 #include <numeric>
41 
42 using namespace llvm;
43 
44 namespace opts {
45 
46 extern cl::opt<bool> PrintAll;
47 extern cl::opt<bool> PrintDynoStats;
48 extern cl::opt<bool> DumpDotAll;
49 extern cl::opt<std::string> AsmDump;
50 extern cl::opt<bolt::PLTCall::OptType> PLT;
51 
52 static cl::opt<bool>
53 DynoStatsAll("dyno-stats-all",
54   cl::desc("print dyno stats after each stage"),
55   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58     EliminateUnreachable("eliminate-unreachable",
59                          cl::desc("eliminate unreachable code"), cl::init(true),
60                          cl::cat(BoltOptCategory));
61 
62 cl::opt<bool>
63 ICF("icf",
64   cl::desc("fold functions with identical code"),
65   cl::ZeroOrMore, cl::cat(BoltOptCategory));
66 
67 static cl::opt<bool>
68 JTFootprintReductionFlag("jt-footprint-reduction",
69   cl::desc("make jump tables size smaller at the cost of using more "
70            "instructions at jump sites"),
71   cl::ZeroOrMore, cl::cat(BoltOptCategory));
72 
73 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
74                          cl::ReallyHidden, cl::cat(BoltOptCategory));
75 
76 cl::opt<bool>
77 PrintAfterBranchFixup("print-after-branch-fixup",
78   cl::desc("print function after fixing local branches"),
79   cl::Hidden, cl::cat(BoltOptCategory));
80 
81 static cl::opt<bool>
82 PrintAfterLowering("print-after-lowering",
83   cl::desc("print function after instruction lowering"),
84   cl::Hidden, cl::cat(BoltOptCategory));
85 
86 cl::opt<bool>
87 PrintFinalized("print-finalized",
88   cl::desc("print function after CFG is finalized"),
89   cl::Hidden, cl::cat(BoltOptCategory));
90 
91 static cl::opt<bool>
92 PrintFOP("print-fop",
93   cl::desc("print functions after frame optimizer pass"),
94   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
95 
96 static cl::opt<bool>
97 PrintICF("print-icf",
98   cl::desc("print functions after ICF optimization"),
99   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
100 
101 static cl::opt<bool>
102 PrintICP("print-icp",
103   cl::desc("print functions after indirect call promotion"),
104   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
105 
106 static cl::opt<bool>
107 PrintInline("print-inline",
108   cl::desc("print functions after inlining optimization"),
109   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
110 
111 static cl::opt<bool>
112 PrintJTFootprintReduction("print-after-jt-footprint-reduction",
113   cl::desc("print function after jt-footprint-reduction pass"),
114   cl::ZeroOrMore, cl::cat(BoltOptCategory));
115 
116 static cl::opt<bool>
117 PrintLongJmp("print-longjmp",
118   cl::desc("print functions after longjmp pass"),
119   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
120 
121 cl::opt<bool>
122 PrintNormalized("print-normalized",
123   cl::desc("print functions after CFG is normalized"),
124   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
125 
126 static cl::opt<bool>
127 PrintOptimizeBodyless("print-optimize-bodyless",
128   cl::desc("print functions after bodyless optimization"),
129   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
130 
131 static cl::opt<bool>
132 PrintPeepholes("print-peepholes",
133   cl::desc("print functions after peephole optimization"),
134   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
135 
136 static cl::opt<bool>
137 PrintPLT("print-plt",
138   cl::desc("print functions after PLT optimization"),
139   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
140 
141 static cl::opt<bool>
142 PrintProfileStats("print-profile-stats",
143   cl::desc("print profile quality/bias analysis"),
144   cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory));
145 
146 static cl::opt<bool>
147 PrintRegReAssign("print-regreassign",
148   cl::desc("print functions after regreassign pass"),
149   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
150 
151 cl::opt<bool>
152 PrintReordered("print-reordered",
153   cl::desc("print functions after layout optimization"),
154   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
155 
156 static cl::opt<bool>
157 PrintReorderedFunctions("print-reordered-functions",
158   cl::desc("print functions after clustering"),
159   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
160 
161 static cl::opt<bool> PrintRetpolineInsertion(
162     "print-retpoline-insertion",
163     cl::desc("print functions after retpoline insertion pass"),
164     cl::cat(BoltCategory));
165 
166 static cl::opt<bool>
167 PrintSCTC("print-sctc",
168   cl::desc("print functions after conditional tail call simplification"),
169   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
170 
171 static cl::opt<bool>
172 PrintSimplifyROLoads("print-simplify-rodata-loads",
173   cl::desc("print functions after simplification of RO data loads"),
174   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
175 
176 static cl::opt<bool>
177 PrintSplit("print-split",
178   cl::desc("print functions after code splitting"),
179   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
180 
181 static cl::opt<bool>
182     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
183                cl::cat(BoltOptCategory));
184 
185 static cl::opt<bool> PrintVeneerElimination(
186     "print-veneer-elimination",
187     cl::desc("print functions after veneer elimination pass"),
188     cl::cat(BoltOptCategory));
189 
190 static cl::opt<bool>
191 PrintUCE("print-uce",
192   cl::desc("print functions after unreachable code elimination"),
193   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
194 
195 static cl::opt<bool> RegReAssign(
196     "reg-reassign",
197     cl::desc(
198         "reassign registers so as to avoid using REX prefixes in hot code"),
199     cl::cat(BoltOptCategory));
200 
201 static cl::opt<bool> SimplifyConditionalTailCalls(
202     "simplify-conditional-tail-calls",
203     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
204     cl::init(true), cl::cat(BoltOptCategory));
205 
206 static cl::opt<bool>
207 SimplifyRODataLoads("simplify-rodata-loads",
208   cl::desc("simplify loads from read-only sections by replacing the memory "
209            "operand with the constant found in the corresponding section"),
210   cl::ZeroOrMore, cl::cat(BoltOptCategory));
211 
212 static cl::list<std::string>
213 SpecializeMemcpy1("memcpy1-spec",
214   cl::desc("list of functions with call sites for which to specialize memcpy() "
215            "for size 1"),
216   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
217   cl::ZeroOrMore, cl::cat(BoltOptCategory));
218 
219 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
220                            cl::cat(BoltOptCategory));
221 
222 static cl::opt<bool> StringOps(
223     "inline-memcpy",
224     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
225     cl::cat(BoltOptCategory));
226 
227 static cl::opt<bool> StripRepRet(
228     "strip-rep-ret",
229     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
230     cl::init(true), cl::cat(BoltOptCategory));
231 
232 static cl::opt<bool> VerifyCFG("verify-cfg",
233                                cl::desc("verify the CFG after every pass"),
234                                cl::Hidden, cl::cat(BoltOptCategory));
235 
236 static cl::opt<bool>
237 ThreeWayBranchFlag("three-way-branch",
238   cl::desc("reorder three way branches"),
239   cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory));
240 
241 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
242                                         cl::desc("fold jcc+mov into cmov"),
243                                         cl::ZeroOrMore, cl::ReallyHidden,
244                                         cl::cat(BoltOptCategory));
245 
246 } // namespace opts
247 
248 namespace llvm {
249 namespace bolt {
250 
251 using namespace opts;
252 
253 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
254 const char BinaryFunctionPassManager::TimerGroupDesc[] =
255     "Binary Function Pass Manager";
256 
257 void BinaryFunctionPassManager::runPasses() {
258   auto &BFs = BC.getBinaryFunctions();
259   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
260     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
261         &OptPassPair = Passes[PassIdx];
262     if (!OptPassPair.first)
263       continue;
264 
265     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
266     std::string PassIdName =
267         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
268 
269     if (opts::Verbosity > 0)
270       outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
271 
272     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
273                        TimerGroupDesc, TimeOpts);
274 
275     callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
276                       Pass->getName(), opts::DynoStatsAll);
277 
278     if (opts::VerifyCFG &&
279         !std::accumulate(
280             BFs.begin(), BFs.end(), true,
281             [](const bool Valid,
282                const std::pair<const uint64_t, BinaryFunction> &It) {
283               return Valid && It.second.validateCFG();
284             })) {
285       errs() << "BOLT-ERROR: Invalid CFG detected after pass "
286              << Pass->getName() << "\n";
287       exit(1);
288     }
289 
290     if (opts::Verbosity > 0)
291       outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
292 
293     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
294       continue;
295 
296     const std::string Message = std::string("after ") + Pass->getName();
297 
298     for (auto &It : BFs) {
299       BinaryFunction &Function = It.second;
300 
301       if (!Pass->shouldPrint(Function))
302         continue;
303 
304       Function.print(outs(), Message, true);
305 
306       if (opts::DumpDotAll)
307         Function.dumpGraphForPass(PassIdName);
308     }
309   }
310 }
311 
312 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
313   BinaryFunctionPassManager Manager(BC);
314 
315   const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions());
316 
317   Manager.registerPass(std::make_unique<AsmDumpPass>(),
318                        opts::AsmDump.getNumOccurrences());
319 
320   if (opts::Instrument)
321     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
322 
323   // Here we manage dependencies/order manually, since passes are run in the
324   // order they're registered.
325 
326   // Run this pass first to use stats for the original functions.
327   Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
328 
329   if (opts::PrintProfileStats)
330     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
331 
332   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
333 
334   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
335 
336   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
337 
338   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
339 
340   Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
341                        opts::StripRepRet);
342 
343   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
344                        opts::ICF);
345 
346   if (BC.isAArch64())
347     Manager.registerPass(
348         std::make_unique<VeneerElimination>(PrintVeneerElimination));
349 
350   Manager.registerPass(
351       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
352       !opts::SpecializeMemcpy1.empty());
353 
354   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
355                        opts::StringOps);
356 
357   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
358 
359   Manager.registerPass(
360       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
361       opts::JTFootprintReductionFlag);
362 
363   Manager.registerPass(
364       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
365       opts::SimplifyRODataLoads);
366 
367   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
368                        opts::RegReAssign);
369 
370   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
371 
372   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
373                        opts::ICF);
374 
375   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
376 
377   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
378                        opts::ThreeWayBranchFlag);
379 
380   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
381 
382   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
383                        opts::EliminateUnreachable);
384 
385   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
386 
387   Manager.registerPass(std::make_unique<LoopInversionPass>());
388 
389   Manager.registerPass(std::make_unique<TailDuplication>());
390 
391   Manager.registerPass(std::make_unique<CMOVConversion>(),
392                        opts::CMOVConversionFlag);
393 
394   // This pass syncs local branches with CFG. If any of the following
395   // passes breaks the sync - they either need to re-run the pass or
396   // fix branches consistency internally.
397   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
398 
399   // This pass should come close to last since it uses the estimated hot
400   // size of a function to determine the order.  It should definitely
401   // also happen after any changes to the call graph are made, e.g. inlining.
402   Manager.registerPass(
403       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
404 
405   // Print final dyno stats right while CFG and instruction analysis are intact.
406   Manager.registerPass(
407       std::make_unique<DynoStatsPrintPass>(
408           InitialDynoStats, "after all optimizations before SCTC and FOP"),
409       opts::PrintDynoStats || opts::DynoStatsAll);
410 
411   // Add the StokeInfo pass, which extract functions for stoke optimization and
412   // get the liveness information for them
413   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
414 
415   // This pass introduces conditional jumps into external functions.
416   // Between extending CFG to support this and isolating this pass we chose
417   // the latter. Thus this pass will do double jump removal and unreachable
418   // code elimination if necessary and won't rely on peepholes/UCE for these
419   // optimizations.
420   // More generally this pass should be the last optimization pass that
421   // modifies branches/control flow.  This pass is run after function
422   // reordering so that it can tell whether calls are forward/backward
423   // accurately.
424   Manager.registerPass(
425       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
426       opts::SimplifyConditionalTailCalls);
427 
428   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
429 
430   Manager.registerPass(std::make_unique<AlignerPass>());
431 
432   // Perform reordering on data contained in one or more sections using
433   // memory profiling data.
434   Manager.registerPass(std::make_unique<ReorderData>());
435 
436   if (BC.isAArch64()) {
437     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
438 
439     // Tighten branches according to offset differences between branch and
440     // targets. No extra instructions after this pass, otherwise we may have
441     // relocations out of range and crash during linking.
442     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
443   }
444 
445   // This pass should always run last.*
446   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
447 
448   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
449   // FrameOptimizer move values around and needs to update CFIs. To do this, it
450   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
451   // placed according to the final layout.
452   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
453 
454   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
455 
456   Manager.registerPass(
457       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
458 
459   // Assign each function an output section.
460   Manager.registerPass(std::make_unique<AssignSections>());
461 
462   // Patch original function entries
463   if (BC.HasRelocations)
464     Manager.registerPass(std::make_unique<PatchEntries>());
465 
466   // This pass turns tail calls into jumps which makes them invisible to
467   // function reordering. It's unsafe to use any CFG or instruction analysis
468   // after this point.
469   Manager.registerPass(
470       std::make_unique<InstructionLowering>(PrintAfterLowering));
471 
472   // In non-relocation mode, mark functions that do not fit into their original
473   // space as non-simple if we have to (e.g. for correct debug info update).
474   // NOTE: this pass depends on finalized code.
475   if (!BC.HasRelocations)
476     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
477 
478   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
479 
480   Manager.runPasses();
481 }
482 
483 } // namespace bolt
484 } // namespace llvm
485