1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FrameOptimizer.h"
16 #include "bolt/Passes/IdenticalCodeFolding.h"
17 #include "bolt/Passes/IndirectCallPromotion.h"
18 #include "bolt/Passes/Inliner.h"
19 #include "bolt/Passes/Instrumentation.h"
20 #include "bolt/Passes/JTFootprintReduction.h"
21 #include "bolt/Passes/LongJmp.h"
22 #include "bolt/Passes/LoopInversionPass.h"
23 #include "bolt/Passes/PLTCall.h"
24 #include "bolt/Passes/PatchEntries.h"
25 #include "bolt/Passes/RegReAssign.h"
26 #include "bolt/Passes/ReorderData.h"
27 #include "bolt/Passes/ReorderFunctions.h"
28 #include "bolt/Passes/RetpolineInsertion.h"
29 #include "bolt/Passes/SplitFunctions.h"
30 #include "bolt/Passes/StokeInfo.h"
31 #include "bolt/Passes/TailDuplication.h"
32 #include "bolt/Passes/ThreeWayBranch.h"
33 #include "bolt/Passes/ValidateInternalCalls.h"
34 #include "bolt/Passes/VeneerElimination.h"
35 #include "bolt/Utils/CommandLineOpts.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/Timer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <memory>
40 #include <numeric>
41 
42 using namespace llvm;
43 
44 namespace opts {
45 
46 extern cl::opt<bool> PrintAll;
47 extern cl::opt<bool> PrintDynoStats;
48 extern cl::opt<bool> DumpDotAll;
49 extern cl::opt<std::string> AsmDump;
50 extern cl::opt<bolt::PLTCall::OptType> PLT;
51 
52 static cl::opt<bool>
53 DynoStatsAll("dyno-stats-all",
54   cl::desc("print dyno stats after each stage"),
55   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58     EliminateUnreachable("eliminate-unreachable",
59                          cl::desc("eliminate unreachable code"), cl::init(true),
60                          cl::cat(BoltOptCategory));
61 
62 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
63                   cl::cat(BoltOptCategory));
64 
65 static cl::opt<bool> JTFootprintReductionFlag(
66     "jt-footprint-reduction",
67     cl::desc("make jump tables size smaller at the cost of using more "
68              "instructions at jump sites"),
69     cl::cat(BoltOptCategory));
70 
71 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
72                          cl::ReallyHidden, cl::cat(BoltOptCategory));
73 
74 cl::opt<bool>
75 PrintAfterBranchFixup("print-after-branch-fixup",
76   cl::desc("print function after fixing local branches"),
77   cl::Hidden, cl::cat(BoltOptCategory));
78 
79 static cl::opt<bool>
80 PrintAfterLowering("print-after-lowering",
81   cl::desc("print function after instruction lowering"),
82   cl::Hidden, cl::cat(BoltOptCategory));
83 
84 cl::opt<bool>
85 PrintFinalized("print-finalized",
86   cl::desc("print function after CFG is finalized"),
87   cl::Hidden, cl::cat(BoltOptCategory));
88 
89 static cl::opt<bool>
90     PrintFOP("print-fop",
91              cl::desc("print functions after frame optimizer pass"), cl::Hidden,
92              cl::cat(BoltOptCategory));
93 
94 static cl::opt<bool>
95     PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
96              cl::Hidden, cl::cat(BoltOptCategory));
97 
98 static cl::opt<bool>
99     PrintICP("print-icp",
100              cl::desc("print functions after indirect call promotion"),
101              cl::Hidden, cl::cat(BoltOptCategory));
102 
103 static cl::opt<bool>
104     PrintInline("print-inline",
105                 cl::desc("print functions after inlining optimization"),
106                 cl::Hidden, cl::cat(BoltOptCategory));
107 
108 static cl::opt<bool> PrintJTFootprintReduction(
109     "print-after-jt-footprint-reduction",
110     cl::desc("print function after jt-footprint-reduction pass"),
111     cl::cat(BoltOptCategory));
112 
113 static cl::opt<bool>
114     PrintLongJmp("print-longjmp",
115                  cl::desc("print functions after longjmp pass"), cl::Hidden,
116                  cl::cat(BoltOptCategory));
117 
118 cl::opt<bool>
119     PrintNormalized("print-normalized",
120                     cl::desc("print functions after CFG is normalized"),
121                     cl::Hidden, cl::cat(BoltCategory));
122 
123 static cl::opt<bool> PrintOptimizeBodyless(
124     "print-optimize-bodyless",
125     cl::desc("print functions after bodyless optimization"), cl::Hidden,
126     cl::cat(BoltOptCategory));
127 
128 static cl::opt<bool>
129     PrintPeepholes("print-peepholes",
130                    cl::desc("print functions after peephole optimization"),
131                    cl::Hidden, cl::cat(BoltOptCategory));
132 
133 static cl::opt<bool>
134     PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
135              cl::Hidden, cl::cat(BoltOptCategory));
136 
137 static cl::opt<bool>
138     PrintProfileStats("print-profile-stats",
139                       cl::desc("print profile quality/bias analysis"),
140                       cl::cat(BoltCategory));
141 
142 static cl::opt<bool>
143     PrintRegReAssign("print-regreassign",
144                      cl::desc("print functions after regreassign pass"),
145                      cl::Hidden, cl::cat(BoltOptCategory));
146 
147 cl::opt<bool>
148     PrintReordered("print-reordered",
149                    cl::desc("print functions after layout optimization"),
150                    cl::Hidden, cl::cat(BoltOptCategory));
151 
152 static cl::opt<bool>
153     PrintReorderedFunctions("print-reordered-functions",
154                             cl::desc("print functions after clustering"),
155                             cl::Hidden, cl::cat(BoltOptCategory));
156 
157 static cl::opt<bool> PrintRetpolineInsertion(
158     "print-retpoline-insertion",
159     cl::desc("print functions after retpoline insertion pass"),
160     cl::cat(BoltCategory));
161 
162 static cl::opt<bool> PrintSCTC(
163     "print-sctc",
164     cl::desc("print functions after conditional tail call simplification"),
165     cl::Hidden, cl::cat(BoltOptCategory));
166 
167 static cl::opt<bool> PrintSimplifyROLoads(
168     "print-simplify-rodata-loads",
169     cl::desc("print functions after simplification of RO data loads"),
170     cl::Hidden, cl::cat(BoltOptCategory));
171 
172 static cl::opt<bool>
173     PrintSplit("print-split", cl::desc("print functions after code splitting"),
174                cl::Hidden, cl::cat(BoltOptCategory));
175 
176 static cl::opt<bool>
177     PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
178                cl::cat(BoltOptCategory));
179 
180 static cl::opt<bool> PrintVeneerElimination(
181     "print-veneer-elimination",
182     cl::desc("print functions after veneer elimination pass"),
183     cl::cat(BoltOptCategory));
184 
185 static cl::opt<bool>
186     PrintUCE("print-uce",
187              cl::desc("print functions after unreachable code elimination"),
188              cl::Hidden, cl::cat(BoltOptCategory));
189 
190 static cl::opt<bool> RegReAssign(
191     "reg-reassign",
192     cl::desc(
193         "reassign registers so as to avoid using REX prefixes in hot code"),
194     cl::cat(BoltOptCategory));
195 
196 static cl::opt<bool> SimplifyConditionalTailCalls(
197     "simplify-conditional-tail-calls",
198     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
199     cl::init(true), cl::cat(BoltOptCategory));
200 
201 static cl::opt<bool> SimplifyRODataLoads(
202     "simplify-rodata-loads",
203     cl::desc("simplify loads from read-only sections by replacing the memory "
204              "operand with the constant found in the corresponding section"),
205     cl::cat(BoltOptCategory));
206 
207 static cl::list<std::string>
208 SpecializeMemcpy1("memcpy1-spec",
209   cl::desc("list of functions with call sites for which to specialize memcpy() "
210            "for size 1"),
211   cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
212   cl::ZeroOrMore, cl::cat(BoltOptCategory));
213 
214 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
215                            cl::cat(BoltOptCategory));
216 
217 static cl::opt<bool> StringOps(
218     "inline-memcpy",
219     cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
220     cl::cat(BoltOptCategory));
221 
222 static cl::opt<bool> StripRepRet(
223     "strip-rep-ret",
224     cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
225     cl::init(true), cl::cat(BoltOptCategory));
226 
227 static cl::opt<bool> VerifyCFG("verify-cfg",
228                                cl::desc("verify the CFG after every pass"),
229                                cl::Hidden, cl::cat(BoltOptCategory));
230 
231 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
232                                         cl::desc("reorder three way branches"),
233                                         cl::ReallyHidden,
234                                         cl::cat(BoltOptCategory));
235 
236 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
237                                         cl::desc("fold jcc+mov into cmov"),
238                                         cl::ReallyHidden,
239                                         cl::cat(BoltOptCategory));
240 
241 } // namespace opts
242 
243 namespace llvm {
244 namespace bolt {
245 
246 using namespace opts;
247 
248 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
249 const char BinaryFunctionPassManager::TimerGroupDesc[] =
250     "Binary Function Pass Manager";
251 
252 void BinaryFunctionPassManager::runPasses() {
253   auto &BFs = BC.getBinaryFunctions();
254   for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
255     const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
256         &OptPassPair = Passes[PassIdx];
257     if (!OptPassPair.first)
258       continue;
259 
260     const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
261     std::string PassIdName =
262         formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
263 
264     if (opts::Verbosity > 0)
265       outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
266 
267     NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
268                        TimerGroupDesc, TimeOpts);
269 
270     callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
271                       Pass->getName(), opts::DynoStatsAll);
272 
273     if (opts::VerifyCFG &&
274         !std::accumulate(
275             BFs.begin(), BFs.end(), true,
276             [](const bool Valid,
277                const std::pair<const uint64_t, BinaryFunction> &It) {
278               return Valid && It.second.validateCFG();
279             })) {
280       errs() << "BOLT-ERROR: Invalid CFG detected after pass "
281              << Pass->getName() << "\n";
282       exit(1);
283     }
284 
285     if (opts::Verbosity > 0)
286       outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
287 
288     if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
289       continue;
290 
291     const std::string Message = std::string("after ") + Pass->getName();
292 
293     for (auto &It : BFs) {
294       BinaryFunction &Function = It.second;
295 
296       if (!Pass->shouldPrint(Function))
297         continue;
298 
299       Function.print(outs(), Message, true);
300 
301       if (opts::DumpDotAll)
302         Function.dumpGraphForPass(PassIdName);
303     }
304   }
305 }
306 
307 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
308   BinaryFunctionPassManager Manager(BC);
309 
310   const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions());
311 
312   Manager.registerPass(std::make_unique<AsmDumpPass>(),
313                        opts::AsmDump.getNumOccurrences());
314 
315   if (opts::Instrument)
316     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
317 
318   // Here we manage dependencies/order manually, since passes are run in the
319   // order they're registered.
320 
321   // Run this pass first to use stats for the original functions.
322   Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
323 
324   if (opts::PrintProfileStats)
325     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
326 
327   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
328 
329   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
330 
331   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
332 
333   Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
334 
335   Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
336                        opts::StripRepRet);
337 
338   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
339                        opts::ICF);
340 
341   if (BC.isAArch64())
342     Manager.registerPass(
343         std::make_unique<VeneerElimination>(PrintVeneerElimination));
344 
345   Manager.registerPass(
346       std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
347       !opts::SpecializeMemcpy1.empty());
348 
349   Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
350                        opts::StringOps);
351 
352   Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
353 
354   Manager.registerPass(
355       std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
356       opts::JTFootprintReductionFlag);
357 
358   Manager.registerPass(
359       std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
360       opts::SimplifyRODataLoads);
361 
362   Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
363                        opts::RegReAssign);
364 
365   Manager.registerPass(std::make_unique<Inliner>(PrintInline));
366 
367   Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
368                        opts::ICF);
369 
370   Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
371 
372   Manager.registerPass(std::make_unique<ThreeWayBranch>(),
373                        opts::ThreeWayBranchFlag);
374 
375   Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
376 
377   Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
378                        opts::EliminateUnreachable);
379 
380   Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
381 
382   Manager.registerPass(std::make_unique<LoopInversionPass>());
383 
384   Manager.registerPass(std::make_unique<TailDuplication>());
385 
386   Manager.registerPass(std::make_unique<CMOVConversion>(),
387                        opts::CMOVConversionFlag);
388 
389   // This pass syncs local branches with CFG. If any of the following
390   // passes breaks the sync - they either need to re-run the pass or
391   // fix branches consistency internally.
392   Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
393 
394   // This pass should come close to last since it uses the estimated hot
395   // size of a function to determine the order.  It should definitely
396   // also happen after any changes to the call graph are made, e.g. inlining.
397   Manager.registerPass(
398       std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
399 
400   // Print final dyno stats right while CFG and instruction analysis are intact.
401   Manager.registerPass(
402       std::make_unique<DynoStatsPrintPass>(
403           InitialDynoStats, "after all optimizations before SCTC and FOP"),
404       opts::PrintDynoStats || opts::DynoStatsAll);
405 
406   // Add the StokeInfo pass, which extract functions for stoke optimization and
407   // get the liveness information for them
408   Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
409 
410   // This pass introduces conditional jumps into external functions.
411   // Between extending CFG to support this and isolating this pass we chose
412   // the latter. Thus this pass will do double jump removal and unreachable
413   // code elimination if necessary and won't rely on peepholes/UCE for these
414   // optimizations.
415   // More generally this pass should be the last optimization pass that
416   // modifies branches/control flow.  This pass is run after function
417   // reordering so that it can tell whether calls are forward/backward
418   // accurately.
419   Manager.registerPass(
420       std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
421       opts::SimplifyConditionalTailCalls);
422 
423   Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
424 
425   Manager.registerPass(std::make_unique<AlignerPass>());
426 
427   // Perform reordering on data contained in one or more sections using
428   // memory profiling data.
429   Manager.registerPass(std::make_unique<ReorderData>());
430 
431   if (BC.isAArch64()) {
432     Manager.registerPass(std::make_unique<ADRRelaxationPass>());
433 
434     // Tighten branches according to offset differences between branch and
435     // targets. No extra instructions after this pass, otherwise we may have
436     // relocations out of range and crash during linking.
437     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
438   }
439 
440   // This pass should always run last.*
441   Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
442 
443   // FrameOptimizer has an implicit dependency on FinalizeFunctions.
444   // FrameOptimizer move values around and needs to update CFIs. To do this, it
445   // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
446   // placed according to the final layout.
447   Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
448 
449   Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
450 
451   Manager.registerPass(
452       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
453 
454   // Assign each function an output section.
455   Manager.registerPass(std::make_unique<AssignSections>());
456 
457   // Patch original function entries
458   if (BC.HasRelocations)
459     Manager.registerPass(std::make_unique<PatchEntries>());
460 
461   // This pass turns tail calls into jumps which makes them invisible to
462   // function reordering. It's unsafe to use any CFG or instruction analysis
463   // after this point.
464   Manager.registerPass(
465       std::make_unique<InstructionLowering>(PrintAfterLowering));
466 
467   // In non-relocation mode, mark functions that do not fit into their original
468   // space as non-simple if we have to (e.g. for correct debug info update).
469   // NOTE: this pass depends on finalized code.
470   if (!BC.HasRelocations)
471     Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
472 
473   Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
474 
475   Manager.runPasses();
476 }
477 
478 } // namespace bolt
479 } // namespace llvm
480