1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "bolt/Rewrite/BinaryPassManager.h"
10 #include "bolt/Passes/ADRRelaxationPass.h"
11 #include "bolt/Passes/Aligner.h"
12 #include "bolt/Passes/AllocCombiner.h"
13 #include "bolt/Passes/AsmDump.h"
14 #include "bolt/Passes/CMOVConversion.h"
15 #include "bolt/Passes/FrameOptimizer.h"
16 #include "bolt/Passes/IdenticalCodeFolding.h"
17 #include "bolt/Passes/IndirectCallPromotion.h"
18 #include "bolt/Passes/Inliner.h"
19 #include "bolt/Passes/Instrumentation.h"
20 #include "bolt/Passes/JTFootprintReduction.h"
21 #include "bolt/Passes/LongJmp.h"
22 #include "bolt/Passes/LoopInversionPass.h"
23 #include "bolt/Passes/PLTCall.h"
24 #include "bolt/Passes/PatchEntries.h"
25 #include "bolt/Passes/RegReAssign.h"
26 #include "bolt/Passes/ReorderData.h"
27 #include "bolt/Passes/ReorderFunctions.h"
28 #include "bolt/Passes/RetpolineInsertion.h"
29 #include "bolt/Passes/SplitFunctions.h"
30 #include "bolt/Passes/StokeInfo.h"
31 #include "bolt/Passes/TailDuplication.h"
32 #include "bolt/Passes/ThreeWayBranch.h"
33 #include "bolt/Passes/ValidateInternalCalls.h"
34 #include "bolt/Passes/VeneerElimination.h"
35 #include "bolt/Utils/CommandLineOpts.h"
36 #include "llvm/Support/FormatVariadic.h"
37 #include "llvm/Support/Timer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <memory>
40 #include <numeric>
41
42 using namespace llvm;
43
44 namespace opts {
45
46 extern cl::opt<bool> PrintAll;
47 extern cl::opt<bool> PrintDynoStats;
48 extern cl::opt<bool> DumpDotAll;
49 extern cl::opt<std::string> AsmDump;
50 extern cl::opt<bolt::PLTCall::OptType> PLT;
51
52 static cl::opt<bool>
53 DynoStatsAll("dyno-stats-all",
54 cl::desc("print dyno stats after each stage"),
55 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
56
57 static cl::opt<bool>
58 EliminateUnreachable("eliminate-unreachable",
59 cl::desc("eliminate unreachable code"), cl::init(true),
60 cl::cat(BoltOptCategory));
61
62 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
63 cl::cat(BoltOptCategory));
64
65 static cl::opt<bool> JTFootprintReductionFlag(
66 "jt-footprint-reduction",
67 cl::desc("make jump tables size smaller at the cost of using more "
68 "instructions at jump sites"),
69 cl::cat(BoltOptCategory));
70
71 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
72 cl::ReallyHidden, cl::cat(BoltOptCategory));
73
74 cl::opt<bool>
75 PrintAfterBranchFixup("print-after-branch-fixup",
76 cl::desc("print function after fixing local branches"),
77 cl::Hidden, cl::cat(BoltOptCategory));
78
79 static cl::opt<bool>
80 PrintAfterLowering("print-after-lowering",
81 cl::desc("print function after instruction lowering"),
82 cl::Hidden, cl::cat(BoltOptCategory));
83
84 cl::opt<bool>
85 PrintFinalized("print-finalized",
86 cl::desc("print function after CFG is finalized"),
87 cl::Hidden, cl::cat(BoltOptCategory));
88
89 static cl::opt<bool>
90 PrintFOP("print-fop",
91 cl::desc("print functions after frame optimizer pass"), cl::Hidden,
92 cl::cat(BoltOptCategory));
93
94 static cl::opt<bool>
95 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
96 cl::Hidden, cl::cat(BoltOptCategory));
97
98 static cl::opt<bool>
99 PrintICP("print-icp",
100 cl::desc("print functions after indirect call promotion"),
101 cl::Hidden, cl::cat(BoltOptCategory));
102
103 static cl::opt<bool>
104 PrintInline("print-inline",
105 cl::desc("print functions after inlining optimization"),
106 cl::Hidden, cl::cat(BoltOptCategory));
107
108 static cl::opt<bool> PrintJTFootprintReduction(
109 "print-after-jt-footprint-reduction",
110 cl::desc("print function after jt-footprint-reduction pass"),
111 cl::cat(BoltOptCategory));
112
113 static cl::opt<bool>
114 PrintLongJmp("print-longjmp",
115 cl::desc("print functions after longjmp pass"), cl::Hidden,
116 cl::cat(BoltOptCategory));
117
118 cl::opt<bool>
119 PrintNormalized("print-normalized",
120 cl::desc("print functions after CFG is normalized"),
121 cl::Hidden, cl::cat(BoltCategory));
122
123 static cl::opt<bool> PrintOptimizeBodyless(
124 "print-optimize-bodyless",
125 cl::desc("print functions after bodyless optimization"), cl::Hidden,
126 cl::cat(BoltOptCategory));
127
128 static cl::opt<bool>
129 PrintPeepholes("print-peepholes",
130 cl::desc("print functions after peephole optimization"),
131 cl::Hidden, cl::cat(BoltOptCategory));
132
133 static cl::opt<bool>
134 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
135 cl::Hidden, cl::cat(BoltOptCategory));
136
137 static cl::opt<bool>
138 PrintProfileStats("print-profile-stats",
139 cl::desc("print profile quality/bias analysis"),
140 cl::cat(BoltCategory));
141
142 static cl::opt<bool>
143 PrintRegReAssign("print-regreassign",
144 cl::desc("print functions after regreassign pass"),
145 cl::Hidden, cl::cat(BoltOptCategory));
146
147 cl::opt<bool>
148 PrintReordered("print-reordered",
149 cl::desc("print functions after layout optimization"),
150 cl::Hidden, cl::cat(BoltOptCategory));
151
152 static cl::opt<bool>
153 PrintReorderedFunctions("print-reordered-functions",
154 cl::desc("print functions after clustering"),
155 cl::Hidden, cl::cat(BoltOptCategory));
156
157 static cl::opt<bool> PrintRetpolineInsertion(
158 "print-retpoline-insertion",
159 cl::desc("print functions after retpoline insertion pass"),
160 cl::cat(BoltCategory));
161
162 static cl::opt<bool> PrintSCTC(
163 "print-sctc",
164 cl::desc("print functions after conditional tail call simplification"),
165 cl::Hidden, cl::cat(BoltOptCategory));
166
167 static cl::opt<bool> PrintSimplifyROLoads(
168 "print-simplify-rodata-loads",
169 cl::desc("print functions after simplification of RO data loads"),
170 cl::Hidden, cl::cat(BoltOptCategory));
171
172 static cl::opt<bool>
173 PrintSplit("print-split", cl::desc("print functions after code splitting"),
174 cl::Hidden, cl::cat(BoltOptCategory));
175
176 static cl::opt<bool>
177 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
178 cl::cat(BoltOptCategory));
179
180 static cl::opt<bool> PrintVeneerElimination(
181 "print-veneer-elimination",
182 cl::desc("print functions after veneer elimination pass"),
183 cl::cat(BoltOptCategory));
184
185 static cl::opt<bool>
186 PrintUCE("print-uce",
187 cl::desc("print functions after unreachable code elimination"),
188 cl::Hidden, cl::cat(BoltOptCategory));
189
190 static cl::opt<bool> RegReAssign(
191 "reg-reassign",
192 cl::desc(
193 "reassign registers so as to avoid using REX prefixes in hot code"),
194 cl::cat(BoltOptCategory));
195
196 static cl::opt<bool> SimplifyConditionalTailCalls(
197 "simplify-conditional-tail-calls",
198 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
199 cl::init(true), cl::cat(BoltOptCategory));
200
201 static cl::opt<bool> SimplifyRODataLoads(
202 "simplify-rodata-loads",
203 cl::desc("simplify loads from read-only sections by replacing the memory "
204 "operand with the constant found in the corresponding section"),
205 cl::cat(BoltOptCategory));
206
207 static cl::list<std::string>
208 SpecializeMemcpy1("memcpy1-spec",
209 cl::desc("list of functions with call sites for which to specialize memcpy() "
210 "for size 1"),
211 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
212 cl::ZeroOrMore, cl::cat(BoltOptCategory));
213
214 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
215 cl::cat(BoltOptCategory));
216
217 static cl::opt<bool> StringOps(
218 "inline-memcpy",
219 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
220 cl::cat(BoltOptCategory));
221
222 static cl::opt<bool> StripRepRet(
223 "strip-rep-ret",
224 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
225 cl::init(true), cl::cat(BoltOptCategory));
226
227 static cl::opt<bool> VerifyCFG("verify-cfg",
228 cl::desc("verify the CFG after every pass"),
229 cl::Hidden, cl::cat(BoltOptCategory));
230
231 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
232 cl::desc("reorder three way branches"),
233 cl::ReallyHidden,
234 cl::cat(BoltOptCategory));
235
236 static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
237 cl::desc("fold jcc+mov into cmov"),
238 cl::ReallyHidden,
239 cl::cat(BoltOptCategory));
240
241 } // namespace opts
242
243 namespace llvm {
244 namespace bolt {
245
246 using namespace opts;
247
248 const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
249 const char BinaryFunctionPassManager::TimerGroupDesc[] =
250 "Binary Function Pass Manager";
251
runPasses()252 void BinaryFunctionPassManager::runPasses() {
253 auto &BFs = BC.getBinaryFunctions();
254 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
255 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
256 &OptPassPair = Passes[PassIdx];
257 if (!OptPassPair.first)
258 continue;
259
260 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
261 std::string PassIdName =
262 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
263
264 if (opts::Verbosity > 0)
265 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
266
267 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
268 TimerGroupDesc, TimeOpts);
269
270 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
271 Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
272
273 if (opts::VerifyCFG &&
274 !std::accumulate(
275 BFs.begin(), BFs.end(), true,
276 [](const bool Valid,
277 const std::pair<const uint64_t, BinaryFunction> &It) {
278 return Valid && It.second.validateCFG();
279 })) {
280 errs() << "BOLT-ERROR: Invalid CFG detected after pass "
281 << Pass->getName() << "\n";
282 exit(1);
283 }
284
285 if (opts::Verbosity > 0)
286 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
287
288 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
289 continue;
290
291 const std::string Message = std::string("after ") + Pass->getName();
292
293 for (auto &It : BFs) {
294 BinaryFunction &Function = It.second;
295
296 if (!Pass->shouldPrint(Function))
297 continue;
298
299 Function.print(outs(), Message, true);
300
301 if (opts::DumpDotAll)
302 Function.dumpGraphForPass(PassIdName);
303 }
304 }
305 }
306
runAllPasses(BinaryContext & BC)307 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
308 BinaryFunctionPassManager Manager(BC);
309
310 const DynoStats InitialDynoStats =
311 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
312
313 Manager.registerPass(std::make_unique<AsmDumpPass>(),
314 opts::AsmDump.getNumOccurrences());
315
316 if (BC.isAArch64())
317 Manager.registerPass(
318 std::make_unique<VeneerElimination>(PrintVeneerElimination));
319
320 if (opts::Instrument)
321 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
322
323 // Here we manage dependencies/order manually, since passes are run in the
324 // order they're registered.
325
326 // Run this pass first to use stats for the original functions.
327 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));
328
329 if (opts::PrintProfileStats)
330 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
331
332 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
333
334 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
335
336 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint));
337
338 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));
339
340 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
341 opts::StripRepRet);
342
343 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
344 opts::ICF);
345
346 Manager.registerPass(
347 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
348 !opts::SpecializeMemcpy1.empty());
349
350 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
351 opts::StringOps);
352
353 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));
354
355 Manager.registerPass(
356 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
357 opts::JTFootprintReductionFlag);
358
359 Manager.registerPass(
360 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
361 opts::SimplifyRODataLoads);
362
363 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
364 opts::RegReAssign);
365
366 Manager.registerPass(std::make_unique<Inliner>(PrintInline));
367
368 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
369 opts::ICF);
370
371 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
372
373 Manager.registerPass(std::make_unique<ThreeWayBranch>(),
374 opts::ThreeWayBranchFlag);
375
376 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));
377
378 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
379 opts::EliminateUnreachable);
380
381 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
382
383 Manager.registerPass(std::make_unique<LoopInversionPass>());
384
385 Manager.registerPass(std::make_unique<TailDuplication>());
386
387 Manager.registerPass(std::make_unique<CMOVConversion>(),
388 opts::CMOVConversionFlag);
389
390 // This pass syncs local branches with CFG. If any of the following
391 // passes breaks the sync - they either need to re-run the pass or
392 // fix branches consistency internally.
393 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
394
395 // This pass should come close to last since it uses the estimated hot
396 // size of a function to determine the order. It should definitely
397 // also happen after any changes to the call graph are made, e.g. inlining.
398 Manager.registerPass(
399 std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
400
401 // Print final dyno stats right while CFG and instruction analysis are intact.
402 Manager.registerPass(
403 std::make_unique<DynoStatsPrintPass>(
404 InitialDynoStats, "after all optimizations before SCTC and FOP"),
405 opts::PrintDynoStats || opts::DynoStatsAll);
406
407 // Add the StokeInfo pass, which extract functions for stoke optimization and
408 // get the liveness information for them
409 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);
410
411 // This pass introduces conditional jumps into external functions.
412 // Between extending CFG to support this and isolating this pass we chose
413 // the latter. Thus this pass will do double jump removal and unreachable
414 // code elimination if necessary and won't rely on peepholes/UCE for these
415 // optimizations.
416 // More generally this pass should be the last optimization pass that
417 // modifies branches/control flow. This pass is run after function
418 // reordering so that it can tell whether calls are forward/backward
419 // accurately.
420 Manager.registerPass(
421 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
422 opts::SimplifyConditionalTailCalls);
423
424 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes));
425
426 Manager.registerPass(std::make_unique<AlignerPass>());
427
428 // Perform reordering on data contained in one or more sections using
429 // memory profiling data.
430 Manager.registerPass(std::make_unique<ReorderData>());
431
432 if (BC.isAArch64()) {
433 Manager.registerPass(std::make_unique<ADRRelaxationPass>());
434
435 // Tighten branches according to offset differences between branch and
436 // targets. No extra instructions after this pass, otherwise we may have
437 // relocations out of range and crash during linking.
438 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
439 }
440
441 // This pass should always run last.*
442 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));
443
444 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
445 // FrameOptimizer move values around and needs to update CFIs. To do this, it
446 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
447 // placed according to the final layout.
448 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));
449
450 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));
451
452 Manager.registerPass(
453 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
454
455 // Assign each function an output section.
456 Manager.registerPass(std::make_unique<AssignSections>());
457
458 // Patch original function entries
459 if (BC.HasRelocations)
460 Manager.registerPass(std::make_unique<PatchEntries>());
461
462 // This pass turns tail calls into jumps which makes them invisible to
463 // function reordering. It's unsafe to use any CFG or instruction analysis
464 // after this point.
465 Manager.registerPass(
466 std::make_unique<InstructionLowering>(PrintAfterLowering));
467
468 // In non-relocation mode, mark functions that do not fit into their original
469 // space as non-simple if we have to (e.g. for correct debug info update).
470 // NOTE: this pass depends on finalized code.
471 if (!BC.HasRelocations)
472 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
473
474 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));
475
476 Manager.runPasses();
477 }
478
479 } // namespace bolt
480 } // namespace llvm
481