1 //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PassManagerBuilder class, which is used to set up a
11 // "standard" optimization sequence suitable for languages like C and C++.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
16 #include "llvm-c/Transforms/PassManagerBuilder.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
20 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/InlineCost.h"
23 #include "llvm/Analysis/Passes.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/LegacyPassManager.h"
29 #include "llvm/IR/ModuleSummaryIndex.h"
30 #include "llvm/IR/Verifier.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/ManagedStatic.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include "llvm/Transforms/IPO.h"
35 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
36 #include "llvm/Transforms/IPO/FunctionAttrs.h"
37 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
38 #include "llvm/Transforms/Instrumentation.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include "llvm/Transforms/Scalar/GVN.h"
41 #include "llvm/Transforms/Vectorize.h"
42 
43 using namespace llvm;
44 
45 static cl::opt<bool>
46 RunLoopVectorization("vectorize-loops", cl::Hidden,
47                      cl::desc("Run the Loop vectorization passes"));
48 
49 static cl::opt<bool>
50 RunSLPVectorization("vectorize-slp", cl::Hidden,
51                     cl::desc("Run the SLP vectorization passes"));
52 
53 static cl::opt<bool>
54 RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
55                     cl::desc("Run the BB vectorization passes"));
56 
57 static cl::opt<bool>
58 UseGVNAfterVectorization("use-gvn-after-vectorization",
59   cl::init(false), cl::Hidden,
60   cl::desc("Run GVN instead of Early CSE after vectorization passes"));
61 
62 static cl::opt<bool> ExtraVectorizerPasses(
63     "extra-vectorizer-passes", cl::init(false), cl::Hidden,
64     cl::desc("Run cleanup optimization passes after vectorization."));
65 
66 static cl::opt<bool>
67 RunLoopRerolling("reroll-loops", cl::Hidden,
68                  cl::desc("Run the loop rerolling pass"));
69 
70 static cl::opt<bool>
71 RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
72              cl::desc("Run the float2int (float demotion) pass"));
73 
74 static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
75                                     cl::Hidden,
76                                     cl::desc("Run the load combining pass"));
77 
78 static cl::opt<bool>
79 RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
80   cl::init(true), cl::Hidden,
81   cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
82            "vectorizer instead of before"));
83 
84 // Experimental option to use CFL-AA
85 enum class CFLAAType { None, Steensgaard, Andersen, Both };
86 static cl::opt<CFLAAType>
87     UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden,
88              cl::desc("Enable the new, experimental CFL alias analysis"),
89              cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
90                         clEnumValN(CFLAAType::Steensgaard, "steens",
91                                    "Enable unification-based CFL-AA"),
92                         clEnumValN(CFLAAType::Andersen, "anders",
93                                    "Enable inclusion-based CFL-AA"),
94                         clEnumValN(CFLAAType::Both, "both",
95                                    "Enable both variants of CFL-AA"),
96                         clEnumValEnd));
97 
98 static cl::opt<bool>
99 EnableMLSM("mlsm", cl::init(true), cl::Hidden,
100            cl::desc("Enable motion of merged load and store"));
101 
102 static cl::opt<bool> EnableLoopInterchange(
103     "enable-loopinterchange", cl::init(false), cl::Hidden,
104     cl::desc("Enable the new, experimental LoopInterchange Pass"));
105 
106 static cl::opt<bool> EnableNonLTOGlobalsModRef(
107     "enable-non-lto-gmr", cl::init(true), cl::Hidden,
108     cl::desc(
109         "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
110 
111 static cl::opt<bool> EnableLoopLoadElim(
112     "enable-loop-load-elim", cl::init(true), cl::Hidden,
113     cl::desc("Enable the LoopLoadElimination Pass"));
114 
115 static cl::opt<bool> RunPGOInstrGen(
116     "profile-generate", cl::init(false), cl::Hidden,
117     cl::desc("Enable PGO instrumentation."));
118 
119 static cl::opt<std::string>
120     PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
121                       cl::desc("Specify the path of profile data file."));
122 
123 static cl::opt<std::string> RunPGOInstrUse(
124     "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
125     cl::desc("Enable use phase of PGO instrumentation and specify the path "
126              "of profile data file"));
127 
128 static cl::opt<bool> UseLoopVersioningLICM(
129     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
130     cl::desc("Enable the experimental Loop Versioning LICM pass"));
131 
132 static cl::opt<bool>
133     DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
134                       cl::desc("Disable pre-instrumentation inliner"));
135 
136 static cl::opt<int> PreInlineThreshold(
137     "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
138     cl::desc("Control the amount of inlining in pre-instrumentation inliner "
139              "(default = 75)"));
140 
141 static cl::opt<bool> EnableGVNHoist(
142     "enable-gvn-hoist", cl::init(true), cl::Hidden,
143     cl::desc("Enable the GVN hoisting pass (default = on)"));
144 
145 PassManagerBuilder::PassManagerBuilder() {
146     OptLevel = 2;
147     SizeLevel = 0;
148     LibraryInfo = nullptr;
149     Inliner = nullptr;
150     ModuleSummary = nullptr;
151     DisableUnitAtATime = false;
152     DisableUnrollLoops = false;
153     BBVectorize = RunBBVectorization;
154     SLPVectorize = RunSLPVectorization;
155     LoopVectorize = RunLoopVectorization;
156     RerollLoops = RunLoopRerolling;
157     LoadCombine = RunLoadCombine;
158     DisableGVNLoadPRE = false;
159     VerifyInput = false;
160     VerifyOutput = false;
161     MergeFunctions = false;
162     PrepareForLTO = false;
163     EnablePGOInstrGen = RunPGOInstrGen;
164     PGOInstrGen = PGOOutputFile;
165     PGOInstrUse = RunPGOInstrUse;
166     PrepareForThinLTO = false;
167     PerformThinLTO = false;
168 }
169 
170 PassManagerBuilder::~PassManagerBuilder() {
171   delete LibraryInfo;
172   delete Inliner;
173 }
174 
175 /// Set of global extensions, automatically added as part of the standard set.
176 static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
177    PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
178 
179 void PassManagerBuilder::addGlobalExtension(
180     PassManagerBuilder::ExtensionPointTy Ty,
181     PassManagerBuilder::ExtensionFn Fn) {
182   GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
183 }
184 
185 void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
186   Extensions.push_back(std::make_pair(Ty, std::move(Fn)));
187 }
188 
189 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
190                                            legacy::PassManagerBase &PM) const {
191   for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
192     if ((*GlobalExtensions)[i].first == ETy)
193       (*GlobalExtensions)[i].second(*this, PM);
194   for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
195     if (Extensions[i].first == ETy)
196       Extensions[i].second(*this, PM);
197 }
198 
199 void PassManagerBuilder::addInitialAliasAnalysisPasses(
200     legacy::PassManagerBase &PM) const {
201   switch (UseCFLAA) {
202   case CFLAAType::Steensgaard:
203     PM.add(createCFLSteensAAWrapperPass());
204     break;
205   case CFLAAType::Andersen:
206     PM.add(createCFLAndersAAWrapperPass());
207     break;
208   case CFLAAType::Both:
209     PM.add(createCFLSteensAAWrapperPass());
210     PM.add(createCFLAndersAAWrapperPass());
211     break;
212   default:
213     break;
214   }
215 
216   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
217   // BasicAliasAnalysis wins if they disagree. This is intended to help
218   // support "obvious" type-punning idioms.
219   PM.add(createTypeBasedAAWrapperPass());
220   PM.add(createScopedNoAliasAAWrapperPass());
221 }
222 
223 void PassManagerBuilder::addInstructionCombiningPass(
224     legacy::PassManagerBase &PM) const {
225   bool ExpensiveCombines = OptLevel > 2;
226   PM.add(createInstructionCombiningPass(ExpensiveCombines));
227 }
228 
229 void PassManagerBuilder::populateFunctionPassManager(
230     legacy::FunctionPassManager &FPM) {
231   addExtensionsToPM(EP_EarlyAsPossible, FPM);
232 
233   // Add LibraryInfo if we have some.
234   if (LibraryInfo)
235     FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
236 
237   if (OptLevel == 0) return;
238 
239   addInitialAliasAnalysisPasses(FPM);
240 
241   FPM.add(createCFGSimplificationPass());
242   FPM.add(createSROAPass());
243   FPM.add(createEarlyCSEPass());
244   if(EnableGVNHoist)
245     FPM.add(createGVNHoistPass());
246   FPM.add(createLowerExpectIntrinsicPass());
247 }
248 
249 // Do PGO instrumentation generation or use pass as the option specified.
250 void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
251   if (!EnablePGOInstrGen && PGOInstrUse.empty())
252     return;
253   // Perform the preinline and cleanup passes for O1 and above.
254   // And avoid doing them if optimizing for size.
255   if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
256     // Create preinline pass. We construct an InlineParams object and specify
257     // the threshold here to avoid the command line options of the regular
258     // inliner to influence pre-inlining. The only fields of InlineParams we
259     // care about are DefaultThreshold and HintThreshold.
260     InlineParams IP;
261     IP.DefaultThreshold = PreInlineThreshold;
262     // FIXME: The hint threshold has the same value used by the regular inliner.
263     // This should probably be lowered after performance testing.
264     IP.HintThreshold = 325;
265 
266     MPM.add(createFunctionInliningPass(IP));
267     MPM.add(createSROAPass());
268     MPM.add(createEarlyCSEPass());             // Catch trivial redundancies
269     MPM.add(createCFGSimplificationPass());    // Merge & remove BBs
270     MPM.add(createInstructionCombiningPass()); // Combine silly seq's
271     addExtensionsToPM(EP_Peephole, MPM);
272   }
273   if (EnablePGOInstrGen) {
274     MPM.add(createPGOInstrumentationGenLegacyPass());
275     // Add the profile lowering pass.
276     InstrProfOptions Options;
277     if (!PGOInstrGen.empty())
278       Options.InstrProfileOutput = PGOInstrGen;
279     MPM.add(createInstrProfilingLegacyPass(Options));
280   }
281   if (!PGOInstrUse.empty())
282     MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
283 }
284 void PassManagerBuilder::addFunctionSimplificationPasses(
285     legacy::PassManagerBase &MPM) {
286   // Start of function pass.
287   // Break up aggregate allocas, using SSAUpdater.
288   MPM.add(createSROAPass());
289   MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
290   // Speculative execution if the target has divergent branches; otherwise nop.
291   MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
292   MPM.add(createJumpThreadingPass());         // Thread jumps.
293   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
294   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
295   // Combine silly seq's
296   addInstructionCombiningPass(MPM);
297   addExtensionsToPM(EP_Peephole, MPM);
298 
299   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
300   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
301   MPM.add(createReassociatePass());           // Reassociate expressions
302   // Rotate Loop - disable header duplication at -Oz
303   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
304   MPM.add(createLICMPass());                  // Hoist loop invariants
305   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
306   MPM.add(createCFGSimplificationPass());
307   addInstructionCombiningPass(MPM);
308   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
309   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
310   MPM.add(createLoopDeletionPass());          // Delete dead loops
311   if (EnableLoopInterchange) {
312     MPM.add(createLoopInterchangePass()); // Interchange loops
313     MPM.add(createCFGSimplificationPass());
314   }
315   if (!DisableUnrollLoops)
316     MPM.add(createSimpleLoopUnrollPass());    // Unroll small loops
317   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
318 
319   if (OptLevel > 1) {
320     if (EnableMLSM)
321       MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
322     MPM.add(createGVNPass(DisableGVNLoadPRE));  // Remove redundancies
323   }
324   MPM.add(createMemCpyOptPass());             // Remove memcpy / form memset
325   MPM.add(createSCCPPass());                  // Constant prop with SCCP
326 
327   // Delete dead bit computations (instcombine runs after to fold away the dead
328   // computations, and then ADCE will run later to exploit any new DCE
329   // opportunities that creates).
330   MPM.add(createBitTrackingDCEPass());        // Delete dead bit computations
331 
332   // Run instcombine after redundancy elimination to exploit opportunities
333   // opened up by them.
334   addInstructionCombiningPass(MPM);
335   addExtensionsToPM(EP_Peephole, MPM);
336   MPM.add(createJumpThreadingPass());         // Thread jumps
337   MPM.add(createCorrelatedValuePropagationPass());
338   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
339   MPM.add(createLICMPass());
340 
341   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
342 
343   if (RerollLoops)
344     MPM.add(createLoopRerollPass());
345   if (!RunSLPAfterLoopVectorization) {
346     if (SLPVectorize)
347       MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
348 
349     if (BBVectorize) {
350       MPM.add(createBBVectorizePass());
351       addInstructionCombiningPass(MPM);
352       addExtensionsToPM(EP_Peephole, MPM);
353       if (OptLevel > 1 && UseGVNAfterVectorization)
354         MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
355       else
356         MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
357 
358       // BBVectorize may have significantly shortened a loop body; unroll again.
359       if (!DisableUnrollLoops)
360         MPM.add(createLoopUnrollPass());
361     }
362   }
363 
364   if (LoadCombine)
365     MPM.add(createLoadCombinePass());
366 
367   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
368   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
369   // Clean up after everything.
370   addInstructionCombiningPass(MPM);
371   addExtensionsToPM(EP_Peephole, MPM);
372 }
373 
374 void PassManagerBuilder::populateModulePassManager(
375     legacy::PassManagerBase &MPM) {
376   // Allow forcing function attributes as a debugging and tuning aid.
377   MPM.add(createForceFunctionAttrsLegacyPass());
378 
379   // If all optimizations are disabled, just run the always-inline pass and,
380   // if enabled, the function merging pass.
381   if (OptLevel == 0) {
382     addPGOInstrPasses(MPM);
383     if (Inliner) {
384       MPM.add(Inliner);
385       Inliner = nullptr;
386     }
387 
388     // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
389     // creates a CGSCC pass manager, but we don't want to add extensions into
390     // that pass manager. To prevent this we insert a no-op module pass to reset
391     // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
392     // builds. The function merging pass is
393     if (MergeFunctions)
394       MPM.add(createMergeFunctionsPass());
395     else if (!GlobalExtensions->empty() || !Extensions.empty())
396       MPM.add(createBarrierNoopPass());
397 
398     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
399     return;
400   }
401 
402   // Add LibraryInfo if we have some.
403   if (LibraryInfo)
404     MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
405 
406   addInitialAliasAnalysisPasses(MPM);
407 
408   // For ThinLTO there are two passes of indirect call promotion. The
409   // first is during the compile phase when PerformThinLTO=false and
410   // intra-module indirect call targets are promoted. The second is during
411   // the ThinLTO backend when PerformThinLTO=true, when we promote imported
412   // inter-module indirect calls. For that we perform indirect call promotion
413   // earlier in the pass pipeline, here before globalopt. Otherwise imported
414   // available_externally functions look unreferenced and are removed.
415   if (PerformThinLTO)
416     MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true));
417 
418   if (!DisableUnitAtATime) {
419     // Infer attributes about declarations if possible.
420     MPM.add(createInferFunctionAttrsLegacyPass());
421 
422     addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
423 
424     MPM.add(createIPSCCPPass());          // IP SCCP
425     MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
426     // Promote any localized global vars.
427     MPM.add(createPromoteMemoryToRegisterPass());
428 
429     MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
430 
431     addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
432     addExtensionsToPM(EP_Peephole, MPM);
433     MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
434   }
435 
436   if (!PerformThinLTO) {
437     /// PGO instrumentation is added during the compile phase for ThinLTO, do
438     /// not run it a second time
439     addPGOInstrPasses(MPM);
440     // Indirect call promotion that promotes intra-module targets only.
441     // For ThinLTO this is done earlier due to interactions with globalopt
442     // for imported functions.
443     MPM.add(createPGOIndirectCallPromotionLegacyPass());
444   }
445 
446   if (EnableNonLTOGlobalsModRef)
447     // We add a module alias analysis pass here. In part due to bugs in the
448     // analysis infrastructure this "works" in that the analysis stays alive
449     // for the entire SCC pass run below.
450     MPM.add(createGlobalsAAWrapperPass());
451 
452   // Start of CallGraph SCC passes.
453   if (!DisableUnitAtATime)
454     MPM.add(createPruneEHPass()); // Remove dead EH info
455   if (Inliner) {
456     MPM.add(Inliner);
457     Inliner = nullptr;
458   }
459   if (!DisableUnitAtATime)
460     MPM.add(createPostOrderFunctionAttrsLegacyPass());
461   if (OptLevel > 2)
462     MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
463 
464   addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
465   addFunctionSimplificationPasses(MPM);
466 
467   // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
468   // pass manager that we are specifically trying to avoid. To prevent this
469   // we must insert a no-op module pass to reset the pass manager.
470   MPM.add(createBarrierNoopPass());
471 
472   if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
473       !PrepareForThinLTO)
474     // Remove avail extern fns and globals definitions if we aren't
475     // compiling an object file for later LTO. For LTO we want to preserve
476     // these so they are eligible for inlining at link-time. Note if they
477     // are unreferenced they will be removed by GlobalDCE later, so
478     // this only impacts referenced available externally globals.
479     // Eventually they will be suppressed during codegen, but eliminating
480     // here enables more opportunity for GlobalDCE as it may make
481     // globals referenced by available external functions dead
482     // and saves running remaining passes on the eliminated functions.
483     MPM.add(createEliminateAvailableExternallyPass());
484 
485   if (!DisableUnitAtATime)
486     MPM.add(createReversePostOrderFunctionAttrsPass());
487 
488   // If we are planning to perform ThinLTO later, let's not bloat the code with
489   // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
490   // during ThinLTO and perform the rest of the optimizations afterward.
491   if (PrepareForThinLTO) {
492     // Reduce the size of the IR as much as possible.
493     MPM.add(createGlobalOptimizerPass());
494     // Rename anon function to be able to export them in the summary.
495     MPM.add(createNameAnonFunctionPass());
496     return;
497   }
498 
499   if (PerformThinLTO)
500     // Optimize globals now when performing ThinLTO, this enables more
501     // optimizations later.
502     MPM.add(createGlobalOptimizerPass());
503 
504   // Scheduling LoopVersioningLICM when inlining is over, because after that
505   // we may see more accurate aliasing. Reason to run this late is that too
506   // early versioning may prevent further inlining due to increase of code
507   // size. By placing it just after inlining other optimizations which runs
508   // later might get benefit of no-alias assumption in clone loop.
509   if (UseLoopVersioningLICM) {
510     MPM.add(createLoopVersioningLICMPass());    // Do LoopVersioningLICM
511     MPM.add(createLICMPass());                  // Hoist loop invariants
512   }
513 
514   if (EnableNonLTOGlobalsModRef)
515     // We add a fresh GlobalsModRef run at this point. This is particularly
516     // useful as the above will have inlined, DCE'ed, and function-attr
517     // propagated everything. We should at this point have a reasonably minimal
518     // and richly annotated call graph. By computing aliasing and mod/ref
519     // information for all local globals here, the late loop passes and notably
520     // the vectorizer will be able to use them to help recognize vectorizable
521     // memory operations.
522     //
523     // Note that this relies on a bug in the pass manager which preserves
524     // a module analysis into a function pass pipeline (and throughout it) so
525     // long as the first function pass doesn't invalidate the module analysis.
526     // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
527     // this to work. Fortunately, it is trivial to preserve AliasAnalysis
528     // (doing nothing preserves it as it is required to be conservatively
529     // correct in the face of IR changes).
530     MPM.add(createGlobalsAAWrapperPass());
531 
532   if (RunFloat2Int)
533     MPM.add(createFloat2IntPass());
534 
535   addExtensionsToPM(EP_VectorizerStart, MPM);
536 
537   // Re-rotate loops in all our loop nests. These may have fallout out of
538   // rotated form due to GVN or other transformations, and the vectorizer relies
539   // on the rotated form. Disable header duplication at -Oz.
540   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
541 
542   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
543   // into separate loop that would otherwise inhibit vectorization.  This is
544   // currently only performed for loops marked with the metadata
545   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
546   MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));
547 
548   MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
549 
550   // Eliminate loads by forwarding stores from the previous iteration to loads
551   // of the current iteration.
552   if (EnableLoopLoadElim)
553     MPM.add(createLoopLoadEliminationPass());
554 
555   // FIXME: Because of #pragma vectorize enable, the passes below are always
556   // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
557   // on -O1 and no #pragma is found). Would be good to have these two passes
558   // as function calls, so that we can only pass them when the vectorizer
559   // changed the code.
560   addInstructionCombiningPass(MPM);
561   if (OptLevel > 1 && ExtraVectorizerPasses) {
562     // At higher optimization levels, try to clean up any runtime overlap and
563     // alignment checks inserted by the vectorizer. We want to track correllated
564     // runtime checks for two inner loops in the same outer loop, fold any
565     // common computations, hoist loop-invariant aspects out of any outer loop,
566     // and unswitch the runtime checks if possible. Once hoisted, we may have
567     // dead (or speculatable) control flows or more combining opportunities.
568     MPM.add(createEarlyCSEPass());
569     MPM.add(createCorrelatedValuePropagationPass());
570     addInstructionCombiningPass(MPM);
571     MPM.add(createLICMPass());
572     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
573     MPM.add(createCFGSimplificationPass());
574     addInstructionCombiningPass(MPM);
575   }
576 
577   if (RunSLPAfterLoopVectorization) {
578     if (SLPVectorize) {
579       MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
580       if (OptLevel > 1 && ExtraVectorizerPasses) {
581         MPM.add(createEarlyCSEPass());
582       }
583     }
584 
585     if (BBVectorize) {
586       MPM.add(createBBVectorizePass());
587       addInstructionCombiningPass(MPM);
588       addExtensionsToPM(EP_Peephole, MPM);
589       if (OptLevel > 1 && UseGVNAfterVectorization)
590         MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
591       else
592         MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
593 
594       // BBVectorize may have significantly shortened a loop body; unroll again.
595       if (!DisableUnrollLoops)
596         MPM.add(createLoopUnrollPass());
597     }
598   }
599 
600   addExtensionsToPM(EP_Peephole, MPM);
601   MPM.add(createCFGSimplificationPass());
602   addInstructionCombiningPass(MPM);
603 
604   if (!DisableUnrollLoops) {
605     MPM.add(createLoopUnrollPass());    // Unroll small loops
606 
607     // LoopUnroll may generate some redundency to cleanup.
608     addInstructionCombiningPass(MPM);
609 
610     // Runtime unrolling will introduce runtime check in loop prologue. If the
611     // unrolled loop is a inner loop, then the prologue will be inside the
612     // outer loop. LICM pass can help to promote the runtime check out if the
613     // checked value is loop invariant.
614     MPM.add(createLICMPass());
615 
616     // Get rid of LCSSA nodes.
617     MPM.add(createInstructionSimplifierPass());
618   }
619 
620   // After vectorization and unrolling, assume intrinsics may tell us more
621   // about pointer alignments.
622   MPM.add(createAlignmentFromAssumptionsPass());
623 
624   if (!DisableUnitAtATime) {
625     // FIXME: We shouldn't bother with this anymore.
626     MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
627 
628     // GlobalOpt already deletes dead functions and globals, at -O2 try a
629     // late pass of GlobalDCE.  It is capable of deleting dead cycles.
630     if (OptLevel > 1) {
631       MPM.add(createGlobalDCEPass());         // Remove dead fns and globals.
632       MPM.add(createConstantMergePass());     // Merge dup global constants
633     }
634   }
635 
636   if (MergeFunctions)
637     MPM.add(createMergeFunctionsPass());
638 
639   addExtensionsToPM(EP_OptimizerLast, MPM);
640 }
641 
642 void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
643   // Remove unused virtual tables to improve the quality of code generated by
644   // whole-program devirtualization and bitset lowering.
645   PM.add(createGlobalDCEPass());
646 
647   // Provide AliasAnalysis services for optimizations.
648   addInitialAliasAnalysisPasses(PM);
649 
650   if (ModuleSummary)
651     PM.add(createFunctionImportPass(ModuleSummary));
652 
653   // Allow forcing function attributes as a debugging and tuning aid.
654   PM.add(createForceFunctionAttrsLegacyPass());
655 
656   // Infer attributes about declarations if possible.
657   PM.add(createInferFunctionAttrsLegacyPass());
658 
659   if (OptLevel > 1) {
660     // Indirect call promotion. This should promote all the targets that are
661     // left by the earlier promotion pass that promotes intra-module targets.
662     // This two-step promotion is to save the compile time. For LTO, it should
663     // produce the same result as if we only do promotion here.
664     PM.add(createPGOIndirectCallPromotionLegacyPass(true));
665 
666     // Propagate constants at call sites into the functions they call.  This
667     // opens opportunities for globalopt (and inlining) by substituting function
668     // pointers passed as arguments to direct uses of functions.
669     PM.add(createIPSCCPPass());
670   }
671 
672   // Infer attributes about definitions. The readnone attribute in particular is
673   // required for virtual constant propagation.
674   PM.add(createPostOrderFunctionAttrsLegacyPass());
675   PM.add(createReversePostOrderFunctionAttrsPass());
676 
677   // Apply whole-program devirtualization and virtual constant propagation.
678   PM.add(createWholeProgramDevirtPass());
679 
680   // That's all we need at opt level 1.
681   if (OptLevel == 1)
682     return;
683 
684   // Now that we internalized some globals, see if we can hack on them!
685   PM.add(createGlobalOptimizerPass());
686   // Promote any localized global vars.
687   PM.add(createPromoteMemoryToRegisterPass());
688 
689   // Linking modules together can lead to duplicated global constants, only
690   // keep one copy of each constant.
691   PM.add(createConstantMergePass());
692 
693   // Remove unused arguments from functions.
694   PM.add(createDeadArgEliminationPass());
695 
696   // Reduce the code after globalopt and ipsccp.  Both can open up significant
697   // simplification opportunities, and both can propagate functions through
698   // function pointers.  When this happens, we often have to resolve varargs
699   // calls, etc, so let instcombine do this.
700   addInstructionCombiningPass(PM);
701   addExtensionsToPM(EP_Peephole, PM);
702 
703   // Inline small functions
704   bool RunInliner = Inliner;
705   if (RunInliner) {
706     PM.add(Inliner);
707     Inliner = nullptr;
708   }
709 
710   PM.add(createPruneEHPass());   // Remove dead EH info.
711 
712   // Optimize globals again if we ran the inliner.
713   if (RunInliner)
714     PM.add(createGlobalOptimizerPass());
715   PM.add(createGlobalDCEPass()); // Remove dead functions.
716 
717   // If we didn't decide to inline a function, check to see if we can
718   // transform it to pass arguments by value instead of by reference.
719   PM.add(createArgumentPromotionPass());
720 
721   // The IPO passes may leave cruft around.  Clean up after them.
722   addInstructionCombiningPass(PM);
723   addExtensionsToPM(EP_Peephole, PM);
724   PM.add(createJumpThreadingPass());
725 
726   // Break up allocas
727   PM.add(createSROAPass());
728 
729   // Run a few AA driven optimizations here and now, to cleanup the code.
730   PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
731   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
732 
733   PM.add(createLICMPass());                 // Hoist loop invariants.
734   if (EnableMLSM)
735     PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
736   PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
737   PM.add(createMemCpyOptPass());            // Remove dead memcpys.
738 
739   // Nuke dead stores.
740   PM.add(createDeadStoreEliminationPass());
741 
742   // More loops are countable; try to optimize them.
743   PM.add(createIndVarSimplifyPass());
744   PM.add(createLoopDeletionPass());
745   if (EnableLoopInterchange)
746     PM.add(createLoopInterchangePass());
747 
748   if (!DisableUnrollLoops)
749     PM.add(createSimpleLoopUnrollPass());   // Unroll small loops
750   PM.add(createLoopVectorizePass(true, LoopVectorize));
751   // The vectorizer may have significantly shortened a loop body; unroll again.
752   if (!DisableUnrollLoops)
753     PM.add(createLoopUnrollPass());
754 
755   // Now that we've optimized loops (in particular loop induction variables),
756   // we may have exposed more scalar opportunities. Run parts of the scalar
757   // optimizer again at this point.
758   addInstructionCombiningPass(PM); // Initial cleanup
759   PM.add(createCFGSimplificationPass()); // if-convert
760   PM.add(createSCCPPass()); // Propagate exposed constants
761   addInstructionCombiningPass(PM); // Clean up again
762   PM.add(createBitTrackingDCEPass());
763 
764   // More scalar chains could be vectorized due to more alias information
765   if (RunSLPAfterLoopVectorization)
766     if (SLPVectorize)
767       PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
768 
769   // After vectorization, assume intrinsics may tell us more about pointer
770   // alignments.
771   PM.add(createAlignmentFromAssumptionsPass());
772 
773   if (LoadCombine)
774     PM.add(createLoadCombinePass());
775 
776   // Cleanup and simplify the code after the scalar optimizations.
777   addInstructionCombiningPass(PM);
778   addExtensionsToPM(EP_Peephole, PM);
779 
780   PM.add(createJumpThreadingPass());
781 }
782 
783 void PassManagerBuilder::addLateLTOOptimizationPasses(
784     legacy::PassManagerBase &PM) {
785   // Delete basic blocks, which optimization passes may have killed.
786   PM.add(createCFGSimplificationPass());
787 
788   // Drop bodies of available externally objects to improve GlobalDCE.
789   PM.add(createEliminateAvailableExternallyPass());
790 
791   // Now that we have optimized the program, discard unreachable functions.
792   PM.add(createGlobalDCEPass());
793 
794   // FIXME: this is profitable (for compiler time) to do at -O0 too, but
795   // currently it damages debug info.
796   if (MergeFunctions)
797     PM.add(createMergeFunctionsPass());
798 }
799 
800 void PassManagerBuilder::populateThinLTOPassManager(
801     legacy::PassManagerBase &PM) {
802   PerformThinLTO = true;
803 
804   if (VerifyInput)
805     PM.add(createVerifierPass());
806 
807   if (ModuleSummary)
808     PM.add(createFunctionImportPass(ModuleSummary));
809 
810   populateModulePassManager(PM);
811 
812   if (VerifyOutput)
813     PM.add(createVerifierPass());
814   PerformThinLTO = false;
815 }
816 
817 void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
818   if (LibraryInfo)
819     PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
820 
821   if (VerifyInput)
822     PM.add(createVerifierPass());
823 
824   if (OptLevel != 0)
825     addLTOOptimizationPasses(PM);
826 
827   // Create a function that performs CFI checks for cross-DSO calls with targets
828   // in the current module.
829   PM.add(createCrossDSOCFIPass());
830 
831   // Lower type metadata and the type.test intrinsic. This pass supports Clang's
832   // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
833   // link time if CFI is enabled. The pass does nothing if CFI is disabled.
834   PM.add(createLowerTypeTestsPass());
835 
836   if (OptLevel != 0)
837     addLateLTOOptimizationPasses(PM);
838 
839   if (VerifyOutput)
840     PM.add(createVerifierPass());
841 }
842 
843 inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
844     return reinterpret_cast<PassManagerBuilder*>(P);
845 }
846 
847 inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
848   return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
849 }
850 
851 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
852   PassManagerBuilder *PMB = new PassManagerBuilder();
853   return wrap(PMB);
854 }
855 
856 void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
857   PassManagerBuilder *Builder = unwrap(PMB);
858   delete Builder;
859 }
860 
861 void
862 LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
863                                   unsigned OptLevel) {
864   PassManagerBuilder *Builder = unwrap(PMB);
865   Builder->OptLevel = OptLevel;
866 }
867 
868 void
869 LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
870                                    unsigned SizeLevel) {
871   PassManagerBuilder *Builder = unwrap(PMB);
872   Builder->SizeLevel = SizeLevel;
873 }
874 
875 void
876 LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
877                                             LLVMBool Value) {
878   PassManagerBuilder *Builder = unwrap(PMB);
879   Builder->DisableUnitAtATime = Value;
880 }
881 
882 void
883 LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
884                                             LLVMBool Value) {
885   PassManagerBuilder *Builder = unwrap(PMB);
886   Builder->DisableUnrollLoops = Value;
887 }
888 
889 void
890 LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
891                                                  LLVMBool Value) {
892   // NOTE: The simplify-libcalls pass has been removed.
893 }
894 
895 void
896 LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
897                                               unsigned Threshold) {
898   PassManagerBuilder *Builder = unwrap(PMB);
899   Builder->Inliner = createFunctionInliningPass(Threshold);
900 }
901 
902 void
903 LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
904                                                   LLVMPassManagerRef PM) {
905   PassManagerBuilder *Builder = unwrap(PMB);
906   legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
907   Builder->populateFunctionPassManager(*FPM);
908 }
909 
910 void
911 LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
912                                                 LLVMPassManagerRef PM) {
913   PassManagerBuilder *Builder = unwrap(PMB);
914   legacy::PassManagerBase *MPM = unwrap(PM);
915   Builder->populateModulePassManager(*MPM);
916 }
917 
918 void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
919                                                   LLVMPassManagerRef PM,
920                                                   LLVMBool Internalize,
921                                                   LLVMBool RunInliner) {
922   PassManagerBuilder *Builder = unwrap(PMB);
923   legacy::PassManagerBase *LPM = unwrap(PM);
924 
925   // A small backwards compatibility hack. populateLTOPassManager used to take
926   // an RunInliner option.
927   if (RunInliner && !Builder->Inliner)
928     Builder->Inliner = createFunctionInliningPass();
929 
930   Builder->populateLTOPassManager(*LPM);
931 }
932