1 //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the PassManagerBuilder class, which is used to set up a
11 // "standard" optimization sequence suitable for languages like C and C++.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
16 #include "llvm-c/Transforms/PassManagerBuilder.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
20 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/InlineCost.h"
23 #include "llvm/Analysis/Passes.h"
24 #include "llvm/Analysis/ScopedNoAliasAA.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/LegacyPassManager.h"
29 #include "llvm/IR/Verifier.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/ManagedStatic.h"
32 #include "llvm/Transforms/IPO.h"
33 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
34 #include "llvm/Transforms/IPO/FunctionAttrs.h"
35 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
36 #include "llvm/Transforms/Instrumentation.h"
37 #include "llvm/Transforms/Scalar.h"
38 #include "llvm/Transforms/Scalar/GVN.h"
39 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
40 #include "llvm/Transforms/Utils.h"
41 #include "llvm/Transforms/Vectorize.h"
42 
43 using namespace llvm;
44 
45 static cl::opt<bool>
46     RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
47                        cl::ZeroOrMore, cl::desc("Run Partial inlinining pass"));
48 
49 static cl::opt<bool>
50     RunLoopVectorization("vectorize-loops", cl::Hidden,
51                          cl::desc("Run the Loop vectorization passes"));
52 
53 static cl::opt<bool>
54 RunSLPVectorization("vectorize-slp", cl::Hidden,
55                     cl::desc("Run the SLP vectorization passes"));
56 
57 static cl::opt<bool>
58 UseGVNAfterVectorization("use-gvn-after-vectorization",
59   cl::init(false), cl::Hidden,
60   cl::desc("Run GVN instead of Early CSE after vectorization passes"));
61 
62 static cl::opt<bool> ExtraVectorizerPasses(
63     "extra-vectorizer-passes", cl::init(false), cl::Hidden,
64     cl::desc("Run cleanup optimization passes after vectorization."));
65 
66 static cl::opt<bool>
67 RunLoopRerolling("reroll-loops", cl::Hidden,
68                  cl::desc("Run the loop rerolling pass"));
69 
70 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
71                                cl::desc("Run the NewGVN pass"));
72 
73 static cl::opt<bool>
74 RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
75   cl::init(true), cl::Hidden,
76   cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
77            "vectorizer instead of before"));
78 
79 // Experimental option to use CFL-AA
80 enum class CFLAAType { None, Steensgaard, Andersen, Both };
81 static cl::opt<CFLAAType>
82     UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden,
83              cl::desc("Enable the new, experimental CFL alias analysis"),
84              cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
85                         clEnumValN(CFLAAType::Steensgaard, "steens",
86                                    "Enable unification-based CFL-AA"),
87                         clEnumValN(CFLAAType::Andersen, "anders",
88                                    "Enable inclusion-based CFL-AA"),
89                         clEnumValN(CFLAAType::Both, "both",
90                                    "Enable both variants of CFL-AA")));
91 
92 static cl::opt<bool> EnableLoopInterchange(
93     "enable-loopinterchange", cl::init(false), cl::Hidden,
94     cl::desc("Enable the new, experimental LoopInterchange Pass"));
95 
96 static cl::opt<bool>
97     EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
98                             cl::desc("Enable preparation for ThinLTO."));
99 
100 static cl::opt<bool> RunPGOInstrGen(
101     "profile-generate", cl::init(false), cl::Hidden,
102     cl::desc("Enable PGO instrumentation."));
103 
104 static cl::opt<std::string>
105     PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
106                       cl::desc("Specify the path of profile data file."));
107 
108 static cl::opt<std::string> RunPGOInstrUse(
109     "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
110     cl::desc("Enable use phase of PGO instrumentation and specify the path "
111              "of profile data file"));
112 
113 static cl::opt<bool> UseLoopVersioningLICM(
114     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
115     cl::desc("Enable the experimental Loop Versioning LICM pass"));
116 
117 static cl::opt<bool>
118     DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
119                       cl::desc("Disable pre-instrumentation inliner"));
120 
121 static cl::opt<int> PreInlineThreshold(
122     "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
123     cl::desc("Control the amount of inlining in pre-instrumentation inliner "
124              "(default = 75)"));
125 
126 static cl::opt<bool> EnableEarlyCSEMemSSA(
127     "enable-earlycse-memssa", cl::init(true), cl::Hidden,
128     cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
129 
130 static cl::opt<bool> EnableGVNHoist(
131     "enable-gvn-hoist", cl::init(false), cl::Hidden,
132     cl::desc("Enable the GVN hoisting pass (default = off)"));
133 
134 static cl::opt<bool>
135     DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
136                               cl::Hidden,
137                               cl::desc("Disable shrink-wrap library calls"));
138 
139 static cl::opt<bool>
140     EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
141                              cl::Hidden,
142                              cl::desc("Enable the simple loop unswitch pass."));
143 
144 static cl::opt<bool> EnableGVNSink(
145     "enable-gvn-sink", cl::init(false), cl::Hidden,
146     cl::desc("Enable the GVN sinking pass (default = off)"));
147 
148 PassManagerBuilder::PassManagerBuilder() {
149     OptLevel = 2;
150     SizeLevel = 0;
151     LibraryInfo = nullptr;
152     Inliner = nullptr;
153     DisableUnrollLoops = false;
154     SLPVectorize = RunSLPVectorization;
155     LoopVectorize = RunLoopVectorization;
156     RerollLoops = RunLoopRerolling;
157     NewGVN = RunNewGVN;
158     DisableGVNLoadPRE = false;
159     VerifyInput = false;
160     VerifyOutput = false;
161     MergeFunctions = false;
162     PrepareForLTO = false;
163     EnablePGOInstrGen = RunPGOInstrGen;
164     PGOInstrGen = PGOOutputFile;
165     PGOInstrUse = RunPGOInstrUse;
166     PrepareForThinLTO = EnablePrepareForThinLTO;
167     PerformThinLTO = false;
168     DivergentTarget = false;
169 }
170 
171 PassManagerBuilder::~PassManagerBuilder() {
172   delete LibraryInfo;
173   delete Inliner;
174 }
175 
176 /// Set of global extensions, automatically added as part of the standard set.
177 static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
178    PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
179 
180 /// Check if GlobalExtensions is constructed and not empty.
181 /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
182 /// the construction of the object.
183 static bool GlobalExtensionsNotEmpty() {
184   return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
185 }
186 
187 void PassManagerBuilder::addGlobalExtension(
188     PassManagerBuilder::ExtensionPointTy Ty,
189     PassManagerBuilder::ExtensionFn Fn) {
190   GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
191 }
192 
193 void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
194   Extensions.push_back(std::make_pair(Ty, std::move(Fn)));
195 }
196 
197 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
198                                            legacy::PassManagerBase &PM) const {
199   if (GlobalExtensionsNotEmpty()) {
200     for (auto &Ext : *GlobalExtensions) {
201       if (Ext.first == ETy)
202         Ext.second(*this, PM);
203     }
204   }
205   for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
206     if (Extensions[i].first == ETy)
207       Extensions[i].second(*this, PM);
208 }
209 
210 void PassManagerBuilder::addInitialAliasAnalysisPasses(
211     legacy::PassManagerBase &PM) const {
212   switch (UseCFLAA) {
213   case CFLAAType::Steensgaard:
214     PM.add(createCFLSteensAAWrapperPass());
215     break;
216   case CFLAAType::Andersen:
217     PM.add(createCFLAndersAAWrapperPass());
218     break;
219   case CFLAAType::Both:
220     PM.add(createCFLSteensAAWrapperPass());
221     PM.add(createCFLAndersAAWrapperPass());
222     break;
223   default:
224     break;
225   }
226 
227   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
228   // BasicAliasAnalysis wins if they disagree. This is intended to help
229   // support "obvious" type-punning idioms.
230   PM.add(createTypeBasedAAWrapperPass());
231   PM.add(createScopedNoAliasAAWrapperPass());
232 }
233 
234 void PassManagerBuilder::addInstructionCombiningPass(
235     legacy::PassManagerBase &PM) const {
236   bool ExpensiveCombines = OptLevel > 2;
237   PM.add(createInstructionCombiningPass(ExpensiveCombines));
238 }
239 
240 void PassManagerBuilder::populateFunctionPassManager(
241     legacy::FunctionPassManager &FPM) {
242   addExtensionsToPM(EP_EarlyAsPossible, FPM);
243   FPM.add(createEntryExitInstrumenterPass());
244 
245   // Add LibraryInfo if we have some.
246   if (LibraryInfo)
247     FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
248 
249   if (OptLevel == 0) return;
250 
251   addInitialAliasAnalysisPasses(FPM);
252 
253   FPM.add(createCFGSimplificationPass());
254   FPM.add(createSROAPass());
255   FPM.add(createEarlyCSEPass());
256   FPM.add(createLowerExpectIntrinsicPass());
257 }
258 
259 // Do PGO instrumentation generation or use pass as the option specified.
260 void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
261   if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
262     return;
263   // Perform the preinline and cleanup passes for O1 and above.
264   // And avoid doing them if optimizing for size.
265   if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
266       PGOSampleUse.empty()) {
267     // Create preinline pass. We construct an InlineParams object and specify
268     // the threshold here to avoid the command line options of the regular
269     // inliner to influence pre-inlining. The only fields of InlineParams we
270     // care about are DefaultThreshold and HintThreshold.
271     InlineParams IP;
272     IP.DefaultThreshold = PreInlineThreshold;
273     // FIXME: The hint threshold has the same value used by the regular inliner.
274     // This should probably be lowered after performance testing.
275     IP.HintThreshold = 325;
276 
277     MPM.add(createFunctionInliningPass(IP));
278     MPM.add(createSROAPass());
279     MPM.add(createEarlyCSEPass());             // Catch trivial redundancies
280     MPM.add(createCFGSimplificationPass());    // Merge & remove BBs
281     MPM.add(createInstructionCombiningPass()); // Combine silly seq's
282     addExtensionsToPM(EP_Peephole, MPM);
283   }
284   if (EnablePGOInstrGen) {
285     MPM.add(createPGOInstrumentationGenLegacyPass());
286     // Add the profile lowering pass.
287     InstrProfOptions Options;
288     if (!PGOInstrGen.empty())
289       Options.InstrProfileOutput = PGOInstrGen;
290     Options.DoCounterPromotion = true;
291     MPM.add(createLoopRotatePass());
292     MPM.add(createInstrProfilingLegacyPass(Options));
293   }
294   if (!PGOInstrUse.empty())
295     MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
296   // Indirect call promotion that promotes intra-module targets only.
297   // For ThinLTO this is done earlier due to interactions with globalopt
298   // for imported functions. We don't run this at -O0.
299   if (OptLevel > 0)
300     MPM.add(
301         createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
302 }
303 void PassManagerBuilder::addFunctionSimplificationPasses(
304     legacy::PassManagerBase &MPM) {
305   // Start of function pass.
306   // Break up aggregate allocas, using SSAUpdater.
307   MPM.add(createSROAPass());
308   MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
309   if (EnableGVNHoist)
310     MPM.add(createGVNHoistPass());
311   if (EnableGVNSink) {
312     MPM.add(createGVNSinkPass());
313     MPM.add(createCFGSimplificationPass());
314   }
315 
316   // Speculative execution if the target has divergent branches; otherwise nop.
317   MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
318   MPM.add(createJumpThreadingPass());         // Thread jumps.
319   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
320   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
321   // Combine silly seq's
322   if (OptLevel > 2)
323     MPM.add(createAggressiveInstCombinerPass());
324   addInstructionCombiningPass(MPM);
325   if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
326     MPM.add(createLibCallsShrinkWrapPass());
327   addExtensionsToPM(EP_Peephole, MPM);
328 
329   // Optimize memory intrinsic calls based on the profiled size information.
330   if (SizeLevel == 0)
331     MPM.add(createPGOMemOPSizeOptLegacyPass());
332 
333   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
334   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
335   MPM.add(createReassociatePass());           // Reassociate expressions
336   // Rotate Loop - disable header duplication at -Oz
337   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
338   MPM.add(createLICMPass());                  // Hoist loop invariants
339   if (EnableSimpleLoopUnswitch)
340     MPM.add(createSimpleLoopUnswitchLegacyPass());
341   else
342     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
343   MPM.add(createCFGSimplificationPass());
344   addInstructionCombiningPass(MPM);
345   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
346   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
347   addExtensionsToPM(EP_LateLoopOptimizations, MPM);
348   MPM.add(createLoopDeletionPass());          // Delete dead loops
349 
350   if (EnableLoopInterchange) {
351     MPM.add(createLoopInterchangePass()); // Interchange loops
352     MPM.add(createCFGSimplificationPass());
353   }
354   if (!DisableUnrollLoops)
355     MPM.add(createSimpleLoopUnrollPass(OptLevel));    // Unroll small loops
356   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
357 
358   if (OptLevel > 1) {
359     MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
360     MPM.add(NewGVN ? createNewGVNPass()
361                    : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
362   }
363   MPM.add(createMemCpyOptPass());             // Remove memcpy / form memset
364   MPM.add(createSCCPPass());                  // Constant prop with SCCP
365 
366   // Delete dead bit computations (instcombine runs after to fold away the dead
367   // computations, and then ADCE will run later to exploit any new DCE
368   // opportunities that creates).
369   MPM.add(createBitTrackingDCEPass());        // Delete dead bit computations
370 
371   // Run instcombine after redundancy elimination to exploit opportunities
372   // opened up by them.
373   addInstructionCombiningPass(MPM);
374   addExtensionsToPM(EP_Peephole, MPM);
375   MPM.add(createJumpThreadingPass());         // Thread jumps
376   MPM.add(createCorrelatedValuePropagationPass());
377   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
378   MPM.add(createLICMPass());
379 
380   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
381 
382   if (RerollLoops)
383     MPM.add(createLoopRerollPass());
384   if (!RunSLPAfterLoopVectorization && SLPVectorize)
385     MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
386 
387   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
388   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
389   // Clean up after everything.
390   addInstructionCombiningPass(MPM);
391   addExtensionsToPM(EP_Peephole, MPM);
392 }
393 
394 void PassManagerBuilder::populateModulePassManager(
395     legacy::PassManagerBase &MPM) {
396   if (!PGOSampleUse.empty()) {
397     MPM.add(createPruneEHPass());
398     MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
399   }
400 
401   // Allow forcing function attributes as a debugging and tuning aid.
402   MPM.add(createForceFunctionAttrsLegacyPass());
403 
404   // If all optimizations are disabled, just run the always-inline pass and,
405   // if enabled, the function merging pass.
406   if (OptLevel == 0) {
407     addPGOInstrPasses(MPM);
408     if (Inliner) {
409       MPM.add(Inliner);
410       Inliner = nullptr;
411     }
412 
413     // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
414     // creates a CGSCC pass manager, but we don't want to add extensions into
415     // that pass manager. To prevent this we insert a no-op module pass to reset
416     // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
417     // builds. The function merging pass is
418     if (MergeFunctions)
419       MPM.add(createMergeFunctionsPass());
420     else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
421       MPM.add(createBarrierNoopPass());
422 
423     if (PerformThinLTO) {
424       // Drop available_externally and unreferenced globals. This is necessary
425       // with ThinLTO in order to avoid leaving undefined references to dead
426       // globals in the object file.
427       MPM.add(createEliminateAvailableExternallyPass());
428       MPM.add(createGlobalDCEPass());
429     }
430 
431     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
432 
433     // Rename anon globals to be able to export them in the summary.
434     // This has to be done after we add the extensions to the pass manager
435     // as there could be passes (e.g. Adddress sanitizer) which introduce
436     // new unnamed globals.
437     if (PrepareForThinLTO)
438       MPM.add(createNameAnonGlobalPass());
439     return;
440   }
441 
442   // Add LibraryInfo if we have some.
443   if (LibraryInfo)
444     MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
445 
446   addInitialAliasAnalysisPasses(MPM);
447 
448   // For ThinLTO there are two passes of indirect call promotion. The
449   // first is during the compile phase when PerformThinLTO=false and
450   // intra-module indirect call targets are promoted. The second is during
451   // the ThinLTO backend when PerformThinLTO=true, when we promote imported
452   // inter-module indirect calls. For that we perform indirect call promotion
453   // earlier in the pass pipeline, here before globalopt. Otherwise imported
454   // available_externally functions look unreferenced and are removed.
455   if (PerformThinLTO)
456     MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
457                                                      !PGOSampleUse.empty()));
458 
459   // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
460   // as it will change the CFG too much to make the 2nd profile annotation
461   // in backend more difficult.
462   bool PrepareForThinLTOUsingPGOSampleProfile =
463       PrepareForThinLTO && !PGOSampleUse.empty();
464   if (PrepareForThinLTOUsingPGOSampleProfile)
465     DisableUnrollLoops = true;
466 
467   // Infer attributes about declarations if possible.
468   MPM.add(createInferFunctionAttrsLegacyPass());
469 
470   addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
471 
472   if (OptLevel > 2)
473     MPM.add(createCallSiteSplittingPass());
474 
475   MPM.add(createIPSCCPPass());          // IP SCCP
476   MPM.add(createCalledValuePropagationPass());
477   MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
478   // Promote any localized global vars.
479   MPM.add(createPromoteMemoryToRegisterPass());
480 
481   MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
482 
483   addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
484   addExtensionsToPM(EP_Peephole, MPM);
485   MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
486 
487   // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
488   // call promotion as it will change the CFG too much to make the 2nd
489   // profile annotation in backend more difficult.
490   // PGO instrumentation is added during the compile phase for ThinLTO, do
491   // not run it a second time
492   if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
493     addPGOInstrPasses(MPM);
494 
495   // We add a module alias analysis pass here. In part due to bugs in the
496   // analysis infrastructure this "works" in that the analysis stays alive
497   // for the entire SCC pass run below.
498   MPM.add(createGlobalsAAWrapperPass());
499 
500   // Start of CallGraph SCC passes.
501   MPM.add(createPruneEHPass()); // Remove dead EH info
502   bool RunInliner = false;
503   if (Inliner) {
504     MPM.add(Inliner);
505     Inliner = nullptr;
506     RunInliner = true;
507   }
508 
509   MPM.add(createPostOrderFunctionAttrsLegacyPass());
510   if (OptLevel > 2)
511     MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
512 
513   addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
514   addFunctionSimplificationPasses(MPM);
515 
516   // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
517   // pass manager that we are specifically trying to avoid. To prevent this
518   // we must insert a no-op module pass to reset the pass manager.
519   MPM.add(createBarrierNoopPass());
520 
521   if (RunPartialInlining)
522     MPM.add(createPartialInliningPass());
523 
524   if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)
525     // Remove avail extern fns and globals definitions if we aren't
526     // compiling an object file for later LTO. For LTO we want to preserve
527     // these so they are eligible for inlining at link-time. Note if they
528     // are unreferenced they will be removed by GlobalDCE later, so
529     // this only impacts referenced available externally globals.
530     // Eventually they will be suppressed during codegen, but eliminating
531     // here enables more opportunity for GlobalDCE as it may make
532     // globals referenced by available external functions dead
533     // and saves running remaining passes on the eliminated functions.
534     MPM.add(createEliminateAvailableExternallyPass());
535 
536   MPM.add(createReversePostOrderFunctionAttrsPass());
537 
538   // The inliner performs some kind of dead code elimination as it goes,
539   // but there are cases that are not really caught by it. We might
540   // at some point consider teaching the inliner about them, but it
541   // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
542   // benefits generally outweight the cost, making the whole pipeline
543   // faster.
544   if (RunInliner) {
545     MPM.add(createGlobalOptimizerPass());
546     MPM.add(createGlobalDCEPass());
547   }
548 
549   // If we are planning to perform ThinLTO later, let's not bloat the code with
550   // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
551   // during ThinLTO and perform the rest of the optimizations afterward.
552   if (PrepareForThinLTO) {
553     // Ensure we perform any last passes, but do so before renaming anonymous
554     // globals in case the passes add any.
555     addExtensionsToPM(EP_OptimizerLast, MPM);
556     // Rename anon globals to be able to export them in the summary.
557     MPM.add(createNameAnonGlobalPass());
558     return;
559   }
560 
561   if (PerformThinLTO)
562     // Optimize globals now when performing ThinLTO, this enables more
563     // optimizations later.
564     MPM.add(createGlobalOptimizerPass());
565 
566   // Scheduling LoopVersioningLICM when inlining is over, because after that
567   // we may see more accurate aliasing. Reason to run this late is that too
568   // early versioning may prevent further inlining due to increase of code
569   // size. By placing it just after inlining other optimizations which runs
570   // later might get benefit of no-alias assumption in clone loop.
571   if (UseLoopVersioningLICM) {
572     MPM.add(createLoopVersioningLICMPass());    // Do LoopVersioningLICM
573     MPM.add(createLICMPass());                  // Hoist loop invariants
574   }
575 
576   // We add a fresh GlobalsModRef run at this point. This is particularly
577   // useful as the above will have inlined, DCE'ed, and function-attr
578   // propagated everything. We should at this point have a reasonably minimal
579   // and richly annotated call graph. By computing aliasing and mod/ref
580   // information for all local globals here, the late loop passes and notably
581   // the vectorizer will be able to use them to help recognize vectorizable
582   // memory operations.
583   //
584   // Note that this relies on a bug in the pass manager which preserves
585   // a module analysis into a function pass pipeline (and throughout it) so
586   // long as the first function pass doesn't invalidate the module analysis.
587   // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
588   // this to work. Fortunately, it is trivial to preserve AliasAnalysis
589   // (doing nothing preserves it as it is required to be conservatively
590   // correct in the face of IR changes).
591   MPM.add(createGlobalsAAWrapperPass());
592 
593   MPM.add(createFloat2IntPass());
594 
595   addExtensionsToPM(EP_VectorizerStart, MPM);
596 
597   // Re-rotate loops in all our loop nests. These may have fallout out of
598   // rotated form due to GVN or other transformations, and the vectorizer relies
599   // on the rotated form. Disable header duplication at -Oz.
600   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
601 
602   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
603   // into separate loop that would otherwise inhibit vectorization.  This is
604   // currently only performed for loops marked with the metadata
605   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
606   MPM.add(createLoopDistributePass());
607 
608   MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
609 
610   // Eliminate loads by forwarding stores from the previous iteration to loads
611   // of the current iteration.
612   MPM.add(createLoopLoadEliminationPass());
613 
614   // FIXME: Because of #pragma vectorize enable, the passes below are always
615   // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
616   // on -O1 and no #pragma is found). Would be good to have these two passes
617   // as function calls, so that we can only pass them when the vectorizer
618   // changed the code.
619   addInstructionCombiningPass(MPM);
620   if (OptLevel > 1 && ExtraVectorizerPasses) {
621     // At higher optimization levels, try to clean up any runtime overlap and
622     // alignment checks inserted by the vectorizer. We want to track correllated
623     // runtime checks for two inner loops in the same outer loop, fold any
624     // common computations, hoist loop-invariant aspects out of any outer loop,
625     // and unswitch the runtime checks if possible. Once hoisted, we may have
626     // dead (or speculatable) control flows or more combining opportunities.
627     MPM.add(createEarlyCSEPass());
628     MPM.add(createCorrelatedValuePropagationPass());
629     addInstructionCombiningPass(MPM);
630     MPM.add(createLICMPass());
631     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
632     MPM.add(createCFGSimplificationPass());
633     addInstructionCombiningPass(MPM);
634   }
635 
636   // Cleanup after loop vectorization, etc. Simplification passes like CVP and
637   // GVN, loop transforms, and others have already run, so it's now better to
638   // convert to more optimized IR using more aggressive simplify CFG options.
639   // The extra sinking transform can create larger basic blocks, so do this
640   // before SLP vectorization.
641   MPM.add(createCFGSimplificationPass(1, true, true, false, true));
642 
643   if (RunSLPAfterLoopVectorization && SLPVectorize) {
644     MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
645     if (OptLevel > 1 && ExtraVectorizerPasses) {
646       MPM.add(createEarlyCSEPass());
647     }
648   }
649 
650   addExtensionsToPM(EP_Peephole, MPM);
651   addInstructionCombiningPass(MPM);
652 
653   if (!DisableUnrollLoops) {
654     MPM.add(createLoopUnrollPass(OptLevel));    // Unroll small loops
655 
656     // LoopUnroll may generate some redundency to cleanup.
657     addInstructionCombiningPass(MPM);
658 
659     // Runtime unrolling will introduce runtime check in loop prologue. If the
660     // unrolled loop is a inner loop, then the prologue will be inside the
661     // outer loop. LICM pass can help to promote the runtime check out if the
662     // checked value is loop invariant.
663     MPM.add(createLICMPass());
664  }
665 
666   // After vectorization and unrolling, assume intrinsics may tell us more
667   // about pointer alignments.
668   MPM.add(createAlignmentFromAssumptionsPass());
669 
670   // FIXME: We shouldn't bother with this anymore.
671   MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
672 
673   // GlobalOpt already deletes dead functions and globals, at -O2 try a
674   // late pass of GlobalDCE.  It is capable of deleting dead cycles.
675   if (OptLevel > 1) {
676     MPM.add(createGlobalDCEPass());         // Remove dead fns and globals.
677     MPM.add(createConstantMergePass());     // Merge dup global constants
678   }
679 
680   if (MergeFunctions)
681     MPM.add(createMergeFunctionsPass());
682 
683   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
684   // canonicalization pass that enables other optimizations. As a result,
685   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
686   // result too early.
687   MPM.add(createLoopSinkPass());
688   // Get rid of LCSSA nodes.
689   MPM.add(createInstructionSimplifierPass());
690 
691   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
692   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
693   // flattening of blocks.
694   MPM.add(createDivRemPairsPass());
695 
696   // LoopSink (and other loop passes since the last simplifyCFG) might have
697   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
698   MPM.add(createCFGSimplificationPass());
699 
700   addExtensionsToPM(EP_OptimizerLast, MPM);
701 }
702 
703 void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
704   // Remove unused virtual tables to improve the quality of code generated by
705   // whole-program devirtualization and bitset lowering.
706   PM.add(createGlobalDCEPass());
707 
708   // Provide AliasAnalysis services for optimizations.
709   addInitialAliasAnalysisPasses(PM);
710 
711   // Allow forcing function attributes as a debugging and tuning aid.
712   PM.add(createForceFunctionAttrsLegacyPass());
713 
714   // Infer attributes about declarations if possible.
715   PM.add(createInferFunctionAttrsLegacyPass());
716 
717   if (OptLevel > 1) {
718     // Split call-site with more constrained arguments.
719     PM.add(createCallSiteSplittingPass());
720 
721     // Indirect call promotion. This should promote all the targets that are
722     // left by the earlier promotion pass that promotes intra-module targets.
723     // This two-step promotion is to save the compile time. For LTO, it should
724     // produce the same result as if we only do promotion here.
725     PM.add(
726         createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
727 
728     // Propagate constants at call sites into the functions they call.  This
729     // opens opportunities for globalopt (and inlining) by substituting function
730     // pointers passed as arguments to direct uses of functions.
731     PM.add(createIPSCCPPass());
732 
733     // Attach metadata to indirect call sites indicating the set of functions
734     // they may target at run-time. This should follow IPSCCP.
735     PM.add(createCalledValuePropagationPass());
736   }
737 
738   // Infer attributes about definitions. The readnone attribute in particular is
739   // required for virtual constant propagation.
740   PM.add(createPostOrderFunctionAttrsLegacyPass());
741   PM.add(createReversePostOrderFunctionAttrsPass());
742 
743   // Split globals using inrange annotations on GEP indices. This can help
744   // improve the quality of generated code when virtual constant propagation or
745   // control flow integrity are enabled.
746   PM.add(createGlobalSplitPass());
747 
748   // Apply whole-program devirtualization and virtual constant propagation.
749   PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
750 
751   // That's all we need at opt level 1.
752   if (OptLevel == 1)
753     return;
754 
755   // Now that we internalized some globals, see if we can hack on them!
756   PM.add(createGlobalOptimizerPass());
757   // Promote any localized global vars.
758   PM.add(createPromoteMemoryToRegisterPass());
759 
760   // Linking modules together can lead to duplicated global constants, only
761   // keep one copy of each constant.
762   PM.add(createConstantMergePass());
763 
764   // Remove unused arguments from functions.
765   PM.add(createDeadArgEliminationPass());
766 
767   // Reduce the code after globalopt and ipsccp.  Both can open up significant
768   // simplification opportunities, and both can propagate functions through
769   // function pointers.  When this happens, we often have to resolve varargs
770   // calls, etc, so let instcombine do this.
771   if (OptLevel > 2)
772     PM.add(createAggressiveInstCombinerPass());
773   addInstructionCombiningPass(PM);
774   addExtensionsToPM(EP_Peephole, PM);
775 
776   // Inline small functions
777   bool RunInliner = Inliner;
778   if (RunInliner) {
779     PM.add(Inliner);
780     Inliner = nullptr;
781   }
782 
783   PM.add(createPruneEHPass());   // Remove dead EH info.
784 
785   // Optimize globals again if we ran the inliner.
786   if (RunInliner)
787     PM.add(createGlobalOptimizerPass());
788   PM.add(createGlobalDCEPass()); // Remove dead functions.
789 
790   // If we didn't decide to inline a function, check to see if we can
791   // transform it to pass arguments by value instead of by reference.
792   PM.add(createArgumentPromotionPass());
793 
794   // The IPO passes may leave cruft around.  Clean up after them.
795   addInstructionCombiningPass(PM);
796   addExtensionsToPM(EP_Peephole, PM);
797   PM.add(createJumpThreadingPass());
798 
799   // Break up allocas
800   PM.add(createSROAPass());
801 
802   // Run a few AA driven optimizations here and now, to cleanup the code.
803   PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
804   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
805 
806   PM.add(createLICMPass());                 // Hoist loop invariants.
807   PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
808   PM.add(NewGVN ? createNewGVNPass()
809                 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
810   PM.add(createMemCpyOptPass());            // Remove dead memcpys.
811 
812   // Nuke dead stores.
813   PM.add(createDeadStoreEliminationPass());
814 
815   // More loops are countable; try to optimize them.
816   PM.add(createIndVarSimplifyPass());
817   PM.add(createLoopDeletionPass());
818   if (EnableLoopInterchange)
819     PM.add(createLoopInterchangePass());
820 
821   if (!DisableUnrollLoops)
822     PM.add(createSimpleLoopUnrollPass(OptLevel));   // Unroll small loops
823   PM.add(createLoopVectorizePass(true, LoopVectorize));
824   // The vectorizer may have significantly shortened a loop body; unroll again.
825   if (!DisableUnrollLoops)
826     PM.add(createLoopUnrollPass(OptLevel));
827 
828   // Now that we've optimized loops (in particular loop induction variables),
829   // we may have exposed more scalar opportunities. Run parts of the scalar
830   // optimizer again at this point.
831   addInstructionCombiningPass(PM); // Initial cleanup
832   PM.add(createCFGSimplificationPass()); // if-convert
833   PM.add(createSCCPPass()); // Propagate exposed constants
834   addInstructionCombiningPass(PM); // Clean up again
835   PM.add(createBitTrackingDCEPass());
836 
837   // More scalar chains could be vectorized due to more alias information
838   if (RunSLPAfterLoopVectorization)
839     if (SLPVectorize)
840       PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
841 
842   // After vectorization, assume intrinsics may tell us more about pointer
843   // alignments.
844   PM.add(createAlignmentFromAssumptionsPass());
845 
846   // Cleanup and simplify the code after the scalar optimizations.
847   addInstructionCombiningPass(PM);
848   addExtensionsToPM(EP_Peephole, PM);
849 
850   PM.add(createJumpThreadingPass());
851 }
852 
853 void PassManagerBuilder::addLateLTOOptimizationPasses(
854     legacy::PassManagerBase &PM) {
855   // Delete basic blocks, which optimization passes may have killed.
856   PM.add(createCFGSimplificationPass());
857 
858   // Drop bodies of available externally objects to improve GlobalDCE.
859   PM.add(createEliminateAvailableExternallyPass());
860 
861   // Now that we have optimized the program, discard unreachable functions.
862   PM.add(createGlobalDCEPass());
863 
864   // FIXME: this is profitable (for compiler time) to do at -O0 too, but
865   // currently it damages debug info.
866   if (MergeFunctions)
867     PM.add(createMergeFunctionsPass());
868 }
869 
870 void PassManagerBuilder::populateThinLTOPassManager(
871     legacy::PassManagerBase &PM) {
872   PerformThinLTO = true;
873 
874   if (VerifyInput)
875     PM.add(createVerifierPass());
876 
877   if (ImportSummary) {
878     // These passes import type identifier resolutions for whole-program
879     // devirtualization and CFI. They must run early because other passes may
880     // disturb the specific instruction patterns that these passes look for,
881     // creating dependencies on resolutions that may not appear in the summary.
882     //
883     // For example, GVN may transform the pattern assume(type.test) appearing in
884     // two basic blocks into assume(phi(type.test, type.test)), which would
885     // transform a dependency on a WPD resolution into a dependency on a type
886     // identifier resolution for CFI.
887     //
888     // Also, WPD has access to more precise information than ICP and can
889     // devirtualize more effectively, so it should operate on the IR first.
890     PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
891     PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
892   }
893 
894   populateModulePassManager(PM);
895 
896   if (VerifyOutput)
897     PM.add(createVerifierPass());
898   PerformThinLTO = false;
899 }
900 
901 void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
902   if (LibraryInfo)
903     PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
904 
905   if (VerifyInput)
906     PM.add(createVerifierPass());
907 
908   if (OptLevel != 0)
909     addLTOOptimizationPasses(PM);
910   else {
911     // The whole-program-devirt pass needs to run at -O0 because only it knows
912     // about the llvm.type.checked.load intrinsic: it needs to both lower the
913     // intrinsic itself and handle it in the summary.
914     PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
915   }
916 
917   // Create a function that performs CFI checks for cross-DSO calls with targets
918   // in the current module.
919   PM.add(createCrossDSOCFIPass());
920 
921   // Lower type metadata and the type.test intrinsic. This pass supports Clang's
922   // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
923   // link time if CFI is enabled. The pass does nothing if CFI is disabled.
924   PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
925 
926   if (OptLevel != 0)
927     addLateLTOOptimizationPasses(PM);
928 
929   if (VerifyOutput)
930     PM.add(createVerifierPass());
931 }
932 
933 inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
934     return reinterpret_cast<PassManagerBuilder*>(P);
935 }
936 
937 inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
938   return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
939 }
940 
941 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
942   PassManagerBuilder *PMB = new PassManagerBuilder();
943   return wrap(PMB);
944 }
945 
946 void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
947   PassManagerBuilder *Builder = unwrap(PMB);
948   delete Builder;
949 }
950 
951 void
952 LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
953                                   unsigned OptLevel) {
954   PassManagerBuilder *Builder = unwrap(PMB);
955   Builder->OptLevel = OptLevel;
956 }
957 
958 void
959 LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
960                                    unsigned SizeLevel) {
961   PassManagerBuilder *Builder = unwrap(PMB);
962   Builder->SizeLevel = SizeLevel;
963 }
964 
965 void
966 LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
967                                             LLVMBool Value) {
968   // NOTE: The DisableUnitAtATime switch has been removed.
969 }
970 
971 void
972 LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
973                                             LLVMBool Value) {
974   PassManagerBuilder *Builder = unwrap(PMB);
975   Builder->DisableUnrollLoops = Value;
976 }
977 
978 void
979 LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
980                                                  LLVMBool Value) {
981   // NOTE: The simplify-libcalls pass has been removed.
982 }
983 
984 void
985 LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
986                                               unsigned Threshold) {
987   PassManagerBuilder *Builder = unwrap(PMB);
988   Builder->Inliner = createFunctionInliningPass(Threshold);
989 }
990 
991 void
992 LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
993                                                   LLVMPassManagerRef PM) {
994   PassManagerBuilder *Builder = unwrap(PMB);
995   legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
996   Builder->populateFunctionPassManager(*FPM);
997 }
998 
999 void
1000 LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
1001                                                 LLVMPassManagerRef PM) {
1002   PassManagerBuilder *Builder = unwrap(PMB);
1003   legacy::PassManagerBase *MPM = unwrap(PM);
1004   Builder->populateModulePassManager(*MPM);
1005 }
1006 
1007 void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
1008                                                   LLVMPassManagerRef PM,
1009                                                   LLVMBool Internalize,
1010                                                   LLVMBool RunInliner) {
1011   PassManagerBuilder *Builder = unwrap(PMB);
1012   legacy::PassManagerBase *LPM = unwrap(PM);
1013 
1014   // A small backwards compatibility hack. populateLTOPassManager used to take
1015   // an RunInliner option.
1016   if (RunInliner && !Builder->Inliner)
1017     Builder->Inliner = createFunctionInliningPass();
1018 
1019   Builder->populateLTOPassManager(*LPM);
1020 }
1021