1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // OpenMP specific optimizations:
10 //
11 // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/OpenMPOpt.h"
16 
17 #include "llvm/ADT/EnumeratedArray.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CallGraph.h"
20 #include "llvm/Analysis/CallGraphSCCPass.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Frontend/OpenMP/OMPConstants.h"
23 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Transforms/IPO.h"
27 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
28 
29 using namespace llvm;
30 using namespace omp;
31 using namespace types;
32 
33 #define DEBUG_TYPE "openmp-opt"
34 
35 static cl::opt<bool> DisableOpenMPOptimizations(
36     "openmp-opt-disable", cl::ZeroOrMore,
37     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
38     cl::init(false));
39 
40 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
41           "Number of OpenMP runtime calls deduplicated");
42 STATISTIC(NumOpenMPParallelRegionsDeleted,
43           "Number of OpenMP parallel regions deleted");
44 STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
45           "Number of OpenMP runtime functions identified");
46 STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
47           "Number of OpenMP runtime function uses identified");
48 
49 #if !defined(NDEBUG)
50 static constexpr auto TAG = "[" DEBUG_TYPE "]";
51 #endif
52 
53 namespace {
/// Driver for all OpenMP-specific optimizations on one call-graph SCC.
///
/// An instance is created per SCC (by either the new-PM or legacy pass below)
/// and operates on the functions in \p SCC, restricted to the \p ModuleSlice
/// it is allowed to inspect.
struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC,
            SmallPtrSetImpl<Function *> &ModuleSlice,
            CallGraphUpdater &CGUpdater, OptimizationRemarkGetter OREGetter)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
        OMPBuilder(M), CGUpdater(CGUpdater), OREGetter(OREGetter) {
    // Populate the OpenMP type cache, the runtime function table (RFIs), and
    // the IR builder before any transformation runs.
    initializeTypes(M);
    initializeRuntimeFunctions();
    OMPBuilder.initialize();
  }

  /// Generic information that describes a runtime function
  struct RuntimeFunctionInfo {

    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Uses of this runtime function per function containing the use.
    using UseVector = SmallVector<Use *, 16>;

    /// Return the vector of uses in function \p F, creating an empty vector
    /// on first access.
    UseVector &getOrCreateUseVector(Function *F) {
      std::unique_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_unique<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
      SmallVector<unsigned, 8> ToBeDeleted;
      for (auto &It : UsesMap) {
        ToBeDeleted.clear();
        unsigned Idx = 0;
        UseVector &UV = *It.second;
        for (Use *U : UV) {
          if (CB(*U, *It.first))
            ToBeDeleted.push_back(Idx);
          ++Idx;
        }

        // Remove the to-be-deleted indices in reverse order as prior
        // modifications will not modify the smaller indices. Each removal is
        // a swap-with-back followed by pop_back, so element order within the
        // vector is not preserved.
        while (!ToBeDeleted.empty()) {
          unsigned Idx = ToBeDeleted.pop_back_val();
          UV[Idx] = UV.back();
          UV.pop_back();
        }
      }
    }

  private:
    /// Map from functions to all uses of this runtime function contained in
    /// them. The nullptr key collects uses outside of any instruction, e.g.,
    /// in global initializers (see initializeRuntimeFunctions).
    DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap;
  };

  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
  /// Returns true if any IR was changed.
  bool run() {
    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions in a slice with " << ModuleSlice.size()
                      << " functions\n");

    Changed |= deduplicateRuntimeCalls();
    Changed |= deleteParallelRegions();

    return Changed;
  }

private:
  /// Try to delete parallel regions if possible.
  ///
  /// A `__kmpc_fork_call` is removed when its outlined callee is read-only
  /// and guaranteed to return, since executing it then has no observable
  /// effect.
  bool deleteParallelRegions() {
    // Position of the outlined body in the __kmpc_fork_call argument list.
    const unsigned CallbackCalleeOperand = 2;

    RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call];
    if (!RFI.Declaration)
      return false;

    bool Changed = false;
    auto DeleteCallCB = [&](Use &U, Function &) {
      CallInst *CI = getCallIfRegularCall(U);
      if (!CI)
        return false;
      auto *Fn = dyn_cast<Function>(
          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
      if (!Fn)
        return false;
      // The region must be side-effect free and terminating to be deletable.
      if (!Fn->onlyReadsMemory())
        return false;
      if (!Fn->hasFnAttribute(Attribute::WillReturn))
        return false;

      LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
                        << CI->getCaller()->getName() << "\n");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Parallel region in "
                  << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
                  << " deleted";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
                                     Remark);

      CGUpdater.removeCallSite(*CI);
      CI->eraseFromParent();
      Changed = true;
      ++NumOpenMPParallelRegionsDeleted;
      return true;
    };

    RFI.foreachUse(DeleteCallCB);

    return Changed;
  }

  /// Try to eliminate runtime calls by reusing existing ones.
  bool deduplicateRuntimeCalls() {
    bool Changed = false;

    // Runtime queries whose result is invariant within a function, so
    // repeated calls can be collapsed into one.
    RuntimeFunction DeduplicableRuntimeCallIDs[] = {
        OMPRTL_omp_get_num_threads,
        OMPRTL_omp_in_parallel,
        OMPRTL_omp_get_cancellation,
        OMPRTL_omp_get_thread_limit,
        OMPRTL_omp_get_supported_active_levels,
        OMPRTL_omp_get_level,
        OMPRTL_omp_get_ancestor_thread_num,
        OMPRTL_omp_get_team_size,
        OMPRTL_omp_get_active_level,
        OMPRTL_omp_in_final,
        OMPRTL_omp_get_proc_bind,
        OMPRTL_omp_get_num_places,
        OMPRTL_omp_get_num_procs,
        OMPRTL_omp_get_place_num,
        OMPRTL_omp_get_partition_num_places,
        OMPRTL_omp_get_partition_place_nums};

    // Global-tid is handled separately.
    SmallSetVector<Value *, 16> GTIdArgs;
    collectGlobalThreadIdArguments(GTIdArgs);
    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                      << " global thread ID arguments\n");

    for (Function *F : SCC) {
      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
        deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);

      // __kmpc_global_thread_num is special as we can replace it with an
      // argument in enough cases to make it worth trying.
      Value *GTIdArg = nullptr;
      for (Argument &Arg : F->args())
        if (GTIdArgs.count(&Arg)) {
          GTIdArg = &Arg;
          break;
        }
      Changed |= deduplicateRuntimeCalls(
          *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
    }

    return Changed;
  }

  /// Combine \p CurrentIdent with \p NextIdent, preferring a single common
  /// value. \p SingleChoice is cleared once more than one distinct candidate
  /// has been seen. Returns nullptr if no acceptable ident was found.
  static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                    bool GlobalOnly, bool &SingleChoice) {
    if (CurrentIdent == NextIdent)
      return CurrentIdent;

    // TODO: Figure out how to actually combine multiple debug locations. For
    //       now we just keep an existing one if there is a single choice.
    if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
      SingleChoice = !CurrentIdent;
      return NextIdent;
    }
    return nullptr;
  }

  /// Return a `struct ident_t*` value that represents the ones used in the
  /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
  /// return a local `struct ident_t*`. For now, if we cannot find a suitable
  /// return value we create one from scratch. We also do not yet combine
  /// information, e.g., the source locations, see combinedIdentStruct.
  Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F,
                                        bool GlobalOnly) {
    bool SingleChoice = true;
    Value *Ident = nullptr;
    auto CombineIdentStruct = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || &F != &Caller)
        return false;
      // NOTE(review): the GlobalOnly parameter of this method is ignored
      // here; the call hard-codes true. Confirm whether the parameter should
      // be forwarded instead.
      Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
                                  /* GlobalOnly */ true, SingleChoice);
      return false;
    };
    RFI.foreachUse(CombineIdentStruct);

    if (!Ident || !SingleChoice) {
      // The IRBuilder uses the insertion block to get to the module, this is
      // unfortunate but we work around it for now.
      if (!OMPBuilder.getInsertionPoint().getBlock())
        OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
            &F.getEntryBlock(), F.getEntryBlock().begin()));
      // Create a fallback location if none was found.
      // TODO: Use the debug locations of the calls instead.
      Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr();
      Ident = OMPBuilder.getOrCreateIdent(Loc);
    }
    return Ident;
  }

  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given. Returns true if any call was replaced and erased.
  bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
                               Value *ReplVal = nullptr) {
    // Deduplication only pays off with at least two values involved; a
    // provided replacement value counts as one.
    auto *UV = RFI.getUseVector(F);
    if (!UV || UV->size() + (ReplVal != nullptr) < 2)
      return false;

    // FIXME: the trailing << "\n" below duplicates the newline already
    //        contained in the ternary operands.
    LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << UV->size() << " uses of "
                      << RFI.Name
                      << (ReplVal ? " with an existing value\n" : "\n")
                      << "\n");
    assert((!ReplVal || (isa<Argument>(ReplVal) &&
                         cast<Argument>(ReplVal)->getParent() == &F)) &&
           "Unexpected replacement value!");

    // TODO: Use dominance to find a good position instead.
    // A call can be hoisted to the entry block only if none of its arguments
    // (past a leading ident_t*) is an instruction that might not dominate it.
    auto CanBeMoved = [](CallBase &CB) {
      unsigned NumArgs = CB.getNumArgOperands();
      if (NumArgs == 0)
        return true;
      if (CB.getArgOperand(0)->getType() != IdentPtr)
        return false;
      for (unsigned u = 1; u < NumArgs; ++u)
        if (isa<Instruction>(CB.getArgOperand(u)))
          return false;
      return true;
    };

    if (!ReplVal) {
      // No replacement given: pick the first movable call, hoist it to the
      // entry block, and use it as the canonical value.
      for (Use *U : *UV)
        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
          if (!CanBeMoved(*CI))
            continue;

          auto Remark = [&](OptimizationRemark OR) {
            auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
            return OR << "OpenMP runtime call "
                      << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
                      << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
          };
          emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);

          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
          ReplVal = CI;
          break;
        }
      if (!ReplVal)
        return false;
    }

    // If we use a call as a replacement value we need to make sure the ident is
    // valid at the new location. For now we just pick a global one, either
    // existing and used by one of the calls, or created from scratch.
    if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
      if (CI->getNumArgOperands() > 0 &&
          CI->getArgOperand(0)->getType() == IdentPtr) {
        Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                      /* GlobalOnly */ true);
        CI->setArgOperand(0, Ident);
      }
    }

    bool Changed = false;
    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || CI == ReplVal || &F != &Caller)
        return false;
      assert(CI->getCaller() == &F && "Unexpected call!");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP runtime call "
                  << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);

      CGUpdater.removeCallSite(*CI);
      CI->replaceAllUsesWith(ReplVal);
      CI->eraseFromParent();
      ++NumOpenMPRuntimeCallsDeduplicated;
      Changed = true;
      return true;
    };
    RFI.foreachUse(ReplaceAndDeleteCB);

    return Changed;
  }

  /// Collect arguments that represent the global thread id in \p GTIdArgs.
  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
    //       initialization. We could define an AbstractAttribute instead and
    //       run the Attributor here once it can be run as an SCC pass.

    // Helper to check the argument \p ArgNo at all call sites of \p F for
    // a GTId. Requires local linkage so that all call sites are visible.
    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
      if (!F.hasLocalLinkage())
        return false;
      for (Use &U : F.uses()) {
        if (CallInst *CI = getCallIfRegularCall(U)) {
          Value *ArgOp = CI->getArgOperand(ArgNo);
          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
              getCallIfRegularCall(*ArgOp,
                                   &RFIs[OMPRTL___kmpc_global_thread_num]))
            continue;
        }
        // Any non-call use, or a call passing a non-GTId value, disproves it.
        return false;
      }
      return true;
    };

    // Helper to identify uses of a GTId as GTId arguments.
    auto AddUserArgs = [&](Value &GTId) {
      for (Use &U : GTId.uses())
        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
          if (CI->isArgOperand(&U))
            if (Function *Callee = CI->getCalledFunction())
              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
    };

    // The argument users of __kmpc_global_thread_num calls are GTIds.
    RuntimeFunctionInfo &GlobThreadNumRFI =
        RFIs[OMPRTL___kmpc_global_thread_num];
    GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) {
      if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
        AddUserArgs(*CI);
      return false;
    });

    // Transitively search for more arguments by looking at the users of the
    // ones we know already. During the search the GTIdArgs vector is extended
    // so we cannot cache the size nor can we use a range based for.
    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
      AddUserArgs(*GTIdArgs[u]);
  }

  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given it has to be the callee or a nullptr is returned.
  CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(U.getUser());
    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

  /// Return the call if \p V is a regular call. If \p RFI is given it has to be
  /// the callee or a nullptr is returned.
  CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType, and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    // TODO: We should output information to the user (under debug output
    //       and via remarks).

    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;

      ++RTFTyIt;
    }

    return true;
  }

  /// Helper to initialize all runtime function information for those defined in
  /// OpenMPKinds.def.
  void initializeRuntimeFunctions() {
    // Helper to collect all uses of the declaration in the UsesMap.
    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
      unsigned NumUses = 0;
      if (!RFI.Declaration)
        return NumUses;
      OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();

      // TODO: We directly convert uses into proper calls and unknown uses.
      for (Use &U : RFI.Declaration->uses()) {
        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
          if (ModuleSlice.count(UserI->getFunction())) {
            RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
            ++NumUses;
          }
        } else {
          // Non-instruction uses (e.g., in constants) are keyed by nullptr.
          RFI.getOrCreateUseVector(nullptr).push_back(&U);
          ++NumUses;
        }
      }
      return NumUses;
    };

// Expand OMP_RTL for every runtime function declared in OMPKinds.def and
// record the ones whose module declaration matches the expected signature.
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    if (declMatchesRTFTypes(F, _ReturnType, ArgsTypes)) {                      \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = _ReturnType;                                            \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = CollectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // TODO: We should attach the attributes defined in OMPKinds.def.
  }

  /// Emit a remark generically
  ///
  /// This template function can be used to generically emit a remark. The
  /// RemarkKind should be one of the following:
  ///   - OptimizationRemark to indicate a successful optimization attempt
  ///   - OptimizationRemarkMissed to report a failed optimization attempt
  ///   - OptimizationRemarkAnalysis to provide additional information about an
  ///     optimization attempt
  ///
  /// The remark is built using a callback function provided by the caller that
  /// takes a RemarkKind as input and returns a RemarkKind.
  template <typename RemarkKind,
            typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
  void emitRemark(Instruction *Inst, StringRef RemarkName,
                  RemarkCallBack &&RemarkCB) {
    Function *F = Inst->getParent()->getParent();
    auto &ORE = OREGetter(F);

    ORE.emit([&]() {
      return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst));
    });
  }

  /// The underlying module.
  Module &M;

  /// The SCC we are operating on.
  SmallVectorImpl<Function *> &SCC;

  /// The slice of the module we are allowed to look at.
  SmallPtrSetImpl<Function *> &ModuleSlice;

  /// An OpenMP-IR-Builder instance
  OpenMPIRBuilder OMPBuilder;

  /// Callback to update the call graph, the first argument is a removed call,
  /// the second an optional replacement call.
  CallGraphUpdater &CGUpdater;

  /// Callback to get an OptimizationRemarkEmitter from a Function *
  OptimizationRemarkGetter OREGetter;

  /// Map from runtime function kind to the runtime function description.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;
};
590 } // namespace
591 
592 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
593                                      CGSCCAnalysisManager &AM,
594                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
595   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
596     return PreservedAnalyses::all();
597 
598   if (DisableOpenMPOptimizations)
599     return PreservedAnalyses::all();
600 
601   SmallPtrSet<Function *, 16> ModuleSlice;
602   SmallVector<Function *, 16> SCC;
603   for (LazyCallGraph::Node &N : C) {
604     SCC.push_back(&N.getFunction());
605     ModuleSlice.insert(SCC.back());
606   }
607 
608   if (SCC.empty())
609     return PreservedAnalyses::all();
610 
611   auto OREGetter = [&C, &CG, &AM](Function *F) -> OptimizationRemarkEmitter & {
612     FunctionAnalysisManager &FAM =
613         AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
614     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
615   };
616 
617   CallGraphUpdater CGUpdater;
618   CGUpdater.initialize(CG, C, AM, UR);
619   // TODO: Compute the module slice we are allowed to look at.
620   OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
621   bool Changed = OMPOpt.run();
622   (void)Changed;
623   return PreservedAnalyses::all();
624 }
625 
626 namespace {
627 
/// Legacy pass manager wrapper around OpenMPOpt.
struct OpenMPOptLegacyPass : public CallGraphSCCPass {
  /// Keeps the (legacy) call graph consistent across all SCC invocations.
  CallGraphUpdater CGUpdater;
  /// Cached "does the module contain OpenMP?" result, filled lazily.
  OpenMPInModule OMPInModule;
  // Pass identification, used by the legacy pass manager infrastructure.
  static char ID;

  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool doInitialization(CallGraph &CG) override {
    // Disable the pass if there is no OpenMP (runtime call) in the module.
    containsOpenMP(CG.getModule(), OMPInModule);
    return false;
  }

  bool runOnSCC(CallGraphSCC &CGSCC) override {
    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
      return false;
    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
      return false;

    // Collect the defined functions of the SCC; declarations carry no IR to
    // transform.
    SmallPtrSet<Function *, 16> ModuleSlice;
    SmallVector<Function *, 16> SCC;
    for (CallGraphNode *CGN : CGSCC)
      if (Function *Fn = CGN->getFunction())
        if (!Fn->isDeclaration()) {
          SCC.push_back(Fn);
          ModuleSlice.insert(Fn);
        }

    if (SCC.empty())
      return false;

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CGUpdater.initialize(CG, CGSCC);

    // Maintain a map of functions to avoid rebuilding the ORE
    DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
    auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
      std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
      if (!ORE)
        ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    // TODO: Compute the module slice we are allowed to look at.
    OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
    return OMPOpt.run();
  }

  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};
684 
685 } // end anonymous namespace
686 
/// Return true if module \p M declares any known OpenMP runtime function.
/// The result is cached in \p OMPInModule so the scan runs at most once.
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
  if (OMPInModule.isKnown())
    return OMPInModule;

// Probe the module for each runtime function name from OMPKinds.def.
#define OMP_RTL(_Enum, _Name, ...)                                             \
  if (M.getFunction(_Name))                                                    \
    return OMPInModule = true;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  return OMPInModule = false;
}
697 
698 char OpenMPOptLegacyPass::ID = 0;
699 
700 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
701                       "OpenMP specific optimizations", false, false)
702 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
703 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
704                     "OpenMP specific optimizations", false, false)
705 
/// Factory for the legacy pass; ownership transfers to the pass manager.
Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
707