1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // OpenMP specific optimizations:
10 //
11 // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/OpenMPOpt.h"
16 
17 #include "llvm/ADT/EnumeratedArray.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CallGraph.h"
20 #include "llvm/Analysis/CallGraphSCCPass.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Frontend/OpenMP/OMPConstants.h"
23 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Transforms/IPO.h"
27 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
28 
29 using namespace llvm;
30 using namespace omp;
31 using namespace types;
32 
33 #define DEBUG_TYPE "openmp-opt"
34 
// Command line flag to disable all OpenMP specific optimizations performed by
// this pass at once.
static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

// Pass statistics, printed with -stats.
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");

#if !defined(NDEBUG)
// Prefix used for all debug output of this pass.
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
52 
53 namespace {
54 struct OpenMPOpt {
55 
56   using OptimizationRemarkGetter =
57       function_ref<OptimizationRemarkEmitter &(Function *)>;
58 
59   OpenMPOpt(SmallVectorImpl<Function *> &SCC,
60             SmallPtrSetImpl<Function *> &ModuleSlice,
61             CallGraphUpdater &CGUpdater, OptimizationRemarkGetter OREGetter)
62       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
63         OMPBuilder(M), CGUpdater(CGUpdater), OREGetter(OREGetter) {
64     initializeTypes(M);
65     initializeRuntimeFunctions();
66     OMPBuilder.initialize();
67   }
68 
69   /// Generic information that describes a runtime function
70   struct RuntimeFunctionInfo {
71 
72     /// The kind, as described by the RuntimeFunction enum.
73     RuntimeFunction Kind;
74 
75     /// The name of the function.
76     StringRef Name;
77 
78     /// Flag to indicate a variadic function.
79     bool IsVarArg;
80 
81     /// The return type of the function.
82     Type *ReturnType;
83 
84     /// The argument types of the function.
85     SmallVector<Type *, 8> ArgumentTypes;
86 
87     /// The declaration if available.
88     Function *Declaration = nullptr;
89 
90     /// Uses of this runtime function per function containing the use.
91     using UseVector = SmallVector<Use *, 16>;
92 
93     /// Return the vector of uses in function \p F.
94     UseVector &getOrCreateUseVector(Function *F) {
95       std::unique_ptr<UseVector> &UV = UsesMap[F];
96       if (!UV)
97         UV = std::make_unique<UseVector>();
98       return *UV;
99     }
100 
101     /// Return the vector of uses in function \p F or `nullptr` if there are
102     /// none.
103     const UseVector *getUseVector(Function &F) const {
104       auto I = UsesMap.find(&F);
105       if (I != UsesMap.end())
106         return I->second.get();
107       return nullptr;
108     }
109 
110     /// Return how many functions contain uses of this runtime function.
111     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
112 
113     /// Return the number of arguments (or the minimal number for variadic
114     /// functions).
115     size_t getNumArgs() const { return ArgumentTypes.size(); }
116 
117     /// Run the callback \p CB on each use and forget the use if the result is
118     /// true. The callback will be fed the function in which the use was
119     /// encountered as second argument.
120     void foreachUse(function_ref<bool(Use &, Function &)> CB) {
121       for (auto &It : UsesMap)
122         foreachUse(CB, It.first, It.second.get());
123     }
124 
125     /// Run the callback \p CB on each use within the function \p F and forget
126     /// the use if the result is true.
127     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F,
128                     UseVector *Uses = nullptr) {
129       SmallVector<unsigned, 8> ToBeDeleted;
130       ToBeDeleted.clear();
131 
132       unsigned Idx = 0;
133       UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F);
134 
135       for (Use *U : UV) {
136         if (CB(*U, *F))
137           ToBeDeleted.push_back(Idx);
138         ++Idx;
139       }
140 
141       // Remove the to-be-deleted indices in reverse order as prior
142       // modifcations will not modify the smaller indices.
143       while (!ToBeDeleted.empty()) {
144         unsigned Idx = ToBeDeleted.pop_back_val();
145         UV[Idx] = UV.back();
146         UV.pop_back();
147       }
148     }
149 
150   private:
151     /// Map from functions to all uses of this runtime function contained in
152     /// them.
153     DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap;
154   };
155 
156   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
157   bool run() {
158     bool Changed = false;
159 
160     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
161                       << " functions in a slice with " << ModuleSlice.size()
162                       << " functions\n");
163 
164     Changed |= deduplicateRuntimeCalls();
165     Changed |= deleteParallelRegions();
166 
167     return Changed;
168   }
169 
170 private:
171   /// Try to delete parallel regions if possible.
172   bool deleteParallelRegions() {
173     const unsigned CallbackCalleeOperand = 2;
174 
175     RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call];
176     if (!RFI.Declaration)
177       return false;
178 
179     bool Changed = false;
180     auto DeleteCallCB = [&](Use &U, Function &) {
181       CallInst *CI = getCallIfRegularCall(U);
182       if (!CI)
183         return false;
184       auto *Fn = dyn_cast<Function>(
185           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
186       if (!Fn)
187         return false;
188       if (!Fn->onlyReadsMemory())
189         return false;
190       if (!Fn->hasFnAttribute(Attribute::WillReturn))
191         return false;
192 
193       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
194                         << CI->getCaller()->getName() << "\n");
195 
196       auto Remark = [&](OptimizationRemark OR) {
197         return OR << "Parallel region in "
198                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
199                   << " deleted";
200       };
201       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
202                                      Remark);
203 
204       CGUpdater.removeCallSite(*CI);
205       CI->eraseFromParent();
206       Changed = true;
207       ++NumOpenMPParallelRegionsDeleted;
208       return true;
209     };
210 
211     RFI.foreachUse(DeleteCallCB);
212 
213     return Changed;
214   }
215 
216   /// Try to eliminiate runtime calls by reusing existing ones.
217   bool deduplicateRuntimeCalls() {
218     bool Changed = false;
219 
220     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
221         OMPRTL_omp_get_num_threads,
222         OMPRTL_omp_in_parallel,
223         OMPRTL_omp_get_cancellation,
224         OMPRTL_omp_get_thread_limit,
225         OMPRTL_omp_get_supported_active_levels,
226         OMPRTL_omp_get_level,
227         OMPRTL_omp_get_ancestor_thread_num,
228         OMPRTL_omp_get_team_size,
229         OMPRTL_omp_get_active_level,
230         OMPRTL_omp_in_final,
231         OMPRTL_omp_get_proc_bind,
232         OMPRTL_omp_get_num_places,
233         OMPRTL_omp_get_num_procs,
234         OMPRTL_omp_get_place_num,
235         OMPRTL_omp_get_partition_num_places,
236         OMPRTL_omp_get_partition_place_nums};
237 
238     // Global-tid is handled separately.
239     SmallSetVector<Value *, 16> GTIdArgs;
240     collectGlobalThreadIdArguments(GTIdArgs);
241     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
242                       << " global thread ID arguments\n");
243 
244     for (Function *F : SCC) {
245       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
246         deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
247 
248       // __kmpc_global_thread_num is special as we can replace it with an
249       // argument in enough cases to make it worth trying.
250       Value *GTIdArg = nullptr;
251       for (Argument &Arg : F->args())
252         if (GTIdArgs.count(&Arg)) {
253           GTIdArg = &Arg;
254           break;
255         }
256       Changed |= deduplicateRuntimeCalls(
257           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
258     }
259 
260     return Changed;
261   }
262 
263   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
264                                     bool GlobalOnly, bool &SingleChoice) {
265     if (CurrentIdent == NextIdent)
266       return CurrentIdent;
267 
268     // TODO: Figure out how to actually combine multiple debug locations. For
269     //       now we just keep an existing one if there is a single choice.
270     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
271       SingleChoice = !CurrentIdent;
272       return NextIdent;
273     }
274     return nullptr;
275   }
276 
277   /// Return an `struct ident_t*` value that represents the ones used in the
278   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
279   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
280   /// return value we create one from scratch. We also do not yet combine
281   /// information, e.g., the source locations, see combinedIdentStruct.
282   Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F,
283                                         bool GlobalOnly) {
284     bool SingleChoice = true;
285     Value *Ident = nullptr;
286     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
287       CallInst *CI = getCallIfRegularCall(U, &RFI);
288       if (!CI || &F != &Caller)
289         return false;
290       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
291                                   /* GlobalOnly */ true, SingleChoice);
292       return false;
293     };
294     RFI.foreachUse(CombineIdentStruct);
295 
296     if (!Ident || !SingleChoice) {
297       // The IRBuilder uses the insertion block to get to the module, this is
298       // unfortunate but we work around it for now.
299       if (!OMPBuilder.getInsertionPoint().getBlock())
300         OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
301             &F.getEntryBlock(), F.getEntryBlock().begin()));
302       // Create a fallback location if non was found.
303       // TODO: Use the debug locations of the calls instead.
304       Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr();
305       Ident = OMPBuilder.getOrCreateIdent(Loc);
306     }
307     return Ident;
308   }
309 
310   /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or
311   /// \p ReplVal if given.
312   bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
313                                Value *ReplVal = nullptr) {
314     auto *UV = RFI.getUseVector(F);
315     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
316       return false;
317 
318     LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << UV->size() << " uses of "
319                       << RFI.Name
320                       << (ReplVal ? " with an existing value\n" : "\n")
321                       << "\n");
322     assert((!ReplVal || (isa<Argument>(ReplVal) &&
323                          cast<Argument>(ReplVal)->getParent() == &F)) &&
324            "Unexpected replacement value!");
325 
326     // TODO: Use dominance to find a good position instead.
327     auto CanBeMoved = [](CallBase &CB) {
328       unsigned NumArgs = CB.getNumArgOperands();
329       if (NumArgs == 0)
330         return true;
331       if (CB.getArgOperand(0)->getType() != IdentPtr)
332         return false;
333       for (unsigned u = 1; u < NumArgs; ++u)
334         if (isa<Instruction>(CB.getArgOperand(u)))
335           return false;
336       return true;
337     };
338 
339     if (!ReplVal) {
340       for (Use *U : *UV)
341         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
342           if (!CanBeMoved(*CI))
343             continue;
344 
345           auto Remark = [&](OptimizationRemark OR) {
346             auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
347             return OR << "OpenMP runtime call "
348                       << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
349                       << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
350           };
351           emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
352 
353           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
354           ReplVal = CI;
355           break;
356         }
357       if (!ReplVal)
358         return false;
359     }
360 
361     // If we use a call as a replacement value we need to make sure the ident is
362     // valid at the new location. For now we just pick a global one, either
363     // existing and used by one of the calls, or created from scratch.
364     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
365       if (CI->getNumArgOperands() > 0 &&
366           CI->getArgOperand(0)->getType() == IdentPtr) {
367         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
368                                                       /* GlobalOnly */ true);
369         CI->setArgOperand(0, Ident);
370       }
371     }
372 
373     bool Changed = false;
374     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
375       CallInst *CI = getCallIfRegularCall(U, &RFI);
376       if (!CI || CI == ReplVal || &F != &Caller)
377         return false;
378       assert(CI->getCaller() == &F && "Unexpected call!");
379 
380       auto Remark = [&](OptimizationRemark OR) {
381         return OR << "OpenMP runtime call "
382                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
383       };
384       emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
385 
386       CGUpdater.removeCallSite(*CI);
387       CI->replaceAllUsesWith(ReplVal);
388       CI->eraseFromParent();
389       ++NumOpenMPRuntimeCallsDeduplicated;
390       Changed = true;
391       return true;
392     };
393     RFI.foreachUse(ReplaceAndDeleteCB);
394 
395     return Changed;
396   }
397 
398   /// Collect arguments that represent the global thread id in \p GTIdArgs.
399   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
400     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
401     //       initialization. We could define an AbstractAttribute instead and
402     //       run the Attributor here once it can be run as an SCC pass.
403 
404     // Helper to check the argument \p ArgNo at all call sites of \p F for
405     // a GTId.
406     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
407       if (!F.hasLocalLinkage())
408         return false;
409       for (Use &U : F.uses()) {
410         if (CallInst *CI = getCallIfRegularCall(U)) {
411           Value *ArgOp = CI->getArgOperand(ArgNo);
412           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
413               getCallIfRegularCall(*ArgOp,
414                                    &RFIs[OMPRTL___kmpc_global_thread_num]))
415             continue;
416         }
417         return false;
418       }
419       return true;
420     };
421 
422     // Helper to identify uses of a GTId as GTId arguments.
423     auto AddUserArgs = [&](Value &GTId) {
424       for (Use &U : GTId.uses())
425         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
426           if (CI->isArgOperand(&U))
427             if (Function *Callee = CI->getCalledFunction())
428               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
429                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
430     };
431 
432     // The argument users of __kmpc_global_thread_num calls are GTIds.
433     RuntimeFunctionInfo &GlobThreadNumRFI =
434         RFIs[OMPRTL___kmpc_global_thread_num];
435     GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) {
436       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
437         AddUserArgs(*CI);
438       return false;
439     });
440 
441     // Transitively search for more arguments by looking at the users of the
442     // ones we know already. During the search the GTIdArgs vector is extended
443     // so we cannot cache the size nor can we use a range based for.
444     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
445       AddUserArgs(*GTIdArgs[u]);
446   }
447 
448   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
449   /// given it has to be the callee or a nullptr is returned.
450   CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
451     CallInst *CI = dyn_cast<CallInst>(U.getUser());
452     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
453         (!RFI || CI->getCalledFunction() == RFI->Declaration))
454       return CI;
455     return nullptr;
456   }
457 
458   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
459   /// the callee or a nullptr is returned.
460   CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
461     CallInst *CI = dyn_cast<CallInst>(&V);
462     if (CI && !CI->hasOperandBundles() &&
463         (!RFI || CI->getCalledFunction() == RFI->Declaration))
464       return CI;
465     return nullptr;
466   }
467 
468   /// Returns true if the function declaration \p F matches the runtime
469   /// function types, that is, return type \p RTFRetType, and argument types
470   /// \p RTFArgTypes.
471   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
472                                   SmallVector<Type *, 8> &RTFArgTypes) {
473     // TODO: We should output information to the user (under debug output
474     //       and via remarks).
475 
476     if (!F)
477       return false;
478     if (F->getReturnType() != RTFRetType)
479       return false;
480     if (F->arg_size() != RTFArgTypes.size())
481       return false;
482 
483     auto RTFTyIt = RTFArgTypes.begin();
484     for (Argument &Arg : F->args()) {
485       if (Arg.getType() != *RTFTyIt)
486         return false;
487 
488       ++RTFTyIt;
489     }
490 
491     return true;
492   }
493 
494   /// Helper to initialize all runtime function information for those defined in
495   /// OpenMPKinds.def.
496   void initializeRuntimeFunctions() {
497     // Helper to collect all uses of the decleration in the UsesMap.
498     auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
499       unsigned NumUses = 0;
500       if (!RFI.Declaration)
501         return NumUses;
502       OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
503 
504       NumOpenMPRuntimeFunctionsIdentified += 1;
505       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
506 
507       // TODO: We directly convert uses into proper calls and unknown uses.
508       for (Use &U : RFI.Declaration->uses()) {
509         if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
510           if (ModuleSlice.count(UserI->getFunction())) {
511             RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
512             ++NumUses;
513           }
514         } else {
515           RFI.getOrCreateUseVector(nullptr).push_back(&U);
516           ++NumUses;
517         }
518       }
519       return NumUses;
520     };
521 
522 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
523   {                                                                            \
524     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
525     Function *F = M.getFunction(_Name);                                        \
526     if (declMatchesRTFTypes(F, _ReturnType, ArgsTypes)) {                      \
527       auto &RFI = RFIs[_Enum];                                                 \
528       RFI.Kind = _Enum;                                                        \
529       RFI.Name = _Name;                                                        \
530       RFI.IsVarArg = _IsVarArg;                                                \
531       RFI.ReturnType = _ReturnType;                                            \
532       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
533       RFI.Declaration = F;                                                     \
534       unsigned NumUses = CollectUses(RFI);                                     \
535       (void)NumUses;                                                           \
536       LLVM_DEBUG({                                                             \
537         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
538                << " found\n";                                                  \
539         if (RFI.Declaration)                                                   \
540           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
541                  << RFI.getNumFunctionsWithUses()                              \
542                  << " different functions.\n";                                 \
543       });                                                                      \
544     }                                                                          \
545   }
546 #include "llvm/Frontend/OpenMP/OMPKinds.def"
547 
548     // TODO: We should attach the attributes defined in OMPKinds.def.
549   }
550 
551   /// Emit a remark generically
552   ///
553   /// This template function can be used to generically emit a remark. The
554   /// RemarkKind should be one of the following:
555   ///   - OptimizationRemark to indicate a successful optimization attempt
556   ///   - OptimizationRemarkMissed to report a failed optimization attempt
557   ///   - OptimizationRemarkAnalysis to provide additional information about an
558   ///     optimization attempt
559   ///
560   /// The remark is built using a callback function provided by the caller that
561   /// takes a RemarkKind as input and returns a RemarkKind.
562   template <typename RemarkKind,
563             typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
564   void emitRemark(Instruction *Inst, StringRef RemarkName,
565                   RemarkCallBack &&RemarkCB) {
566     Function *F = Inst->getParent()->getParent();
567     auto &ORE = OREGetter(F);
568 
569     ORE.emit([&]() {
570       return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst));
571     });
572   }
573 
574   /// The underyling module.
575   Module &M;
576 
577   /// The SCC we are operating on.
578   SmallVectorImpl<Function *> &SCC;
579 
580   /// The slice of the module we are allowed to look at.
581   SmallPtrSetImpl<Function *> &ModuleSlice;
582 
583   /// An OpenMP-IR-Builder instance
584   OpenMPIRBuilder OMPBuilder;
585 
586   /// Callback to update the call graph, the first argument is a removed call,
587   /// the second an optional replacement call.
588   CallGraphUpdater &CGUpdater;
589 
590   /// Callback to get an OptimizationRemarkEmitter from a Function *
591   OptimizationRemarkGetter OREGetter;
592 
593   /// Map from runtime function kind to the runtime function description.
594   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
595                   RuntimeFunction::OMPRTL___last>
596       RFIs;
597 };
598 } // namespace
599 
600 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
601                                      CGSCCAnalysisManager &AM,
602                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
603   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
604     return PreservedAnalyses::all();
605 
606   if (DisableOpenMPOptimizations)
607     return PreservedAnalyses::all();
608 
609   SmallPtrSet<Function *, 16> ModuleSlice;
610   SmallVector<Function *, 16> SCC;
611   for (LazyCallGraph::Node &N : C) {
612     SCC.push_back(&N.getFunction());
613     ModuleSlice.insert(SCC.back());
614   }
615 
616   if (SCC.empty())
617     return PreservedAnalyses::all();
618 
619   auto OREGetter = [&C, &CG, &AM](Function *F) -> OptimizationRemarkEmitter & {
620     FunctionAnalysisManager &FAM =
621         AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
622     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
623   };
624 
625   CallGraphUpdater CGUpdater;
626   CGUpdater.initialize(CG, C, AM, UR);
627   // TODO: Compute the module slice we are allowed to look at.
628   OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
629   bool Changed = OMPOpt.run();
630   (void)Changed;
631   return PreservedAnalyses::all();
632 }
633 
634 namespace {
635 
636 struct OpenMPOptLegacyPass : public CallGraphSCCPass {
637   CallGraphUpdater CGUpdater;
638   OpenMPInModule OMPInModule;
639   static char ID;
640 
641   OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
642     initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
643   }
644 
645   void getAnalysisUsage(AnalysisUsage &AU) const override {
646     CallGraphSCCPass::getAnalysisUsage(AU);
647   }
648 
649   bool doInitialization(CallGraph &CG) override {
650     // Disable the pass if there is no OpenMP (runtime call) in the module.
651     containsOpenMP(CG.getModule(), OMPInModule);
652     return false;
653   }
654 
655   bool runOnSCC(CallGraphSCC &CGSCC) override {
656     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
657       return false;
658     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
659       return false;
660 
661     SmallPtrSet<Function *, 16> ModuleSlice;
662     SmallVector<Function *, 16> SCC;
663     for (CallGraphNode *CGN : CGSCC)
664       if (Function *Fn = CGN->getFunction())
665         if (!Fn->isDeclaration()) {
666           SCC.push_back(Fn);
667           ModuleSlice.insert(Fn);
668         }
669 
670     if (SCC.empty())
671       return false;
672 
673     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
674     CGUpdater.initialize(CG, CGSCC);
675 
676     // Maintain a map of functions to avoid rebuilding the ORE
677     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
678     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
679       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
680       if (!ORE)
681         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
682       return *ORE;
683     };
684 
685     // TODO: Compute the module slice we are allowed to look at.
686     OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
687     return OMPOpt.run();
688   }
689 
690   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
691 };
692 
693 } // end anonymous namespace
694 
/// Return whether \p M contains a declaration of any known OpenMP runtime
/// function. The answer is computed once and cached in \p OMPInModule;
/// subsequent calls return the cached value.
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
  if (OMPInModule.isKnown())
    return OMPInModule;

// Probe for every runtime function listed in OMPKinds.def; a single match
// suffices to flag the module as containing OpenMP.
#define OMP_RTL(_Enum, _Name, ...)                                             \
  if (M.getFunction(_Name))                                                    \
    return OMPInModule = true;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  return OMPInModule = false;
}
705 
char OpenMPOptLegacyPass::ID = 0;

// Register the legacy pass; it only depends on the call graph analysis.
INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
                      "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
                    "OpenMP specific optimizations", false, false)

// Factory used by the legacy pass pipeline builder.
Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
715