//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
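//
//   As an illustrative sketch (exact IR shapes vary by frontend), two calls
//   to the same side-effect-free runtime getter in one function, e.g.,
//
//     %a = call i32 @omp_get_thread_num()
//     %b = call i32 @omp_get_thread_num()
//
//   are folded by rewiring all users of %b to %a and erasing %b.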
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

using namespace llvm;
using namespace omp;
using namespace types;

#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");

#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif

namespace {

/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
/// Attributor runs.
struct OMPInformationCache : public InformationCache {
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
                      SmallPtrSetImpl<Function *> &ModuleSlice)
      : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice),
        OMPBuilder(M) {
    initializeTypes(M);
    initializeRuntimeFunctions();
    initializeInternalControlVars();

    OMPBuilder.initialize();
  }

  /// Generic information that describes an internal control variable.
  struct InternalControlVarInfo {
    /// The kind, as described by the InternalControlVar enum.
    InternalControlVar Kind;

    /// The name of the ICV.
    StringRef Name;

    /// Environment variable associated with this ICV.
    StringRef EnvVarName;

    /// Initial value kind.
    ICVInitValue InitKind;

    /// Initial value.
    ConstantInt *InitValue;

    /// Setter RTL function associated with this ICV.
    RuntimeFunction Setter;

    /// Getter RTL function associated with this ICV.
    RuntimeFunction Getter;

    /// RTL function corresponding to the override clause of this ICV.
    RuntimeFunction Clause;
  };

  /// Generic information that describes a runtime function.
  struct RuntimeFunctionInfo {

    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Uses of this runtime function per function containing the use.
    using UseVector = SmallVector<Use *, 16>;

    /// Return the vector of uses in function \p F.
    UseVector &getOrCreateUseVector(Function *F) {
      std::unique_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_unique<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
      for (auto &It : UsesMap)
        foreachUse(CB, It.first, It.second.get());
    }

    /// Run the callback \p CB on each use within the function \p F and forget
    /// the use if the result is true.
    void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F,
                    UseVector *Uses = nullptr) {
      SmallVector<unsigned, 8> ToBeDeleted;
      ToBeDeleted.clear();

      unsigned Idx = 0;
      UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F);

      for (Use *U : UV) {
        if (CB(*U, *F))
          ToBeDeleted.push_back(Idx);
        ++Idx;
      }

      // Remove the to-be-deleted indices in reverse order so that earlier
      // removals (which swap in the last element) do not invalidate the
      // smaller indices that are still pending.
      while (!ToBeDeleted.empty()) {
        unsigned Idx = ToBeDeleted.pop_back_val();
        UV[Idx] = UV.back();
        UV.pop_back();
      }
    }

  private:
    /// Map from functions to all uses of this runtime function contained in
    /// them.
    DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap;
  };

  /// The slice of the module we are allowed to look at.
  SmallPtrSetImpl<Function *> &ModuleSlice;

  /// An OpenMP-IR-Builder instance
  OpenMPIRBuilder OMPBuilder;

  /// Map from runtime function kind to the runtime function description.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;

  /// Map from ICV kind to the ICV description.
  EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVs;

  /// Helper to initialize all internal control variable information for those
  /// defined in OMPKinds.def.
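  /// As a concrete example (the authoritative mapping lives in OMPKinds.def):
  /// the nthreads ICV is seeded from the OMP_NUM_THREADS environment variable,
  /// written by omp_set_num_threads, and read by omp_get_max_threads.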
  void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL)                                                 \
  {                                                                            \
    auto &ICV = ICVs[_Name];                                                   \
    ICV.Setter = RTL;                                                          \
  }
#define ICV_RT_GET(Name, RTL)                                                  \
  {                                                                            \
    auto &ICV = ICVs[Name];                                                    \
    ICV.Getter = RTL;                                                          \
  }
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
  {                                                                            \
    auto &ICV = ICVs[Enum];                                                    \
    ICV.Name = _Name;                                                          \
    ICV.Kind = Enum;                                                           \
    ICV.InitKind = Init;                                                       \
    ICV.EnvVarName = _EnvVarName;                                              \
    switch (ICV.InitKind) {                                                    \
    case ICV_IMPLEMENTATION_DEFINED:                                           \
      ICV.InitValue = nullptr;                                                 \
      break;                                                                   \
    case ICV_ZERO:                                                             \
      ICV.InitValue =                                                          \
          ConstantInt::get(Type::getInt32Ty(Int32->getContext()), 0);          \
      break;                                                                   \
    case ICV_FALSE:                                                            \
      ICV.InitValue = ConstantInt::getFalse(Int1->getContext());               \
      break;                                                                   \
    case ICV_LAST:                                                             \
      break;                                                                   \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType, and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    // TODO: We should output information to the user (under debug output
    //       and via remarks).

    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;

      ++RTFTyIt;
    }

    return true;
  }

  /// Helper to initialize all runtime function information for those defined
  /// in OMPKinds.def.
  void initializeRuntimeFunctions() {
    // Helper to collect all uses of the declaration in the UsesMap.
    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
      unsigned NumUses = 0;
      if (!RFI.Declaration)
        return NumUses;
      OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();

      // TODO: We directly convert uses into proper calls and unknown uses.
      for (Use &U : RFI.Declaration->uses()) {
        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
          if (ModuleSlice.count(UserI->getFunction())) {
            RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
            ++NumUses;
          }
        } else {
          RFI.getOrCreateUseVector(nullptr).push_back(&U);
          ++NumUses;
        }
      }
      return NumUses;
    };

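    // Note: the module slice is never empty here; both pass drivers bail out
    // on SCCs without function definitions before constructing this cache.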
    Module &M = *((*ModuleSlice.begin())->getParent());

#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    if (declMatchesRTFTypes(F, _ReturnType, ArgsTypes)) {                      \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = _ReturnType;                                            \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = CollectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // TODO: We should attach the attributes defined in OMPKinds.def.
  }
};

struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache) {}

  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
  bool run() {
    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions in a slice with "
                      << OMPInfoCache.ModuleSlice.size() << " functions\n");

    /// Print initial ICV values for testing.
    /// FIXME: This should be done from the Attributor once it is added.
    if (PrintICVValues) {
      InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};

      for (Function *F : OMPInfoCache.ModuleSlice) {
        for (auto ICV : ICVs) {
          auto ICVInfo = OMPInfoCache.ICVs[ICV];
          auto Remark = [&](OptimizationRemark OR) {
            return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                      << " Value: "
                      << (ICVInfo.InitValue
                              ? ICVInfo.InitValue->getValue().toString(10, true)
                              : "IMPLEMENTATION_DEFINED");
          };

          emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
        }
      }
    }

    Changed |= deduplicateRuntimeCalls();
    Changed |= deleteParallelRegions();

    return Changed;
  }

  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given it must be the callee, otherwise nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(U.getUser());
    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

  /// Return the call if \p V is a regular call. If \p RFI is given it must be
  /// the callee, otherwise nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

private:
  /// Try to delete parallel regions if possible.
  bool deleteParallelRegions() {
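    // The pattern we look for is roughly (exact types vary by frontend):
    //   call void (...) @__kmpc_fork_call(%struct.ident_t* @loc, i32 %argc,
    //                                     void (...)* @outlined, ...)
    // where argument 2 is the outlined parallel region. If that callee only
    // reads memory and is guaranteed to return, the fork call has no
    // observable effect and can be deleted.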
    const unsigned CallbackCalleeOperand = 2;

    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    bool Changed = false;
    auto DeleteCallCB = [&](Use &U, Function &) {
      CallInst *CI = getCallIfRegularCall(U);
      if (!CI)
        return false;
      auto *Fn = dyn_cast<Function>(
          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
      if (!Fn)
        return false;
      if (!Fn->onlyReadsMemory())
        return false;
      if (!Fn->hasFnAttribute(Attribute::WillReturn))
        return false;

      LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
                        << CI->getCaller()->getName() << "\n");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Parallel region in "
                  << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
                  << " deleted";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
                                     Remark);

      CGUpdater.removeCallSite(*CI);
      CI->eraseFromParent();
      Changed = true;
      ++NumOpenMPParallelRegionsDeleted;
      return true;
    };

    RFI.foreachUse(DeleteCallCB);

    return Changed;
  }

  /// Try to eliminate runtime calls by reusing existing ones.
  bool deduplicateRuntimeCalls() {
    bool Changed = false;

    RuntimeFunction DeduplicableRuntimeCallIDs[] = {
        OMPRTL_omp_get_num_threads,
        OMPRTL_omp_in_parallel,
        OMPRTL_omp_get_cancellation,
        OMPRTL_omp_get_thread_limit,
        OMPRTL_omp_get_supported_active_levels,
        OMPRTL_omp_get_level,
        OMPRTL_omp_get_ancestor_thread_num,
        OMPRTL_omp_get_team_size,
        OMPRTL_omp_get_active_level,
        OMPRTL_omp_in_final,
        OMPRTL_omp_get_proc_bind,
        OMPRTL_omp_get_num_places,
        OMPRTL_omp_get_num_procs,
        OMPRTL_omp_get_place_num,
        OMPRTL_omp_get_partition_num_places,
        OMPRTL_omp_get_partition_place_nums};

    // Global-tid is handled separately.
    SmallSetVector<Value *, 16> GTIdArgs;
    collectGlobalThreadIdArguments(GTIdArgs);
    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                      << " global thread ID arguments\n");

    for (Function *F : SCC) {
      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
        Changed |= deduplicateRuntimeCalls(
            *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);

      // __kmpc_global_thread_num is special as we can replace it with an
      // argument in enough cases to make it worth trying.
      Value *GTIdArg = nullptr;
      for (Argument &Arg : F->args())
        if (GTIdArgs.count(&Arg)) {
          GTIdArg = &Arg;
          break;
        }
      Changed |= deduplicateRuntimeCalls(
          *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
    }

    return Changed;
  }

  static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                    bool GlobalOnly, bool &SingleChoice) {
    if (CurrentIdent == NextIdent)
      return CurrentIdent;

    // TODO: Figure out how to actually combine multiple debug locations. For
    //       now we just keep an existing one if there is a single choice.
    if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
      SingleChoice = !CurrentIdent;
      return NextIdent;
    }
    return nullptr;
  }

  /// Return a `struct ident_t*` value that represents the ones used in the
  /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
  /// return a local `struct ident_t*`. For now, if we cannot find a suitable
  /// return value we create one from scratch. We also do not yet combine
  /// information, e.g., the source locations, see combinedIdentStruct.
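  /// (An ident_t value describes the source location of an OpenMP construct
  /// and is passed as the first argument to most __kmpc_* runtime calls.)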
  Value *
  getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
                                 Function &F, bool GlobalOnly) {
    bool SingleChoice = true;
    Value *Ident = nullptr;
    auto CombineIdentStruct = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || &F != &Caller)
        return false;
      Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), GlobalOnly,
                                  SingleChoice);
      return false;
    };
    RFI.foreachUse(CombineIdentStruct);

    if (!Ident || !SingleChoice) {
      // The IRBuilder uses the insertion block to get to the module; this is
      // unfortunate but we work around it for now.
      if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
        OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
            &F.getEntryBlock(), F.getEntryBlock().begin()));
      // Create a fallback location if none was found.
      // TODO: Use the debug locations of the calls instead.
      Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
      Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
    }
    return Ident;
  }

  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given.
  bool deduplicateRuntimeCalls(Function &F,
                               OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Value *ReplVal = nullptr) {
    auto *UV = RFI.getUseVector(F);
    if (!UV || UV->size() + (ReplVal != nullptr) < 2)
      return false;

    LLVM_DEBUG(
        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
               << (ReplVal ? " with an existing value" : "") << "\n");

    assert((!ReplVal || (isa<Argument>(ReplVal) &&
                         cast<Argument>(ReplVal)->getParent() == &F)) &&
           "Unexpected replacement value!");

    // TODO: Use dominance to find a good position instead.
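    // For now a call is only hoisted to the entry block if its arguments are
    // trivially available there: apart from an optional leading ident
    // argument (which is rewritten below), no operand may be an instruction.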
    auto CanBeMoved = [](CallBase &CB) {
      unsigned NumArgs = CB.getNumArgOperands();
      if (NumArgs == 0)
        return true;
      if (CB.getArgOperand(0)->getType() != IdentPtr)
        return false;
      for (unsigned u = 1; u < NumArgs; ++u)
        if (isa<Instruction>(CB.getArgOperand(u)))
          return false;
      return true;
    };

    if (!ReplVal) {
      for (Use *U : *UV)
        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
          if (!CanBeMoved(*CI))
            continue;

          auto Remark = [&](OptimizationRemark OR) {
            auto *NewLoc = &*F.getEntryBlock().getFirstInsertionPt();
            return OR << "OpenMP runtime call "
                      << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
                      << ore::NV("OpenMPRuntimeMoves", NewLoc->getDebugLoc());
          };
          emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);

          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
          ReplVal = CI;
          break;
        }
      if (!ReplVal)
        return false;
    }

    // If we use a call as a replacement value we need to make sure the ident is
    // valid at the new location. For now we just pick a global one, either
    // existing and used by one of the calls, or created from scratch.
    if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
      if (CI->getNumArgOperands() > 0 &&
          CI->getArgOperand(0)->getType() == IdentPtr) {
        Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                      /* GlobalOnly */ true);
        CI->setArgOperand(0, Ident);
      }
    }

    bool Changed = false;
    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || CI == ReplVal || &F != &Caller)
        return false;
      assert(CI->getCaller() == &F && "Unexpected call!");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP runtime call "
                  << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);

      CGUpdater.removeCallSite(*CI);
      CI->replaceAllUsesWith(ReplVal);
      CI->eraseFromParent();
      ++NumOpenMPRuntimeCallsDeduplicated;
      Changed = true;
      return true;
    };
    RFI.foreachUse(ReplaceAndDeleteCB);

    return Changed;
  }

  /// Collect arguments that represent the global thread id in \p GTIdArgs.
  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
    //       initialization. We could define an AbstractAttribute instead and
    //       run the Attributor here once it can be run as an SCC pass.

    // Helper to check the argument \p ArgNo at all call sites of \p F for
    // a GTId.
    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
      if (!F.hasLocalLinkage())
        return false;
      for (Use &U : F.uses()) {
        if (CallInst *CI = getCallIfRegularCall(U)) {
          Value *ArgOp = CI->getArgOperand(ArgNo);
          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
              getCallIfRegularCall(
                  *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
            continue;
        }
        return false;
      }
      return true;
    };

    // Helper to identify uses of a GTId as GTId arguments.
    auto AddUserArgs = [&](Value &GTId) {
      for (Use &U : GTId.uses())
        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
          if (CI->isArgOperand(&U))
            if (Function *Callee = CI->getCalledFunction())
              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
    };
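
    // Illustrative example of the propagation (names are made up): given
    //   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
    //   call void @bar(i32 %gtid)
    // an internal @bar that receives a known GTId (or another
    // __kmpc_global_thread_num call) in this position at every call site gets
    // the corresponding argument added to GTIdArgs.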

    // The argument users of __kmpc_global_thread_num calls are GTIds.
    OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];

    GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) {
      if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
        AddUserArgs(*CI);
      return false;
    });

    // Transitively search for more arguments by looking at the users of the
    // ones we know already. During the search the GTIdArgs vector is extended,
    // so we cannot cache the size nor use a range-based for loop.
    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
      AddUserArgs(*GTIdArgs[u]);
  }

  /// Emit a remark generically
  ///
  /// This template function can be used to generically emit a remark. The
  /// RemarkKind should be one of the following:
  ///   - OptimizationRemark to indicate a successful optimization attempt
  ///   - OptimizationRemarkMissed to report a failed optimization attempt
  ///   - OptimizationRemarkAnalysis to provide additional information about an
  ///     optimization attempt
  ///
  /// The remark is built using a callback function provided by the caller that
  /// takes a RemarkKind as input and returns a RemarkKind.
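  ///
  /// A typical use looks like (see the callers in this file):
  /// \code
  ///   auto Remark = [&](OptimizationRemark OR) {
  ///     return OR << "OpenMP runtime call deduplicated";
  ///   };
  ///   emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
  /// \endcode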
  template <typename RemarkKind,
            typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
  void emitRemark(Instruction *Inst, StringRef RemarkName,
                  RemarkCallBack &&RemarkCB) {
    Function *F = Inst->getParent()->getParent();
    auto &ORE = OREGetter(F);

    ORE.emit(
        [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
  }

  /// Emit a remark on a function. Since only OptimizationRemark supports this,
  /// it cannot be made generic.
  void emitRemarkOnFunction(
      Function *F, StringRef RemarkName,
      function_ref<OptimizationRemark(OptimizationRemark &&)> &&RemarkCB) {
    auto &ORE = OREGetter(F);

    ORE.emit([&]() {
      return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
    });
  }

  /// The underlying module.
  Module &M;

  /// The SCC we are operating on.
  SmallVectorImpl<Function *> &SCC;

  /// Callback to update the call graph; the first argument is a removed call,
  /// the second an optional replacement call.
  CallGraphUpdater &CGUpdater;

  /// Callback to get an OptimizationRemarkEmitter from a Function *.
  OptimizationRemarkGetter OREGetter;

  /// OpenMP-specific information cache. Also used for Attributor runs.
  OMPInformationCache &OMPInfoCache;
};
} // namespace

PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
    return PreservedAnalyses::all();

  if (DisableOpenMPOptimizations)
    return PreservedAnalyses::all();

  SmallPtrSet<Function *, 16> ModuleSlice;
  SmallVector<Function *, 16> SCC;
  for (LazyCallGraph::Node &N : C) {
    SCC.push_back(&N.getFunction());
    ModuleSlice.insert(SCC.back());
  }

  if (SCC.empty())
    return PreservedAnalyses::all();

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  AnalysisGetter AG(FAM);

  auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };

  CallGraphUpdater CGUpdater;
  CGUpdater.initialize(CG, C, AM, UR);

  SetVector<Function *> Functions(SCC.begin(), SCC.end());
  BumpPtrAllocator Allocator;
  OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                /*CGSCC*/ &Functions, ModuleSlice);

  // TODO: Compute the module slice we are allowed to look at.
  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache);
  bool Changed = OMPOpt.run();
  (void)Changed;
  return PreservedAnalyses::all();
}

namespace {

struct OpenMPOptLegacyPass : public CallGraphSCCPass {
  CallGraphUpdater CGUpdater;
  OpenMPInModule OMPInModule;
  static char ID;

  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool doInitialization(CallGraph &CG) override {
    // Disable the pass if there is no OpenMP (runtime call) in the module.
    containsOpenMP(CG.getModule(), OMPInModule);
    return false;
  }

  bool runOnSCC(CallGraphSCC &CGSCC) override {
    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
      return false;
    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
      return false;

    SmallPtrSet<Function *, 16> ModuleSlice;
    SmallVector<Function *, 16> SCC;
    for (CallGraphNode *CGN : CGSCC)
      if (Function *Fn = CGN->getFunction())
        if (!Fn->isDeclaration()) {
          SCC.push_back(Fn);
          ModuleSlice.insert(Fn);
        }

    if (SCC.empty())
      return false;

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CGUpdater.initialize(CG, CGSCC);

    // Maintain a map from functions to their ORE so we do not rebuild it for
    // every remark.
    DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
    auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
      std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
      if (!ORE)
        ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    AnalysisGetter AG;
    SetVector<Function *> Functions(SCC.begin(), SCC.end());
    BumpPtrAllocator Allocator;
    OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
                                  Allocator,
                                  /*CGSCC*/ &Functions, ModuleSlice);

    // TODO: Compute the module slice we are allowed to look at.
    OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache);
    return OMPOpt.run();
  }

  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};

} // end anonymous namespace

bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
  if (OMPInModule.isKnown())
    return OMPInModule;

#define OMP_RTL(_Enum, _Name, ...)                                             \
  if (M.getFunction(_Name))                                                    \
    return OMPInModule = true;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  return OMPInModule = false;
}

char OpenMPOptLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
                      "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
                    "OpenMP specific optimizations", false, false)

Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
