1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // OpenMP specific optimizations:
10 //
11 // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12 //
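//   For illustration, a sketch of the intended effect (not necessarily the
//   exact IR produced): two identical calls in one function,
//
//     %a = call i32 @omp_get_thread_num()
//     %b = call i32 @omp_get_thread_num()
//
//   are collapsed so that all users of %b reuse %a.
//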
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/OpenMPOpt.h"
16 
17 #include "llvm/ADT/EnumeratedArray.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CallGraph.h"
20 #include "llvm/Analysis/CallGraphSCCPass.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Frontend/OpenMP/OMPConstants.h"
23 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Transforms/IPO.h"
27 #include "llvm/Transforms/IPO/Attributor.h"
28 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
29 
30 using namespace llvm;
31 using namespace omp;
32 
33 #define DEBUG_TYPE "openmp-opt"
34 
35 static cl::opt<bool> DisableOpenMPOptimizations(
36     "openmp-opt-disable", cl::ZeroOrMore,
37     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
38     cl::init(false));
39 
40 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
41                                     cl::Hidden);
42 
43 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
44           "Number of OpenMP runtime calls deduplicated");
45 STATISTIC(NumOpenMPParallelRegionsDeleted,
46           "Number of OpenMP parallel regions deleted");
47 STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
48           "Number of OpenMP runtime functions identified");
49 STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
50           "Number of OpenMP runtime function uses identified");
51 
52 #if !defined(NDEBUG)
53 static constexpr auto TAG = "[" DEBUG_TYPE "]";
54 #endif
55 
56 namespace {
57 
/// OpenMP-specific information. For now, this stores the RFIs and ICVs that
/// are also needed for Attributor runs.
60 struct OMPInformationCache : public InformationCache {
61   OMPInformationCache(Module &M, AnalysisGetter &AG,
62                       BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
63                       SmallPtrSetImpl<Function *> &ModuleSlice)
64       : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice),
65         OMPBuilder(M) {
66     OMPBuilder.initialize();
67     initializeRuntimeFunctions();
68     initializeInternalControlVars();
69   }
70 
71   /// Generic information that describes an internal control variable.
72   struct InternalControlVarInfo {
73     /// The kind, as described by InternalControlVar enum.
74     InternalControlVar Kind;
75 
76     /// The name of the ICV.
77     StringRef Name;
78 
79     /// Environment variable associated with this ICV.
80     StringRef EnvVarName;
81 
82     /// Initial value kind.
83     ICVInitValue InitKind;
84 
85     /// Initial value.
86     ConstantInt *InitValue;
87 
88     /// Setter RTL function associated with this ICV.
89     RuntimeFunction Setter;
90 
91     /// Getter RTL function associated with this ICV.
92     RuntimeFunction Getter;
93 
    /// RTL function corresponding to the override clause of this ICV.
95     RuntimeFunction Clause;
96   };
97 
  /// Generic information that describes a runtime function.
99   struct RuntimeFunctionInfo {
100 
101     /// The kind, as described by the RuntimeFunction enum.
102     RuntimeFunction Kind;
103 
104     /// The name of the function.
105     StringRef Name;
106 
107     /// Flag to indicate a variadic function.
108     bool IsVarArg;
109 
110     /// The return type of the function.
111     Type *ReturnType;
112 
113     /// The argument types of the function.
114     SmallVector<Type *, 8> ArgumentTypes;
115 
116     /// The declaration if available.
117     Function *Declaration = nullptr;
118 
119     /// Uses of this runtime function per function containing the use.
120     using UseVector = SmallVector<Use *, 16>;
121 
122     /// Return the vector of uses in function \p F.
123     UseVector &getOrCreateUseVector(Function *F) {
124       std::unique_ptr<UseVector> &UV = UsesMap[F];
125       if (!UV)
126         UV = std::make_unique<UseVector>();
127       return *UV;
128     }
129 
130     /// Return the vector of uses in function \p F or `nullptr` if there are
131     /// none.
132     const UseVector *getUseVector(Function &F) const {
133       auto I = UsesMap.find(&F);
134       if (I != UsesMap.end())
135         return I->second.get();
136       return nullptr;
137     }
138 
139     /// Return how many functions contain uses of this runtime function.
140     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
141 
142     /// Return the number of arguments (or the minimal number for variadic
143     /// functions).
144     size_t getNumArgs() const { return ArgumentTypes.size(); }
145 
    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback is passed the function in which the use was
    /// encountered as its second argument.
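    ///
    /// A minimal usage sketch (hypothetical): count all uses without
    /// forgetting any of them.
    /// \code
    ///   unsigned NumUses = 0;
    ///   RFI.foreachUse([&](Use &U, Function &F) {
    ///     ++NumUses;
    ///     return false; // Returning true would forget (drop) the use.
    ///   });
    /// \endcode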
149     void foreachUse(function_ref<bool(Use &, Function &)> CB) {
150       for (auto &It : UsesMap)
151         foreachUse(CB, It.first, It.second.get());
152     }
153 
154     /// Run the callback \p CB on each use within the function \p F and forget
155     /// the use if the result is true.
156     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F,
157                     UseVector *Uses = nullptr) {
      SmallVector<unsigned, 8> ToBeDeleted;
160 
161       unsigned Idx = 0;
162       UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F);
163 
164       for (Use *U : UV) {
165         if (CB(*U, *F))
166           ToBeDeleted.push_back(Idx);
167         ++Idx;
168       }
169 
      // Remove the to-be-deleted indices in reverse order as earlier removals
      // (which swap in the last element) do not affect the smaller indices.
172       while (!ToBeDeleted.empty()) {
173         unsigned Idx = ToBeDeleted.pop_back_val();
174         UV[Idx] = UV.back();
175         UV.pop_back();
176       }
177     }
178 
179   private:
180     /// Map from functions to all uses of this runtime function contained in
181     /// them.
182     DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap;
183   };
184 
185   /// The slice of the module we are allowed to look at.
186   SmallPtrSetImpl<Function *> &ModuleSlice;
187 
  /// An OpenMP-IR-Builder instance.
189   OpenMPIRBuilder OMPBuilder;
190 
191   /// Map from runtime function kind to the runtime function description.
192   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
193                   RuntimeFunction::OMPRTL___last>
194       RFIs;
195 
196   /// Map from ICV kind to the ICV description.
197   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
198                   InternalControlVar::ICV___last>
199       ICVs;
200 
201   /// Helper to initialize all internal control variable information for those
202   /// defined in OMPKinds.def.
203   void initializeInternalControlVars() {
204 #define ICV_RT_SET(_Name, RTL)                                                 \
205   {                                                                            \
206     auto &ICV = ICVs[_Name];                                                   \
207     ICV.Setter = RTL;                                                          \
208   }
209 #define ICV_RT_GET(Name, RTL)                                                  \
210   {                                                                            \
211     auto &ICV = ICVs[Name];                                                    \
212     ICV.Getter = RTL;                                                          \
213   }
214 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
215   {                                                                            \
216     auto &ICV = ICVs[Enum];                                                    \
217     ICV.Name = _Name;                                                          \
218     ICV.Kind = Enum;                                                           \
219     ICV.InitKind = Init;                                                       \
220     ICV.EnvVarName = _EnvVarName;                                              \
221     switch (ICV.InitKind) {                                                    \
222     case ICV_IMPLEMENTATION_DEFINED:                                           \
223       ICV.InitValue = nullptr;                                                 \
224       break;                                                                   \
225     case ICV_ZERO:                                                             \
226       ICV.InitValue = ConstantInt::get(                                        \
227           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
228       break;                                                                   \
229     case ICV_FALSE:                                                            \
230       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
231       break;                                                                   \
232     case ICV_LAST:                                                             \
233       break;                                                                   \
234     }                                                                          \
235   }
236 #include "llvm/Frontend/OpenMP/OMPKinds.def"
237   }
238 
  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType and argument types
  /// \p RTFArgTypes.
242   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
243                                   SmallVector<Type *, 8> &RTFArgTypes) {
244     // TODO: We should output information to the user (under debug output
245     //       and via remarks).
246 
247     if (!F)
248       return false;
249     if (F->getReturnType() != RTFRetType)
250       return false;
251     if (F->arg_size() != RTFArgTypes.size())
252       return false;
253 
254     auto RTFTyIt = RTFArgTypes.begin();
255     for (Argument &Arg : F->args()) {
256       if (Arg.getType() != *RTFTyIt)
257         return false;
258 
259       ++RTFTyIt;
260     }
261 
262     return true;
263   }
264 
  /// Helper to initialize all runtime function information for those defined
  /// in OMPKinds.def.
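  ///
  /// Each OMP_RTL entry in that file provides the enum kind, the function
  /// name, whether it is variadic, its return type, and its argument types.
  /// Roughly sketched (not verbatim), an entry expands like
  ///   OMP_RTL(OMPRTL_omp_get_thread_num, "omp_get_thread_num", false, Int32)
  /// and is matched against a declaration of that name in the module, if any.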
267   void initializeRuntimeFunctions() {
    // Helper to collect all uses of the declaration in the UsesMap.
269     auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
270       unsigned NumUses = 0;
271       if (!RFI.Declaration)
272         return NumUses;
273       OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
274 
275       NumOpenMPRuntimeFunctionsIdentified += 1;
276       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
277 
278       // TODO: We directly convert uses into proper calls and unknown uses.
279       for (Use &U : RFI.Declaration->uses()) {
280         if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
281           if (ModuleSlice.count(UserI->getFunction())) {
282             RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
283             ++NumUses;
284           }
285         } else {
286           RFI.getOrCreateUseVector(nullptr).push_back(&U);
287           ++NumUses;
288         }
289       }
290       return NumUses;
291     };
292 
293     Module &M = *((*ModuleSlice.begin())->getParent());
294 
295     // Helper macros for handling __VA_ARGS__ in OMP_RTL
296 #define OMP_TYPE(VarName, ...)                                                 \
297   Type *VarName = OMPBuilder.VarName;                                          \
298   (void)VarName;
299 
300 #define OMP_ARRAY_TYPE(VarName, ...)                                           \
301   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
302   (void)VarName##Ty;                                                           \
303   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
304   (void)VarName##PtrTy;
305 
306 #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
307   FunctionType *VarName = OMPBuilder.VarName;                                  \
308   (void)VarName;                                                               \
309   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
310   (void)VarName##Ptr;
311 
312 #define OMP_STRUCT_TYPE(VarName, ...)                                          \
313   StructType *VarName = OMPBuilder.VarName;                                    \
314   (void)VarName;                                                               \
315   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
316   (void)VarName##Ptr;
317 
318 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
319   {                                                                            \
320     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
321     Function *F = M.getFunction(_Name);                                        \
322     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
323       auto &RFI = RFIs[_Enum];                                                 \
324       RFI.Kind = _Enum;                                                        \
325       RFI.Name = _Name;                                                        \
326       RFI.IsVarArg = _IsVarArg;                                                \
327       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
328       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
329       RFI.Declaration = F;                                                     \
330       unsigned NumUses = CollectUses(RFI);                                     \
331       (void)NumUses;                                                           \
332       LLVM_DEBUG({                                                             \
333         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
334                << " found\n";                                                  \
335         if (RFI.Declaration)                                                   \
336           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
337                  << RFI.getNumFunctionsWithUses()                              \
338                  << " different functions.\n";                                 \
339       });                                                                      \
340     }                                                                          \
341   }
342 #include "llvm/Frontend/OpenMP/OMPKinds.def"
343 
344     // TODO: We should attach the attributes defined in OMPKinds.def.
345   }
346 };
347 
348 struct OpenMPOpt {
349 
350   using OptimizationRemarkGetter =
351       function_ref<OptimizationRemarkEmitter &(Function *)>;
352 
353   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
354             OptimizationRemarkGetter OREGetter,
355             OMPInformationCache &OMPInfoCache)
356       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
357         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache) {}
358 
359   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
360   bool run() {
361     bool Changed = false;
362 
363     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
364                       << " functions in a slice with "
365                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
366 
    // Print initial ICV values for testing.
    // FIXME: This should be done from the Attributor once it is added.
369     if (PrintICVValues) {
370       InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};
371 
372       for (Function *F : OMPInfoCache.ModuleSlice) {
373         for (auto ICV : ICVs) {
374           auto ICVInfo = OMPInfoCache.ICVs[ICV];
375           auto Remark = [&](OptimizationRemark OR) {
376             return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
377                       << " Value: "
378                       << (ICVInfo.InitValue
379                               ? ICVInfo.InitValue->getValue().toString(10, true)
380                               : "IMPLEMENTATION_DEFINED");
381           };
382 
383           emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
384         }
385       }
386     }
387 
388     Changed |= deduplicateRuntimeCalls();
389     Changed |= deleteParallelRegions();
390 
391     return Changed;
392   }
393 
  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given, it has to be the callee; otherwise a nullptr is returned.
396   static CallInst *getCallIfRegularCall(
397       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
398     CallInst *CI = dyn_cast<CallInst>(U.getUser());
399     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
400         (!RFI || CI->getCalledFunction() == RFI->Declaration))
401       return CI;
402     return nullptr;
403   }
404 
  /// Return the call if \p V is a regular call. If \p RFI is given, it has to
  /// be the callee; otherwise a nullptr is returned.
407   static CallInst *getCallIfRegularCall(
408       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
409     CallInst *CI = dyn_cast<CallInst>(&V);
410     if (CI && !CI->hasOperandBundles() &&
411         (!RFI || CI->getCalledFunction() == RFI->Declaration))
412       return CI;
413     return nullptr;
414   }
415 
416 private:
417   /// Try to delete parallel regions if possible.
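  ///
  /// Sketch of the pattern this looks for (argument types abbreviated, not
  /// the exact IR):
  ///
  ///   call void (...) @__kmpc_fork_call(%struct.ident_t* @loc, i32 0,
  ///                                     void (...)* @outlined)
  ///
  /// If the outlined callback @outlined is `readonly` and `willreturn`, the
  /// parallel region has no observable effect and the call can be erased.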
418   bool deleteParallelRegions() {
419     const unsigned CallbackCalleeOperand = 2;
420 
421     OMPInformationCache::RuntimeFunctionInfo &RFI =
422         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
423 
424     if (!RFI.Declaration)
425       return false;
426 
427     bool Changed = false;
428     auto DeleteCallCB = [&](Use &U, Function &) {
429       CallInst *CI = getCallIfRegularCall(U);
430       if (!CI)
431         return false;
432       auto *Fn = dyn_cast<Function>(
433           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
434       if (!Fn)
435         return false;
436       if (!Fn->onlyReadsMemory())
437         return false;
438       if (!Fn->hasFnAttribute(Attribute::WillReturn))
439         return false;
440 
441       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
442                         << CI->getCaller()->getName() << "\n");
443 
444       auto Remark = [&](OptimizationRemark OR) {
445         return OR << "Parallel region in "
446                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
447                   << " deleted";
448       };
449       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
450                                      Remark);
451 
452       CGUpdater.removeCallSite(*CI);
453       CI->eraseFromParent();
454       Changed = true;
455       ++NumOpenMPParallelRegionsDeleted;
456       return true;
457     };
458 
459     RFI.foreachUse(DeleteCallCB);
460 
461     return Changed;
462   }
463 
  /// Try to eliminate runtime calls by reusing existing ones.
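  ///
  /// Sketch of the intended effect (not necessarily the exact IR): two calls
  /// to a deduplicable runtime function in one function,
  ///
  ///   %a = call i32 @omp_get_num_threads()
  ///   %b = call i32 @omp_get_num_threads()
  ///
  /// are reduced to a single call whose result replaces all uses of %b.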
465   bool deduplicateRuntimeCalls() {
466     bool Changed = false;
467 
468     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
469         OMPRTL_omp_get_num_threads,
470         OMPRTL_omp_in_parallel,
471         OMPRTL_omp_get_cancellation,
472         OMPRTL_omp_get_thread_limit,
473         OMPRTL_omp_get_supported_active_levels,
474         OMPRTL_omp_get_level,
475         OMPRTL_omp_get_ancestor_thread_num,
476         OMPRTL_omp_get_team_size,
477         OMPRTL_omp_get_active_level,
478         OMPRTL_omp_in_final,
479         OMPRTL_omp_get_proc_bind,
480         OMPRTL_omp_get_num_places,
481         OMPRTL_omp_get_num_procs,
482         OMPRTL_omp_get_place_num,
483         OMPRTL_omp_get_partition_num_places,
484         OMPRTL_omp_get_partition_place_nums};
485 
486     // Global-tid is handled separately.
487     SmallSetVector<Value *, 16> GTIdArgs;
488     collectGlobalThreadIdArguments(GTIdArgs);
489     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
490                       << " global thread ID arguments\n");
491 
492     for (Function *F : SCC) {
493       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
494         deduplicateRuntimeCalls(*F,
495                                 OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
496 
497       // __kmpc_global_thread_num is special as we can replace it with an
498       // argument in enough cases to make it worth trying.
499       Value *GTIdArg = nullptr;
500       for (Argument &Arg : F->args())
501         if (GTIdArgs.count(&Arg)) {
502           GTIdArg = &Arg;
503           break;
504         }
505       Changed |= deduplicateRuntimeCalls(
506           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
507     }
508 
509     return Changed;
510   }
511 
512   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
513                                     bool GlobalOnly, bool &SingleChoice) {
514     if (CurrentIdent == NextIdent)
515       return CurrentIdent;
516 
517     // TODO: Figure out how to actually combine multiple debug locations. For
518     //       now we just keep an existing one if there is a single choice.
519     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
520       SingleChoice = !CurrentIdent;
521       return NextIdent;
522     }
523     return nullptr;
524   }
525 
  /// Return a `struct ident_t*` value that represents the ones used in the
527   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
528   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
529   /// return value we create one from scratch. We also do not yet combine
530   /// information, e.g., the source locations, see combinedIdentStruct.
531   Value *
532   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
533                                  Function &F, bool GlobalOnly) {
534     bool SingleChoice = true;
535     Value *Ident = nullptr;
536     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
537       CallInst *CI = getCallIfRegularCall(U, &RFI);
538       if (!CI || &F != &Caller)
539         return false;
      Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), GlobalOnly,
                                  SingleChoice);
542       return false;
543     };
544     RFI.foreachUse(CombineIdentStruct);
545 
546     if (!Ident || !SingleChoice) {
547       // The IRBuilder uses the insertion block to get to the module, this is
548       // unfortunate but we work around it for now.
549       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
550         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
551             &F.getEntryBlock(), F.getEntryBlock().begin()));
      // Create a fallback location if none was found.
553       // TODO: Use the debug locations of the calls instead.
554       Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
555       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
556     }
557     return Ident;
558   }
559 
  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given.
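  ///
  /// Sketch for the __kmpc_global_thread_num case (assuming %gtid is an
  /// argument known to hold the global thread id, see
  /// collectGlobalThreadIdArguments):
  ///
  ///   define internal void @body(i32* %x, i32 %gtid) {
  ///     %tid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
  ///     ...
  ///   }
  ///
  /// All uses of %tid can be rewired to %gtid and the call erased.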
562   bool deduplicateRuntimeCalls(Function &F,
563                                OMPInformationCache::RuntimeFunctionInfo &RFI,
564                                Value *ReplVal = nullptr) {
565     auto *UV = RFI.getUseVector(F);
566     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
567       return false;
568 
    LLVM_DEBUG(
        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
               << (ReplVal ? " with an existing value" : "") << "\n");
572 
573     assert((!ReplVal || (isa<Argument>(ReplVal) &&
574                          cast<Argument>(ReplVal)->getParent() == &F)) &&
575            "Unexpected replacement value!");
576 
577     // TODO: Use dominance to find a good position instead.
578     auto CanBeMoved = [this](CallBase &CB) {
579       unsigned NumArgs = CB.getNumArgOperands();
580       if (NumArgs == 0)
581         return true;
582       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
583         return false;
584       for (unsigned u = 1; u < NumArgs; ++u)
585         if (isa<Instruction>(CB.getArgOperand(u)))
586           return false;
587       return true;
588     };
589 
590     if (!ReplVal) {
591       for (Use *U : *UV)
592         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
593           if (!CanBeMoved(*CI))
594             continue;
595 
596           auto Remark = [&](OptimizationRemark OR) {
597             auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
598             return OR << "OpenMP runtime call "
599                       << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
600                       << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
601           };
602           emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
603 
604           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
605           ReplVal = CI;
606           break;
607         }
608       if (!ReplVal)
609         return false;
610     }
611 
612     // If we use a call as a replacement value we need to make sure the ident is
613     // valid at the new location. For now we just pick a global one, either
614     // existing and used by one of the calls, or created from scratch.
615     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
616       if (CI->getNumArgOperands() > 0 &&
617           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
618         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
619                                                       /* GlobalOnly */ true);
620         CI->setArgOperand(0, Ident);
621       }
622     }
623 
624     bool Changed = false;
625     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
626       CallInst *CI = getCallIfRegularCall(U, &RFI);
627       if (!CI || CI == ReplVal || &F != &Caller)
628         return false;
629       assert(CI->getCaller() == &F && "Unexpected call!");
630 
631       auto Remark = [&](OptimizationRemark OR) {
632         return OR << "OpenMP runtime call "
633                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
634       };
635       emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
636 
637       CGUpdater.removeCallSite(*CI);
638       CI->replaceAllUsesWith(ReplVal);
639       CI->eraseFromParent();
640       ++NumOpenMPRuntimeCallsDeduplicated;
641       Changed = true;
642       return true;
643     };
644     RFI.foreachUse(ReplaceAndDeleteCB);
645 
646     return Changed;
647   }
648 
649   /// Collect arguments that represent the global thread id in \p GTIdArgs.
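  ///
  /// Sketch of what qualifies (hypothetical local helper @h): if every call
  /// site of @h passes, in the same position, either the result of
  /// __kmpc_global_thread_num or an argument already known to be a GTId, then
  /// the corresponding parameter of @h is itself a GTId argument.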
650   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
651     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
652     //       initialization. We could define an AbstractAttribute instead and
653     //       run the Attributor here once it can be run as an SCC pass.
654 
655     // Helper to check the argument \p ArgNo at all call sites of \p F for
656     // a GTId.
657     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
658       if (!F.hasLocalLinkage())
659         return false;
660       for (Use &U : F.uses()) {
661         if (CallInst *CI = getCallIfRegularCall(U)) {
662           Value *ArgOp = CI->getArgOperand(ArgNo);
663           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
664               getCallIfRegularCall(
665                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
666             continue;
667         }
668         return false;
669       }
670       return true;
671     };
672 
673     // Helper to identify uses of a GTId as GTId arguments.
674     auto AddUserArgs = [&](Value &GTId) {
675       for (Use &U : GTId.uses())
676         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
677           if (CI->isArgOperand(&U))
678             if (Function *Callee = CI->getCalledFunction())
679               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
680                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
681     };
682 
683     // The argument users of __kmpc_global_thread_num calls are GTIds.
684     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
685         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
686 
687     GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) {
688       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
689         AddUserArgs(*CI);
690       return false;
691     });
692 
693     // Transitively search for more arguments by looking at the users of the
694     // ones we know already. During the search the GTIdArgs vector is extended
695     // so we cannot cache the size nor can we use a range based for.
696     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
697       AddUserArgs(*GTIdArgs[u]);
698   }
699 
700   /// Emit a remark generically
701   ///
702   /// This template function can be used to generically emit a remark. The
703   /// RemarkKind should be one of the following:
704   ///   - OptimizationRemark to indicate a successful optimization attempt
705   ///   - OptimizationRemarkMissed to report a failed optimization attempt
706   ///   - OptimizationRemarkAnalysis to provide additional information about an
707   ///     optimization attempt
708   ///
709   /// The remark is built using a callback function provided by the caller that
710   /// takes a RemarkKind as input and returns a RemarkKind.
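  ///
  /// A usage sketch mirroring the calls in this file:
  /// \code
  ///   auto Remark = [&](OptimizationRemark OR) {
  ///     return OR << "OpenMP runtime call "
  ///               << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
  ///   };
  ///   emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
  /// \endcode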
711   template <typename RemarkKind,
712             typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
713   void emitRemark(Instruction *Inst, StringRef RemarkName,
714                   RemarkCallBack &&RemarkCB) {
715     Function *F = Inst->getParent()->getParent();
716     auto &ORE = OREGetter(F);
717 
718     ORE.emit(
719         [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
720   }
721 
  /// Emit a remark on a function. Since only OptimizationRemark supports
  /// this, it can't be made generic.
724   void emitRemarkOnFunction(
725       Function *F, StringRef RemarkName,
726       function_ref<OptimizationRemark(OptimizationRemark &&)> &&RemarkCB) {
727     auto &ORE = OREGetter(F);
728 
729     ORE.emit([&]() {
730       return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
731     });
732   }
733 
  /// The underlying module.
735   Module &M;
736 
737   /// The SCC we are operating on.
738   SmallVectorImpl<Function *> &SCC;
739 
740   /// Callback to update the call graph, the first argument is a removed call,
741   /// the second an optional replacement call.
742   CallGraphUpdater &CGUpdater;
743 
744   /// Callback to get an OptimizationRemarkEmitter from a Function *
745   OptimizationRemarkGetter OREGetter;
746 
  /// OpenMP-specific information cache. Also used for Attributor runs.
748   OMPInformationCache &OMPInfoCache;
749 };
750 } // namespace
751 
752 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
753                                      CGSCCAnalysisManager &AM,
754                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
755   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
756     return PreservedAnalyses::all();
757 
758   if (DisableOpenMPOptimizations)
759     return PreservedAnalyses::all();
760 
761   SmallPtrSet<Function *, 16> ModuleSlice;
762   SmallVector<Function *, 16> SCC;
763   for (LazyCallGraph::Node &N : C) {
764     SCC.push_back(&N.getFunction());
765     ModuleSlice.insert(SCC.back());
766   }
767 
768   if (SCC.empty())
769     return PreservedAnalyses::all();
770 
771   FunctionAnalysisManager &FAM =
772       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
773 
774   AnalysisGetter AG(FAM);
775 
776   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
777     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
778   };
779 
780   CallGraphUpdater CGUpdater;
781   CGUpdater.initialize(CG, C, AM, UR);
782 
783   SetVector<Function *> Functions(SCC.begin(), SCC.end());
784   BumpPtrAllocator Allocator;
785   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
786                                 /*CGSCC*/ &Functions, ModuleSlice);
787 
788   // TODO: Compute the module slice we are allowed to look at.
789   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache);
790   bool Changed = OMPOpt.run();
791   (void)Changed;
792   return PreservedAnalyses::all();
793 }
794 
795 namespace {
796 
797 struct OpenMPOptLegacyPass : public CallGraphSCCPass {
798   CallGraphUpdater CGUpdater;
799   OpenMPInModule OMPInModule;
800   static char ID;
801 
802   OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
803     initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
804   }
805 
806   void getAnalysisUsage(AnalysisUsage &AU) const override {
807     CallGraphSCCPass::getAnalysisUsage(AU);
808   }
809 
810   bool doInitialization(CallGraph &CG) override {
811     // Disable the pass if there is no OpenMP (runtime call) in the module.
812     containsOpenMP(CG.getModule(), OMPInModule);
813     return false;
814   }
815 
816   bool runOnSCC(CallGraphSCC &CGSCC) override {
817     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
818       return false;
819     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
820       return false;
821 
822     SmallPtrSet<Function *, 16> ModuleSlice;
823     SmallVector<Function *, 16> SCC;
824     for (CallGraphNode *CGN : CGSCC)
825       if (Function *Fn = CGN->getFunction())
826         if (!Fn->isDeclaration()) {
827           SCC.push_back(Fn);
828           ModuleSlice.insert(Fn);
829         }
830 
831     if (SCC.empty())
832       return false;
833 
834     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
835     CGUpdater.initialize(CG, CGSCC);
836 
    // Maintain a map from functions to their ORE to avoid rebuilding them.
838     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
839     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
840       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
841       if (!ORE)
842         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
843       return *ORE;
844     };
845 
846     AnalysisGetter AG;
847     SetVector<Function *> Functions(SCC.begin(), SCC.end());
848     BumpPtrAllocator Allocator;
849     OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
850                                   Allocator,
851                                   /*CGSCC*/ &Functions, ModuleSlice);
852 
853     // TODO: Compute the module slice we are allowed to look at.
854     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache);
855     return OMPOpt.run();
856   }
857 
858   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
859 };
860 
861 } // end anonymous namespace
862 
863 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
864   if (OMPInModule.isKnown())
865     return OMPInModule;
866 
867 #define OMP_RTL(_Enum, _Name, ...)                                             \
868   if (M.getFunction(_Name))                                                    \
869     return OMPInModule = true;
870 #include "llvm/Frontend/OpenMP/OMPKinds.def"
871   return OMPInModule = false;
872 }
873 
874 char OpenMPOptLegacyPass::ID = 0;
875 
876 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
877                       "OpenMP specific optimizations", false, false)
878 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
879 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
880                     "OpenMP specific optimizations", false, false)
881 
882 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
883