19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
29548b74aSJohannes Doerfert //
39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69548b74aSJohannes Doerfert //
79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
89548b74aSJohannes Doerfert //
99548b74aSJohannes Doerfert // OpenMP specific optimizations:
109548b74aSJohannes Doerfert //
119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num.
129548b74aSJohannes Doerfert //
139548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
149548b74aSJohannes Doerfert 
159548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
169548b74aSJohannes Doerfert 
179548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
189548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
199548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
209548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
214d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
229548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
23e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
249548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
259548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
269548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
277cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
289548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
299548b74aSJohannes Doerfert 
309548b74aSJohannes Doerfert using namespace llvm;
319548b74aSJohannes Doerfert using namespace omp;
329548b74aSJohannes Doerfert 
339548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
349548b74aSJohannes Doerfert 
359548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
369548b74aSJohannes Doerfert     "openmp-opt-disable", cl::ZeroOrMore,
379548b74aSJohannes Doerfert     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
389548b74aSJohannes Doerfert     cl::init(false));
399548b74aSJohannes Doerfert 
400f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
410f426935Ssstefan1                                     cl::Hidden);
42e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
43e8039ad4SJohannes Doerfert                                         cl::init(false), cl::Hidden);
440f426935Ssstefan1 
45496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
46496f8e5bSHamilton Tobon Mosquera     "openmp-hide-memory-transfer-latency",
47496f8e5bSHamilton Tobon Mosquera     cl::desc("[WIP] Tries to hide the latency of host to device memory"
48496f8e5bSHamilton Tobon Mosquera              " transfers"),
49496f8e5bSHamilton Tobon Mosquera     cl::Hidden, cl::init(false));
50496f8e5bSHamilton Tobon Mosquera 
51496f8e5bSHamilton Tobon Mosquera 
529548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
539548b74aSJohannes Doerfert           "Number of OpenMP runtime calls deduplicated");
5455eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
5555eb714aSRoman Lebedev           "Number of OpenMP parallel regions deleted");
569548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
579548b74aSJohannes Doerfert           "Number of OpenMP runtime functions identified");
589548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
599548b74aSJohannes Doerfert           "Number of OpenMP runtime function uses identified");
60e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
61e8039ad4SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) identified");
625b0581aeSJohannes Doerfert STATISTIC(
635b0581aeSJohannes Doerfert     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
645b0581aeSJohannes Doerfert     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
659548b74aSJohannes Doerfert 
66263c4a3cSrathod-sahaab #if !defined(NDEBUG)
679548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
68a50c0b0dSMikael Holmen #endif
699548b74aSJohannes Doerfert 
709548b74aSJohannes Doerfert namespace {
719548b74aSJohannes Doerfert 
72b8235d2bSsstefan1 struct AAICVTracker;
73b8235d2bSsstefan1 
747cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
757cfd267cSsstefan1 /// Attributor runs.
767cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
777cfd267cSsstefan1   OMPInformationCache(Module &M, AnalysisGetter &AG,
78624d34afSJohannes Doerfert                       BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
79e8039ad4SJohannes Doerfert                       SmallPtrSetImpl<Kernel> &Kernels)
80624d34afSJohannes Doerfert       : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
81624d34afSJohannes Doerfert         Kernels(Kernels) {
82624d34afSJohannes Doerfert 
8361238d26Ssstefan1     OMPBuilder.initialize();
849548b74aSJohannes Doerfert     initializeRuntimeFunctions();
850f426935Ssstefan1     initializeInternalControlVars();
869548b74aSJohannes Doerfert   }
879548b74aSJohannes Doerfert 
880f426935Ssstefan1   /// Generic information that describes an internal control variable.
890f426935Ssstefan1   struct InternalControlVarInfo {
900f426935Ssstefan1     /// The kind, as described by InternalControlVar enum.
910f426935Ssstefan1     InternalControlVar Kind;
920f426935Ssstefan1 
930f426935Ssstefan1     /// The name of the ICV.
940f426935Ssstefan1     StringRef Name;
950f426935Ssstefan1 
960f426935Ssstefan1     /// Environment variable associated with this ICV.
970f426935Ssstefan1     StringRef EnvVarName;
980f426935Ssstefan1 
990f426935Ssstefan1     /// Initial value kind.
1000f426935Ssstefan1     ICVInitValue InitKind;
1010f426935Ssstefan1 
1020f426935Ssstefan1     /// Initial value.
1030f426935Ssstefan1     ConstantInt *InitValue;
1040f426935Ssstefan1 
1050f426935Ssstefan1     /// Setter RTL function associated with this ICV.
1060f426935Ssstefan1     RuntimeFunction Setter;
1070f426935Ssstefan1 
1080f426935Ssstefan1     /// Getter RTL function associated with this ICV.
1090f426935Ssstefan1     RuntimeFunction Getter;
1100f426935Ssstefan1 
1110f426935Ssstefan1     /// RTL Function corresponding to the override clause of this ICV
1120f426935Ssstefan1     RuntimeFunction Clause;
1130f426935Ssstefan1   };
1140f426935Ssstefan1 
1159548b74aSJohannes Doerfert   /// Generic information that describes a runtime function
1169548b74aSJohannes Doerfert   struct RuntimeFunctionInfo {
1178855fec3SJohannes Doerfert 
1189548b74aSJohannes Doerfert     /// The kind, as described by the RuntimeFunction enum.
1199548b74aSJohannes Doerfert     RuntimeFunction Kind;
1209548b74aSJohannes Doerfert 
1219548b74aSJohannes Doerfert     /// The name of the function.
1229548b74aSJohannes Doerfert     StringRef Name;
1239548b74aSJohannes Doerfert 
1249548b74aSJohannes Doerfert     /// Flag to indicate a variadic function.
1259548b74aSJohannes Doerfert     bool IsVarArg;
1269548b74aSJohannes Doerfert 
1279548b74aSJohannes Doerfert     /// The return type of the function.
1289548b74aSJohannes Doerfert     Type *ReturnType;
1299548b74aSJohannes Doerfert 
1309548b74aSJohannes Doerfert     /// The argument types of the function.
1319548b74aSJohannes Doerfert     SmallVector<Type *, 8> ArgumentTypes;
1329548b74aSJohannes Doerfert 
1339548b74aSJohannes Doerfert     /// The declaration if available.
134f09f4b26SJohannes Doerfert     Function *Declaration = nullptr;
1359548b74aSJohannes Doerfert 
1369548b74aSJohannes Doerfert     /// Uses of this runtime function per function containing the use.
1378855fec3SJohannes Doerfert     using UseVector = SmallVector<Use *, 16>;
1388855fec3SJohannes Doerfert 
139b8235d2bSsstefan1     /// Clear UsesMap for runtime function.
140b8235d2bSsstefan1     void clearUsesMap() { UsesMap.clear(); }
141b8235d2bSsstefan1 
14254bd3751SJohannes Doerfert     /// Boolean conversion that is true if the runtime function was found.
14354bd3751SJohannes Doerfert     operator bool() const { return Declaration; }
14454bd3751SJohannes Doerfert 
1458855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F.
1468855fec3SJohannes Doerfert     UseVector &getOrCreateUseVector(Function *F) {
147b8235d2bSsstefan1       std::shared_ptr<UseVector> &UV = UsesMap[F];
1488855fec3SJohannes Doerfert       if (!UV)
149b8235d2bSsstefan1         UV = std::make_shared<UseVector>();
1508855fec3SJohannes Doerfert       return *UV;
1518855fec3SJohannes Doerfert     }
1528855fec3SJohannes Doerfert 
1538855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F or `nullptr` if there are
1548855fec3SJohannes Doerfert     /// none.
1558855fec3SJohannes Doerfert     const UseVector *getUseVector(Function &F) const {
15695e57072SDavid Blaikie       auto I = UsesMap.find(&F);
15795e57072SDavid Blaikie       if (I != UsesMap.end())
15895e57072SDavid Blaikie         return I->second.get();
15995e57072SDavid Blaikie       return nullptr;
1608855fec3SJohannes Doerfert     }
1618855fec3SJohannes Doerfert 
1628855fec3SJohannes Doerfert     /// Return how many functions contain uses of this runtime function.
1638855fec3SJohannes Doerfert     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
1649548b74aSJohannes Doerfert 
1659548b74aSJohannes Doerfert     /// Return the number of arguments (or the minimal number for variadic
1669548b74aSJohannes Doerfert     /// functions).
1679548b74aSJohannes Doerfert     size_t getNumArgs() const { return ArgumentTypes.size(); }
1689548b74aSJohannes Doerfert 
1699548b74aSJohannes Doerfert     /// Run the callback \p CB on each use and forget the use if the result is
1709548b74aSJohannes Doerfert     /// true. The callback will be fed the function in which the use was
1719548b74aSJohannes Doerfert     /// encountered as second argument.
172624d34afSJohannes Doerfert     void foreachUse(SmallVectorImpl<Function *> &SCC,
173624d34afSJohannes Doerfert                     function_ref<bool(Use &, Function &)> CB) {
174624d34afSJohannes Doerfert       for (Function *F : SCC)
175624d34afSJohannes Doerfert         foreachUse(CB, F);
176e099c7b6Ssstefan1     }
177e099c7b6Ssstefan1 
178e099c7b6Ssstefan1     /// Run the callback \p CB on each use within the function \p F and forget
179e099c7b6Ssstefan1     /// the use if the result is true.
180624d34afSJohannes Doerfert     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
1818855fec3SJohannes Doerfert       SmallVector<unsigned, 8> ToBeDeleted;
1829548b74aSJohannes Doerfert       ToBeDeleted.clear();
183e099c7b6Ssstefan1 
1848855fec3SJohannes Doerfert       unsigned Idx = 0;
185624d34afSJohannes Doerfert       UseVector &UV = getOrCreateUseVector(F);
186e099c7b6Ssstefan1 
1878855fec3SJohannes Doerfert       for (Use *U : UV) {
188e099c7b6Ssstefan1         if (CB(*U, *F))
1898855fec3SJohannes Doerfert           ToBeDeleted.push_back(Idx);
1908855fec3SJohannes Doerfert         ++Idx;
1918855fec3SJohannes Doerfert       }
1928855fec3SJohannes Doerfert 
1938855fec3SJohannes Doerfert       // Remove the to-be-deleted indices in reverse order as prior
194b726c557SJohannes Doerfert       // modifications will not modify the smaller indices.
1958855fec3SJohannes Doerfert       while (!ToBeDeleted.empty()) {
1968855fec3SJohannes Doerfert         unsigned Idx = ToBeDeleted.pop_back_val();
1978855fec3SJohannes Doerfert         UV[Idx] = UV.back();
1988855fec3SJohannes Doerfert         UV.pop_back();
1999548b74aSJohannes Doerfert       }
2009548b74aSJohannes Doerfert     }
2018855fec3SJohannes Doerfert 
2028855fec3SJohannes Doerfert   private:
2038855fec3SJohannes Doerfert     /// Map from functions to all uses of this runtime function contained in
2048855fec3SJohannes Doerfert     /// them.
205b8235d2bSsstefan1     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
2069548b74aSJohannes Doerfert   };
2079548b74aSJohannes Doerfert 
2087cfd267cSsstefan1   /// An OpenMP-IR-Builder instance
2097cfd267cSsstefan1   OpenMPIRBuilder OMPBuilder;
2107cfd267cSsstefan1 
2117cfd267cSsstefan1   /// Map from runtime function kind to the runtime function description.
2127cfd267cSsstefan1   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
2137cfd267cSsstefan1                   RuntimeFunction::OMPRTL___last>
2147cfd267cSsstefan1       RFIs;
2157cfd267cSsstefan1 
2160f426935Ssstefan1   /// Map from ICV kind to the ICV description.
2170f426935Ssstefan1   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
2180f426935Ssstefan1                   InternalControlVar::ICV___last>
2190f426935Ssstefan1       ICVs;
2200f426935Ssstefan1 
2210f426935Ssstefan1   /// Helper to initialize all internal control variable information for those
2220f426935Ssstefan1   /// defined in OMPKinds.def.
2230f426935Ssstefan1   void initializeInternalControlVars() {
2240f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL)                                                 \
2250f426935Ssstefan1   {                                                                            \
2260f426935Ssstefan1     auto &ICV = ICVs[_Name];                                                   \
2270f426935Ssstefan1     ICV.Setter = RTL;                                                          \
2280f426935Ssstefan1   }
2290f426935Ssstefan1 #define ICV_RT_GET(Name, RTL)                                                  \
2300f426935Ssstefan1   {                                                                            \
2310f426935Ssstefan1     auto &ICV = ICVs[Name];                                                    \
2320f426935Ssstefan1     ICV.Getter = RTL;                                                          \
2330f426935Ssstefan1   }
2340f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
2350f426935Ssstefan1   {                                                                            \
2360f426935Ssstefan1     auto &ICV = ICVs[Enum];                                                    \
2370f426935Ssstefan1     ICV.Name = _Name;                                                          \
2380f426935Ssstefan1     ICV.Kind = Enum;                                                           \
2390f426935Ssstefan1     ICV.InitKind = Init;                                                       \
2400f426935Ssstefan1     ICV.EnvVarName = _EnvVarName;                                              \
2410f426935Ssstefan1     switch (ICV.InitKind) {                                                    \
242951e43f3Ssstefan1     case ICV_IMPLEMENTATION_DEFINED:                                           \
2430f426935Ssstefan1       ICV.InitValue = nullptr;                                                 \
2440f426935Ssstefan1       break;                                                                   \
245951e43f3Ssstefan1     case ICV_ZERO:                                                             \
2466aab27baSsstefan1       ICV.InitValue = ConstantInt::get(                                        \
2476aab27baSsstefan1           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
2480f426935Ssstefan1       break;                                                                   \
249951e43f3Ssstefan1     case ICV_FALSE:                                                            \
2506aab27baSsstefan1       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
2510f426935Ssstefan1       break;                                                                   \
252951e43f3Ssstefan1     case ICV_LAST:                                                             \
2530f426935Ssstefan1       break;                                                                   \
2540f426935Ssstefan1     }                                                                          \
2550f426935Ssstefan1   }
2560f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2570f426935Ssstefan1   }
2580f426935Ssstefan1 
2597cfd267cSsstefan1   /// Returns true if the function declaration \p F matches the runtime
2607cfd267cSsstefan1   /// function types, that is, return type \p RTFRetType, and argument types
2617cfd267cSsstefan1   /// \p RTFArgTypes.
2627cfd267cSsstefan1   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
2637cfd267cSsstefan1                                   SmallVector<Type *, 8> &RTFArgTypes) {
2647cfd267cSsstefan1     // TODO: We should output information to the user (under debug output
2657cfd267cSsstefan1     //       and via remarks).
2667cfd267cSsstefan1 
2677cfd267cSsstefan1     if (!F)
2687cfd267cSsstefan1       return false;
2697cfd267cSsstefan1     if (F->getReturnType() != RTFRetType)
2707cfd267cSsstefan1       return false;
2717cfd267cSsstefan1     if (F->arg_size() != RTFArgTypes.size())
2727cfd267cSsstefan1       return false;
2737cfd267cSsstefan1 
2747cfd267cSsstefan1     auto RTFTyIt = RTFArgTypes.begin();
2757cfd267cSsstefan1     for (Argument &Arg : F->args()) {
2767cfd267cSsstefan1       if (Arg.getType() != *RTFTyIt)
2777cfd267cSsstefan1         return false;
2787cfd267cSsstefan1 
2797cfd267cSsstefan1       ++RTFTyIt;
2807cfd267cSsstefan1     }
2817cfd267cSsstefan1 
2827cfd267cSsstefan1     return true;
2837cfd267cSsstefan1   }
2847cfd267cSsstefan1 
285b726c557SJohannes Doerfert   // Helper to collect all uses of the declaration in the UsesMap.
286b8235d2bSsstefan1   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
2877cfd267cSsstefan1     unsigned NumUses = 0;
2887cfd267cSsstefan1     if (!RFI.Declaration)
2897cfd267cSsstefan1       return NumUses;
2907cfd267cSsstefan1     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
2917cfd267cSsstefan1 
292b8235d2bSsstefan1     if (CollectStats) {
2937cfd267cSsstefan1       NumOpenMPRuntimeFunctionsIdentified += 1;
2947cfd267cSsstefan1       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
295b8235d2bSsstefan1     }
2967cfd267cSsstefan1 
2977cfd267cSsstefan1     // TODO: We directly convert uses into proper calls and unknown uses.
2987cfd267cSsstefan1     for (Use &U : RFI.Declaration->uses()) {
2997cfd267cSsstefan1       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
3007cfd267cSsstefan1         if (ModuleSlice.count(UserI->getFunction())) {
3017cfd267cSsstefan1           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
3027cfd267cSsstefan1           ++NumUses;
3037cfd267cSsstefan1         }
3047cfd267cSsstefan1       } else {
3057cfd267cSsstefan1         RFI.getOrCreateUseVector(nullptr).push_back(&U);
3067cfd267cSsstefan1         ++NumUses;
3077cfd267cSsstefan1       }
3087cfd267cSsstefan1     }
3097cfd267cSsstefan1     return NumUses;
310b8235d2bSsstefan1   }
3117cfd267cSsstefan1 
312b8235d2bSsstefan1   // Helper function to recollect uses of all runtime functions.
313b8235d2bSsstefan1   void recollectUses() {
314b8235d2bSsstefan1     for (int Idx = 0; Idx < RFIs.size(); ++Idx) {
315b8235d2bSsstefan1       auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)];
316b8235d2bSsstefan1       RFI.clearUsesMap();
317b8235d2bSsstefan1       collectUses(RFI, /*CollectStats*/ false);
318b8235d2bSsstefan1     }
319b8235d2bSsstefan1   }
320b8235d2bSsstefan1 
321b8235d2bSsstefan1   /// Helper to initialize all runtime function information for those defined
322b8235d2bSsstefan1   /// in OpenMPKinds.def.
323b8235d2bSsstefan1   void initializeRuntimeFunctions() {
3247cfd267cSsstefan1     Module &M = *((*ModuleSlice.begin())->getParent());
3257cfd267cSsstefan1 
3266aab27baSsstefan1     // Helper macros for handling __VA_ARGS__ in OMP_RTL
3276aab27baSsstefan1 #define OMP_TYPE(VarName, ...)                                                 \
3286aab27baSsstefan1   Type *VarName = OMPBuilder.VarName;                                          \
3296aab27baSsstefan1   (void)VarName;
3306aab27baSsstefan1 
3316aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...)                                           \
3326aab27baSsstefan1   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
3336aab27baSsstefan1   (void)VarName##Ty;                                                           \
3346aab27baSsstefan1   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
3356aab27baSsstefan1   (void)VarName##PtrTy;
3366aab27baSsstefan1 
3376aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
3386aab27baSsstefan1   FunctionType *VarName = OMPBuilder.VarName;                                  \
3396aab27baSsstefan1   (void)VarName;                                                               \
3406aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3416aab27baSsstefan1   (void)VarName##Ptr;
3426aab27baSsstefan1 
3436aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...)                                          \
3446aab27baSsstefan1   StructType *VarName = OMPBuilder.VarName;                                    \
3456aab27baSsstefan1   (void)VarName;                                                               \
3466aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3476aab27baSsstefan1   (void)VarName##Ptr;
3486aab27baSsstefan1 
3497cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
3507cfd267cSsstefan1   {                                                                            \
3517cfd267cSsstefan1     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
3527cfd267cSsstefan1     Function *F = M.getFunction(_Name);                                        \
3536aab27baSsstefan1     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
3547cfd267cSsstefan1       auto &RFI = RFIs[_Enum];                                                 \
3557cfd267cSsstefan1       RFI.Kind = _Enum;                                                        \
3567cfd267cSsstefan1       RFI.Name = _Name;                                                        \
3577cfd267cSsstefan1       RFI.IsVarArg = _IsVarArg;                                                \
3586aab27baSsstefan1       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
3597cfd267cSsstefan1       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
3607cfd267cSsstefan1       RFI.Declaration = F;                                                     \
361b8235d2bSsstefan1       unsigned NumUses = collectUses(RFI);                                     \
3627cfd267cSsstefan1       (void)NumUses;                                                           \
3637cfd267cSsstefan1       LLVM_DEBUG({                                                             \
3647cfd267cSsstefan1         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
3657cfd267cSsstefan1                << " found\n";                                                  \
3667cfd267cSsstefan1         if (RFI.Declaration)                                                   \
3677cfd267cSsstefan1           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
3687cfd267cSsstefan1                  << RFI.getNumFunctionsWithUses()                              \
3697cfd267cSsstefan1                  << " different functions.\n";                                 \
3707cfd267cSsstefan1       });                                                                      \
3717cfd267cSsstefan1     }                                                                          \
3727cfd267cSsstefan1   }
3737cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3747cfd267cSsstefan1 
3757cfd267cSsstefan1     // TODO: We should attach the attributes defined in OMPKinds.def.
3767cfd267cSsstefan1   }
377e8039ad4SJohannes Doerfert 
378e8039ad4SJohannes Doerfert   /// Collection of known kernels (\see Kernel) in the module.
379e8039ad4SJohannes Doerfert   SmallPtrSetImpl<Kernel> &Kernels;
3807cfd267cSsstefan1 };
3817cfd267cSsstefan1 
3827cfd267cSsstefan1 struct OpenMPOpt {
3837cfd267cSsstefan1 
3847cfd267cSsstefan1   using OptimizationRemarkGetter =
3857cfd267cSsstefan1       function_ref<OptimizationRemarkEmitter &(Function *)>;
3867cfd267cSsstefan1 
3877cfd267cSsstefan1   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
3887cfd267cSsstefan1             OptimizationRemarkGetter OREGetter,
389b8235d2bSsstefan1             OMPInformationCache &OMPInfoCache, Attributor &A)
39077b79d79SMehdi Amini       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
391b8235d2bSsstefan1         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
3927cfd267cSsstefan1 
3939548b74aSJohannes Doerfert   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
3949548b74aSJohannes Doerfert   bool run() {
39554bd3751SJohannes Doerfert     if (SCC.empty())
39654bd3751SJohannes Doerfert       return false;
39754bd3751SJohannes Doerfert 
3989548b74aSJohannes Doerfert     bool Changed = false;
3999548b74aSJohannes Doerfert 
4009548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
40177b79d79SMehdi Amini                       << " functions in a slice with "
40277b79d79SMehdi Amini                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
4039548b74aSJohannes Doerfert 
404e8039ad4SJohannes Doerfert     if (PrintICVValues)
405e8039ad4SJohannes Doerfert       printICVs();
406e8039ad4SJohannes Doerfert     if (PrintOpenMPKernels)
407e8039ad4SJohannes Doerfert       printKernels();
408e8039ad4SJohannes Doerfert 
4095b0581aeSJohannes Doerfert     Changed |= rewriteDeviceCodeStateMachine();
4105b0581aeSJohannes Doerfert 
411e8039ad4SJohannes Doerfert     Changed |= runAttributor();
412e8039ad4SJohannes Doerfert 
413e8039ad4SJohannes Doerfert     // Recollect uses, in case Attributor deleted any.
414e8039ad4SJohannes Doerfert     OMPInfoCache.recollectUses();
415e8039ad4SJohannes Doerfert 
416e8039ad4SJohannes Doerfert     Changed |= deduplicateRuntimeCalls();
417e8039ad4SJohannes Doerfert     Changed |= deleteParallelRegions();
418496f8e5bSHamilton Tobon Mosquera     if (HideMemoryTransferLatency)
419496f8e5bSHamilton Tobon Mosquera       Changed |= hideMemTransfersLatency();
420e8039ad4SJohannes Doerfert 
421e8039ad4SJohannes Doerfert     return Changed;
422e8039ad4SJohannes Doerfert   }
423e8039ad4SJohannes Doerfert 
4240f426935Ssstefan1   /// Print initial ICV values for testing.
4250f426935Ssstefan1   /// FIXME: This should be done from the Attributor once it is added.
426e8039ad4SJohannes Doerfert   void printICVs() const {
4270f426935Ssstefan1     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};
4280f426935Ssstefan1 
4290f426935Ssstefan1     for (Function *F : OMPInfoCache.ModuleSlice) {
4300f426935Ssstefan1       for (auto ICV : ICVs) {
4310f426935Ssstefan1         auto ICVInfo = OMPInfoCache.ICVs[ICV];
4320f426935Ssstefan1         auto Remark = [&](OptimizationRemark OR) {
4330f426935Ssstefan1           return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
4340f426935Ssstefan1                     << " Value: "
4350f426935Ssstefan1                     << (ICVInfo.InitValue
4360f426935Ssstefan1                             ? ICVInfo.InitValue->getValue().toString(10, true)
4370f426935Ssstefan1                             : "IMPLEMENTATION_DEFINED");
4380f426935Ssstefan1         };
4390f426935Ssstefan1 
4400f426935Ssstefan1         emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
4410f426935Ssstefan1       }
4420f426935Ssstefan1     }
4430f426935Ssstefan1   }
4440f426935Ssstefan1 
445e8039ad4SJohannes Doerfert   /// Print OpenMP GPU kernels for testing.
446e8039ad4SJohannes Doerfert   void printKernels() const {
447e8039ad4SJohannes Doerfert     for (Function *F : SCC) {
448e8039ad4SJohannes Doerfert       if (!OMPInfoCache.Kernels.count(F))
449e8039ad4SJohannes Doerfert         continue;
450b8235d2bSsstefan1 
451e8039ad4SJohannes Doerfert       auto Remark = [&](OptimizationRemark OR) {
452e8039ad4SJohannes Doerfert         return OR << "OpenMP GPU kernel "
453e8039ad4SJohannes Doerfert                   << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
454e8039ad4SJohannes Doerfert       };
455b8235d2bSsstefan1 
456e8039ad4SJohannes Doerfert       emitRemarkOnFunction(F, "OpenMPGPU", Remark);
457e8039ad4SJohannes Doerfert     }
4589548b74aSJohannes Doerfert   }
4599548b74aSJohannes Doerfert 
4607cfd267cSsstefan1   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
4617cfd267cSsstefan1   /// given it has to be the callee or a nullptr is returned.
4627cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
4637cfd267cSsstefan1       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
4647cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(U.getUser());
4657cfd267cSsstefan1     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
4667cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
4677cfd267cSsstefan1       return CI;
4687cfd267cSsstefan1     return nullptr;
4697cfd267cSsstefan1   }
4707cfd267cSsstefan1 
4717cfd267cSsstefan1   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
4727cfd267cSsstefan1   /// the callee or a nullptr is returned.
4737cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
4747cfd267cSsstefan1       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
4757cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(&V);
4767cfd267cSsstefan1     if (CI && !CI->hasOperandBundles() &&
4777cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
4787cfd267cSsstefan1       return CI;
4797cfd267cSsstefan1     return nullptr;
4807cfd267cSsstefan1   }
4817cfd267cSsstefan1 
4829548b74aSJohannes Doerfert private:
4839d38f98dSJohannes Doerfert   /// Try to delete parallel regions if possible.
484e565db49SJohannes Doerfert   bool deleteParallelRegions() {
485e565db49SJohannes Doerfert     const unsigned CallbackCalleeOperand = 2;
486e565db49SJohannes Doerfert 
4877cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &RFI =
4887cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
4897cfd267cSsstefan1 
490e565db49SJohannes Doerfert     if (!RFI.Declaration)
491e565db49SJohannes Doerfert       return false;
492e565db49SJohannes Doerfert 
493e565db49SJohannes Doerfert     bool Changed = false;
494e565db49SJohannes Doerfert     auto DeleteCallCB = [&](Use &U, Function &) {
495e565db49SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U);
496e565db49SJohannes Doerfert       if (!CI)
497e565db49SJohannes Doerfert         return false;
498e565db49SJohannes Doerfert       auto *Fn = dyn_cast<Function>(
499e565db49SJohannes Doerfert           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
500e565db49SJohannes Doerfert       if (!Fn)
501e565db49SJohannes Doerfert         return false;
502e565db49SJohannes Doerfert       if (!Fn->onlyReadsMemory())
503e565db49SJohannes Doerfert         return false;
504e565db49SJohannes Doerfert       if (!Fn->hasFnAttribute(Attribute::WillReturn))
505e565db49SJohannes Doerfert         return false;
506e565db49SJohannes Doerfert 
507e565db49SJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
508e565db49SJohannes Doerfert                         << CI->getCaller()->getName() << "\n");
5094d4ea9acSHuber, Joseph 
5104d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
5114d4ea9acSHuber, Joseph         return OR << "Parallel region in "
5124d4ea9acSHuber, Joseph                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
5134d4ea9acSHuber, Joseph                   << " deleted";
5144d4ea9acSHuber, Joseph       };
5154d4ea9acSHuber, Joseph       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
5164d4ea9acSHuber, Joseph                                      Remark);
5174d4ea9acSHuber, Joseph 
518e565db49SJohannes Doerfert       CGUpdater.removeCallSite(*CI);
519e565db49SJohannes Doerfert       CI->eraseFromParent();
520e565db49SJohannes Doerfert       Changed = true;
52155eb714aSRoman Lebedev       ++NumOpenMPParallelRegionsDeleted;
522e565db49SJohannes Doerfert       return true;
523e565db49SJohannes Doerfert     };
524e565db49SJohannes Doerfert 
525624d34afSJohannes Doerfert     RFI.foreachUse(SCC, DeleteCallCB);
526e565db49SJohannes Doerfert 
527e565db49SJohannes Doerfert     return Changed;
528e565db49SJohannes Doerfert   }
529e565db49SJohannes Doerfert 
530b726c557SJohannes Doerfert   /// Try to eliminate runtime calls by reusing existing ones.
5319548b74aSJohannes Doerfert   bool deduplicateRuntimeCalls() {
5329548b74aSJohannes Doerfert     bool Changed = false;
5339548b74aSJohannes Doerfert 
534e28936f6SJohannes Doerfert     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
535e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_threads,
536e28936f6SJohannes Doerfert         OMPRTL_omp_in_parallel,
537e28936f6SJohannes Doerfert         OMPRTL_omp_get_cancellation,
538e28936f6SJohannes Doerfert         OMPRTL_omp_get_thread_limit,
539e28936f6SJohannes Doerfert         OMPRTL_omp_get_supported_active_levels,
540e28936f6SJohannes Doerfert         OMPRTL_omp_get_level,
541e28936f6SJohannes Doerfert         OMPRTL_omp_get_ancestor_thread_num,
542e28936f6SJohannes Doerfert         OMPRTL_omp_get_team_size,
543e28936f6SJohannes Doerfert         OMPRTL_omp_get_active_level,
544e28936f6SJohannes Doerfert         OMPRTL_omp_in_final,
545e28936f6SJohannes Doerfert         OMPRTL_omp_get_proc_bind,
546e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_places,
547e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_procs,
548e28936f6SJohannes Doerfert         OMPRTL_omp_get_place_num,
549e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_num_places,
550e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_place_nums};
551e28936f6SJohannes Doerfert 
552bc93c2d7SMarek Kurdej     // Global-tid is handled separately.
5539548b74aSJohannes Doerfert     SmallSetVector<Value *, 16> GTIdArgs;
5549548b74aSJohannes Doerfert     collectGlobalThreadIdArguments(GTIdArgs);
5559548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
5569548b74aSJohannes Doerfert                       << " global thread ID arguments\n");
5579548b74aSJohannes Doerfert 
5589548b74aSJohannes Doerfert     for (Function *F : SCC) {
559e28936f6SJohannes Doerfert       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
5604e29d256Sserge-sans-paille         Changed |= deduplicateRuntimeCalls(
5614e29d256Sserge-sans-paille             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
562e28936f6SJohannes Doerfert 
563e28936f6SJohannes Doerfert       // __kmpc_global_thread_num is special as we can replace it with an
564e28936f6SJohannes Doerfert       // argument in enough cases to make it worth trying.
5659548b74aSJohannes Doerfert       Value *GTIdArg = nullptr;
5669548b74aSJohannes Doerfert       for (Argument &Arg : F->args())
5679548b74aSJohannes Doerfert         if (GTIdArgs.count(&Arg)) {
5689548b74aSJohannes Doerfert           GTIdArg = &Arg;
5699548b74aSJohannes Doerfert           break;
5709548b74aSJohannes Doerfert         }
5719548b74aSJohannes Doerfert       Changed |= deduplicateRuntimeCalls(
5727cfd267cSsstefan1           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
5739548b74aSJohannes Doerfert     }
5749548b74aSJohannes Doerfert 
5759548b74aSJohannes Doerfert     return Changed;
5769548b74aSJohannes Doerfert   }
5779548b74aSJohannes Doerfert 
578496f8e5bSHamilton Tobon Mosquera   /// Tries to hide the latency of runtime calls that involve host to
579496f8e5bSHamilton Tobon Mosquera   /// device memory transfers by splitting them into their "issue" and "wait"
580496f8e5bSHamilton Tobon Mosquera   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
581496f8e5bSHamilton Tobon Mosquera   /// moved downards as much as possible. The "issue" issues the memory transfer
582496f8e5bSHamilton Tobon Mosquera   /// asynchronously, returning a handle. The "wait" waits in the returned
583496f8e5bSHamilton Tobon Mosquera   /// handle for the memory transfer to finish.
584496f8e5bSHamilton Tobon Mosquera   bool hideMemTransfersLatency() {
585496f8e5bSHamilton Tobon Mosquera     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
586496f8e5bSHamilton Tobon Mosquera     bool Changed = false;
587496f8e5bSHamilton Tobon Mosquera     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
588496f8e5bSHamilton Tobon Mosquera       auto *RTCall = getCallIfRegularCall(U, &RFI);
589496f8e5bSHamilton Tobon Mosquera       if (!RTCall)
590496f8e5bSHamilton Tobon Mosquera         return false;
591496f8e5bSHamilton Tobon Mosquera 
592bd2fa181SHamilton Tobon Mosquera       // TODO: Check if can be moved upwards.
593bd2fa181SHamilton Tobon Mosquera       bool WasSplit = false;
594bd2fa181SHamilton Tobon Mosquera       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
595bd2fa181SHamilton Tobon Mosquera       if (WaitMovementPoint)
596bd2fa181SHamilton Tobon Mosquera         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
597bd2fa181SHamilton Tobon Mosquera 
598496f8e5bSHamilton Tobon Mosquera       Changed |= WasSplit;
599496f8e5bSHamilton Tobon Mosquera       return WasSplit;
600496f8e5bSHamilton Tobon Mosquera     };
601496f8e5bSHamilton Tobon Mosquera     RFI.foreachUse(SCC, SplitMemTransfers);
602496f8e5bSHamilton Tobon Mosquera 
603496f8e5bSHamilton Tobon Mosquera     return Changed;
604496f8e5bSHamilton Tobon Mosquera   }
605496f8e5bSHamilton Tobon Mosquera 
606bd2fa181SHamilton Tobon Mosquera   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
607bd2fa181SHamilton Tobon Mosquera   /// moved. Returns nullptr if the movement is not possible, or not worth it.
608bd2fa181SHamilton Tobon Mosquera   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
609bd2fa181SHamilton Tobon Mosquera     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
610bd2fa181SHamilton Tobon Mosquera     //  Make it traverse the CFG.
611bd2fa181SHamilton Tobon Mosquera 
612bd2fa181SHamilton Tobon Mosquera     Instruction *CurrentI = &RuntimeCall;
613bd2fa181SHamilton Tobon Mosquera     bool IsWorthIt = false;
614bd2fa181SHamilton Tobon Mosquera     while ((CurrentI = CurrentI->getNextNode())) {
615bd2fa181SHamilton Tobon Mosquera 
616bd2fa181SHamilton Tobon Mosquera       // TODO: Once we detect the regions to be offloaded we should use the
617bd2fa181SHamilton Tobon Mosquera       //  alias analysis manager to check if CurrentI may modify one of
618bd2fa181SHamilton Tobon Mosquera       //  the offloaded regions.
619bd2fa181SHamilton Tobon Mosquera       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
620bd2fa181SHamilton Tobon Mosquera         if (IsWorthIt)
621bd2fa181SHamilton Tobon Mosquera           return CurrentI;
622bd2fa181SHamilton Tobon Mosquera 
623bd2fa181SHamilton Tobon Mosquera         return nullptr;
624bd2fa181SHamilton Tobon Mosquera       }
625bd2fa181SHamilton Tobon Mosquera 
626bd2fa181SHamilton Tobon Mosquera       // FIXME: For now if we move it over anything without side effect
627bd2fa181SHamilton Tobon Mosquera       //  is worth it.
628bd2fa181SHamilton Tobon Mosquera       IsWorthIt = true;
629bd2fa181SHamilton Tobon Mosquera     }
630bd2fa181SHamilton Tobon Mosquera 
631bd2fa181SHamilton Tobon Mosquera     // Return end of BasicBlock.
632bd2fa181SHamilton Tobon Mosquera     return RuntimeCall.getParent()->getTerminator();
633bd2fa181SHamilton Tobon Mosquera   }
634bd2fa181SHamilton Tobon Mosquera 
635496f8e5bSHamilton Tobon Mosquera   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
636bd2fa181SHamilton Tobon Mosquera   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
637bd2fa181SHamilton Tobon Mosquera                                Instruction &WaitMovementPoint) {
638496f8e5bSHamilton Tobon Mosquera     auto &IRBuilder = OMPInfoCache.OMPBuilder;
639496f8e5bSHamilton Tobon Mosquera     // Add "issue" runtime call declaration:
640496f8e5bSHamilton Tobon Mosquera     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
641496f8e5bSHamilton Tobon Mosquera     //   i8**, i8**, i64*, i64*)
642496f8e5bSHamilton Tobon Mosquera     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
643496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_issue);
644496f8e5bSHamilton Tobon Mosquera 
645496f8e5bSHamilton Tobon Mosquera     // Change RuntimeCall call site for its asynchronous version.
646496f8e5bSHamilton Tobon Mosquera     SmallVector<Value *, 8> Args;
647bd2fa181SHamilton Tobon Mosquera     for (auto &Arg : RuntimeCall.args())
648496f8e5bSHamilton Tobon Mosquera       Args.push_back(Arg.get());
649496f8e5bSHamilton Tobon Mosquera 
650496f8e5bSHamilton Tobon Mosquera     CallInst *IssueCallsite =
651bd2fa181SHamilton Tobon Mosquera         CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall);
652bd2fa181SHamilton Tobon Mosquera     RuntimeCall.eraseFromParent();
653496f8e5bSHamilton Tobon Mosquera 
654496f8e5bSHamilton Tobon Mosquera     // Add "wait" runtime call declaration:
655496f8e5bSHamilton Tobon Mosquera     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
656496f8e5bSHamilton Tobon Mosquera     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
657496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_wait);
658496f8e5bSHamilton Tobon Mosquera 
659496f8e5bSHamilton Tobon Mosquera     // Add call site to WaitDecl.
660496f8e5bSHamilton Tobon Mosquera     Value *WaitParams[2] = {
661496f8e5bSHamilton Tobon Mosquera         IssueCallsite->getArgOperand(0), // device_id.
662496f8e5bSHamilton Tobon Mosquera         IssueCallsite // returned handle.
663496f8e5bSHamilton Tobon Mosquera     };
664bd2fa181SHamilton Tobon Mosquera     CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
665496f8e5bSHamilton Tobon Mosquera 
666496f8e5bSHamilton Tobon Mosquera     return true;
667496f8e5bSHamilton Tobon Mosquera   }
668496f8e5bSHamilton Tobon Mosquera 
669dc3b5b00SJohannes Doerfert   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
670dc3b5b00SJohannes Doerfert                                     bool GlobalOnly, bool &SingleChoice) {
671dc3b5b00SJohannes Doerfert     if (CurrentIdent == NextIdent)
672dc3b5b00SJohannes Doerfert       return CurrentIdent;
673dc3b5b00SJohannes Doerfert 
674396b7253SJohannes Doerfert     // TODO: Figure out how to actually combine multiple debug locations. For
675dc3b5b00SJohannes Doerfert     //       now we just keep an existing one if there is a single choice.
676dc3b5b00SJohannes Doerfert     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
677dc3b5b00SJohannes Doerfert       SingleChoice = !CurrentIdent;
678dc3b5b00SJohannes Doerfert       return NextIdent;
679dc3b5b00SJohannes Doerfert     }
680396b7253SJohannes Doerfert     return nullptr;
681396b7253SJohannes Doerfert   }
682396b7253SJohannes Doerfert 
683396b7253SJohannes Doerfert   /// Return an `struct ident_t*` value that represents the ones used in the
684396b7253SJohannes Doerfert   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
685396b7253SJohannes Doerfert   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
686396b7253SJohannes Doerfert   /// return value we create one from scratch. We also do not yet combine
687396b7253SJohannes Doerfert   /// information, e.g., the source locations, see combinedIdentStruct.
6887cfd267cSsstefan1   Value *
6897cfd267cSsstefan1   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
6907cfd267cSsstefan1                                  Function &F, bool GlobalOnly) {
691dc3b5b00SJohannes Doerfert     bool SingleChoice = true;
692396b7253SJohannes Doerfert     Value *Ident = nullptr;
693396b7253SJohannes Doerfert     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
694396b7253SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
695396b7253SJohannes Doerfert       if (!CI || &F != &Caller)
696396b7253SJohannes Doerfert         return false;
697396b7253SJohannes Doerfert       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
698dc3b5b00SJohannes Doerfert                                   /* GlobalOnly */ true, SingleChoice);
699396b7253SJohannes Doerfert       return false;
700396b7253SJohannes Doerfert     };
701624d34afSJohannes Doerfert     RFI.foreachUse(SCC, CombineIdentStruct);
702396b7253SJohannes Doerfert 
703dc3b5b00SJohannes Doerfert     if (!Ident || !SingleChoice) {
704396b7253SJohannes Doerfert       // The IRBuilder uses the insertion block to get to the module, this is
705396b7253SJohannes Doerfert       // unfortunate but we work around it for now.
7067cfd267cSsstefan1       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
7077cfd267cSsstefan1         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
708396b7253SJohannes Doerfert             &F.getEntryBlock(), F.getEntryBlock().begin()));
709396b7253SJohannes Doerfert       // Create a fallback location if non was found.
710396b7253SJohannes Doerfert       // TODO: Use the debug locations of the calls instead.
7117cfd267cSsstefan1       Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
7127cfd267cSsstefan1       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
713396b7253SJohannes Doerfert     }
714396b7253SJohannes Doerfert     return Ident;
715396b7253SJohannes Doerfert   }
716396b7253SJohannes Doerfert 
717b726c557SJohannes Doerfert   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
7189548b74aSJohannes Doerfert   /// \p ReplVal if given.
7197cfd267cSsstefan1   bool deduplicateRuntimeCalls(Function &F,
7207cfd267cSsstefan1                                OMPInformationCache::RuntimeFunctionInfo &RFI,
7219548b74aSJohannes Doerfert                                Value *ReplVal = nullptr) {
7228855fec3SJohannes Doerfert     auto *UV = RFI.getUseVector(F);
7238855fec3SJohannes Doerfert     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
724b1fbf438SRoman Lebedev       return false;
725b1fbf438SRoman Lebedev 
7267cfd267cSsstefan1     LLVM_DEBUG(
7277cfd267cSsstefan1         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
7287cfd267cSsstefan1                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
7297cfd267cSsstefan1 
730ab3da5ddSMichael Liao     assert((!ReplVal || (isa<Argument>(ReplVal) &&
731ab3da5ddSMichael Liao                          cast<Argument>(ReplVal)->getParent() == &F)) &&
7329548b74aSJohannes Doerfert            "Unexpected replacement value!");
733396b7253SJohannes Doerfert 
734396b7253SJohannes Doerfert     // TODO: Use dominance to find a good position instead.
7356aab27baSsstefan1     auto CanBeMoved = [this](CallBase &CB) {
736396b7253SJohannes Doerfert       unsigned NumArgs = CB.getNumArgOperands();
737396b7253SJohannes Doerfert       if (NumArgs == 0)
738396b7253SJohannes Doerfert         return true;
7396aab27baSsstefan1       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
740396b7253SJohannes Doerfert         return false;
741396b7253SJohannes Doerfert       for (unsigned u = 1; u < NumArgs; ++u)
742396b7253SJohannes Doerfert         if (isa<Instruction>(CB.getArgOperand(u)))
743396b7253SJohannes Doerfert           return false;
744396b7253SJohannes Doerfert       return true;
745396b7253SJohannes Doerfert     };
746396b7253SJohannes Doerfert 
7479548b74aSJohannes Doerfert     if (!ReplVal) {
7488855fec3SJohannes Doerfert       for (Use *U : *UV)
7499548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
750396b7253SJohannes Doerfert           if (!CanBeMoved(*CI))
751396b7253SJohannes Doerfert             continue;
7524d4ea9acSHuber, Joseph 
7534d4ea9acSHuber, Joseph           auto Remark = [&](OptimizationRemark OR) {
7544d4ea9acSHuber, Joseph             auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
7554d4ea9acSHuber, Joseph             return OR << "OpenMP runtime call "
7564d4ea9acSHuber, Joseph                       << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
7574d4ea9acSHuber, Joseph                       << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
7584d4ea9acSHuber, Joseph           };
7594d4ea9acSHuber, Joseph           emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
7604d4ea9acSHuber, Joseph 
7619548b74aSJohannes Doerfert           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
7629548b74aSJohannes Doerfert           ReplVal = CI;
7639548b74aSJohannes Doerfert           break;
7649548b74aSJohannes Doerfert         }
7659548b74aSJohannes Doerfert       if (!ReplVal)
7669548b74aSJohannes Doerfert         return false;
7679548b74aSJohannes Doerfert     }
7689548b74aSJohannes Doerfert 
769396b7253SJohannes Doerfert     // If we use a call as a replacement value we need to make sure the ident is
770396b7253SJohannes Doerfert     // valid at the new location. For now we just pick a global one, either
771396b7253SJohannes Doerfert     // existing and used by one of the calls, or created from scratch.
772396b7253SJohannes Doerfert     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
773396b7253SJohannes Doerfert       if (CI->getNumArgOperands() > 0 &&
7746aab27baSsstefan1           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
775396b7253SJohannes Doerfert         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
776396b7253SJohannes Doerfert                                                       /* GlobalOnly */ true);
777396b7253SJohannes Doerfert         CI->setArgOperand(0, Ident);
778396b7253SJohannes Doerfert       }
779396b7253SJohannes Doerfert     }
780396b7253SJohannes Doerfert 
7819548b74aSJohannes Doerfert     bool Changed = false;
7829548b74aSJohannes Doerfert     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
7839548b74aSJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
7849548b74aSJohannes Doerfert       if (!CI || CI == ReplVal || &F != &Caller)
7859548b74aSJohannes Doerfert         return false;
7869548b74aSJohannes Doerfert       assert(CI->getCaller() == &F && "Unexpected call!");
7874d4ea9acSHuber, Joseph 
7884d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
7894d4ea9acSHuber, Joseph         return OR << "OpenMP runtime call "
7904d4ea9acSHuber, Joseph                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
7914d4ea9acSHuber, Joseph       };
7924d4ea9acSHuber, Joseph       emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
7934d4ea9acSHuber, Joseph 
7949548b74aSJohannes Doerfert       CGUpdater.removeCallSite(*CI);
7959548b74aSJohannes Doerfert       CI->replaceAllUsesWith(ReplVal);
7969548b74aSJohannes Doerfert       CI->eraseFromParent();
7979548b74aSJohannes Doerfert       ++NumOpenMPRuntimeCallsDeduplicated;
7989548b74aSJohannes Doerfert       Changed = true;
7999548b74aSJohannes Doerfert       return true;
8009548b74aSJohannes Doerfert     };
801624d34afSJohannes Doerfert     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
8029548b74aSJohannes Doerfert 
8039548b74aSJohannes Doerfert     return Changed;
8049548b74aSJohannes Doerfert   }
8059548b74aSJohannes Doerfert 
8069548b74aSJohannes Doerfert   /// Collect arguments that represent the global thread id in \p GTIdArgs.
8079548b74aSJohannes Doerfert   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
8089548b74aSJohannes Doerfert     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
8099548b74aSJohannes Doerfert     //       initialization. We could define an AbstractAttribute instead and
8109548b74aSJohannes Doerfert     //       run the Attributor here once it can be run as an SCC pass.
8119548b74aSJohannes Doerfert 
8129548b74aSJohannes Doerfert     // Helper to check the argument \p ArgNo at all call sites of \p F for
8139548b74aSJohannes Doerfert     // a GTId.
8149548b74aSJohannes Doerfert     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
8159548b74aSJohannes Doerfert       if (!F.hasLocalLinkage())
8169548b74aSJohannes Doerfert         return false;
8179548b74aSJohannes Doerfert       for (Use &U : F.uses()) {
8189548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(U)) {
8199548b74aSJohannes Doerfert           Value *ArgOp = CI->getArgOperand(ArgNo);
8209548b74aSJohannes Doerfert           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
8217cfd267cSsstefan1               getCallIfRegularCall(
8227cfd267cSsstefan1                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
8239548b74aSJohannes Doerfert             continue;
8249548b74aSJohannes Doerfert         }
8259548b74aSJohannes Doerfert         return false;
8269548b74aSJohannes Doerfert       }
8279548b74aSJohannes Doerfert       return true;
8289548b74aSJohannes Doerfert     };
8299548b74aSJohannes Doerfert 
8309548b74aSJohannes Doerfert     // Helper to identify uses of a GTId as GTId arguments.
8319548b74aSJohannes Doerfert     auto AddUserArgs = [&](Value &GTId) {
8329548b74aSJohannes Doerfert       for (Use &U : GTId.uses())
8339548b74aSJohannes Doerfert         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
8349548b74aSJohannes Doerfert           if (CI->isArgOperand(&U))
8359548b74aSJohannes Doerfert             if (Function *Callee = CI->getCalledFunction())
8369548b74aSJohannes Doerfert               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
8379548b74aSJohannes Doerfert                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
8389548b74aSJohannes Doerfert     };
8399548b74aSJohannes Doerfert 
8409548b74aSJohannes Doerfert     // The argument users of __kmpc_global_thread_num calls are GTIds.
8417cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
8427cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
8437cfd267cSsstefan1 
844624d34afSJohannes Doerfert     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
8458855fec3SJohannes Doerfert       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
8469548b74aSJohannes Doerfert         AddUserArgs(*CI);
8478855fec3SJohannes Doerfert       return false;
8488855fec3SJohannes Doerfert     });
8499548b74aSJohannes Doerfert 
8509548b74aSJohannes Doerfert     // Transitively search for more arguments by looking at the users of the
8519548b74aSJohannes Doerfert     // ones we know already. During the search the GTIdArgs vector is extended
8529548b74aSJohannes Doerfert     // so we cannot cache the size nor can we use a range based for.
8539548b74aSJohannes Doerfert     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
8549548b74aSJohannes Doerfert       AddUserArgs(*GTIdArgs[u]);
8559548b74aSJohannes Doerfert   }
8569548b74aSJohannes Doerfert 
8575b0581aeSJohannes Doerfert   /// Kernel (=GPU) optimizations and utility functions
8585b0581aeSJohannes Doerfert   ///
8595b0581aeSJohannes Doerfert   ///{{
8605b0581aeSJohannes Doerfert 
8615b0581aeSJohannes Doerfert   /// Check if \p F is a kernel, hence entry point for target offloading.
8625b0581aeSJohannes Doerfert   bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
8635b0581aeSJohannes Doerfert 
8645b0581aeSJohannes Doerfert   /// Cache to remember the unique kernel for a function.
8655b0581aeSJohannes Doerfert   DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
8665b0581aeSJohannes Doerfert 
8675b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p F, if any.
8685b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Function &F);
8695b0581aeSJohannes Doerfert 
8705b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p I, if any.
8715b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Instruction &I) {
8725b0581aeSJohannes Doerfert     return getUniqueKernelFor(*I.getFunction());
8735b0581aeSJohannes Doerfert   }
8745b0581aeSJohannes Doerfert 
/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases we can avoid taking the address of a function.
8775b0581aeSJohannes Doerfert   bool rewriteDeviceCodeStateMachine();
8785b0581aeSJohannes Doerfert 
8795b0581aeSJohannes Doerfert   ///
8805b0581aeSJohannes Doerfert   ///}}
8815b0581aeSJohannes Doerfert 
8824d4ea9acSHuber, Joseph   /// Emit a remark generically
8834d4ea9acSHuber, Joseph   ///
8844d4ea9acSHuber, Joseph   /// This template function can be used to generically emit a remark. The
8854d4ea9acSHuber, Joseph   /// RemarkKind should be one of the following:
8864d4ea9acSHuber, Joseph   ///   - OptimizationRemark to indicate a successful optimization attempt
8874d4ea9acSHuber, Joseph   ///   - OptimizationRemarkMissed to report a failed optimization attempt
8884d4ea9acSHuber, Joseph   ///   - OptimizationRemarkAnalysis to provide additional information about an
8894d4ea9acSHuber, Joseph   ///     optimization attempt
8904d4ea9acSHuber, Joseph   ///
8914d4ea9acSHuber, Joseph   /// The remark is built using a callback function provided by the caller that
8924d4ea9acSHuber, Joseph   /// takes a RemarkKind as input and returns a RemarkKind.
8934d4ea9acSHuber, Joseph   template <typename RemarkKind,
8944d4ea9acSHuber, Joseph             typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
8954d4ea9acSHuber, Joseph   void emitRemark(Instruction *Inst, StringRef RemarkName,
896e8039ad4SJohannes Doerfert                   RemarkCallBack &&RemarkCB) const {
8974d4ea9acSHuber, Joseph     Function *F = Inst->getParent()->getParent();
8984d4ea9acSHuber, Joseph     auto &ORE = OREGetter(F);
8994d4ea9acSHuber, Joseph 
9007cfd267cSsstefan1     ORE.emit(
9017cfd267cSsstefan1         [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
9024d4ea9acSHuber, Joseph   }
9034d4ea9acSHuber, Joseph 
9040f426935Ssstefan1   /// Emit a remark on a function. Since only OptimizationRemark is supporting
9050f426935Ssstefan1   /// this, it can't be made generic.
906e8039ad4SJohannes Doerfert   void
907e8039ad4SJohannes Doerfert   emitRemarkOnFunction(Function *F, StringRef RemarkName,
908e8039ad4SJohannes Doerfert                        function_ref<OptimizationRemark(OptimizationRemark &&)>
909e8039ad4SJohannes Doerfert                            &&RemarkCB) const {
9100f426935Ssstefan1     auto &ORE = OREGetter(F);
9110f426935Ssstefan1 
9120f426935Ssstefan1     ORE.emit([&]() {
9130f426935Ssstefan1       return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
9140f426935Ssstefan1     });
9150f426935Ssstefan1   }
9160f426935Ssstefan1 
917b726c557SJohannes Doerfert   /// The underlying module.
9189548b74aSJohannes Doerfert   Module &M;
9199548b74aSJohannes Doerfert 
9209548b74aSJohannes Doerfert   /// The SCC we are operating on.
921ee17263aSJohannes Doerfert   SmallVectorImpl<Function *> &SCC;
9229548b74aSJohannes Doerfert 
/// Updater used to keep the call graph in sync when call sites are removed.
9259548b74aSJohannes Doerfert   CallGraphUpdater &CGUpdater;
9269548b74aSJohannes Doerfert 
9274d4ea9acSHuber, Joseph   /// Callback to get an OptimizationRemarkEmitter from a Function *
9284d4ea9acSHuber, Joseph   OptimizationRemarkGetter OREGetter;
9294d4ea9acSHuber, Joseph 
/// OpenMP-specific information cache. Also used for Attributor runs.
9317cfd267cSsstefan1   OMPInformationCache &OMPInfoCache;
932b8235d2bSsstefan1 
933b8235d2bSsstefan1   /// Attributor instance.
934b8235d2bSsstefan1   Attributor &A;
935b8235d2bSsstefan1 
936b8235d2bSsstefan1   /// Helper function to run Attributor on SCC.
937b8235d2bSsstefan1   bool runAttributor() {
938b8235d2bSsstefan1     if (SCC.empty())
939b8235d2bSsstefan1       return false;
940b8235d2bSsstefan1 
941b8235d2bSsstefan1     registerAAs();
942b8235d2bSsstefan1 
943b8235d2bSsstefan1     ChangeStatus Changed = A.run();
944b8235d2bSsstefan1 
945b8235d2bSsstefan1     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
946b8235d2bSsstefan1                       << " functions, result: " << Changed << ".\n");
947b8235d2bSsstefan1 
948b8235d2bSsstefan1     return Changed == ChangeStatus::CHANGED;
949b8235d2bSsstefan1   }
950b8235d2bSsstefan1 
951b8235d2bSsstefan1   /// Populate the Attributor with abstract attribute opportunities in the
952b8235d2bSsstefan1   /// function.
953b8235d2bSsstefan1   void registerAAs() {
954*5dfd7cc4Ssstefan1     if (SCC.empty())
955*5dfd7cc4Ssstefan1       return;
956b8235d2bSsstefan1 
957*5dfd7cc4Ssstefan1     // Create CallSite AA for all Getters.
958*5dfd7cc4Ssstefan1     for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
959*5dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
960*5dfd7cc4Ssstefan1 
961*5dfd7cc4Ssstefan1       auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
962*5dfd7cc4Ssstefan1 
963*5dfd7cc4Ssstefan1       auto CreateAA = [&](Use &U, Function &Caller) {
964*5dfd7cc4Ssstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
965*5dfd7cc4Ssstefan1         if (!CI)
966*5dfd7cc4Ssstefan1           return false;
967*5dfd7cc4Ssstefan1 
968*5dfd7cc4Ssstefan1         auto &CB = cast<CallBase>(*CI);
969*5dfd7cc4Ssstefan1 
970*5dfd7cc4Ssstefan1         IRPosition CBPos = IRPosition::callsite_function(CB);
971*5dfd7cc4Ssstefan1         A.getOrCreateAAFor<AAICVTracker>(CBPos);
972*5dfd7cc4Ssstefan1         return false;
973*5dfd7cc4Ssstefan1       };
974*5dfd7cc4Ssstefan1 
975*5dfd7cc4Ssstefan1       GetterRFI.foreachUse(SCC, CreateAA);
976b8235d2bSsstefan1     }
977b8235d2bSsstefan1   }
978b8235d2bSsstefan1 };
979b8235d2bSsstefan1 
9805b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
9815b0581aeSJohannes Doerfert   if (!OMPInfoCache.ModuleSlice.count(&F))
9825b0581aeSJohannes Doerfert     return nullptr;
9835b0581aeSJohannes Doerfert 
9845b0581aeSJohannes Doerfert   // Use a scope to keep the lifetime of the CachedKernel short.
9855b0581aeSJohannes Doerfert   {
9865b0581aeSJohannes Doerfert     Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
9875b0581aeSJohannes Doerfert     if (CachedKernel)
9885b0581aeSJohannes Doerfert       return *CachedKernel;
9895b0581aeSJohannes Doerfert 
9905b0581aeSJohannes Doerfert     // TODO: We should use an AA to create an (optimistic and callback
9915b0581aeSJohannes Doerfert     //       call-aware) call graph. For now we stick to simple patterns that
9925b0581aeSJohannes Doerfert     //       are less powerful, basically the worst fixpoint.
9935b0581aeSJohannes Doerfert     if (isKernel(F)) {
9945b0581aeSJohannes Doerfert       CachedKernel = Kernel(&F);
9955b0581aeSJohannes Doerfert       return *CachedKernel;
9965b0581aeSJohannes Doerfert     }
9975b0581aeSJohannes Doerfert 
9985b0581aeSJohannes Doerfert     CachedKernel = nullptr;
9995b0581aeSJohannes Doerfert     if (!F.hasLocalLinkage())
10005b0581aeSJohannes Doerfert       return nullptr;
10015b0581aeSJohannes Doerfert   }
10025b0581aeSJohannes Doerfert 
10035b0581aeSJohannes Doerfert   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
10045b0581aeSJohannes Doerfert     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
10055b0581aeSJohannes Doerfert       // Allow use in equality comparisons.
10065b0581aeSJohannes Doerfert       if (Cmp->isEquality())
10075b0581aeSJohannes Doerfert         return getUniqueKernelFor(*Cmp);
10085b0581aeSJohannes Doerfert       return nullptr;
10095b0581aeSJohannes Doerfert     }
10105b0581aeSJohannes Doerfert     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
10115b0581aeSJohannes Doerfert       // Allow direct calls.
10125b0581aeSJohannes Doerfert       if (CB->isCallee(&U))
10135b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
10145b0581aeSJohannes Doerfert       // Allow the use in __kmpc_kernel_prepare_parallel calls.
10155b0581aeSJohannes Doerfert       if (Function *Callee = CB->getCalledFunction())
10165b0581aeSJohannes Doerfert         if (Callee->getName() == "__kmpc_kernel_prepare_parallel")
10175b0581aeSJohannes Doerfert           return getUniqueKernelFor(*CB);
10185b0581aeSJohannes Doerfert       return nullptr;
10195b0581aeSJohannes Doerfert     }
10205b0581aeSJohannes Doerfert     // Disallow every other use.
10215b0581aeSJohannes Doerfert     return nullptr;
10225b0581aeSJohannes Doerfert   };
10235b0581aeSJohannes Doerfert 
10245b0581aeSJohannes Doerfert   // TODO: In the future we want to track more than just a unique kernel.
10255b0581aeSJohannes Doerfert   SmallPtrSet<Kernel, 2> PotentialKernels;
10268d8ce85bSsstefan1   OMPInformationCache::foreachUse(F, [&](const Use &U) {
10275b0581aeSJohannes Doerfert     PotentialKernels.insert(GetUniqueKernelForUse(U));
10285b0581aeSJohannes Doerfert   });
10295b0581aeSJohannes Doerfert 
10305b0581aeSJohannes Doerfert   Kernel K = nullptr;
10315b0581aeSJohannes Doerfert   if (PotentialKernels.size() == 1)
10325b0581aeSJohannes Doerfert     K = *PotentialKernels.begin();
10335b0581aeSJohannes Doerfert 
10345b0581aeSJohannes Doerfert   // Cache the result.
10355b0581aeSJohannes Doerfert   UniqueKernelMap[&F] = K;
10365b0581aeSJohannes Doerfert 
10375b0581aeSJohannes Doerfert   return K;
10385b0581aeSJohannes Doerfert }
10395b0581aeSJohannes Doerfert 
10405b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
10415b0581aeSJohannes Doerfert   OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI =
10425b0581aeSJohannes Doerfert       OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel];
10435b0581aeSJohannes Doerfert 
10445b0581aeSJohannes Doerfert   bool Changed = false;
10455b0581aeSJohannes Doerfert   if (!KernelPrepareParallelRFI)
10465b0581aeSJohannes Doerfert     return Changed;
10475b0581aeSJohannes Doerfert 
10485b0581aeSJohannes Doerfert   for (Function *F : SCC) {
10495b0581aeSJohannes Doerfert 
10505b0581aeSJohannes Doerfert     // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at
10515b0581aeSJohannes Doerfert     // all.
10525b0581aeSJohannes Doerfert     bool UnknownUse = false;
1053fec1f210SJohannes Doerfert     bool KernelPrepareUse = false;
10545b0581aeSJohannes Doerfert     unsigned NumDirectCalls = 0;
10555b0581aeSJohannes Doerfert 
10565b0581aeSJohannes Doerfert     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
10578d8ce85bSsstefan1     OMPInformationCache::foreachUse(*F, [&](Use &U) {
10585b0581aeSJohannes Doerfert       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
10595b0581aeSJohannes Doerfert         if (CB->isCallee(&U)) {
10605b0581aeSJohannes Doerfert           ++NumDirectCalls;
10615b0581aeSJohannes Doerfert           return;
10625b0581aeSJohannes Doerfert         }
10635b0581aeSJohannes Doerfert 
106481db6144SMichael Liao       if (isa<ICmpInst>(U.getUser())) {
10655b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
10665b0581aeSJohannes Doerfert         return;
10675b0581aeSJohannes Doerfert       }
1068fec1f210SJohannes Doerfert       if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall(
1069fec1f210SJohannes Doerfert                                    *U.getUser(), &KernelPrepareParallelRFI)) {
1070fec1f210SJohannes Doerfert         KernelPrepareUse = true;
10715b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
10725b0581aeSJohannes Doerfert         return;
10735b0581aeSJohannes Doerfert       }
10745b0581aeSJohannes Doerfert       UnknownUse = true;
10755b0581aeSJohannes Doerfert     });
10765b0581aeSJohannes Doerfert 
1077fec1f210SJohannes Doerfert     // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
1078fec1f210SJohannes Doerfert     // use.
1079fec1f210SJohannes Doerfert     if (!KernelPrepareUse)
10805b0581aeSJohannes Doerfert       continue;
10815b0581aeSJohannes Doerfert 
1082fec1f210SJohannes Doerfert     {
1083fec1f210SJohannes Doerfert       auto Remark = [&](OptimizationRemark OR) {
1084fec1f210SJohannes Doerfert         return OR << "Found a parallel region that is called in a target "
1085fec1f210SJohannes Doerfert                      "region but not part of a combined target construct nor "
1086fec1f210SJohannes Doerfert                      "nesed inside a target construct without intermediate "
1087fec1f210SJohannes Doerfert                      "code. This can lead to excessive register usage for "
1088fec1f210SJohannes Doerfert                      "unrelated target regions in the same translation unit "
1089fec1f210SJohannes Doerfert                      "due to spurious call edges assumed by ptxas.";
1090fec1f210SJohannes Doerfert       };
1091fec1f210SJohannes Doerfert       emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
1092fec1f210SJohannes Doerfert     }
1093fec1f210SJohannes Doerfert 
1094fec1f210SJohannes Doerfert     // If this ever hits, we should investigate.
1095fec1f210SJohannes Doerfert     // TODO: Checking the number of uses is not a necessary restriction and
1096fec1f210SJohannes Doerfert     // should be lifted.
1097fec1f210SJohannes Doerfert     if (UnknownUse || NumDirectCalls != 1 ||
1098fec1f210SJohannes Doerfert         ToBeReplacedStateMachineUses.size() != 2) {
1099fec1f210SJohannes Doerfert       {
1100fec1f210SJohannes Doerfert         auto Remark = [&](OptimizationRemark OR) {
1101fec1f210SJohannes Doerfert           return OR << "Parallel region is used in "
1102fec1f210SJohannes Doerfert                     << (UnknownUse ? "unknown" : "unexpected")
1103fec1f210SJohannes Doerfert                     << " ways; will not attempt to rewrite the state machine.";
1104fec1f210SJohannes Doerfert         };
1105fec1f210SJohannes Doerfert         emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
1106fec1f210SJohannes Doerfert       }
11075b0581aeSJohannes Doerfert       continue;
1108fec1f210SJohannes Doerfert     }
11095b0581aeSJohannes Doerfert 
11105b0581aeSJohannes Doerfert     // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
11115b0581aeSJohannes Doerfert     // up if the function is not called from a unique kernel.
11125b0581aeSJohannes Doerfert     Kernel K = getUniqueKernelFor(*F);
1113fec1f210SJohannes Doerfert     if (!K) {
1114fec1f210SJohannes Doerfert       {
1115fec1f210SJohannes Doerfert         auto Remark = [&](OptimizationRemark OR) {
1116fec1f210SJohannes Doerfert           return OR << "Parallel region is not known to be called from a "
1117fec1f210SJohannes Doerfert                        "unique single target region, maybe the surrounding "
1118fec1f210SJohannes Doerfert                        "function has external linkage?; will not attempt to "
1119fec1f210SJohannes Doerfert                        "rewrite the state machine use.";
1120fec1f210SJohannes Doerfert         };
1121fec1f210SJohannes Doerfert         emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
1122fec1f210SJohannes Doerfert                              Remark);
1123fec1f210SJohannes Doerfert       }
11245b0581aeSJohannes Doerfert       continue;
1125fec1f210SJohannes Doerfert     }
11265b0581aeSJohannes Doerfert 
11275b0581aeSJohannes Doerfert     // We now know F is a parallel body function called only from the kernel K.
11285b0581aeSJohannes Doerfert     // We also identified the state machine uses in which we replace the
11295b0581aeSJohannes Doerfert     // function pointer by a new global symbol for identification purposes. This
11305b0581aeSJohannes Doerfert     // ensures only direct calls to the function are left.
11315b0581aeSJohannes Doerfert 
1132fec1f210SJohannes Doerfert     {
1133fec1f210SJohannes Doerfert       auto RemarkParalleRegion = [&](OptimizationRemark OR) {
1134fec1f210SJohannes Doerfert         return OR << "Specialize parallel region that is only reached from a "
1135fec1f210SJohannes Doerfert                      "single target region to avoid spurious call edges and "
1136fec1f210SJohannes Doerfert                      "excessive register usage in other target regions. "
1137fec1f210SJohannes Doerfert                      "(parallel region ID: "
1138fec1f210SJohannes Doerfert                   << ore::NV("OpenMPParallelRegion", F->getName())
1139fec1f210SJohannes Doerfert                   << ", kernel ID: "
1140fec1f210SJohannes Doerfert                   << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1141fec1f210SJohannes Doerfert       };
1142fec1f210SJohannes Doerfert       emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
1143fec1f210SJohannes Doerfert                            RemarkParalleRegion);
1144fec1f210SJohannes Doerfert       auto RemarkKernel = [&](OptimizationRemark OR) {
1145fec1f210SJohannes Doerfert         return OR << "Target region containing the parallel region that is "
1146fec1f210SJohannes Doerfert                      "specialized. (parallel region ID: "
1147fec1f210SJohannes Doerfert                   << ore::NV("OpenMPParallelRegion", F->getName())
1148fec1f210SJohannes Doerfert                   << ", kernel ID: "
1149fec1f210SJohannes Doerfert                   << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1150fec1f210SJohannes Doerfert       };
1151fec1f210SJohannes Doerfert       emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
1152fec1f210SJohannes Doerfert     }
1153fec1f210SJohannes Doerfert 
11545b0581aeSJohannes Doerfert     Module &M = *F->getParent();
11555b0581aeSJohannes Doerfert     Type *Int8Ty = Type::getInt8Ty(M.getContext());
11565b0581aeSJohannes Doerfert 
11575b0581aeSJohannes Doerfert     auto *ID = new GlobalVariable(
11585b0581aeSJohannes Doerfert         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
11595b0581aeSJohannes Doerfert         UndefValue::get(Int8Ty), F->getName() + ".ID");
11605b0581aeSJohannes Doerfert 
11615b0581aeSJohannes Doerfert     for (Use *U : ToBeReplacedStateMachineUses)
11625b0581aeSJohannes Doerfert       U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
11635b0581aeSJohannes Doerfert 
11645b0581aeSJohannes Doerfert     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
11655b0581aeSJohannes Doerfert 
11665b0581aeSJohannes Doerfert     Changed = true;
11675b0581aeSJohannes Doerfert   }
11685b0581aeSJohannes Doerfert 
11695b0581aeSJohannes Doerfert   return Changed;
11705b0581aeSJohannes Doerfert }
11715b0581aeSJohannes Doerfert 
1172b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
1173b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
1174b8235d2bSsstefan1   using Base = StateWrapper<BooleanState, AbstractAttribute>;
1175b8235d2bSsstefan1   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1176b8235d2bSsstefan1 
1177*5dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
1178*5dfd7cc4Ssstefan1     Function *F = getAnchorScope();
1179*5dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
1180*5dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
1181*5dfd7cc4Ssstefan1   }
1182*5dfd7cc4Ssstefan1 
1183b8235d2bSsstefan1   /// Returns true if value is assumed to be tracked.
1184b8235d2bSsstefan1   bool isAssumedTracked() const { return getAssumed(); }
1185b8235d2bSsstefan1 
1186b8235d2bSsstefan1   /// Returns true if value is known to be tracked.
1187b8235d2bSsstefan1   bool isKnownTracked() const { return getAssumed(); }
1188b8235d2bSsstefan1 
1189b8235d2bSsstefan1   /// Create an abstract attribute biew for the position \p IRP.
1190b8235d2bSsstefan1   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
1191b8235d2bSsstefan1 
1192b8235d2bSsstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
1193*5dfd7cc4Ssstefan1   virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
1194*5dfd7cc4Ssstefan1                                                 const Instruction *I,
1195*5dfd7cc4Ssstefan1                                                 Attributor &A) const {
1196*5dfd7cc4Ssstefan1     return None;
1197*5dfd7cc4Ssstefan1   }
1198*5dfd7cc4Ssstefan1 
1199*5dfd7cc4Ssstefan1   /// Return an assumed unique ICV value if a single candidate is found. If
1200*5dfd7cc4Ssstefan1   /// there cannot be one, return a nullptr. If it is not clear yet, return the
1201*5dfd7cc4Ssstefan1   /// Optional::NoneType.
1202*5dfd7cc4Ssstefan1   virtual Optional<Value *>
1203*5dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
1204*5dfd7cc4Ssstefan1 
1205*5dfd7cc4Ssstefan1   // Currently only nthreads is being tracked.
1206*5dfd7cc4Ssstefan1   // this array will only grow with time.
1207*5dfd7cc4Ssstefan1   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
1208b8235d2bSsstefan1 
1209b8235d2bSsstefan1   /// See AbstractAttribute::getName()
1210b8235d2bSsstefan1   const std::string getName() const override { return "AAICVTracker"; }
1211b8235d2bSsstefan1 
1212233af895SLuofan Chen   /// See AbstractAttribute::getIdAddr()
1213233af895SLuofan Chen   const char *getIdAddr() const override { return &ID; }
1214233af895SLuofan Chen 
1215233af895SLuofan Chen   /// This function should return true if the type of the \p AA is AAICVTracker
1216233af895SLuofan Chen   static bool classof(const AbstractAttribute *AA) {
1217233af895SLuofan Chen     return (AA->getIdAddr() == &ID);
1218233af895SLuofan Chen   }
1219233af895SLuofan Chen 
1220b8235d2bSsstefan1   static const char ID;
1221b8235d2bSsstefan1 };
1222b8235d2bSsstefan1 
1223b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
1224b8235d2bSsstefan1   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
1225b8235d2bSsstefan1       : AAICVTracker(IRP, A) {}
1226b8235d2bSsstefan1 
1227b8235d2bSsstefan1   // FIXME: come up with better string.
1228*5dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerFunction"; }
1229b8235d2bSsstefan1 
1230b8235d2bSsstefan1   // FIXME: come up with some stats.
1231b8235d2bSsstefan1   void trackStatistics() const override {}
1232b8235d2bSsstefan1 
1233*5dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1234b8235d2bSsstefan1   ChangeStatus manifest(Attributor &A) override {
1235*5dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1236b8235d2bSsstefan1   }
1237b8235d2bSsstefan1 
1238b8235d2bSsstefan1   // Map of ICV to their values at specific program point.
1239*5dfd7cc4Ssstefan1   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
1240b8235d2bSsstefan1                   InternalControlVar::ICV___last>
1241*5dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1242b8235d2bSsstefan1 
1243b8235d2bSsstefan1   ChangeStatus updateImpl(Attributor &A) override {
1244b8235d2bSsstefan1     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
1245b8235d2bSsstefan1 
1246b8235d2bSsstefan1     Function *F = getAnchorScope();
1247b8235d2bSsstefan1 
1248b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1249b8235d2bSsstefan1 
1250b8235d2bSsstefan1     for (InternalControlVar ICV : TrackableICVs) {
1251b8235d2bSsstefan1       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
1252b8235d2bSsstefan1 
1253*5dfd7cc4Ssstefan1       auto &ValuesMap = ICVReplacementValuesMap[ICV];
1254b8235d2bSsstefan1       auto TrackValues = [&](Use &U, Function &) {
1255b8235d2bSsstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
1256b8235d2bSsstefan1         if (!CI)
1257b8235d2bSsstefan1           return false;
1258b8235d2bSsstefan1 
1259b8235d2bSsstefan1         // FIXME: handle setters with more that 1 arguments.
1260b8235d2bSsstefan1         /// Track new value.
1261*5dfd7cc4Ssstefan1         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
1262b8235d2bSsstefan1           HasChanged = ChangeStatus::CHANGED;
1263b8235d2bSsstefan1 
1264b8235d2bSsstefan1         return false;
1265b8235d2bSsstefan1       };
1266b8235d2bSsstefan1 
1267*5dfd7cc4Ssstefan1       auto CallCheck = [&](Instruction &I) {
1268*5dfd7cc4Ssstefan1         Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
1269*5dfd7cc4Ssstefan1         if (ReplVal.hasValue() &&
1270*5dfd7cc4Ssstefan1             ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
1271*5dfd7cc4Ssstefan1           HasChanged = ChangeStatus::CHANGED;
1272*5dfd7cc4Ssstefan1 
1273*5dfd7cc4Ssstefan1         return true;
1274*5dfd7cc4Ssstefan1       };
1275*5dfd7cc4Ssstefan1 
1276*5dfd7cc4Ssstefan1       // Track all changes of an ICV.
1277b8235d2bSsstefan1       SetterRFI.foreachUse(TrackValues, F);
1278*5dfd7cc4Ssstefan1 
1279*5dfd7cc4Ssstefan1       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
1280*5dfd7cc4Ssstefan1                                 /* CheckBBLivenessOnly */ true);
1281*5dfd7cc4Ssstefan1 
1282*5dfd7cc4Ssstefan1       /// TODO: Figure out a way to avoid adding entry in
1283*5dfd7cc4Ssstefan1       /// ICVReplacementValuesMap
1284*5dfd7cc4Ssstefan1       Instruction *Entry = &F->getEntryBlock().front();
1285*5dfd7cc4Ssstefan1       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
1286*5dfd7cc4Ssstefan1         ValuesMap.insert(std::make_pair(Entry, nullptr));
1287b8235d2bSsstefan1     }
1288b8235d2bSsstefan1 
1289b8235d2bSsstefan1     return HasChanged;
1290b8235d2bSsstefan1   }
1291b8235d2bSsstefan1 
1292*5dfd7cc4Ssstefan1   /// Hepler to check if \p I is a call and get the value for it if it is
1293*5dfd7cc4Ssstefan1   /// unique.
1294*5dfd7cc4Ssstefan1   Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
1295*5dfd7cc4Ssstefan1                                     InternalControlVar &ICV) const {
1296b8235d2bSsstefan1 
1297*5dfd7cc4Ssstefan1     const auto *CB = dyn_cast<CallBase>(I);
1298*5dfd7cc4Ssstefan1     if (!CB)
1299*5dfd7cc4Ssstefan1       return None;
1300*5dfd7cc4Ssstefan1 
1301b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1302b8235d2bSsstefan1     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
1303*5dfd7cc4Ssstefan1     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
1304*5dfd7cc4Ssstefan1     Function *CalledFunction = CB->getCalledFunction();
1305b8235d2bSsstefan1 
1306*5dfd7cc4Ssstefan1     if (CalledFunction == GetterRFI.Declaration)
1307*5dfd7cc4Ssstefan1       return None;
1308*5dfd7cc4Ssstefan1     if (CalledFunction == SetterRFI.Declaration) {
1309*5dfd7cc4Ssstefan1       if (ICVReplacementValuesMap[ICV].count(I))
1310*5dfd7cc4Ssstefan1         return ICVReplacementValuesMap[ICV].lookup(I);
1311*5dfd7cc4Ssstefan1 
1312*5dfd7cc4Ssstefan1       return nullptr;
1313*5dfd7cc4Ssstefan1     }
1314*5dfd7cc4Ssstefan1 
1315*5dfd7cc4Ssstefan1     // Since we don't know, assume it changes the ICV.
1316*5dfd7cc4Ssstefan1     if (CalledFunction->isDeclaration())
1317*5dfd7cc4Ssstefan1       return nullptr;
1318*5dfd7cc4Ssstefan1 
1319*5dfd7cc4Ssstefan1     const auto &ICVTrackingAA =
1320*5dfd7cc4Ssstefan1         A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));
1321*5dfd7cc4Ssstefan1 
1322*5dfd7cc4Ssstefan1     if (ICVTrackingAA.isAssumedTracked())
1323*5dfd7cc4Ssstefan1       return ICVTrackingAA.getUniqueReplacementValue(ICV);
1324*5dfd7cc4Ssstefan1 
1325*5dfd7cc4Ssstefan1     // If we don't know, assume it changes.
1326*5dfd7cc4Ssstefan1     return nullptr;
1327*5dfd7cc4Ssstefan1   }
1328*5dfd7cc4Ssstefan1 
1329*5dfd7cc4Ssstefan1   // We don't check unique value for a function, so return None.
1330*5dfd7cc4Ssstefan1   Optional<Value *>
1331*5dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
1332*5dfd7cc4Ssstefan1     return None;
1333*5dfd7cc4Ssstefan1   }
1334*5dfd7cc4Ssstefan1 
1335*5dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
1336*5dfd7cc4Ssstefan1   Optional<Value *> getReplacementValue(InternalControlVar ICV,
1337*5dfd7cc4Ssstefan1                                         const Instruction *I,
1338*5dfd7cc4Ssstefan1                                         Attributor &A) const override {
1339*5dfd7cc4Ssstefan1     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
1340*5dfd7cc4Ssstefan1     if (ValuesMap.count(I))
1341*5dfd7cc4Ssstefan1       return ValuesMap.lookup(I);
1342*5dfd7cc4Ssstefan1 
1343*5dfd7cc4Ssstefan1     SmallVector<const Instruction *, 16> Worklist;
1344*5dfd7cc4Ssstefan1     SmallPtrSet<const Instruction *, 16> Visited;
1345*5dfd7cc4Ssstefan1     Worklist.push_back(I);
1346*5dfd7cc4Ssstefan1 
1347*5dfd7cc4Ssstefan1     Optional<Value *> ReplVal;
1348*5dfd7cc4Ssstefan1 
1349*5dfd7cc4Ssstefan1     while (!Worklist.empty()) {
1350*5dfd7cc4Ssstefan1       const Instruction *CurrInst = Worklist.pop_back_val();
1351*5dfd7cc4Ssstefan1       if (!Visited.insert(CurrInst).second)
1352b8235d2bSsstefan1         continue;
1353b8235d2bSsstefan1 
1354*5dfd7cc4Ssstefan1       const BasicBlock *CurrBB = CurrInst->getParent();
1355*5dfd7cc4Ssstefan1 
1356*5dfd7cc4Ssstefan1       // Go up and look for all potential setters/calls that might change the
1357*5dfd7cc4Ssstefan1       // ICV.
1358*5dfd7cc4Ssstefan1       while ((CurrInst = CurrInst->getPrevNode())) {
1359*5dfd7cc4Ssstefan1         if (ValuesMap.count(CurrInst)) {
1360*5dfd7cc4Ssstefan1           Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
1361*5dfd7cc4Ssstefan1           // Unknown value, track new.
1362*5dfd7cc4Ssstefan1           if (!ReplVal.hasValue()) {
1363*5dfd7cc4Ssstefan1             ReplVal = NewReplVal;
1364*5dfd7cc4Ssstefan1             break;
1365*5dfd7cc4Ssstefan1           }
1366*5dfd7cc4Ssstefan1 
1367*5dfd7cc4Ssstefan1           // If we found a new value, we can't know the icv value anymore.
1368*5dfd7cc4Ssstefan1           if (NewReplVal.hasValue())
1369*5dfd7cc4Ssstefan1             if (ReplVal != NewReplVal)
1370b8235d2bSsstefan1               return nullptr;
1371b8235d2bSsstefan1 
1372*5dfd7cc4Ssstefan1           break;
1373b8235d2bSsstefan1         }
1374b8235d2bSsstefan1 
1375*5dfd7cc4Ssstefan1         Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
1376*5dfd7cc4Ssstefan1         if (!NewReplVal.hasValue())
1377*5dfd7cc4Ssstefan1           continue;
1378*5dfd7cc4Ssstefan1 
1379*5dfd7cc4Ssstefan1         // Unknown value, track new.
1380*5dfd7cc4Ssstefan1         if (!ReplVal.hasValue()) {
1381*5dfd7cc4Ssstefan1           ReplVal = NewReplVal;
1382*5dfd7cc4Ssstefan1           break;
1383b8235d2bSsstefan1         }
1384b8235d2bSsstefan1 
1385*5dfd7cc4Ssstefan1         // if (NewReplVal.hasValue())
1386*5dfd7cc4Ssstefan1         // We found a new value, we can't know the icv value anymore.
1387*5dfd7cc4Ssstefan1         if (ReplVal != NewReplVal)
1388b8235d2bSsstefan1           return nullptr;
1389b8235d2bSsstefan1       }
1390*5dfd7cc4Ssstefan1 
1391*5dfd7cc4Ssstefan1       // If we are in the same BB and we have a value, we are done.
1392*5dfd7cc4Ssstefan1       if (CurrBB == I->getParent() && ReplVal.hasValue())
1393*5dfd7cc4Ssstefan1         return ReplVal;
1394*5dfd7cc4Ssstefan1 
1395*5dfd7cc4Ssstefan1       // Go through all predecessors and add terminators for analysis.
1396*5dfd7cc4Ssstefan1       for (const BasicBlock *Pred : predecessors(CurrBB))
1397*5dfd7cc4Ssstefan1         if (const Instruction *Terminator = Pred->getTerminator())
1398*5dfd7cc4Ssstefan1           Worklist.push_back(Terminator);
1399*5dfd7cc4Ssstefan1     }
1400*5dfd7cc4Ssstefan1 
1401*5dfd7cc4Ssstefan1     return ReplVal;
1402*5dfd7cc4Ssstefan1   }
1403*5dfd7cc4Ssstefan1 };
1404*5dfd7cc4Ssstefan1 
1405*5dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
1406*5dfd7cc4Ssstefan1   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
1407*5dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
1408*5dfd7cc4Ssstefan1 
1409*5dfd7cc4Ssstefan1   // FIXME: come up with better string.
1410*5dfd7cc4Ssstefan1   const std::string getAsStr() const override {
1411*5dfd7cc4Ssstefan1     return "ICVTrackerFunctionReturned";
1412*5dfd7cc4Ssstefan1   }
1413*5dfd7cc4Ssstefan1 
1414*5dfd7cc4Ssstefan1   // FIXME: come up with some stats.
1415*5dfd7cc4Ssstefan1   void trackStatistics() const override {}
1416*5dfd7cc4Ssstefan1 
1417*5dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1418*5dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
1419*5dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1420*5dfd7cc4Ssstefan1   }
1421*5dfd7cc4Ssstefan1 
1422*5dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
1423*5dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
1424*5dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
1425*5dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1426*5dfd7cc4Ssstefan1 
1427*5dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
1428*5dfd7cc4Ssstefan1   Optional<Value *>
1429*5dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
1430*5dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
1431*5dfd7cc4Ssstefan1   }
1432*5dfd7cc4Ssstefan1 
1433*5dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
1434*5dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
1435*5dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
1436*5dfd7cc4Ssstefan1         *this, IRPosition::function(*getAnchorScope()));
1437*5dfd7cc4Ssstefan1 
1438*5dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
1439*5dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
1440*5dfd7cc4Ssstefan1 
1441*5dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
1442*5dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
1443*5dfd7cc4Ssstefan1       Optional<Value *> UniqueICVValue;
1444*5dfd7cc4Ssstefan1 
1445*5dfd7cc4Ssstefan1       auto CheckReturnInst = [&](Instruction &I) {
1446*5dfd7cc4Ssstefan1         Optional<Value *> NewReplVal =
1447*5dfd7cc4Ssstefan1             ICVTrackingAA.getReplacementValue(ICV, &I, A);
1448*5dfd7cc4Ssstefan1 
1449*5dfd7cc4Ssstefan1         // If we found a second ICV value there is no unique returned value.
1450*5dfd7cc4Ssstefan1         if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
1451*5dfd7cc4Ssstefan1           return false;
1452*5dfd7cc4Ssstefan1 
1453*5dfd7cc4Ssstefan1         UniqueICVValue = NewReplVal;
1454*5dfd7cc4Ssstefan1 
1455*5dfd7cc4Ssstefan1         return true;
1456*5dfd7cc4Ssstefan1       };
1457*5dfd7cc4Ssstefan1 
1458*5dfd7cc4Ssstefan1       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
1459*5dfd7cc4Ssstefan1                                      /* CheckBBLivenessOnly */ true))
1460*5dfd7cc4Ssstefan1         UniqueICVValue = nullptr;
1461*5dfd7cc4Ssstefan1 
1462*5dfd7cc4Ssstefan1       if (UniqueICVValue == ReplVal)
1463*5dfd7cc4Ssstefan1         continue;
1464*5dfd7cc4Ssstefan1 
1465*5dfd7cc4Ssstefan1       ReplVal = UniqueICVValue;
1466*5dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
1467*5dfd7cc4Ssstefan1     }
1468*5dfd7cc4Ssstefan1 
1469*5dfd7cc4Ssstefan1     return Changed;
1470*5dfd7cc4Ssstefan1   }
1471*5dfd7cc4Ssstefan1 };
1472*5dfd7cc4Ssstefan1 
1473*5dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
1474*5dfd7cc4Ssstefan1   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
1475*5dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
1476*5dfd7cc4Ssstefan1 
1477*5dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
1478*5dfd7cc4Ssstefan1     Function *F = getAnchorScope();
1479*5dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
1480*5dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
1481*5dfd7cc4Ssstefan1 
1482*5dfd7cc4Ssstefan1     // We only initialize this AA for getters, so we need to know which ICV it
1483*5dfd7cc4Ssstefan1     // gets.
1484*5dfd7cc4Ssstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1485*5dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
1486*5dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[ICV];
1487*5dfd7cc4Ssstefan1       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
1488*5dfd7cc4Ssstefan1       if (Getter.Declaration == getAssociatedFunction()) {
1489*5dfd7cc4Ssstefan1         AssociatedICV = ICVInfo.Kind;
1490*5dfd7cc4Ssstefan1         return;
1491*5dfd7cc4Ssstefan1       }
1492*5dfd7cc4Ssstefan1     }
1493*5dfd7cc4Ssstefan1 
1494*5dfd7cc4Ssstefan1     /// Unknown ICV.
1495*5dfd7cc4Ssstefan1     indicatePessimisticFixpoint();
1496*5dfd7cc4Ssstefan1   }
1497*5dfd7cc4Ssstefan1 
1498*5dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
1499*5dfd7cc4Ssstefan1     if (!ReplVal.hasValue() || !ReplVal.getValue())
1500*5dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
1501*5dfd7cc4Ssstefan1 
1502*5dfd7cc4Ssstefan1     A.changeValueAfterManifest(*getCtxI(), **ReplVal);
1503*5dfd7cc4Ssstefan1     A.deleteAfterManifest(*getCtxI());
1504*5dfd7cc4Ssstefan1 
1505*5dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
1506*5dfd7cc4Ssstefan1   }
1507*5dfd7cc4Ssstefan1 
1508*5dfd7cc4Ssstefan1   // FIXME: come up with better string.
1509*5dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
1510*5dfd7cc4Ssstefan1 
1511*5dfd7cc4Ssstefan1   // FIXME: come up with some stats.
1512*5dfd7cc4Ssstefan1   void trackStatistics() const override {}
1513*5dfd7cc4Ssstefan1 
1514*5dfd7cc4Ssstefan1   InternalControlVar AssociatedICV;
1515*5dfd7cc4Ssstefan1   Optional<Value *> ReplVal;
1516*5dfd7cc4Ssstefan1 
1517*5dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
1518*5dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
1519*5dfd7cc4Ssstefan1         *this, IRPosition::function(*getAnchorScope()));
1520*5dfd7cc4Ssstefan1 
1521*5dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
1522*5dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
1523*5dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
1524*5dfd7cc4Ssstefan1 
1525*5dfd7cc4Ssstefan1     Optional<Value *> NewReplVal =
1526*5dfd7cc4Ssstefan1         ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
1527*5dfd7cc4Ssstefan1 
1528*5dfd7cc4Ssstefan1     if (ReplVal == NewReplVal)
1529*5dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
1530*5dfd7cc4Ssstefan1 
1531*5dfd7cc4Ssstefan1     ReplVal = NewReplVal;
1532*5dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
1533*5dfd7cc4Ssstefan1   }
1534*5dfd7cc4Ssstefan1 
1535*5dfd7cc4Ssstefan1   // Return the value with which associated value can be replaced for specific
1536*5dfd7cc4Ssstefan1   // \p ICV.
1537*5dfd7cc4Ssstefan1   Optional<Value *>
1538*5dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
1539*5dfd7cc4Ssstefan1     return ReplVal;
1540*5dfd7cc4Ssstefan1   }
1541*5dfd7cc4Ssstefan1 };
1542*5dfd7cc4Ssstefan1 
1543*5dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
1544*5dfd7cc4Ssstefan1   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
1545*5dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
1546*5dfd7cc4Ssstefan1 
1547*5dfd7cc4Ssstefan1   // FIXME: come up with better string.
1548*5dfd7cc4Ssstefan1   const std::string getAsStr() const override {
1549*5dfd7cc4Ssstefan1     return "ICVTrackerCallSiteReturned";
1550*5dfd7cc4Ssstefan1   }
1551*5dfd7cc4Ssstefan1 
1552*5dfd7cc4Ssstefan1   // FIXME: come up with some stats.
1553*5dfd7cc4Ssstefan1   void trackStatistics() const override {}
1554*5dfd7cc4Ssstefan1 
1555*5dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1556*5dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
1557*5dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1558*5dfd7cc4Ssstefan1   }
1559*5dfd7cc4Ssstefan1 
1560*5dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
1561*5dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
1562*5dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
1563*5dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1564*5dfd7cc4Ssstefan1 
1565*5dfd7cc4Ssstefan1   /// Return the value with which associated value can be replaced for specific
1566*5dfd7cc4Ssstefan1   /// \p ICV.
1567*5dfd7cc4Ssstefan1   Optional<Value *>
1568*5dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
1569*5dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
1570*5dfd7cc4Ssstefan1   }
1571*5dfd7cc4Ssstefan1 
1572*5dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
1573*5dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
1574*5dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
1575*5dfd7cc4Ssstefan1         *this, IRPosition::returned(*getAssociatedFunction()));
1576*5dfd7cc4Ssstefan1 
1577*5dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
1578*5dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
1579*5dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
1580*5dfd7cc4Ssstefan1 
1581*5dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
1582*5dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
1583*5dfd7cc4Ssstefan1       Optional<Value *> NewReplVal =
1584*5dfd7cc4Ssstefan1           ICVTrackingAA.getUniqueReplacementValue(ICV);
1585*5dfd7cc4Ssstefan1 
1586*5dfd7cc4Ssstefan1       if (ReplVal == NewReplVal)
1587*5dfd7cc4Ssstefan1         continue;
1588*5dfd7cc4Ssstefan1 
1589*5dfd7cc4Ssstefan1       ReplVal = NewReplVal;
1590*5dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
1591*5dfd7cc4Ssstefan1     }
1592*5dfd7cc4Ssstefan1     return Changed;
1593*5dfd7cc4Ssstefan1   }
15949548b74aSJohannes Doerfert };
15959548b74aSJohannes Doerfert } // namespace
15969548b74aSJohannes Doerfert 
1597b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
1598b8235d2bSsstefan1 
1599b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
1600b8235d2bSsstefan1                                               Attributor &A) {
1601b8235d2bSsstefan1   AAICVTracker *AA = nullptr;
1602b8235d2bSsstefan1   switch (IRP.getPositionKind()) {
1603b8235d2bSsstefan1   case IRPosition::IRP_INVALID:
1604b8235d2bSsstefan1   case IRPosition::IRP_FLOAT:
1605b8235d2bSsstefan1   case IRPosition::IRP_ARGUMENT:
1606b8235d2bSsstefan1   case IRPosition::IRP_CALL_SITE_ARGUMENT:
16071de70a72SJohannes Doerfert     llvm_unreachable("ICVTracker can only be created for function position!");
1608*5dfd7cc4Ssstefan1   case IRPosition::IRP_RETURNED:
1609*5dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
1610*5dfd7cc4Ssstefan1     break;
1611*5dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE_RETURNED:
1612*5dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
1613*5dfd7cc4Ssstefan1     break;
1614*5dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE:
1615*5dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
1616*5dfd7cc4Ssstefan1     break;
1617b8235d2bSsstefan1   case IRPosition::IRP_FUNCTION:
1618b8235d2bSsstefan1     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
1619b8235d2bSsstefan1     break;
1620b8235d2bSsstefan1   }
1621b8235d2bSsstefan1 
1622b8235d2bSsstefan1   return *AA;
1623b8235d2bSsstefan1 }
1624b8235d2bSsstefan1 
16259548b74aSJohannes Doerfert PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
16269548b74aSJohannes Doerfert                                      CGSCCAnalysisManager &AM,
16279548b74aSJohannes Doerfert                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
16289548b74aSJohannes Doerfert   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
16299548b74aSJohannes Doerfert     return PreservedAnalyses::all();
16309548b74aSJohannes Doerfert 
16319548b74aSJohannes Doerfert   if (DisableOpenMPOptimizations)
16329548b74aSJohannes Doerfert     return PreservedAnalyses::all();
16339548b74aSJohannes Doerfert 
1634ee17263aSJohannes Doerfert   SmallVector<Function *, 16> SCC;
1635351d234dSRoman Lebedev   // If there are kernels in the module, we have to run on all SCC's.
1636351d234dSRoman Lebedev   bool SCCIsInteresting = !OMPInModule.getKernels().empty();
1637351d234dSRoman Lebedev   for (LazyCallGraph::Node &N : C) {
1638351d234dSRoman Lebedev     Function *Fn = &N.getFunction();
1639351d234dSRoman Lebedev     SCC.push_back(Fn);
16409548b74aSJohannes Doerfert 
1641351d234dSRoman Lebedev     // Do we already know that the SCC contains kernels,
1642351d234dSRoman Lebedev     // or that OpenMP functions are called from this SCC?
1643351d234dSRoman Lebedev     if (SCCIsInteresting)
1644351d234dSRoman Lebedev       continue;
1645351d234dSRoman Lebedev     // If not, let's check that.
1646351d234dSRoman Lebedev     SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
1647351d234dSRoman Lebedev   }
1648351d234dSRoman Lebedev 
1649351d234dSRoman Lebedev   if (!SCCIsInteresting || SCC.empty())
16509548b74aSJohannes Doerfert     return PreservedAnalyses::all();
16519548b74aSJohannes Doerfert 
16524d4ea9acSHuber, Joseph   FunctionAnalysisManager &FAM =
16534d4ea9acSHuber, Joseph       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
16547cfd267cSsstefan1 
16557cfd267cSsstefan1   AnalysisGetter AG(FAM);
16567cfd267cSsstefan1 
16577cfd267cSsstefan1   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
16584d4ea9acSHuber, Joseph     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
16594d4ea9acSHuber, Joseph   };
16604d4ea9acSHuber, Joseph 
16619548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
16629548b74aSJohannes Doerfert   CGUpdater.initialize(CG, C, AM, UR);
16637cfd267cSsstefan1 
16647cfd267cSsstefan1   SetVector<Function *> Functions(SCC.begin(), SCC.end());
16657cfd267cSsstefan1   BumpPtrAllocator Allocator;
16667cfd267cSsstefan1   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
1667624d34afSJohannes Doerfert                                 /*CGSCC*/ Functions, OMPInModule.getKernels());
16687cfd267cSsstefan1 
16691de70a72SJohannes Doerfert   Attributor A(Functions, InfoCache, CGUpdater);
1670b8235d2bSsstefan1 
1671b8235d2bSsstefan1   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
16729548b74aSJohannes Doerfert   bool Changed = OMPOpt.run();
1673694ded37SGiorgis Georgakoudis   if (Changed)
1674694ded37SGiorgis Georgakoudis     return PreservedAnalyses::none();
1675694ded37SGiorgis Georgakoudis 
16769548b74aSJohannes Doerfert   return PreservedAnalyses::all();
16779548b74aSJohannes Doerfert }
16789548b74aSJohannes Doerfert 
16799548b74aSJohannes Doerfert namespace {
16809548b74aSJohannes Doerfert 
16819548b74aSJohannes Doerfert struct OpenMPOptLegacyPass : public CallGraphSCCPass {
16829548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
16839548b74aSJohannes Doerfert   OpenMPInModule OMPInModule;
16849548b74aSJohannes Doerfert   static char ID;
16859548b74aSJohannes Doerfert 
16869548b74aSJohannes Doerfert   OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
16879548b74aSJohannes Doerfert     initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
16889548b74aSJohannes Doerfert   }
16899548b74aSJohannes Doerfert 
16909548b74aSJohannes Doerfert   void getAnalysisUsage(AnalysisUsage &AU) const override {
16919548b74aSJohannes Doerfert     CallGraphSCCPass::getAnalysisUsage(AU);
16929548b74aSJohannes Doerfert   }
16939548b74aSJohannes Doerfert 
16949548b74aSJohannes Doerfert   bool doInitialization(CallGraph &CG) override {
16959548b74aSJohannes Doerfert     // Disable the pass if there is no OpenMP (runtime call) in the module.
16969548b74aSJohannes Doerfert     containsOpenMP(CG.getModule(), OMPInModule);
16979548b74aSJohannes Doerfert     return false;
16989548b74aSJohannes Doerfert   }
16999548b74aSJohannes Doerfert 
17009548b74aSJohannes Doerfert   bool runOnSCC(CallGraphSCC &CGSCC) override {
17019548b74aSJohannes Doerfert     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
17029548b74aSJohannes Doerfert       return false;
17039548b74aSJohannes Doerfert     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
17049548b74aSJohannes Doerfert       return false;
17059548b74aSJohannes Doerfert 
1706ee17263aSJohannes Doerfert     SmallVector<Function *, 16> SCC;
1707351d234dSRoman Lebedev     // If there are kernels in the module, we have to run on all SCC's.
1708351d234dSRoman Lebedev     bool SCCIsInteresting = !OMPInModule.getKernels().empty();
1709351d234dSRoman Lebedev     for (CallGraphNode *CGN : CGSCC) {
1710351d234dSRoman Lebedev       Function *Fn = CGN->getFunction();
1711351d234dSRoman Lebedev       if (!Fn || Fn->isDeclaration())
1712351d234dSRoman Lebedev         continue;
1713ee17263aSJohannes Doerfert       SCC.push_back(Fn);
17149548b74aSJohannes Doerfert 
1715351d234dSRoman Lebedev       // Do we already know that the SCC contains kernels,
1716351d234dSRoman Lebedev       // or that OpenMP functions are called from this SCC?
1717351d234dSRoman Lebedev       if (SCCIsInteresting)
1718351d234dSRoman Lebedev         continue;
1719351d234dSRoman Lebedev       // If not, let's check that.
1720351d234dSRoman Lebedev       SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
1721351d234dSRoman Lebedev     }
1722351d234dSRoman Lebedev 
1723351d234dSRoman Lebedev     if (!SCCIsInteresting || SCC.empty())
17249548b74aSJohannes Doerfert       return false;
17259548b74aSJohannes Doerfert 
17269548b74aSJohannes Doerfert     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
17279548b74aSJohannes Doerfert     CGUpdater.initialize(CG, CGSCC);
17289548b74aSJohannes Doerfert 
17294d4ea9acSHuber, Joseph     // Maintain a map of functions to avoid rebuilding the ORE
17304d4ea9acSHuber, Joseph     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
17314d4ea9acSHuber, Joseph     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
17324d4ea9acSHuber, Joseph       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
17334d4ea9acSHuber, Joseph       if (!ORE)
17344d4ea9acSHuber, Joseph         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
17354d4ea9acSHuber, Joseph       return *ORE;
17364d4ea9acSHuber, Joseph     };
17374d4ea9acSHuber, Joseph 
17387cfd267cSsstefan1     AnalysisGetter AG;
17397cfd267cSsstefan1     SetVector<Function *> Functions(SCC.begin(), SCC.end());
17407cfd267cSsstefan1     BumpPtrAllocator Allocator;
1741e8039ad4SJohannes Doerfert     OMPInformationCache InfoCache(
1742e8039ad4SJohannes Doerfert         *(Functions.back()->getParent()), AG, Allocator,
1743624d34afSJohannes Doerfert         /*CGSCC*/ Functions, OMPInModule.getKernels());
17447cfd267cSsstefan1 
17451de70a72SJohannes Doerfert     Attributor A(Functions, InfoCache, CGUpdater);
1746b8235d2bSsstefan1 
1747b8235d2bSsstefan1     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
17489548b74aSJohannes Doerfert     return OMPOpt.run();
17499548b74aSJohannes Doerfert   }
17509548b74aSJohannes Doerfert 
17519548b74aSJohannes Doerfert   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
17529548b74aSJohannes Doerfert };
17539548b74aSJohannes Doerfert 
17549548b74aSJohannes Doerfert } // end anonymous namespace
17559548b74aSJohannes Doerfert 
1756e8039ad4SJohannes Doerfert void OpenMPInModule::identifyKernels(Module &M) {
1757e8039ad4SJohannes Doerfert 
1758e8039ad4SJohannes Doerfert   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
1759e8039ad4SJohannes Doerfert   if (!MD)
1760e8039ad4SJohannes Doerfert     return;
1761e8039ad4SJohannes Doerfert 
1762e8039ad4SJohannes Doerfert   for (auto *Op : MD->operands()) {
1763e8039ad4SJohannes Doerfert     if (Op->getNumOperands() < 2)
1764e8039ad4SJohannes Doerfert       continue;
1765e8039ad4SJohannes Doerfert     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
1766e8039ad4SJohannes Doerfert     if (!KindID || KindID->getString() != "kernel")
1767e8039ad4SJohannes Doerfert       continue;
1768e8039ad4SJohannes Doerfert 
1769e8039ad4SJohannes Doerfert     Function *KernelFn =
1770e8039ad4SJohannes Doerfert         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
1771e8039ad4SJohannes Doerfert     if (!KernelFn)
1772e8039ad4SJohannes Doerfert       continue;
1773e8039ad4SJohannes Doerfert 
1774e8039ad4SJohannes Doerfert     ++NumOpenMPTargetRegionKernels;
1775e8039ad4SJohannes Doerfert 
1776e8039ad4SJohannes Doerfert     Kernels.insert(KernelFn);
1777e8039ad4SJohannes Doerfert   }
1778e8039ad4SJohannes Doerfert }
1779e8039ad4SJohannes Doerfert 
17809548b74aSJohannes Doerfert bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
17819548b74aSJohannes Doerfert   if (OMPInModule.isKnown())
17829548b74aSJohannes Doerfert     return OMPInModule;
1783dce6bc18SJohannes Doerfert 
1784351d234dSRoman Lebedev   auto RecordFunctionsContainingUsesOf = [&](Function *F) {
1785351d234dSRoman Lebedev     for (User *U : F->users())
1786351d234dSRoman Lebedev       if (auto *I = dyn_cast<Instruction>(U))
1787351d234dSRoman Lebedev         OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
1788351d234dSRoman Lebedev   };
1789351d234dSRoman Lebedev 
1790dce6bc18SJohannes Doerfert   // MSVC doesn't like long if-else chains for some reason and instead just
1791dce6bc18SJohannes Doerfert   // issues an error. Work around it..
1792dce6bc18SJohannes Doerfert   do {
17939548b74aSJohannes Doerfert #define OMP_RTL(_Enum, _Name, ...)                                             \
1794351d234dSRoman Lebedev   if (Function *F = M.getFunction(_Name)) {                                    \
1795351d234dSRoman Lebedev     RecordFunctionsContainingUsesOf(F);                                        \
1796dce6bc18SJohannes Doerfert     OMPInModule = true;                                                        \
1797dce6bc18SJohannes Doerfert   }
17989548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPKinds.def"
1799dce6bc18SJohannes Doerfert   } while (false);
1800e8039ad4SJohannes Doerfert 
1801e8039ad4SJohannes Doerfert   // Identify kernels once. TODO: We should split the OMPInformationCache into a
1802e8039ad4SJohannes Doerfert   // module and an SCC part. The kernel information, among other things, could
1803e8039ad4SJohannes Doerfert   // go into the module part.
1804e8039ad4SJohannes Doerfert   if (OMPInModule.isKnown() && OMPInModule) {
1805e8039ad4SJohannes Doerfert     OMPInModule.identifyKernels(M);
1806e8039ad4SJohannes Doerfert     return true;
1807e8039ad4SJohannes Doerfert   }
1808e8039ad4SJohannes Doerfert 
18099548b74aSJohannes Doerfert   return OMPInModule = false;
18109548b74aSJohannes Doerfert }
18119548b74aSJohannes Doerfert 
18129548b74aSJohannes Doerfert char OpenMPOptLegacyPass::ID = 0;
18139548b74aSJohannes Doerfert 
18149548b74aSJohannes Doerfert INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
18159548b74aSJohannes Doerfert                       "OpenMP specific optimizations", false, false)
18169548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
18179548b74aSJohannes Doerfert INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
18189548b74aSJohannes Doerfert                     "OpenMP specific optimizations", false, false)
18199548b74aSJohannes Doerfert 
18209548b74aSJohannes Doerfert Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
1821