19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
29548b74aSJohannes Doerfert //
39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69548b74aSJohannes Doerfert //
79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
89548b74aSJohannes Doerfert //
99548b74aSJohannes Doerfert // OpenMP specific optimizations:
109548b74aSJohannes Doerfert //
119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num.
129548b74aSJohannes Doerfert //
139548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
149548b74aSJohannes Doerfert 
159548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
169548b74aSJohannes Doerfert 
179548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
1818283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h"
199548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
209548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
219548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
224d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
233a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h"
249548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
25e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
2668abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h"
2768abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsAMDGPU.h"
2868abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsNVPTX.h"
296fc51c9fSJoseph Huber #include "llvm/IR/PatternMatch.h"
309548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
319548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
329548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
337cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
343a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h"
359548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
3697517055SGiorgis Georgakoudis #include "llvm/Transforms/Utils/CodeExtractor.h"
379548b74aSJohannes Doerfert 
386fc51c9fSJoseph Huber using namespace llvm::PatternMatch;
399548b74aSJohannes Doerfert using namespace llvm;
409548b74aSJohannes Doerfert using namespace omp;
419548b74aSJohannes Doerfert 
429548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
439548b74aSJohannes Doerfert 
449548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
459548b74aSJohannes Doerfert     "openmp-opt-disable", cl::ZeroOrMore,
469548b74aSJohannes Doerfert     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
479548b74aSJohannes Doerfert     cl::init(false));
489548b74aSJohannes Doerfert 
493a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging(
503a6bfcf2SGiorgis Georgakoudis     "openmp-opt-enable-merging", cl::ZeroOrMore,
513a6bfcf2SGiorgis Georgakoudis     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
523a6bfcf2SGiorgis Georgakoudis     cl::init(false));
533a6bfcf2SGiorgis Georgakoudis 
540f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
550f426935Ssstefan1                                     cl::Hidden);
56e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
57e8039ad4SJohannes Doerfert                                         cl::init(false), cl::Hidden);
580f426935Ssstefan1 
59496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
60496f8e5bSHamilton Tobon Mosquera     "openmp-hide-memory-transfer-latency",
61496f8e5bSHamilton Tobon Mosquera     cl::desc("[WIP] Tries to hide the latency of host to device memory"
62496f8e5bSHamilton Tobon Mosquera              " transfers"),
63496f8e5bSHamilton Tobon Mosquera     cl::Hidden, cl::init(false));
64496f8e5bSHamilton Tobon Mosquera 
659548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
669548b74aSJohannes Doerfert           "Number of OpenMP runtime calls deduplicated");
6755eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
6855eb714aSRoman Lebedev           "Number of OpenMP parallel regions deleted");
699548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
709548b74aSJohannes Doerfert           "Number of OpenMP runtime functions identified");
719548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
729548b74aSJohannes Doerfert           "Number of OpenMP runtime function uses identified");
73e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
74e8039ad4SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) identified");
755b0581aeSJohannes Doerfert STATISTIC(
765b0581aeSJohannes Doerfert     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
775b0581aeSJohannes Doerfert     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
783a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged,
793a6bfcf2SGiorgis Georgakoudis           "Number of OpenMP parallel regions merged");
806fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory,
816fc51c9fSJoseph Huber           "Amount of memory pushed to shared memory");
829548b74aSJohannes Doerfert 
83263c4a3cSrathod-sahaab #if !defined(NDEBUG)
849548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
85a50c0b0dSMikael Holmen #endif
869548b74aSJohannes Doerfert 
879548b74aSJohannes Doerfert namespace {
889548b74aSJohannes Doerfert 
/// Numeric identifiers of the address spaces referred to in this file.
/// NOTE(review): presumably these follow the numbering of the GPU targets --
/// confirm before relying on them for a new target.
enum class AddressSpace : unsigned {
  Generic = 0,
  Global = 1,
  Shared = 3,
  Constant = 4,
  Local = 5,
};

// Forward declarations; the definitions are not part of this section.
struct AAHeapToShared;
struct AAICVTracker;
100b8235d2bSsstefan1 
1017cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
1027cfd267cSsstefan1 /// Attributor runs.
1037cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
1047cfd267cSsstefan1   OMPInformationCache(Module &M, AnalysisGetter &AG,
105624d34afSJohannes Doerfert                       BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
106e8039ad4SJohannes Doerfert                       SmallPtrSetImpl<Kernel> &Kernels)
107624d34afSJohannes Doerfert       : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
108624d34afSJohannes Doerfert         Kernels(Kernels) {
109624d34afSJohannes Doerfert 
11061238d26Ssstefan1     OMPBuilder.initialize();
1119548b74aSJohannes Doerfert     initializeRuntimeFunctions();
1120f426935Ssstefan1     initializeInternalControlVars();
1139548b74aSJohannes Doerfert   }
1149548b74aSJohannes Doerfert 
1150f426935Ssstefan1   /// Generic information that describes an internal control variable.
1160f426935Ssstefan1   struct InternalControlVarInfo {
1170f426935Ssstefan1     /// The kind, as described by InternalControlVar enum.
1180f426935Ssstefan1     InternalControlVar Kind;
1190f426935Ssstefan1 
1200f426935Ssstefan1     /// The name of the ICV.
1210f426935Ssstefan1     StringRef Name;
1220f426935Ssstefan1 
1230f426935Ssstefan1     /// Environment variable associated with this ICV.
1240f426935Ssstefan1     StringRef EnvVarName;
1250f426935Ssstefan1 
1260f426935Ssstefan1     /// Initial value kind.
1270f426935Ssstefan1     ICVInitValue InitKind;
1280f426935Ssstefan1 
1290f426935Ssstefan1     /// Initial value.
1300f426935Ssstefan1     ConstantInt *InitValue;
1310f426935Ssstefan1 
1320f426935Ssstefan1     /// Setter RTL function associated with this ICV.
1330f426935Ssstefan1     RuntimeFunction Setter;
1340f426935Ssstefan1 
1350f426935Ssstefan1     /// Getter RTL function associated with this ICV.
1360f426935Ssstefan1     RuntimeFunction Getter;
1370f426935Ssstefan1 
1380f426935Ssstefan1     /// RTL Function corresponding to the override clause of this ICV
1390f426935Ssstefan1     RuntimeFunction Clause;
1400f426935Ssstefan1   };
1410f426935Ssstefan1 
1429548b74aSJohannes Doerfert   /// Generic information that describes a runtime function
1439548b74aSJohannes Doerfert   struct RuntimeFunctionInfo {
1448855fec3SJohannes Doerfert 
1459548b74aSJohannes Doerfert     /// The kind, as described by the RuntimeFunction enum.
1469548b74aSJohannes Doerfert     RuntimeFunction Kind;
1479548b74aSJohannes Doerfert 
1489548b74aSJohannes Doerfert     /// The name of the function.
1499548b74aSJohannes Doerfert     StringRef Name;
1509548b74aSJohannes Doerfert 
1519548b74aSJohannes Doerfert     /// Flag to indicate a variadic function.
1529548b74aSJohannes Doerfert     bool IsVarArg;
1539548b74aSJohannes Doerfert 
1549548b74aSJohannes Doerfert     /// The return type of the function.
1559548b74aSJohannes Doerfert     Type *ReturnType;
1569548b74aSJohannes Doerfert 
1579548b74aSJohannes Doerfert     /// The argument types of the function.
1589548b74aSJohannes Doerfert     SmallVector<Type *, 8> ArgumentTypes;
1599548b74aSJohannes Doerfert 
1609548b74aSJohannes Doerfert     /// The declaration if available.
161f09f4b26SJohannes Doerfert     Function *Declaration = nullptr;
1629548b74aSJohannes Doerfert 
1639548b74aSJohannes Doerfert     /// Uses of this runtime function per function containing the use.
1648855fec3SJohannes Doerfert     using UseVector = SmallVector<Use *, 16>;
1658855fec3SJohannes Doerfert 
166b8235d2bSsstefan1     /// Clear UsesMap for runtime function.
167b8235d2bSsstefan1     void clearUsesMap() { UsesMap.clear(); }
168b8235d2bSsstefan1 
16954bd3751SJohannes Doerfert     /// Boolean conversion that is true if the runtime function was found.
17054bd3751SJohannes Doerfert     operator bool() const { return Declaration; }
17154bd3751SJohannes Doerfert 
1728855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F.
1738855fec3SJohannes Doerfert     UseVector &getOrCreateUseVector(Function *F) {
174b8235d2bSsstefan1       std::shared_ptr<UseVector> &UV = UsesMap[F];
1758855fec3SJohannes Doerfert       if (!UV)
176b8235d2bSsstefan1         UV = std::make_shared<UseVector>();
1778855fec3SJohannes Doerfert       return *UV;
1788855fec3SJohannes Doerfert     }
1798855fec3SJohannes Doerfert 
1808855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F or `nullptr` if there are
1818855fec3SJohannes Doerfert     /// none.
1828855fec3SJohannes Doerfert     const UseVector *getUseVector(Function &F) const {
18395e57072SDavid Blaikie       auto I = UsesMap.find(&F);
18495e57072SDavid Blaikie       if (I != UsesMap.end())
18595e57072SDavid Blaikie         return I->second.get();
18695e57072SDavid Blaikie       return nullptr;
1878855fec3SJohannes Doerfert     }
1888855fec3SJohannes Doerfert 
1898855fec3SJohannes Doerfert     /// Return how many functions contain uses of this runtime function.
1908855fec3SJohannes Doerfert     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
1919548b74aSJohannes Doerfert 
1929548b74aSJohannes Doerfert     /// Return the number of arguments (or the minimal number for variadic
1939548b74aSJohannes Doerfert     /// functions).
1949548b74aSJohannes Doerfert     size_t getNumArgs() const { return ArgumentTypes.size(); }
1959548b74aSJohannes Doerfert 
1969548b74aSJohannes Doerfert     /// Run the callback \p CB on each use and forget the use if the result is
1979548b74aSJohannes Doerfert     /// true. The callback will be fed the function in which the use was
1989548b74aSJohannes Doerfert     /// encountered as second argument.
199624d34afSJohannes Doerfert     void foreachUse(SmallVectorImpl<Function *> &SCC,
200624d34afSJohannes Doerfert                     function_ref<bool(Use &, Function &)> CB) {
201624d34afSJohannes Doerfert       for (Function *F : SCC)
202624d34afSJohannes Doerfert         foreachUse(CB, F);
203e099c7b6Ssstefan1     }
204e099c7b6Ssstefan1 
205e099c7b6Ssstefan1     /// Run the callback \p CB on each use within the function \p F and forget
206e099c7b6Ssstefan1     /// the use if the result is true.
207624d34afSJohannes Doerfert     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
2088855fec3SJohannes Doerfert       SmallVector<unsigned, 8> ToBeDeleted;
2099548b74aSJohannes Doerfert       ToBeDeleted.clear();
210e099c7b6Ssstefan1 
2118855fec3SJohannes Doerfert       unsigned Idx = 0;
212624d34afSJohannes Doerfert       UseVector &UV = getOrCreateUseVector(F);
213e099c7b6Ssstefan1 
2148855fec3SJohannes Doerfert       for (Use *U : UV) {
215e099c7b6Ssstefan1         if (CB(*U, *F))
2168855fec3SJohannes Doerfert           ToBeDeleted.push_back(Idx);
2178855fec3SJohannes Doerfert         ++Idx;
2188855fec3SJohannes Doerfert       }
2198855fec3SJohannes Doerfert 
2208855fec3SJohannes Doerfert       // Remove the to-be-deleted indices in reverse order as prior
221b726c557SJohannes Doerfert       // modifications will not modify the smaller indices.
2228855fec3SJohannes Doerfert       while (!ToBeDeleted.empty()) {
2238855fec3SJohannes Doerfert         unsigned Idx = ToBeDeleted.pop_back_val();
2248855fec3SJohannes Doerfert         UV[Idx] = UV.back();
2258855fec3SJohannes Doerfert         UV.pop_back();
2269548b74aSJohannes Doerfert       }
2279548b74aSJohannes Doerfert     }
2288855fec3SJohannes Doerfert 
2298855fec3SJohannes Doerfert   private:
2308855fec3SJohannes Doerfert     /// Map from functions to all uses of this runtime function contained in
2318855fec3SJohannes Doerfert     /// them.
232b8235d2bSsstefan1     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
2339548b74aSJohannes Doerfert   };
2349548b74aSJohannes Doerfert 
2357cfd267cSsstefan1   /// An OpenMP-IR-Builder instance
2367cfd267cSsstefan1   OpenMPIRBuilder OMPBuilder;
2377cfd267cSsstefan1 
2387cfd267cSsstefan1   /// Map from runtime function kind to the runtime function description.
2397cfd267cSsstefan1   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
2407cfd267cSsstefan1                   RuntimeFunction::OMPRTL___last>
2417cfd267cSsstefan1       RFIs;
2427cfd267cSsstefan1 
2430f426935Ssstefan1   /// Map from ICV kind to the ICV description.
2440f426935Ssstefan1   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
2450f426935Ssstefan1                   InternalControlVar::ICV___last>
2460f426935Ssstefan1       ICVs;
2470f426935Ssstefan1 
2480f426935Ssstefan1   /// Helper to initialize all internal control variable information for those
2490f426935Ssstefan1   /// defined in OMPKinds.def.
2500f426935Ssstefan1   void initializeInternalControlVars() {
2510f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL)                                                 \
2520f426935Ssstefan1   {                                                                            \
2530f426935Ssstefan1     auto &ICV = ICVs[_Name];                                                   \
2540f426935Ssstefan1     ICV.Setter = RTL;                                                          \
2550f426935Ssstefan1   }
2560f426935Ssstefan1 #define ICV_RT_GET(Name, RTL)                                                  \
2570f426935Ssstefan1   {                                                                            \
2580f426935Ssstefan1     auto &ICV = ICVs[Name];                                                    \
2590f426935Ssstefan1     ICV.Getter = RTL;                                                          \
2600f426935Ssstefan1   }
2610f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
2620f426935Ssstefan1   {                                                                            \
2630f426935Ssstefan1     auto &ICV = ICVs[Enum];                                                    \
2640f426935Ssstefan1     ICV.Name = _Name;                                                          \
2650f426935Ssstefan1     ICV.Kind = Enum;                                                           \
2660f426935Ssstefan1     ICV.InitKind = Init;                                                       \
2670f426935Ssstefan1     ICV.EnvVarName = _EnvVarName;                                              \
2680f426935Ssstefan1     switch (ICV.InitKind) {                                                    \
269951e43f3Ssstefan1     case ICV_IMPLEMENTATION_DEFINED:                                           \
2700f426935Ssstefan1       ICV.InitValue = nullptr;                                                 \
2710f426935Ssstefan1       break;                                                                   \
272951e43f3Ssstefan1     case ICV_ZERO:                                                             \
2736aab27baSsstefan1       ICV.InitValue = ConstantInt::get(                                        \
2746aab27baSsstefan1           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
2750f426935Ssstefan1       break;                                                                   \
276951e43f3Ssstefan1     case ICV_FALSE:                                                            \
2776aab27baSsstefan1       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
2780f426935Ssstefan1       break;                                                                   \
279951e43f3Ssstefan1     case ICV_LAST:                                                             \
2800f426935Ssstefan1       break;                                                                   \
2810f426935Ssstefan1     }                                                                          \
2820f426935Ssstefan1   }
2830f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2840f426935Ssstefan1   }
2850f426935Ssstefan1 
2867cfd267cSsstefan1   /// Returns true if the function declaration \p F matches the runtime
2877cfd267cSsstefan1   /// function types, that is, return type \p RTFRetType, and argument types
2887cfd267cSsstefan1   /// \p RTFArgTypes.
2897cfd267cSsstefan1   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
2907cfd267cSsstefan1                                   SmallVector<Type *, 8> &RTFArgTypes) {
2917cfd267cSsstefan1     // TODO: We should output information to the user (under debug output
2927cfd267cSsstefan1     //       and via remarks).
2937cfd267cSsstefan1 
2947cfd267cSsstefan1     if (!F)
2957cfd267cSsstefan1       return false;
2967cfd267cSsstefan1     if (F->getReturnType() != RTFRetType)
2977cfd267cSsstefan1       return false;
2987cfd267cSsstefan1     if (F->arg_size() != RTFArgTypes.size())
2997cfd267cSsstefan1       return false;
3007cfd267cSsstefan1 
3017cfd267cSsstefan1     auto RTFTyIt = RTFArgTypes.begin();
3027cfd267cSsstefan1     for (Argument &Arg : F->args()) {
3037cfd267cSsstefan1       if (Arg.getType() != *RTFTyIt)
3047cfd267cSsstefan1         return false;
3057cfd267cSsstefan1 
3067cfd267cSsstefan1       ++RTFTyIt;
3077cfd267cSsstefan1     }
3087cfd267cSsstefan1 
3097cfd267cSsstefan1     return true;
3107cfd267cSsstefan1   }
3117cfd267cSsstefan1 
312b726c557SJohannes Doerfert   // Helper to collect all uses of the declaration in the UsesMap.
313b8235d2bSsstefan1   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
3147cfd267cSsstefan1     unsigned NumUses = 0;
3157cfd267cSsstefan1     if (!RFI.Declaration)
3167cfd267cSsstefan1       return NumUses;
3177cfd267cSsstefan1     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
3187cfd267cSsstefan1 
319b8235d2bSsstefan1     if (CollectStats) {
3207cfd267cSsstefan1       NumOpenMPRuntimeFunctionsIdentified += 1;
3217cfd267cSsstefan1       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
322b8235d2bSsstefan1     }
3237cfd267cSsstefan1 
3247cfd267cSsstefan1     // TODO: We directly convert uses into proper calls and unknown uses.
3257cfd267cSsstefan1     for (Use &U : RFI.Declaration->uses()) {
3267cfd267cSsstefan1       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
3277cfd267cSsstefan1         if (ModuleSlice.count(UserI->getFunction())) {
3287cfd267cSsstefan1           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
3297cfd267cSsstefan1           ++NumUses;
3307cfd267cSsstefan1         }
3317cfd267cSsstefan1       } else {
3327cfd267cSsstefan1         RFI.getOrCreateUseVector(nullptr).push_back(&U);
3337cfd267cSsstefan1         ++NumUses;
3347cfd267cSsstefan1       }
3357cfd267cSsstefan1     }
3367cfd267cSsstefan1     return NumUses;
337b8235d2bSsstefan1   }
3387cfd267cSsstefan1 
33997517055SGiorgis Georgakoudis   // Helper function to recollect uses of a runtime function.
34097517055SGiorgis Georgakoudis   void recollectUsesForFunction(RuntimeFunction RTF) {
34197517055SGiorgis Georgakoudis     auto &RFI = RFIs[RTF];
342b8235d2bSsstefan1     RFI.clearUsesMap();
343b8235d2bSsstefan1     collectUses(RFI, /*CollectStats*/ false);
344b8235d2bSsstefan1   }
34597517055SGiorgis Georgakoudis 
34697517055SGiorgis Georgakoudis   // Helper function to recollect uses of all runtime functions.
34797517055SGiorgis Georgakoudis   void recollectUses() {
34897517055SGiorgis Georgakoudis     for (int Idx = 0; Idx < RFIs.size(); ++Idx)
34997517055SGiorgis Georgakoudis       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
350b8235d2bSsstefan1   }
351b8235d2bSsstefan1 
352b8235d2bSsstefan1   /// Helper to initialize all runtime function information for those defined
353b8235d2bSsstefan1   /// in OpenMPKinds.def.
354b8235d2bSsstefan1   void initializeRuntimeFunctions() {
3557cfd267cSsstefan1     Module &M = *((*ModuleSlice.begin())->getParent());
3567cfd267cSsstefan1 
3576aab27baSsstefan1     // Helper macros for handling __VA_ARGS__ in OMP_RTL
3586aab27baSsstefan1 #define OMP_TYPE(VarName, ...)                                                 \
3596aab27baSsstefan1   Type *VarName = OMPBuilder.VarName;                                          \
3606aab27baSsstefan1   (void)VarName;
3616aab27baSsstefan1 
3626aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...)                                           \
3636aab27baSsstefan1   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
3646aab27baSsstefan1   (void)VarName##Ty;                                                           \
3656aab27baSsstefan1   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
3666aab27baSsstefan1   (void)VarName##PtrTy;
3676aab27baSsstefan1 
3686aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
3696aab27baSsstefan1   FunctionType *VarName = OMPBuilder.VarName;                                  \
3706aab27baSsstefan1   (void)VarName;                                                               \
3716aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3726aab27baSsstefan1   (void)VarName##Ptr;
3736aab27baSsstefan1 
3746aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...)                                          \
3756aab27baSsstefan1   StructType *VarName = OMPBuilder.VarName;                                    \
3766aab27baSsstefan1   (void)VarName;                                                               \
3776aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3786aab27baSsstefan1   (void)VarName##Ptr;
3796aab27baSsstefan1 
3807cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
3817cfd267cSsstefan1   {                                                                            \
3827cfd267cSsstefan1     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
3837cfd267cSsstefan1     Function *F = M.getFunction(_Name);                                        \
3846aab27baSsstefan1     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
3857cfd267cSsstefan1       auto &RFI = RFIs[_Enum];                                                 \
3867cfd267cSsstefan1       RFI.Kind = _Enum;                                                        \
3877cfd267cSsstefan1       RFI.Name = _Name;                                                        \
3887cfd267cSsstefan1       RFI.IsVarArg = _IsVarArg;                                                \
3896aab27baSsstefan1       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
3907cfd267cSsstefan1       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
3917cfd267cSsstefan1       RFI.Declaration = F;                                                     \
392b8235d2bSsstefan1       unsigned NumUses = collectUses(RFI);                                     \
3937cfd267cSsstefan1       (void)NumUses;                                                           \
3947cfd267cSsstefan1       LLVM_DEBUG({                                                             \
3957cfd267cSsstefan1         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
3967cfd267cSsstefan1                << " found\n";                                                  \
3977cfd267cSsstefan1         if (RFI.Declaration)                                                   \
3987cfd267cSsstefan1           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
3997cfd267cSsstefan1                  << RFI.getNumFunctionsWithUses()                              \
4007cfd267cSsstefan1                  << " different functions.\n";                                 \
4017cfd267cSsstefan1       });                                                                      \
4027cfd267cSsstefan1     }                                                                          \
4037cfd267cSsstefan1   }
4047cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
4057cfd267cSsstefan1 
4067cfd267cSsstefan1     // TODO: We should attach the attributes defined in OMPKinds.def.
4077cfd267cSsstefan1   }
408e8039ad4SJohannes Doerfert 
409e8039ad4SJohannes Doerfert   /// Collection of known kernels (\see Kernel) in the module.
410e8039ad4SJohannes Doerfert   SmallPtrSetImpl<Kernel> &Kernels;
4117cfd267cSsstefan1 };
4127cfd267cSsstefan1 
4138931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload
4148931add6SHamilton Tobon Mosquera /// array, to a vector in memory.
4158931add6SHamilton Tobon Mosquera struct OffloadArray {
4168931add6SHamilton Tobon Mosquera   /// Physical array (in the IR).
4178931add6SHamilton Tobon Mosquera   AllocaInst *Array = nullptr;
4188931add6SHamilton Tobon Mosquera   /// Mapped values.
4198931add6SHamilton Tobon Mosquera   SmallVector<Value *, 8> StoredValues;
4208931add6SHamilton Tobon Mosquera   /// Last stores made in the offload array.
4218931add6SHamilton Tobon Mosquera   SmallVector<StoreInst *, 8> LastAccesses;
4228931add6SHamilton Tobon Mosquera 
4238931add6SHamilton Tobon Mosquera   OffloadArray() = default;
4248931add6SHamilton Tobon Mosquera 
4258931add6SHamilton Tobon Mosquera   /// Initializes the OffloadArray with the values stored in \p Array before
4268931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached. Returns false if the initialization
4278931add6SHamilton Tobon Mosquera   /// fails.
4288931add6SHamilton Tobon Mosquera   /// This MUST be used immediately after the construction of the object.
4298931add6SHamilton Tobon Mosquera   bool initialize(AllocaInst &Array, Instruction &Before) {
4308931add6SHamilton Tobon Mosquera     if (!Array.getAllocatedType()->isArrayTy())
4318931add6SHamilton Tobon Mosquera       return false;
4328931add6SHamilton Tobon Mosquera 
4338931add6SHamilton Tobon Mosquera     if (!getValues(Array, Before))
4348931add6SHamilton Tobon Mosquera       return false;
4358931add6SHamilton Tobon Mosquera 
4368931add6SHamilton Tobon Mosquera     this->Array = &Array;
4378931add6SHamilton Tobon Mosquera     return true;
4388931add6SHamilton Tobon Mosquera   }
4398931add6SHamilton Tobon Mosquera 
440da8bec47SJoseph Huber   static const unsigned DeviceIDArgNum = 1;
441da8bec47SJoseph Huber   static const unsigned BasePtrsArgNum = 3;
442da8bec47SJoseph Huber   static const unsigned PtrsArgNum = 4;
443da8bec47SJoseph Huber   static const unsigned SizesArgNum = 5;
4441d3d9b9cSHamilton Tobon Mosquera 
4458931add6SHamilton Tobon Mosquera private:
4468931add6SHamilton Tobon Mosquera   /// Traverses the BasicBlock where \p Array is, collecting the stores made to
4478931add6SHamilton Tobon Mosquera   /// \p Array, leaving StoredValues with the values stored before the
4488931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached.
4498931add6SHamilton Tobon Mosquera   bool getValues(AllocaInst &Array, Instruction &Before) {
4508931add6SHamilton Tobon Mosquera     // Initialize container.
451d08d490aSJohannes Doerfert     const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
4528931add6SHamilton Tobon Mosquera     StoredValues.assign(NumValues, nullptr);
4538931add6SHamilton Tobon Mosquera     LastAccesses.assign(NumValues, nullptr);
4548931add6SHamilton Tobon Mosquera 
4558931add6SHamilton Tobon Mosquera     // TODO: This assumes the instruction \p Before is in the same
4568931add6SHamilton Tobon Mosquera     //  BasicBlock as Array. Make it general, for any control flow graph.
4578931add6SHamilton Tobon Mosquera     BasicBlock *BB = Array.getParent();
4588931add6SHamilton Tobon Mosquera     if (BB != Before.getParent())
4598931add6SHamilton Tobon Mosquera       return false;
4608931add6SHamilton Tobon Mosquera 
4618931add6SHamilton Tobon Mosquera     const DataLayout &DL = Array.getModule()->getDataLayout();
4628931add6SHamilton Tobon Mosquera     const unsigned int PointerSize = DL.getPointerSize();
4638931add6SHamilton Tobon Mosquera 
4648931add6SHamilton Tobon Mosquera     for (Instruction &I : *BB) {
4658931add6SHamilton Tobon Mosquera       if (&I == &Before)
4668931add6SHamilton Tobon Mosquera         break;
4678931add6SHamilton Tobon Mosquera 
4688931add6SHamilton Tobon Mosquera       if (!isa<StoreInst>(&I))
4698931add6SHamilton Tobon Mosquera         continue;
4708931add6SHamilton Tobon Mosquera 
4718931add6SHamilton Tobon Mosquera       auto *S = cast<StoreInst>(&I);
4728931add6SHamilton Tobon Mosquera       int64_t Offset = -1;
473d08d490aSJohannes Doerfert       auto *Dst =
474d08d490aSJohannes Doerfert           GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
4758931add6SHamilton Tobon Mosquera       if (Dst == &Array) {
4768931add6SHamilton Tobon Mosquera         int64_t Idx = Offset / PointerSize;
4778931add6SHamilton Tobon Mosquera         StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
4788931add6SHamilton Tobon Mosquera         LastAccesses[Idx] = S;
4798931add6SHamilton Tobon Mosquera       }
4808931add6SHamilton Tobon Mosquera     }
4818931add6SHamilton Tobon Mosquera 
4828931add6SHamilton Tobon Mosquera     return isFilled();
4838931add6SHamilton Tobon Mosquera   }
4848931add6SHamilton Tobon Mosquera 
4858931add6SHamilton Tobon Mosquera   /// Returns true if all values in StoredValues and
4868931add6SHamilton Tobon Mosquera   /// LastAccesses are not nullptrs.
4878931add6SHamilton Tobon Mosquera   bool isFilled() {
4888931add6SHamilton Tobon Mosquera     const unsigned NumValues = StoredValues.size();
4898931add6SHamilton Tobon Mosquera     for (unsigned I = 0; I < NumValues; ++I) {
4908931add6SHamilton Tobon Mosquera       if (!StoredValues[I] || !LastAccesses[I])
4918931add6SHamilton Tobon Mosquera         return false;
4928931add6SHamilton Tobon Mosquera     }
4938931add6SHamilton Tobon Mosquera 
4948931add6SHamilton Tobon Mosquera     return true;
4958931add6SHamilton Tobon Mosquera   }
4968931add6SHamilton Tobon Mosquera };
4978931add6SHamilton Tobon Mosquera 
4987cfd267cSsstefan1 struct OpenMPOpt {
4997cfd267cSsstefan1 
5007cfd267cSsstefan1   using OptimizationRemarkGetter =
5017cfd267cSsstefan1       function_ref<OptimizationRemarkEmitter &(Function *)>;
5027cfd267cSsstefan1 
5037cfd267cSsstefan1   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
5047cfd267cSsstefan1             OptimizationRemarkGetter OREGetter,
505b8235d2bSsstefan1             OMPInformationCache &OMPInfoCache, Attributor &A)
50677b79d79SMehdi Amini       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
507b8235d2bSsstefan1         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
5087cfd267cSsstefan1 
509a2281419SJoseph Huber   /// Check if any remarks are enabled for openmp-opt
510a2281419SJoseph Huber   bool remarksEnabled() {
511a2281419SJoseph Huber     auto &Ctx = M.getContext();
512a2281419SJoseph Huber     return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
513a2281419SJoseph Huber   }
514a2281419SJoseph Huber 
5159548b74aSJohannes Doerfert   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
516b2ad63d3SJoseph Huber   bool run(bool IsModulePass) {
51754bd3751SJohannes Doerfert     if (SCC.empty())
51854bd3751SJohannes Doerfert       return false;
51954bd3751SJohannes Doerfert 
5209548b74aSJohannes Doerfert     bool Changed = false;
5219548b74aSJohannes Doerfert 
5229548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
52377b79d79SMehdi Amini                       << " functions in a slice with "
52477b79d79SMehdi Amini                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
5259548b74aSJohannes Doerfert 
526b2ad63d3SJoseph Huber     if (IsModulePass) {
52718283125SJoseph Huber       Changed |= runAttributor();
52818283125SJoseph Huber 
5296fc51c9fSJoseph Huber       // Recollect uses, in case Attributor deleted any.
5306fc51c9fSJoseph Huber       OMPInfoCache.recollectUses();
5316fc51c9fSJoseph Huber 
532b2ad63d3SJoseph Huber       if (remarksEnabled())
533b2ad63d3SJoseph Huber         analysisGlobalization();
534b2ad63d3SJoseph Huber     } else {
535e8039ad4SJohannes Doerfert       if (PrintICVValues)
536e8039ad4SJohannes Doerfert         printICVs();
537e8039ad4SJohannes Doerfert       if (PrintOpenMPKernels)
538e8039ad4SJohannes Doerfert         printKernels();
539e8039ad4SJohannes Doerfert 
5405b0581aeSJohannes Doerfert       Changed |= rewriteDeviceCodeStateMachine();
5415b0581aeSJohannes Doerfert 
542e8039ad4SJohannes Doerfert       Changed |= runAttributor();
543e8039ad4SJohannes Doerfert 
544e8039ad4SJohannes Doerfert       // Recollect uses, in case Attributor deleted any.
545e8039ad4SJohannes Doerfert       OMPInfoCache.recollectUses();
546e8039ad4SJohannes Doerfert 
547e8039ad4SJohannes Doerfert       Changed |= deleteParallelRegions();
548496f8e5bSHamilton Tobon Mosquera       if (HideMemoryTransferLatency)
549496f8e5bSHamilton Tobon Mosquera         Changed |= hideMemTransfersLatency();
5503a6bfcf2SGiorgis Georgakoudis       Changed |= deduplicateRuntimeCalls();
5513a6bfcf2SGiorgis Georgakoudis       if (EnableParallelRegionMerging) {
5523a6bfcf2SGiorgis Georgakoudis         if (mergeParallelRegions()) {
5533a6bfcf2SGiorgis Georgakoudis           deduplicateRuntimeCalls();
5543a6bfcf2SGiorgis Georgakoudis           Changed = true;
5553a6bfcf2SGiorgis Georgakoudis         }
5563a6bfcf2SGiorgis Georgakoudis       }
557b2ad63d3SJoseph Huber     }
558e8039ad4SJohannes Doerfert 
559e8039ad4SJohannes Doerfert     return Changed;
560e8039ad4SJohannes Doerfert   }
561e8039ad4SJohannes Doerfert 
5620f426935Ssstefan1   /// Print initial ICV values for testing.
5630f426935Ssstefan1   /// FIXME: This should be done from the Attributor once it is added.
564e8039ad4SJohannes Doerfert   void printICVs() const {
565cb9cfa0dSsstefan1     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
566cb9cfa0dSsstefan1                                  ICV_proc_bind};
5670f426935Ssstefan1 
5680f426935Ssstefan1     for (Function *F : OMPInfoCache.ModuleSlice) {
5690f426935Ssstefan1       for (auto ICV : ICVs) {
5700f426935Ssstefan1         auto ICVInfo = OMPInfoCache.ICVs[ICV];
5712db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5722db182ffSJoseph Huber           return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
5730f426935Ssstefan1                      << " Value: "
5740f426935Ssstefan1                      << (ICVInfo.InitValue
57561cdaf66SSimon Pilgrim                              ? toString(ICVInfo.InitValue->getValue(), 10, true)
5760f426935Ssstefan1                              : "IMPLEMENTATION_DEFINED");
5770f426935Ssstefan1         };
5780f426935Ssstefan1 
5792db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
5800f426935Ssstefan1       }
5810f426935Ssstefan1     }
5820f426935Ssstefan1   }
5830f426935Ssstefan1 
584e8039ad4SJohannes Doerfert   /// Print OpenMP GPU kernels for testing.
585e8039ad4SJohannes Doerfert   void printKernels() const {
586e8039ad4SJohannes Doerfert     for (Function *F : SCC) {
587e8039ad4SJohannes Doerfert       if (!OMPInfoCache.Kernels.count(F))
588e8039ad4SJohannes Doerfert         continue;
589b8235d2bSsstefan1 
5902db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5912db182ffSJoseph Huber         return ORA << "OpenMP GPU kernel "
592e8039ad4SJohannes Doerfert                    << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
593e8039ad4SJohannes Doerfert       };
594b8235d2bSsstefan1 
5952db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
596e8039ad4SJohannes Doerfert     }
5979548b74aSJohannes Doerfert   }
5989548b74aSJohannes Doerfert 
5997cfd267cSsstefan1   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
6007cfd267cSsstefan1   /// given it has to be the callee or a nullptr is returned.
6017cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6027cfd267cSsstefan1       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6037cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(U.getUser());
6047cfd267cSsstefan1     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
6057cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6067cfd267cSsstefan1       return CI;
6077cfd267cSsstefan1     return nullptr;
6087cfd267cSsstefan1   }
6097cfd267cSsstefan1 
6107cfd267cSsstefan1   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
6117cfd267cSsstefan1   /// the callee or a nullptr is returned.
6127cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6137cfd267cSsstefan1       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6147cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(&V);
6157cfd267cSsstefan1     if (CI && !CI->hasOperandBundles() &&
6167cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6177cfd267cSsstefan1       return CI;
6187cfd267cSsstefan1     return nullptr;
6197cfd267cSsstefan1   }
6207cfd267cSsstefan1 
6219548b74aSJohannes Doerfert private:
6223a6bfcf2SGiorgis Georgakoudis   /// Merge parallel regions when it is safe.
6233a6bfcf2SGiorgis Georgakoudis   bool mergeParallelRegions() {
6243a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackCalleeOperand = 2;
6253a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackFirstArgOperand = 3;
6263a6bfcf2SGiorgis Georgakoudis     using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6273a6bfcf2SGiorgis Georgakoudis 
6283a6bfcf2SGiorgis Georgakoudis     // Check if there are any __kmpc_fork_call calls to merge.
6293a6bfcf2SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo &RFI =
6303a6bfcf2SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
6313a6bfcf2SGiorgis Georgakoudis 
6323a6bfcf2SGiorgis Georgakoudis     if (!RFI.Declaration)
6333a6bfcf2SGiorgis Georgakoudis       return false;
6343a6bfcf2SGiorgis Georgakoudis 
63597517055SGiorgis Georgakoudis     // Unmergable calls that prevent merging a parallel region.
63697517055SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
63797517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
63897517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
63997517055SGiorgis Georgakoudis     };
6403a6bfcf2SGiorgis Georgakoudis 
6413a6bfcf2SGiorgis Georgakoudis     bool Changed = false;
6423a6bfcf2SGiorgis Georgakoudis     LoopInfo *LI = nullptr;
6433a6bfcf2SGiorgis Georgakoudis     DominatorTree *DT = nullptr;
6443a6bfcf2SGiorgis Georgakoudis 
6453a6bfcf2SGiorgis Georgakoudis     SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
6463a6bfcf2SGiorgis Georgakoudis 
6473a6bfcf2SGiorgis Georgakoudis     BasicBlock *StartBB = nullptr, *EndBB = nullptr;
6483a6bfcf2SGiorgis Georgakoudis     auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
6493a6bfcf2SGiorgis Georgakoudis                          BasicBlock &ContinuationIP) {
6503a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGStartBB = CodeGenIP.getBlock();
6513a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGEndBB =
6523a6bfcf2SGiorgis Georgakoudis           SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
6533a6bfcf2SGiorgis Georgakoudis       assert(StartBB != nullptr && "StartBB should not be null");
6543a6bfcf2SGiorgis Georgakoudis       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
6553a6bfcf2SGiorgis Georgakoudis       assert(EndBB != nullptr && "EndBB should not be null");
6563a6bfcf2SGiorgis Georgakoudis       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
6573a6bfcf2SGiorgis Georgakoudis     };
6583a6bfcf2SGiorgis Georgakoudis 
659240dd924SAlex Zinenko     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
660240dd924SAlex Zinenko                       Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
661240dd924SAlex Zinenko       ReplacementValue = &Inner;
6623a6bfcf2SGiorgis Georgakoudis       return CodeGenIP;
6633a6bfcf2SGiorgis Georgakoudis     };
6643a6bfcf2SGiorgis Georgakoudis 
6653a6bfcf2SGiorgis Georgakoudis     auto FiniCB = [&](InsertPointTy CodeGenIP) {};
6663a6bfcf2SGiorgis Georgakoudis 
66797517055SGiorgis Georgakoudis     /// Create a sequential execution region within a merged parallel region,
66897517055SGiorgis Georgakoudis     /// encapsulated in a master construct with a barrier for synchronization.
66997517055SGiorgis Georgakoudis     auto CreateSequentialRegion = [&](Function *OuterFn,
67097517055SGiorgis Georgakoudis                                       BasicBlock *OuterPredBB,
67197517055SGiorgis Georgakoudis                                       Instruction *SeqStartI,
67297517055SGiorgis Georgakoudis                                       Instruction *SeqEndI) {
67397517055SGiorgis Georgakoudis       // Isolate the instructions of the sequential region to a separate
67497517055SGiorgis Georgakoudis       // block.
67597517055SGiorgis Georgakoudis       BasicBlock *ParentBB = SeqStartI->getParent();
67697517055SGiorgis Georgakoudis       BasicBlock *SeqEndBB =
67797517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
67897517055SGiorgis Georgakoudis       BasicBlock *SeqAfterBB =
67997517055SGiorgis Georgakoudis           SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
68097517055SGiorgis Georgakoudis       BasicBlock *SeqStartBB =
68197517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
68297517055SGiorgis Georgakoudis 
68397517055SGiorgis Georgakoudis       assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
68497517055SGiorgis Georgakoudis              "Expected a different CFG");
68597517055SGiorgis Georgakoudis       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
68697517055SGiorgis Georgakoudis       ParentBB->getTerminator()->eraseFromParent();
68797517055SGiorgis Georgakoudis 
68897517055SGiorgis Georgakoudis       auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
68997517055SGiorgis Georgakoudis                            BasicBlock &ContinuationIP) {
69097517055SGiorgis Georgakoudis         BasicBlock *CGStartBB = CodeGenIP.getBlock();
69197517055SGiorgis Georgakoudis         BasicBlock *CGEndBB =
69297517055SGiorgis Georgakoudis             SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
69397517055SGiorgis Georgakoudis         assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
69497517055SGiorgis Georgakoudis         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
69597517055SGiorgis Georgakoudis         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
69697517055SGiorgis Georgakoudis         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
69797517055SGiorgis Georgakoudis       };
69897517055SGiorgis Georgakoudis       auto FiniCB = [&](InsertPointTy CodeGenIP) {};
69997517055SGiorgis Georgakoudis 
70097517055SGiorgis Georgakoudis       // Find outputs from the sequential region to outside users and
70197517055SGiorgis Georgakoudis       // broadcast their values to them.
70297517055SGiorgis Georgakoudis       for (Instruction &I : *SeqStartBB) {
70397517055SGiorgis Georgakoudis         SmallPtrSet<Instruction *, 4> OutsideUsers;
70497517055SGiorgis Georgakoudis         for (User *Usr : I.users()) {
70597517055SGiorgis Georgakoudis           Instruction &UsrI = *cast<Instruction>(Usr);
70697517055SGiorgis Georgakoudis           // Ignore outputs to LT intrinsics, code extraction for the merged
70797517055SGiorgis Georgakoudis           // parallel region will fix them.
70897517055SGiorgis Georgakoudis           if (UsrI.isLifetimeStartOrEnd())
70997517055SGiorgis Georgakoudis             continue;
71097517055SGiorgis Georgakoudis 
71197517055SGiorgis Georgakoudis           if (UsrI.getParent() != SeqStartBB)
71297517055SGiorgis Georgakoudis             OutsideUsers.insert(&UsrI);
71397517055SGiorgis Georgakoudis         }
71497517055SGiorgis Georgakoudis 
71597517055SGiorgis Georgakoudis         if (OutsideUsers.empty())
71697517055SGiorgis Georgakoudis           continue;
71797517055SGiorgis Georgakoudis 
71897517055SGiorgis Georgakoudis         // Emit an alloca in the outer region to store the broadcasted
71997517055SGiorgis Georgakoudis         // value.
72097517055SGiorgis Georgakoudis         const DataLayout &DL = M.getDataLayout();
72197517055SGiorgis Georgakoudis         AllocaInst *AllocaI = new AllocaInst(
72297517055SGiorgis Georgakoudis             I.getType(), DL.getAllocaAddrSpace(), nullptr,
72397517055SGiorgis Georgakoudis             I.getName() + ".seq.output.alloc", &OuterFn->front().front());
72497517055SGiorgis Georgakoudis 
72597517055SGiorgis Georgakoudis         // Emit a store instruction in the sequential BB to update the
72697517055SGiorgis Georgakoudis         // value.
72797517055SGiorgis Georgakoudis         new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
72897517055SGiorgis Georgakoudis 
72997517055SGiorgis Georgakoudis         // Emit a load instruction and replace the use of the output value
73097517055SGiorgis Georgakoudis         // with it.
73197517055SGiorgis Georgakoudis         for (Instruction *UsrI : OutsideUsers) {
7325b70c12fSJohannes Doerfert           LoadInst *LoadI = new LoadInst(
7335b70c12fSJohannes Doerfert               I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
73497517055SGiorgis Georgakoudis           UsrI->replaceUsesOfWith(&I, LoadI);
73597517055SGiorgis Georgakoudis         }
73697517055SGiorgis Georgakoudis       }
73797517055SGiorgis Georgakoudis 
73897517055SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(
73997517055SGiorgis Georgakoudis           InsertPointTy(ParentBB, ParentBB->end()), DL);
74097517055SGiorgis Georgakoudis       InsertPointTy SeqAfterIP =
74197517055SGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
74297517055SGiorgis Georgakoudis 
74397517055SGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
74497517055SGiorgis Georgakoudis 
74597517055SGiorgis Georgakoudis       BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
74697517055SGiorgis Georgakoudis 
74797517055SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
74897517055SGiorgis Georgakoudis                         << "\n");
74997517055SGiorgis Georgakoudis     };
75097517055SGiorgis Georgakoudis 
7513a6bfcf2SGiorgis Georgakoudis     // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
7523a6bfcf2SGiorgis Georgakoudis     // contained in BB and only separated by instructions that can be
7533a6bfcf2SGiorgis Georgakoudis     // redundantly executed in parallel. The block BB is split before the first
7543a6bfcf2SGiorgis Georgakoudis     // call (in MergableCIs) and after the last so the entire region we merge
7553a6bfcf2SGiorgis Georgakoudis     // into a single parallel region is contained in a single basic block
7563a6bfcf2SGiorgis Georgakoudis     // without any other instructions. We use the OpenMPIRBuilder to outline
7573a6bfcf2SGiorgis Georgakoudis     // that block and call the resulting function via __kmpc_fork_call.
7583a6bfcf2SGiorgis Georgakoudis     auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
7593a6bfcf2SGiorgis Georgakoudis       // TODO: Change the interface to allow single CIs expanded, e.g, to
7603a6bfcf2SGiorgis Georgakoudis       // include an outer loop.
7613a6bfcf2SGiorgis Georgakoudis       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
7623a6bfcf2SGiorgis Georgakoudis 
7633a6bfcf2SGiorgis Georgakoudis       auto Remark = [&](OptimizationRemark OR) {
7643a6bfcf2SGiorgis Georgakoudis         OR << "Parallel region at "
7653a6bfcf2SGiorgis Georgakoudis            << ore::NV("OpenMPParallelMergeFront",
7663a6bfcf2SGiorgis Georgakoudis                       MergableCIs.front()->getDebugLoc())
7673a6bfcf2SGiorgis Georgakoudis            << " merged with parallel regions at ";
76823b0ab2aSKazu Hirata         for (auto *CI : llvm::drop_begin(MergableCIs)) {
7693a6bfcf2SGiorgis Georgakoudis           OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
7703a6bfcf2SGiorgis Georgakoudis           if (CI != MergableCIs.back())
7713a6bfcf2SGiorgis Georgakoudis             OR << ", ";
7723a6bfcf2SGiorgis Georgakoudis         }
7733a6bfcf2SGiorgis Georgakoudis         return OR;
7743a6bfcf2SGiorgis Georgakoudis       };
7753a6bfcf2SGiorgis Georgakoudis 
7763a6bfcf2SGiorgis Georgakoudis       emitRemark<OptimizationRemark>(MergableCIs.front(),
7773a6bfcf2SGiorgis Georgakoudis                                      "OpenMPParallelRegionMerging", Remark);
7783a6bfcf2SGiorgis Georgakoudis 
7793a6bfcf2SGiorgis Georgakoudis       Function *OriginalFn = BB->getParent();
7803a6bfcf2SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
7813a6bfcf2SGiorgis Georgakoudis                         << " parallel regions in " << OriginalFn->getName()
7823a6bfcf2SGiorgis Georgakoudis                         << "\n");
7833a6bfcf2SGiorgis Georgakoudis 
7843a6bfcf2SGiorgis Georgakoudis       // Isolate the calls to merge in a separate block.
7853a6bfcf2SGiorgis Georgakoudis       EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
7863a6bfcf2SGiorgis Georgakoudis       BasicBlock *AfterBB =
7873a6bfcf2SGiorgis Georgakoudis           SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
7883a6bfcf2SGiorgis Georgakoudis       StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
7893a6bfcf2SGiorgis Georgakoudis                            "omp.par.merged");
7903a6bfcf2SGiorgis Georgakoudis 
7913a6bfcf2SGiorgis Georgakoudis       assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
7923a6bfcf2SGiorgis Georgakoudis       const DebugLoc DL = BB->getTerminator()->getDebugLoc();
7933a6bfcf2SGiorgis Georgakoudis       BB->getTerminator()->eraseFromParent();
7943a6bfcf2SGiorgis Georgakoudis 
79597517055SGiorgis Georgakoudis       // Create sequential regions for sequential instructions that are
79697517055SGiorgis Georgakoudis       // in-between mergable parallel regions.
79797517055SGiorgis Georgakoudis       for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
79897517055SGiorgis Georgakoudis            It != End; ++It) {
79997517055SGiorgis Georgakoudis         Instruction *ForkCI = *It;
80097517055SGiorgis Georgakoudis         Instruction *NextForkCI = *(It + 1);
80197517055SGiorgis Georgakoudis 
80297517055SGiorgis Georgakoudis         // Continue if there are no in-between instructions.
80397517055SGiorgis Georgakoudis         if (ForkCI->getNextNode() == NextForkCI)
80497517055SGiorgis Georgakoudis           continue;
80597517055SGiorgis Georgakoudis 
80697517055SGiorgis Georgakoudis         CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
80797517055SGiorgis Georgakoudis                                NextForkCI->getPrevNode());
80897517055SGiorgis Georgakoudis       }
80997517055SGiorgis Georgakoudis 
8103a6bfcf2SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
8113a6bfcf2SGiorgis Georgakoudis                                                DL);
8123a6bfcf2SGiorgis Georgakoudis       IRBuilder<>::InsertPoint AllocaIP(
8133a6bfcf2SGiorgis Georgakoudis           &OriginalFn->getEntryBlock(),
8143a6bfcf2SGiorgis Georgakoudis           OriginalFn->getEntryBlock().getFirstInsertionPt());
8153a6bfcf2SGiorgis Georgakoudis       // Create the merged parallel region with default proc binding, to
8163a6bfcf2SGiorgis Georgakoudis       // avoid overriding binding settings, and without explicit cancellation.
817e5dba2d7SMichael Kruse       InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
8183a6bfcf2SGiorgis Georgakoudis           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
8193a6bfcf2SGiorgis Georgakoudis           OMP_PROC_BIND_default, /* IsCancellable */ false);
8203a6bfcf2SGiorgis Georgakoudis       BranchInst::Create(AfterBB, AfterIP.getBlock());
8213a6bfcf2SGiorgis Georgakoudis 
8223a6bfcf2SGiorgis Georgakoudis       // Perform the actual outlining.
823b1191206SMichael Kruse       OMPInfoCache.OMPBuilder.finalize(OriginalFn,
824b1191206SMichael Kruse                                        /* AllowExtractorSinking */ true);
8253a6bfcf2SGiorgis Georgakoudis 
8263a6bfcf2SGiorgis Georgakoudis       Function *OutlinedFn = MergableCIs.front()->getCaller();
8273a6bfcf2SGiorgis Georgakoudis 
8283a6bfcf2SGiorgis Georgakoudis       // Replace the __kmpc_fork_call calls with direct calls to the outlined
8293a6bfcf2SGiorgis Georgakoudis       // callbacks.
8303a6bfcf2SGiorgis Georgakoudis       SmallVector<Value *, 8> Args;
8313a6bfcf2SGiorgis Georgakoudis       for (auto *CI : MergableCIs) {
8323a6bfcf2SGiorgis Georgakoudis         Value *Callee =
8333a6bfcf2SGiorgis Georgakoudis             CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
8343a6bfcf2SGiorgis Georgakoudis         FunctionType *FT =
8353a6bfcf2SGiorgis Georgakoudis             cast<FunctionType>(Callee->getType()->getPointerElementType());
8363a6bfcf2SGiorgis Georgakoudis         Args.clear();
8373a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(0));
8383a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(1));
8393a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8403a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8413a6bfcf2SGiorgis Georgakoudis           Args.push_back(CI->getArgOperand(U));
8423a6bfcf2SGiorgis Georgakoudis 
8433a6bfcf2SGiorgis Georgakoudis         CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
8443a6bfcf2SGiorgis Georgakoudis         if (CI->getDebugLoc())
8453a6bfcf2SGiorgis Georgakoudis           NewCI->setDebugLoc(CI->getDebugLoc());
8463a6bfcf2SGiorgis Georgakoudis 
8473a6bfcf2SGiorgis Georgakoudis         // Forward parameter attributes from the callback to the callee.
8483a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8493a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8503a6bfcf2SGiorgis Georgakoudis           for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
8513a6bfcf2SGiorgis Georgakoudis             NewCI->addParamAttr(
8523a6bfcf2SGiorgis Georgakoudis                 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
8533a6bfcf2SGiorgis Georgakoudis 
8543a6bfcf2SGiorgis Georgakoudis         // Emit an explicit barrier to replace the implicit fork-join barrier.
8553a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.back()) {
8563a6bfcf2SGiorgis Georgakoudis           // TODO: Remove barrier if the merged parallel region includes the
8573a6bfcf2SGiorgis Georgakoudis           // 'nowait' clause.
858e5dba2d7SMichael Kruse           OMPInfoCache.OMPBuilder.createBarrier(
8593a6bfcf2SGiorgis Georgakoudis               InsertPointTy(NewCI->getParent(),
8603a6bfcf2SGiorgis Georgakoudis                             NewCI->getNextNode()->getIterator()),
8613a6bfcf2SGiorgis Georgakoudis               OMPD_parallel);
8623a6bfcf2SGiorgis Georgakoudis         }
8633a6bfcf2SGiorgis Georgakoudis 
8643a6bfcf2SGiorgis Georgakoudis         auto Remark = [&](OptimizationRemark OR) {
8653a6bfcf2SGiorgis Georgakoudis           return OR << "Parallel region at "
8663a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
8673a6bfcf2SGiorgis Georgakoudis                     << " merged with "
8683a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMergeFront",
8693a6bfcf2SGiorgis Georgakoudis                                MergableCIs.front()->getDebugLoc());
8703a6bfcf2SGiorgis Georgakoudis         };
8713a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.front())
8723a6bfcf2SGiorgis Georgakoudis           emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
8733a6bfcf2SGiorgis Georgakoudis                                          Remark);
8743a6bfcf2SGiorgis Georgakoudis 
8753a6bfcf2SGiorgis Georgakoudis         CI->eraseFromParent();
8763a6bfcf2SGiorgis Georgakoudis       }
8773a6bfcf2SGiorgis Georgakoudis 
8783a6bfcf2SGiorgis Georgakoudis       assert(OutlinedFn != OriginalFn && "Outlining failed");
8797fea561eSArthur Eubanks       CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
8803a6bfcf2SGiorgis Georgakoudis       CGUpdater.reanalyzeFunction(*OriginalFn);
8813a6bfcf2SGiorgis Georgakoudis 
8823a6bfcf2SGiorgis Georgakoudis       NumOpenMPParallelRegionsMerged += MergableCIs.size();
8833a6bfcf2SGiorgis Georgakoudis 
8843a6bfcf2SGiorgis Georgakoudis       return true;
8853a6bfcf2SGiorgis Georgakoudis     };
8863a6bfcf2SGiorgis Georgakoudis 
8873a6bfcf2SGiorgis Georgakoudis     // Helper function that identifies sequences of
8883a6bfcf2SGiorgis Georgakoudis     // __kmpc_fork_call uses in a basic block.
8893a6bfcf2SGiorgis Georgakoudis     auto DetectPRsCB = [&](Use &U, Function &F) {
8903a6bfcf2SGiorgis Georgakoudis       CallInst *CI = getCallIfRegularCall(U, &RFI);
8913a6bfcf2SGiorgis Georgakoudis       BB2PRMap[CI->getParent()].insert(CI);
8923a6bfcf2SGiorgis Georgakoudis 
8933a6bfcf2SGiorgis Georgakoudis       return false;
8943a6bfcf2SGiorgis Georgakoudis     };
8953a6bfcf2SGiorgis Georgakoudis 
8963a6bfcf2SGiorgis Georgakoudis     BB2PRMap.clear();
8973a6bfcf2SGiorgis Georgakoudis     RFI.foreachUse(SCC, DetectPRsCB);
8983a6bfcf2SGiorgis Georgakoudis     SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
8993a6bfcf2SGiorgis Georgakoudis     // Find mergable parallel regions within a basic block that are
9003a6bfcf2SGiorgis Georgakoudis     // safe to merge, that is any in-between instructions can safely
9013a6bfcf2SGiorgis Georgakoudis     // execute in parallel after merging.
9023a6bfcf2SGiorgis Georgakoudis     // TODO: support merging across basic-blocks.
9033a6bfcf2SGiorgis Georgakoudis     for (auto &It : BB2PRMap) {
9043a6bfcf2SGiorgis Georgakoudis       auto &CIs = It.getSecond();
9053a6bfcf2SGiorgis Georgakoudis       if (CIs.size() < 2)
9063a6bfcf2SGiorgis Georgakoudis         continue;
9073a6bfcf2SGiorgis Georgakoudis 
9083a6bfcf2SGiorgis Georgakoudis       BasicBlock *BB = It.getFirst();
9093a6bfcf2SGiorgis Georgakoudis       SmallVector<CallInst *, 4> MergableCIs;
9103a6bfcf2SGiorgis Georgakoudis 
91197517055SGiorgis Georgakoudis       /// Returns true if the instruction is mergable, false otherwise.
91297517055SGiorgis Georgakoudis       /// A terminator instruction is unmergable by definition since merging
91397517055SGiorgis Georgakoudis       /// works within a BB. Instructions before the mergable region are
91497517055SGiorgis Georgakoudis       /// mergable if they are not calls to OpenMP runtime functions that may
91597517055SGiorgis Georgakoudis       /// set different execution parameters for subsequent parallel regions.
91697517055SGiorgis Georgakoudis       /// Instructions in-between parallel regions are mergable if they are not
91797517055SGiorgis Georgakoudis       /// calls to any non-intrinsic function since that may call a non-mergable
91897517055SGiorgis Georgakoudis       /// OpenMP runtime function.
91997517055SGiorgis Georgakoudis       auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
92097517055SGiorgis Georgakoudis         // We do not merge across BBs, hence return false (unmergable) if the
92197517055SGiorgis Georgakoudis         // instruction is a terminator.
92297517055SGiorgis Georgakoudis         if (I.isTerminator())
92397517055SGiorgis Georgakoudis           return false;
92497517055SGiorgis Georgakoudis 
92597517055SGiorgis Georgakoudis         if (!isa<CallInst>(&I))
92697517055SGiorgis Georgakoudis           return true;
92797517055SGiorgis Georgakoudis 
92897517055SGiorgis Georgakoudis         CallInst *CI = cast<CallInst>(&I);
92997517055SGiorgis Georgakoudis         if (IsBeforeMergableRegion) {
93097517055SGiorgis Georgakoudis           Function *CalledFunction = CI->getCalledFunction();
93197517055SGiorgis Georgakoudis           if (!CalledFunction)
93297517055SGiorgis Georgakoudis             return false;
93397517055SGiorgis Georgakoudis           // Return false (unmergable) if the call before the parallel
93497517055SGiorgis Georgakoudis           // region calls an explicit affinity (proc_bind) or number of
93597517055SGiorgis Georgakoudis           // threads (num_threads) compiler-generated function. Those settings
93697517055SGiorgis Georgakoudis           // may be incompatible with following parallel regions.
93797517055SGiorgis Georgakoudis           // TODO: ICV tracking to detect compatibility.
93897517055SGiorgis Georgakoudis           for (const auto &RFI : UnmergableCallsInfo) {
93997517055SGiorgis Georgakoudis             if (CalledFunction == RFI.Declaration)
94097517055SGiorgis Georgakoudis               return false;
94197517055SGiorgis Georgakoudis           }
94297517055SGiorgis Georgakoudis         } else {
94397517055SGiorgis Georgakoudis           // Return false (unmergable) if there is a call instruction
94497517055SGiorgis Georgakoudis           // in-between parallel regions when it is not an intrinsic. It
94597517055SGiorgis Georgakoudis           // may call an unmergable OpenMP runtime function in its callpath.
94697517055SGiorgis Georgakoudis           // TODO: Keep track of possible OpenMP calls in the callpath.
94797517055SGiorgis Georgakoudis           if (!isa<IntrinsicInst>(CI))
94897517055SGiorgis Georgakoudis             return false;
94997517055SGiorgis Georgakoudis         }
95097517055SGiorgis Georgakoudis 
95197517055SGiorgis Georgakoudis         return true;
95297517055SGiorgis Georgakoudis       };
9533a6bfcf2SGiorgis Georgakoudis       // Find maximal number of parallel region CIs that are safe to merge.
95497517055SGiorgis Georgakoudis       for (auto It = BB->begin(), End = BB->end(); It != End;) {
95597517055SGiorgis Georgakoudis         Instruction &I = *It;
95697517055SGiorgis Georgakoudis         ++It;
95797517055SGiorgis Georgakoudis 
9583a6bfcf2SGiorgis Georgakoudis         if (CIs.count(&I)) {
9593a6bfcf2SGiorgis Georgakoudis           MergableCIs.push_back(cast<CallInst>(&I));
9603a6bfcf2SGiorgis Georgakoudis           continue;
9613a6bfcf2SGiorgis Georgakoudis         }
9623a6bfcf2SGiorgis Georgakoudis 
96397517055SGiorgis Georgakoudis         // Continue expanding if the instruction is mergable.
96497517055SGiorgis Georgakoudis         if (IsMergable(I, MergableCIs.empty()))
9653a6bfcf2SGiorgis Georgakoudis           continue;
9663a6bfcf2SGiorgis Georgakoudis 
96797517055SGiorgis Georgakoudis         // Forward the instruction iterator to skip the next parallel region
96897517055SGiorgis Georgakoudis         // since there is an unmergable instruction which can affect it.
96997517055SGiorgis Georgakoudis         for (; It != End; ++It) {
97097517055SGiorgis Georgakoudis           Instruction &SkipI = *It;
97197517055SGiorgis Georgakoudis           if (CIs.count(&SkipI)) {
97297517055SGiorgis Georgakoudis             LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
97397517055SGiorgis Georgakoudis                               << " due to " << I << "\n");
97497517055SGiorgis Georgakoudis             ++It;
97597517055SGiorgis Georgakoudis             break;
97697517055SGiorgis Georgakoudis           }
97797517055SGiorgis Georgakoudis         }
97897517055SGiorgis Georgakoudis 
97997517055SGiorgis Georgakoudis         // Store mergable regions found.
9803a6bfcf2SGiorgis Georgakoudis         if (MergableCIs.size() > 1) {
9813a6bfcf2SGiorgis Georgakoudis           MergableCIsVector.push_back(MergableCIs);
9823a6bfcf2SGiorgis Georgakoudis           LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
9833a6bfcf2SGiorgis Georgakoudis                             << " parallel regions in block " << BB->getName()
9843a6bfcf2SGiorgis Georgakoudis                             << " of function " << BB->getParent()->getName()
9853a6bfcf2SGiorgis Georgakoudis                             << "\n";);
9863a6bfcf2SGiorgis Georgakoudis         }
9873a6bfcf2SGiorgis Georgakoudis 
9883a6bfcf2SGiorgis Georgakoudis         MergableCIs.clear();
9893a6bfcf2SGiorgis Georgakoudis       }
9903a6bfcf2SGiorgis Georgakoudis 
9913a6bfcf2SGiorgis Georgakoudis       if (!MergableCIsVector.empty()) {
9923a6bfcf2SGiorgis Georgakoudis         Changed = true;
9933a6bfcf2SGiorgis Georgakoudis 
9943a6bfcf2SGiorgis Georgakoudis         for (auto &MergableCIs : MergableCIsVector)
9953a6bfcf2SGiorgis Georgakoudis           Merge(MergableCIs, BB);
996b2ad63d3SJoseph Huber         MergableCIsVector.clear();
9973a6bfcf2SGiorgis Georgakoudis       }
9983a6bfcf2SGiorgis Georgakoudis     }
9993a6bfcf2SGiorgis Georgakoudis 
10003a6bfcf2SGiorgis Georgakoudis     if (Changed) {
100197517055SGiorgis Georgakoudis       /// Re-collect use for fork calls, emitted barrier calls, and
100297517055SGiorgis Georgakoudis       /// any emitted master/end_master calls.
100397517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
100497517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
100597517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
100697517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
10073a6bfcf2SGiorgis Georgakoudis     }
10083a6bfcf2SGiorgis Georgakoudis 
10093a6bfcf2SGiorgis Georgakoudis     return Changed;
10103a6bfcf2SGiorgis Georgakoudis   }
10113a6bfcf2SGiorgis Georgakoudis 
10129d38f98dSJohannes Doerfert   /// Try to delete parallel regions if possible.
1013e565db49SJohannes Doerfert   bool deleteParallelRegions() {
1014e565db49SJohannes Doerfert     const unsigned CallbackCalleeOperand = 2;
1015e565db49SJohannes Doerfert 
10167cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &RFI =
10177cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
10187cfd267cSsstefan1 
1019e565db49SJohannes Doerfert     if (!RFI.Declaration)
1020e565db49SJohannes Doerfert       return false;
1021e565db49SJohannes Doerfert 
1022e565db49SJohannes Doerfert     bool Changed = false;
1023e565db49SJohannes Doerfert     auto DeleteCallCB = [&](Use &U, Function &) {
1024e565db49SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U);
1025e565db49SJohannes Doerfert       if (!CI)
1026e565db49SJohannes Doerfert         return false;
1027e565db49SJohannes Doerfert       auto *Fn = dyn_cast<Function>(
1028e565db49SJohannes Doerfert           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1029e565db49SJohannes Doerfert       if (!Fn)
1030e565db49SJohannes Doerfert         return false;
1031e565db49SJohannes Doerfert       if (!Fn->onlyReadsMemory())
1032e565db49SJohannes Doerfert         return false;
1033e565db49SJohannes Doerfert       if (!Fn->hasFnAttribute(Attribute::WillReturn))
1034e565db49SJohannes Doerfert         return false;
1035e565db49SJohannes Doerfert 
1036e565db49SJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1037e565db49SJohannes Doerfert                         << CI->getCaller()->getName() << "\n");
10384d4ea9acSHuber, Joseph 
10394d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
10404d4ea9acSHuber, Joseph         return OR << "Parallel region in "
10414d4ea9acSHuber, Joseph                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
10424d4ea9acSHuber, Joseph                   << " deleted";
10434d4ea9acSHuber, Joseph       };
10444d4ea9acSHuber, Joseph       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
10454d4ea9acSHuber, Joseph                                      Remark);
10464d4ea9acSHuber, Joseph 
1047e565db49SJohannes Doerfert       CGUpdater.removeCallSite(*CI);
1048e565db49SJohannes Doerfert       CI->eraseFromParent();
1049e565db49SJohannes Doerfert       Changed = true;
105055eb714aSRoman Lebedev       ++NumOpenMPParallelRegionsDeleted;
1051e565db49SJohannes Doerfert       return true;
1052e565db49SJohannes Doerfert     };
1053e565db49SJohannes Doerfert 
1054624d34afSJohannes Doerfert     RFI.foreachUse(SCC, DeleteCallCB);
1055e565db49SJohannes Doerfert 
1056e565db49SJohannes Doerfert     return Changed;
1057e565db49SJohannes Doerfert   }
1058e565db49SJohannes Doerfert 
1059b726c557SJohannes Doerfert   /// Try to eliminate runtime calls by reusing existing ones.
10609548b74aSJohannes Doerfert   bool deduplicateRuntimeCalls() {
10619548b74aSJohannes Doerfert     bool Changed = false;
10629548b74aSJohannes Doerfert 
1063e28936f6SJohannes Doerfert     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1064e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_threads,
1065e28936f6SJohannes Doerfert         OMPRTL_omp_in_parallel,
1066e28936f6SJohannes Doerfert         OMPRTL_omp_get_cancellation,
1067e28936f6SJohannes Doerfert         OMPRTL_omp_get_thread_limit,
1068e28936f6SJohannes Doerfert         OMPRTL_omp_get_supported_active_levels,
1069e28936f6SJohannes Doerfert         OMPRTL_omp_get_level,
1070e28936f6SJohannes Doerfert         OMPRTL_omp_get_ancestor_thread_num,
1071e28936f6SJohannes Doerfert         OMPRTL_omp_get_team_size,
1072e28936f6SJohannes Doerfert         OMPRTL_omp_get_active_level,
1073e28936f6SJohannes Doerfert         OMPRTL_omp_in_final,
1074e28936f6SJohannes Doerfert         OMPRTL_omp_get_proc_bind,
1075e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_places,
1076e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_procs,
1077e28936f6SJohannes Doerfert         OMPRTL_omp_get_place_num,
1078e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_num_places,
1079e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_place_nums};
1080e28936f6SJohannes Doerfert 
1081bc93c2d7SMarek Kurdej     // Global-tid is handled separately.
10829548b74aSJohannes Doerfert     SmallSetVector<Value *, 16> GTIdArgs;
10839548b74aSJohannes Doerfert     collectGlobalThreadIdArguments(GTIdArgs);
10849548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
10859548b74aSJohannes Doerfert                       << " global thread ID arguments\n");
10869548b74aSJohannes Doerfert 
10879548b74aSJohannes Doerfert     for (Function *F : SCC) {
1088e28936f6SJohannes Doerfert       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
10894e29d256Sserge-sans-paille         Changed |= deduplicateRuntimeCalls(
10904e29d256Sserge-sans-paille             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1091e28936f6SJohannes Doerfert 
1092e28936f6SJohannes Doerfert       // __kmpc_global_thread_num is special as we can replace it with an
1093e28936f6SJohannes Doerfert       // argument in enough cases to make it worth trying.
10949548b74aSJohannes Doerfert       Value *GTIdArg = nullptr;
10959548b74aSJohannes Doerfert       for (Argument &Arg : F->args())
10969548b74aSJohannes Doerfert         if (GTIdArgs.count(&Arg)) {
10979548b74aSJohannes Doerfert           GTIdArg = &Arg;
10989548b74aSJohannes Doerfert           break;
10999548b74aSJohannes Doerfert         }
11009548b74aSJohannes Doerfert       Changed |= deduplicateRuntimeCalls(
11017cfd267cSsstefan1           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
11029548b74aSJohannes Doerfert     }
11039548b74aSJohannes Doerfert 
11049548b74aSJohannes Doerfert     return Changed;
11059548b74aSJohannes Doerfert   }
11069548b74aSJohannes Doerfert 
1107496f8e5bSHamilton Tobon Mosquera   /// Tries to hide the latency of runtime calls that involve host to
1108496f8e5bSHamilton Tobon Mosquera   /// device memory transfers by splitting them into their "issue" and "wait"
1109496f8e5bSHamilton Tobon Mosquera   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1110496f8e5bSHamilton Tobon Mosquera   /// moved downards as much as possible. The "issue" issues the memory transfer
1111496f8e5bSHamilton Tobon Mosquera   /// asynchronously, returning a handle. The "wait" waits in the returned
1112496f8e5bSHamilton Tobon Mosquera   /// handle for the memory transfer to finish.
1113496f8e5bSHamilton Tobon Mosquera   bool hideMemTransfersLatency() {
1114496f8e5bSHamilton Tobon Mosquera     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1115496f8e5bSHamilton Tobon Mosquera     bool Changed = false;
1116496f8e5bSHamilton Tobon Mosquera     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1117496f8e5bSHamilton Tobon Mosquera       auto *RTCall = getCallIfRegularCall(U, &RFI);
1118496f8e5bSHamilton Tobon Mosquera       if (!RTCall)
1119496f8e5bSHamilton Tobon Mosquera         return false;
1120496f8e5bSHamilton Tobon Mosquera 
11218931add6SHamilton Tobon Mosquera       OffloadArray OffloadArrays[3];
11228931add6SHamilton Tobon Mosquera       if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
11238931add6SHamilton Tobon Mosquera         return false;
11248931add6SHamilton Tobon Mosquera 
11258931add6SHamilton Tobon Mosquera       LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
11268931add6SHamilton Tobon Mosquera 
1127bd2fa181SHamilton Tobon Mosquera       // TODO: Check if can be moved upwards.
1128bd2fa181SHamilton Tobon Mosquera       bool WasSplit = false;
1129bd2fa181SHamilton Tobon Mosquera       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1130bd2fa181SHamilton Tobon Mosquera       if (WaitMovementPoint)
1131bd2fa181SHamilton Tobon Mosquera         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1132bd2fa181SHamilton Tobon Mosquera 
1133496f8e5bSHamilton Tobon Mosquera       Changed |= WasSplit;
1134496f8e5bSHamilton Tobon Mosquera       return WasSplit;
1135496f8e5bSHamilton Tobon Mosquera     };
1136496f8e5bSHamilton Tobon Mosquera     RFI.foreachUse(SCC, SplitMemTransfers);
1137496f8e5bSHamilton Tobon Mosquera 
1138496f8e5bSHamilton Tobon Mosquera     return Changed;
1139496f8e5bSHamilton Tobon Mosquera   }
1140496f8e5bSHamilton Tobon Mosquera 
1141a2281419SJoseph Huber   void analysisGlobalization() {
11426fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
114382453e75SJoseph Huber 
114482453e75SJoseph Huber     auto CheckGlobalization = [&](Use &U, Function &Decl) {
1145a2281419SJoseph Huber       if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
1146a2281419SJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1147a2281419SJoseph Huber           return ORA
1148a2281419SJoseph Huber                  << "Found thread data sharing on the GPU. "
1149a2281419SJoseph Huber                  << "Expect degraded performance due to data globalization.";
1150a2281419SJoseph Huber         };
1151a2281419SJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization",
1152a2281419SJoseph Huber                                                Remark);
1153a2281419SJoseph Huber       }
1154a2281419SJoseph Huber 
1155a2281419SJoseph Huber       return false;
1156a2281419SJoseph Huber     };
1157a2281419SJoseph Huber 
115882453e75SJoseph Huber     RFI.foreachUse(SCC, CheckGlobalization);
115982453e75SJoseph Huber   }
1160a2281419SJoseph Huber 
11618931add6SHamilton Tobon Mosquera   /// Maps the values stored in the offload arrays passed as arguments to
11628931add6SHamilton Tobon Mosquera   /// \p RuntimeCall into the offload arrays in \p OAs.
11638931add6SHamilton Tobon Mosquera   bool getValuesInOffloadArrays(CallInst &RuntimeCall,
11648931add6SHamilton Tobon Mosquera                                 MutableArrayRef<OffloadArray> OAs) {
11658931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "Need space for three offload arrays!");
11668931add6SHamilton Tobon Mosquera 
11678931add6SHamilton Tobon Mosquera     // A runtime call that involves memory offloading looks something like:
11688931add6SHamilton Tobon Mosquera     // call void @__tgt_target_data_begin_mapper(arg0, arg1,
11698931add6SHamilton Tobon Mosquera     //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
11708931add6SHamilton Tobon Mosquera     // ...)
11718931add6SHamilton Tobon Mosquera     // So, the idea is to access the allocas that allocate space for these
11728931add6SHamilton Tobon Mosquera     // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
11738931add6SHamilton Tobon Mosquera     // Therefore:
11748931add6SHamilton Tobon Mosquera     // i8** %offload_baseptrs.
11751d3d9b9cSHamilton Tobon Mosquera     Value *BasePtrsArg =
11761d3d9b9cSHamilton Tobon Mosquera         RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
11778931add6SHamilton Tobon Mosquera     // i8** %offload_ptrs.
11781d3d9b9cSHamilton Tobon Mosquera     Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
11798931add6SHamilton Tobon Mosquera     // i8** %offload_sizes.
11801d3d9b9cSHamilton Tobon Mosquera     Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
11818931add6SHamilton Tobon Mosquera 
11828931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11838931add6SHamilton Tobon Mosquera     auto *V = getUnderlyingObject(BasePtrsArg);
11848931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11858931add6SHamilton Tobon Mosquera       return false;
11868931add6SHamilton Tobon Mosquera     auto *BasePtrsArray = cast<AllocaInst>(V);
11878931add6SHamilton Tobon Mosquera     if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
11888931add6SHamilton Tobon Mosquera       return false;
11898931add6SHamilton Tobon Mosquera 
11908931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11918931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(PtrsArg);
11928931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11938931add6SHamilton Tobon Mosquera       return false;
11948931add6SHamilton Tobon Mosquera     auto *PtrsArray = cast<AllocaInst>(V);
11958931add6SHamilton Tobon Mosquera     if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
11968931add6SHamilton Tobon Mosquera       return false;
11978931add6SHamilton Tobon Mosquera 
11988931add6SHamilton Tobon Mosquera     // Get values stored in **offload_sizes.
11998931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(SizesArg);
12008931add6SHamilton Tobon Mosquera     // If it's a [constant] global array don't analyze it.
12018931add6SHamilton Tobon Mosquera     if (isa<GlobalValue>(V))
12028931add6SHamilton Tobon Mosquera       return isa<Constant>(V);
12038931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
12048931add6SHamilton Tobon Mosquera       return false;
12058931add6SHamilton Tobon Mosquera 
12068931add6SHamilton Tobon Mosquera     auto *SizesArray = cast<AllocaInst>(V);
12078931add6SHamilton Tobon Mosquera     if (!OAs[2].initialize(*SizesArray, RuntimeCall))
12088931add6SHamilton Tobon Mosquera       return false;
12098931add6SHamilton Tobon Mosquera 
12108931add6SHamilton Tobon Mosquera     return true;
12118931add6SHamilton Tobon Mosquera   }
12128931add6SHamilton Tobon Mosquera 
12138931add6SHamilton Tobon Mosquera   /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
12148931add6SHamilton Tobon Mosquera   /// For now this is a way to test that the function getValuesInOffloadArrays
12158931add6SHamilton Tobon Mosquera   /// is working properly.
12168931add6SHamilton Tobon Mosquera   /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
12178931add6SHamilton Tobon Mosquera   void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
12188931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "There are three offload arrays to debug!");
12198931add6SHamilton Tobon Mosquera 
12208931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
12218931add6SHamilton Tobon Mosquera     std::string ValuesStr;
12228931add6SHamilton Tobon Mosquera     raw_string_ostream Printer(ValuesStr);
12238931add6SHamilton Tobon Mosquera     std::string Separator = " --- ";
12248931add6SHamilton Tobon Mosquera 
12258931add6SHamilton Tobon Mosquera     for (auto *BP : OAs[0].StoredValues) {
12268931add6SHamilton Tobon Mosquera       BP->print(Printer);
12278931add6SHamilton Tobon Mosquera       Printer << Separator;
12288931add6SHamilton Tobon Mosquera     }
12298931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
12308931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12318931add6SHamilton Tobon Mosquera 
12328931add6SHamilton Tobon Mosquera     for (auto *P : OAs[1].StoredValues) {
12338931add6SHamilton Tobon Mosquera       P->print(Printer);
12348931add6SHamilton Tobon Mosquera       Printer << Separator;
12358931add6SHamilton Tobon Mosquera     }
12368931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
12378931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12388931add6SHamilton Tobon Mosquera 
12398931add6SHamilton Tobon Mosquera     for (auto *S : OAs[2].StoredValues) {
12408931add6SHamilton Tobon Mosquera       S->print(Printer);
12418931add6SHamilton Tobon Mosquera       Printer << Separator;
12428931add6SHamilton Tobon Mosquera     }
12438931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
12448931add6SHamilton Tobon Mosquera   }
12458931add6SHamilton Tobon Mosquera 
1246bd2fa181SHamilton Tobon Mosquera   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1247bd2fa181SHamilton Tobon Mosquera   /// moved. Returns nullptr if the movement is not possible, or not worth it.
1248bd2fa181SHamilton Tobon Mosquera   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1249bd2fa181SHamilton Tobon Mosquera     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1250bd2fa181SHamilton Tobon Mosquera     //  Make it traverse the CFG.
1251bd2fa181SHamilton Tobon Mosquera 
1252bd2fa181SHamilton Tobon Mosquera     Instruction *CurrentI = &RuntimeCall;
1253bd2fa181SHamilton Tobon Mosquera     bool IsWorthIt = false;
1254bd2fa181SHamilton Tobon Mosquera     while ((CurrentI = CurrentI->getNextNode())) {
1255bd2fa181SHamilton Tobon Mosquera 
1256bd2fa181SHamilton Tobon Mosquera       // TODO: Once we detect the regions to be offloaded we should use the
1257bd2fa181SHamilton Tobon Mosquera       //  alias analysis manager to check if CurrentI may modify one of
1258bd2fa181SHamilton Tobon Mosquera       //  the offloaded regions.
1259bd2fa181SHamilton Tobon Mosquera       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1260bd2fa181SHamilton Tobon Mosquera         if (IsWorthIt)
1261bd2fa181SHamilton Tobon Mosquera           return CurrentI;
1262bd2fa181SHamilton Tobon Mosquera 
1263bd2fa181SHamilton Tobon Mosquera         return nullptr;
1264bd2fa181SHamilton Tobon Mosquera       }
1265bd2fa181SHamilton Tobon Mosquera 
1266bd2fa181SHamilton Tobon Mosquera       // FIXME: For now if we move it over anything without side effect
1267bd2fa181SHamilton Tobon Mosquera       //  is worth it.
1268bd2fa181SHamilton Tobon Mosquera       IsWorthIt = true;
1269bd2fa181SHamilton Tobon Mosquera     }
1270bd2fa181SHamilton Tobon Mosquera 
1271bd2fa181SHamilton Tobon Mosquera     // Return end of BasicBlock.
1272bd2fa181SHamilton Tobon Mosquera     return RuntimeCall.getParent()->getTerminator();
1273bd2fa181SHamilton Tobon Mosquera   }
1274bd2fa181SHamilton Tobon Mosquera 
1275496f8e5bSHamilton Tobon Mosquera   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
1276bd2fa181SHamilton Tobon Mosquera   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1277bd2fa181SHamilton Tobon Mosquera                                Instruction &WaitMovementPoint) {
1278bd31abc1SHamilton Tobon Mosquera     // Create stack allocated handle (__tgt_async_info) at the beginning of the
1279bd31abc1SHamilton Tobon Mosquera     // function. Used for storing information of the async transfer, allowing to
1280bd31abc1SHamilton Tobon Mosquera     // wait on it later.
1281496f8e5bSHamilton Tobon Mosquera     auto &IRBuilder = OMPInfoCache.OMPBuilder;
1282bd31abc1SHamilton Tobon Mosquera     auto *F = RuntimeCall.getCaller();
1283bd31abc1SHamilton Tobon Mosquera     Instruction *FirstInst = &(F->getEntryBlock().front());
1284bd31abc1SHamilton Tobon Mosquera     AllocaInst *Handle = new AllocaInst(
1285bd31abc1SHamilton Tobon Mosquera         IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1286bd31abc1SHamilton Tobon Mosquera 
1287496f8e5bSHamilton Tobon Mosquera     // Add "issue" runtime call declaration:
1288496f8e5bSHamilton Tobon Mosquera     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1289496f8e5bSHamilton Tobon Mosquera     //   i8**, i8**, i64*, i64*)
1290496f8e5bSHamilton Tobon Mosquera     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1291496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_issue);
1292496f8e5bSHamilton Tobon Mosquera 
1293496f8e5bSHamilton Tobon Mosquera     // Change RuntimeCall call site for its asynchronous version.
129497e55cfeSJoseph Huber     SmallVector<Value *, 16> Args;
1295bd2fa181SHamilton Tobon Mosquera     for (auto &Arg : RuntimeCall.args())
1296496f8e5bSHamilton Tobon Mosquera       Args.push_back(Arg.get());
1297bd31abc1SHamilton Tobon Mosquera     Args.push_back(Handle);
1298496f8e5bSHamilton Tobon Mosquera 
1299496f8e5bSHamilton Tobon Mosquera     CallInst *IssueCallsite =
1300bd31abc1SHamilton Tobon Mosquera         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
1301bd2fa181SHamilton Tobon Mosquera     RuntimeCall.eraseFromParent();
1302496f8e5bSHamilton Tobon Mosquera 
1303496f8e5bSHamilton Tobon Mosquera     // Add "wait" runtime call declaration:
1304496f8e5bSHamilton Tobon Mosquera     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1305496f8e5bSHamilton Tobon Mosquera     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1306496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_wait);
1307496f8e5bSHamilton Tobon Mosquera 
1308496f8e5bSHamilton Tobon Mosquera     Value *WaitParams[2] = {
1309da8bec47SJoseph Huber         IssueCallsite->getArgOperand(
1310da8bec47SJoseph Huber             OffloadArray::DeviceIDArgNum), // device_id.
1311bd31abc1SHamilton Tobon Mosquera         Handle                             // handle to wait on.
1312496f8e5bSHamilton Tobon Mosquera     };
1313bd2fa181SHamilton Tobon Mosquera     CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
1314496f8e5bSHamilton Tobon Mosquera 
1315496f8e5bSHamilton Tobon Mosquera     return true;
1316496f8e5bSHamilton Tobon Mosquera   }
1317496f8e5bSHamilton Tobon Mosquera 
1318dc3b5b00SJohannes Doerfert   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1319dc3b5b00SJohannes Doerfert                                     bool GlobalOnly, bool &SingleChoice) {
1320dc3b5b00SJohannes Doerfert     if (CurrentIdent == NextIdent)
1321dc3b5b00SJohannes Doerfert       return CurrentIdent;
1322dc3b5b00SJohannes Doerfert 
1323396b7253SJohannes Doerfert     // TODO: Figure out how to actually combine multiple debug locations. For
1324dc3b5b00SJohannes Doerfert     //       now we just keep an existing one if there is a single choice.
1325dc3b5b00SJohannes Doerfert     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1326dc3b5b00SJohannes Doerfert       SingleChoice = !CurrentIdent;
1327dc3b5b00SJohannes Doerfert       return NextIdent;
1328dc3b5b00SJohannes Doerfert     }
1329396b7253SJohannes Doerfert     return nullptr;
1330396b7253SJohannes Doerfert   }
1331396b7253SJohannes Doerfert 
1332396b7253SJohannes Doerfert   /// Return an `struct ident_t*` value that represents the ones used in the
1333396b7253SJohannes Doerfert   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1334396b7253SJohannes Doerfert   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1335396b7253SJohannes Doerfert   /// return value we create one from scratch. We also do not yet combine
1336396b7253SJohannes Doerfert   /// information, e.g., the source locations, see combinedIdentStruct.
13377cfd267cSsstefan1   Value *
13387cfd267cSsstefan1   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
13397cfd267cSsstefan1                                  Function &F, bool GlobalOnly) {
1340dc3b5b00SJohannes Doerfert     bool SingleChoice = true;
1341396b7253SJohannes Doerfert     Value *Ident = nullptr;
1342396b7253SJohannes Doerfert     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1343396b7253SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
1344396b7253SJohannes Doerfert       if (!CI || &F != &Caller)
1345396b7253SJohannes Doerfert         return false;
1346396b7253SJohannes Doerfert       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1347dc3b5b00SJohannes Doerfert                                   /* GlobalOnly */ true, SingleChoice);
1348396b7253SJohannes Doerfert       return false;
1349396b7253SJohannes Doerfert     };
1350624d34afSJohannes Doerfert     RFI.foreachUse(SCC, CombineIdentStruct);
1351396b7253SJohannes Doerfert 
1352dc3b5b00SJohannes Doerfert     if (!Ident || !SingleChoice) {
1353396b7253SJohannes Doerfert       // The IRBuilder uses the insertion block to get to the module, this is
1354396b7253SJohannes Doerfert       // unfortunate but we work around it for now.
13557cfd267cSsstefan1       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
13567cfd267cSsstefan1         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1357396b7253SJohannes Doerfert             &F.getEntryBlock(), F.getEntryBlock().begin()));
1358396b7253SJohannes Doerfert       // Create a fallback location if non was found.
1359396b7253SJohannes Doerfert       // TODO: Use the debug locations of the calls instead.
13607cfd267cSsstefan1       Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
13617cfd267cSsstefan1       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
1362396b7253SJohannes Doerfert     }
1363396b7253SJohannes Doerfert     return Ident;
1364396b7253SJohannes Doerfert   }
1365396b7253SJohannes Doerfert 
1366b726c557SJohannes Doerfert   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
13679548b74aSJohannes Doerfert   /// \p ReplVal if given.
13687cfd267cSsstefan1   bool deduplicateRuntimeCalls(Function &F,
13697cfd267cSsstefan1                                OMPInformationCache::RuntimeFunctionInfo &RFI,
13709548b74aSJohannes Doerfert                                Value *ReplVal = nullptr) {
13718855fec3SJohannes Doerfert     auto *UV = RFI.getUseVector(F);
13728855fec3SJohannes Doerfert     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1373b1fbf438SRoman Lebedev       return false;
1374b1fbf438SRoman Lebedev 
13757cfd267cSsstefan1     LLVM_DEBUG(
13767cfd267cSsstefan1         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
13777cfd267cSsstefan1                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
13787cfd267cSsstefan1 
1379ab3da5ddSMichael Liao     assert((!ReplVal || (isa<Argument>(ReplVal) &&
1380ab3da5ddSMichael Liao                          cast<Argument>(ReplVal)->getParent() == &F)) &&
13819548b74aSJohannes Doerfert            "Unexpected replacement value!");
1382396b7253SJohannes Doerfert 
1383396b7253SJohannes Doerfert     // TODO: Use dominance to find a good position instead.
13846aab27baSsstefan1     auto CanBeMoved = [this](CallBase &CB) {
1385396b7253SJohannes Doerfert       unsigned NumArgs = CB.getNumArgOperands();
1386396b7253SJohannes Doerfert       if (NumArgs == 0)
1387396b7253SJohannes Doerfert         return true;
13886aab27baSsstefan1       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1389396b7253SJohannes Doerfert         return false;
1390396b7253SJohannes Doerfert       for (unsigned u = 1; u < NumArgs; ++u)
1391396b7253SJohannes Doerfert         if (isa<Instruction>(CB.getArgOperand(u)))
1392396b7253SJohannes Doerfert           return false;
1393396b7253SJohannes Doerfert       return true;
1394396b7253SJohannes Doerfert     };
1395396b7253SJohannes Doerfert 
13969548b74aSJohannes Doerfert     if (!ReplVal) {
13978855fec3SJohannes Doerfert       for (Use *U : *UV)
13989548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1399396b7253SJohannes Doerfert           if (!CanBeMoved(*CI))
1400396b7253SJohannes Doerfert             continue;
14014d4ea9acSHuber, Joseph 
14024d4ea9acSHuber, Joseph           auto Remark = [&](OptimizationRemark OR) {
14034d4ea9acSHuber, Joseph             return OR << "OpenMP runtime call "
14042db182ffSJoseph Huber                       << ore::NV("OpenMPOptRuntime", RFI.Name)
14052db182ffSJoseph Huber                       << " moved to beginning of OpenMP region";
14064d4ea9acSHuber, Joseph           };
14072db182ffSJoseph Huber           emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark);
14084d4ea9acSHuber, Joseph 
14099548b74aSJohannes Doerfert           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
14109548b74aSJohannes Doerfert           ReplVal = CI;
14119548b74aSJohannes Doerfert           break;
14129548b74aSJohannes Doerfert         }
14139548b74aSJohannes Doerfert       if (!ReplVal)
14149548b74aSJohannes Doerfert         return false;
14159548b74aSJohannes Doerfert     }
14169548b74aSJohannes Doerfert 
1417396b7253SJohannes Doerfert     // If we use a call as a replacement value we need to make sure the ident is
1418396b7253SJohannes Doerfert     // valid at the new location. For now we just pick a global one, either
1419396b7253SJohannes Doerfert     // existing and used by one of the calls, or created from scratch.
1420396b7253SJohannes Doerfert     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1421396b7253SJohannes Doerfert       if (CI->getNumArgOperands() > 0 &&
14226aab27baSsstefan1           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1423396b7253SJohannes Doerfert         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1424396b7253SJohannes Doerfert                                                       /* GlobalOnly */ true);
1425396b7253SJohannes Doerfert         CI->setArgOperand(0, Ident);
1426396b7253SJohannes Doerfert       }
1427396b7253SJohannes Doerfert     }
1428396b7253SJohannes Doerfert 
14299548b74aSJohannes Doerfert     bool Changed = false;
14309548b74aSJohannes Doerfert     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
14319548b74aSJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
14329548b74aSJohannes Doerfert       if (!CI || CI == ReplVal || &F != &Caller)
14339548b74aSJohannes Doerfert         return false;
14349548b74aSJohannes Doerfert       assert(CI->getCaller() == &F && "Unexpected call!");
14354d4ea9acSHuber, Joseph 
14364d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
14374d4ea9acSHuber, Joseph         return OR << "OpenMP runtime call "
14384d4ea9acSHuber, Joseph                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
14394d4ea9acSHuber, Joseph       };
14402db182ffSJoseph Huber       emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark);
14414d4ea9acSHuber, Joseph 
14429548b74aSJohannes Doerfert       CGUpdater.removeCallSite(*CI);
14439548b74aSJohannes Doerfert       CI->replaceAllUsesWith(ReplVal);
14449548b74aSJohannes Doerfert       CI->eraseFromParent();
14459548b74aSJohannes Doerfert       ++NumOpenMPRuntimeCallsDeduplicated;
14469548b74aSJohannes Doerfert       Changed = true;
14479548b74aSJohannes Doerfert       return true;
14489548b74aSJohannes Doerfert     };
1449624d34afSJohannes Doerfert     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
14509548b74aSJohannes Doerfert 
14519548b74aSJohannes Doerfert     return Changed;
14529548b74aSJohannes Doerfert   }
14539548b74aSJohannes Doerfert 
14549548b74aSJohannes Doerfert   /// Collect arguments that represent the global thread id in \p GTIdArgs.
14559548b74aSJohannes Doerfert   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
14569548b74aSJohannes Doerfert     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
14579548b74aSJohannes Doerfert     //       initialization. We could define an AbstractAttribute instead and
14589548b74aSJohannes Doerfert     //       run the Attributor here once it can be run as an SCC pass.
14599548b74aSJohannes Doerfert 
14609548b74aSJohannes Doerfert     // Helper to check the argument \p ArgNo at all call sites of \p F for
14619548b74aSJohannes Doerfert     // a GTId.
14629548b74aSJohannes Doerfert     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
14639548b74aSJohannes Doerfert       if (!F.hasLocalLinkage())
14649548b74aSJohannes Doerfert         return false;
14659548b74aSJohannes Doerfert       for (Use &U : F.uses()) {
14669548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(U)) {
14679548b74aSJohannes Doerfert           Value *ArgOp = CI->getArgOperand(ArgNo);
14689548b74aSJohannes Doerfert           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
14697cfd267cSsstefan1               getCallIfRegularCall(
14707cfd267cSsstefan1                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
14719548b74aSJohannes Doerfert             continue;
14729548b74aSJohannes Doerfert         }
14739548b74aSJohannes Doerfert         return false;
14749548b74aSJohannes Doerfert       }
14759548b74aSJohannes Doerfert       return true;
14769548b74aSJohannes Doerfert     };
14779548b74aSJohannes Doerfert 
14789548b74aSJohannes Doerfert     // Helper to identify uses of a GTId as GTId arguments.
14799548b74aSJohannes Doerfert     auto AddUserArgs = [&](Value &GTId) {
14809548b74aSJohannes Doerfert       for (Use &U : GTId.uses())
14819548b74aSJohannes Doerfert         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
14829548b74aSJohannes Doerfert           if (CI->isArgOperand(&U))
14839548b74aSJohannes Doerfert             if (Function *Callee = CI->getCalledFunction())
14849548b74aSJohannes Doerfert               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
14859548b74aSJohannes Doerfert                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
14869548b74aSJohannes Doerfert     };
14879548b74aSJohannes Doerfert 
14889548b74aSJohannes Doerfert     // The argument users of __kmpc_global_thread_num calls are GTIds.
14897cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
14907cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
14917cfd267cSsstefan1 
1492624d34afSJohannes Doerfert     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
14938855fec3SJohannes Doerfert       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
14949548b74aSJohannes Doerfert         AddUserArgs(*CI);
14958855fec3SJohannes Doerfert       return false;
14968855fec3SJohannes Doerfert     });
14979548b74aSJohannes Doerfert 
14989548b74aSJohannes Doerfert     // Transitively search for more arguments by looking at the users of the
14999548b74aSJohannes Doerfert     // ones we know already. During the search the GTIdArgs vector is extended
15009548b74aSJohannes Doerfert     // so we cannot cache the size nor can we use a range based for.
15019548b74aSJohannes Doerfert     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
15029548b74aSJohannes Doerfert       AddUserArgs(*GTIdArgs[u]);
15039548b74aSJohannes Doerfert   }
15049548b74aSJohannes Doerfert 
15055b0581aeSJohannes Doerfert   /// Kernel (=GPU) optimizations and utility functions
15065b0581aeSJohannes Doerfert   ///
15075b0581aeSJohannes Doerfert   ///{{
15085b0581aeSJohannes Doerfert 
15095b0581aeSJohannes Doerfert   /// Check if \p F is a kernel, hence entry point for target offloading.
15105b0581aeSJohannes Doerfert   bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
15115b0581aeSJohannes Doerfert 
15125b0581aeSJohannes Doerfert   /// Cache to remember the unique kernel for a function.
15135b0581aeSJohannes Doerfert   DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
15145b0581aeSJohannes Doerfert 
15155b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p F, if any.
15165b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Function &F);
15175b0581aeSJohannes Doerfert 
15185b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p I, if any.
15195b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Instruction &I) {
15205b0581aeSJohannes Doerfert     return getUniqueKernelFor(*I.getFunction());
15215b0581aeSJohannes Doerfert   }
15225b0581aeSJohannes Doerfert 
15235b0581aeSJohannes Doerfert   /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in
15245b0581aeSJohannes Doerfert   /// the cases we can avoid taking the address of a function.
15255b0581aeSJohannes Doerfert   bool rewriteDeviceCodeStateMachine();
15265b0581aeSJohannes Doerfert 
15275b0581aeSJohannes Doerfert   ///
15285b0581aeSJohannes Doerfert   ///}}
15295b0581aeSJohannes Doerfert 
15304d4ea9acSHuber, Joseph   /// Emit a remark generically
15314d4ea9acSHuber, Joseph   ///
15324d4ea9acSHuber, Joseph   /// This template function can be used to generically emit a remark. The
15334d4ea9acSHuber, Joseph   /// RemarkKind should be one of the following:
15344d4ea9acSHuber, Joseph   ///   - OptimizationRemark to indicate a successful optimization attempt
15354d4ea9acSHuber, Joseph   ///   - OptimizationRemarkMissed to report a failed optimization attempt
15364d4ea9acSHuber, Joseph   ///   - OptimizationRemarkAnalysis to provide additional information about an
15374d4ea9acSHuber, Joseph   ///     optimization attempt
15384d4ea9acSHuber, Joseph   ///
15394d4ea9acSHuber, Joseph   /// The remark is built using a callback function provided by the caller that
15404d4ea9acSHuber, Joseph   /// takes a RemarkKind as input and returns a RemarkKind.
15412db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15422db182ffSJoseph Huber   void emitRemark(Instruction *I, StringRef RemarkName,
1543e8039ad4SJohannes Doerfert                   RemarkCallBack &&RemarkCB) const {
15442db182ffSJoseph Huber     Function *F = I->getParent()->getParent();
15454d4ea9acSHuber, Joseph     auto &ORE = OREGetter(F);
15464d4ea9acSHuber, Joseph 
15472db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
15484d4ea9acSHuber, Joseph   }
15494d4ea9acSHuber, Joseph 
15502db182ffSJoseph Huber   /// Emit a remark on a function.
15512db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15522db182ffSJoseph Huber   void emitRemark(Function *F, StringRef RemarkName,
15532db182ffSJoseph Huber                   RemarkCallBack &&RemarkCB) const {
15540f426935Ssstefan1     auto &ORE = OREGetter(F);
15550f426935Ssstefan1 
15562db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
15570f426935Ssstefan1   }
15580f426935Ssstefan1 
1559b726c557SJohannes Doerfert   /// The underlying module.
15609548b74aSJohannes Doerfert   Module &M;
15619548b74aSJohannes Doerfert 
15629548b74aSJohannes Doerfert   /// The SCC we are operating on.
1563ee17263aSJohannes Doerfert   SmallVectorImpl<Function *> &SCC;
15649548b74aSJohannes Doerfert 
15659548b74aSJohannes Doerfert   /// Callback to update the call graph, the first argument is a removed call,
15669548b74aSJohannes Doerfert   /// the second an optional replacement call.
15679548b74aSJohannes Doerfert   CallGraphUpdater &CGUpdater;
15689548b74aSJohannes Doerfert 
15694d4ea9acSHuber, Joseph   /// Callback to get an OptimizationRemarkEmitter from a Function *
15704d4ea9acSHuber, Joseph   OptimizationRemarkGetter OREGetter;
15714d4ea9acSHuber, Joseph 
15727cfd267cSsstefan1   /// OpenMP-specific information cache. Also Used for Attributor runs.
15737cfd267cSsstefan1   OMPInformationCache &OMPInfoCache;
1574b8235d2bSsstefan1 
1575b8235d2bSsstefan1   /// Attributor instance.
1576b8235d2bSsstefan1   Attributor &A;
1577b8235d2bSsstefan1 
1578b8235d2bSsstefan1   /// Helper function to run Attributor on SCC.
1579b8235d2bSsstefan1   bool runAttributor() {
1580b8235d2bSsstefan1     if (SCC.empty())
1581b8235d2bSsstefan1       return false;
1582b8235d2bSsstefan1 
1583b8235d2bSsstefan1     registerAAs();
1584b8235d2bSsstefan1 
1585b8235d2bSsstefan1     ChangeStatus Changed = A.run();
1586b8235d2bSsstefan1 
1587b8235d2bSsstefan1     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
1588b8235d2bSsstefan1                       << " functions, result: " << Changed << ".\n");
1589b8235d2bSsstefan1 
1590b8235d2bSsstefan1     return Changed == ChangeStatus::CHANGED;
1591b8235d2bSsstefan1   }
1592b8235d2bSsstefan1 
1593b8235d2bSsstefan1   /// Populate the Attributor with abstract attribute opportunities in the
1594b8235d2bSsstefan1   /// function.
1595b8235d2bSsstefan1   void registerAAs() {
15965dfd7cc4Ssstefan1     if (SCC.empty())
15975dfd7cc4Ssstefan1       return;
1598b8235d2bSsstefan1 
15995dfd7cc4Ssstefan1     // Create CallSite AA for all Getters.
16005dfd7cc4Ssstefan1     for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
16015dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
16025dfd7cc4Ssstefan1 
16035dfd7cc4Ssstefan1       auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
16045dfd7cc4Ssstefan1 
16055dfd7cc4Ssstefan1       auto CreateAA = [&](Use &U, Function &Caller) {
16065dfd7cc4Ssstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
16075dfd7cc4Ssstefan1         if (!CI)
16085dfd7cc4Ssstefan1           return false;
16095dfd7cc4Ssstefan1 
16105dfd7cc4Ssstefan1         auto &CB = cast<CallBase>(*CI);
16115dfd7cc4Ssstefan1 
16125dfd7cc4Ssstefan1         IRPosition CBPos = IRPosition::callsite_function(CB);
16135dfd7cc4Ssstefan1         A.getOrCreateAAFor<AAICVTracker>(CBPos);
16145dfd7cc4Ssstefan1         return false;
16155dfd7cc4Ssstefan1       };
16165dfd7cc4Ssstefan1 
16175dfd7cc4Ssstefan1       GetterRFI.foreachUse(SCC, CreateAA);
1618b8235d2bSsstefan1     }
16196fc51c9fSJoseph Huber     auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
16206fc51c9fSJoseph Huber     auto CreateAA = [&](Use &U, Function &F) {
16216fc51c9fSJoseph Huber       A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
16226fc51c9fSJoseph Huber       return false;
16236fc51c9fSJoseph Huber     };
16246fc51c9fSJoseph Huber     GlobalizationRFI.foreachUse(SCC, CreateAA);
162518283125SJoseph Huber 
1626*7d69da71SJoseph Huber     // Create an ExecutionDomain AA for every function and a HeapToStack AA for
1627*7d69da71SJoseph Huber     // every function if there is a device kernel.
162803d7e61cSJoseph Huber     for (auto *F : SCC) {
162903d7e61cSJoseph Huber       if (!F->isDeclaration())
163003d7e61cSJoseph Huber         A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
1631*7d69da71SJoseph Huber       if (!OMPInfoCache.Kernels.empty())
1632*7d69da71SJoseph Huber         A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
163318283125SJoseph Huber     }
1634b8235d2bSsstefan1   }
1635b8235d2bSsstefan1 };
1636b8235d2bSsstefan1 
16375b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
16385b0581aeSJohannes Doerfert   if (!OMPInfoCache.ModuleSlice.count(&F))
16395b0581aeSJohannes Doerfert     return nullptr;
16405b0581aeSJohannes Doerfert 
16415b0581aeSJohannes Doerfert   // Use a scope to keep the lifetime of the CachedKernel short.
16425b0581aeSJohannes Doerfert   {
16435b0581aeSJohannes Doerfert     Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
16445b0581aeSJohannes Doerfert     if (CachedKernel)
16455b0581aeSJohannes Doerfert       return *CachedKernel;
16465b0581aeSJohannes Doerfert 
16475b0581aeSJohannes Doerfert     // TODO: We should use an AA to create an (optimistic and callback
16485b0581aeSJohannes Doerfert     //       call-aware) call graph. For now we stick to simple patterns that
16495b0581aeSJohannes Doerfert     //       are less powerful, basically the worst fixpoint.
16505b0581aeSJohannes Doerfert     if (isKernel(F)) {
16515b0581aeSJohannes Doerfert       CachedKernel = Kernel(&F);
16525b0581aeSJohannes Doerfert       return *CachedKernel;
16535b0581aeSJohannes Doerfert     }
16545b0581aeSJohannes Doerfert 
16555b0581aeSJohannes Doerfert     CachedKernel = nullptr;
1656994bb6ebSJohannes Doerfert     if (!F.hasLocalLinkage()) {
1657994bb6ebSJohannes Doerfert 
1658994bb6ebSJohannes Doerfert       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
16592db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
16602db182ffSJoseph Huber         return ORA
16612db182ffSJoseph Huber                << "[OMP100] Potentially unknown OpenMP target region caller";
1662994bb6ebSJohannes Doerfert       };
16632db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
1664994bb6ebSJohannes Doerfert 
16655b0581aeSJohannes Doerfert       return nullptr;
16665b0581aeSJohannes Doerfert     }
1667994bb6ebSJohannes Doerfert   }
16685b0581aeSJohannes Doerfert 
16695b0581aeSJohannes Doerfert   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
16705b0581aeSJohannes Doerfert     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
16715b0581aeSJohannes Doerfert       // Allow use in equality comparisons.
16725b0581aeSJohannes Doerfert       if (Cmp->isEquality())
16735b0581aeSJohannes Doerfert         return getUniqueKernelFor(*Cmp);
16745b0581aeSJohannes Doerfert       return nullptr;
16755b0581aeSJohannes Doerfert     }
16765b0581aeSJohannes Doerfert     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
16775b0581aeSJohannes Doerfert       // Allow direct calls.
16785b0581aeSJohannes Doerfert       if (CB->isCallee(&U))
16795b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
1680a2dbfb6bSGiorgis Georgakoudis 
1681a2dbfb6bSGiorgis Georgakoudis       OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1682a2dbfb6bSGiorgis Georgakoudis           OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1683a2dbfb6bSGiorgis Georgakoudis       // Allow the use in __kmpc_parallel_51 calls.
1684a2dbfb6bSGiorgis Georgakoudis       if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
16855b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
16865b0581aeSJohannes Doerfert       return nullptr;
16875b0581aeSJohannes Doerfert     }
16885b0581aeSJohannes Doerfert     // Disallow every other use.
16895b0581aeSJohannes Doerfert     return nullptr;
16905b0581aeSJohannes Doerfert   };
16915b0581aeSJohannes Doerfert 
16925b0581aeSJohannes Doerfert   // TODO: In the future we want to track more than just a unique kernel.
16935b0581aeSJohannes Doerfert   SmallPtrSet<Kernel, 2> PotentialKernels;
16948d8ce85bSsstefan1   OMPInformationCache::foreachUse(F, [&](const Use &U) {
16955b0581aeSJohannes Doerfert     PotentialKernels.insert(GetUniqueKernelForUse(U));
16965b0581aeSJohannes Doerfert   });
16975b0581aeSJohannes Doerfert 
16985b0581aeSJohannes Doerfert   Kernel K = nullptr;
16995b0581aeSJohannes Doerfert   if (PotentialKernels.size() == 1)
17005b0581aeSJohannes Doerfert     K = *PotentialKernels.begin();
17015b0581aeSJohannes Doerfert 
17025b0581aeSJohannes Doerfert   // Cache the result.
17035b0581aeSJohannes Doerfert   UniqueKernelMap[&F] = K;
17045b0581aeSJohannes Doerfert 
17055b0581aeSJohannes Doerfert   return K;
17065b0581aeSJohannes Doerfert }
17075b0581aeSJohannes Doerfert 
17085b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
1709a2dbfb6bSGiorgis Georgakoudis   OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1710a2dbfb6bSGiorgis Georgakoudis       OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
17115b0581aeSJohannes Doerfert 
17125b0581aeSJohannes Doerfert   bool Changed = false;
1713a2dbfb6bSGiorgis Georgakoudis   if (!KernelParallelRFI)
17145b0581aeSJohannes Doerfert     return Changed;
17155b0581aeSJohannes Doerfert 
17165b0581aeSJohannes Doerfert   for (Function *F : SCC) {
17175b0581aeSJohannes Doerfert 
1718a2dbfb6bSGiorgis Georgakoudis     // Check if the function is a use in a __kmpc_parallel_51 call at
17195b0581aeSJohannes Doerfert     // all.
17205b0581aeSJohannes Doerfert     bool UnknownUse = false;
1721a2dbfb6bSGiorgis Georgakoudis     bool KernelParallelUse = false;
17225b0581aeSJohannes Doerfert     unsigned NumDirectCalls = 0;
17235b0581aeSJohannes Doerfert 
17245b0581aeSJohannes Doerfert     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
17258d8ce85bSsstefan1     OMPInformationCache::foreachUse(*F, [&](Use &U) {
17265b0581aeSJohannes Doerfert       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
17275b0581aeSJohannes Doerfert         if (CB->isCallee(&U)) {
17285b0581aeSJohannes Doerfert           ++NumDirectCalls;
17295b0581aeSJohannes Doerfert           return;
17305b0581aeSJohannes Doerfert         }
17315b0581aeSJohannes Doerfert 
173281db6144SMichael Liao       if (isa<ICmpInst>(U.getUser())) {
17335b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17345b0581aeSJohannes Doerfert         return;
17355b0581aeSJohannes Doerfert       }
1736a2dbfb6bSGiorgis Georgakoudis 
1737a2dbfb6bSGiorgis Georgakoudis       // Find wrapper functions that represent parallel kernels.
1738a2dbfb6bSGiorgis Georgakoudis       CallInst *CI =
1739a2dbfb6bSGiorgis Georgakoudis           OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
1740a2dbfb6bSGiorgis Georgakoudis       const unsigned int WrapperFunctionArgNo = 6;
1741a2dbfb6bSGiorgis Georgakoudis       if (!KernelParallelUse && CI &&
1742a2dbfb6bSGiorgis Georgakoudis           CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
1743a2dbfb6bSGiorgis Georgakoudis         KernelParallelUse = true;
17445b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17455b0581aeSJohannes Doerfert         return;
17465b0581aeSJohannes Doerfert       }
17475b0581aeSJohannes Doerfert       UnknownUse = true;
17485b0581aeSJohannes Doerfert     });
17495b0581aeSJohannes Doerfert 
1750a2dbfb6bSGiorgis Georgakoudis     // Do not emit a remark if we haven't seen a __kmpc_parallel_51
1751fec1f210SJohannes Doerfert     // use.
1752a2dbfb6bSGiorgis Georgakoudis     if (!KernelParallelUse)
17535b0581aeSJohannes Doerfert       continue;
17545b0581aeSJohannes Doerfert 
1755fec1f210SJohannes Doerfert     {
17562db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17572db182ffSJoseph Huber         return ORA << "Found a parallel region that is called in a target "
1758fec1f210SJohannes Doerfert                       "region but not part of a combined target construct nor "
1759a2dbfb6bSGiorgis Georgakoudis                       "nested inside a target construct without intermediate "
1760fec1f210SJohannes Doerfert                       "code. This can lead to excessive register usage for "
1761fec1f210SJohannes Doerfert                       "unrelated target regions in the same translation unit "
1762fec1f210SJohannes Doerfert                       "due to spurious call edges assumed by ptxas.";
1763fec1f210SJohannes Doerfert       };
17642db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
17652db182ffSJoseph Huber                                              Remark);
1766fec1f210SJohannes Doerfert     }
1767fec1f210SJohannes Doerfert 
1768fec1f210SJohannes Doerfert     // If this ever hits, we should investigate.
1769fec1f210SJohannes Doerfert     // TODO: Checking the number of uses is not a necessary restriction and
1770fec1f210SJohannes Doerfert     // should be lifted.
1771fec1f210SJohannes Doerfert     if (UnknownUse || NumDirectCalls != 1 ||
1772fec1f210SJohannes Doerfert         ToBeReplacedStateMachineUses.size() != 2) {
1773fec1f210SJohannes Doerfert       {
17742db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17752db182ffSJoseph Huber           return ORA << "Parallel region is used in "
1776fec1f210SJohannes Doerfert                      << (UnknownUse ? "unknown" : "unexpected")
1777fec1f210SJohannes Doerfert                      << " ways; will not attempt to rewrite the state machine.";
1778fec1f210SJohannes Doerfert         };
17792db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17802db182ffSJoseph Huber             F, "OpenMPParallelRegionInNonSPMD", Remark);
1781fec1f210SJohannes Doerfert       }
17825b0581aeSJohannes Doerfert       continue;
1783fec1f210SJohannes Doerfert     }
17845b0581aeSJohannes Doerfert 
1785a2dbfb6bSGiorgis Georgakoudis     // Even if we have __kmpc_parallel_51 calls, we (for now) give
17865b0581aeSJohannes Doerfert     // up if the function is not called from a unique kernel.
17875b0581aeSJohannes Doerfert     Kernel K = getUniqueKernelFor(*F);
1788fec1f210SJohannes Doerfert     if (!K) {
1789fec1f210SJohannes Doerfert       {
17902db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17912db182ffSJoseph Huber           return ORA << "Parallel region is not known to be called from a "
1792fec1f210SJohannes Doerfert                         "unique single target region, maybe the surrounding "
1793fec1f210SJohannes Doerfert                         "function has external linkage?; will not attempt to "
1794fec1f210SJohannes Doerfert                         "rewrite the state machine use.";
1795fec1f210SJohannes Doerfert         };
17962db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17972db182ffSJoseph Huber             F, "OpenMPParallelRegionInMultipleKernesl", Remark);
1798fec1f210SJohannes Doerfert       }
17995b0581aeSJohannes Doerfert       continue;
1800fec1f210SJohannes Doerfert     }
18015b0581aeSJohannes Doerfert 
18025b0581aeSJohannes Doerfert     // We now know F is a parallel body function called only from the kernel K.
18035b0581aeSJohannes Doerfert     // We also identified the state machine uses in which we replace the
18045b0581aeSJohannes Doerfert     // function pointer by a new global symbol for identification purposes. This
18055b0581aeSJohannes Doerfert     // ensures only direct calls to the function are left.
18065b0581aeSJohannes Doerfert 
1807fec1f210SJohannes Doerfert     {
18082db182ffSJoseph Huber       auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) {
18092db182ffSJoseph Huber         return ORA << "Specialize parallel region that is only reached from a "
1810fec1f210SJohannes Doerfert                       "single target region to avoid spurious call edges and "
1811fec1f210SJohannes Doerfert                       "excessive register usage in other target regions. "
1812fec1f210SJohannes Doerfert                       "(parallel region ID: "
1813fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1814fec1f210SJohannes Doerfert                    << ", kernel ID: "
1815fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1816fec1f210SJohannes Doerfert       };
18172db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
1818fec1f210SJohannes Doerfert                                              RemarkParalleRegion);
18192db182ffSJoseph Huber       auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) {
18202db182ffSJoseph Huber         return ORA << "Target region containing the parallel region that is "
1821fec1f210SJohannes Doerfert                       "specialized. (parallel region ID: "
1822fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1823fec1f210SJohannes Doerfert                    << ", kernel ID: "
1824fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1825fec1f210SJohannes Doerfert       };
18262db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD",
18272db182ffSJoseph Huber                                              RemarkKernel);
1828fec1f210SJohannes Doerfert     }
1829fec1f210SJohannes Doerfert 
18305b0581aeSJohannes Doerfert     Module &M = *F->getParent();
18315b0581aeSJohannes Doerfert     Type *Int8Ty = Type::getInt8Ty(M.getContext());
18325b0581aeSJohannes Doerfert 
18335b0581aeSJohannes Doerfert     auto *ID = new GlobalVariable(
18345b0581aeSJohannes Doerfert         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
18355b0581aeSJohannes Doerfert         UndefValue::get(Int8Ty), F->getName() + ".ID");
18365b0581aeSJohannes Doerfert 
18375b0581aeSJohannes Doerfert     for (Use *U : ToBeReplacedStateMachineUses)
18385b0581aeSJohannes Doerfert       U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
18395b0581aeSJohannes Doerfert 
18405b0581aeSJohannes Doerfert     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
18415b0581aeSJohannes Doerfert 
18425b0581aeSJohannes Doerfert     Changed = true;
18435b0581aeSJohannes Doerfert   }
18445b0581aeSJohannes Doerfert 
18455b0581aeSJohannes Doerfert   return Changed;
18465b0581aeSJohannes Doerfert }
18475b0581aeSJohannes Doerfert 
1848b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
1849b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
1850b8235d2bSsstefan1   using Base = StateWrapper<BooleanState, AbstractAttribute>;
1851b8235d2bSsstefan1   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1852b8235d2bSsstefan1 
18535dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
18545dfd7cc4Ssstefan1     Function *F = getAnchorScope();
18555dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
18565dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
18575dfd7cc4Ssstefan1   }
18585dfd7cc4Ssstefan1 
1859b8235d2bSsstefan1   /// Returns true if value is assumed to be tracked.
1860b8235d2bSsstefan1   bool isAssumedTracked() const { return getAssumed(); }
1861b8235d2bSsstefan1 
1862b8235d2bSsstefan1   /// Returns true if value is known to be tracked.
1863b8235d2bSsstefan1   bool isKnownTracked() const { return getAssumed(); }
1864b8235d2bSsstefan1 
1865b8235d2bSsstefan1   /// Create an abstract attribute biew for the position \p IRP.
1866b8235d2bSsstefan1   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
1867b8235d2bSsstefan1 
1868b8235d2bSsstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
18695dfd7cc4Ssstefan1   virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
18705dfd7cc4Ssstefan1                                                 const Instruction *I,
18715dfd7cc4Ssstefan1                                                 Attributor &A) const {
18725dfd7cc4Ssstefan1     return None;
18735dfd7cc4Ssstefan1   }
18745dfd7cc4Ssstefan1 
18755dfd7cc4Ssstefan1   /// Return an assumed unique ICV value if a single candidate is found. If
18765dfd7cc4Ssstefan1   /// there cannot be one, return a nullptr. If it is not clear yet, return the
18775dfd7cc4Ssstefan1   /// Optional::NoneType.
18785dfd7cc4Ssstefan1   virtual Optional<Value *>
18795dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
18805dfd7cc4Ssstefan1 
18815dfd7cc4Ssstefan1   // Currently only nthreads is being tracked.
18825dfd7cc4Ssstefan1   // this array will only grow with time.
18835dfd7cc4Ssstefan1   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
1884b8235d2bSsstefan1 
1885b8235d2bSsstefan1   /// See AbstractAttribute::getName()
1886b8235d2bSsstefan1   const std::string getName() const override { return "AAICVTracker"; }
1887b8235d2bSsstefan1 
1888233af895SLuofan Chen   /// See AbstractAttribute::getIdAddr()
1889233af895SLuofan Chen   const char *getIdAddr() const override { return &ID; }
1890233af895SLuofan Chen 
1891233af895SLuofan Chen   /// This function should return true if the type of the \p AA is AAICVTracker
1892233af895SLuofan Chen   static bool classof(const AbstractAttribute *AA) {
1893233af895SLuofan Chen     return (AA->getIdAddr() == &ID);
1894233af895SLuofan Chen   }
1895233af895SLuofan Chen 
1896b8235d2bSsstefan1   static const char ID;
1897b8235d2bSsstefan1 };
1898b8235d2bSsstefan1 
1899b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
1900b8235d2bSsstefan1   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
1901b8235d2bSsstefan1       : AAICVTracker(IRP, A) {}
1902b8235d2bSsstefan1 
1903b8235d2bSsstefan1   // FIXME: come up with better string.
19045dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerFunction"; }
1905b8235d2bSsstefan1 
1906b8235d2bSsstefan1   // FIXME: come up with some stats.
1907b8235d2bSsstefan1   void trackStatistics() const override {}
1908b8235d2bSsstefan1 
19095dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1910b8235d2bSsstefan1   ChangeStatus manifest(Attributor &A) override {
19115dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1912b8235d2bSsstefan1   }
1913b8235d2bSsstefan1 
1914b8235d2bSsstefan1   // Map of ICV to their values at specific program point.
19155dfd7cc4Ssstefan1   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
1916b8235d2bSsstefan1                   InternalControlVar::ICV___last>
19175dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1918b8235d2bSsstefan1 
1919b8235d2bSsstefan1   ChangeStatus updateImpl(Attributor &A) override {
1920b8235d2bSsstefan1     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
1921b8235d2bSsstefan1 
1922b8235d2bSsstefan1     Function *F = getAnchorScope();
1923b8235d2bSsstefan1 
1924b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1925b8235d2bSsstefan1 
1926b8235d2bSsstefan1     for (InternalControlVar ICV : TrackableICVs) {
1927b8235d2bSsstefan1       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
1928b8235d2bSsstefan1 
19295dfd7cc4Ssstefan1       auto &ValuesMap = ICVReplacementValuesMap[ICV];
1930b8235d2bSsstefan1       auto TrackValues = [&](Use &U, Function &) {
1931b8235d2bSsstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
1932b8235d2bSsstefan1         if (!CI)
1933b8235d2bSsstefan1           return false;
1934b8235d2bSsstefan1 
1935b8235d2bSsstefan1         // FIXME: handle setters with more that 1 arguments.
1936b8235d2bSsstefan1         /// Track new value.
19375dfd7cc4Ssstefan1         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
1938b8235d2bSsstefan1           HasChanged = ChangeStatus::CHANGED;
1939b8235d2bSsstefan1 
1940b8235d2bSsstefan1         return false;
1941b8235d2bSsstefan1       };
1942b8235d2bSsstefan1 
19435dfd7cc4Ssstefan1       auto CallCheck = [&](Instruction &I) {
19445dfd7cc4Ssstefan1         Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
19455dfd7cc4Ssstefan1         if (ReplVal.hasValue() &&
19465dfd7cc4Ssstefan1             ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
19475dfd7cc4Ssstefan1           HasChanged = ChangeStatus::CHANGED;
19485dfd7cc4Ssstefan1 
19495dfd7cc4Ssstefan1         return true;
19505dfd7cc4Ssstefan1       };
19515dfd7cc4Ssstefan1 
19525dfd7cc4Ssstefan1       // Track all changes of an ICV.
1953b8235d2bSsstefan1       SetterRFI.foreachUse(TrackValues, F);
19545dfd7cc4Ssstefan1 
19555dfd7cc4Ssstefan1       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
19565dfd7cc4Ssstefan1                                 /* CheckBBLivenessOnly */ true);
19575dfd7cc4Ssstefan1 
19585dfd7cc4Ssstefan1       /// TODO: Figure out a way to avoid adding entry in
19595dfd7cc4Ssstefan1       /// ICVReplacementValuesMap
19605dfd7cc4Ssstefan1       Instruction *Entry = &F->getEntryBlock().front();
19615dfd7cc4Ssstefan1       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
19625dfd7cc4Ssstefan1         ValuesMap.insert(std::make_pair(Entry, nullptr));
1963b8235d2bSsstefan1     }
1964b8235d2bSsstefan1 
1965b8235d2bSsstefan1     return HasChanged;
1966b8235d2bSsstefan1   }
1967b8235d2bSsstefan1 
19685dfd7cc4Ssstefan1   /// Hepler to check if \p I is a call and get the value for it if it is
19695dfd7cc4Ssstefan1   /// unique.
19705dfd7cc4Ssstefan1   Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
19715dfd7cc4Ssstefan1                                     InternalControlVar &ICV) const {
1972b8235d2bSsstefan1 
19735dfd7cc4Ssstefan1     const auto *CB = dyn_cast<CallBase>(I);
1974dcaec812SJohannes Doerfert     if (!CB || CB->hasFnAttr("no_openmp") ||
1975dcaec812SJohannes Doerfert         CB->hasFnAttr("no_openmp_routines"))
19765dfd7cc4Ssstefan1       return None;
19775dfd7cc4Ssstefan1 
1978b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1979b8235d2bSsstefan1     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
19805dfd7cc4Ssstefan1     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
19815dfd7cc4Ssstefan1     Function *CalledFunction = CB->getCalledFunction();
1982b8235d2bSsstefan1 
19834eef14f9SWei Wang     // Indirect call, assume ICV changes.
19844eef14f9SWei Wang     if (CalledFunction == nullptr)
19854eef14f9SWei Wang       return nullptr;
19865dfd7cc4Ssstefan1     if (CalledFunction == GetterRFI.Declaration)
19875dfd7cc4Ssstefan1       return None;
19885dfd7cc4Ssstefan1     if (CalledFunction == SetterRFI.Declaration) {
19895dfd7cc4Ssstefan1       if (ICVReplacementValuesMap[ICV].count(I))
19905dfd7cc4Ssstefan1         return ICVReplacementValuesMap[ICV].lookup(I);
19915dfd7cc4Ssstefan1 
19925dfd7cc4Ssstefan1       return nullptr;
19935dfd7cc4Ssstefan1     }
19945dfd7cc4Ssstefan1 
19955dfd7cc4Ssstefan1     // Since we don't know, assume it changes the ICV.
19965dfd7cc4Ssstefan1     if (CalledFunction->isDeclaration())
19975dfd7cc4Ssstefan1       return nullptr;
19985dfd7cc4Ssstefan1 
19995b70c12fSJohannes Doerfert     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
20005b70c12fSJohannes Doerfert         *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
20015dfd7cc4Ssstefan1 
20025dfd7cc4Ssstefan1     if (ICVTrackingAA.isAssumedTracked())
20035dfd7cc4Ssstefan1       return ICVTrackingAA.getUniqueReplacementValue(ICV);
20045dfd7cc4Ssstefan1 
20055dfd7cc4Ssstefan1     // If we don't know, assume it changes.
20065dfd7cc4Ssstefan1     return nullptr;
20075dfd7cc4Ssstefan1   }
20085dfd7cc4Ssstefan1 
20095dfd7cc4Ssstefan1   // We don't check unique value for a function, so return None.
20105dfd7cc4Ssstefan1   Optional<Value *>
20115dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
20125dfd7cc4Ssstefan1     return None;
20135dfd7cc4Ssstefan1   }
20145dfd7cc4Ssstefan1 
20155dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
20165dfd7cc4Ssstefan1   Optional<Value *> getReplacementValue(InternalControlVar ICV,
20175dfd7cc4Ssstefan1                                         const Instruction *I,
20185dfd7cc4Ssstefan1                                         Attributor &A) const override {
20195dfd7cc4Ssstefan1     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
20205dfd7cc4Ssstefan1     if (ValuesMap.count(I))
20215dfd7cc4Ssstefan1       return ValuesMap.lookup(I);
20225dfd7cc4Ssstefan1 
20235dfd7cc4Ssstefan1     SmallVector<const Instruction *, 16> Worklist;
20245dfd7cc4Ssstefan1     SmallPtrSet<const Instruction *, 16> Visited;
20255dfd7cc4Ssstefan1     Worklist.push_back(I);
20265dfd7cc4Ssstefan1 
20275dfd7cc4Ssstefan1     Optional<Value *> ReplVal;
20285dfd7cc4Ssstefan1 
20295dfd7cc4Ssstefan1     while (!Worklist.empty()) {
20305dfd7cc4Ssstefan1       const Instruction *CurrInst = Worklist.pop_back_val();
20315dfd7cc4Ssstefan1       if (!Visited.insert(CurrInst).second)
2032b8235d2bSsstefan1         continue;
2033b8235d2bSsstefan1 
20345dfd7cc4Ssstefan1       const BasicBlock *CurrBB = CurrInst->getParent();
20355dfd7cc4Ssstefan1 
20365dfd7cc4Ssstefan1       // Go up and look for all potential setters/calls that might change the
20375dfd7cc4Ssstefan1       // ICV.
20385dfd7cc4Ssstefan1       while ((CurrInst = CurrInst->getPrevNode())) {
20395dfd7cc4Ssstefan1         if (ValuesMap.count(CurrInst)) {
20405dfd7cc4Ssstefan1           Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
20415dfd7cc4Ssstefan1           // Unknown value, track new.
20425dfd7cc4Ssstefan1           if (!ReplVal.hasValue()) {
20435dfd7cc4Ssstefan1             ReplVal = NewReplVal;
20445dfd7cc4Ssstefan1             break;
20455dfd7cc4Ssstefan1           }
20465dfd7cc4Ssstefan1 
20475dfd7cc4Ssstefan1           // If we found a new value, we can't know the icv value anymore.
20485dfd7cc4Ssstefan1           if (NewReplVal.hasValue())
20495dfd7cc4Ssstefan1             if (ReplVal != NewReplVal)
2050b8235d2bSsstefan1               return nullptr;
2051b8235d2bSsstefan1 
20525dfd7cc4Ssstefan1           break;
2053b8235d2bSsstefan1         }
2054b8235d2bSsstefan1 
20555dfd7cc4Ssstefan1         Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
20565dfd7cc4Ssstefan1         if (!NewReplVal.hasValue())
20575dfd7cc4Ssstefan1           continue;
20585dfd7cc4Ssstefan1 
20595dfd7cc4Ssstefan1         // Unknown value, track new.
20605dfd7cc4Ssstefan1         if (!ReplVal.hasValue()) {
20615dfd7cc4Ssstefan1           ReplVal = NewReplVal;
20625dfd7cc4Ssstefan1           break;
2063b8235d2bSsstefan1         }
2064b8235d2bSsstefan1 
20655dfd7cc4Ssstefan1         // if (NewReplVal.hasValue())
20665dfd7cc4Ssstefan1         // We found a new value, we can't know the icv value anymore.
20675dfd7cc4Ssstefan1         if (ReplVal != NewReplVal)
2068b8235d2bSsstefan1           return nullptr;
2069b8235d2bSsstefan1       }
20705dfd7cc4Ssstefan1 
20715dfd7cc4Ssstefan1       // If we are in the same BB and we have a value, we are done.
20725dfd7cc4Ssstefan1       if (CurrBB == I->getParent() && ReplVal.hasValue())
20735dfd7cc4Ssstefan1         return ReplVal;
20745dfd7cc4Ssstefan1 
20755dfd7cc4Ssstefan1       // Go through all predecessors and add terminators for analysis.
20765dfd7cc4Ssstefan1       for (const BasicBlock *Pred : predecessors(CurrBB))
20775dfd7cc4Ssstefan1         if (const Instruction *Terminator = Pred->getTerminator())
20785dfd7cc4Ssstefan1           Worklist.push_back(Terminator);
20795dfd7cc4Ssstefan1     }
20805dfd7cc4Ssstefan1 
20815dfd7cc4Ssstefan1     return ReplVal;
20825dfd7cc4Ssstefan1   }
20835dfd7cc4Ssstefan1 };
20845dfd7cc4Ssstefan1 
20855dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
20865dfd7cc4Ssstefan1   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
20875dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
20885dfd7cc4Ssstefan1 
20895dfd7cc4Ssstefan1   // FIXME: come up with better string.
20905dfd7cc4Ssstefan1   const std::string getAsStr() const override {
20915dfd7cc4Ssstefan1     return "ICVTrackerFunctionReturned";
20925dfd7cc4Ssstefan1   }
20935dfd7cc4Ssstefan1 
20945dfd7cc4Ssstefan1   // FIXME: come up with some stats.
20955dfd7cc4Ssstefan1   void trackStatistics() const override {}
20965dfd7cc4Ssstefan1 
20975dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
20985dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
20995dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
21005dfd7cc4Ssstefan1   }
21015dfd7cc4Ssstefan1 
21025dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
21035dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
21045dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
21055dfd7cc4Ssstefan1       ICVReplacementValuesMap;
21065dfd7cc4Ssstefan1 
21075dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
21085dfd7cc4Ssstefan1   Optional<Value *>
21095dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
21105dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
21115dfd7cc4Ssstefan1   }
21125dfd7cc4Ssstefan1 
21135dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21145dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
21155dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
21165b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
21175dfd7cc4Ssstefan1 
21185dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
21195dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
21205dfd7cc4Ssstefan1 
21215dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21225dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
21235dfd7cc4Ssstefan1       Optional<Value *> UniqueICVValue;
21245dfd7cc4Ssstefan1 
21255dfd7cc4Ssstefan1       auto CheckReturnInst = [&](Instruction &I) {
21265dfd7cc4Ssstefan1         Optional<Value *> NewReplVal =
21275dfd7cc4Ssstefan1             ICVTrackingAA.getReplacementValue(ICV, &I, A);
21285dfd7cc4Ssstefan1 
21295dfd7cc4Ssstefan1         // If we found a second ICV value there is no unique returned value.
21305dfd7cc4Ssstefan1         if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
21315dfd7cc4Ssstefan1           return false;
21325dfd7cc4Ssstefan1 
21335dfd7cc4Ssstefan1         UniqueICVValue = NewReplVal;
21345dfd7cc4Ssstefan1 
21355dfd7cc4Ssstefan1         return true;
21365dfd7cc4Ssstefan1       };
21375dfd7cc4Ssstefan1 
21385dfd7cc4Ssstefan1       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
21395dfd7cc4Ssstefan1                                      /* CheckBBLivenessOnly */ true))
21405dfd7cc4Ssstefan1         UniqueICVValue = nullptr;
21415dfd7cc4Ssstefan1 
21425dfd7cc4Ssstefan1       if (UniqueICVValue == ReplVal)
21435dfd7cc4Ssstefan1         continue;
21445dfd7cc4Ssstefan1 
21455dfd7cc4Ssstefan1       ReplVal = UniqueICVValue;
21465dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
21475dfd7cc4Ssstefan1     }
21485dfd7cc4Ssstefan1 
21495dfd7cc4Ssstefan1     return Changed;
21505dfd7cc4Ssstefan1   }
21515dfd7cc4Ssstefan1 };
21525dfd7cc4Ssstefan1 
21535dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
21545dfd7cc4Ssstefan1   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
21555dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
21565dfd7cc4Ssstefan1 
21575dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
21585dfd7cc4Ssstefan1     Function *F = getAnchorScope();
21595dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
21605dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
21615dfd7cc4Ssstefan1 
21625dfd7cc4Ssstefan1     // We only initialize this AA for getters, so we need to know which ICV it
21635dfd7cc4Ssstefan1     // gets.
21645dfd7cc4Ssstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
21655dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21665dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[ICV];
21675dfd7cc4Ssstefan1       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
21685dfd7cc4Ssstefan1       if (Getter.Declaration == getAssociatedFunction()) {
21695dfd7cc4Ssstefan1         AssociatedICV = ICVInfo.Kind;
21705dfd7cc4Ssstefan1         return;
21715dfd7cc4Ssstefan1       }
21725dfd7cc4Ssstefan1     }
21735dfd7cc4Ssstefan1 
21745dfd7cc4Ssstefan1     /// Unknown ICV.
21755dfd7cc4Ssstefan1     indicatePessimisticFixpoint();
21765dfd7cc4Ssstefan1   }
21775dfd7cc4Ssstefan1 
21785dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
21795dfd7cc4Ssstefan1     if (!ReplVal.hasValue() || !ReplVal.getValue())
21805dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
21815dfd7cc4Ssstefan1 
21825dfd7cc4Ssstefan1     A.changeValueAfterManifest(*getCtxI(), **ReplVal);
21835dfd7cc4Ssstefan1     A.deleteAfterManifest(*getCtxI());
21845dfd7cc4Ssstefan1 
21855dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
21865dfd7cc4Ssstefan1   }
21875dfd7cc4Ssstefan1 
21885dfd7cc4Ssstefan1   // FIXME: come up with better string.
21895dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
21905dfd7cc4Ssstefan1 
21915dfd7cc4Ssstefan1   // FIXME: come up with some stats.
21925dfd7cc4Ssstefan1   void trackStatistics() const override {}
21935dfd7cc4Ssstefan1 
21945dfd7cc4Ssstefan1   InternalControlVar AssociatedICV;
21955dfd7cc4Ssstefan1   Optional<Value *> ReplVal;
21965dfd7cc4Ssstefan1 
21975dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21985dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
21995b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
22005dfd7cc4Ssstefan1 
22015dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
22025dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22035dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22045dfd7cc4Ssstefan1 
22055dfd7cc4Ssstefan1     Optional<Value *> NewReplVal =
22065dfd7cc4Ssstefan1         ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
22075dfd7cc4Ssstefan1 
22085dfd7cc4Ssstefan1     if (ReplVal == NewReplVal)
22095dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
22105dfd7cc4Ssstefan1 
22115dfd7cc4Ssstefan1     ReplVal = NewReplVal;
22125dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
22135dfd7cc4Ssstefan1   }
22145dfd7cc4Ssstefan1 
22155dfd7cc4Ssstefan1   // Return the value with which associated value can be replaced for specific
22165dfd7cc4Ssstefan1   // \p ICV.
22175dfd7cc4Ssstefan1   Optional<Value *>
22185dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22195dfd7cc4Ssstefan1     return ReplVal;
22205dfd7cc4Ssstefan1   }
22215dfd7cc4Ssstefan1 };
22225dfd7cc4Ssstefan1 
22235dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
22245dfd7cc4Ssstefan1   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
22255dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
22265dfd7cc4Ssstefan1 
22275dfd7cc4Ssstefan1   // FIXME: come up with better string.
22285dfd7cc4Ssstefan1   const std::string getAsStr() const override {
22295dfd7cc4Ssstefan1     return "ICVTrackerCallSiteReturned";
22305dfd7cc4Ssstefan1   }
22315dfd7cc4Ssstefan1 
22325dfd7cc4Ssstefan1   // FIXME: come up with some stats.
22335dfd7cc4Ssstefan1   void trackStatistics() const override {}
22345dfd7cc4Ssstefan1 
22355dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
22365dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
22375dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
22385dfd7cc4Ssstefan1   }
22395dfd7cc4Ssstefan1 
22405dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
22415dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
22425dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
22435dfd7cc4Ssstefan1       ICVReplacementValuesMap;
22445dfd7cc4Ssstefan1 
22455dfd7cc4Ssstefan1   /// Return the value with which associated value can be replaced for specific
22465dfd7cc4Ssstefan1   /// \p ICV.
22475dfd7cc4Ssstefan1   Optional<Value *>
22485dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22495dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
22505dfd7cc4Ssstefan1   }
22515dfd7cc4Ssstefan1 
22525dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
22535dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
22545dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
22555b70c12fSJohannes Doerfert         *this, IRPosition::returned(*getAssociatedFunction()),
22565b70c12fSJohannes Doerfert         DepClassTy::REQUIRED);
22575dfd7cc4Ssstefan1 
22585dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
22595dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22605dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22615dfd7cc4Ssstefan1 
22625dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
22635dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
22645dfd7cc4Ssstefan1       Optional<Value *> NewReplVal =
22655dfd7cc4Ssstefan1           ICVTrackingAA.getUniqueReplacementValue(ICV);
22665dfd7cc4Ssstefan1 
22675dfd7cc4Ssstefan1       if (ReplVal == NewReplVal)
22685dfd7cc4Ssstefan1         continue;
22695dfd7cc4Ssstefan1 
22705dfd7cc4Ssstefan1       ReplVal = NewReplVal;
22715dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
22725dfd7cc4Ssstefan1     }
22735dfd7cc4Ssstefan1     return Changed;
22745dfd7cc4Ssstefan1   }
22759548b74aSJohannes Doerfert };
227618283125SJoseph Huber 
227718283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain {
227818283125SJoseph Huber   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
227918283125SJoseph Huber       : AAExecutionDomain(IRP, A) {}
228018283125SJoseph Huber 
228118283125SJoseph Huber   const std::string getAsStr() const override {
228218283125SJoseph Huber     return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
228318283125SJoseph Huber            "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
228418283125SJoseph Huber   }
228518283125SJoseph Huber 
228618283125SJoseph Huber   /// See AbstractAttribute::trackStatistics().
228718283125SJoseph Huber   void trackStatistics() const override {}
228818283125SJoseph Huber 
228918283125SJoseph Huber   void initialize(Attributor &A) override {
229018283125SJoseph Huber     Function *F = getAnchorScope();
229118283125SJoseph Huber     for (const auto &BB : *F)
229218283125SJoseph Huber       SingleThreadedBBs.insert(&BB);
229318283125SJoseph Huber     NumBBs = SingleThreadedBBs.size();
229418283125SJoseph Huber   }
229518283125SJoseph Huber 
229618283125SJoseph Huber   ChangeStatus manifest(Attributor &A) override {
229718283125SJoseph Huber     LLVM_DEBUG({
229818283125SJoseph Huber       for (const BasicBlock *BB : SingleThreadedBBs)
229918283125SJoseph Huber         dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
230018283125SJoseph Huber                << BB->getName() << " is executed by a single thread.\n";
230118283125SJoseph Huber     });
230218283125SJoseph Huber     return ChangeStatus::UNCHANGED;
230318283125SJoseph Huber   }
230418283125SJoseph Huber 
230518283125SJoseph Huber   ChangeStatus updateImpl(Attributor &A) override;
230618283125SJoseph Huber 
230718283125SJoseph Huber   /// Check if an instruction is executed by a single thread.
23089a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
23099a23e673SJohannes Doerfert     return isExecutedByInitialThreadOnly(*I.getParent());
231018283125SJoseph Huber   }
231118283125SJoseph Huber 
23129a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
231318283125SJoseph Huber     return SingleThreadedBBs.contains(&BB);
231418283125SJoseph Huber   }
231518283125SJoseph Huber 
231618283125SJoseph Huber   /// Set of basic blocks that are executed by a single thread.
231718283125SJoseph Huber   DenseSet<const BasicBlock *> SingleThreadedBBs;
231818283125SJoseph Huber 
231918283125SJoseph Huber   /// Total number of basic blocks in this function.
232018283125SJoseph Huber   long unsigned NumBBs;
232118283125SJoseph Huber };
232218283125SJoseph Huber 
232318283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
232418283125SJoseph Huber   Function *F = getAnchorScope();
232518283125SJoseph Huber   ReversePostOrderTraversal<Function *> RPOT(F);
232618283125SJoseph Huber   auto NumSingleThreadedBBs = SingleThreadedBBs.size();
232718283125SJoseph Huber 
232818283125SJoseph Huber   bool AllCallSitesKnown;
232918283125SJoseph Huber   auto PredForCallSite = [&](AbstractCallSite ACS) {
233018283125SJoseph Huber     const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
233118283125SJoseph Huber         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
233218283125SJoseph Huber         DepClassTy::REQUIRED);
23339a23e673SJohannes Doerfert     return ExecutionDomainAA.isExecutedByInitialThreadOnly(
23349a23e673SJohannes Doerfert         *ACS.getInstruction());
233518283125SJoseph Huber   };
233618283125SJoseph Huber 
233718283125SJoseph Huber   if (!A.checkForAllCallSites(PredForCallSite, *this,
233818283125SJoseph Huber                               /* RequiresAllCallSites */ true,
233918283125SJoseph Huber                               AllCallSitesKnown))
234018283125SJoseph Huber     SingleThreadedBBs.erase(&F->getEntryBlock());
234118283125SJoseph Huber 
234218283125SJoseph Huber   // Check if the edge into the successor block compares a thread-id function to
234318283125SJoseph Huber   // a constant zero.
234418283125SJoseph Huber   // TODO: Use AAValueSimplify to simplify and propogate constants.
234518283125SJoseph Huber   // TODO: Check more than a single use for thread ID's.
23466fc51c9fSJoseph Huber   auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
234718283125SJoseph Huber     if (!Edge || !Edge->isConditional())
234818283125SJoseph Huber       return false;
234918283125SJoseph Huber     if (Edge->getSuccessor(0) != SuccessorBB)
235018283125SJoseph Huber       return false;
235118283125SJoseph Huber 
235218283125SJoseph Huber     auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
235318283125SJoseph Huber     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
235418283125SJoseph Huber       return false;
235518283125SJoseph Huber 
23566fc51c9fSJoseph Huber     // Temporarily match the pattern generated by clang for teams regions.
23576fc51c9fSJoseph Huber     // TODO: Remove this once the new runtime is in place.
23586fc51c9fSJoseph Huber     ConstantInt *One, *NegOne;
23596fc51c9fSJoseph Huber     CmpInst::Predicate Pred;
23606fc51c9fSJoseph Huber     auto &&m_ThreadID = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_tid_x>();
23616fc51c9fSJoseph Huber     auto &&m_WarpSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_warpsize>();
23626fc51c9fSJoseph Huber     auto &&m_BlockSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_ntid_x>();
23636fc51c9fSJoseph Huber     if (match(Cmp, m_Cmp(Pred, m_ThreadID,
23646fc51c9fSJoseph Huber                          m_And(m_Sub(m_BlockSize, m_ConstantInt(One)),
23656fc51c9fSJoseph Huber                                m_Xor(m_Sub(m_WarpSize, m_ConstantInt(One)),
23666fc51c9fSJoseph Huber                                      m_ConstantInt(NegOne))))))
23676fc51c9fSJoseph Huber       if (One->isOne() && NegOne->isMinusOne() &&
23686fc51c9fSJoseph Huber           Pred == CmpInst::Predicate::ICMP_EQ)
23696fc51c9fSJoseph Huber         return true;
23706fc51c9fSJoseph Huber 
237118283125SJoseph Huber     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
237218283125SJoseph Huber     if (!C || !C->isZero())
237318283125SJoseph Huber       return false;
237418283125SJoseph Huber 
237568abc3d2SJoseph Huber     if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
237668abc3d2SJoseph Huber       if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
237718283125SJoseph Huber         return true;
237868abc3d2SJoseph Huber     if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
237968abc3d2SJoseph Huber       if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
238068abc3d2SJoseph Huber         return true;
238118283125SJoseph Huber 
238218283125SJoseph Huber     return false;
238318283125SJoseph Huber   };
238418283125SJoseph Huber 
238518283125SJoseph Huber   // Merge all the predecessor states into the current basic block. A basic
238618283125SJoseph Huber   // block is executed by a single thread if all of its predecessors are.
238718283125SJoseph Huber   auto MergePredecessorStates = [&](BasicBlock *BB) {
238818283125SJoseph Huber     if (pred_begin(BB) == pred_end(BB))
238918283125SJoseph Huber       return SingleThreadedBBs.contains(BB);
239018283125SJoseph Huber 
23916fc51c9fSJoseph Huber     bool IsInitialThread = true;
239218283125SJoseph Huber     for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
239318283125SJoseph Huber          PredBB != PredEndBB; ++PredBB) {
23946fc51c9fSJoseph Huber       if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
239518283125SJoseph Huber                               BB))
23966fc51c9fSJoseph Huber         IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
239718283125SJoseph Huber     }
239818283125SJoseph Huber 
23996fc51c9fSJoseph Huber     return IsInitialThread;
240018283125SJoseph Huber   };
240118283125SJoseph Huber 
240218283125SJoseph Huber   for (auto *BB : RPOT) {
240318283125SJoseph Huber     if (!MergePredecessorStates(BB))
240418283125SJoseph Huber       SingleThreadedBBs.erase(BB);
240518283125SJoseph Huber   }
240618283125SJoseph Huber 
240718283125SJoseph Huber   return (NumSingleThreadedBBs == SingleThreadedBBs.size())
240818283125SJoseph Huber              ? ChangeStatus::UNCHANGED
240918283125SJoseph Huber              : ChangeStatus::CHANGED;
241018283125SJoseph Huber }
241118283125SJoseph Huber 
24126fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a
24136fc51c9fSJoseph Huber /// static buffer of shared memory.
24146fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
24156fc51c9fSJoseph Huber   using Base = StateWrapper<BooleanState, AbstractAttribute>;
24166fc51c9fSJoseph Huber   AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
24176fc51c9fSJoseph Huber 
24186fc51c9fSJoseph Huber   /// Create an abstract attribute view for the position \p IRP.
24196fc51c9fSJoseph Huber   static AAHeapToShared &createForPosition(const IRPosition &IRP,
24206fc51c9fSJoseph Huber                                            Attributor &A);
24216fc51c9fSJoseph Huber 
24226fc51c9fSJoseph Huber   /// See AbstractAttribute::getName().
24236fc51c9fSJoseph Huber   const std::string getName() const override { return "AAHeapToShared"; }
24246fc51c9fSJoseph Huber 
24256fc51c9fSJoseph Huber   /// See AbstractAttribute::getIdAddr().
24266fc51c9fSJoseph Huber   const char *getIdAddr() const override { return &ID; }
24276fc51c9fSJoseph Huber 
24286fc51c9fSJoseph Huber   /// This function should return true if the type of the \p AA is
24296fc51c9fSJoseph Huber   /// AAHeapToShared.
24306fc51c9fSJoseph Huber   static bool classof(const AbstractAttribute *AA) {
24316fc51c9fSJoseph Huber     return (AA->getIdAddr() == &ID);
24326fc51c9fSJoseph Huber   }
24336fc51c9fSJoseph Huber 
24346fc51c9fSJoseph Huber   /// Unique ID (due to the unique address)
24356fc51c9fSJoseph Huber   static const char ID;
24366fc51c9fSJoseph Huber };
24376fc51c9fSJoseph Huber 
24386fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared {
24396fc51c9fSJoseph Huber   AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
24406fc51c9fSJoseph Huber       : AAHeapToShared(IRP, A) {}
24416fc51c9fSJoseph Huber 
24426fc51c9fSJoseph Huber   const std::string getAsStr() const override {
24436fc51c9fSJoseph Huber     return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
24446fc51c9fSJoseph Huber            " malloc calls eligible.";
24456fc51c9fSJoseph Huber   }
24466fc51c9fSJoseph Huber 
24476fc51c9fSJoseph Huber   /// See AbstractAttribute::trackStatistics().
24486fc51c9fSJoseph Huber   void trackStatistics() const override {}
24496fc51c9fSJoseph Huber 
24506fc51c9fSJoseph Huber   void initialize(Attributor &A) override {
24516fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24526fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
24536fc51c9fSJoseph Huber 
24546fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users())
24556fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
24566fc51c9fSJoseph Huber         MallocCalls.insert(CB);
24576fc51c9fSJoseph Huber   }
24586fc51c9fSJoseph Huber 
24596fc51c9fSJoseph Huber   ChangeStatus manifest(Attributor &A) override {
24606fc51c9fSJoseph Huber     if (MallocCalls.empty())
24616fc51c9fSJoseph Huber       return ChangeStatus::UNCHANGED;
24626fc51c9fSJoseph Huber 
24636fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24646fc51c9fSJoseph Huber     auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
24656fc51c9fSJoseph Huber 
24666fc51c9fSJoseph Huber     Function *F = getAnchorScope();
24676fc51c9fSJoseph Huber     auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
24686fc51c9fSJoseph Huber                                             DepClassTy::OPTIONAL);
24696fc51c9fSJoseph Huber 
24706fc51c9fSJoseph Huber     ChangeStatus Changed = ChangeStatus::UNCHANGED;
24716fc51c9fSJoseph Huber     for (CallBase *CB : MallocCalls) {
24726fc51c9fSJoseph Huber       // Skip replacing this if HeapToStack has already claimed it.
24736fc51c9fSJoseph Huber       if (HS && HS->isKnownHeapToStack(*CB))
24746fc51c9fSJoseph Huber         continue;
24756fc51c9fSJoseph Huber 
24766fc51c9fSJoseph Huber       // Find the unique free call to remove it.
24776fc51c9fSJoseph Huber       SmallVector<CallBase *, 4> FreeCalls;
24786fc51c9fSJoseph Huber       for (auto *U : CB->users()) {
24796fc51c9fSJoseph Huber         CallBase *C = dyn_cast<CallBase>(U);
24806fc51c9fSJoseph Huber         if (C && C->getCalledFunction() == FreeCall.Declaration)
24816fc51c9fSJoseph Huber           FreeCalls.push_back(C);
24826fc51c9fSJoseph Huber       }
24836fc51c9fSJoseph Huber       if (FreeCalls.size() != 1)
24846fc51c9fSJoseph Huber         continue;
24856fc51c9fSJoseph Huber 
24866fc51c9fSJoseph Huber       ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
24876fc51c9fSJoseph Huber 
24886fc51c9fSJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
24896fc51c9fSJoseph Huber                         << CB->getCaller()->getName() << " with "
24906fc51c9fSJoseph Huber                         << AllocSize->getZExtValue()
24916fc51c9fSJoseph Huber                         << " bytes of shared memory\n");
24926fc51c9fSJoseph Huber 
24936fc51c9fSJoseph Huber       // Create a new shared memory buffer of the same size as the allocation
24946fc51c9fSJoseph Huber       // and replace all the uses of the original allocation with it.
24956fc51c9fSJoseph Huber       Module *M = CB->getModule();
24966fc51c9fSJoseph Huber       Type *Int8Ty = Type::getInt8Ty(M->getContext());
24976fc51c9fSJoseph Huber       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
24986fc51c9fSJoseph Huber       auto *SharedMem = new GlobalVariable(
24996fc51c9fSJoseph Huber           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
25006fc51c9fSJoseph Huber           UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
25016fc51c9fSJoseph Huber           GlobalValue::NotThreadLocal,
25026fc51c9fSJoseph Huber           static_cast<unsigned>(AddressSpace::Shared));
25036fc51c9fSJoseph Huber       auto *NewBuffer =
25046fc51c9fSJoseph Huber           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
25056fc51c9fSJoseph Huber 
25066fc51c9fSJoseph Huber       SharedMem->setAlignment(MaybeAlign(32));
25076fc51c9fSJoseph Huber 
25086fc51c9fSJoseph Huber       A.changeValueAfterManifest(*CB, *NewBuffer);
25096fc51c9fSJoseph Huber       A.deleteAfterManifest(*CB);
25106fc51c9fSJoseph Huber       A.deleteAfterManifest(*FreeCalls.front());
25116fc51c9fSJoseph Huber 
25126fc51c9fSJoseph Huber       NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
25136fc51c9fSJoseph Huber       Changed = ChangeStatus::CHANGED;
25146fc51c9fSJoseph Huber     }
25156fc51c9fSJoseph Huber 
25166fc51c9fSJoseph Huber     return Changed;
25176fc51c9fSJoseph Huber   }
25186fc51c9fSJoseph Huber 
25196fc51c9fSJoseph Huber   ChangeStatus updateImpl(Attributor &A) override {
25206fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
25216fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
25226fc51c9fSJoseph Huber     Function *F = getAnchorScope();
25236fc51c9fSJoseph Huber 
25246fc51c9fSJoseph Huber     auto NumMallocCalls = MallocCalls.size();
25256fc51c9fSJoseph Huber 
25266fc51c9fSJoseph Huber     // Only consider malloc calls executed by a single thread with a constant.
25276fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users()) {
25286fc51c9fSJoseph Huber       const auto &ED = A.getAAFor<AAExecutionDomain>(
25296fc51c9fSJoseph Huber           *this, IRPosition::function(*F), DepClassTy::REQUIRED);
25306fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
25316fc51c9fSJoseph Huber         if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
25326fc51c9fSJoseph Huber             !ED.isExecutedByInitialThreadOnly(*CB))
25336fc51c9fSJoseph Huber           MallocCalls.erase(CB);
25346fc51c9fSJoseph Huber     }
25356fc51c9fSJoseph Huber 
25366fc51c9fSJoseph Huber     if (NumMallocCalls != MallocCalls.size())
25376fc51c9fSJoseph Huber       return ChangeStatus::CHANGED;
25386fc51c9fSJoseph Huber 
25396fc51c9fSJoseph Huber     return ChangeStatus::UNCHANGED;
25406fc51c9fSJoseph Huber   }
25416fc51c9fSJoseph Huber 
25426fc51c9fSJoseph Huber   /// Collection of all malloc calls in a function.
25436fc51c9fSJoseph Huber   SmallPtrSet<CallBase *, 4> MallocCalls;
25446fc51c9fSJoseph Huber };
25456fc51c9fSJoseph Huber 
25469548b74aSJohannes Doerfert } // namespace
25479548b74aSJohannes Doerfert 
2548b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
254918283125SJoseph Huber const char AAExecutionDomain::ID = 0;
25506fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0;
2551b8235d2bSsstefan1 
2552b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
2553b8235d2bSsstefan1                                               Attributor &A) {
2554b8235d2bSsstefan1   AAICVTracker *AA = nullptr;
2555b8235d2bSsstefan1   switch (IRP.getPositionKind()) {
2556b8235d2bSsstefan1   case IRPosition::IRP_INVALID:
2557b8235d2bSsstefan1   case IRPosition::IRP_FLOAT:
2558b8235d2bSsstefan1   case IRPosition::IRP_ARGUMENT:
2559b8235d2bSsstefan1   case IRPosition::IRP_CALL_SITE_ARGUMENT:
25601de70a72SJohannes Doerfert     llvm_unreachable("ICVTracker can only be created for function position!");
25615dfd7cc4Ssstefan1   case IRPosition::IRP_RETURNED:
25625dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
25635dfd7cc4Ssstefan1     break;
25645dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE_RETURNED:
25655dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
25665dfd7cc4Ssstefan1     break;
25675dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE:
25685dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
25695dfd7cc4Ssstefan1     break;
2570b8235d2bSsstefan1   case IRPosition::IRP_FUNCTION:
2571b8235d2bSsstefan1     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
2572b8235d2bSsstefan1     break;
2573b8235d2bSsstefan1   }
2574b8235d2bSsstefan1 
2575b8235d2bSsstefan1   return *AA;
2576b8235d2bSsstefan1 }
2577b8235d2bSsstefan1 
257818283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
257918283125SJoseph Huber                                                         Attributor &A) {
258018283125SJoseph Huber   AAExecutionDomainFunction *AA = nullptr;
258118283125SJoseph Huber   switch (IRP.getPositionKind()) {
258218283125SJoseph Huber   case IRPosition::IRP_INVALID:
258318283125SJoseph Huber   case IRPosition::IRP_FLOAT:
258418283125SJoseph Huber   case IRPosition::IRP_ARGUMENT:
258518283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
258618283125SJoseph Huber   case IRPosition::IRP_RETURNED:
258718283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
258818283125SJoseph Huber   case IRPosition::IRP_CALL_SITE:
258918283125SJoseph Huber     llvm_unreachable(
259018283125SJoseph Huber         "AAExecutionDomain can only be created for function position!");
259118283125SJoseph Huber   case IRPosition::IRP_FUNCTION:
259218283125SJoseph Huber     AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
259318283125SJoseph Huber     break;
259418283125SJoseph Huber   }
259518283125SJoseph Huber 
259618283125SJoseph Huber   return *AA;
259718283125SJoseph Huber }
259818283125SJoseph Huber 
25996fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
26006fc51c9fSJoseph Huber                                                   Attributor &A) {
26016fc51c9fSJoseph Huber   AAHeapToSharedFunction *AA = nullptr;
26026fc51c9fSJoseph Huber   switch (IRP.getPositionKind()) {
26036fc51c9fSJoseph Huber   case IRPosition::IRP_INVALID:
26046fc51c9fSJoseph Huber   case IRPosition::IRP_FLOAT:
26056fc51c9fSJoseph Huber   case IRPosition::IRP_ARGUMENT:
26066fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
26076fc51c9fSJoseph Huber   case IRPosition::IRP_RETURNED:
26086fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
26096fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE:
26106fc51c9fSJoseph Huber     llvm_unreachable(
26116fc51c9fSJoseph Huber         "AAHeapToShared can only be created for function position!");
26126fc51c9fSJoseph Huber   case IRPosition::IRP_FUNCTION:
26136fc51c9fSJoseph Huber     AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
26146fc51c9fSJoseph Huber     break;
26156fc51c9fSJoseph Huber   }
26166fc51c9fSJoseph Huber 
26176fc51c9fSJoseph Huber   return *AA;
26186fc51c9fSJoseph Huber }
26196fc51c9fSJoseph Huber 
2620b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2621b2ad63d3SJoseph Huber   if (!containsOpenMP(M, OMPInModule))
2622b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2623b2ad63d3SJoseph Huber 
2624b2ad63d3SJoseph Huber   if (DisableOpenMPOptimizations)
2625b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2626b2ad63d3SJoseph Huber 
262703d7e61cSJoseph Huber   // Create internal copies of each function if this is a kernel Module.
262803d7e61cSJoseph Huber   DenseSet<const Function *> InternalizedFuncs;
262903d7e61cSJoseph Huber   if (!OMPInModule.getKernels().empty())
263003d7e61cSJoseph Huber     for (Function &F : M)
263103d7e61cSJoseph Huber       if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F))
263203d7e61cSJoseph Huber         if (Attributor::internalizeFunction(F, /* Force */ true))
263303d7e61cSJoseph Huber           InternalizedFuncs.insert(&F);
263403d7e61cSJoseph Huber 
263503d7e61cSJoseph Huber   // Look at every function definition in the Module that wasn't internalized.
2636b2ad63d3SJoseph Huber   SmallVector<Function *, 16> SCC;
263703d7e61cSJoseph Huber   for (Function &F : M)
263803d7e61cSJoseph Huber     if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
263903d7e61cSJoseph Huber       SCC.push_back(&F);
2640b2ad63d3SJoseph Huber 
2641b2ad63d3SJoseph Huber   if (SCC.empty())
2642b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2643b2ad63d3SJoseph Huber 
2644b2ad63d3SJoseph Huber   FunctionAnalysisManager &FAM =
2645b2ad63d3SJoseph Huber       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2646b2ad63d3SJoseph Huber 
2647b2ad63d3SJoseph Huber   AnalysisGetter AG(FAM);
2648b2ad63d3SJoseph Huber 
2649b2ad63d3SJoseph Huber   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
2650b2ad63d3SJoseph Huber     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
2651b2ad63d3SJoseph Huber   };
2652b2ad63d3SJoseph Huber 
2653b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
2654b2ad63d3SJoseph Huber   CallGraphUpdater CGUpdater;
2655b2ad63d3SJoseph Huber 
2656b2ad63d3SJoseph Huber   SetVector<Function *> Functions(SCC.begin(), SCC.end());
2657b2ad63d3SJoseph Huber   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
2658b2ad63d3SJoseph Huber                                 OMPInModule.getKernels());
2659b2ad63d3SJoseph Huber 
266003d7e61cSJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false);
2661b2ad63d3SJoseph Huber 
2662b2ad63d3SJoseph Huber   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2663b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(true);
2664b2ad63d3SJoseph Huber   if (Changed)
2665b2ad63d3SJoseph Huber     return PreservedAnalyses::none();
2666b2ad63d3SJoseph Huber 
2667b2ad63d3SJoseph Huber   return PreservedAnalyses::all();
2668b2ad63d3SJoseph Huber }
2669b2ad63d3SJoseph Huber 
2670b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
26719548b74aSJohannes Doerfert                                           CGSCCAnalysisManager &AM,
2672b2ad63d3SJoseph Huber                                           LazyCallGraph &CG,
2673b2ad63d3SJoseph Huber                                           CGSCCUpdateResult &UR) {
26749548b74aSJohannes Doerfert   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
26759548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26769548b74aSJohannes Doerfert 
26779548b74aSJohannes Doerfert   if (DisableOpenMPOptimizations)
26789548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26799548b74aSJohannes Doerfert 
2680ee17263aSJohannes Doerfert   SmallVector<Function *, 16> SCC;
2681351d234dSRoman Lebedev   // If there are kernels in the module, we have to run on all SCC's.
2682351d234dSRoman Lebedev   bool SCCIsInteresting = !OMPInModule.getKernels().empty();
2683351d234dSRoman Lebedev   for (LazyCallGraph::Node &N : C) {
2684351d234dSRoman Lebedev     Function *Fn = &N.getFunction();
2685351d234dSRoman Lebedev     SCC.push_back(Fn);
26869548b74aSJohannes Doerfert 
2687351d234dSRoman Lebedev     // Do we already know that the SCC contains kernels,
2688351d234dSRoman Lebedev     // or that OpenMP functions are called from this SCC?
2689351d234dSRoman Lebedev     if (SCCIsInteresting)
2690351d234dSRoman Lebedev       continue;
2691351d234dSRoman Lebedev     // If not, let's check that.
2692351d234dSRoman Lebedev     SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
2693351d234dSRoman Lebedev   }
2694351d234dSRoman Lebedev 
2695351d234dSRoman Lebedev   if (!SCCIsInteresting || SCC.empty())
26969548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26979548b74aSJohannes Doerfert 
26984d4ea9acSHuber, Joseph   FunctionAnalysisManager &FAM =
26994d4ea9acSHuber, Joseph       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
27007cfd267cSsstefan1 
27017cfd267cSsstefan1   AnalysisGetter AG(FAM);
27027cfd267cSsstefan1 
27037cfd267cSsstefan1   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
27044d4ea9acSHuber, Joseph     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
27054d4ea9acSHuber, Joseph   };
27064d4ea9acSHuber, Joseph 
2707b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
27089548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27099548b74aSJohannes Doerfert   CGUpdater.initialize(CG, C, AM, UR);
27107cfd267cSsstefan1 
27117cfd267cSsstefan1   SetVector<Function *> Functions(SCC.begin(), SCC.end());
27127cfd267cSsstefan1   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
2713624d34afSJohannes Doerfert                                 /*CGSCC*/ Functions, OMPInModule.getKernels());
27147cfd267cSsstefan1 
27158b57ed09SJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
2716b8235d2bSsstefan1 
2717b8235d2bSsstefan1   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2718b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(false);
2719694ded37SGiorgis Georgakoudis   if (Changed)
2720694ded37SGiorgis Georgakoudis     return PreservedAnalyses::none();
2721694ded37SGiorgis Georgakoudis 
27229548b74aSJohannes Doerfert   return PreservedAnalyses::all();
27239548b74aSJohannes Doerfert }
27248b57ed09SJoseph Huber 
27259548b74aSJohannes Doerfert namespace {
27269548b74aSJohannes Doerfert 
2727b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27289548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27299548b74aSJohannes Doerfert   OpenMPInModule OMPInModule;
27309548b74aSJohannes Doerfert   static char ID;
27319548b74aSJohannes Doerfert 
2732b2ad63d3SJoseph Huber   OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
2733b2ad63d3SJoseph Huber     initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
27349548b74aSJohannes Doerfert   }
27359548b74aSJohannes Doerfert 
27369548b74aSJohannes Doerfert   void getAnalysisUsage(AnalysisUsage &AU) const override {
27379548b74aSJohannes Doerfert     CallGraphSCCPass::getAnalysisUsage(AU);
27389548b74aSJohannes Doerfert   }
27399548b74aSJohannes Doerfert 
27409548b74aSJohannes Doerfert   bool doInitialization(CallGraph &CG) override {
27419548b74aSJohannes Doerfert     // Disable the pass if there is no OpenMP (runtime call) in the module.
27429548b74aSJohannes Doerfert     containsOpenMP(CG.getModule(), OMPInModule);
27439548b74aSJohannes Doerfert     return false;
27449548b74aSJohannes Doerfert   }
27459548b74aSJohannes Doerfert 
27469548b74aSJohannes Doerfert   bool runOnSCC(CallGraphSCC &CGSCC) override {
27479548b74aSJohannes Doerfert     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
27489548b74aSJohannes Doerfert       return false;
27499548b74aSJohannes Doerfert     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
27509548b74aSJohannes Doerfert       return false;
27519548b74aSJohannes Doerfert 
2752ee17263aSJohannes Doerfert     SmallVector<Function *, 16> SCC;
2753351d234dSRoman Lebedev     // If there are kernels in the module, we have to run on all SCC's.
2754351d234dSRoman Lebedev     bool SCCIsInteresting = !OMPInModule.getKernels().empty();
2755351d234dSRoman Lebedev     for (CallGraphNode *CGN : CGSCC) {
2756351d234dSRoman Lebedev       Function *Fn = CGN->getFunction();
2757351d234dSRoman Lebedev       if (!Fn || Fn->isDeclaration())
2758351d234dSRoman Lebedev         continue;
2759ee17263aSJohannes Doerfert       SCC.push_back(Fn);
27609548b74aSJohannes Doerfert 
2761351d234dSRoman Lebedev       // Do we already know that the SCC contains kernels,
2762351d234dSRoman Lebedev       // or that OpenMP functions are called from this SCC?
2763351d234dSRoman Lebedev       if (SCCIsInteresting)
2764351d234dSRoman Lebedev         continue;
2765351d234dSRoman Lebedev       // If not, let's check that.
2766351d234dSRoman Lebedev       SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
2767351d234dSRoman Lebedev     }
2768351d234dSRoman Lebedev 
2769351d234dSRoman Lebedev     if (!SCCIsInteresting || SCC.empty())
27709548b74aSJohannes Doerfert       return false;
27719548b74aSJohannes Doerfert 
27729548b74aSJohannes Doerfert     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
27739548b74aSJohannes Doerfert     CGUpdater.initialize(CG, CGSCC);
27749548b74aSJohannes Doerfert 
27754d4ea9acSHuber, Joseph     // Maintain a map of functions to avoid rebuilding the ORE
27764d4ea9acSHuber, Joseph     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
27774d4ea9acSHuber, Joseph     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
27784d4ea9acSHuber, Joseph       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
27794d4ea9acSHuber, Joseph       if (!ORE)
27804d4ea9acSHuber, Joseph         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
27814d4ea9acSHuber, Joseph       return *ORE;
27824d4ea9acSHuber, Joseph     };
27834d4ea9acSHuber, Joseph 
27847cfd267cSsstefan1     AnalysisGetter AG;
27857cfd267cSsstefan1     SetVector<Function *> Functions(SCC.begin(), SCC.end());
27867cfd267cSsstefan1     BumpPtrAllocator Allocator;
2787e8039ad4SJohannes Doerfert     OMPInformationCache InfoCache(
2788e8039ad4SJohannes Doerfert         *(Functions.back()->getParent()), AG, Allocator,
2789624d34afSJohannes Doerfert         /*CGSCC*/ Functions, OMPInModule.getKernels());
27907cfd267cSsstefan1 
27918b57ed09SJoseph Huber     Attributor A(Functions, InfoCache, CGUpdater, nullptr, false);
2792b8235d2bSsstefan1 
2793b8235d2bSsstefan1     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2794b2ad63d3SJoseph Huber     return OMPOpt.run(false);
27959548b74aSJohannes Doerfert   }
27969548b74aSJohannes Doerfert 
27979548b74aSJohannes Doerfert   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
27989548b74aSJohannes Doerfert };
27999548b74aSJohannes Doerfert 
28009548b74aSJohannes Doerfert } // end anonymous namespace
28019548b74aSJohannes Doerfert 
2802e8039ad4SJohannes Doerfert void OpenMPInModule::identifyKernels(Module &M) {
2803e8039ad4SJohannes Doerfert 
2804e8039ad4SJohannes Doerfert   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
2805e8039ad4SJohannes Doerfert   if (!MD)
2806e8039ad4SJohannes Doerfert     return;
2807e8039ad4SJohannes Doerfert 
2808e8039ad4SJohannes Doerfert   for (auto *Op : MD->operands()) {
2809e8039ad4SJohannes Doerfert     if (Op->getNumOperands() < 2)
2810e8039ad4SJohannes Doerfert       continue;
2811e8039ad4SJohannes Doerfert     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
2812e8039ad4SJohannes Doerfert     if (!KindID || KindID->getString() != "kernel")
2813e8039ad4SJohannes Doerfert       continue;
2814e8039ad4SJohannes Doerfert 
2815e8039ad4SJohannes Doerfert     Function *KernelFn =
2816e8039ad4SJohannes Doerfert         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
2817e8039ad4SJohannes Doerfert     if (!KernelFn)
2818e8039ad4SJohannes Doerfert       continue;
2819e8039ad4SJohannes Doerfert 
2820e8039ad4SJohannes Doerfert     ++NumOpenMPTargetRegionKernels;
2821e8039ad4SJohannes Doerfert 
2822e8039ad4SJohannes Doerfert     Kernels.insert(KernelFn);
2823e8039ad4SJohannes Doerfert   }
2824e8039ad4SJohannes Doerfert }
2825e8039ad4SJohannes Doerfert 
28269548b74aSJohannes Doerfert bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
28279548b74aSJohannes Doerfert   if (OMPInModule.isKnown())
28289548b74aSJohannes Doerfert     return OMPInModule;
2829dce6bc18SJohannes Doerfert 
2830351d234dSRoman Lebedev   auto RecordFunctionsContainingUsesOf = [&](Function *F) {
2831351d234dSRoman Lebedev     for (User *U : F->users())
2832351d234dSRoman Lebedev       if (auto *I = dyn_cast<Instruction>(U))
2833351d234dSRoman Lebedev         OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
2834351d234dSRoman Lebedev   };
2835351d234dSRoman Lebedev 
2836dce6bc18SJohannes Doerfert   // MSVC doesn't like long if-else chains for some reason and instead just
2837dce6bc18SJohannes Doerfert   // issues an error. Work around it..
2838dce6bc18SJohannes Doerfert   do {
28399548b74aSJohannes Doerfert #define OMP_RTL(_Enum, _Name, ...)                                             \
2840351d234dSRoman Lebedev   if (Function *F = M.getFunction(_Name)) {                                    \
2841351d234dSRoman Lebedev     RecordFunctionsContainingUsesOf(F);                                        \
2842dce6bc18SJohannes Doerfert     OMPInModule = true;                                                        \
2843dce6bc18SJohannes Doerfert   }
28449548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPKinds.def"
2845dce6bc18SJohannes Doerfert   } while (false);
2846e8039ad4SJohannes Doerfert 
2847e8039ad4SJohannes Doerfert   // Identify kernels once. TODO: We should split the OMPInformationCache into a
2848e8039ad4SJohannes Doerfert   // module and an SCC part. The kernel information, among other things, could
2849e8039ad4SJohannes Doerfert   // go into the module part.
2850e8039ad4SJohannes Doerfert   if (OMPInModule.isKnown() && OMPInModule) {
2851e8039ad4SJohannes Doerfert     OMPInModule.identifyKernels(M);
2852e8039ad4SJohannes Doerfert     return true;
2853e8039ad4SJohannes Doerfert   }
2854e8039ad4SJohannes Doerfert 
28559548b74aSJohannes Doerfert   return OMPInModule = false;
28569548b74aSJohannes Doerfert }
28579548b74aSJohannes Doerfert 
2858b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0;
28599548b74aSJohannes Doerfert 
2860b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28619548b74aSJohannes Doerfert                       "OpenMP specific optimizations", false, false)
28629548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
2863b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28649548b74aSJohannes Doerfert                     "OpenMP specific optimizations", false, false)
28659548b74aSJohannes Doerfert 
2866b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
2867b2ad63d3SJoseph Huber   return new OpenMPOptCGSCCLegacyPass();
2868b2ad63d3SJoseph Huber }
2869