//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
// - Replacing globalized device memory with stack memory.
// - Replacing globalized device memory with shared memory.
//
//===----------------------------------------------------------------------===//
169548b74aSJohannes Doerfert 
179548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
189548b74aSJohannes Doerfert 
199548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
2018283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h"
219548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
229548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
239548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
244d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
253a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h"
269548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
27e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
2868abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h"
2968abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsAMDGPU.h"
3068abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsNVPTX.h"
316fc51c9fSJoseph Huber #include "llvm/IR/PatternMatch.h"
329548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
339548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
349548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
357cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
363a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h"
379548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
3897517055SGiorgis Georgakoudis #include "llvm/Transforms/Utils/CodeExtractor.h"
399548b74aSJohannes Doerfert 
406fc51c9fSJoseph Huber using namespace llvm::PatternMatch;
419548b74aSJohannes Doerfert using namespace llvm;
429548b74aSJohannes Doerfert using namespace omp;
439548b74aSJohannes Doerfert 
449548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
459548b74aSJohannes Doerfert 
469548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
479548b74aSJohannes Doerfert     "openmp-opt-disable", cl::ZeroOrMore,
489548b74aSJohannes Doerfert     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
499548b74aSJohannes Doerfert     cl::init(false));
509548b74aSJohannes Doerfert 
513a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging(
523a6bfcf2SGiorgis Georgakoudis     "openmp-opt-enable-merging", cl::ZeroOrMore,
533a6bfcf2SGiorgis Georgakoudis     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
543a6bfcf2SGiorgis Georgakoudis     cl::init(false));
553a6bfcf2SGiorgis Georgakoudis 
560f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
570f426935Ssstefan1                                     cl::Hidden);
58e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
59e8039ad4SJohannes Doerfert                                         cl::init(false), cl::Hidden);
600f426935Ssstefan1 
61496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
62496f8e5bSHamilton Tobon Mosquera     "openmp-hide-memory-transfer-latency",
63496f8e5bSHamilton Tobon Mosquera     cl::desc("[WIP] Tries to hide the latency of host to device memory"
64496f8e5bSHamilton Tobon Mosquera              " transfers"),
65496f8e5bSHamilton Tobon Mosquera     cl::Hidden, cl::init(false));
66496f8e5bSHamilton Tobon Mosquera 
679548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
689548b74aSJohannes Doerfert           "Number of OpenMP runtime calls deduplicated");
6955eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
7055eb714aSRoman Lebedev           "Number of OpenMP parallel regions deleted");
719548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
729548b74aSJohannes Doerfert           "Number of OpenMP runtime functions identified");
739548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
749548b74aSJohannes Doerfert           "Number of OpenMP runtime function uses identified");
75e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
76e8039ad4SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) identified");
775b0581aeSJohannes Doerfert STATISTIC(
785b0581aeSJohannes Doerfert     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
795b0581aeSJohannes Doerfert     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
803a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged,
813a6bfcf2SGiorgis Georgakoudis           "Number of OpenMP parallel regions merged");
826fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory,
836fc51c9fSJoseph Huber           "Amount of memory pushed to shared memory");
849548b74aSJohannes Doerfert 
85263c4a3cSrathod-sahaab #if !defined(NDEBUG)
869548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
87a50c0b0dSMikael Holmen #endif
889548b74aSJohannes Doerfert 
899548b74aSJohannes Doerfert namespace {
909548b74aSJohannes Doerfert 
/// Numeric address spaces referred to by this file.
/// NOTE(review): the values presumably mirror the address space numbering of
/// the GPU targets whose intrinsics are included above (NVPTX/AMDGPU) --
/// confirm against the backends before relying on it.
enum class AddressSpace : unsigned {
  Generic = 0,
  Global = 1,
  Shared = 3,
  Constant = 4,
  Local = 5,
};
986fc51c9fSJoseph Huber 
// Forward declarations; the definitions are not in this part of the file.
struct AAHeapToShared;

struct AAICVTracker;
102b8235d2bSsstefan1 
1037cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
1047cfd267cSsstefan1 /// Attributor runs.
1057cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
1067cfd267cSsstefan1   OMPInformationCache(Module &M, AnalysisGetter &AG,
107624d34afSJohannes Doerfert                       BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
108e8039ad4SJohannes Doerfert                       SmallPtrSetImpl<Kernel> &Kernels)
109624d34afSJohannes Doerfert       : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
110624d34afSJohannes Doerfert         Kernels(Kernels) {
111624d34afSJohannes Doerfert 
11261238d26Ssstefan1     OMPBuilder.initialize();
1139548b74aSJohannes Doerfert     initializeRuntimeFunctions();
1140f426935Ssstefan1     initializeInternalControlVars();
1159548b74aSJohannes Doerfert   }
1169548b74aSJohannes Doerfert 
1170f426935Ssstefan1   /// Generic information that describes an internal control variable.
1180f426935Ssstefan1   struct InternalControlVarInfo {
1190f426935Ssstefan1     /// The kind, as described by InternalControlVar enum.
1200f426935Ssstefan1     InternalControlVar Kind;
1210f426935Ssstefan1 
1220f426935Ssstefan1     /// The name of the ICV.
1230f426935Ssstefan1     StringRef Name;
1240f426935Ssstefan1 
1250f426935Ssstefan1     /// Environment variable associated with this ICV.
1260f426935Ssstefan1     StringRef EnvVarName;
1270f426935Ssstefan1 
1280f426935Ssstefan1     /// Initial value kind.
1290f426935Ssstefan1     ICVInitValue InitKind;
1300f426935Ssstefan1 
1310f426935Ssstefan1     /// Initial value.
1320f426935Ssstefan1     ConstantInt *InitValue;
1330f426935Ssstefan1 
1340f426935Ssstefan1     /// Setter RTL function associated with this ICV.
1350f426935Ssstefan1     RuntimeFunction Setter;
1360f426935Ssstefan1 
1370f426935Ssstefan1     /// Getter RTL function associated with this ICV.
1380f426935Ssstefan1     RuntimeFunction Getter;
1390f426935Ssstefan1 
1400f426935Ssstefan1     /// RTL Function corresponding to the override clause of this ICV
1410f426935Ssstefan1     RuntimeFunction Clause;
1420f426935Ssstefan1   };
1430f426935Ssstefan1 
1449548b74aSJohannes Doerfert   /// Generic information that describes a runtime function
1459548b74aSJohannes Doerfert   struct RuntimeFunctionInfo {
1468855fec3SJohannes Doerfert 
1479548b74aSJohannes Doerfert     /// The kind, as described by the RuntimeFunction enum.
1489548b74aSJohannes Doerfert     RuntimeFunction Kind;
1499548b74aSJohannes Doerfert 
1509548b74aSJohannes Doerfert     /// The name of the function.
1519548b74aSJohannes Doerfert     StringRef Name;
1529548b74aSJohannes Doerfert 
1539548b74aSJohannes Doerfert     /// Flag to indicate a variadic function.
1549548b74aSJohannes Doerfert     bool IsVarArg;
1559548b74aSJohannes Doerfert 
1569548b74aSJohannes Doerfert     /// The return type of the function.
1579548b74aSJohannes Doerfert     Type *ReturnType;
1589548b74aSJohannes Doerfert 
1599548b74aSJohannes Doerfert     /// The argument types of the function.
1609548b74aSJohannes Doerfert     SmallVector<Type *, 8> ArgumentTypes;
1619548b74aSJohannes Doerfert 
1629548b74aSJohannes Doerfert     /// The declaration if available.
163f09f4b26SJohannes Doerfert     Function *Declaration = nullptr;
1649548b74aSJohannes Doerfert 
1659548b74aSJohannes Doerfert     /// Uses of this runtime function per function containing the use.
1668855fec3SJohannes Doerfert     using UseVector = SmallVector<Use *, 16>;
1678855fec3SJohannes Doerfert 
168b8235d2bSsstefan1     /// Clear UsesMap for runtime function.
169b8235d2bSsstefan1     void clearUsesMap() { UsesMap.clear(); }
170b8235d2bSsstefan1 
17154bd3751SJohannes Doerfert     /// Boolean conversion that is true if the runtime function was found.
17254bd3751SJohannes Doerfert     operator bool() const { return Declaration; }
17354bd3751SJohannes Doerfert 
1748855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F.
1758855fec3SJohannes Doerfert     UseVector &getOrCreateUseVector(Function *F) {
176b8235d2bSsstefan1       std::shared_ptr<UseVector> &UV = UsesMap[F];
1778855fec3SJohannes Doerfert       if (!UV)
178b8235d2bSsstefan1         UV = std::make_shared<UseVector>();
1798855fec3SJohannes Doerfert       return *UV;
1808855fec3SJohannes Doerfert     }
1818855fec3SJohannes Doerfert 
1828855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F or `nullptr` if there are
1838855fec3SJohannes Doerfert     /// none.
1848855fec3SJohannes Doerfert     const UseVector *getUseVector(Function &F) const {
18595e57072SDavid Blaikie       auto I = UsesMap.find(&F);
18695e57072SDavid Blaikie       if (I != UsesMap.end())
18795e57072SDavid Blaikie         return I->second.get();
18895e57072SDavid Blaikie       return nullptr;
1898855fec3SJohannes Doerfert     }
1908855fec3SJohannes Doerfert 
1918855fec3SJohannes Doerfert     /// Return how many functions contain uses of this runtime function.
1928855fec3SJohannes Doerfert     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
1939548b74aSJohannes Doerfert 
1949548b74aSJohannes Doerfert     /// Return the number of arguments (or the minimal number for variadic
1959548b74aSJohannes Doerfert     /// functions).
1969548b74aSJohannes Doerfert     size_t getNumArgs() const { return ArgumentTypes.size(); }
1979548b74aSJohannes Doerfert 
1989548b74aSJohannes Doerfert     /// Run the callback \p CB on each use and forget the use if the result is
1999548b74aSJohannes Doerfert     /// true. The callback will be fed the function in which the use was
2009548b74aSJohannes Doerfert     /// encountered as second argument.
201624d34afSJohannes Doerfert     void foreachUse(SmallVectorImpl<Function *> &SCC,
202624d34afSJohannes Doerfert                     function_ref<bool(Use &, Function &)> CB) {
203624d34afSJohannes Doerfert       for (Function *F : SCC)
204624d34afSJohannes Doerfert         foreachUse(CB, F);
205e099c7b6Ssstefan1     }
206e099c7b6Ssstefan1 
207e099c7b6Ssstefan1     /// Run the callback \p CB on each use within the function \p F and forget
208e099c7b6Ssstefan1     /// the use if the result is true.
209624d34afSJohannes Doerfert     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
2108855fec3SJohannes Doerfert       SmallVector<unsigned, 8> ToBeDeleted;
2119548b74aSJohannes Doerfert       ToBeDeleted.clear();
212e099c7b6Ssstefan1 
2138855fec3SJohannes Doerfert       unsigned Idx = 0;
214624d34afSJohannes Doerfert       UseVector &UV = getOrCreateUseVector(F);
215e099c7b6Ssstefan1 
2168855fec3SJohannes Doerfert       for (Use *U : UV) {
217e099c7b6Ssstefan1         if (CB(*U, *F))
2188855fec3SJohannes Doerfert           ToBeDeleted.push_back(Idx);
2198855fec3SJohannes Doerfert         ++Idx;
2208855fec3SJohannes Doerfert       }
2218855fec3SJohannes Doerfert 
2228855fec3SJohannes Doerfert       // Remove the to-be-deleted indices in reverse order as prior
223b726c557SJohannes Doerfert       // modifications will not modify the smaller indices.
2248855fec3SJohannes Doerfert       while (!ToBeDeleted.empty()) {
2258855fec3SJohannes Doerfert         unsigned Idx = ToBeDeleted.pop_back_val();
2268855fec3SJohannes Doerfert         UV[Idx] = UV.back();
2278855fec3SJohannes Doerfert         UV.pop_back();
2289548b74aSJohannes Doerfert       }
2299548b74aSJohannes Doerfert     }
2308855fec3SJohannes Doerfert 
2318855fec3SJohannes Doerfert   private:
2328855fec3SJohannes Doerfert     /// Map from functions to all uses of this runtime function contained in
2338855fec3SJohannes Doerfert     /// them.
234b8235d2bSsstefan1     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
2359548b74aSJohannes Doerfert   };
2369548b74aSJohannes Doerfert 
2377cfd267cSsstefan1   /// An OpenMP-IR-Builder instance
2387cfd267cSsstefan1   OpenMPIRBuilder OMPBuilder;
2397cfd267cSsstefan1 
2407cfd267cSsstefan1   /// Map from runtime function kind to the runtime function description.
2417cfd267cSsstefan1   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
2427cfd267cSsstefan1                   RuntimeFunction::OMPRTL___last>
2437cfd267cSsstefan1       RFIs;
2447cfd267cSsstefan1 
2450f426935Ssstefan1   /// Map from ICV kind to the ICV description.
2460f426935Ssstefan1   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
2470f426935Ssstefan1                   InternalControlVar::ICV___last>
2480f426935Ssstefan1       ICVs;
2490f426935Ssstefan1 
2500f426935Ssstefan1   /// Helper to initialize all internal control variable information for those
2510f426935Ssstefan1   /// defined in OMPKinds.def.
2520f426935Ssstefan1   void initializeInternalControlVars() {
2530f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL)                                                 \
2540f426935Ssstefan1   {                                                                            \
2550f426935Ssstefan1     auto &ICV = ICVs[_Name];                                                   \
2560f426935Ssstefan1     ICV.Setter = RTL;                                                          \
2570f426935Ssstefan1   }
2580f426935Ssstefan1 #define ICV_RT_GET(Name, RTL)                                                  \
2590f426935Ssstefan1   {                                                                            \
2600f426935Ssstefan1     auto &ICV = ICVs[Name];                                                    \
2610f426935Ssstefan1     ICV.Getter = RTL;                                                          \
2620f426935Ssstefan1   }
2630f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
2640f426935Ssstefan1   {                                                                            \
2650f426935Ssstefan1     auto &ICV = ICVs[Enum];                                                    \
2660f426935Ssstefan1     ICV.Name = _Name;                                                          \
2670f426935Ssstefan1     ICV.Kind = Enum;                                                           \
2680f426935Ssstefan1     ICV.InitKind = Init;                                                       \
2690f426935Ssstefan1     ICV.EnvVarName = _EnvVarName;                                              \
2700f426935Ssstefan1     switch (ICV.InitKind) {                                                    \
271951e43f3Ssstefan1     case ICV_IMPLEMENTATION_DEFINED:                                           \
2720f426935Ssstefan1       ICV.InitValue = nullptr;                                                 \
2730f426935Ssstefan1       break;                                                                   \
274951e43f3Ssstefan1     case ICV_ZERO:                                                             \
2756aab27baSsstefan1       ICV.InitValue = ConstantInt::get(                                        \
2766aab27baSsstefan1           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
2770f426935Ssstefan1       break;                                                                   \
278951e43f3Ssstefan1     case ICV_FALSE:                                                            \
2796aab27baSsstefan1       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
2800f426935Ssstefan1       break;                                                                   \
281951e43f3Ssstefan1     case ICV_LAST:                                                             \
2820f426935Ssstefan1       break;                                                                   \
2830f426935Ssstefan1     }                                                                          \
2840f426935Ssstefan1   }
2850f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2860f426935Ssstefan1   }
2870f426935Ssstefan1 
2887cfd267cSsstefan1   /// Returns true if the function declaration \p F matches the runtime
2897cfd267cSsstefan1   /// function types, that is, return type \p RTFRetType, and argument types
2907cfd267cSsstefan1   /// \p RTFArgTypes.
2917cfd267cSsstefan1   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
2927cfd267cSsstefan1                                   SmallVector<Type *, 8> &RTFArgTypes) {
2937cfd267cSsstefan1     // TODO: We should output information to the user (under debug output
2947cfd267cSsstefan1     //       and via remarks).
2957cfd267cSsstefan1 
2967cfd267cSsstefan1     if (!F)
2977cfd267cSsstefan1       return false;
2987cfd267cSsstefan1     if (F->getReturnType() != RTFRetType)
2997cfd267cSsstefan1       return false;
3007cfd267cSsstefan1     if (F->arg_size() != RTFArgTypes.size())
3017cfd267cSsstefan1       return false;
3027cfd267cSsstefan1 
3037cfd267cSsstefan1     auto RTFTyIt = RTFArgTypes.begin();
3047cfd267cSsstefan1     for (Argument &Arg : F->args()) {
3057cfd267cSsstefan1       if (Arg.getType() != *RTFTyIt)
3067cfd267cSsstefan1         return false;
3077cfd267cSsstefan1 
3087cfd267cSsstefan1       ++RTFTyIt;
3097cfd267cSsstefan1     }
3107cfd267cSsstefan1 
3117cfd267cSsstefan1     return true;
3127cfd267cSsstefan1   }
3137cfd267cSsstefan1 
314b726c557SJohannes Doerfert   // Helper to collect all uses of the declaration in the UsesMap.
315b8235d2bSsstefan1   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
3167cfd267cSsstefan1     unsigned NumUses = 0;
3177cfd267cSsstefan1     if (!RFI.Declaration)
3187cfd267cSsstefan1       return NumUses;
3197cfd267cSsstefan1     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
3207cfd267cSsstefan1 
321b8235d2bSsstefan1     if (CollectStats) {
3227cfd267cSsstefan1       NumOpenMPRuntimeFunctionsIdentified += 1;
3237cfd267cSsstefan1       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
324b8235d2bSsstefan1     }
3257cfd267cSsstefan1 
3267cfd267cSsstefan1     // TODO: We directly convert uses into proper calls and unknown uses.
3277cfd267cSsstefan1     for (Use &U : RFI.Declaration->uses()) {
3287cfd267cSsstefan1       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
3297cfd267cSsstefan1         if (ModuleSlice.count(UserI->getFunction())) {
3307cfd267cSsstefan1           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
3317cfd267cSsstefan1           ++NumUses;
3327cfd267cSsstefan1         }
3337cfd267cSsstefan1       } else {
3347cfd267cSsstefan1         RFI.getOrCreateUseVector(nullptr).push_back(&U);
3357cfd267cSsstefan1         ++NumUses;
3367cfd267cSsstefan1       }
3377cfd267cSsstefan1     }
3387cfd267cSsstefan1     return NumUses;
339b8235d2bSsstefan1   }
3407cfd267cSsstefan1 
34197517055SGiorgis Georgakoudis   // Helper function to recollect uses of a runtime function.
34297517055SGiorgis Georgakoudis   void recollectUsesForFunction(RuntimeFunction RTF) {
34397517055SGiorgis Georgakoudis     auto &RFI = RFIs[RTF];
344b8235d2bSsstefan1     RFI.clearUsesMap();
345b8235d2bSsstefan1     collectUses(RFI, /*CollectStats*/ false);
346b8235d2bSsstefan1   }
34797517055SGiorgis Georgakoudis 
34897517055SGiorgis Georgakoudis   // Helper function to recollect uses of all runtime functions.
34997517055SGiorgis Georgakoudis   void recollectUses() {
35097517055SGiorgis Georgakoudis     for (int Idx = 0; Idx < RFIs.size(); ++Idx)
35197517055SGiorgis Georgakoudis       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
352b8235d2bSsstefan1   }
353b8235d2bSsstefan1 
354b8235d2bSsstefan1   /// Helper to initialize all runtime function information for those defined
355b8235d2bSsstefan1   /// in OpenMPKinds.def.
356b8235d2bSsstefan1   void initializeRuntimeFunctions() {
3577cfd267cSsstefan1     Module &M = *((*ModuleSlice.begin())->getParent());
3587cfd267cSsstefan1 
3596aab27baSsstefan1     // Helper macros for handling __VA_ARGS__ in OMP_RTL
3606aab27baSsstefan1 #define OMP_TYPE(VarName, ...)                                                 \
3616aab27baSsstefan1   Type *VarName = OMPBuilder.VarName;                                          \
3626aab27baSsstefan1   (void)VarName;
3636aab27baSsstefan1 
3646aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...)                                           \
3656aab27baSsstefan1   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
3666aab27baSsstefan1   (void)VarName##Ty;                                                           \
3676aab27baSsstefan1   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
3686aab27baSsstefan1   (void)VarName##PtrTy;
3696aab27baSsstefan1 
3706aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
3716aab27baSsstefan1   FunctionType *VarName = OMPBuilder.VarName;                                  \
3726aab27baSsstefan1   (void)VarName;                                                               \
3736aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3746aab27baSsstefan1   (void)VarName##Ptr;
3756aab27baSsstefan1 
3766aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...)                                          \
3776aab27baSsstefan1   StructType *VarName = OMPBuilder.VarName;                                    \
3786aab27baSsstefan1   (void)VarName;                                                               \
3796aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3806aab27baSsstefan1   (void)VarName##Ptr;
3816aab27baSsstefan1 
3827cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
3837cfd267cSsstefan1   {                                                                            \
3847cfd267cSsstefan1     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
3857cfd267cSsstefan1     Function *F = M.getFunction(_Name);                                        \
3866aab27baSsstefan1     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
3877cfd267cSsstefan1       auto &RFI = RFIs[_Enum];                                                 \
3887cfd267cSsstefan1       RFI.Kind = _Enum;                                                        \
3897cfd267cSsstefan1       RFI.Name = _Name;                                                        \
3907cfd267cSsstefan1       RFI.IsVarArg = _IsVarArg;                                                \
3916aab27baSsstefan1       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
3927cfd267cSsstefan1       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
3937cfd267cSsstefan1       RFI.Declaration = F;                                                     \
394b8235d2bSsstefan1       unsigned NumUses = collectUses(RFI);                                     \
3957cfd267cSsstefan1       (void)NumUses;                                                           \
3967cfd267cSsstefan1       LLVM_DEBUG({                                                             \
3977cfd267cSsstefan1         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
3987cfd267cSsstefan1                << " found\n";                                                  \
3997cfd267cSsstefan1         if (RFI.Declaration)                                                   \
4007cfd267cSsstefan1           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
4017cfd267cSsstefan1                  << RFI.getNumFunctionsWithUses()                              \
4027cfd267cSsstefan1                  << " different functions.\n";                                 \
4037cfd267cSsstefan1       });                                                                      \
4047cfd267cSsstefan1     }                                                                          \
4057cfd267cSsstefan1   }
4067cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
4077cfd267cSsstefan1 
4087cfd267cSsstefan1     // TODO: We should attach the attributes defined in OMPKinds.def.
4097cfd267cSsstefan1   }
410e8039ad4SJohannes Doerfert 
411e8039ad4SJohannes Doerfert   /// Collection of known kernels (\see Kernel) in the module.
412e8039ad4SJohannes Doerfert   SmallPtrSetImpl<Kernel> &Kernels;
4137cfd267cSsstefan1 };
4147cfd267cSsstefan1 
4158931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload
4168931add6SHamilton Tobon Mosquera /// array, to a vector in memory.
4178931add6SHamilton Tobon Mosquera struct OffloadArray {
4188931add6SHamilton Tobon Mosquera   /// Physical array (in the IR).
4198931add6SHamilton Tobon Mosquera   AllocaInst *Array = nullptr;
4208931add6SHamilton Tobon Mosquera   /// Mapped values.
4218931add6SHamilton Tobon Mosquera   SmallVector<Value *, 8> StoredValues;
4228931add6SHamilton Tobon Mosquera   /// Last stores made in the offload array.
4238931add6SHamilton Tobon Mosquera   SmallVector<StoreInst *, 8> LastAccesses;
4248931add6SHamilton Tobon Mosquera 
4258931add6SHamilton Tobon Mosquera   OffloadArray() = default;
4268931add6SHamilton Tobon Mosquera 
4278931add6SHamilton Tobon Mosquera   /// Initializes the OffloadArray with the values stored in \p Array before
4288931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached. Returns false if the initialization
4298931add6SHamilton Tobon Mosquera   /// fails.
4308931add6SHamilton Tobon Mosquera   /// This MUST be used immediately after the construction of the object.
4318931add6SHamilton Tobon Mosquera   bool initialize(AllocaInst &Array, Instruction &Before) {
4328931add6SHamilton Tobon Mosquera     if (!Array.getAllocatedType()->isArrayTy())
4338931add6SHamilton Tobon Mosquera       return false;
4348931add6SHamilton Tobon Mosquera 
4358931add6SHamilton Tobon Mosquera     if (!getValues(Array, Before))
4368931add6SHamilton Tobon Mosquera       return false;
4378931add6SHamilton Tobon Mosquera 
4388931add6SHamilton Tobon Mosquera     this->Array = &Array;
4398931add6SHamilton Tobon Mosquera     return true;
4408931add6SHamilton Tobon Mosquera   }
4418931add6SHamilton Tobon Mosquera 
442da8bec47SJoseph Huber   static const unsigned DeviceIDArgNum = 1;
443da8bec47SJoseph Huber   static const unsigned BasePtrsArgNum = 3;
444da8bec47SJoseph Huber   static const unsigned PtrsArgNum = 4;
445da8bec47SJoseph Huber   static const unsigned SizesArgNum = 5;
4461d3d9b9cSHamilton Tobon Mosquera 
4478931add6SHamilton Tobon Mosquera private:
4488931add6SHamilton Tobon Mosquera   /// Traverses the BasicBlock where \p Array is, collecting the stores made to
4498931add6SHamilton Tobon Mosquera   /// \p Array, leaving StoredValues with the values stored before the
4508931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached.
4518931add6SHamilton Tobon Mosquera   bool getValues(AllocaInst &Array, Instruction &Before) {
4528931add6SHamilton Tobon Mosquera     // Initialize container.
453d08d490aSJohannes Doerfert     const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
4548931add6SHamilton Tobon Mosquera     StoredValues.assign(NumValues, nullptr);
4558931add6SHamilton Tobon Mosquera     LastAccesses.assign(NumValues, nullptr);
4568931add6SHamilton Tobon Mosquera 
4578931add6SHamilton Tobon Mosquera     // TODO: This assumes the instruction \p Before is in the same
4588931add6SHamilton Tobon Mosquera     //  BasicBlock as Array. Make it general, for any control flow graph.
4598931add6SHamilton Tobon Mosquera     BasicBlock *BB = Array.getParent();
4608931add6SHamilton Tobon Mosquera     if (BB != Before.getParent())
4618931add6SHamilton Tobon Mosquera       return false;
4628931add6SHamilton Tobon Mosquera 
4638931add6SHamilton Tobon Mosquera     const DataLayout &DL = Array.getModule()->getDataLayout();
4648931add6SHamilton Tobon Mosquera     const unsigned int PointerSize = DL.getPointerSize();
4658931add6SHamilton Tobon Mosquera 
4668931add6SHamilton Tobon Mosquera     for (Instruction &I : *BB) {
4678931add6SHamilton Tobon Mosquera       if (&I == &Before)
4688931add6SHamilton Tobon Mosquera         break;
4698931add6SHamilton Tobon Mosquera 
4708931add6SHamilton Tobon Mosquera       if (!isa<StoreInst>(&I))
4718931add6SHamilton Tobon Mosquera         continue;
4728931add6SHamilton Tobon Mosquera 
4738931add6SHamilton Tobon Mosquera       auto *S = cast<StoreInst>(&I);
4748931add6SHamilton Tobon Mosquera       int64_t Offset = -1;
475d08d490aSJohannes Doerfert       auto *Dst =
476d08d490aSJohannes Doerfert           GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
4778931add6SHamilton Tobon Mosquera       if (Dst == &Array) {
4788931add6SHamilton Tobon Mosquera         int64_t Idx = Offset / PointerSize;
4798931add6SHamilton Tobon Mosquera         StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
4808931add6SHamilton Tobon Mosquera         LastAccesses[Idx] = S;
4818931add6SHamilton Tobon Mosquera       }
4828931add6SHamilton Tobon Mosquera     }
4838931add6SHamilton Tobon Mosquera 
4848931add6SHamilton Tobon Mosquera     return isFilled();
4858931add6SHamilton Tobon Mosquera   }
4868931add6SHamilton Tobon Mosquera 
4878931add6SHamilton Tobon Mosquera   /// Returns true if all values in StoredValues and
4888931add6SHamilton Tobon Mosquera   /// LastAccesses are not nullptrs.
4898931add6SHamilton Tobon Mosquera   bool isFilled() {
4908931add6SHamilton Tobon Mosquera     const unsigned NumValues = StoredValues.size();
4918931add6SHamilton Tobon Mosquera     for (unsigned I = 0; I < NumValues; ++I) {
4928931add6SHamilton Tobon Mosquera       if (!StoredValues[I] || !LastAccesses[I])
4938931add6SHamilton Tobon Mosquera         return false;
4948931add6SHamilton Tobon Mosquera     }
4958931add6SHamilton Tobon Mosquera 
4968931add6SHamilton Tobon Mosquera     return true;
4978931add6SHamilton Tobon Mosquera   }
4988931add6SHamilton Tobon Mosquera };
4998931add6SHamilton Tobon Mosquera 
5007cfd267cSsstefan1 struct OpenMPOpt {
5017cfd267cSsstefan1 
5027cfd267cSsstefan1   using OptimizationRemarkGetter =
5037cfd267cSsstefan1       function_ref<OptimizationRemarkEmitter &(Function *)>;
5047cfd267cSsstefan1 
5057cfd267cSsstefan1   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
5067cfd267cSsstefan1             OptimizationRemarkGetter OREGetter,
507b8235d2bSsstefan1             OMPInformationCache &OMPInfoCache, Attributor &A)
50877b79d79SMehdi Amini       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
509b8235d2bSsstefan1         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
5107cfd267cSsstefan1 
511a2281419SJoseph Huber   /// Check if any remarks are enabled for openmp-opt
512a2281419SJoseph Huber   bool remarksEnabled() {
513a2281419SJoseph Huber     auto &Ctx = M.getContext();
514a2281419SJoseph Huber     return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
515a2281419SJoseph Huber   }
516a2281419SJoseph Huber 
5179548b74aSJohannes Doerfert   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
518b2ad63d3SJoseph Huber   bool run(bool IsModulePass) {
51954bd3751SJohannes Doerfert     if (SCC.empty())
52054bd3751SJohannes Doerfert       return false;
52154bd3751SJohannes Doerfert 
5229548b74aSJohannes Doerfert     bool Changed = false;
5239548b74aSJohannes Doerfert 
5249548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
52577b79d79SMehdi Amini                       << " functions in a slice with "
52677b79d79SMehdi Amini                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
5279548b74aSJohannes Doerfert 
528b2ad63d3SJoseph Huber     if (IsModulePass) {
52918283125SJoseph Huber       Changed |= runAttributor();
53018283125SJoseph Huber 
5316fc51c9fSJoseph Huber       // Recollect uses, in case Attributor deleted any.
5326fc51c9fSJoseph Huber       OMPInfoCache.recollectUses();
5336fc51c9fSJoseph Huber 
534b2ad63d3SJoseph Huber       if (remarksEnabled())
535b2ad63d3SJoseph Huber         analysisGlobalization();
536b2ad63d3SJoseph Huber     } else {
537e8039ad4SJohannes Doerfert       if (PrintICVValues)
538e8039ad4SJohannes Doerfert         printICVs();
539e8039ad4SJohannes Doerfert       if (PrintOpenMPKernels)
540e8039ad4SJohannes Doerfert         printKernels();
541e8039ad4SJohannes Doerfert 
5425b0581aeSJohannes Doerfert       Changed |= rewriteDeviceCodeStateMachine();
5435b0581aeSJohannes Doerfert 
544e8039ad4SJohannes Doerfert       Changed |= runAttributor();
545e8039ad4SJohannes Doerfert 
546e8039ad4SJohannes Doerfert       // Recollect uses, in case Attributor deleted any.
547e8039ad4SJohannes Doerfert       OMPInfoCache.recollectUses();
548e8039ad4SJohannes Doerfert 
549e8039ad4SJohannes Doerfert       Changed |= deleteParallelRegions();
550496f8e5bSHamilton Tobon Mosquera       if (HideMemoryTransferLatency)
551496f8e5bSHamilton Tobon Mosquera         Changed |= hideMemTransfersLatency();
5523a6bfcf2SGiorgis Georgakoudis       Changed |= deduplicateRuntimeCalls();
5533a6bfcf2SGiorgis Georgakoudis       if (EnableParallelRegionMerging) {
5543a6bfcf2SGiorgis Georgakoudis         if (mergeParallelRegions()) {
5553a6bfcf2SGiorgis Georgakoudis           deduplicateRuntimeCalls();
5563a6bfcf2SGiorgis Georgakoudis           Changed = true;
5573a6bfcf2SGiorgis Georgakoudis         }
5583a6bfcf2SGiorgis Georgakoudis       }
559b2ad63d3SJoseph Huber     }
560e8039ad4SJohannes Doerfert 
561e8039ad4SJohannes Doerfert     return Changed;
562e8039ad4SJohannes Doerfert   }
563e8039ad4SJohannes Doerfert 
5640f426935Ssstefan1   /// Print initial ICV values for testing.
5650f426935Ssstefan1   /// FIXME: This should be done from the Attributor once it is added.
566e8039ad4SJohannes Doerfert   void printICVs() const {
567cb9cfa0dSsstefan1     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
568cb9cfa0dSsstefan1                                  ICV_proc_bind};
5690f426935Ssstefan1 
5700f426935Ssstefan1     for (Function *F : OMPInfoCache.ModuleSlice) {
5710f426935Ssstefan1       for (auto ICV : ICVs) {
5720f426935Ssstefan1         auto ICVInfo = OMPInfoCache.ICVs[ICV];
5732db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5742db182ffSJoseph Huber           return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
5750f426935Ssstefan1                      << " Value: "
5760f426935Ssstefan1                      << (ICVInfo.InitValue
57761cdaf66SSimon Pilgrim                              ? toString(ICVInfo.InitValue->getValue(), 10, true)
5780f426935Ssstefan1                              : "IMPLEMENTATION_DEFINED");
5790f426935Ssstefan1         };
5800f426935Ssstefan1 
5812db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
5820f426935Ssstefan1       }
5830f426935Ssstefan1     }
5840f426935Ssstefan1   }
5850f426935Ssstefan1 
586e8039ad4SJohannes Doerfert   /// Print OpenMP GPU kernels for testing.
587e8039ad4SJohannes Doerfert   void printKernels() const {
588e8039ad4SJohannes Doerfert     for (Function *F : SCC) {
589e8039ad4SJohannes Doerfert       if (!OMPInfoCache.Kernels.count(F))
590e8039ad4SJohannes Doerfert         continue;
591b8235d2bSsstefan1 
5922db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5932db182ffSJoseph Huber         return ORA << "OpenMP GPU kernel "
594e8039ad4SJohannes Doerfert                    << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
595e8039ad4SJohannes Doerfert       };
596b8235d2bSsstefan1 
5972db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
598e8039ad4SJohannes Doerfert     }
5999548b74aSJohannes Doerfert   }
6009548b74aSJohannes Doerfert 
6017cfd267cSsstefan1   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
6027cfd267cSsstefan1   /// given it has to be the callee or a nullptr is returned.
6037cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6047cfd267cSsstefan1       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6057cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(U.getUser());
6067cfd267cSsstefan1     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
6077cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6087cfd267cSsstefan1       return CI;
6097cfd267cSsstefan1     return nullptr;
6107cfd267cSsstefan1   }
6117cfd267cSsstefan1 
6127cfd267cSsstefan1   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
6137cfd267cSsstefan1   /// the callee or a nullptr is returned.
6147cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6157cfd267cSsstefan1       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6167cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(&V);
6177cfd267cSsstefan1     if (CI && !CI->hasOperandBundles() &&
6187cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6197cfd267cSsstefan1       return CI;
6207cfd267cSsstefan1     return nullptr;
6217cfd267cSsstefan1   }
6227cfd267cSsstefan1 
6239548b74aSJohannes Doerfert private:
6243a6bfcf2SGiorgis Georgakoudis   /// Merge parallel regions when it is safe.
6253a6bfcf2SGiorgis Georgakoudis   bool mergeParallelRegions() {
6263a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackCalleeOperand = 2;
6273a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackFirstArgOperand = 3;
6283a6bfcf2SGiorgis Georgakoudis     using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6293a6bfcf2SGiorgis Georgakoudis 
6303a6bfcf2SGiorgis Georgakoudis     // Check if there are any __kmpc_fork_call calls to merge.
6313a6bfcf2SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo &RFI =
6323a6bfcf2SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
6333a6bfcf2SGiorgis Georgakoudis 
6343a6bfcf2SGiorgis Georgakoudis     if (!RFI.Declaration)
6353a6bfcf2SGiorgis Georgakoudis       return false;
6363a6bfcf2SGiorgis Georgakoudis 
63797517055SGiorgis Georgakoudis     // Unmergable calls that prevent merging a parallel region.
63897517055SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
63997517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
64097517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
64197517055SGiorgis Georgakoudis     };
6423a6bfcf2SGiorgis Georgakoudis 
6433a6bfcf2SGiorgis Georgakoudis     bool Changed = false;
6443a6bfcf2SGiorgis Georgakoudis     LoopInfo *LI = nullptr;
6453a6bfcf2SGiorgis Georgakoudis     DominatorTree *DT = nullptr;
6463a6bfcf2SGiorgis Georgakoudis 
6473a6bfcf2SGiorgis Georgakoudis     SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
6483a6bfcf2SGiorgis Georgakoudis 
6493a6bfcf2SGiorgis Georgakoudis     BasicBlock *StartBB = nullptr, *EndBB = nullptr;
6503a6bfcf2SGiorgis Georgakoudis     auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
6513a6bfcf2SGiorgis Georgakoudis                          BasicBlock &ContinuationIP) {
6523a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGStartBB = CodeGenIP.getBlock();
6533a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGEndBB =
6543a6bfcf2SGiorgis Georgakoudis           SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
6553a6bfcf2SGiorgis Georgakoudis       assert(StartBB != nullptr && "StartBB should not be null");
6563a6bfcf2SGiorgis Georgakoudis       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
6573a6bfcf2SGiorgis Georgakoudis       assert(EndBB != nullptr && "EndBB should not be null");
6583a6bfcf2SGiorgis Georgakoudis       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
6593a6bfcf2SGiorgis Georgakoudis     };
6603a6bfcf2SGiorgis Georgakoudis 
661240dd924SAlex Zinenko     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
662240dd924SAlex Zinenko                       Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
663240dd924SAlex Zinenko       ReplacementValue = &Inner;
6643a6bfcf2SGiorgis Georgakoudis       return CodeGenIP;
6653a6bfcf2SGiorgis Georgakoudis     };
6663a6bfcf2SGiorgis Georgakoudis 
6673a6bfcf2SGiorgis Georgakoudis     auto FiniCB = [&](InsertPointTy CodeGenIP) {};
6683a6bfcf2SGiorgis Georgakoudis 
66997517055SGiorgis Georgakoudis     /// Create a sequential execution region within a merged parallel region,
67097517055SGiorgis Georgakoudis     /// encapsulated in a master construct with a barrier for synchronization.
67197517055SGiorgis Georgakoudis     auto CreateSequentialRegion = [&](Function *OuterFn,
67297517055SGiorgis Georgakoudis                                       BasicBlock *OuterPredBB,
67397517055SGiorgis Georgakoudis                                       Instruction *SeqStartI,
67497517055SGiorgis Georgakoudis                                       Instruction *SeqEndI) {
67597517055SGiorgis Georgakoudis       // Isolate the instructions of the sequential region to a separate
67697517055SGiorgis Georgakoudis       // block.
67797517055SGiorgis Georgakoudis       BasicBlock *ParentBB = SeqStartI->getParent();
67897517055SGiorgis Georgakoudis       BasicBlock *SeqEndBB =
67997517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
68097517055SGiorgis Georgakoudis       BasicBlock *SeqAfterBB =
68197517055SGiorgis Georgakoudis           SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
68297517055SGiorgis Georgakoudis       BasicBlock *SeqStartBB =
68397517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
68497517055SGiorgis Georgakoudis 
68597517055SGiorgis Georgakoudis       assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
68697517055SGiorgis Georgakoudis              "Expected a different CFG");
68797517055SGiorgis Georgakoudis       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
68897517055SGiorgis Georgakoudis       ParentBB->getTerminator()->eraseFromParent();
68997517055SGiorgis Georgakoudis 
69097517055SGiorgis Georgakoudis       auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
69197517055SGiorgis Georgakoudis                            BasicBlock &ContinuationIP) {
69297517055SGiorgis Georgakoudis         BasicBlock *CGStartBB = CodeGenIP.getBlock();
69397517055SGiorgis Georgakoudis         BasicBlock *CGEndBB =
69497517055SGiorgis Georgakoudis             SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
69597517055SGiorgis Georgakoudis         assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
69697517055SGiorgis Georgakoudis         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
69797517055SGiorgis Georgakoudis         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
69897517055SGiorgis Georgakoudis         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
69997517055SGiorgis Georgakoudis       };
70097517055SGiorgis Georgakoudis       auto FiniCB = [&](InsertPointTy CodeGenIP) {};
70197517055SGiorgis Georgakoudis 
70297517055SGiorgis Georgakoudis       // Find outputs from the sequential region to outside users and
70397517055SGiorgis Georgakoudis       // broadcast their values to them.
70497517055SGiorgis Georgakoudis       for (Instruction &I : *SeqStartBB) {
70597517055SGiorgis Georgakoudis         SmallPtrSet<Instruction *, 4> OutsideUsers;
70697517055SGiorgis Georgakoudis         for (User *Usr : I.users()) {
70797517055SGiorgis Georgakoudis           Instruction &UsrI = *cast<Instruction>(Usr);
70897517055SGiorgis Georgakoudis           // Ignore outputs to LT intrinsics, code extraction for the merged
70997517055SGiorgis Georgakoudis           // parallel region will fix them.
71097517055SGiorgis Georgakoudis           if (UsrI.isLifetimeStartOrEnd())
71197517055SGiorgis Georgakoudis             continue;
71297517055SGiorgis Georgakoudis 
71397517055SGiorgis Georgakoudis           if (UsrI.getParent() != SeqStartBB)
71497517055SGiorgis Georgakoudis             OutsideUsers.insert(&UsrI);
71597517055SGiorgis Georgakoudis         }
71697517055SGiorgis Georgakoudis 
71797517055SGiorgis Georgakoudis         if (OutsideUsers.empty())
71897517055SGiorgis Georgakoudis           continue;
71997517055SGiorgis Georgakoudis 
72097517055SGiorgis Georgakoudis         // Emit an alloca in the outer region to store the broadcasted
72197517055SGiorgis Georgakoudis         // value.
72297517055SGiorgis Georgakoudis         const DataLayout &DL = M.getDataLayout();
72397517055SGiorgis Georgakoudis         AllocaInst *AllocaI = new AllocaInst(
72497517055SGiorgis Georgakoudis             I.getType(), DL.getAllocaAddrSpace(), nullptr,
72597517055SGiorgis Georgakoudis             I.getName() + ".seq.output.alloc", &OuterFn->front().front());
72697517055SGiorgis Georgakoudis 
72797517055SGiorgis Georgakoudis         // Emit a store instruction in the sequential BB to update the
72897517055SGiorgis Georgakoudis         // value.
72997517055SGiorgis Georgakoudis         new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
73097517055SGiorgis Georgakoudis 
73197517055SGiorgis Georgakoudis         // Emit a load instruction and replace the use of the output value
73297517055SGiorgis Georgakoudis         // with it.
73397517055SGiorgis Georgakoudis         for (Instruction *UsrI : OutsideUsers) {
7345b70c12fSJohannes Doerfert           LoadInst *LoadI = new LoadInst(
7355b70c12fSJohannes Doerfert               I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
73697517055SGiorgis Georgakoudis           UsrI->replaceUsesOfWith(&I, LoadI);
73797517055SGiorgis Georgakoudis         }
73897517055SGiorgis Georgakoudis       }
73997517055SGiorgis Georgakoudis 
74097517055SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(
74197517055SGiorgis Georgakoudis           InsertPointTy(ParentBB, ParentBB->end()), DL);
74297517055SGiorgis Georgakoudis       InsertPointTy SeqAfterIP =
74397517055SGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
74497517055SGiorgis Georgakoudis 
74597517055SGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
74697517055SGiorgis Georgakoudis 
74797517055SGiorgis Georgakoudis       BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
74897517055SGiorgis Georgakoudis 
74997517055SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
75097517055SGiorgis Georgakoudis                         << "\n");
75197517055SGiorgis Georgakoudis     };
75297517055SGiorgis Georgakoudis 
7533a6bfcf2SGiorgis Georgakoudis     // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
7543a6bfcf2SGiorgis Georgakoudis     // contained in BB and only separated by instructions that can be
7553a6bfcf2SGiorgis Georgakoudis     // redundantly executed in parallel. The block BB is split before the first
7563a6bfcf2SGiorgis Georgakoudis     // call (in MergableCIs) and after the last so the entire region we merge
7573a6bfcf2SGiorgis Georgakoudis     // into a single parallel region is contained in a single basic block
7583a6bfcf2SGiorgis Georgakoudis     // without any other instructions. We use the OpenMPIRBuilder to outline
7593a6bfcf2SGiorgis Georgakoudis     // that block and call the resulting function via __kmpc_fork_call.
7603a6bfcf2SGiorgis Georgakoudis     auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
7613a6bfcf2SGiorgis Georgakoudis       // TODO: Change the interface to allow single CIs expanded, e.g, to
7623a6bfcf2SGiorgis Georgakoudis       // include an outer loop.
7633a6bfcf2SGiorgis Georgakoudis       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
7643a6bfcf2SGiorgis Georgakoudis 
7653a6bfcf2SGiorgis Georgakoudis       auto Remark = [&](OptimizationRemark OR) {
7663a6bfcf2SGiorgis Georgakoudis         OR << "Parallel region at "
7673a6bfcf2SGiorgis Georgakoudis            << ore::NV("OpenMPParallelMergeFront",
7683a6bfcf2SGiorgis Georgakoudis                       MergableCIs.front()->getDebugLoc())
7693a6bfcf2SGiorgis Georgakoudis            << " merged with parallel regions at ";
77023b0ab2aSKazu Hirata         for (auto *CI : llvm::drop_begin(MergableCIs)) {
7713a6bfcf2SGiorgis Georgakoudis           OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
7723a6bfcf2SGiorgis Georgakoudis           if (CI != MergableCIs.back())
7733a6bfcf2SGiorgis Georgakoudis             OR << ", ";
7743a6bfcf2SGiorgis Georgakoudis         }
7753a6bfcf2SGiorgis Georgakoudis         return OR;
7763a6bfcf2SGiorgis Georgakoudis       };
7773a6bfcf2SGiorgis Georgakoudis 
7783a6bfcf2SGiorgis Georgakoudis       emitRemark<OptimizationRemark>(MergableCIs.front(),
7793a6bfcf2SGiorgis Georgakoudis                                      "OpenMPParallelRegionMerging", Remark);
7803a6bfcf2SGiorgis Georgakoudis 
7813a6bfcf2SGiorgis Georgakoudis       Function *OriginalFn = BB->getParent();
7823a6bfcf2SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
7833a6bfcf2SGiorgis Georgakoudis                         << " parallel regions in " << OriginalFn->getName()
7843a6bfcf2SGiorgis Georgakoudis                         << "\n");
7853a6bfcf2SGiorgis Georgakoudis 
7863a6bfcf2SGiorgis Georgakoudis       // Isolate the calls to merge in a separate block.
7873a6bfcf2SGiorgis Georgakoudis       EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
7883a6bfcf2SGiorgis Georgakoudis       BasicBlock *AfterBB =
7893a6bfcf2SGiorgis Georgakoudis           SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
7903a6bfcf2SGiorgis Georgakoudis       StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
7913a6bfcf2SGiorgis Georgakoudis                            "omp.par.merged");
7923a6bfcf2SGiorgis Georgakoudis 
7933a6bfcf2SGiorgis Georgakoudis       assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
7943a6bfcf2SGiorgis Georgakoudis       const DebugLoc DL = BB->getTerminator()->getDebugLoc();
7953a6bfcf2SGiorgis Georgakoudis       BB->getTerminator()->eraseFromParent();
7963a6bfcf2SGiorgis Georgakoudis 
79797517055SGiorgis Georgakoudis       // Create sequential regions for sequential instructions that are
79897517055SGiorgis Georgakoudis       // in-between mergable parallel regions.
79997517055SGiorgis Georgakoudis       for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
80097517055SGiorgis Georgakoudis            It != End; ++It) {
80197517055SGiorgis Georgakoudis         Instruction *ForkCI = *It;
80297517055SGiorgis Georgakoudis         Instruction *NextForkCI = *(It + 1);
80397517055SGiorgis Georgakoudis 
80497517055SGiorgis Georgakoudis         // Continue if there are no in-between instructions.
80597517055SGiorgis Georgakoudis         if (ForkCI->getNextNode() == NextForkCI)
80697517055SGiorgis Georgakoudis           continue;
80797517055SGiorgis Georgakoudis 
80897517055SGiorgis Georgakoudis         CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
80997517055SGiorgis Georgakoudis                                NextForkCI->getPrevNode());
81097517055SGiorgis Georgakoudis       }
81197517055SGiorgis Georgakoudis 
8123a6bfcf2SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
8133a6bfcf2SGiorgis Georgakoudis                                                DL);
8143a6bfcf2SGiorgis Georgakoudis       IRBuilder<>::InsertPoint AllocaIP(
8153a6bfcf2SGiorgis Georgakoudis           &OriginalFn->getEntryBlock(),
8163a6bfcf2SGiorgis Georgakoudis           OriginalFn->getEntryBlock().getFirstInsertionPt());
8173a6bfcf2SGiorgis Georgakoudis       // Create the merged parallel region with default proc binding, to
8183a6bfcf2SGiorgis Georgakoudis       // avoid overriding binding settings, and without explicit cancellation.
819e5dba2d7SMichael Kruse       InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
8203a6bfcf2SGiorgis Georgakoudis           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
8213a6bfcf2SGiorgis Georgakoudis           OMP_PROC_BIND_default, /* IsCancellable */ false);
8223a6bfcf2SGiorgis Georgakoudis       BranchInst::Create(AfterBB, AfterIP.getBlock());
8233a6bfcf2SGiorgis Georgakoudis 
8243a6bfcf2SGiorgis Georgakoudis       // Perform the actual outlining.
825b1191206SMichael Kruse       OMPInfoCache.OMPBuilder.finalize(OriginalFn,
826b1191206SMichael Kruse                                        /* AllowExtractorSinking */ true);
8273a6bfcf2SGiorgis Georgakoudis 
8283a6bfcf2SGiorgis Georgakoudis       Function *OutlinedFn = MergableCIs.front()->getCaller();
8293a6bfcf2SGiorgis Georgakoudis 
8303a6bfcf2SGiorgis Georgakoudis       // Replace the __kmpc_fork_call calls with direct calls to the outlined
8313a6bfcf2SGiorgis Georgakoudis       // callbacks.
8323a6bfcf2SGiorgis Georgakoudis       SmallVector<Value *, 8> Args;
8333a6bfcf2SGiorgis Georgakoudis       for (auto *CI : MergableCIs) {
8343a6bfcf2SGiorgis Georgakoudis         Value *Callee =
8353a6bfcf2SGiorgis Georgakoudis             CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
8363a6bfcf2SGiorgis Georgakoudis         FunctionType *FT =
8373a6bfcf2SGiorgis Georgakoudis             cast<FunctionType>(Callee->getType()->getPointerElementType());
8383a6bfcf2SGiorgis Georgakoudis         Args.clear();
8393a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(0));
8403a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(1));
8413a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8423a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8433a6bfcf2SGiorgis Georgakoudis           Args.push_back(CI->getArgOperand(U));
8443a6bfcf2SGiorgis Georgakoudis 
8453a6bfcf2SGiorgis Georgakoudis         CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
8463a6bfcf2SGiorgis Georgakoudis         if (CI->getDebugLoc())
8473a6bfcf2SGiorgis Georgakoudis           NewCI->setDebugLoc(CI->getDebugLoc());
8483a6bfcf2SGiorgis Georgakoudis 
8493a6bfcf2SGiorgis Georgakoudis         // Forward parameter attributes from the callback to the callee.
8503a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8513a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8523a6bfcf2SGiorgis Georgakoudis           for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
8533a6bfcf2SGiorgis Georgakoudis             NewCI->addParamAttr(
8543a6bfcf2SGiorgis Georgakoudis                 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
8553a6bfcf2SGiorgis Georgakoudis 
8563a6bfcf2SGiorgis Georgakoudis         // Emit an explicit barrier to replace the implicit fork-join barrier.
8573a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.back()) {
8583a6bfcf2SGiorgis Georgakoudis           // TODO: Remove barrier if the merged parallel region includes the
8593a6bfcf2SGiorgis Georgakoudis           // 'nowait' clause.
860e5dba2d7SMichael Kruse           OMPInfoCache.OMPBuilder.createBarrier(
8613a6bfcf2SGiorgis Georgakoudis               InsertPointTy(NewCI->getParent(),
8623a6bfcf2SGiorgis Georgakoudis                             NewCI->getNextNode()->getIterator()),
8633a6bfcf2SGiorgis Georgakoudis               OMPD_parallel);
8643a6bfcf2SGiorgis Georgakoudis         }
8653a6bfcf2SGiorgis Georgakoudis 
8663a6bfcf2SGiorgis Georgakoudis         auto Remark = [&](OptimizationRemark OR) {
8673a6bfcf2SGiorgis Georgakoudis           return OR << "Parallel region at "
8683a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
8693a6bfcf2SGiorgis Georgakoudis                     << " merged with "
8703a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMergeFront",
8713a6bfcf2SGiorgis Georgakoudis                                MergableCIs.front()->getDebugLoc());
8723a6bfcf2SGiorgis Georgakoudis         };
8733a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.front())
8743a6bfcf2SGiorgis Georgakoudis           emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
8753a6bfcf2SGiorgis Georgakoudis                                          Remark);
8763a6bfcf2SGiorgis Georgakoudis 
8773a6bfcf2SGiorgis Georgakoudis         CI->eraseFromParent();
8783a6bfcf2SGiorgis Georgakoudis       }
8793a6bfcf2SGiorgis Georgakoudis 
8803a6bfcf2SGiorgis Georgakoudis       assert(OutlinedFn != OriginalFn && "Outlining failed");
8817fea561eSArthur Eubanks       CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
8823a6bfcf2SGiorgis Georgakoudis       CGUpdater.reanalyzeFunction(*OriginalFn);
8833a6bfcf2SGiorgis Georgakoudis 
8843a6bfcf2SGiorgis Georgakoudis       NumOpenMPParallelRegionsMerged += MergableCIs.size();
8853a6bfcf2SGiorgis Georgakoudis 
8863a6bfcf2SGiorgis Georgakoudis       return true;
8873a6bfcf2SGiorgis Georgakoudis     };
8883a6bfcf2SGiorgis Georgakoudis 
8893a6bfcf2SGiorgis Georgakoudis     // Helper function that identifies sequences of
8903a6bfcf2SGiorgis Georgakoudis     // __kmpc_fork_call uses in a basic block.
8913a6bfcf2SGiorgis Georgakoudis     auto DetectPRsCB = [&](Use &U, Function &F) {
8923a6bfcf2SGiorgis Georgakoudis       CallInst *CI = getCallIfRegularCall(U, &RFI);
8933a6bfcf2SGiorgis Georgakoudis       BB2PRMap[CI->getParent()].insert(CI);
8943a6bfcf2SGiorgis Georgakoudis 
8953a6bfcf2SGiorgis Georgakoudis       return false;
8963a6bfcf2SGiorgis Georgakoudis     };
8973a6bfcf2SGiorgis Georgakoudis 
8983a6bfcf2SGiorgis Georgakoudis     BB2PRMap.clear();
8993a6bfcf2SGiorgis Georgakoudis     RFI.foreachUse(SCC, DetectPRsCB);
9003a6bfcf2SGiorgis Georgakoudis     SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
9013a6bfcf2SGiorgis Georgakoudis     // Find mergable parallel regions within a basic block that are
9023a6bfcf2SGiorgis Georgakoudis     // safe to merge, that is any in-between instructions can safely
9033a6bfcf2SGiorgis Georgakoudis     // execute in parallel after merging.
9043a6bfcf2SGiorgis Georgakoudis     // TODO: support merging across basic-blocks.
9053a6bfcf2SGiorgis Georgakoudis     for (auto &It : BB2PRMap) {
9063a6bfcf2SGiorgis Georgakoudis       auto &CIs = It.getSecond();
9073a6bfcf2SGiorgis Georgakoudis       if (CIs.size() < 2)
9083a6bfcf2SGiorgis Georgakoudis         continue;
9093a6bfcf2SGiorgis Georgakoudis 
9103a6bfcf2SGiorgis Georgakoudis       BasicBlock *BB = It.getFirst();
9113a6bfcf2SGiorgis Georgakoudis       SmallVector<CallInst *, 4> MergableCIs;
9123a6bfcf2SGiorgis Georgakoudis 
91397517055SGiorgis Georgakoudis       /// Returns true if the instruction is mergable, false otherwise.
91497517055SGiorgis Georgakoudis       /// A terminator instruction is unmergable by definition since merging
91597517055SGiorgis Georgakoudis       /// works within a BB. Instructions before the mergable region are
91697517055SGiorgis Georgakoudis       /// mergable if they are not calls to OpenMP runtime functions that may
91797517055SGiorgis Georgakoudis       /// set different execution parameters for subsequent parallel regions.
91897517055SGiorgis Georgakoudis       /// Instructions in-between parallel regions are mergable if they are not
91997517055SGiorgis Georgakoudis       /// calls to any non-intrinsic function since that may call a non-mergable
92097517055SGiorgis Georgakoudis       /// OpenMP runtime function.
92197517055SGiorgis Georgakoudis       auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
92297517055SGiorgis Georgakoudis         // We do not merge across BBs, hence return false (unmergable) if the
92397517055SGiorgis Georgakoudis         // instruction is a terminator.
92497517055SGiorgis Georgakoudis         if (I.isTerminator())
92597517055SGiorgis Georgakoudis           return false;
92697517055SGiorgis Georgakoudis 
92797517055SGiorgis Georgakoudis         if (!isa<CallInst>(&I))
92897517055SGiorgis Georgakoudis           return true;
92997517055SGiorgis Georgakoudis 
93097517055SGiorgis Georgakoudis         CallInst *CI = cast<CallInst>(&I);
93197517055SGiorgis Georgakoudis         if (IsBeforeMergableRegion) {
93297517055SGiorgis Georgakoudis           Function *CalledFunction = CI->getCalledFunction();
93397517055SGiorgis Georgakoudis           if (!CalledFunction)
93497517055SGiorgis Georgakoudis             return false;
93597517055SGiorgis Georgakoudis           // Return false (unmergable) if the call before the parallel
93697517055SGiorgis Georgakoudis           // region calls an explicit affinity (proc_bind) or number of
93797517055SGiorgis Georgakoudis           // threads (num_threads) compiler-generated function. Those settings
93897517055SGiorgis Georgakoudis           // may be incompatible with following parallel regions.
93997517055SGiorgis Georgakoudis           // TODO: ICV tracking to detect compatibility.
94097517055SGiorgis Georgakoudis           for (const auto &RFI : UnmergableCallsInfo) {
94197517055SGiorgis Georgakoudis             if (CalledFunction == RFI.Declaration)
94297517055SGiorgis Georgakoudis               return false;
94397517055SGiorgis Georgakoudis           }
94497517055SGiorgis Georgakoudis         } else {
94597517055SGiorgis Georgakoudis           // Return false (unmergable) if there is a call instruction
94697517055SGiorgis Georgakoudis           // in-between parallel regions when it is not an intrinsic. It
94797517055SGiorgis Georgakoudis           // may call an unmergable OpenMP runtime function in its callpath.
94897517055SGiorgis Georgakoudis           // TODO: Keep track of possible OpenMP calls in the callpath.
94997517055SGiorgis Georgakoudis           if (!isa<IntrinsicInst>(CI))
95097517055SGiorgis Georgakoudis             return false;
95197517055SGiorgis Georgakoudis         }
95297517055SGiorgis Georgakoudis 
95397517055SGiorgis Georgakoudis         return true;
95497517055SGiorgis Georgakoudis       };
9553a6bfcf2SGiorgis Georgakoudis       // Find maximal number of parallel region CIs that are safe to merge.
95697517055SGiorgis Georgakoudis       for (auto It = BB->begin(), End = BB->end(); It != End;) {
95797517055SGiorgis Georgakoudis         Instruction &I = *It;
95897517055SGiorgis Georgakoudis         ++It;
95997517055SGiorgis Georgakoudis 
9603a6bfcf2SGiorgis Georgakoudis         if (CIs.count(&I)) {
9613a6bfcf2SGiorgis Georgakoudis           MergableCIs.push_back(cast<CallInst>(&I));
9623a6bfcf2SGiorgis Georgakoudis           continue;
9633a6bfcf2SGiorgis Georgakoudis         }
9643a6bfcf2SGiorgis Georgakoudis 
96597517055SGiorgis Georgakoudis         // Continue expanding if the instruction is mergable.
96697517055SGiorgis Georgakoudis         if (IsMergable(I, MergableCIs.empty()))
9673a6bfcf2SGiorgis Georgakoudis           continue;
9683a6bfcf2SGiorgis Georgakoudis 
96997517055SGiorgis Georgakoudis         // Forward the instruction iterator to skip the next parallel region
97097517055SGiorgis Georgakoudis         // since there is an unmergable instruction which can affect it.
97197517055SGiorgis Georgakoudis         for (; It != End; ++It) {
97297517055SGiorgis Georgakoudis           Instruction &SkipI = *It;
97397517055SGiorgis Georgakoudis           if (CIs.count(&SkipI)) {
97497517055SGiorgis Georgakoudis             LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
97597517055SGiorgis Georgakoudis                               << " due to " << I << "\n");
97697517055SGiorgis Georgakoudis             ++It;
97797517055SGiorgis Georgakoudis             break;
97897517055SGiorgis Georgakoudis           }
97997517055SGiorgis Georgakoudis         }
98097517055SGiorgis Georgakoudis 
98197517055SGiorgis Georgakoudis         // Store mergable regions found.
9823a6bfcf2SGiorgis Georgakoudis         if (MergableCIs.size() > 1) {
9833a6bfcf2SGiorgis Georgakoudis           MergableCIsVector.push_back(MergableCIs);
9843a6bfcf2SGiorgis Georgakoudis           LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
9853a6bfcf2SGiorgis Georgakoudis                             << " parallel regions in block " << BB->getName()
9863a6bfcf2SGiorgis Georgakoudis                             << " of function " << BB->getParent()->getName()
9873a6bfcf2SGiorgis Georgakoudis                             << "\n";);
9883a6bfcf2SGiorgis Georgakoudis         }
9893a6bfcf2SGiorgis Georgakoudis 
9903a6bfcf2SGiorgis Georgakoudis         MergableCIs.clear();
9913a6bfcf2SGiorgis Georgakoudis       }
9923a6bfcf2SGiorgis Georgakoudis 
9933a6bfcf2SGiorgis Georgakoudis       if (!MergableCIsVector.empty()) {
9943a6bfcf2SGiorgis Georgakoudis         Changed = true;
9953a6bfcf2SGiorgis Georgakoudis 
9963a6bfcf2SGiorgis Georgakoudis         for (auto &MergableCIs : MergableCIsVector)
9973a6bfcf2SGiorgis Georgakoudis           Merge(MergableCIs, BB);
998b2ad63d3SJoseph Huber         MergableCIsVector.clear();
9993a6bfcf2SGiorgis Georgakoudis       }
10003a6bfcf2SGiorgis Georgakoudis     }
10013a6bfcf2SGiorgis Georgakoudis 
10023a6bfcf2SGiorgis Georgakoudis     if (Changed) {
100397517055SGiorgis Georgakoudis       /// Re-collect use for fork calls, emitted barrier calls, and
100497517055SGiorgis Georgakoudis       /// any emitted master/end_master calls.
100597517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
100697517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
100797517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
100897517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
10093a6bfcf2SGiorgis Georgakoudis     }
10103a6bfcf2SGiorgis Georgakoudis 
10113a6bfcf2SGiorgis Georgakoudis     return Changed;
10123a6bfcf2SGiorgis Georgakoudis   }
10133a6bfcf2SGiorgis Georgakoudis 
10149d38f98dSJohannes Doerfert   /// Try to delete parallel regions if possible.
1015e565db49SJohannes Doerfert   bool deleteParallelRegions() {
1016e565db49SJohannes Doerfert     const unsigned CallbackCalleeOperand = 2;
1017e565db49SJohannes Doerfert 
10187cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &RFI =
10197cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
10207cfd267cSsstefan1 
1021e565db49SJohannes Doerfert     if (!RFI.Declaration)
1022e565db49SJohannes Doerfert       return false;
1023e565db49SJohannes Doerfert 
1024e565db49SJohannes Doerfert     bool Changed = false;
1025e565db49SJohannes Doerfert     auto DeleteCallCB = [&](Use &U, Function &) {
1026e565db49SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U);
1027e565db49SJohannes Doerfert       if (!CI)
1028e565db49SJohannes Doerfert         return false;
1029e565db49SJohannes Doerfert       auto *Fn = dyn_cast<Function>(
1030e565db49SJohannes Doerfert           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1031e565db49SJohannes Doerfert       if (!Fn)
1032e565db49SJohannes Doerfert         return false;
1033e565db49SJohannes Doerfert       if (!Fn->onlyReadsMemory())
1034e565db49SJohannes Doerfert         return false;
1035e565db49SJohannes Doerfert       if (!Fn->hasFnAttribute(Attribute::WillReturn))
1036e565db49SJohannes Doerfert         return false;
1037e565db49SJohannes Doerfert 
1038e565db49SJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1039e565db49SJohannes Doerfert                         << CI->getCaller()->getName() << "\n");
10404d4ea9acSHuber, Joseph 
10414d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
10424d4ea9acSHuber, Joseph         return OR << "Parallel region in "
10434d4ea9acSHuber, Joseph                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
10444d4ea9acSHuber, Joseph                   << " deleted";
10454d4ea9acSHuber, Joseph       };
10464d4ea9acSHuber, Joseph       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
10474d4ea9acSHuber, Joseph                                      Remark);
10484d4ea9acSHuber, Joseph 
1049e565db49SJohannes Doerfert       CGUpdater.removeCallSite(*CI);
1050e565db49SJohannes Doerfert       CI->eraseFromParent();
1051e565db49SJohannes Doerfert       Changed = true;
105255eb714aSRoman Lebedev       ++NumOpenMPParallelRegionsDeleted;
1053e565db49SJohannes Doerfert       return true;
1054e565db49SJohannes Doerfert     };
1055e565db49SJohannes Doerfert 
1056624d34afSJohannes Doerfert     RFI.foreachUse(SCC, DeleteCallCB);
1057e565db49SJohannes Doerfert 
1058e565db49SJohannes Doerfert     return Changed;
1059e565db49SJohannes Doerfert   }
1060e565db49SJohannes Doerfert 
1061b726c557SJohannes Doerfert   /// Try to eliminate runtime calls by reusing existing ones.
10629548b74aSJohannes Doerfert   bool deduplicateRuntimeCalls() {
10639548b74aSJohannes Doerfert     bool Changed = false;
10649548b74aSJohannes Doerfert 
1065e28936f6SJohannes Doerfert     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1066e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_threads,
1067e28936f6SJohannes Doerfert         OMPRTL_omp_in_parallel,
1068e28936f6SJohannes Doerfert         OMPRTL_omp_get_cancellation,
1069e28936f6SJohannes Doerfert         OMPRTL_omp_get_thread_limit,
1070e28936f6SJohannes Doerfert         OMPRTL_omp_get_supported_active_levels,
1071e28936f6SJohannes Doerfert         OMPRTL_omp_get_level,
1072e28936f6SJohannes Doerfert         OMPRTL_omp_get_ancestor_thread_num,
1073e28936f6SJohannes Doerfert         OMPRTL_omp_get_team_size,
1074e28936f6SJohannes Doerfert         OMPRTL_omp_get_active_level,
1075e28936f6SJohannes Doerfert         OMPRTL_omp_in_final,
1076e28936f6SJohannes Doerfert         OMPRTL_omp_get_proc_bind,
1077e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_places,
1078e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_procs,
1079e28936f6SJohannes Doerfert         OMPRTL_omp_get_place_num,
1080e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_num_places,
1081e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_place_nums};
1082e28936f6SJohannes Doerfert 
1083bc93c2d7SMarek Kurdej     // Global-tid is handled separately.
10849548b74aSJohannes Doerfert     SmallSetVector<Value *, 16> GTIdArgs;
10859548b74aSJohannes Doerfert     collectGlobalThreadIdArguments(GTIdArgs);
10869548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
10879548b74aSJohannes Doerfert                       << " global thread ID arguments\n");
10889548b74aSJohannes Doerfert 
10899548b74aSJohannes Doerfert     for (Function *F : SCC) {
1090e28936f6SJohannes Doerfert       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
10914e29d256Sserge-sans-paille         Changed |= deduplicateRuntimeCalls(
10924e29d256Sserge-sans-paille             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1093e28936f6SJohannes Doerfert 
1094e28936f6SJohannes Doerfert       // __kmpc_global_thread_num is special as we can replace it with an
1095e28936f6SJohannes Doerfert       // argument in enough cases to make it worth trying.
10969548b74aSJohannes Doerfert       Value *GTIdArg = nullptr;
10979548b74aSJohannes Doerfert       for (Argument &Arg : F->args())
10989548b74aSJohannes Doerfert         if (GTIdArgs.count(&Arg)) {
10999548b74aSJohannes Doerfert           GTIdArg = &Arg;
11009548b74aSJohannes Doerfert           break;
11019548b74aSJohannes Doerfert         }
11029548b74aSJohannes Doerfert       Changed |= deduplicateRuntimeCalls(
11037cfd267cSsstefan1           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
11049548b74aSJohannes Doerfert     }
11059548b74aSJohannes Doerfert 
11069548b74aSJohannes Doerfert     return Changed;
11079548b74aSJohannes Doerfert   }
11089548b74aSJohannes Doerfert 
1109496f8e5bSHamilton Tobon Mosquera   /// Tries to hide the latency of runtime calls that involve host to
1110496f8e5bSHamilton Tobon Mosquera   /// device memory transfers by splitting them into their "issue" and "wait"
1111496f8e5bSHamilton Tobon Mosquera   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1112496f8e5bSHamilton Tobon Mosquera   /// moved downards as much as possible. The "issue" issues the memory transfer
1113496f8e5bSHamilton Tobon Mosquera   /// asynchronously, returning a handle. The "wait" waits in the returned
1114496f8e5bSHamilton Tobon Mosquera   /// handle for the memory transfer to finish.
1115496f8e5bSHamilton Tobon Mosquera   bool hideMemTransfersLatency() {
1116496f8e5bSHamilton Tobon Mosquera     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1117496f8e5bSHamilton Tobon Mosquera     bool Changed = false;
1118496f8e5bSHamilton Tobon Mosquera     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1119496f8e5bSHamilton Tobon Mosquera       auto *RTCall = getCallIfRegularCall(U, &RFI);
1120496f8e5bSHamilton Tobon Mosquera       if (!RTCall)
1121496f8e5bSHamilton Tobon Mosquera         return false;
1122496f8e5bSHamilton Tobon Mosquera 
11238931add6SHamilton Tobon Mosquera       OffloadArray OffloadArrays[3];
11248931add6SHamilton Tobon Mosquera       if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
11258931add6SHamilton Tobon Mosquera         return false;
11268931add6SHamilton Tobon Mosquera 
11278931add6SHamilton Tobon Mosquera       LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
11288931add6SHamilton Tobon Mosquera 
1129bd2fa181SHamilton Tobon Mosquera       // TODO: Check if can be moved upwards.
1130bd2fa181SHamilton Tobon Mosquera       bool WasSplit = false;
1131bd2fa181SHamilton Tobon Mosquera       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1132bd2fa181SHamilton Tobon Mosquera       if (WaitMovementPoint)
1133bd2fa181SHamilton Tobon Mosquera         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1134bd2fa181SHamilton Tobon Mosquera 
1135496f8e5bSHamilton Tobon Mosquera       Changed |= WasSplit;
1136496f8e5bSHamilton Tobon Mosquera       return WasSplit;
1137496f8e5bSHamilton Tobon Mosquera     };
1138496f8e5bSHamilton Tobon Mosquera     RFI.foreachUse(SCC, SplitMemTransfers);
1139496f8e5bSHamilton Tobon Mosquera 
1140496f8e5bSHamilton Tobon Mosquera     return Changed;
1141496f8e5bSHamilton Tobon Mosquera   }
1142496f8e5bSHamilton Tobon Mosquera 
1143a2281419SJoseph Huber   void analysisGlobalization() {
11446fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
114582453e75SJoseph Huber 
114682453e75SJoseph Huber     auto CheckGlobalization = [&](Use &U, Function &Decl) {
1147a2281419SJoseph Huber       if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
114844feacc7SJoseph Huber         auto Remark = [&](OptimizationRemarkMissed ORM) {
114944feacc7SJoseph Huber           return ORM
1150a2281419SJoseph Huber                  << "Found thread data sharing on the GPU. "
1151a2281419SJoseph Huber                  << "Expect degraded performance due to data globalization.";
1152a2281419SJoseph Huber         };
115344feacc7SJoseph Huber         emitRemark<OptimizationRemarkMissed>(CI, "OpenMPGlobalization", Remark);
1154a2281419SJoseph Huber       }
1155a2281419SJoseph Huber 
1156a2281419SJoseph Huber       return false;
1157a2281419SJoseph Huber     };
1158a2281419SJoseph Huber 
115982453e75SJoseph Huber     RFI.foreachUse(SCC, CheckGlobalization);
116082453e75SJoseph Huber   }
1161a2281419SJoseph Huber 
11628931add6SHamilton Tobon Mosquera   /// Maps the values stored in the offload arrays passed as arguments to
11638931add6SHamilton Tobon Mosquera   /// \p RuntimeCall into the offload arrays in \p OAs.
11648931add6SHamilton Tobon Mosquera   bool getValuesInOffloadArrays(CallInst &RuntimeCall,
11658931add6SHamilton Tobon Mosquera                                 MutableArrayRef<OffloadArray> OAs) {
11668931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "Need space for three offload arrays!");
11678931add6SHamilton Tobon Mosquera 
11688931add6SHamilton Tobon Mosquera     // A runtime call that involves memory offloading looks something like:
11698931add6SHamilton Tobon Mosquera     // call void @__tgt_target_data_begin_mapper(arg0, arg1,
11708931add6SHamilton Tobon Mosquera     //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
11718931add6SHamilton Tobon Mosquera     // ...)
11728931add6SHamilton Tobon Mosquera     // So, the idea is to access the allocas that allocate space for these
11738931add6SHamilton Tobon Mosquera     // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
11748931add6SHamilton Tobon Mosquera     // Therefore:
11758931add6SHamilton Tobon Mosquera     // i8** %offload_baseptrs.
11761d3d9b9cSHamilton Tobon Mosquera     Value *BasePtrsArg =
11771d3d9b9cSHamilton Tobon Mosquera         RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
11788931add6SHamilton Tobon Mosquera     // i8** %offload_ptrs.
11791d3d9b9cSHamilton Tobon Mosquera     Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
11808931add6SHamilton Tobon Mosquera     // i8** %offload_sizes.
11811d3d9b9cSHamilton Tobon Mosquera     Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
11828931add6SHamilton Tobon Mosquera 
11838931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11848931add6SHamilton Tobon Mosquera     auto *V = getUnderlyingObject(BasePtrsArg);
11858931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11868931add6SHamilton Tobon Mosquera       return false;
11878931add6SHamilton Tobon Mosquera     auto *BasePtrsArray = cast<AllocaInst>(V);
11888931add6SHamilton Tobon Mosquera     if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
11898931add6SHamilton Tobon Mosquera       return false;
11908931add6SHamilton Tobon Mosquera 
11918931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11928931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(PtrsArg);
11938931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11948931add6SHamilton Tobon Mosquera       return false;
11958931add6SHamilton Tobon Mosquera     auto *PtrsArray = cast<AllocaInst>(V);
11968931add6SHamilton Tobon Mosquera     if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
11978931add6SHamilton Tobon Mosquera       return false;
11988931add6SHamilton Tobon Mosquera 
11998931add6SHamilton Tobon Mosquera     // Get values stored in **offload_sizes.
12008931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(SizesArg);
12018931add6SHamilton Tobon Mosquera     // If it's a [constant] global array don't analyze it.
12028931add6SHamilton Tobon Mosquera     if (isa<GlobalValue>(V))
12038931add6SHamilton Tobon Mosquera       return isa<Constant>(V);
12048931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
12058931add6SHamilton Tobon Mosquera       return false;
12068931add6SHamilton Tobon Mosquera 
12078931add6SHamilton Tobon Mosquera     auto *SizesArray = cast<AllocaInst>(V);
12088931add6SHamilton Tobon Mosquera     if (!OAs[2].initialize(*SizesArray, RuntimeCall))
12098931add6SHamilton Tobon Mosquera       return false;
12108931add6SHamilton Tobon Mosquera 
12118931add6SHamilton Tobon Mosquera     return true;
12128931add6SHamilton Tobon Mosquera   }
12138931add6SHamilton Tobon Mosquera 
12148931add6SHamilton Tobon Mosquera   /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
12158931add6SHamilton Tobon Mosquera   /// For now this is a way to test that the function getValuesInOffloadArrays
12168931add6SHamilton Tobon Mosquera   /// is working properly.
12178931add6SHamilton Tobon Mosquera   /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
12188931add6SHamilton Tobon Mosquera   void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
12198931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "There are three offload arrays to debug!");
12208931add6SHamilton Tobon Mosquera 
12218931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
12228931add6SHamilton Tobon Mosquera     std::string ValuesStr;
12238931add6SHamilton Tobon Mosquera     raw_string_ostream Printer(ValuesStr);
12248931add6SHamilton Tobon Mosquera     std::string Separator = " --- ";
12258931add6SHamilton Tobon Mosquera 
12268931add6SHamilton Tobon Mosquera     for (auto *BP : OAs[0].StoredValues) {
12278931add6SHamilton Tobon Mosquera       BP->print(Printer);
12288931add6SHamilton Tobon Mosquera       Printer << Separator;
12298931add6SHamilton Tobon Mosquera     }
12308931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
12318931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12328931add6SHamilton Tobon Mosquera 
12338931add6SHamilton Tobon Mosquera     for (auto *P : OAs[1].StoredValues) {
12348931add6SHamilton Tobon Mosquera       P->print(Printer);
12358931add6SHamilton Tobon Mosquera       Printer << Separator;
12368931add6SHamilton Tobon Mosquera     }
12378931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
12388931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12398931add6SHamilton Tobon Mosquera 
12408931add6SHamilton Tobon Mosquera     for (auto *S : OAs[2].StoredValues) {
12418931add6SHamilton Tobon Mosquera       S->print(Printer);
12428931add6SHamilton Tobon Mosquera       Printer << Separator;
12438931add6SHamilton Tobon Mosquera     }
12448931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
12458931add6SHamilton Tobon Mosquera   }
12468931add6SHamilton Tobon Mosquera 
1247bd2fa181SHamilton Tobon Mosquera   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1248bd2fa181SHamilton Tobon Mosquera   /// moved. Returns nullptr if the movement is not possible, or not worth it.
1249bd2fa181SHamilton Tobon Mosquera   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1250bd2fa181SHamilton Tobon Mosquera     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1251bd2fa181SHamilton Tobon Mosquera     //  Make it traverse the CFG.
1252bd2fa181SHamilton Tobon Mosquera 
1253bd2fa181SHamilton Tobon Mosquera     Instruction *CurrentI = &RuntimeCall;
1254bd2fa181SHamilton Tobon Mosquera     bool IsWorthIt = false;
1255bd2fa181SHamilton Tobon Mosquera     while ((CurrentI = CurrentI->getNextNode())) {
1256bd2fa181SHamilton Tobon Mosquera 
1257bd2fa181SHamilton Tobon Mosquera       // TODO: Once we detect the regions to be offloaded we should use the
1258bd2fa181SHamilton Tobon Mosquera       //  alias analysis manager to check if CurrentI may modify one of
1259bd2fa181SHamilton Tobon Mosquera       //  the offloaded regions.
1260bd2fa181SHamilton Tobon Mosquera       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1261bd2fa181SHamilton Tobon Mosquera         if (IsWorthIt)
1262bd2fa181SHamilton Tobon Mosquera           return CurrentI;
1263bd2fa181SHamilton Tobon Mosquera 
1264bd2fa181SHamilton Tobon Mosquera         return nullptr;
1265bd2fa181SHamilton Tobon Mosquera       }
1266bd2fa181SHamilton Tobon Mosquera 
1267bd2fa181SHamilton Tobon Mosquera       // FIXME: For now if we move it over anything without side effect
1268bd2fa181SHamilton Tobon Mosquera       //  is worth it.
1269bd2fa181SHamilton Tobon Mosquera       IsWorthIt = true;
1270bd2fa181SHamilton Tobon Mosquera     }
1271bd2fa181SHamilton Tobon Mosquera 
1272bd2fa181SHamilton Tobon Mosquera     // Return end of BasicBlock.
1273bd2fa181SHamilton Tobon Mosquera     return RuntimeCall.getParent()->getTerminator();
1274bd2fa181SHamilton Tobon Mosquera   }
1275bd2fa181SHamilton Tobon Mosquera 
1276496f8e5bSHamilton Tobon Mosquera   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
1277bd2fa181SHamilton Tobon Mosquera   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1278bd2fa181SHamilton Tobon Mosquera                                Instruction &WaitMovementPoint) {
1279bd31abc1SHamilton Tobon Mosquera     // Create stack allocated handle (__tgt_async_info) at the beginning of the
1280bd31abc1SHamilton Tobon Mosquera     // function. Used for storing information of the async transfer, allowing to
1281bd31abc1SHamilton Tobon Mosquera     // wait on it later.
1282496f8e5bSHamilton Tobon Mosquera     auto &IRBuilder = OMPInfoCache.OMPBuilder;
1283bd31abc1SHamilton Tobon Mosquera     auto *F = RuntimeCall.getCaller();
1284bd31abc1SHamilton Tobon Mosquera     Instruction *FirstInst = &(F->getEntryBlock().front());
1285bd31abc1SHamilton Tobon Mosquera     AllocaInst *Handle = new AllocaInst(
1286bd31abc1SHamilton Tobon Mosquera         IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1287bd31abc1SHamilton Tobon Mosquera 
1288496f8e5bSHamilton Tobon Mosquera     // Add "issue" runtime call declaration:
1289496f8e5bSHamilton Tobon Mosquera     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1290496f8e5bSHamilton Tobon Mosquera     //   i8**, i8**, i64*, i64*)
1291496f8e5bSHamilton Tobon Mosquera     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1292496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_issue);
1293496f8e5bSHamilton Tobon Mosquera 
1294496f8e5bSHamilton Tobon Mosquera     // Change RuntimeCall call site for its asynchronous version.
129597e55cfeSJoseph Huber     SmallVector<Value *, 16> Args;
1296bd2fa181SHamilton Tobon Mosquera     for (auto &Arg : RuntimeCall.args())
1297496f8e5bSHamilton Tobon Mosquera       Args.push_back(Arg.get());
1298bd31abc1SHamilton Tobon Mosquera     Args.push_back(Handle);
1299496f8e5bSHamilton Tobon Mosquera 
1300496f8e5bSHamilton Tobon Mosquera     CallInst *IssueCallsite =
1301bd31abc1SHamilton Tobon Mosquera         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
1302bd2fa181SHamilton Tobon Mosquera     RuntimeCall.eraseFromParent();
1303496f8e5bSHamilton Tobon Mosquera 
1304496f8e5bSHamilton Tobon Mosquera     // Add "wait" runtime call declaration:
1305496f8e5bSHamilton Tobon Mosquera     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1306496f8e5bSHamilton Tobon Mosquera     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1307496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_wait);
1308496f8e5bSHamilton Tobon Mosquera 
1309496f8e5bSHamilton Tobon Mosquera     Value *WaitParams[2] = {
1310da8bec47SJoseph Huber         IssueCallsite->getArgOperand(
1311da8bec47SJoseph Huber             OffloadArray::DeviceIDArgNum), // device_id.
1312bd31abc1SHamilton Tobon Mosquera         Handle                             // handle to wait on.
1313496f8e5bSHamilton Tobon Mosquera     };
1314bd2fa181SHamilton Tobon Mosquera     CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
1315496f8e5bSHamilton Tobon Mosquera 
1316496f8e5bSHamilton Tobon Mosquera     return true;
1317496f8e5bSHamilton Tobon Mosquera   }
1318496f8e5bSHamilton Tobon Mosquera 
1319dc3b5b00SJohannes Doerfert   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1320dc3b5b00SJohannes Doerfert                                     bool GlobalOnly, bool &SingleChoice) {
1321dc3b5b00SJohannes Doerfert     if (CurrentIdent == NextIdent)
1322dc3b5b00SJohannes Doerfert       return CurrentIdent;
1323dc3b5b00SJohannes Doerfert 
1324396b7253SJohannes Doerfert     // TODO: Figure out how to actually combine multiple debug locations. For
1325dc3b5b00SJohannes Doerfert     //       now we just keep an existing one if there is a single choice.
1326dc3b5b00SJohannes Doerfert     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1327dc3b5b00SJohannes Doerfert       SingleChoice = !CurrentIdent;
1328dc3b5b00SJohannes Doerfert       return NextIdent;
1329dc3b5b00SJohannes Doerfert     }
1330396b7253SJohannes Doerfert     return nullptr;
1331396b7253SJohannes Doerfert   }
1332396b7253SJohannes Doerfert 
1333396b7253SJohannes Doerfert   /// Return an `struct ident_t*` value that represents the ones used in the
1334396b7253SJohannes Doerfert   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1335396b7253SJohannes Doerfert   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1336396b7253SJohannes Doerfert   /// return value we create one from scratch. We also do not yet combine
1337396b7253SJohannes Doerfert   /// information, e.g., the source locations, see combinedIdentStruct.
13387cfd267cSsstefan1   Value *
13397cfd267cSsstefan1   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
13407cfd267cSsstefan1                                  Function &F, bool GlobalOnly) {
1341dc3b5b00SJohannes Doerfert     bool SingleChoice = true;
1342396b7253SJohannes Doerfert     Value *Ident = nullptr;
1343396b7253SJohannes Doerfert     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1344396b7253SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
1345396b7253SJohannes Doerfert       if (!CI || &F != &Caller)
1346396b7253SJohannes Doerfert         return false;
1347396b7253SJohannes Doerfert       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1348dc3b5b00SJohannes Doerfert                                   /* GlobalOnly */ true, SingleChoice);
1349396b7253SJohannes Doerfert       return false;
1350396b7253SJohannes Doerfert     };
1351624d34afSJohannes Doerfert     RFI.foreachUse(SCC, CombineIdentStruct);
1352396b7253SJohannes Doerfert 
1353dc3b5b00SJohannes Doerfert     if (!Ident || !SingleChoice) {
1354396b7253SJohannes Doerfert       // The IRBuilder uses the insertion block to get to the module, this is
1355396b7253SJohannes Doerfert       // unfortunate but we work around it for now.
13567cfd267cSsstefan1       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
13577cfd267cSsstefan1         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1358396b7253SJohannes Doerfert             &F.getEntryBlock(), F.getEntryBlock().begin()));
1359396b7253SJohannes Doerfert       // Create a fallback location if non was found.
1360396b7253SJohannes Doerfert       // TODO: Use the debug locations of the calls instead.
13617cfd267cSsstefan1       Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
13627cfd267cSsstefan1       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
1363396b7253SJohannes Doerfert     }
1364396b7253SJohannes Doerfert     return Ident;
1365396b7253SJohannes Doerfert   }
1366396b7253SJohannes Doerfert 
1367b726c557SJohannes Doerfert   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
13689548b74aSJohannes Doerfert   /// \p ReplVal if given.
13697cfd267cSsstefan1   bool deduplicateRuntimeCalls(Function &F,
13707cfd267cSsstefan1                                OMPInformationCache::RuntimeFunctionInfo &RFI,
13719548b74aSJohannes Doerfert                                Value *ReplVal = nullptr) {
13728855fec3SJohannes Doerfert     auto *UV = RFI.getUseVector(F);
13738855fec3SJohannes Doerfert     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1374b1fbf438SRoman Lebedev       return false;
1375b1fbf438SRoman Lebedev 
13767cfd267cSsstefan1     LLVM_DEBUG(
13777cfd267cSsstefan1         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
13787cfd267cSsstefan1                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
13797cfd267cSsstefan1 
1380ab3da5ddSMichael Liao     assert((!ReplVal || (isa<Argument>(ReplVal) &&
1381ab3da5ddSMichael Liao                          cast<Argument>(ReplVal)->getParent() == &F)) &&
13829548b74aSJohannes Doerfert            "Unexpected replacement value!");
1383396b7253SJohannes Doerfert 
1384396b7253SJohannes Doerfert     // TODO: Use dominance to find a good position instead.
13856aab27baSsstefan1     auto CanBeMoved = [this](CallBase &CB) {
1386396b7253SJohannes Doerfert       unsigned NumArgs = CB.getNumArgOperands();
1387396b7253SJohannes Doerfert       if (NumArgs == 0)
1388396b7253SJohannes Doerfert         return true;
13896aab27baSsstefan1       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1390396b7253SJohannes Doerfert         return false;
1391396b7253SJohannes Doerfert       for (unsigned u = 1; u < NumArgs; ++u)
1392396b7253SJohannes Doerfert         if (isa<Instruction>(CB.getArgOperand(u)))
1393396b7253SJohannes Doerfert           return false;
1394396b7253SJohannes Doerfert       return true;
1395396b7253SJohannes Doerfert     };
1396396b7253SJohannes Doerfert 
13979548b74aSJohannes Doerfert     if (!ReplVal) {
13988855fec3SJohannes Doerfert       for (Use *U : *UV)
13999548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1400396b7253SJohannes Doerfert           if (!CanBeMoved(*CI))
1401396b7253SJohannes Doerfert             continue;
14024d4ea9acSHuber, Joseph 
14034d4ea9acSHuber, Joseph           auto Remark = [&](OptimizationRemark OR) {
14044d4ea9acSHuber, Joseph             return OR << "OpenMP runtime call "
14052db182ffSJoseph Huber                       << ore::NV("OpenMPOptRuntime", RFI.Name)
14062db182ffSJoseph Huber                       << " moved to beginning of OpenMP region";
14074d4ea9acSHuber, Joseph           };
14082db182ffSJoseph Huber           emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark);
14094d4ea9acSHuber, Joseph 
14109548b74aSJohannes Doerfert           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
14119548b74aSJohannes Doerfert           ReplVal = CI;
14129548b74aSJohannes Doerfert           break;
14139548b74aSJohannes Doerfert         }
14149548b74aSJohannes Doerfert       if (!ReplVal)
14159548b74aSJohannes Doerfert         return false;
14169548b74aSJohannes Doerfert     }
14179548b74aSJohannes Doerfert 
1418396b7253SJohannes Doerfert     // If we use a call as a replacement value we need to make sure the ident is
1419396b7253SJohannes Doerfert     // valid at the new location. For now we just pick a global one, either
1420396b7253SJohannes Doerfert     // existing and used by one of the calls, or created from scratch.
1421396b7253SJohannes Doerfert     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1422396b7253SJohannes Doerfert       if (CI->getNumArgOperands() > 0 &&
14236aab27baSsstefan1           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1424396b7253SJohannes Doerfert         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1425396b7253SJohannes Doerfert                                                       /* GlobalOnly */ true);
1426396b7253SJohannes Doerfert         CI->setArgOperand(0, Ident);
1427396b7253SJohannes Doerfert       }
1428396b7253SJohannes Doerfert     }
1429396b7253SJohannes Doerfert 
14309548b74aSJohannes Doerfert     bool Changed = false;
14319548b74aSJohannes Doerfert     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
14329548b74aSJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
14339548b74aSJohannes Doerfert       if (!CI || CI == ReplVal || &F != &Caller)
14349548b74aSJohannes Doerfert         return false;
14359548b74aSJohannes Doerfert       assert(CI->getCaller() == &F && "Unexpected call!");
14364d4ea9acSHuber, Joseph 
14374d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
14384d4ea9acSHuber, Joseph         return OR << "OpenMP runtime call "
14394d4ea9acSHuber, Joseph                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
14404d4ea9acSHuber, Joseph       };
14412db182ffSJoseph Huber       emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark);
14424d4ea9acSHuber, Joseph 
14439548b74aSJohannes Doerfert       CGUpdater.removeCallSite(*CI);
14449548b74aSJohannes Doerfert       CI->replaceAllUsesWith(ReplVal);
14459548b74aSJohannes Doerfert       CI->eraseFromParent();
14469548b74aSJohannes Doerfert       ++NumOpenMPRuntimeCallsDeduplicated;
14479548b74aSJohannes Doerfert       Changed = true;
14489548b74aSJohannes Doerfert       return true;
14499548b74aSJohannes Doerfert     };
1450624d34afSJohannes Doerfert     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
14519548b74aSJohannes Doerfert 
14529548b74aSJohannes Doerfert     return Changed;
14539548b74aSJohannes Doerfert   }
14549548b74aSJohannes Doerfert 
14559548b74aSJohannes Doerfert   /// Collect arguments that represent the global thread id in \p GTIdArgs.
14569548b74aSJohannes Doerfert   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
14579548b74aSJohannes Doerfert     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
14589548b74aSJohannes Doerfert     //       initialization. We could define an AbstractAttribute instead and
14599548b74aSJohannes Doerfert     //       run the Attributor here once it can be run as an SCC pass.
14609548b74aSJohannes Doerfert 
14619548b74aSJohannes Doerfert     // Helper to check the argument \p ArgNo at all call sites of \p F for
14629548b74aSJohannes Doerfert     // a GTId.
14639548b74aSJohannes Doerfert     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
14649548b74aSJohannes Doerfert       if (!F.hasLocalLinkage())
14659548b74aSJohannes Doerfert         return false;
14669548b74aSJohannes Doerfert       for (Use &U : F.uses()) {
14679548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(U)) {
14689548b74aSJohannes Doerfert           Value *ArgOp = CI->getArgOperand(ArgNo);
14699548b74aSJohannes Doerfert           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
14707cfd267cSsstefan1               getCallIfRegularCall(
14717cfd267cSsstefan1                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
14729548b74aSJohannes Doerfert             continue;
14739548b74aSJohannes Doerfert         }
14749548b74aSJohannes Doerfert         return false;
14759548b74aSJohannes Doerfert       }
14769548b74aSJohannes Doerfert       return true;
14779548b74aSJohannes Doerfert     };
14789548b74aSJohannes Doerfert 
14799548b74aSJohannes Doerfert     // Helper to identify uses of a GTId as GTId arguments.
14809548b74aSJohannes Doerfert     auto AddUserArgs = [&](Value &GTId) {
14819548b74aSJohannes Doerfert       for (Use &U : GTId.uses())
14829548b74aSJohannes Doerfert         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
14839548b74aSJohannes Doerfert           if (CI->isArgOperand(&U))
14849548b74aSJohannes Doerfert             if (Function *Callee = CI->getCalledFunction())
14859548b74aSJohannes Doerfert               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
14869548b74aSJohannes Doerfert                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
14879548b74aSJohannes Doerfert     };
14889548b74aSJohannes Doerfert 
14899548b74aSJohannes Doerfert     // The argument users of __kmpc_global_thread_num calls are GTIds.
14907cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
14917cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
14927cfd267cSsstefan1 
1493624d34afSJohannes Doerfert     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
14948855fec3SJohannes Doerfert       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
14959548b74aSJohannes Doerfert         AddUserArgs(*CI);
14968855fec3SJohannes Doerfert       return false;
14978855fec3SJohannes Doerfert     });
14989548b74aSJohannes Doerfert 
14999548b74aSJohannes Doerfert     // Transitively search for more arguments by looking at the users of the
15009548b74aSJohannes Doerfert     // ones we know already. During the search the GTIdArgs vector is extended
15019548b74aSJohannes Doerfert     // so we cannot cache the size nor can we use a range based for.
15029548b74aSJohannes Doerfert     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
15039548b74aSJohannes Doerfert       AddUserArgs(*GTIdArgs[u]);
15049548b74aSJohannes Doerfert   }
15059548b74aSJohannes Doerfert 
15065b0581aeSJohannes Doerfert   /// Kernel (=GPU) optimizations and utility functions
15075b0581aeSJohannes Doerfert   ///
15085b0581aeSJohannes Doerfert   ///{{
15095b0581aeSJohannes Doerfert 
15105b0581aeSJohannes Doerfert   /// Check if \p F is a kernel, hence entry point for target offloading.
15115b0581aeSJohannes Doerfert   bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
15125b0581aeSJohannes Doerfert 
15135b0581aeSJohannes Doerfert   /// Cache to remember the unique kernel for a function.
15145b0581aeSJohannes Doerfert   DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
15155b0581aeSJohannes Doerfert 
15165b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p F, if any.
15175b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Function &F);
15185b0581aeSJohannes Doerfert 
15195b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p I, if any.
15205b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Instruction &I) {
15215b0581aeSJohannes Doerfert     return getUniqueKernelFor(*I.getFunction());
15225b0581aeSJohannes Doerfert   }
15235b0581aeSJohannes Doerfert 
  /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
  /// the cases we can avoid taking the address of a function.
15265b0581aeSJohannes Doerfert   bool rewriteDeviceCodeStateMachine();
15275b0581aeSJohannes Doerfert 
15285b0581aeSJohannes Doerfert   ///
15295b0581aeSJohannes Doerfert   ///}}
15305b0581aeSJohannes Doerfert 
15314d4ea9acSHuber, Joseph   /// Emit a remark generically
15324d4ea9acSHuber, Joseph   ///
15334d4ea9acSHuber, Joseph   /// This template function can be used to generically emit a remark. The
15344d4ea9acSHuber, Joseph   /// RemarkKind should be one of the following:
15354d4ea9acSHuber, Joseph   ///   - OptimizationRemark to indicate a successful optimization attempt
15364d4ea9acSHuber, Joseph   ///   - OptimizationRemarkMissed to report a failed optimization attempt
15374d4ea9acSHuber, Joseph   ///   - OptimizationRemarkAnalysis to provide additional information about an
15384d4ea9acSHuber, Joseph   ///     optimization attempt
15394d4ea9acSHuber, Joseph   ///
15404d4ea9acSHuber, Joseph   /// The remark is built using a callback function provided by the caller that
15414d4ea9acSHuber, Joseph   /// takes a RemarkKind as input and returns a RemarkKind.
15422db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15432db182ffSJoseph Huber   void emitRemark(Instruction *I, StringRef RemarkName,
1544e8039ad4SJohannes Doerfert                   RemarkCallBack &&RemarkCB) const {
15452db182ffSJoseph Huber     Function *F = I->getParent()->getParent();
15464d4ea9acSHuber, Joseph     auto &ORE = OREGetter(F);
15474d4ea9acSHuber, Joseph 
15482db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
15494d4ea9acSHuber, Joseph   }
15504d4ea9acSHuber, Joseph 
15512db182ffSJoseph Huber   /// Emit a remark on a function.
15522db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15532db182ffSJoseph Huber   void emitRemark(Function *F, StringRef RemarkName,
15542db182ffSJoseph Huber                   RemarkCallBack &&RemarkCB) const {
15550f426935Ssstefan1     auto &ORE = OREGetter(F);
15560f426935Ssstefan1 
15572db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
15580f426935Ssstefan1   }
15590f426935Ssstefan1 
1560b726c557SJohannes Doerfert   /// The underlying module.
15619548b74aSJohannes Doerfert   Module &M;
15629548b74aSJohannes Doerfert 
15639548b74aSJohannes Doerfert   /// The SCC we are operating on.
1564ee17263aSJohannes Doerfert   SmallVectorImpl<Function *> &SCC;
15659548b74aSJohannes Doerfert 
15669548b74aSJohannes Doerfert   /// Callback to update the call graph, the first argument is a removed call,
15679548b74aSJohannes Doerfert   /// the second an optional replacement call.
15689548b74aSJohannes Doerfert   CallGraphUpdater &CGUpdater;
15699548b74aSJohannes Doerfert 
15704d4ea9acSHuber, Joseph   /// Callback to get an OptimizationRemarkEmitter from a Function *
15714d4ea9acSHuber, Joseph   OptimizationRemarkGetter OREGetter;
15724d4ea9acSHuber, Joseph 
  /// OpenMP-specific information cache. Also used for Attributor runs.
15747cfd267cSsstefan1   OMPInformationCache &OMPInfoCache;
1575b8235d2bSsstefan1 
1576b8235d2bSsstefan1   /// Attributor instance.
1577b8235d2bSsstefan1   Attributor &A;
1578b8235d2bSsstefan1 
1579b8235d2bSsstefan1   /// Helper function to run Attributor on SCC.
1580b8235d2bSsstefan1   bool runAttributor() {
1581b8235d2bSsstefan1     if (SCC.empty())
1582b8235d2bSsstefan1       return false;
1583b8235d2bSsstefan1 
1584b8235d2bSsstefan1     registerAAs();
1585b8235d2bSsstefan1 
1586b8235d2bSsstefan1     ChangeStatus Changed = A.run();
1587b8235d2bSsstefan1 
1588b8235d2bSsstefan1     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
1589b8235d2bSsstefan1                       << " functions, result: " << Changed << ".\n");
1590b8235d2bSsstefan1 
1591b8235d2bSsstefan1     return Changed == ChangeStatus::CHANGED;
1592b8235d2bSsstefan1   }
1593b8235d2bSsstefan1 
1594b8235d2bSsstefan1   /// Populate the Attributor with abstract attribute opportunities in the
1595b8235d2bSsstefan1   /// function.
1596b8235d2bSsstefan1   void registerAAs() {
15975dfd7cc4Ssstefan1     if (SCC.empty())
15985dfd7cc4Ssstefan1       return;
1599b8235d2bSsstefan1 
16005dfd7cc4Ssstefan1     // Create CallSite AA for all Getters.
16015dfd7cc4Ssstefan1     for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
16025dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
16035dfd7cc4Ssstefan1 
16045dfd7cc4Ssstefan1       auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
16055dfd7cc4Ssstefan1 
16065dfd7cc4Ssstefan1       auto CreateAA = [&](Use &U, Function &Caller) {
16075dfd7cc4Ssstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
16085dfd7cc4Ssstefan1         if (!CI)
16095dfd7cc4Ssstefan1           return false;
16105dfd7cc4Ssstefan1 
16115dfd7cc4Ssstefan1         auto &CB = cast<CallBase>(*CI);
16125dfd7cc4Ssstefan1 
16135dfd7cc4Ssstefan1         IRPosition CBPos = IRPosition::callsite_function(CB);
16145dfd7cc4Ssstefan1         A.getOrCreateAAFor<AAICVTracker>(CBPos);
16155dfd7cc4Ssstefan1         return false;
16165dfd7cc4Ssstefan1       };
16175dfd7cc4Ssstefan1 
16185dfd7cc4Ssstefan1       GetterRFI.foreachUse(SCC, CreateAA);
1619b8235d2bSsstefan1     }
16206fc51c9fSJoseph Huber     auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
16216fc51c9fSJoseph Huber     auto CreateAA = [&](Use &U, Function &F) {
16226fc51c9fSJoseph Huber       A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
16236fc51c9fSJoseph Huber       return false;
16246fc51c9fSJoseph Huber     };
16256fc51c9fSJoseph Huber     GlobalizationRFI.foreachUse(SCC, CreateAA);
162618283125SJoseph Huber 
16277d69da71SJoseph Huber     // Create an ExecutionDomain AA for every function and a HeapToStack AA for
16287d69da71SJoseph Huber     // every function if there is a device kernel.
162903d7e61cSJoseph Huber     for (auto *F : SCC) {
163003d7e61cSJoseph Huber       if (!F->isDeclaration())
163103d7e61cSJoseph Huber         A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
16325ccb7424SJoseph Huber       if (isOpenMPDevice(M))
16337d69da71SJoseph Huber         A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
163418283125SJoseph Huber     }
1635b8235d2bSsstefan1   }
1636b8235d2bSsstefan1 };
1637b8235d2bSsstefan1 
16385b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
16395b0581aeSJohannes Doerfert   if (!OMPInfoCache.ModuleSlice.count(&F))
16405b0581aeSJohannes Doerfert     return nullptr;
16415b0581aeSJohannes Doerfert 
16425b0581aeSJohannes Doerfert   // Use a scope to keep the lifetime of the CachedKernel short.
16435b0581aeSJohannes Doerfert   {
16445b0581aeSJohannes Doerfert     Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
16455b0581aeSJohannes Doerfert     if (CachedKernel)
16465b0581aeSJohannes Doerfert       return *CachedKernel;
16475b0581aeSJohannes Doerfert 
16485b0581aeSJohannes Doerfert     // TODO: We should use an AA to create an (optimistic and callback
16495b0581aeSJohannes Doerfert     //       call-aware) call graph. For now we stick to simple patterns that
16505b0581aeSJohannes Doerfert     //       are less powerful, basically the worst fixpoint.
16515b0581aeSJohannes Doerfert     if (isKernel(F)) {
16525b0581aeSJohannes Doerfert       CachedKernel = Kernel(&F);
16535b0581aeSJohannes Doerfert       return *CachedKernel;
16545b0581aeSJohannes Doerfert     }
16555b0581aeSJohannes Doerfert 
16565b0581aeSJohannes Doerfert     CachedKernel = nullptr;
1657994bb6ebSJohannes Doerfert     if (!F.hasLocalLinkage()) {
1658994bb6ebSJohannes Doerfert 
1659994bb6ebSJohannes Doerfert       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
16602db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
16612db182ffSJoseph Huber         return ORA
16622db182ffSJoseph Huber                << "[OMP100] Potentially unknown OpenMP target region caller";
1663994bb6ebSJohannes Doerfert       };
16642db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
1665994bb6ebSJohannes Doerfert 
16665b0581aeSJohannes Doerfert       return nullptr;
16675b0581aeSJohannes Doerfert     }
1668994bb6ebSJohannes Doerfert   }
16695b0581aeSJohannes Doerfert 
16705b0581aeSJohannes Doerfert   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
16715b0581aeSJohannes Doerfert     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
16725b0581aeSJohannes Doerfert       // Allow use in equality comparisons.
16735b0581aeSJohannes Doerfert       if (Cmp->isEquality())
16745b0581aeSJohannes Doerfert         return getUniqueKernelFor(*Cmp);
16755b0581aeSJohannes Doerfert       return nullptr;
16765b0581aeSJohannes Doerfert     }
16775b0581aeSJohannes Doerfert     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
16785b0581aeSJohannes Doerfert       // Allow direct calls.
16795b0581aeSJohannes Doerfert       if (CB->isCallee(&U))
16805b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
1681a2dbfb6bSGiorgis Georgakoudis 
1682a2dbfb6bSGiorgis Georgakoudis       OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1683a2dbfb6bSGiorgis Georgakoudis           OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1684a2dbfb6bSGiorgis Georgakoudis       // Allow the use in __kmpc_parallel_51 calls.
1685a2dbfb6bSGiorgis Georgakoudis       if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
16865b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
16875b0581aeSJohannes Doerfert       return nullptr;
16885b0581aeSJohannes Doerfert     }
16895b0581aeSJohannes Doerfert     // Disallow every other use.
16905b0581aeSJohannes Doerfert     return nullptr;
16915b0581aeSJohannes Doerfert   };
16925b0581aeSJohannes Doerfert 
16935b0581aeSJohannes Doerfert   // TODO: In the future we want to track more than just a unique kernel.
16945b0581aeSJohannes Doerfert   SmallPtrSet<Kernel, 2> PotentialKernels;
16958d8ce85bSsstefan1   OMPInformationCache::foreachUse(F, [&](const Use &U) {
16965b0581aeSJohannes Doerfert     PotentialKernels.insert(GetUniqueKernelForUse(U));
16975b0581aeSJohannes Doerfert   });
16985b0581aeSJohannes Doerfert 
16995b0581aeSJohannes Doerfert   Kernel K = nullptr;
17005b0581aeSJohannes Doerfert   if (PotentialKernels.size() == 1)
17015b0581aeSJohannes Doerfert     K = *PotentialKernels.begin();
17025b0581aeSJohannes Doerfert 
17035b0581aeSJohannes Doerfert   // Cache the result.
17045b0581aeSJohannes Doerfert   UniqueKernelMap[&F] = K;
17055b0581aeSJohannes Doerfert 
17065b0581aeSJohannes Doerfert   return K;
17075b0581aeSJohannes Doerfert }
17085b0581aeSJohannes Doerfert 
17095b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
1710a2dbfb6bSGiorgis Georgakoudis   OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1711a2dbfb6bSGiorgis Georgakoudis       OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
17125b0581aeSJohannes Doerfert 
17135b0581aeSJohannes Doerfert   bool Changed = false;
1714a2dbfb6bSGiorgis Georgakoudis   if (!KernelParallelRFI)
17155b0581aeSJohannes Doerfert     return Changed;
17165b0581aeSJohannes Doerfert 
17175b0581aeSJohannes Doerfert   for (Function *F : SCC) {
17185b0581aeSJohannes Doerfert 
1719a2dbfb6bSGiorgis Georgakoudis     // Check if the function is a use in a __kmpc_parallel_51 call at
17205b0581aeSJohannes Doerfert     // all.
17215b0581aeSJohannes Doerfert     bool UnknownUse = false;
1722a2dbfb6bSGiorgis Georgakoudis     bool KernelParallelUse = false;
17235b0581aeSJohannes Doerfert     unsigned NumDirectCalls = 0;
17245b0581aeSJohannes Doerfert 
17255b0581aeSJohannes Doerfert     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
17268d8ce85bSsstefan1     OMPInformationCache::foreachUse(*F, [&](Use &U) {
17275b0581aeSJohannes Doerfert       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
17285b0581aeSJohannes Doerfert         if (CB->isCallee(&U)) {
17295b0581aeSJohannes Doerfert           ++NumDirectCalls;
17305b0581aeSJohannes Doerfert           return;
17315b0581aeSJohannes Doerfert         }
17325b0581aeSJohannes Doerfert 
173381db6144SMichael Liao       if (isa<ICmpInst>(U.getUser())) {
17345b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17355b0581aeSJohannes Doerfert         return;
17365b0581aeSJohannes Doerfert       }
1737a2dbfb6bSGiorgis Georgakoudis 
1738a2dbfb6bSGiorgis Georgakoudis       // Find wrapper functions that represent parallel kernels.
1739a2dbfb6bSGiorgis Georgakoudis       CallInst *CI =
1740a2dbfb6bSGiorgis Georgakoudis           OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
1741a2dbfb6bSGiorgis Georgakoudis       const unsigned int WrapperFunctionArgNo = 6;
1742a2dbfb6bSGiorgis Georgakoudis       if (!KernelParallelUse && CI &&
1743a2dbfb6bSGiorgis Georgakoudis           CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
1744a2dbfb6bSGiorgis Georgakoudis         KernelParallelUse = true;
17455b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17465b0581aeSJohannes Doerfert         return;
17475b0581aeSJohannes Doerfert       }
17485b0581aeSJohannes Doerfert       UnknownUse = true;
17495b0581aeSJohannes Doerfert     });
17505b0581aeSJohannes Doerfert 
1751a2dbfb6bSGiorgis Georgakoudis     // Do not emit a remark if we haven't seen a __kmpc_parallel_51
1752fec1f210SJohannes Doerfert     // use.
1753a2dbfb6bSGiorgis Georgakoudis     if (!KernelParallelUse)
17545b0581aeSJohannes Doerfert       continue;
17555b0581aeSJohannes Doerfert 
1756fec1f210SJohannes Doerfert     {
17572db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17582db182ffSJoseph Huber         return ORA << "Found a parallel region that is called in a target "
1759fec1f210SJohannes Doerfert                       "region but not part of a combined target construct nor "
1760a2dbfb6bSGiorgis Georgakoudis                       "nested inside a target construct without intermediate "
1761fec1f210SJohannes Doerfert                       "code. This can lead to excessive register usage for "
1762fec1f210SJohannes Doerfert                       "unrelated target regions in the same translation unit "
1763fec1f210SJohannes Doerfert                       "due to spurious call edges assumed by ptxas.";
1764fec1f210SJohannes Doerfert       };
17652db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
17662db182ffSJoseph Huber                                              Remark);
1767fec1f210SJohannes Doerfert     }
1768fec1f210SJohannes Doerfert 
1769fec1f210SJohannes Doerfert     // If this ever hits, we should investigate.
1770fec1f210SJohannes Doerfert     // TODO: Checking the number of uses is not a necessary restriction and
1771fec1f210SJohannes Doerfert     // should be lifted.
1772fec1f210SJohannes Doerfert     if (UnknownUse || NumDirectCalls != 1 ||
1773fec1f210SJohannes Doerfert         ToBeReplacedStateMachineUses.size() != 2) {
1774fec1f210SJohannes Doerfert       {
17752db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17762db182ffSJoseph Huber           return ORA << "Parallel region is used in "
1777fec1f210SJohannes Doerfert                      << (UnknownUse ? "unknown" : "unexpected")
1778fec1f210SJohannes Doerfert                      << " ways; will not attempt to rewrite the state machine.";
1779fec1f210SJohannes Doerfert         };
17802db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17812db182ffSJoseph Huber             F, "OpenMPParallelRegionInNonSPMD", Remark);
1782fec1f210SJohannes Doerfert       }
17835b0581aeSJohannes Doerfert       continue;
1784fec1f210SJohannes Doerfert     }
17855b0581aeSJohannes Doerfert 
1786a2dbfb6bSGiorgis Georgakoudis     // Even if we have __kmpc_parallel_51 calls, we (for now) give
17875b0581aeSJohannes Doerfert     // up if the function is not called from a unique kernel.
17885b0581aeSJohannes Doerfert     Kernel K = getUniqueKernelFor(*F);
1789fec1f210SJohannes Doerfert     if (!K) {
1790fec1f210SJohannes Doerfert       {
17912db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17922db182ffSJoseph Huber           return ORA << "Parallel region is not known to be called from a "
1793fec1f210SJohannes Doerfert                         "unique single target region, maybe the surrounding "
1794fec1f210SJohannes Doerfert                         "function has external linkage?; will not attempt to "
1795fec1f210SJohannes Doerfert                         "rewrite the state machine use.";
1796fec1f210SJohannes Doerfert         };
17972db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17982db182ffSJoseph Huber             F, "OpenMPParallelRegionInMultipleKernesl", Remark);
1799fec1f210SJohannes Doerfert       }
18005b0581aeSJohannes Doerfert       continue;
1801fec1f210SJohannes Doerfert     }
18025b0581aeSJohannes Doerfert 
18035b0581aeSJohannes Doerfert     // We now know F is a parallel body function called only from the kernel K.
18045b0581aeSJohannes Doerfert     // We also identified the state machine uses in which we replace the
18055b0581aeSJohannes Doerfert     // function pointer by a new global symbol for identification purposes. This
18065b0581aeSJohannes Doerfert     // ensures only direct calls to the function are left.
18075b0581aeSJohannes Doerfert 
1808fec1f210SJohannes Doerfert     {
18092db182ffSJoseph Huber       auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) {
18102db182ffSJoseph Huber         return ORA << "Specialize parallel region that is only reached from a "
1811fec1f210SJohannes Doerfert                       "single target region to avoid spurious call edges and "
1812fec1f210SJohannes Doerfert                       "excessive register usage in other target regions. "
1813fec1f210SJohannes Doerfert                       "(parallel region ID: "
1814fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1815fec1f210SJohannes Doerfert                    << ", kernel ID: "
1816fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1817fec1f210SJohannes Doerfert       };
18182db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
1819fec1f210SJohannes Doerfert                                              RemarkParalleRegion);
18202db182ffSJoseph Huber       auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) {
18212db182ffSJoseph Huber         return ORA << "Target region containing the parallel region that is "
1822fec1f210SJohannes Doerfert                       "specialized. (parallel region ID: "
1823fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1824fec1f210SJohannes Doerfert                    << ", kernel ID: "
1825fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1826fec1f210SJohannes Doerfert       };
18272db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD",
18282db182ffSJoseph Huber                                              RemarkKernel);
1829fec1f210SJohannes Doerfert     }
1830fec1f210SJohannes Doerfert 
18315b0581aeSJohannes Doerfert     Module &M = *F->getParent();
18325b0581aeSJohannes Doerfert     Type *Int8Ty = Type::getInt8Ty(M.getContext());
18335b0581aeSJohannes Doerfert 
18345b0581aeSJohannes Doerfert     auto *ID = new GlobalVariable(
18355b0581aeSJohannes Doerfert         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
18365b0581aeSJohannes Doerfert         UndefValue::get(Int8Ty), F->getName() + ".ID");
18375b0581aeSJohannes Doerfert 
18385b0581aeSJohannes Doerfert     for (Use *U : ToBeReplacedStateMachineUses)
18395b0581aeSJohannes Doerfert       U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
18405b0581aeSJohannes Doerfert 
18415b0581aeSJohannes Doerfert     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
18425b0581aeSJohannes Doerfert 
18435b0581aeSJohannes Doerfert     Changed = true;
18445b0581aeSJohannes Doerfert   }
18455b0581aeSJohannes Doerfert 
18465b0581aeSJohannes Doerfert   return Changed;
18475b0581aeSJohannes Doerfert }
18485b0581aeSJohannes Doerfert 
1849b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
1850b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
1851b8235d2bSsstefan1   using Base = StateWrapper<BooleanState, AbstractAttribute>;
1852b8235d2bSsstefan1   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1853b8235d2bSsstefan1 
18545dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
18555dfd7cc4Ssstefan1     Function *F = getAnchorScope();
18565dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
18575dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
18585dfd7cc4Ssstefan1   }
18595dfd7cc4Ssstefan1 
1860b8235d2bSsstefan1   /// Returns true if value is assumed to be tracked.
1861b8235d2bSsstefan1   bool isAssumedTracked() const { return getAssumed(); }
1862b8235d2bSsstefan1 
1863b8235d2bSsstefan1   /// Returns true if value is known to be tracked.
1864b8235d2bSsstefan1   bool isKnownTracked() const { return getAssumed(); }
1865b8235d2bSsstefan1 
1866b8235d2bSsstefan1   /// Create an abstract attribute biew for the position \p IRP.
1867b8235d2bSsstefan1   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
1868b8235d2bSsstefan1 
1869b8235d2bSsstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
18705dfd7cc4Ssstefan1   virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
18715dfd7cc4Ssstefan1                                                 const Instruction *I,
18725dfd7cc4Ssstefan1                                                 Attributor &A) const {
18735dfd7cc4Ssstefan1     return None;
18745dfd7cc4Ssstefan1   }
18755dfd7cc4Ssstefan1 
18765dfd7cc4Ssstefan1   /// Return an assumed unique ICV value if a single candidate is found. If
18775dfd7cc4Ssstefan1   /// there cannot be one, return a nullptr. If it is not clear yet, return the
18785dfd7cc4Ssstefan1   /// Optional::NoneType.
18795dfd7cc4Ssstefan1   virtual Optional<Value *>
18805dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
18815dfd7cc4Ssstefan1 
18825dfd7cc4Ssstefan1   // Currently only nthreads is being tracked.
18835dfd7cc4Ssstefan1   // this array will only grow with time.
18845dfd7cc4Ssstefan1   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
1885b8235d2bSsstefan1 
1886b8235d2bSsstefan1   /// See AbstractAttribute::getName()
1887b8235d2bSsstefan1   const std::string getName() const override { return "AAICVTracker"; }
1888b8235d2bSsstefan1 
1889233af895SLuofan Chen   /// See AbstractAttribute::getIdAddr()
1890233af895SLuofan Chen   const char *getIdAddr() const override { return &ID; }
1891233af895SLuofan Chen 
1892233af895SLuofan Chen   /// This function should return true if the type of the \p AA is AAICVTracker
1893233af895SLuofan Chen   static bool classof(const AbstractAttribute *AA) {
1894233af895SLuofan Chen     return (AA->getIdAddr() == &ID);
1895233af895SLuofan Chen   }
1896233af895SLuofan Chen 
1897b8235d2bSsstefan1   static const char ID;
1898b8235d2bSsstefan1 };
1899b8235d2bSsstefan1 
1900b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
1901b8235d2bSsstefan1   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
1902b8235d2bSsstefan1       : AAICVTracker(IRP, A) {}
1903b8235d2bSsstefan1 
1904b8235d2bSsstefan1   // FIXME: come up with better string.
19055dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerFunction"; }
1906b8235d2bSsstefan1 
1907b8235d2bSsstefan1   // FIXME: come up with some stats.
1908b8235d2bSsstefan1   void trackStatistics() const override {}
1909b8235d2bSsstefan1 
19105dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1911b8235d2bSsstefan1   ChangeStatus manifest(Attributor &A) override {
19125dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1913b8235d2bSsstefan1   }
1914b8235d2bSsstefan1 
1915b8235d2bSsstefan1   // Map of ICV to their values at specific program point.
19165dfd7cc4Ssstefan1   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
1917b8235d2bSsstefan1                   InternalControlVar::ICV___last>
19185dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1919b8235d2bSsstefan1 
1920b8235d2bSsstefan1   ChangeStatus updateImpl(Attributor &A) override {
1921b8235d2bSsstefan1     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
1922b8235d2bSsstefan1 
1923b8235d2bSsstefan1     Function *F = getAnchorScope();
1924b8235d2bSsstefan1 
1925b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1926b8235d2bSsstefan1 
1927b8235d2bSsstefan1     for (InternalControlVar ICV : TrackableICVs) {
1928b8235d2bSsstefan1       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
1929b8235d2bSsstefan1 
19305dfd7cc4Ssstefan1       auto &ValuesMap = ICVReplacementValuesMap[ICV];
1931b8235d2bSsstefan1       auto TrackValues = [&](Use &U, Function &) {
1932b8235d2bSsstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
1933b8235d2bSsstefan1         if (!CI)
1934b8235d2bSsstefan1           return false;
1935b8235d2bSsstefan1 
1936b8235d2bSsstefan1         // FIXME: handle setters with more that 1 arguments.
1937b8235d2bSsstefan1         /// Track new value.
19385dfd7cc4Ssstefan1         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
1939b8235d2bSsstefan1           HasChanged = ChangeStatus::CHANGED;
1940b8235d2bSsstefan1 
1941b8235d2bSsstefan1         return false;
1942b8235d2bSsstefan1       };
1943b8235d2bSsstefan1 
19445dfd7cc4Ssstefan1       auto CallCheck = [&](Instruction &I) {
19455dfd7cc4Ssstefan1         Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
19465dfd7cc4Ssstefan1         if (ReplVal.hasValue() &&
19475dfd7cc4Ssstefan1             ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
19485dfd7cc4Ssstefan1           HasChanged = ChangeStatus::CHANGED;
19495dfd7cc4Ssstefan1 
19505dfd7cc4Ssstefan1         return true;
19515dfd7cc4Ssstefan1       };
19525dfd7cc4Ssstefan1 
19535dfd7cc4Ssstefan1       // Track all changes of an ICV.
1954b8235d2bSsstefan1       SetterRFI.foreachUse(TrackValues, F);
19555dfd7cc4Ssstefan1 
19565dfd7cc4Ssstefan1       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
19575dfd7cc4Ssstefan1                                 /* CheckBBLivenessOnly */ true);
19585dfd7cc4Ssstefan1 
19595dfd7cc4Ssstefan1       /// TODO: Figure out a way to avoid adding entry in
19605dfd7cc4Ssstefan1       /// ICVReplacementValuesMap
19615dfd7cc4Ssstefan1       Instruction *Entry = &F->getEntryBlock().front();
19625dfd7cc4Ssstefan1       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
19635dfd7cc4Ssstefan1         ValuesMap.insert(std::make_pair(Entry, nullptr));
1964b8235d2bSsstefan1     }
1965b8235d2bSsstefan1 
1966b8235d2bSsstefan1     return HasChanged;
1967b8235d2bSsstefan1   }
1968b8235d2bSsstefan1 
19695dfd7cc4Ssstefan1   /// Hepler to check if \p I is a call and get the value for it if it is
19705dfd7cc4Ssstefan1   /// unique.
19715dfd7cc4Ssstefan1   Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
19725dfd7cc4Ssstefan1                                     InternalControlVar &ICV) const {
1973b8235d2bSsstefan1 
19745dfd7cc4Ssstefan1     const auto *CB = dyn_cast<CallBase>(I);
1975dcaec812SJohannes Doerfert     if (!CB || CB->hasFnAttr("no_openmp") ||
1976dcaec812SJohannes Doerfert         CB->hasFnAttr("no_openmp_routines"))
19775dfd7cc4Ssstefan1       return None;
19785dfd7cc4Ssstefan1 
1979b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1980b8235d2bSsstefan1     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
19815dfd7cc4Ssstefan1     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
19825dfd7cc4Ssstefan1     Function *CalledFunction = CB->getCalledFunction();
1983b8235d2bSsstefan1 
19844eef14f9SWei Wang     // Indirect call, assume ICV changes.
19854eef14f9SWei Wang     if (CalledFunction == nullptr)
19864eef14f9SWei Wang       return nullptr;
19875dfd7cc4Ssstefan1     if (CalledFunction == GetterRFI.Declaration)
19885dfd7cc4Ssstefan1       return None;
19895dfd7cc4Ssstefan1     if (CalledFunction == SetterRFI.Declaration) {
19905dfd7cc4Ssstefan1       if (ICVReplacementValuesMap[ICV].count(I))
19915dfd7cc4Ssstefan1         return ICVReplacementValuesMap[ICV].lookup(I);
19925dfd7cc4Ssstefan1 
19935dfd7cc4Ssstefan1       return nullptr;
19945dfd7cc4Ssstefan1     }
19955dfd7cc4Ssstefan1 
19965dfd7cc4Ssstefan1     // Since we don't know, assume it changes the ICV.
19975dfd7cc4Ssstefan1     if (CalledFunction->isDeclaration())
19985dfd7cc4Ssstefan1       return nullptr;
19995dfd7cc4Ssstefan1 
20005b70c12fSJohannes Doerfert     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
20015b70c12fSJohannes Doerfert         *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
20025dfd7cc4Ssstefan1 
20035dfd7cc4Ssstefan1     if (ICVTrackingAA.isAssumedTracked())
20045dfd7cc4Ssstefan1       return ICVTrackingAA.getUniqueReplacementValue(ICV);
20055dfd7cc4Ssstefan1 
20065dfd7cc4Ssstefan1     // If we don't know, assume it changes.
20075dfd7cc4Ssstefan1     return nullptr;
20085dfd7cc4Ssstefan1   }
20095dfd7cc4Ssstefan1 
20105dfd7cc4Ssstefan1   // We don't check unique value for a function, so return None.
20115dfd7cc4Ssstefan1   Optional<Value *>
20125dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
20135dfd7cc4Ssstefan1     return None;
20145dfd7cc4Ssstefan1   }
20155dfd7cc4Ssstefan1 
20165dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
20175dfd7cc4Ssstefan1   Optional<Value *> getReplacementValue(InternalControlVar ICV,
20185dfd7cc4Ssstefan1                                         const Instruction *I,
20195dfd7cc4Ssstefan1                                         Attributor &A) const override {
20205dfd7cc4Ssstefan1     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
20215dfd7cc4Ssstefan1     if (ValuesMap.count(I))
20225dfd7cc4Ssstefan1       return ValuesMap.lookup(I);
20235dfd7cc4Ssstefan1 
20245dfd7cc4Ssstefan1     SmallVector<const Instruction *, 16> Worklist;
20255dfd7cc4Ssstefan1     SmallPtrSet<const Instruction *, 16> Visited;
20265dfd7cc4Ssstefan1     Worklist.push_back(I);
20275dfd7cc4Ssstefan1 
20285dfd7cc4Ssstefan1     Optional<Value *> ReplVal;
20295dfd7cc4Ssstefan1 
20305dfd7cc4Ssstefan1     while (!Worklist.empty()) {
20315dfd7cc4Ssstefan1       const Instruction *CurrInst = Worklist.pop_back_val();
20325dfd7cc4Ssstefan1       if (!Visited.insert(CurrInst).second)
2033b8235d2bSsstefan1         continue;
2034b8235d2bSsstefan1 
20355dfd7cc4Ssstefan1       const BasicBlock *CurrBB = CurrInst->getParent();
20365dfd7cc4Ssstefan1 
20375dfd7cc4Ssstefan1       // Go up and look for all potential setters/calls that might change the
20385dfd7cc4Ssstefan1       // ICV.
20395dfd7cc4Ssstefan1       while ((CurrInst = CurrInst->getPrevNode())) {
20405dfd7cc4Ssstefan1         if (ValuesMap.count(CurrInst)) {
20415dfd7cc4Ssstefan1           Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
20425dfd7cc4Ssstefan1           // Unknown value, track new.
20435dfd7cc4Ssstefan1           if (!ReplVal.hasValue()) {
20445dfd7cc4Ssstefan1             ReplVal = NewReplVal;
20455dfd7cc4Ssstefan1             break;
20465dfd7cc4Ssstefan1           }
20475dfd7cc4Ssstefan1 
20485dfd7cc4Ssstefan1           // If we found a new value, we can't know the icv value anymore.
20495dfd7cc4Ssstefan1           if (NewReplVal.hasValue())
20505dfd7cc4Ssstefan1             if (ReplVal != NewReplVal)
2051b8235d2bSsstefan1               return nullptr;
2052b8235d2bSsstefan1 
20535dfd7cc4Ssstefan1           break;
2054b8235d2bSsstefan1         }
2055b8235d2bSsstefan1 
20565dfd7cc4Ssstefan1         Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
20575dfd7cc4Ssstefan1         if (!NewReplVal.hasValue())
20585dfd7cc4Ssstefan1           continue;
20595dfd7cc4Ssstefan1 
20605dfd7cc4Ssstefan1         // Unknown value, track new.
20615dfd7cc4Ssstefan1         if (!ReplVal.hasValue()) {
20625dfd7cc4Ssstefan1           ReplVal = NewReplVal;
20635dfd7cc4Ssstefan1           break;
2064b8235d2bSsstefan1         }
2065b8235d2bSsstefan1 
20665dfd7cc4Ssstefan1         // if (NewReplVal.hasValue())
20675dfd7cc4Ssstefan1         // We found a new value, we can't know the icv value anymore.
20685dfd7cc4Ssstefan1         if (ReplVal != NewReplVal)
2069b8235d2bSsstefan1           return nullptr;
2070b8235d2bSsstefan1       }
20715dfd7cc4Ssstefan1 
20725dfd7cc4Ssstefan1       // If we are in the same BB and we have a value, we are done.
20735dfd7cc4Ssstefan1       if (CurrBB == I->getParent() && ReplVal.hasValue())
20745dfd7cc4Ssstefan1         return ReplVal;
20755dfd7cc4Ssstefan1 
20765dfd7cc4Ssstefan1       // Go through all predecessors and add terminators for analysis.
20775dfd7cc4Ssstefan1       for (const BasicBlock *Pred : predecessors(CurrBB))
20785dfd7cc4Ssstefan1         if (const Instruction *Terminator = Pred->getTerminator())
20795dfd7cc4Ssstefan1           Worklist.push_back(Terminator);
20805dfd7cc4Ssstefan1     }
20815dfd7cc4Ssstefan1 
20825dfd7cc4Ssstefan1     return ReplVal;
20835dfd7cc4Ssstefan1   }
20845dfd7cc4Ssstefan1 };
20855dfd7cc4Ssstefan1 
20865dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
20875dfd7cc4Ssstefan1   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
20885dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
20895dfd7cc4Ssstefan1 
20905dfd7cc4Ssstefan1   // FIXME: come up with better string.
20915dfd7cc4Ssstefan1   const std::string getAsStr() const override {
20925dfd7cc4Ssstefan1     return "ICVTrackerFunctionReturned";
20935dfd7cc4Ssstefan1   }
20945dfd7cc4Ssstefan1 
20955dfd7cc4Ssstefan1   // FIXME: come up with some stats.
20965dfd7cc4Ssstefan1   void trackStatistics() const override {}
20975dfd7cc4Ssstefan1 
20985dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
20995dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
21005dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
21015dfd7cc4Ssstefan1   }
21025dfd7cc4Ssstefan1 
21035dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
21045dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
21055dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
21065dfd7cc4Ssstefan1       ICVReplacementValuesMap;
21075dfd7cc4Ssstefan1 
21085dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
21095dfd7cc4Ssstefan1   Optional<Value *>
21105dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
21115dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
21125dfd7cc4Ssstefan1   }
21135dfd7cc4Ssstefan1 
21145dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21155dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
21165dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
21175b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
21185dfd7cc4Ssstefan1 
21195dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
21205dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
21215dfd7cc4Ssstefan1 
21225dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21235dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
21245dfd7cc4Ssstefan1       Optional<Value *> UniqueICVValue;
21255dfd7cc4Ssstefan1 
21265dfd7cc4Ssstefan1       auto CheckReturnInst = [&](Instruction &I) {
21275dfd7cc4Ssstefan1         Optional<Value *> NewReplVal =
21285dfd7cc4Ssstefan1             ICVTrackingAA.getReplacementValue(ICV, &I, A);
21295dfd7cc4Ssstefan1 
21305dfd7cc4Ssstefan1         // If we found a second ICV value there is no unique returned value.
21315dfd7cc4Ssstefan1         if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
21325dfd7cc4Ssstefan1           return false;
21335dfd7cc4Ssstefan1 
21345dfd7cc4Ssstefan1         UniqueICVValue = NewReplVal;
21355dfd7cc4Ssstefan1 
21365dfd7cc4Ssstefan1         return true;
21375dfd7cc4Ssstefan1       };
21385dfd7cc4Ssstefan1 
21395dfd7cc4Ssstefan1       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
21405dfd7cc4Ssstefan1                                      /* CheckBBLivenessOnly */ true))
21415dfd7cc4Ssstefan1         UniqueICVValue = nullptr;
21425dfd7cc4Ssstefan1 
21435dfd7cc4Ssstefan1       if (UniqueICVValue == ReplVal)
21445dfd7cc4Ssstefan1         continue;
21455dfd7cc4Ssstefan1 
21465dfd7cc4Ssstefan1       ReplVal = UniqueICVValue;
21475dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
21485dfd7cc4Ssstefan1     }
21495dfd7cc4Ssstefan1 
21505dfd7cc4Ssstefan1     return Changed;
21515dfd7cc4Ssstefan1   }
21525dfd7cc4Ssstefan1 };
21535dfd7cc4Ssstefan1 
21545dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
21555dfd7cc4Ssstefan1   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
21565dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
21575dfd7cc4Ssstefan1 
21585dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
21595dfd7cc4Ssstefan1     Function *F = getAnchorScope();
21605dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
21615dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
21625dfd7cc4Ssstefan1 
21635dfd7cc4Ssstefan1     // We only initialize this AA for getters, so we need to know which ICV it
21645dfd7cc4Ssstefan1     // gets.
21655dfd7cc4Ssstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
21665dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21675dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[ICV];
21685dfd7cc4Ssstefan1       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
21695dfd7cc4Ssstefan1       if (Getter.Declaration == getAssociatedFunction()) {
21705dfd7cc4Ssstefan1         AssociatedICV = ICVInfo.Kind;
21715dfd7cc4Ssstefan1         return;
21725dfd7cc4Ssstefan1       }
21735dfd7cc4Ssstefan1     }
21745dfd7cc4Ssstefan1 
21755dfd7cc4Ssstefan1     /// Unknown ICV.
21765dfd7cc4Ssstefan1     indicatePessimisticFixpoint();
21775dfd7cc4Ssstefan1   }
21785dfd7cc4Ssstefan1 
21795dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
21805dfd7cc4Ssstefan1     if (!ReplVal.hasValue() || !ReplVal.getValue())
21815dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
21825dfd7cc4Ssstefan1 
21835dfd7cc4Ssstefan1     A.changeValueAfterManifest(*getCtxI(), **ReplVal);
21845dfd7cc4Ssstefan1     A.deleteAfterManifest(*getCtxI());
21855dfd7cc4Ssstefan1 
21865dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
21875dfd7cc4Ssstefan1   }
21885dfd7cc4Ssstefan1 
21895dfd7cc4Ssstefan1   // FIXME: come up with better string.
21905dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
21915dfd7cc4Ssstefan1 
21925dfd7cc4Ssstefan1   // FIXME: come up with some stats.
21935dfd7cc4Ssstefan1   void trackStatistics() const override {}
21945dfd7cc4Ssstefan1 
21955dfd7cc4Ssstefan1   InternalControlVar AssociatedICV;
21965dfd7cc4Ssstefan1   Optional<Value *> ReplVal;
21975dfd7cc4Ssstefan1 
21985dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21995dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
22005b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
22015dfd7cc4Ssstefan1 
22025dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
22035dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22045dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22055dfd7cc4Ssstefan1 
22065dfd7cc4Ssstefan1     Optional<Value *> NewReplVal =
22075dfd7cc4Ssstefan1         ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
22085dfd7cc4Ssstefan1 
22095dfd7cc4Ssstefan1     if (ReplVal == NewReplVal)
22105dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
22115dfd7cc4Ssstefan1 
22125dfd7cc4Ssstefan1     ReplVal = NewReplVal;
22135dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
22145dfd7cc4Ssstefan1   }
22155dfd7cc4Ssstefan1 
22165dfd7cc4Ssstefan1   // Return the value with which associated value can be replaced for specific
22175dfd7cc4Ssstefan1   // \p ICV.
22185dfd7cc4Ssstefan1   Optional<Value *>
22195dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22205dfd7cc4Ssstefan1     return ReplVal;
22215dfd7cc4Ssstefan1   }
22225dfd7cc4Ssstefan1 };
22235dfd7cc4Ssstefan1 
22245dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
22255dfd7cc4Ssstefan1   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
22265dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
22275dfd7cc4Ssstefan1 
22285dfd7cc4Ssstefan1   // FIXME: come up with better string.
22295dfd7cc4Ssstefan1   const std::string getAsStr() const override {
22305dfd7cc4Ssstefan1     return "ICVTrackerCallSiteReturned";
22315dfd7cc4Ssstefan1   }
22325dfd7cc4Ssstefan1 
22335dfd7cc4Ssstefan1   // FIXME: come up with some stats.
22345dfd7cc4Ssstefan1   void trackStatistics() const override {}
22355dfd7cc4Ssstefan1 
22365dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
22375dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
22385dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
22395dfd7cc4Ssstefan1   }
22405dfd7cc4Ssstefan1 
22415dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
22425dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
22435dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
22445dfd7cc4Ssstefan1       ICVReplacementValuesMap;
22455dfd7cc4Ssstefan1 
22465dfd7cc4Ssstefan1   /// Return the value with which associated value can be replaced for specific
22475dfd7cc4Ssstefan1   /// \p ICV.
22485dfd7cc4Ssstefan1   Optional<Value *>
22495dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22505dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
22515dfd7cc4Ssstefan1   }
22525dfd7cc4Ssstefan1 
22535dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
22545dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
22555dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
22565b70c12fSJohannes Doerfert         *this, IRPosition::returned(*getAssociatedFunction()),
22575b70c12fSJohannes Doerfert         DepClassTy::REQUIRED);
22585dfd7cc4Ssstefan1 
22595dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
22605dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22615dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22625dfd7cc4Ssstefan1 
22635dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
22645dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
22655dfd7cc4Ssstefan1       Optional<Value *> NewReplVal =
22665dfd7cc4Ssstefan1           ICVTrackingAA.getUniqueReplacementValue(ICV);
22675dfd7cc4Ssstefan1 
22685dfd7cc4Ssstefan1       if (ReplVal == NewReplVal)
22695dfd7cc4Ssstefan1         continue;
22705dfd7cc4Ssstefan1 
22715dfd7cc4Ssstefan1       ReplVal = NewReplVal;
22725dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
22735dfd7cc4Ssstefan1     }
22745dfd7cc4Ssstefan1     return Changed;
22755dfd7cc4Ssstefan1   }
22769548b74aSJohannes Doerfert };
227718283125SJoseph Huber 
227818283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain {
227918283125SJoseph Huber   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
228018283125SJoseph Huber       : AAExecutionDomain(IRP, A) {}
228118283125SJoseph Huber 
228218283125SJoseph Huber   const std::string getAsStr() const override {
228318283125SJoseph Huber     return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
228418283125SJoseph Huber            "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
228518283125SJoseph Huber   }
228618283125SJoseph Huber 
228718283125SJoseph Huber   /// See AbstractAttribute::trackStatistics().
228818283125SJoseph Huber   void trackStatistics() const override {}
228918283125SJoseph Huber 
229018283125SJoseph Huber   void initialize(Attributor &A) override {
229118283125SJoseph Huber     Function *F = getAnchorScope();
229218283125SJoseph Huber     for (const auto &BB : *F)
229318283125SJoseph Huber       SingleThreadedBBs.insert(&BB);
229418283125SJoseph Huber     NumBBs = SingleThreadedBBs.size();
229518283125SJoseph Huber   }
229618283125SJoseph Huber 
229718283125SJoseph Huber   ChangeStatus manifest(Attributor &A) override {
229818283125SJoseph Huber     LLVM_DEBUG({
229918283125SJoseph Huber       for (const BasicBlock *BB : SingleThreadedBBs)
230018283125SJoseph Huber         dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
230118283125SJoseph Huber                << BB->getName() << " is executed by a single thread.\n";
230218283125SJoseph Huber     });
230318283125SJoseph Huber     return ChangeStatus::UNCHANGED;
230418283125SJoseph Huber   }
230518283125SJoseph Huber 
230618283125SJoseph Huber   ChangeStatus updateImpl(Attributor &A) override;
230718283125SJoseph Huber 
230818283125SJoseph Huber   /// Check if an instruction is executed by a single thread.
23099a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
23109a23e673SJohannes Doerfert     return isExecutedByInitialThreadOnly(*I.getParent());
231118283125SJoseph Huber   }
231218283125SJoseph Huber 
23139a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
23141cfdcae6SJoseph Huber     return isValidState() && SingleThreadedBBs.contains(&BB);
231518283125SJoseph Huber   }
231618283125SJoseph Huber 
231718283125SJoseph Huber   /// Set of basic blocks that are executed by a single thread.
231818283125SJoseph Huber   DenseSet<const BasicBlock *> SingleThreadedBBs;
231918283125SJoseph Huber 
232018283125SJoseph Huber   /// Total number of basic blocks in this function.
232118283125SJoseph Huber   long unsigned NumBBs;
232218283125SJoseph Huber };
232318283125SJoseph Huber 
232418283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
232518283125SJoseph Huber   Function *F = getAnchorScope();
232618283125SJoseph Huber   ReversePostOrderTraversal<Function *> RPOT(F);
232718283125SJoseph Huber   auto NumSingleThreadedBBs = SingleThreadedBBs.size();
232818283125SJoseph Huber 
232918283125SJoseph Huber   bool AllCallSitesKnown;
233018283125SJoseph Huber   auto PredForCallSite = [&](AbstractCallSite ACS) {
233118283125SJoseph Huber     const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
233218283125SJoseph Huber         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
233318283125SJoseph Huber         DepClassTy::REQUIRED);
23341cfdcae6SJoseph Huber     return ACS.isDirectCall() &&
23351cfdcae6SJoseph Huber            ExecutionDomainAA.isExecutedByInitialThreadOnly(
23369a23e673SJohannes Doerfert                *ACS.getInstruction());
233718283125SJoseph Huber   };
233818283125SJoseph Huber 
233918283125SJoseph Huber   if (!A.checkForAllCallSites(PredForCallSite, *this,
234018283125SJoseph Huber                               /* RequiresAllCallSites */ true,
234118283125SJoseph Huber                               AllCallSitesKnown))
234218283125SJoseph Huber     SingleThreadedBBs.erase(&F->getEntryBlock());
234318283125SJoseph Huber 
234418283125SJoseph Huber   // Check if the edge into the successor block compares a thread-id function to
234518283125SJoseph Huber   // a constant zero.
234618283125SJoseph Huber   // TODO: Use AAValueSimplify to simplify and propogate constants.
234718283125SJoseph Huber   // TODO: Check more than a single use for thread ID's.
23486fc51c9fSJoseph Huber   auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
234918283125SJoseph Huber     if (!Edge || !Edge->isConditional())
235018283125SJoseph Huber       return false;
235118283125SJoseph Huber     if (Edge->getSuccessor(0) != SuccessorBB)
235218283125SJoseph Huber       return false;
235318283125SJoseph Huber 
235418283125SJoseph Huber     auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
235518283125SJoseph Huber     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
235618283125SJoseph Huber       return false;
235718283125SJoseph Huber 
23586fc51c9fSJoseph Huber     // Temporarily match the pattern generated by clang for teams regions.
23596fc51c9fSJoseph Huber     // TODO: Remove this once the new runtime is in place.
23606fc51c9fSJoseph Huber     ConstantInt *One, *NegOne;
23616fc51c9fSJoseph Huber     CmpInst::Predicate Pred;
23626fc51c9fSJoseph Huber     auto &&m_ThreadID = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_tid_x>();
23636fc51c9fSJoseph Huber     auto &&m_WarpSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_warpsize>();
23646fc51c9fSJoseph Huber     auto &&m_BlockSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_ntid_x>();
23656fc51c9fSJoseph Huber     if (match(Cmp, m_Cmp(Pred, m_ThreadID,
23666fc51c9fSJoseph Huber                          m_And(m_Sub(m_BlockSize, m_ConstantInt(One)),
23676fc51c9fSJoseph Huber                                m_Xor(m_Sub(m_WarpSize, m_ConstantInt(One)),
23686fc51c9fSJoseph Huber                                      m_ConstantInt(NegOne))))))
23696fc51c9fSJoseph Huber       if (One->isOne() && NegOne->isMinusOne() &&
23706fc51c9fSJoseph Huber           Pred == CmpInst::Predicate::ICMP_EQ)
23716fc51c9fSJoseph Huber         return true;
23726fc51c9fSJoseph Huber 
237318283125SJoseph Huber     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
237418283125SJoseph Huber     if (!C || !C->isZero())
237518283125SJoseph Huber       return false;
237618283125SJoseph Huber 
237768abc3d2SJoseph Huber     if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
237868abc3d2SJoseph Huber       if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
237918283125SJoseph Huber         return true;
238068abc3d2SJoseph Huber     if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
238168abc3d2SJoseph Huber       if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
238268abc3d2SJoseph Huber         return true;
238318283125SJoseph Huber 
238418283125SJoseph Huber     return false;
238518283125SJoseph Huber   };
238618283125SJoseph Huber 
238718283125SJoseph Huber   // Merge all the predecessor states into the current basic block. A basic
238818283125SJoseph Huber   // block is executed by a single thread if all of its predecessors are.
238918283125SJoseph Huber   auto MergePredecessorStates = [&](BasicBlock *BB) {
239018283125SJoseph Huber     if (pred_begin(BB) == pred_end(BB))
239118283125SJoseph Huber       return SingleThreadedBBs.contains(BB);
239218283125SJoseph Huber 
23936fc51c9fSJoseph Huber     bool IsInitialThread = true;
239418283125SJoseph Huber     for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
239518283125SJoseph Huber          PredBB != PredEndBB; ++PredBB) {
23966fc51c9fSJoseph Huber       if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
239718283125SJoseph Huber                               BB))
23986fc51c9fSJoseph Huber         IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
239918283125SJoseph Huber     }
240018283125SJoseph Huber 
24016fc51c9fSJoseph Huber     return IsInitialThread;
240218283125SJoseph Huber   };
240318283125SJoseph Huber 
240418283125SJoseph Huber   for (auto *BB : RPOT) {
240518283125SJoseph Huber     if (!MergePredecessorStates(BB))
240618283125SJoseph Huber       SingleThreadedBBs.erase(BB);
240718283125SJoseph Huber   }
240818283125SJoseph Huber 
240918283125SJoseph Huber   return (NumSingleThreadedBBs == SingleThreadedBBs.size())
241018283125SJoseph Huber              ? ChangeStatus::UNCHANGED
241118283125SJoseph Huber              : ChangeStatus::CHANGED;
241218283125SJoseph Huber }
241318283125SJoseph Huber 
24146fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a
24156fc51c9fSJoseph Huber /// static buffer of shared memory.
24166fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
24176fc51c9fSJoseph Huber   using Base = StateWrapper<BooleanState, AbstractAttribute>;
24186fc51c9fSJoseph Huber   AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
24196fc51c9fSJoseph Huber 
24206fc51c9fSJoseph Huber   /// Create an abstract attribute view for the position \p IRP.
24216fc51c9fSJoseph Huber   static AAHeapToShared &createForPosition(const IRPosition &IRP,
24226fc51c9fSJoseph Huber                                            Attributor &A);
24236fc51c9fSJoseph Huber 
24246fc51c9fSJoseph Huber   /// See AbstractAttribute::getName().
24256fc51c9fSJoseph Huber   const std::string getName() const override { return "AAHeapToShared"; }
24266fc51c9fSJoseph Huber 
24276fc51c9fSJoseph Huber   /// See AbstractAttribute::getIdAddr().
24286fc51c9fSJoseph Huber   const char *getIdAddr() const override { return &ID; }
24296fc51c9fSJoseph Huber 
24306fc51c9fSJoseph Huber   /// This function should return true if the type of the \p AA is
24316fc51c9fSJoseph Huber   /// AAHeapToShared.
24326fc51c9fSJoseph Huber   static bool classof(const AbstractAttribute *AA) {
24336fc51c9fSJoseph Huber     return (AA->getIdAddr() == &ID);
24346fc51c9fSJoseph Huber   }
24356fc51c9fSJoseph Huber 
24366fc51c9fSJoseph Huber   /// Unique ID (due to the unique address)
24376fc51c9fSJoseph Huber   static const char ID;
24386fc51c9fSJoseph Huber };
24396fc51c9fSJoseph Huber 
24406fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared {
24416fc51c9fSJoseph Huber   AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
24426fc51c9fSJoseph Huber       : AAHeapToShared(IRP, A) {}
24436fc51c9fSJoseph Huber 
24446fc51c9fSJoseph Huber   const std::string getAsStr() const override {
24456fc51c9fSJoseph Huber     return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
24466fc51c9fSJoseph Huber            " malloc calls eligible.";
24476fc51c9fSJoseph Huber   }
24486fc51c9fSJoseph Huber 
24496fc51c9fSJoseph Huber   /// See AbstractAttribute::trackStatistics().
24506fc51c9fSJoseph Huber   void trackStatistics() const override {}
24516fc51c9fSJoseph Huber 
24526fc51c9fSJoseph Huber   void initialize(Attributor &A) override {
24536fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24546fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
24556fc51c9fSJoseph Huber 
24566fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users())
24576fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
24586fc51c9fSJoseph Huber         MallocCalls.insert(CB);
24596fc51c9fSJoseph Huber   }
24606fc51c9fSJoseph Huber 
24616fc51c9fSJoseph Huber   ChangeStatus manifest(Attributor &A) override {
24626fc51c9fSJoseph Huber     if (MallocCalls.empty())
24636fc51c9fSJoseph Huber       return ChangeStatus::UNCHANGED;
24646fc51c9fSJoseph Huber 
24656fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24666fc51c9fSJoseph Huber     auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
24676fc51c9fSJoseph Huber 
24686fc51c9fSJoseph Huber     Function *F = getAnchorScope();
24696fc51c9fSJoseph Huber     auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
24706fc51c9fSJoseph Huber                                             DepClassTy::OPTIONAL);
24716fc51c9fSJoseph Huber 
24726fc51c9fSJoseph Huber     ChangeStatus Changed = ChangeStatus::UNCHANGED;
24736fc51c9fSJoseph Huber     for (CallBase *CB : MallocCalls) {
24746fc51c9fSJoseph Huber       // Skip replacing this if HeapToStack has already claimed it.
24756fc51c9fSJoseph Huber       if (HS && HS->isKnownHeapToStack(*CB))
24766fc51c9fSJoseph Huber         continue;
24776fc51c9fSJoseph Huber 
24786fc51c9fSJoseph Huber       // Find the unique free call to remove it.
24796fc51c9fSJoseph Huber       SmallVector<CallBase *, 4> FreeCalls;
24806fc51c9fSJoseph Huber       for (auto *U : CB->users()) {
24816fc51c9fSJoseph Huber         CallBase *C = dyn_cast<CallBase>(U);
24826fc51c9fSJoseph Huber         if (C && C->getCalledFunction() == FreeCall.Declaration)
24836fc51c9fSJoseph Huber           FreeCalls.push_back(C);
24846fc51c9fSJoseph Huber       }
24856fc51c9fSJoseph Huber       if (FreeCalls.size() != 1)
24866fc51c9fSJoseph Huber         continue;
24876fc51c9fSJoseph Huber 
24886fc51c9fSJoseph Huber       ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
24896fc51c9fSJoseph Huber 
24906fc51c9fSJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
24916fc51c9fSJoseph Huber                         << CB->getCaller()->getName() << " with "
24926fc51c9fSJoseph Huber                         << AllocSize->getZExtValue()
24936fc51c9fSJoseph Huber                         << " bytes of shared memory\n");
24946fc51c9fSJoseph Huber 
24956fc51c9fSJoseph Huber       // Create a new shared memory buffer of the same size as the allocation
24966fc51c9fSJoseph Huber       // and replace all the uses of the original allocation with it.
24976fc51c9fSJoseph Huber       Module *M = CB->getModule();
24986fc51c9fSJoseph Huber       Type *Int8Ty = Type::getInt8Ty(M->getContext());
24996fc51c9fSJoseph Huber       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
25006fc51c9fSJoseph Huber       auto *SharedMem = new GlobalVariable(
25016fc51c9fSJoseph Huber           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
25026fc51c9fSJoseph Huber           UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
25036fc51c9fSJoseph Huber           GlobalValue::NotThreadLocal,
25046fc51c9fSJoseph Huber           static_cast<unsigned>(AddressSpace::Shared));
25056fc51c9fSJoseph Huber       auto *NewBuffer =
25066fc51c9fSJoseph Huber           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
25076fc51c9fSJoseph Huber 
250830e36c9bSJoseph Huber       auto Remark = [&](OptimizationRemark OR) {
250930e36c9bSJoseph Huber         return OR << "Replaced globalized variable with "
251030e36c9bSJoseph Huber                   << ore::NV("SharedMemory", AllocSize->getZExtValue())
251130e36c9bSJoseph Huber                   << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
251230e36c9bSJoseph Huber                   << "of shared memory";
251330e36c9bSJoseph Huber       };
251430e36c9bSJoseph Huber       A.emitRemark<OptimizationRemark>(CB, "OpenMPReplaceGlobalization",
251530e36c9bSJoseph Huber                                        Remark);
251630e36c9bSJoseph Huber 
25176fc51c9fSJoseph Huber       SharedMem->setAlignment(MaybeAlign(32));
25186fc51c9fSJoseph Huber 
25196fc51c9fSJoseph Huber       A.changeValueAfterManifest(*CB, *NewBuffer);
25206fc51c9fSJoseph Huber       A.deleteAfterManifest(*CB);
25216fc51c9fSJoseph Huber       A.deleteAfterManifest(*FreeCalls.front());
25226fc51c9fSJoseph Huber 
25236fc51c9fSJoseph Huber       NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
25246fc51c9fSJoseph Huber       Changed = ChangeStatus::CHANGED;
25256fc51c9fSJoseph Huber     }
25266fc51c9fSJoseph Huber 
25276fc51c9fSJoseph Huber     return Changed;
25286fc51c9fSJoseph Huber   }
25296fc51c9fSJoseph Huber 
25306fc51c9fSJoseph Huber   ChangeStatus updateImpl(Attributor &A) override {
25316fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
25326fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
25336fc51c9fSJoseph Huber     Function *F = getAnchorScope();
25346fc51c9fSJoseph Huber 
25356fc51c9fSJoseph Huber     auto NumMallocCalls = MallocCalls.size();
25366fc51c9fSJoseph Huber 
25376fc51c9fSJoseph Huber     // Only consider malloc calls executed by a single thread with a constant.
25386fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users()) {
25396fc51c9fSJoseph Huber       const auto &ED = A.getAAFor<AAExecutionDomain>(
25406fc51c9fSJoseph Huber           *this, IRPosition::function(*F), DepClassTy::REQUIRED);
25416fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
25426fc51c9fSJoseph Huber         if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
25436fc51c9fSJoseph Huber             !ED.isExecutedByInitialThreadOnly(*CB))
25446fc51c9fSJoseph Huber           MallocCalls.erase(CB);
25456fc51c9fSJoseph Huber     }
25466fc51c9fSJoseph Huber 
25476fc51c9fSJoseph Huber     if (NumMallocCalls != MallocCalls.size())
25486fc51c9fSJoseph Huber       return ChangeStatus::CHANGED;
25496fc51c9fSJoseph Huber 
25506fc51c9fSJoseph Huber     return ChangeStatus::UNCHANGED;
25516fc51c9fSJoseph Huber   }
25526fc51c9fSJoseph Huber 
25536fc51c9fSJoseph Huber   /// Collection of all malloc calls in a function.
25546fc51c9fSJoseph Huber   SmallPtrSet<CallBase *, 4> MallocCalls;
25556fc51c9fSJoseph Huber };
25566fc51c9fSJoseph Huber 
25579548b74aSJohannes Doerfert } // namespace
25589548b74aSJohannes Doerfert 
2559b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
256018283125SJoseph Huber const char AAExecutionDomain::ID = 0;
25616fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0;
2562b8235d2bSsstefan1 
2563b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
2564b8235d2bSsstefan1                                               Attributor &A) {
2565b8235d2bSsstefan1   AAICVTracker *AA = nullptr;
2566b8235d2bSsstefan1   switch (IRP.getPositionKind()) {
2567b8235d2bSsstefan1   case IRPosition::IRP_INVALID:
2568b8235d2bSsstefan1   case IRPosition::IRP_FLOAT:
2569b8235d2bSsstefan1   case IRPosition::IRP_ARGUMENT:
2570b8235d2bSsstefan1   case IRPosition::IRP_CALL_SITE_ARGUMENT:
25711de70a72SJohannes Doerfert     llvm_unreachable("ICVTracker can only be created for function position!");
25725dfd7cc4Ssstefan1   case IRPosition::IRP_RETURNED:
25735dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
25745dfd7cc4Ssstefan1     break;
25755dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE_RETURNED:
25765dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
25775dfd7cc4Ssstefan1     break;
25785dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE:
25795dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
25805dfd7cc4Ssstefan1     break;
2581b8235d2bSsstefan1   case IRPosition::IRP_FUNCTION:
2582b8235d2bSsstefan1     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
2583b8235d2bSsstefan1     break;
2584b8235d2bSsstefan1   }
2585b8235d2bSsstefan1 
2586b8235d2bSsstefan1   return *AA;
2587b8235d2bSsstefan1 }
2588b8235d2bSsstefan1 
258918283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
259018283125SJoseph Huber                                                         Attributor &A) {
259118283125SJoseph Huber   AAExecutionDomainFunction *AA = nullptr;
259218283125SJoseph Huber   switch (IRP.getPositionKind()) {
259318283125SJoseph Huber   case IRPosition::IRP_INVALID:
259418283125SJoseph Huber   case IRPosition::IRP_FLOAT:
259518283125SJoseph Huber   case IRPosition::IRP_ARGUMENT:
259618283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
259718283125SJoseph Huber   case IRPosition::IRP_RETURNED:
259818283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
259918283125SJoseph Huber   case IRPosition::IRP_CALL_SITE:
260018283125SJoseph Huber     llvm_unreachable(
260118283125SJoseph Huber         "AAExecutionDomain can only be created for function position!");
260218283125SJoseph Huber   case IRPosition::IRP_FUNCTION:
260318283125SJoseph Huber     AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
260418283125SJoseph Huber     break;
260518283125SJoseph Huber   }
260618283125SJoseph Huber 
260718283125SJoseph Huber   return *AA;
260818283125SJoseph Huber }
260918283125SJoseph Huber 
26106fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
26116fc51c9fSJoseph Huber                                                   Attributor &A) {
26126fc51c9fSJoseph Huber   AAHeapToSharedFunction *AA = nullptr;
26136fc51c9fSJoseph Huber   switch (IRP.getPositionKind()) {
26146fc51c9fSJoseph Huber   case IRPosition::IRP_INVALID:
26156fc51c9fSJoseph Huber   case IRPosition::IRP_FLOAT:
26166fc51c9fSJoseph Huber   case IRPosition::IRP_ARGUMENT:
26176fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
26186fc51c9fSJoseph Huber   case IRPosition::IRP_RETURNED:
26196fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
26206fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE:
26216fc51c9fSJoseph Huber     llvm_unreachable(
26226fc51c9fSJoseph Huber         "AAHeapToShared can only be created for function position!");
26236fc51c9fSJoseph Huber   case IRPosition::IRP_FUNCTION:
26246fc51c9fSJoseph Huber     AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
26256fc51c9fSJoseph Huber     break;
26266fc51c9fSJoseph Huber   }
26276fc51c9fSJoseph Huber 
26286fc51c9fSJoseph Huber   return *AA;
26296fc51c9fSJoseph Huber }
26306fc51c9fSJoseph Huber 
2631b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
26325ccb7424SJoseph Huber   if (!containsOpenMP(M))
2633b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2634b2ad63d3SJoseph Huber   if (DisableOpenMPOptimizations)
2635b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2636b2ad63d3SJoseph Huber 
26375ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
26385ccb7424SJoseph Huber 
263903d7e61cSJoseph Huber   // Create internal copies of each function if this is a kernel Module.
264003d7e61cSJoseph Huber   DenseSet<const Function *> InternalizedFuncs;
26415ccb7424SJoseph Huber   if (isOpenMPDevice(M))
264203d7e61cSJoseph Huber     for (Function &F : M)
26435ccb7424SJoseph Huber       if (!F.isDeclaration() && !Kernels.contains(&F))
264403d7e61cSJoseph Huber         if (Attributor::internalizeFunction(F, /* Force */ true))
264503d7e61cSJoseph Huber           InternalizedFuncs.insert(&F);
264603d7e61cSJoseph Huber 
264703d7e61cSJoseph Huber   // Look at every function definition in the Module that wasn't internalized.
2648b2ad63d3SJoseph Huber   SmallVector<Function *, 16> SCC;
264903d7e61cSJoseph Huber   for (Function &F : M)
265003d7e61cSJoseph Huber     if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
265103d7e61cSJoseph Huber       SCC.push_back(&F);
2652b2ad63d3SJoseph Huber 
2653b2ad63d3SJoseph Huber   if (SCC.empty())
2654b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2655b2ad63d3SJoseph Huber 
2656b2ad63d3SJoseph Huber   FunctionAnalysisManager &FAM =
2657b2ad63d3SJoseph Huber       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2658b2ad63d3SJoseph Huber 
2659b2ad63d3SJoseph Huber   AnalysisGetter AG(FAM);
2660b2ad63d3SJoseph Huber 
2661b2ad63d3SJoseph Huber   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
2662b2ad63d3SJoseph Huber     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
2663b2ad63d3SJoseph Huber   };
2664b2ad63d3SJoseph Huber 
2665b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
2666b2ad63d3SJoseph Huber   CallGraphUpdater CGUpdater;
2667b2ad63d3SJoseph Huber 
2668b2ad63d3SJoseph Huber   SetVector<Function *> Functions(SCC.begin(), SCC.end());
26695ccb7424SJoseph Huber   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
2670b2ad63d3SJoseph Huber 
2671*13b2fba2SJoseph Huber   unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
26724a6bd8e3SJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
2673*13b2fba2SJoseph Huber                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2674b2ad63d3SJoseph Huber 
2675b2ad63d3SJoseph Huber   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2676b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(true);
2677b2ad63d3SJoseph Huber   if (Changed)
2678b2ad63d3SJoseph Huber     return PreservedAnalyses::none();
2679b2ad63d3SJoseph Huber 
2680b2ad63d3SJoseph Huber   return PreservedAnalyses::all();
2681b2ad63d3SJoseph Huber }
2682b2ad63d3SJoseph Huber 
2683b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
26849548b74aSJohannes Doerfert                                           CGSCCAnalysisManager &AM,
2685b2ad63d3SJoseph Huber                                           LazyCallGraph &CG,
2686b2ad63d3SJoseph Huber                                           CGSCCUpdateResult &UR) {
26875ccb7424SJoseph Huber   if (!containsOpenMP(*C.begin()->getFunction().getParent()))
26889548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26899548b74aSJohannes Doerfert   if (DisableOpenMPOptimizations)
26909548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26919548b74aSJohannes Doerfert 
2692ee17263aSJohannes Doerfert   SmallVector<Function *, 16> SCC;
2693351d234dSRoman Lebedev   // If there are kernels in the module, we have to run on all SCC's.
2694351d234dSRoman Lebedev   for (LazyCallGraph::Node &N : C) {
2695351d234dSRoman Lebedev     Function *Fn = &N.getFunction();
2696351d234dSRoman Lebedev     SCC.push_back(Fn);
2697351d234dSRoman Lebedev   }
2698351d234dSRoman Lebedev 
26995ccb7424SJoseph Huber   if (SCC.empty())
27009548b74aSJohannes Doerfert     return PreservedAnalyses::all();
27019548b74aSJohannes Doerfert 
27025ccb7424SJoseph Huber   Module &M = *C.begin()->getFunction().getParent();
27035ccb7424SJoseph Huber 
27045ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
27055ccb7424SJoseph Huber 
27064d4ea9acSHuber, Joseph   FunctionAnalysisManager &FAM =
27074d4ea9acSHuber, Joseph       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
27087cfd267cSsstefan1 
27097cfd267cSsstefan1   AnalysisGetter AG(FAM);
27107cfd267cSsstefan1 
27117cfd267cSsstefan1   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
27124d4ea9acSHuber, Joseph     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
27134d4ea9acSHuber, Joseph   };
27144d4ea9acSHuber, Joseph 
2715b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
27169548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27179548b74aSJohannes Doerfert   CGUpdater.initialize(CG, C, AM, UR);
27187cfd267cSsstefan1 
27197cfd267cSsstefan1   SetVector<Function *> Functions(SCC.begin(), SCC.end());
27207cfd267cSsstefan1   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
27215ccb7424SJoseph Huber                                 /*CGSCC*/ Functions, Kernels);
27227cfd267cSsstefan1 
2723*13b2fba2SJoseph Huber   unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
27244a6bd8e3SJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
2725*13b2fba2SJoseph Huber                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2726b8235d2bSsstefan1 
2727b8235d2bSsstefan1   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2728b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(false);
2729694ded37SGiorgis Georgakoudis   if (Changed)
2730694ded37SGiorgis Georgakoudis     return PreservedAnalyses::none();
2731694ded37SGiorgis Georgakoudis 
27329548b74aSJohannes Doerfert   return PreservedAnalyses::all();
27339548b74aSJohannes Doerfert }
27348b57ed09SJoseph Huber 
27359548b74aSJohannes Doerfert namespace {
27369548b74aSJohannes Doerfert 
2737b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27389548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27399548b74aSJohannes Doerfert   static char ID;
27409548b74aSJohannes Doerfert 
2741b2ad63d3SJoseph Huber   OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
2742b2ad63d3SJoseph Huber     initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
27439548b74aSJohannes Doerfert   }
27449548b74aSJohannes Doerfert 
27459548b74aSJohannes Doerfert   void getAnalysisUsage(AnalysisUsage &AU) const override {
27469548b74aSJohannes Doerfert     CallGraphSCCPass::getAnalysisUsage(AU);
27479548b74aSJohannes Doerfert   }
27489548b74aSJohannes Doerfert 
27499548b74aSJohannes Doerfert   bool runOnSCC(CallGraphSCC &CGSCC) override {
27505ccb7424SJoseph Huber     if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
27519548b74aSJohannes Doerfert       return false;
27529548b74aSJohannes Doerfert     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
27539548b74aSJohannes Doerfert       return false;
27549548b74aSJohannes Doerfert 
2755ee17263aSJohannes Doerfert     SmallVector<Function *, 16> SCC;
2756351d234dSRoman Lebedev     // If there are kernels in the module, we have to run on all SCC's.
2757351d234dSRoman Lebedev     for (CallGraphNode *CGN : CGSCC) {
2758351d234dSRoman Lebedev       Function *Fn = CGN->getFunction();
2759351d234dSRoman Lebedev       if (!Fn || Fn->isDeclaration())
2760351d234dSRoman Lebedev         continue;
2761ee17263aSJohannes Doerfert       SCC.push_back(Fn);
2762351d234dSRoman Lebedev     }
2763351d234dSRoman Lebedev 
27645ccb7424SJoseph Huber     if (SCC.empty())
27659548b74aSJohannes Doerfert       return false;
27669548b74aSJohannes Doerfert 
27675ccb7424SJoseph Huber     Module &M = CGSCC.getCallGraph().getModule();
27685ccb7424SJoseph Huber     KernelSet Kernels = getDeviceKernels(M);
27695ccb7424SJoseph Huber 
27709548b74aSJohannes Doerfert     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
27719548b74aSJohannes Doerfert     CGUpdater.initialize(CG, CGSCC);
27729548b74aSJohannes Doerfert 
27734d4ea9acSHuber, Joseph     // Maintain a map of functions to avoid rebuilding the ORE
27744d4ea9acSHuber, Joseph     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
27754d4ea9acSHuber, Joseph     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
27764d4ea9acSHuber, Joseph       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
27774d4ea9acSHuber, Joseph       if (!ORE)
27784d4ea9acSHuber, Joseph         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
27794d4ea9acSHuber, Joseph       return *ORE;
27804d4ea9acSHuber, Joseph     };
27814d4ea9acSHuber, Joseph 
27827cfd267cSsstefan1     AnalysisGetter AG;
27837cfd267cSsstefan1     SetVector<Function *> Functions(SCC.begin(), SCC.end());
27847cfd267cSsstefan1     BumpPtrAllocator Allocator;
27855ccb7424SJoseph Huber     OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
27865ccb7424SJoseph Huber                                   Allocator,
27875ccb7424SJoseph Huber                                   /*CGSCC*/ Functions, Kernels);
27887cfd267cSsstefan1 
2789*13b2fba2SJoseph Huber     unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
279030e36c9bSJoseph Huber     Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
2791*13b2fba2SJoseph Huber                  MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2792b8235d2bSsstefan1 
2793b8235d2bSsstefan1     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2794b2ad63d3SJoseph Huber     return OMPOpt.run(false);
27959548b74aSJohannes Doerfert   }
27969548b74aSJohannes Doerfert 
27979548b74aSJohannes Doerfert   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
27989548b74aSJohannes Doerfert };
27999548b74aSJohannes Doerfert 
28009548b74aSJohannes Doerfert } // end anonymous namespace
28019548b74aSJohannes Doerfert 
28025ccb7424SJoseph Huber KernelSet llvm::omp::getDeviceKernels(Module &M) {
28035ccb7424SJoseph Huber   // TODO: Create a more cross-platform way of determining device kernels.
2804e8039ad4SJohannes Doerfert   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
28055ccb7424SJoseph Huber   KernelSet Kernels;
28065ccb7424SJoseph Huber 
2807e8039ad4SJohannes Doerfert   if (!MD)
28085ccb7424SJoseph Huber     return Kernels;
2809e8039ad4SJohannes Doerfert 
2810e8039ad4SJohannes Doerfert   for (auto *Op : MD->operands()) {
2811e8039ad4SJohannes Doerfert     if (Op->getNumOperands() < 2)
2812e8039ad4SJohannes Doerfert       continue;
2813e8039ad4SJohannes Doerfert     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
2814e8039ad4SJohannes Doerfert     if (!KindID || KindID->getString() != "kernel")
2815e8039ad4SJohannes Doerfert       continue;
2816e8039ad4SJohannes Doerfert 
2817e8039ad4SJohannes Doerfert     Function *KernelFn =
2818e8039ad4SJohannes Doerfert         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
2819e8039ad4SJohannes Doerfert     if (!KernelFn)
2820e8039ad4SJohannes Doerfert       continue;
2821e8039ad4SJohannes Doerfert 
2822e8039ad4SJohannes Doerfert     ++NumOpenMPTargetRegionKernels;
2823e8039ad4SJohannes Doerfert 
2824e8039ad4SJohannes Doerfert     Kernels.insert(KernelFn);
2825e8039ad4SJohannes Doerfert   }
28265ccb7424SJoseph Huber 
28275ccb7424SJoseph Huber   return Kernels;
2828e8039ad4SJohannes Doerfert }
2829e8039ad4SJohannes Doerfert 
28305ccb7424SJoseph Huber bool llvm::omp::containsOpenMP(Module &M) {
28315ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp");
28325ccb7424SJoseph Huber   if (!MD)
28335ccb7424SJoseph Huber     return false;
2834dce6bc18SJohannes Doerfert 
2835e8039ad4SJohannes Doerfert   return true;
2836e8039ad4SJohannes Doerfert }
2837e8039ad4SJohannes Doerfert 
28385ccb7424SJoseph Huber bool llvm::omp::isOpenMPDevice(Module &M) {
28395ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp-device");
28405ccb7424SJoseph Huber   if (!MD)
28415ccb7424SJoseph Huber     return false;
28425ccb7424SJoseph Huber 
28435ccb7424SJoseph Huber   return true;
28449548b74aSJohannes Doerfert }
28459548b74aSJohannes Doerfert 
2846b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0;
28479548b74aSJohannes Doerfert 
2848b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28499548b74aSJohannes Doerfert                       "OpenMP specific optimizations", false, false)
28509548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
2851b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28529548b74aSJohannes Doerfert                     "OpenMP specific optimizations", false, false)
28539548b74aSJohannes Doerfert 
2854b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
2855b2ad63d3SJoseph Huber   return new OpenMPOptCGSCCLegacyPass();
2856b2ad63d3SJoseph Huber }
2857