19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
29548b74aSJohannes Doerfert //
39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69548b74aSJohannes Doerfert //
79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
89548b74aSJohannes Doerfert //
99548b74aSJohannes Doerfert // OpenMP specific optimizations:
109548b74aSJohannes Doerfert //
119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12ca1560daSJoseph Huber // - Replacing globalized device memory with stack memory.
13ca1560daSJoseph Huber // - Replacing globalized device memory with shared memory.
149548b74aSJohannes Doerfert //
159548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
169548b74aSJohannes Doerfert 
179548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
189548b74aSJohannes Doerfert 
199548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
2018283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h"
219548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
229548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
239548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
244d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
253a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h"
269548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
27e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
2868abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h"
299548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
309548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
319548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
327cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
333a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h"
349548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
3597517055SGiorgis Georgakoudis #include "llvm/Transforms/Utils/CodeExtractor.h"
369548b74aSJohannes Doerfert 
379548b74aSJohannes Doerfert using namespace llvm;
389548b74aSJohannes Doerfert using namespace omp;
399548b74aSJohannes Doerfert 
409548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
419548b74aSJohannes Doerfert 
429548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
439548b74aSJohannes Doerfert     "openmp-opt-disable", cl::ZeroOrMore,
449548b74aSJohannes Doerfert     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
459548b74aSJohannes Doerfert     cl::init(false));
469548b74aSJohannes Doerfert 
473a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging(
483a6bfcf2SGiorgis Georgakoudis     "openmp-opt-enable-merging", cl::ZeroOrMore,
493a6bfcf2SGiorgis Georgakoudis     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
503a6bfcf2SGiorgis Georgakoudis     cl::init(false));
513a6bfcf2SGiorgis Georgakoudis 
520f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
530f426935Ssstefan1                                     cl::Hidden);
54e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
55e8039ad4SJohannes Doerfert                                         cl::init(false), cl::Hidden);
560f426935Ssstefan1 
57496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
58496f8e5bSHamilton Tobon Mosquera     "openmp-hide-memory-transfer-latency",
59496f8e5bSHamilton Tobon Mosquera     cl::desc("[WIP] Tries to hide the latency of host to device memory"
60496f8e5bSHamilton Tobon Mosquera              " transfers"),
61496f8e5bSHamilton Tobon Mosquera     cl::Hidden, cl::init(false));
62496f8e5bSHamilton Tobon Mosquera 
639548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
649548b74aSJohannes Doerfert           "Number of OpenMP runtime calls deduplicated");
6555eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
6655eb714aSRoman Lebedev           "Number of OpenMP parallel regions deleted");
679548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
689548b74aSJohannes Doerfert           "Number of OpenMP runtime functions identified");
699548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
709548b74aSJohannes Doerfert           "Number of OpenMP runtime function uses identified");
71e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
72e8039ad4SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) identified");
735b0581aeSJohannes Doerfert STATISTIC(
745b0581aeSJohannes Doerfert     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
755b0581aeSJohannes Doerfert     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
763a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged,
773a6bfcf2SGiorgis Georgakoudis           "Number of OpenMP parallel regions merged");
786fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory,
796fc51c9fSJoseph Huber           "Amount of memory pushed to shared memory");
809548b74aSJohannes Doerfert 
81263c4a3cSrathod-sahaab #if !defined(NDEBUG)
829548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
83a50c0b0dSMikael Holmen #endif
849548b74aSJohannes Doerfert 
859548b74aSJohannes Doerfert namespace {
869548b74aSJohannes Doerfert 
/// Numeric address space identifiers with `unsigned` as underlying type.
/// NOTE(review): the values look like GPU target address space numbers;
/// confirm against the targets this pass is meant to support.
enum class AddressSpace : unsigned {
  Generic = 0,
  Global = 1,
  // The value 2 is intentionally not named here.
  Shared = 3,
  Constant = 4,
  Local = 5,
};
946fc51c9fSJoseph Huber 
956fc51c9fSJoseph Huber struct AAHeapToShared;
966fc51c9fSJoseph Huber 
97b8235d2bSsstefan1 struct AAICVTracker;
98b8235d2bSsstefan1 
997cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
1007cfd267cSsstefan1 /// Attributor runs.
1017cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
1027cfd267cSsstefan1   OMPInformationCache(Module &M, AnalysisGetter &AG,
103624d34afSJohannes Doerfert                       BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
104e8039ad4SJohannes Doerfert                       SmallPtrSetImpl<Kernel> &Kernels)
105624d34afSJohannes Doerfert       : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
106624d34afSJohannes Doerfert         Kernels(Kernels) {
107624d34afSJohannes Doerfert 
10861238d26Ssstefan1     OMPBuilder.initialize();
1099548b74aSJohannes Doerfert     initializeRuntimeFunctions();
1100f426935Ssstefan1     initializeInternalControlVars();
1119548b74aSJohannes Doerfert   }
1129548b74aSJohannes Doerfert 
1130f426935Ssstefan1   /// Generic information that describes an internal control variable.
1140f426935Ssstefan1   struct InternalControlVarInfo {
1150f426935Ssstefan1     /// The kind, as described by InternalControlVar enum.
1160f426935Ssstefan1     InternalControlVar Kind;
1170f426935Ssstefan1 
1180f426935Ssstefan1     /// The name of the ICV.
1190f426935Ssstefan1     StringRef Name;
1200f426935Ssstefan1 
1210f426935Ssstefan1     /// Environment variable associated with this ICV.
1220f426935Ssstefan1     StringRef EnvVarName;
1230f426935Ssstefan1 
1240f426935Ssstefan1     /// Initial value kind.
1250f426935Ssstefan1     ICVInitValue InitKind;
1260f426935Ssstefan1 
1270f426935Ssstefan1     /// Initial value.
1280f426935Ssstefan1     ConstantInt *InitValue;
1290f426935Ssstefan1 
1300f426935Ssstefan1     /// Setter RTL function associated with this ICV.
1310f426935Ssstefan1     RuntimeFunction Setter;
1320f426935Ssstefan1 
1330f426935Ssstefan1     /// Getter RTL function associated with this ICV.
1340f426935Ssstefan1     RuntimeFunction Getter;
1350f426935Ssstefan1 
1360f426935Ssstefan1     /// RTL Function corresponding to the override clause of this ICV
1370f426935Ssstefan1     RuntimeFunction Clause;
1380f426935Ssstefan1   };
1390f426935Ssstefan1 
1409548b74aSJohannes Doerfert   /// Generic information that describes a runtime function
1419548b74aSJohannes Doerfert   struct RuntimeFunctionInfo {
1428855fec3SJohannes Doerfert 
1439548b74aSJohannes Doerfert     /// The kind, as described by the RuntimeFunction enum.
1449548b74aSJohannes Doerfert     RuntimeFunction Kind;
1459548b74aSJohannes Doerfert 
1469548b74aSJohannes Doerfert     /// The name of the function.
1479548b74aSJohannes Doerfert     StringRef Name;
1489548b74aSJohannes Doerfert 
1499548b74aSJohannes Doerfert     /// Flag to indicate a variadic function.
1509548b74aSJohannes Doerfert     bool IsVarArg;
1519548b74aSJohannes Doerfert 
1529548b74aSJohannes Doerfert     /// The return type of the function.
1539548b74aSJohannes Doerfert     Type *ReturnType;
1549548b74aSJohannes Doerfert 
1559548b74aSJohannes Doerfert     /// The argument types of the function.
1569548b74aSJohannes Doerfert     SmallVector<Type *, 8> ArgumentTypes;
1579548b74aSJohannes Doerfert 
1589548b74aSJohannes Doerfert     /// The declaration if available.
159f09f4b26SJohannes Doerfert     Function *Declaration = nullptr;
1609548b74aSJohannes Doerfert 
1619548b74aSJohannes Doerfert     /// Uses of this runtime function per function containing the use.
1628855fec3SJohannes Doerfert     using UseVector = SmallVector<Use *, 16>;
1638855fec3SJohannes Doerfert 
164b8235d2bSsstefan1     /// Clear UsesMap for runtime function.
165b8235d2bSsstefan1     void clearUsesMap() { UsesMap.clear(); }
166b8235d2bSsstefan1 
16754bd3751SJohannes Doerfert     /// Boolean conversion that is true if the runtime function was found.
16854bd3751SJohannes Doerfert     operator bool() const { return Declaration; }
16954bd3751SJohannes Doerfert 
1708855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F.
1718855fec3SJohannes Doerfert     UseVector &getOrCreateUseVector(Function *F) {
172b8235d2bSsstefan1       std::shared_ptr<UseVector> &UV = UsesMap[F];
1738855fec3SJohannes Doerfert       if (!UV)
174b8235d2bSsstefan1         UV = std::make_shared<UseVector>();
1758855fec3SJohannes Doerfert       return *UV;
1768855fec3SJohannes Doerfert     }
1778855fec3SJohannes Doerfert 
1788855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F or `nullptr` if there are
1798855fec3SJohannes Doerfert     /// none.
1808855fec3SJohannes Doerfert     const UseVector *getUseVector(Function &F) const {
18195e57072SDavid Blaikie       auto I = UsesMap.find(&F);
18295e57072SDavid Blaikie       if (I != UsesMap.end())
18395e57072SDavid Blaikie         return I->second.get();
18495e57072SDavid Blaikie       return nullptr;
1858855fec3SJohannes Doerfert     }
1868855fec3SJohannes Doerfert 
1878855fec3SJohannes Doerfert     /// Return how many functions contain uses of this runtime function.
1888855fec3SJohannes Doerfert     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
1899548b74aSJohannes Doerfert 
1909548b74aSJohannes Doerfert     /// Return the number of arguments (or the minimal number for variadic
1919548b74aSJohannes Doerfert     /// functions).
1929548b74aSJohannes Doerfert     size_t getNumArgs() const { return ArgumentTypes.size(); }
1939548b74aSJohannes Doerfert 
1949548b74aSJohannes Doerfert     /// Run the callback \p CB on each use and forget the use if the result is
1959548b74aSJohannes Doerfert     /// true. The callback will be fed the function in which the use was
1969548b74aSJohannes Doerfert     /// encountered as second argument.
197624d34afSJohannes Doerfert     void foreachUse(SmallVectorImpl<Function *> &SCC,
198624d34afSJohannes Doerfert                     function_ref<bool(Use &, Function &)> CB) {
199624d34afSJohannes Doerfert       for (Function *F : SCC)
200624d34afSJohannes Doerfert         foreachUse(CB, F);
201e099c7b6Ssstefan1     }
202e099c7b6Ssstefan1 
203e099c7b6Ssstefan1     /// Run the callback \p CB on each use within the function \p F and forget
204e099c7b6Ssstefan1     /// the use if the result is true.
205624d34afSJohannes Doerfert     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
2068855fec3SJohannes Doerfert       SmallVector<unsigned, 8> ToBeDeleted;
2079548b74aSJohannes Doerfert       ToBeDeleted.clear();
208e099c7b6Ssstefan1 
2098855fec3SJohannes Doerfert       unsigned Idx = 0;
210624d34afSJohannes Doerfert       UseVector &UV = getOrCreateUseVector(F);
211e099c7b6Ssstefan1 
2128855fec3SJohannes Doerfert       for (Use *U : UV) {
213e099c7b6Ssstefan1         if (CB(*U, *F))
2148855fec3SJohannes Doerfert           ToBeDeleted.push_back(Idx);
2158855fec3SJohannes Doerfert         ++Idx;
2168855fec3SJohannes Doerfert       }
2178855fec3SJohannes Doerfert 
2188855fec3SJohannes Doerfert       // Remove the to-be-deleted indices in reverse order as prior
219b726c557SJohannes Doerfert       // modifications will not modify the smaller indices.
2208855fec3SJohannes Doerfert       while (!ToBeDeleted.empty()) {
2218855fec3SJohannes Doerfert         unsigned Idx = ToBeDeleted.pop_back_val();
2228855fec3SJohannes Doerfert         UV[Idx] = UV.back();
2238855fec3SJohannes Doerfert         UV.pop_back();
2249548b74aSJohannes Doerfert       }
2259548b74aSJohannes Doerfert     }
2268855fec3SJohannes Doerfert 
2278855fec3SJohannes Doerfert   private:
2288855fec3SJohannes Doerfert     /// Map from functions to all uses of this runtime function contained in
2298855fec3SJohannes Doerfert     /// them.
230b8235d2bSsstefan1     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
2319548b74aSJohannes Doerfert   };
2329548b74aSJohannes Doerfert 
2337cfd267cSsstefan1   /// An OpenMP-IR-Builder instance
2347cfd267cSsstefan1   OpenMPIRBuilder OMPBuilder;
2357cfd267cSsstefan1 
2367cfd267cSsstefan1   /// Map from runtime function kind to the runtime function description.
2377cfd267cSsstefan1   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
2387cfd267cSsstefan1                   RuntimeFunction::OMPRTL___last>
2397cfd267cSsstefan1       RFIs;
2407cfd267cSsstefan1 
2410f426935Ssstefan1   /// Map from ICV kind to the ICV description.
2420f426935Ssstefan1   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
2430f426935Ssstefan1                   InternalControlVar::ICV___last>
2440f426935Ssstefan1       ICVs;
2450f426935Ssstefan1 
2460f426935Ssstefan1   /// Helper to initialize all internal control variable information for those
2470f426935Ssstefan1   /// defined in OMPKinds.def.
2480f426935Ssstefan1   void initializeInternalControlVars() {
2490f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL)                                                 \
2500f426935Ssstefan1   {                                                                            \
2510f426935Ssstefan1     auto &ICV = ICVs[_Name];                                                   \
2520f426935Ssstefan1     ICV.Setter = RTL;                                                          \
2530f426935Ssstefan1   }
2540f426935Ssstefan1 #define ICV_RT_GET(Name, RTL)                                                  \
2550f426935Ssstefan1   {                                                                            \
2560f426935Ssstefan1     auto &ICV = ICVs[Name];                                                    \
2570f426935Ssstefan1     ICV.Getter = RTL;                                                          \
2580f426935Ssstefan1   }
2590f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
2600f426935Ssstefan1   {                                                                            \
2610f426935Ssstefan1     auto &ICV = ICVs[Enum];                                                    \
2620f426935Ssstefan1     ICV.Name = _Name;                                                          \
2630f426935Ssstefan1     ICV.Kind = Enum;                                                           \
2640f426935Ssstefan1     ICV.InitKind = Init;                                                       \
2650f426935Ssstefan1     ICV.EnvVarName = _EnvVarName;                                              \
2660f426935Ssstefan1     switch (ICV.InitKind) {                                                    \
267951e43f3Ssstefan1     case ICV_IMPLEMENTATION_DEFINED:                                           \
2680f426935Ssstefan1       ICV.InitValue = nullptr;                                                 \
2690f426935Ssstefan1       break;                                                                   \
270951e43f3Ssstefan1     case ICV_ZERO:                                                             \
2716aab27baSsstefan1       ICV.InitValue = ConstantInt::get(                                        \
2726aab27baSsstefan1           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
2730f426935Ssstefan1       break;                                                                   \
274951e43f3Ssstefan1     case ICV_FALSE:                                                            \
2756aab27baSsstefan1       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
2760f426935Ssstefan1       break;                                                                   \
277951e43f3Ssstefan1     case ICV_LAST:                                                             \
2780f426935Ssstefan1       break;                                                                   \
2790f426935Ssstefan1     }                                                                          \
2800f426935Ssstefan1   }
2810f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2820f426935Ssstefan1   }
2830f426935Ssstefan1 
2847cfd267cSsstefan1   /// Returns true if the function declaration \p F matches the runtime
2857cfd267cSsstefan1   /// function types, that is, return type \p RTFRetType, and argument types
2867cfd267cSsstefan1   /// \p RTFArgTypes.
2877cfd267cSsstefan1   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
2887cfd267cSsstefan1                                   SmallVector<Type *, 8> &RTFArgTypes) {
2897cfd267cSsstefan1     // TODO: We should output information to the user (under debug output
2907cfd267cSsstefan1     //       and via remarks).
2917cfd267cSsstefan1 
2927cfd267cSsstefan1     if (!F)
2937cfd267cSsstefan1       return false;
2947cfd267cSsstefan1     if (F->getReturnType() != RTFRetType)
2957cfd267cSsstefan1       return false;
2967cfd267cSsstefan1     if (F->arg_size() != RTFArgTypes.size())
2977cfd267cSsstefan1       return false;
2987cfd267cSsstefan1 
2997cfd267cSsstefan1     auto RTFTyIt = RTFArgTypes.begin();
3007cfd267cSsstefan1     for (Argument &Arg : F->args()) {
3017cfd267cSsstefan1       if (Arg.getType() != *RTFTyIt)
3027cfd267cSsstefan1         return false;
3037cfd267cSsstefan1 
3047cfd267cSsstefan1       ++RTFTyIt;
3057cfd267cSsstefan1     }
3067cfd267cSsstefan1 
3077cfd267cSsstefan1     return true;
3087cfd267cSsstefan1   }
3097cfd267cSsstefan1 
310b726c557SJohannes Doerfert   // Helper to collect all uses of the declaration in the UsesMap.
311b8235d2bSsstefan1   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
3127cfd267cSsstefan1     unsigned NumUses = 0;
3137cfd267cSsstefan1     if (!RFI.Declaration)
3147cfd267cSsstefan1       return NumUses;
3157cfd267cSsstefan1     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
3167cfd267cSsstefan1 
317b8235d2bSsstefan1     if (CollectStats) {
3187cfd267cSsstefan1       NumOpenMPRuntimeFunctionsIdentified += 1;
3197cfd267cSsstefan1       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
320b8235d2bSsstefan1     }
3217cfd267cSsstefan1 
3227cfd267cSsstefan1     // TODO: We directly convert uses into proper calls and unknown uses.
3237cfd267cSsstefan1     for (Use &U : RFI.Declaration->uses()) {
3247cfd267cSsstefan1       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
3257cfd267cSsstefan1         if (ModuleSlice.count(UserI->getFunction())) {
3267cfd267cSsstefan1           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
3277cfd267cSsstefan1           ++NumUses;
3287cfd267cSsstefan1         }
3297cfd267cSsstefan1       } else {
3307cfd267cSsstefan1         RFI.getOrCreateUseVector(nullptr).push_back(&U);
3317cfd267cSsstefan1         ++NumUses;
3327cfd267cSsstefan1       }
3337cfd267cSsstefan1     }
3347cfd267cSsstefan1     return NumUses;
335b8235d2bSsstefan1   }
3367cfd267cSsstefan1 
33797517055SGiorgis Georgakoudis   // Helper function to recollect uses of a runtime function.
33897517055SGiorgis Georgakoudis   void recollectUsesForFunction(RuntimeFunction RTF) {
33997517055SGiorgis Georgakoudis     auto &RFI = RFIs[RTF];
340b8235d2bSsstefan1     RFI.clearUsesMap();
341b8235d2bSsstefan1     collectUses(RFI, /*CollectStats*/ false);
342b8235d2bSsstefan1   }
34397517055SGiorgis Georgakoudis 
34497517055SGiorgis Georgakoudis   // Helper function to recollect uses of all runtime functions.
34597517055SGiorgis Georgakoudis   void recollectUses() {
34697517055SGiorgis Georgakoudis     for (int Idx = 0; Idx < RFIs.size(); ++Idx)
34797517055SGiorgis Georgakoudis       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
348b8235d2bSsstefan1   }
349b8235d2bSsstefan1 
350b8235d2bSsstefan1   /// Helper to initialize all runtime function information for those defined
351b8235d2bSsstefan1   /// in OpenMPKinds.def.
352b8235d2bSsstefan1   void initializeRuntimeFunctions() {
3537cfd267cSsstefan1     Module &M = *((*ModuleSlice.begin())->getParent());
3547cfd267cSsstefan1 
3556aab27baSsstefan1     // Helper macros for handling __VA_ARGS__ in OMP_RTL
3566aab27baSsstefan1 #define OMP_TYPE(VarName, ...)                                                 \
3576aab27baSsstefan1   Type *VarName = OMPBuilder.VarName;                                          \
3586aab27baSsstefan1   (void)VarName;
3596aab27baSsstefan1 
3606aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...)                                           \
3616aab27baSsstefan1   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
3626aab27baSsstefan1   (void)VarName##Ty;                                                           \
3636aab27baSsstefan1   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
3646aab27baSsstefan1   (void)VarName##PtrTy;
3656aab27baSsstefan1 
3666aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
3676aab27baSsstefan1   FunctionType *VarName = OMPBuilder.VarName;                                  \
3686aab27baSsstefan1   (void)VarName;                                                               \
3696aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3706aab27baSsstefan1   (void)VarName##Ptr;
3716aab27baSsstefan1 
3726aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...)                                          \
3736aab27baSsstefan1   StructType *VarName = OMPBuilder.VarName;                                    \
3746aab27baSsstefan1   (void)VarName;                                                               \
3756aab27baSsstefan1   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
3766aab27baSsstefan1   (void)VarName##Ptr;
3776aab27baSsstefan1 
3787cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
3797cfd267cSsstefan1   {                                                                            \
3807cfd267cSsstefan1     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
3817cfd267cSsstefan1     Function *F = M.getFunction(_Name);                                        \
3826aab27baSsstefan1     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
3837cfd267cSsstefan1       auto &RFI = RFIs[_Enum];                                                 \
3847cfd267cSsstefan1       RFI.Kind = _Enum;                                                        \
3857cfd267cSsstefan1       RFI.Name = _Name;                                                        \
3867cfd267cSsstefan1       RFI.IsVarArg = _IsVarArg;                                                \
3876aab27baSsstefan1       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
3887cfd267cSsstefan1       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
3897cfd267cSsstefan1       RFI.Declaration = F;                                                     \
390b8235d2bSsstefan1       unsigned NumUses = collectUses(RFI);                                     \
3917cfd267cSsstefan1       (void)NumUses;                                                           \
3927cfd267cSsstefan1       LLVM_DEBUG({                                                             \
3937cfd267cSsstefan1         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
3947cfd267cSsstefan1                << " found\n";                                                  \
3957cfd267cSsstefan1         if (RFI.Declaration)                                                   \
3967cfd267cSsstefan1           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
3977cfd267cSsstefan1                  << RFI.getNumFunctionsWithUses()                              \
3987cfd267cSsstefan1                  << " different functions.\n";                                 \
3997cfd267cSsstefan1       });                                                                      \
4007cfd267cSsstefan1     }                                                                          \
4017cfd267cSsstefan1   }
4027cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
4037cfd267cSsstefan1 
4047cfd267cSsstefan1     // TODO: We should attach the attributes defined in OMPKinds.def.
4057cfd267cSsstefan1   }
406e8039ad4SJohannes Doerfert 
407e8039ad4SJohannes Doerfert   /// Collection of known kernels (\see Kernel) in the module.
408e8039ad4SJohannes Doerfert   SmallPtrSetImpl<Kernel> &Kernels;
4097cfd267cSsstefan1 };
4107cfd267cSsstefan1 
4118931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload
4128931add6SHamilton Tobon Mosquera /// array, to a vector in memory.
4138931add6SHamilton Tobon Mosquera struct OffloadArray {
4148931add6SHamilton Tobon Mosquera   /// Physical array (in the IR).
4158931add6SHamilton Tobon Mosquera   AllocaInst *Array = nullptr;
4168931add6SHamilton Tobon Mosquera   /// Mapped values.
4178931add6SHamilton Tobon Mosquera   SmallVector<Value *, 8> StoredValues;
4188931add6SHamilton Tobon Mosquera   /// Last stores made in the offload array.
4198931add6SHamilton Tobon Mosquera   SmallVector<StoreInst *, 8> LastAccesses;
4208931add6SHamilton Tobon Mosquera 
4218931add6SHamilton Tobon Mosquera   OffloadArray() = default;
4228931add6SHamilton Tobon Mosquera 
4238931add6SHamilton Tobon Mosquera   /// Initializes the OffloadArray with the values stored in \p Array before
4248931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached. Returns false if the initialization
4258931add6SHamilton Tobon Mosquera   /// fails.
4268931add6SHamilton Tobon Mosquera   /// This MUST be used immediately after the construction of the object.
4278931add6SHamilton Tobon Mosquera   bool initialize(AllocaInst &Array, Instruction &Before) {
4288931add6SHamilton Tobon Mosquera     if (!Array.getAllocatedType()->isArrayTy())
4298931add6SHamilton Tobon Mosquera       return false;
4308931add6SHamilton Tobon Mosquera 
4318931add6SHamilton Tobon Mosquera     if (!getValues(Array, Before))
4328931add6SHamilton Tobon Mosquera       return false;
4338931add6SHamilton Tobon Mosquera 
4348931add6SHamilton Tobon Mosquera     this->Array = &Array;
4358931add6SHamilton Tobon Mosquera     return true;
4368931add6SHamilton Tobon Mosquera   }
4378931add6SHamilton Tobon Mosquera 
438da8bec47SJoseph Huber   static const unsigned DeviceIDArgNum = 1;
439da8bec47SJoseph Huber   static const unsigned BasePtrsArgNum = 3;
440da8bec47SJoseph Huber   static const unsigned PtrsArgNum = 4;
441da8bec47SJoseph Huber   static const unsigned SizesArgNum = 5;
4421d3d9b9cSHamilton Tobon Mosquera 
4438931add6SHamilton Tobon Mosquera private:
4448931add6SHamilton Tobon Mosquera   /// Traverses the BasicBlock where \p Array is, collecting the stores made to
4458931add6SHamilton Tobon Mosquera   /// \p Array, leaving StoredValues with the values stored before the
4468931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached.
4478931add6SHamilton Tobon Mosquera   bool getValues(AllocaInst &Array, Instruction &Before) {
4488931add6SHamilton Tobon Mosquera     // Initialize container.
449d08d490aSJohannes Doerfert     const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
4508931add6SHamilton Tobon Mosquera     StoredValues.assign(NumValues, nullptr);
4518931add6SHamilton Tobon Mosquera     LastAccesses.assign(NumValues, nullptr);
4528931add6SHamilton Tobon Mosquera 
4538931add6SHamilton Tobon Mosquera     // TODO: This assumes the instruction \p Before is in the same
4548931add6SHamilton Tobon Mosquera     //  BasicBlock as Array. Make it general, for any control flow graph.
4558931add6SHamilton Tobon Mosquera     BasicBlock *BB = Array.getParent();
4568931add6SHamilton Tobon Mosquera     if (BB != Before.getParent())
4578931add6SHamilton Tobon Mosquera       return false;
4588931add6SHamilton Tobon Mosquera 
4598931add6SHamilton Tobon Mosquera     const DataLayout &DL = Array.getModule()->getDataLayout();
4608931add6SHamilton Tobon Mosquera     const unsigned int PointerSize = DL.getPointerSize();
4618931add6SHamilton Tobon Mosquera 
4628931add6SHamilton Tobon Mosquera     for (Instruction &I : *BB) {
4638931add6SHamilton Tobon Mosquera       if (&I == &Before)
4648931add6SHamilton Tobon Mosquera         break;
4658931add6SHamilton Tobon Mosquera 
4668931add6SHamilton Tobon Mosquera       if (!isa<StoreInst>(&I))
4678931add6SHamilton Tobon Mosquera         continue;
4688931add6SHamilton Tobon Mosquera 
4698931add6SHamilton Tobon Mosquera       auto *S = cast<StoreInst>(&I);
4708931add6SHamilton Tobon Mosquera       int64_t Offset = -1;
471d08d490aSJohannes Doerfert       auto *Dst =
472d08d490aSJohannes Doerfert           GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
4738931add6SHamilton Tobon Mosquera       if (Dst == &Array) {
4748931add6SHamilton Tobon Mosquera         int64_t Idx = Offset / PointerSize;
4758931add6SHamilton Tobon Mosquera         StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
4768931add6SHamilton Tobon Mosquera         LastAccesses[Idx] = S;
4778931add6SHamilton Tobon Mosquera       }
4788931add6SHamilton Tobon Mosquera     }
4798931add6SHamilton Tobon Mosquera 
4808931add6SHamilton Tobon Mosquera     return isFilled();
4818931add6SHamilton Tobon Mosquera   }
4828931add6SHamilton Tobon Mosquera 
4838931add6SHamilton Tobon Mosquera   /// Returns true if all values in StoredValues and
4848931add6SHamilton Tobon Mosquera   /// LastAccesses are not nullptrs.
4858931add6SHamilton Tobon Mosquera   bool isFilled() {
4868931add6SHamilton Tobon Mosquera     const unsigned NumValues = StoredValues.size();
4878931add6SHamilton Tobon Mosquera     for (unsigned I = 0; I < NumValues; ++I) {
4888931add6SHamilton Tobon Mosquera       if (!StoredValues[I] || !LastAccesses[I])
4898931add6SHamilton Tobon Mosquera         return false;
4908931add6SHamilton Tobon Mosquera     }
4918931add6SHamilton Tobon Mosquera 
4928931add6SHamilton Tobon Mosquera     return true;
4938931add6SHamilton Tobon Mosquera   }
4948931add6SHamilton Tobon Mosquera };
4958931add6SHamilton Tobon Mosquera 
4967cfd267cSsstefan1 struct OpenMPOpt {
4977cfd267cSsstefan1 
4987cfd267cSsstefan1   using OptimizationRemarkGetter =
4997cfd267cSsstefan1       function_ref<OptimizationRemarkEmitter &(Function *)>;
5007cfd267cSsstefan1 
5017cfd267cSsstefan1   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
5027cfd267cSsstefan1             OptimizationRemarkGetter OREGetter,
503b8235d2bSsstefan1             OMPInformationCache &OMPInfoCache, Attributor &A)
50477b79d79SMehdi Amini       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
505b8235d2bSsstefan1         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
5067cfd267cSsstefan1 
507a2281419SJoseph Huber   /// Check if any remarks are enabled for openmp-opt
508a2281419SJoseph Huber   bool remarksEnabled() {
509a2281419SJoseph Huber     auto &Ctx = M.getContext();
510a2281419SJoseph Huber     return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
511a2281419SJoseph Huber   }
512a2281419SJoseph Huber 
5139548b74aSJohannes Doerfert   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
514b2ad63d3SJoseph Huber   bool run(bool IsModulePass) {
51554bd3751SJohannes Doerfert     if (SCC.empty())
51654bd3751SJohannes Doerfert       return false;
51754bd3751SJohannes Doerfert 
5189548b74aSJohannes Doerfert     bool Changed = false;
5199548b74aSJohannes Doerfert 
5209548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
52177b79d79SMehdi Amini                       << " functions in a slice with "
52277b79d79SMehdi Amini                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
5239548b74aSJohannes Doerfert 
524b2ad63d3SJoseph Huber     if (IsModulePass) {
525d3e74913SNico Weber       Changed |= runAttributor();
52618283125SJoseph Huber 
5276fc51c9fSJoseph Huber       // Recollect uses, in case Attributor deleted any.
5286fc51c9fSJoseph Huber       OMPInfoCache.recollectUses();
5296fc51c9fSJoseph Huber 
530b2ad63d3SJoseph Huber       if (remarksEnabled())
531b2ad63d3SJoseph Huber         analysisGlobalization();
532b2ad63d3SJoseph Huber     } else {
533e8039ad4SJohannes Doerfert       if (PrintICVValues)
534e8039ad4SJohannes Doerfert         printICVs();
535e8039ad4SJohannes Doerfert       if (PrintOpenMPKernels)
536e8039ad4SJohannes Doerfert         printKernels();
537e8039ad4SJohannes Doerfert 
538d3e74913SNico Weber       Changed |= rewriteDeviceCodeStateMachine();
539d3e74913SNico Weber 
540d3e74913SNico Weber       Changed |= runAttributor();
541e8039ad4SJohannes Doerfert 
542e8039ad4SJohannes Doerfert       // Recollect uses, in case Attributor deleted any.
543e8039ad4SJohannes Doerfert       OMPInfoCache.recollectUses();
544e8039ad4SJohannes Doerfert 
545e8039ad4SJohannes Doerfert       Changed |= deleteParallelRegions();
546496f8e5bSHamilton Tobon Mosquera       if (HideMemoryTransferLatency)
547496f8e5bSHamilton Tobon Mosquera         Changed |= hideMemTransfersLatency();
5483a6bfcf2SGiorgis Georgakoudis       Changed |= deduplicateRuntimeCalls();
5493a6bfcf2SGiorgis Georgakoudis       if (EnableParallelRegionMerging) {
5503a6bfcf2SGiorgis Georgakoudis         if (mergeParallelRegions()) {
5513a6bfcf2SGiorgis Georgakoudis           deduplicateRuntimeCalls();
5523a6bfcf2SGiorgis Georgakoudis           Changed = true;
5533a6bfcf2SGiorgis Georgakoudis         }
5543a6bfcf2SGiorgis Georgakoudis       }
555b2ad63d3SJoseph Huber     }
556e8039ad4SJohannes Doerfert 
557e8039ad4SJohannes Doerfert     return Changed;
558e8039ad4SJohannes Doerfert   }
559e8039ad4SJohannes Doerfert 
5600f426935Ssstefan1   /// Print initial ICV values for testing.
5610f426935Ssstefan1   /// FIXME: This should be done from the Attributor once it is added.
562e8039ad4SJohannes Doerfert   void printICVs() const {
563cb9cfa0dSsstefan1     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
564cb9cfa0dSsstefan1                                  ICV_proc_bind};
5650f426935Ssstefan1 
5660f426935Ssstefan1     for (Function *F : OMPInfoCache.ModuleSlice) {
5670f426935Ssstefan1       for (auto ICV : ICVs) {
5680f426935Ssstefan1         auto ICVInfo = OMPInfoCache.ICVs[ICV];
5692db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5702db182ffSJoseph Huber           return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
5710f426935Ssstefan1                      << " Value: "
5720f426935Ssstefan1                      << (ICVInfo.InitValue
57361cdaf66SSimon Pilgrim                              ? toString(ICVInfo.InitValue->getValue(), 10, true)
5740f426935Ssstefan1                              : "IMPLEMENTATION_DEFINED");
5750f426935Ssstefan1         };
5760f426935Ssstefan1 
5772db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
5780f426935Ssstefan1       }
5790f426935Ssstefan1     }
5800f426935Ssstefan1   }
5810f426935Ssstefan1 
582e8039ad4SJohannes Doerfert   /// Print OpenMP GPU kernels for testing.
583e8039ad4SJohannes Doerfert   void printKernels() const {
584e8039ad4SJohannes Doerfert     for (Function *F : SCC) {
585e8039ad4SJohannes Doerfert       if (!OMPInfoCache.Kernels.count(F))
586e8039ad4SJohannes Doerfert         continue;
587b8235d2bSsstefan1 
5882db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
5892db182ffSJoseph Huber         return ORA << "OpenMP GPU kernel "
590e8039ad4SJohannes Doerfert                    << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
591e8039ad4SJohannes Doerfert       };
592b8235d2bSsstefan1 
5932db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
594e8039ad4SJohannes Doerfert     }
5959548b74aSJohannes Doerfert   }
5969548b74aSJohannes Doerfert 
5977cfd267cSsstefan1   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
5987cfd267cSsstefan1   /// given it has to be the callee or a nullptr is returned.
5997cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6007cfd267cSsstefan1       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6017cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(U.getUser());
6027cfd267cSsstefan1     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
6037cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6047cfd267cSsstefan1       return CI;
6057cfd267cSsstefan1     return nullptr;
6067cfd267cSsstefan1   }
6077cfd267cSsstefan1 
6087cfd267cSsstefan1   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
6097cfd267cSsstefan1   /// the callee or a nullptr is returned.
6107cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
6117cfd267cSsstefan1       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
6127cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(&V);
6137cfd267cSsstefan1     if (CI && !CI->hasOperandBundles() &&
6147cfd267cSsstefan1         (!RFI || CI->getCalledFunction() == RFI->Declaration))
6157cfd267cSsstefan1       return CI;
6167cfd267cSsstefan1     return nullptr;
6177cfd267cSsstefan1   }
6187cfd267cSsstefan1 
6199548b74aSJohannes Doerfert private:
6203a6bfcf2SGiorgis Georgakoudis   /// Merge parallel regions when it is safe.
6213a6bfcf2SGiorgis Georgakoudis   bool mergeParallelRegions() {
6223a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackCalleeOperand = 2;
6233a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackFirstArgOperand = 3;
6243a6bfcf2SGiorgis Georgakoudis     using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6253a6bfcf2SGiorgis Georgakoudis 
6263a6bfcf2SGiorgis Georgakoudis     // Check if there are any __kmpc_fork_call calls to merge.
6273a6bfcf2SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo &RFI =
6283a6bfcf2SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
6293a6bfcf2SGiorgis Georgakoudis 
6303a6bfcf2SGiorgis Georgakoudis     if (!RFI.Declaration)
6313a6bfcf2SGiorgis Georgakoudis       return false;
6323a6bfcf2SGiorgis Georgakoudis 
63397517055SGiorgis Georgakoudis     // Unmergable calls that prevent merging a parallel region.
63497517055SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
63597517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
63697517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
63797517055SGiorgis Georgakoudis     };
6383a6bfcf2SGiorgis Georgakoudis 
6393a6bfcf2SGiorgis Georgakoudis     bool Changed = false;
6403a6bfcf2SGiorgis Georgakoudis     LoopInfo *LI = nullptr;
6413a6bfcf2SGiorgis Georgakoudis     DominatorTree *DT = nullptr;
6423a6bfcf2SGiorgis Georgakoudis 
6433a6bfcf2SGiorgis Georgakoudis     SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
6443a6bfcf2SGiorgis Georgakoudis 
6453a6bfcf2SGiorgis Georgakoudis     BasicBlock *StartBB = nullptr, *EndBB = nullptr;
6463a6bfcf2SGiorgis Georgakoudis     auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
6473a6bfcf2SGiorgis Georgakoudis                          BasicBlock &ContinuationIP) {
6483a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGStartBB = CodeGenIP.getBlock();
6493a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGEndBB =
6503a6bfcf2SGiorgis Georgakoudis           SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
6513a6bfcf2SGiorgis Georgakoudis       assert(StartBB != nullptr && "StartBB should not be null");
6523a6bfcf2SGiorgis Georgakoudis       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
6533a6bfcf2SGiorgis Georgakoudis       assert(EndBB != nullptr && "EndBB should not be null");
6543a6bfcf2SGiorgis Georgakoudis       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
6553a6bfcf2SGiorgis Georgakoudis     };
6563a6bfcf2SGiorgis Georgakoudis 
657240dd924SAlex Zinenko     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
658240dd924SAlex Zinenko                       Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
659240dd924SAlex Zinenko       ReplacementValue = &Inner;
6603a6bfcf2SGiorgis Georgakoudis       return CodeGenIP;
6613a6bfcf2SGiorgis Georgakoudis     };
6623a6bfcf2SGiorgis Georgakoudis 
6633a6bfcf2SGiorgis Georgakoudis     auto FiniCB = [&](InsertPointTy CodeGenIP) {};
6643a6bfcf2SGiorgis Georgakoudis 
66597517055SGiorgis Georgakoudis     /// Create a sequential execution region within a merged parallel region,
66697517055SGiorgis Georgakoudis     /// encapsulated in a master construct with a barrier for synchronization.
66797517055SGiorgis Georgakoudis     auto CreateSequentialRegion = [&](Function *OuterFn,
66897517055SGiorgis Georgakoudis                                       BasicBlock *OuterPredBB,
66997517055SGiorgis Georgakoudis                                       Instruction *SeqStartI,
67097517055SGiorgis Georgakoudis                                       Instruction *SeqEndI) {
67197517055SGiorgis Georgakoudis       // Isolate the instructions of the sequential region to a separate
67297517055SGiorgis Georgakoudis       // block.
67397517055SGiorgis Georgakoudis       BasicBlock *ParentBB = SeqStartI->getParent();
67497517055SGiorgis Georgakoudis       BasicBlock *SeqEndBB =
67597517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
67697517055SGiorgis Georgakoudis       BasicBlock *SeqAfterBB =
67797517055SGiorgis Georgakoudis           SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
67897517055SGiorgis Georgakoudis       BasicBlock *SeqStartBB =
67997517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
68097517055SGiorgis Georgakoudis 
68197517055SGiorgis Georgakoudis       assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
68297517055SGiorgis Georgakoudis              "Expected a different CFG");
68397517055SGiorgis Georgakoudis       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
68497517055SGiorgis Georgakoudis       ParentBB->getTerminator()->eraseFromParent();
68597517055SGiorgis Georgakoudis 
68697517055SGiorgis Georgakoudis       auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
68797517055SGiorgis Georgakoudis                            BasicBlock &ContinuationIP) {
68897517055SGiorgis Georgakoudis         BasicBlock *CGStartBB = CodeGenIP.getBlock();
68997517055SGiorgis Georgakoudis         BasicBlock *CGEndBB =
69097517055SGiorgis Georgakoudis             SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
69197517055SGiorgis Georgakoudis         assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
69297517055SGiorgis Georgakoudis         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
69397517055SGiorgis Georgakoudis         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
69497517055SGiorgis Georgakoudis         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
69597517055SGiorgis Georgakoudis       };
69697517055SGiorgis Georgakoudis       auto FiniCB = [&](InsertPointTy CodeGenIP) {};
69797517055SGiorgis Georgakoudis 
69897517055SGiorgis Georgakoudis       // Find outputs from the sequential region to outside users and
69997517055SGiorgis Georgakoudis       // broadcast their values to them.
70097517055SGiorgis Georgakoudis       for (Instruction &I : *SeqStartBB) {
70197517055SGiorgis Georgakoudis         SmallPtrSet<Instruction *, 4> OutsideUsers;
70297517055SGiorgis Georgakoudis         for (User *Usr : I.users()) {
70397517055SGiorgis Georgakoudis           Instruction &UsrI = *cast<Instruction>(Usr);
70497517055SGiorgis Georgakoudis           // Ignore outputs to LT intrinsics, code extraction for the merged
70597517055SGiorgis Georgakoudis           // parallel region will fix them.
70697517055SGiorgis Georgakoudis           if (UsrI.isLifetimeStartOrEnd())
70797517055SGiorgis Georgakoudis             continue;
70897517055SGiorgis Georgakoudis 
70997517055SGiorgis Georgakoudis           if (UsrI.getParent() != SeqStartBB)
71097517055SGiorgis Georgakoudis             OutsideUsers.insert(&UsrI);
71197517055SGiorgis Georgakoudis         }
71297517055SGiorgis Georgakoudis 
71397517055SGiorgis Georgakoudis         if (OutsideUsers.empty())
71497517055SGiorgis Georgakoudis           continue;
71597517055SGiorgis Georgakoudis 
71697517055SGiorgis Georgakoudis         // Emit an alloca in the outer region to store the broadcasted
71797517055SGiorgis Georgakoudis         // value.
71897517055SGiorgis Georgakoudis         const DataLayout &DL = M.getDataLayout();
71997517055SGiorgis Georgakoudis         AllocaInst *AllocaI = new AllocaInst(
72097517055SGiorgis Georgakoudis             I.getType(), DL.getAllocaAddrSpace(), nullptr,
72197517055SGiorgis Georgakoudis             I.getName() + ".seq.output.alloc", &OuterFn->front().front());
72297517055SGiorgis Georgakoudis 
72397517055SGiorgis Georgakoudis         // Emit a store instruction in the sequential BB to update the
72497517055SGiorgis Georgakoudis         // value.
72597517055SGiorgis Georgakoudis         new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
72697517055SGiorgis Georgakoudis 
72797517055SGiorgis Georgakoudis         // Emit a load instruction and replace the use of the output value
72897517055SGiorgis Georgakoudis         // with it.
72997517055SGiorgis Georgakoudis         for (Instruction *UsrI : OutsideUsers) {
7305b70c12fSJohannes Doerfert           LoadInst *LoadI = new LoadInst(
7315b70c12fSJohannes Doerfert               I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
73297517055SGiorgis Georgakoudis           UsrI->replaceUsesOfWith(&I, LoadI);
73397517055SGiorgis Georgakoudis         }
73497517055SGiorgis Georgakoudis       }
73597517055SGiorgis Georgakoudis 
73697517055SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(
73797517055SGiorgis Georgakoudis           InsertPointTy(ParentBB, ParentBB->end()), DL);
73897517055SGiorgis Georgakoudis       InsertPointTy SeqAfterIP =
73997517055SGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
74097517055SGiorgis Georgakoudis 
74197517055SGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
74297517055SGiorgis Georgakoudis 
74397517055SGiorgis Georgakoudis       BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
74497517055SGiorgis Georgakoudis 
74597517055SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
74697517055SGiorgis Georgakoudis                         << "\n");
74797517055SGiorgis Georgakoudis     };
74897517055SGiorgis Georgakoudis 
7493a6bfcf2SGiorgis Georgakoudis     // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
7503a6bfcf2SGiorgis Georgakoudis     // contained in BB and only separated by instructions that can be
7513a6bfcf2SGiorgis Georgakoudis     // redundantly executed in parallel. The block BB is split before the first
7523a6bfcf2SGiorgis Georgakoudis     // call (in MergableCIs) and after the last so the entire region we merge
7533a6bfcf2SGiorgis Georgakoudis     // into a single parallel region is contained in a single basic block
7543a6bfcf2SGiorgis Georgakoudis     // without any other instructions. We use the OpenMPIRBuilder to outline
7553a6bfcf2SGiorgis Georgakoudis     // that block and call the resulting function via __kmpc_fork_call.
7563a6bfcf2SGiorgis Georgakoudis     auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
7573a6bfcf2SGiorgis Georgakoudis       // TODO: Change the interface to allow single CIs expanded, e.g, to
7583a6bfcf2SGiorgis Georgakoudis       // include an outer loop.
7593a6bfcf2SGiorgis Georgakoudis       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
7603a6bfcf2SGiorgis Georgakoudis 
7613a6bfcf2SGiorgis Georgakoudis       auto Remark = [&](OptimizationRemark OR) {
7623a6bfcf2SGiorgis Georgakoudis         OR << "Parallel region at "
7633a6bfcf2SGiorgis Georgakoudis            << ore::NV("OpenMPParallelMergeFront",
7643a6bfcf2SGiorgis Georgakoudis                       MergableCIs.front()->getDebugLoc())
7653a6bfcf2SGiorgis Georgakoudis            << " merged with parallel regions at ";
76623b0ab2aSKazu Hirata         for (auto *CI : llvm::drop_begin(MergableCIs)) {
7673a6bfcf2SGiorgis Georgakoudis           OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
7683a6bfcf2SGiorgis Georgakoudis           if (CI != MergableCIs.back())
7693a6bfcf2SGiorgis Georgakoudis             OR << ", ";
7703a6bfcf2SGiorgis Georgakoudis         }
7713a6bfcf2SGiorgis Georgakoudis         return OR;
7723a6bfcf2SGiorgis Georgakoudis       };
7733a6bfcf2SGiorgis Georgakoudis 
7743a6bfcf2SGiorgis Georgakoudis       emitRemark<OptimizationRemark>(MergableCIs.front(),
7753a6bfcf2SGiorgis Georgakoudis                                      "OpenMPParallelRegionMerging", Remark);
7763a6bfcf2SGiorgis Georgakoudis 
7773a6bfcf2SGiorgis Georgakoudis       Function *OriginalFn = BB->getParent();
7783a6bfcf2SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
7793a6bfcf2SGiorgis Georgakoudis                         << " parallel regions in " << OriginalFn->getName()
7803a6bfcf2SGiorgis Georgakoudis                         << "\n");
7813a6bfcf2SGiorgis Georgakoudis 
7823a6bfcf2SGiorgis Georgakoudis       // Isolate the calls to merge in a separate block.
7833a6bfcf2SGiorgis Georgakoudis       EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
7843a6bfcf2SGiorgis Georgakoudis       BasicBlock *AfterBB =
7853a6bfcf2SGiorgis Georgakoudis           SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
7863a6bfcf2SGiorgis Georgakoudis       StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
7873a6bfcf2SGiorgis Georgakoudis                            "omp.par.merged");
7883a6bfcf2SGiorgis Georgakoudis 
7893a6bfcf2SGiorgis Georgakoudis       assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
7903a6bfcf2SGiorgis Georgakoudis       const DebugLoc DL = BB->getTerminator()->getDebugLoc();
7913a6bfcf2SGiorgis Georgakoudis       BB->getTerminator()->eraseFromParent();
7923a6bfcf2SGiorgis Georgakoudis 
79397517055SGiorgis Georgakoudis       // Create sequential regions for sequential instructions that are
79497517055SGiorgis Georgakoudis       // in-between mergable parallel regions.
79597517055SGiorgis Georgakoudis       for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
79697517055SGiorgis Georgakoudis            It != End; ++It) {
79797517055SGiorgis Georgakoudis         Instruction *ForkCI = *It;
79897517055SGiorgis Georgakoudis         Instruction *NextForkCI = *(It + 1);
79997517055SGiorgis Georgakoudis 
80097517055SGiorgis Georgakoudis         // Continue if there are not in-between instructions.
80197517055SGiorgis Georgakoudis         if (ForkCI->getNextNode() == NextForkCI)
80297517055SGiorgis Georgakoudis           continue;
80397517055SGiorgis Georgakoudis 
80497517055SGiorgis Georgakoudis         CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
80597517055SGiorgis Georgakoudis                                NextForkCI->getPrevNode());
80697517055SGiorgis Georgakoudis       }
80797517055SGiorgis Georgakoudis 
8083a6bfcf2SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
8093a6bfcf2SGiorgis Georgakoudis                                                DL);
8103a6bfcf2SGiorgis Georgakoudis       IRBuilder<>::InsertPoint AllocaIP(
8113a6bfcf2SGiorgis Georgakoudis           &OriginalFn->getEntryBlock(),
8123a6bfcf2SGiorgis Georgakoudis           OriginalFn->getEntryBlock().getFirstInsertionPt());
8133a6bfcf2SGiorgis Georgakoudis       // Create the merged parallel region with default proc binding, to
8143a6bfcf2SGiorgis Georgakoudis       // avoid overriding binding settings, and without explicit cancellation.
815e5dba2d7SMichael Kruse       InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
8163a6bfcf2SGiorgis Georgakoudis           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
8173a6bfcf2SGiorgis Georgakoudis           OMP_PROC_BIND_default, /* IsCancellable */ false);
8183a6bfcf2SGiorgis Georgakoudis       BranchInst::Create(AfterBB, AfterIP.getBlock());
8193a6bfcf2SGiorgis Georgakoudis 
8203a6bfcf2SGiorgis Georgakoudis       // Perform the actual outlining.
821b1191206SMichael Kruse       OMPInfoCache.OMPBuilder.finalize(OriginalFn,
822b1191206SMichael Kruse                                        /* AllowExtractorSinking */ true);
8233a6bfcf2SGiorgis Georgakoudis 
8243a6bfcf2SGiorgis Georgakoudis       Function *OutlinedFn = MergableCIs.front()->getCaller();
8253a6bfcf2SGiorgis Georgakoudis 
8263a6bfcf2SGiorgis Georgakoudis       // Replace the __kmpc_fork_call calls with direct calls to the outlined
8273a6bfcf2SGiorgis Georgakoudis       // callbacks.
8283a6bfcf2SGiorgis Georgakoudis       SmallVector<Value *, 8> Args;
8293a6bfcf2SGiorgis Georgakoudis       for (auto *CI : MergableCIs) {
8303a6bfcf2SGiorgis Georgakoudis         Value *Callee =
8313a6bfcf2SGiorgis Georgakoudis             CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
8323a6bfcf2SGiorgis Georgakoudis         FunctionType *FT =
8333a6bfcf2SGiorgis Georgakoudis             cast<FunctionType>(Callee->getType()->getPointerElementType());
8343a6bfcf2SGiorgis Georgakoudis         Args.clear();
8353a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(0));
8363a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(1));
8373a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8383a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8393a6bfcf2SGiorgis Georgakoudis           Args.push_back(CI->getArgOperand(U));
8403a6bfcf2SGiorgis Georgakoudis 
8413a6bfcf2SGiorgis Georgakoudis         CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
8423a6bfcf2SGiorgis Georgakoudis         if (CI->getDebugLoc())
8433a6bfcf2SGiorgis Georgakoudis           NewCI->setDebugLoc(CI->getDebugLoc());
8443a6bfcf2SGiorgis Georgakoudis 
8453a6bfcf2SGiorgis Georgakoudis         // Forward parameter attributes from the callback to the callee.
8463a6bfcf2SGiorgis Georgakoudis         for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
8473a6bfcf2SGiorgis Georgakoudis              U < E; ++U)
8483a6bfcf2SGiorgis Georgakoudis           for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
8493a6bfcf2SGiorgis Georgakoudis             NewCI->addParamAttr(
8503a6bfcf2SGiorgis Georgakoudis                 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
8513a6bfcf2SGiorgis Georgakoudis 
8523a6bfcf2SGiorgis Georgakoudis         // Emit an explicit barrier to replace the implicit fork-join barrier.
8533a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.back()) {
8543a6bfcf2SGiorgis Georgakoudis           // TODO: Remove barrier if the merged parallel region includes the
8553a6bfcf2SGiorgis Georgakoudis           // 'nowait' clause.
856e5dba2d7SMichael Kruse           OMPInfoCache.OMPBuilder.createBarrier(
8573a6bfcf2SGiorgis Georgakoudis               InsertPointTy(NewCI->getParent(),
8583a6bfcf2SGiorgis Georgakoudis                             NewCI->getNextNode()->getIterator()),
8593a6bfcf2SGiorgis Georgakoudis               OMPD_parallel);
8603a6bfcf2SGiorgis Georgakoudis         }
8613a6bfcf2SGiorgis Georgakoudis 
8623a6bfcf2SGiorgis Georgakoudis         auto Remark = [&](OptimizationRemark OR) {
8633a6bfcf2SGiorgis Georgakoudis           return OR << "Parallel region at "
8643a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMerge", CI->getDebugLoc())
8653a6bfcf2SGiorgis Georgakoudis                     << " merged with "
8663a6bfcf2SGiorgis Georgakoudis                     << ore::NV("OpenMPParallelMergeFront",
8673a6bfcf2SGiorgis Georgakoudis                                MergableCIs.front()->getDebugLoc());
8683a6bfcf2SGiorgis Georgakoudis         };
8693a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.front())
8703a6bfcf2SGiorgis Georgakoudis           emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging",
8713a6bfcf2SGiorgis Georgakoudis                                          Remark);
8723a6bfcf2SGiorgis Georgakoudis 
8733a6bfcf2SGiorgis Georgakoudis         CI->eraseFromParent();
8743a6bfcf2SGiorgis Georgakoudis       }
8753a6bfcf2SGiorgis Georgakoudis 
8763a6bfcf2SGiorgis Georgakoudis       assert(OutlinedFn != OriginalFn && "Outlining failed");
8777fea561eSArthur Eubanks       CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
8783a6bfcf2SGiorgis Georgakoudis       CGUpdater.reanalyzeFunction(*OriginalFn);
8793a6bfcf2SGiorgis Georgakoudis 
8803a6bfcf2SGiorgis Georgakoudis       NumOpenMPParallelRegionsMerged += MergableCIs.size();
8813a6bfcf2SGiorgis Georgakoudis 
8823a6bfcf2SGiorgis Georgakoudis       return true;
8833a6bfcf2SGiorgis Georgakoudis     };
8843a6bfcf2SGiorgis Georgakoudis 
8853a6bfcf2SGiorgis Georgakoudis     // Helper function that identifies sequences of
8863a6bfcf2SGiorgis Georgakoudis     // __kmpc_fork_call uses in a basic block.
8873a6bfcf2SGiorgis Georgakoudis     auto DetectPRsCB = [&](Use &U, Function &F) {
8883a6bfcf2SGiorgis Georgakoudis       CallInst *CI = getCallIfRegularCall(U, &RFI);
8893a6bfcf2SGiorgis Georgakoudis       BB2PRMap[CI->getParent()].insert(CI);
8903a6bfcf2SGiorgis Georgakoudis 
8913a6bfcf2SGiorgis Georgakoudis       return false;
8923a6bfcf2SGiorgis Georgakoudis     };
8933a6bfcf2SGiorgis Georgakoudis 
8943a6bfcf2SGiorgis Georgakoudis     BB2PRMap.clear();
8953a6bfcf2SGiorgis Georgakoudis     RFI.foreachUse(SCC, DetectPRsCB);
8963a6bfcf2SGiorgis Georgakoudis     SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
8973a6bfcf2SGiorgis Georgakoudis     // Find mergable parallel regions within a basic block that are
8983a6bfcf2SGiorgis Georgakoudis     // safe to merge, that is any in-between instructions can safely
8993a6bfcf2SGiorgis Georgakoudis     // execute in parallel after merging.
9003a6bfcf2SGiorgis Georgakoudis     // TODO: support merging across basic-blocks.
9013a6bfcf2SGiorgis Georgakoudis     for (auto &It : BB2PRMap) {
9023a6bfcf2SGiorgis Georgakoudis       auto &CIs = It.getSecond();
9033a6bfcf2SGiorgis Georgakoudis       if (CIs.size() < 2)
9043a6bfcf2SGiorgis Georgakoudis         continue;
9053a6bfcf2SGiorgis Georgakoudis 
9063a6bfcf2SGiorgis Georgakoudis       BasicBlock *BB = It.getFirst();
9073a6bfcf2SGiorgis Georgakoudis       SmallVector<CallInst *, 4> MergableCIs;
9083a6bfcf2SGiorgis Georgakoudis 
90997517055SGiorgis Georgakoudis       /// Returns true if the instruction is mergable, false otherwise.
91097517055SGiorgis Georgakoudis       /// A terminator instruction is unmergable by definition since merging
91197517055SGiorgis Georgakoudis       /// works within a BB. Instructions before the mergable region are
91297517055SGiorgis Georgakoudis       /// mergable if they are not calls to OpenMP runtime functions that may
91397517055SGiorgis Georgakoudis       /// set different execution parameters for subsequent parallel regions.
91497517055SGiorgis Georgakoudis       /// Instructions in-between parallel regions are mergable if they are not
91597517055SGiorgis Georgakoudis       /// calls to any non-intrinsic function since that may call a non-mergable
91697517055SGiorgis Georgakoudis       /// OpenMP runtime function.
91797517055SGiorgis Georgakoudis       auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
91897517055SGiorgis Georgakoudis         // We do not merge across BBs, hence return false (unmergable) if the
91997517055SGiorgis Georgakoudis         // instruction is a terminator.
92097517055SGiorgis Georgakoudis         if (I.isTerminator())
92197517055SGiorgis Georgakoudis           return false;
92297517055SGiorgis Georgakoudis 
92397517055SGiorgis Georgakoudis         if (!isa<CallInst>(&I))
92497517055SGiorgis Georgakoudis           return true;
92597517055SGiorgis Georgakoudis 
92697517055SGiorgis Georgakoudis         CallInst *CI = cast<CallInst>(&I);
92797517055SGiorgis Georgakoudis         if (IsBeforeMergableRegion) {
92897517055SGiorgis Georgakoudis           Function *CalledFunction = CI->getCalledFunction();
92997517055SGiorgis Georgakoudis           if (!CalledFunction)
93097517055SGiorgis Georgakoudis             return false;
93197517055SGiorgis Georgakoudis           // Return false (unmergable) if the call before the parallel
93297517055SGiorgis Georgakoudis           // region calls an explicit affinity (proc_bind) or number of
93397517055SGiorgis Georgakoudis           // threads (num_threads) compiler-generated function. Those settings
93497517055SGiorgis Georgakoudis           // may be incompatible with following parallel regions.
93597517055SGiorgis Georgakoudis           // TODO: ICV tracking to detect compatibility.
93697517055SGiorgis Georgakoudis           for (const auto &RFI : UnmergableCallsInfo) {
93797517055SGiorgis Georgakoudis             if (CalledFunction == RFI.Declaration)
93897517055SGiorgis Georgakoudis               return false;
93997517055SGiorgis Georgakoudis           }
94097517055SGiorgis Georgakoudis         } else {
94197517055SGiorgis Georgakoudis           // Return false (unmergable) if there is a call instruction
94297517055SGiorgis Georgakoudis           // in-between parallel regions when it is not an intrinsic. It
94397517055SGiorgis Georgakoudis           // may call an unmergable OpenMP runtime function in its callpath.
94497517055SGiorgis Georgakoudis           // TODO: Keep track of possible OpenMP calls in the callpath.
94597517055SGiorgis Georgakoudis           if (!isa<IntrinsicInst>(CI))
94697517055SGiorgis Georgakoudis             return false;
94797517055SGiorgis Georgakoudis         }
94897517055SGiorgis Georgakoudis 
94997517055SGiorgis Georgakoudis         return true;
95097517055SGiorgis Georgakoudis       };
9513a6bfcf2SGiorgis Georgakoudis       // Find maximal number of parallel region CIs that are safe to merge.
95297517055SGiorgis Georgakoudis       for (auto It = BB->begin(), End = BB->end(); It != End;) {
95397517055SGiorgis Georgakoudis         Instruction &I = *It;
95497517055SGiorgis Georgakoudis         ++It;
95597517055SGiorgis Georgakoudis 
9563a6bfcf2SGiorgis Georgakoudis         if (CIs.count(&I)) {
9573a6bfcf2SGiorgis Georgakoudis           MergableCIs.push_back(cast<CallInst>(&I));
9583a6bfcf2SGiorgis Georgakoudis           continue;
9593a6bfcf2SGiorgis Georgakoudis         }
9603a6bfcf2SGiorgis Georgakoudis 
96197517055SGiorgis Georgakoudis         // Continue expanding if the instruction is mergable.
96297517055SGiorgis Georgakoudis         if (IsMergable(I, MergableCIs.empty()))
9633a6bfcf2SGiorgis Georgakoudis           continue;
9643a6bfcf2SGiorgis Georgakoudis 
96597517055SGiorgis Georgakoudis         // Forward the instruction iterator to skip the next parallel region
96697517055SGiorgis Georgakoudis         // since there is an unmergable instruction which can affect it.
96797517055SGiorgis Georgakoudis         for (; It != End; ++It) {
96897517055SGiorgis Georgakoudis           Instruction &SkipI = *It;
96997517055SGiorgis Georgakoudis           if (CIs.count(&SkipI)) {
97097517055SGiorgis Georgakoudis             LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
97197517055SGiorgis Georgakoudis                               << " due to " << I << "\n");
97297517055SGiorgis Georgakoudis             ++It;
97397517055SGiorgis Georgakoudis             break;
97497517055SGiorgis Georgakoudis           }
97597517055SGiorgis Georgakoudis         }
97697517055SGiorgis Georgakoudis 
97797517055SGiorgis Georgakoudis         // Store mergable regions found.
9783a6bfcf2SGiorgis Georgakoudis         if (MergableCIs.size() > 1) {
9793a6bfcf2SGiorgis Georgakoudis           MergableCIsVector.push_back(MergableCIs);
9803a6bfcf2SGiorgis Georgakoudis           LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
9813a6bfcf2SGiorgis Georgakoudis                             << " parallel regions in block " << BB->getName()
9823a6bfcf2SGiorgis Georgakoudis                             << " of function " << BB->getParent()->getName()
9833a6bfcf2SGiorgis Georgakoudis                             << "\n";);
9843a6bfcf2SGiorgis Georgakoudis         }
9853a6bfcf2SGiorgis Georgakoudis 
9863a6bfcf2SGiorgis Georgakoudis         MergableCIs.clear();
9873a6bfcf2SGiorgis Georgakoudis       }
9883a6bfcf2SGiorgis Georgakoudis 
9893a6bfcf2SGiorgis Georgakoudis       if (!MergableCIsVector.empty()) {
9903a6bfcf2SGiorgis Georgakoudis         Changed = true;
9913a6bfcf2SGiorgis Georgakoudis 
9923a6bfcf2SGiorgis Georgakoudis         for (auto &MergableCIs : MergableCIsVector)
9933a6bfcf2SGiorgis Georgakoudis           Merge(MergableCIs, BB);
994b2ad63d3SJoseph Huber         MergableCIsVector.clear();
9953a6bfcf2SGiorgis Georgakoudis       }
9963a6bfcf2SGiorgis Georgakoudis     }
9973a6bfcf2SGiorgis Georgakoudis 
9983a6bfcf2SGiorgis Georgakoudis     if (Changed) {
99997517055SGiorgis Georgakoudis       /// Re-collect use for fork calls, emitted barrier calls, and
100097517055SGiorgis Georgakoudis       /// any emitted master/end_master calls.
100197517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
100297517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
100397517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
100497517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
10053a6bfcf2SGiorgis Georgakoudis     }
10063a6bfcf2SGiorgis Georgakoudis 
10073a6bfcf2SGiorgis Georgakoudis     return Changed;
10083a6bfcf2SGiorgis Georgakoudis   }
10093a6bfcf2SGiorgis Georgakoudis 
10109d38f98dSJohannes Doerfert   /// Try to delete parallel regions if possible.
1011e565db49SJohannes Doerfert   bool deleteParallelRegions() {
1012e565db49SJohannes Doerfert     const unsigned CallbackCalleeOperand = 2;
1013e565db49SJohannes Doerfert 
10147cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &RFI =
10157cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
10167cfd267cSsstefan1 
1017e565db49SJohannes Doerfert     if (!RFI.Declaration)
1018e565db49SJohannes Doerfert       return false;
1019e565db49SJohannes Doerfert 
1020e565db49SJohannes Doerfert     bool Changed = false;
1021e565db49SJohannes Doerfert     auto DeleteCallCB = [&](Use &U, Function &) {
1022e565db49SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U);
1023e565db49SJohannes Doerfert       if (!CI)
1024e565db49SJohannes Doerfert         return false;
1025e565db49SJohannes Doerfert       auto *Fn = dyn_cast<Function>(
1026e565db49SJohannes Doerfert           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1027e565db49SJohannes Doerfert       if (!Fn)
1028e565db49SJohannes Doerfert         return false;
1029e565db49SJohannes Doerfert       if (!Fn->onlyReadsMemory())
1030e565db49SJohannes Doerfert         return false;
1031e565db49SJohannes Doerfert       if (!Fn->hasFnAttribute(Attribute::WillReturn))
1032e565db49SJohannes Doerfert         return false;
1033e565db49SJohannes Doerfert 
1034e565db49SJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1035e565db49SJohannes Doerfert                         << CI->getCaller()->getName() << "\n");
10364d4ea9acSHuber, Joseph 
10374d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
10384d4ea9acSHuber, Joseph         return OR << "Parallel region in "
10394d4ea9acSHuber, Joseph                   << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
10404d4ea9acSHuber, Joseph                   << " deleted";
10414d4ea9acSHuber, Joseph       };
10424d4ea9acSHuber, Joseph       emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
10434d4ea9acSHuber, Joseph                                      Remark);
10444d4ea9acSHuber, Joseph 
1045e565db49SJohannes Doerfert       CGUpdater.removeCallSite(*CI);
1046e565db49SJohannes Doerfert       CI->eraseFromParent();
1047e565db49SJohannes Doerfert       Changed = true;
104855eb714aSRoman Lebedev       ++NumOpenMPParallelRegionsDeleted;
1049e565db49SJohannes Doerfert       return true;
1050e565db49SJohannes Doerfert     };
1051e565db49SJohannes Doerfert 
1052624d34afSJohannes Doerfert     RFI.foreachUse(SCC, DeleteCallCB);
1053e565db49SJohannes Doerfert 
1054e565db49SJohannes Doerfert     return Changed;
1055e565db49SJohannes Doerfert   }
1056e565db49SJohannes Doerfert 
1057b726c557SJohannes Doerfert   /// Try to eliminate runtime calls by reusing existing ones.
10589548b74aSJohannes Doerfert   bool deduplicateRuntimeCalls() {
10599548b74aSJohannes Doerfert     bool Changed = false;
10609548b74aSJohannes Doerfert 
1061e28936f6SJohannes Doerfert     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1062e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_threads,
1063e28936f6SJohannes Doerfert         OMPRTL_omp_in_parallel,
1064e28936f6SJohannes Doerfert         OMPRTL_omp_get_cancellation,
1065e28936f6SJohannes Doerfert         OMPRTL_omp_get_thread_limit,
1066e28936f6SJohannes Doerfert         OMPRTL_omp_get_supported_active_levels,
1067e28936f6SJohannes Doerfert         OMPRTL_omp_get_level,
1068e28936f6SJohannes Doerfert         OMPRTL_omp_get_ancestor_thread_num,
1069e28936f6SJohannes Doerfert         OMPRTL_omp_get_team_size,
1070e28936f6SJohannes Doerfert         OMPRTL_omp_get_active_level,
1071e28936f6SJohannes Doerfert         OMPRTL_omp_in_final,
1072e28936f6SJohannes Doerfert         OMPRTL_omp_get_proc_bind,
1073e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_places,
1074e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_procs,
1075e28936f6SJohannes Doerfert         OMPRTL_omp_get_place_num,
1076e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_num_places,
1077e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_place_nums};
1078e28936f6SJohannes Doerfert 
1079bc93c2d7SMarek Kurdej     // Global-tid is handled separately.
10809548b74aSJohannes Doerfert     SmallSetVector<Value *, 16> GTIdArgs;
10819548b74aSJohannes Doerfert     collectGlobalThreadIdArguments(GTIdArgs);
10829548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
10839548b74aSJohannes Doerfert                       << " global thread ID arguments\n");
10849548b74aSJohannes Doerfert 
10859548b74aSJohannes Doerfert     for (Function *F : SCC) {
1086e28936f6SJohannes Doerfert       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
10874e29d256Sserge-sans-paille         Changed |= deduplicateRuntimeCalls(
10884e29d256Sserge-sans-paille             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1089e28936f6SJohannes Doerfert 
1090e28936f6SJohannes Doerfert       // __kmpc_global_thread_num is special as we can replace it with an
1091e28936f6SJohannes Doerfert       // argument in enough cases to make it worth trying.
10929548b74aSJohannes Doerfert       Value *GTIdArg = nullptr;
10939548b74aSJohannes Doerfert       for (Argument &Arg : F->args())
10949548b74aSJohannes Doerfert         if (GTIdArgs.count(&Arg)) {
10959548b74aSJohannes Doerfert           GTIdArg = &Arg;
10969548b74aSJohannes Doerfert           break;
10979548b74aSJohannes Doerfert         }
10989548b74aSJohannes Doerfert       Changed |= deduplicateRuntimeCalls(
10997cfd267cSsstefan1           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
11009548b74aSJohannes Doerfert     }
11019548b74aSJohannes Doerfert 
11029548b74aSJohannes Doerfert     return Changed;
11039548b74aSJohannes Doerfert   }
11049548b74aSJohannes Doerfert 
1105496f8e5bSHamilton Tobon Mosquera   /// Tries to hide the latency of runtime calls that involve host to
1106496f8e5bSHamilton Tobon Mosquera   /// device memory transfers by splitting them into their "issue" and "wait"
1107496f8e5bSHamilton Tobon Mosquera   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1108496f8e5bSHamilton Tobon Mosquera   /// moved downards as much as possible. The "issue" issues the memory transfer
1109496f8e5bSHamilton Tobon Mosquera   /// asynchronously, returning a handle. The "wait" waits in the returned
1110496f8e5bSHamilton Tobon Mosquera   /// handle for the memory transfer to finish.
1111496f8e5bSHamilton Tobon Mosquera   bool hideMemTransfersLatency() {
1112496f8e5bSHamilton Tobon Mosquera     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1113496f8e5bSHamilton Tobon Mosquera     bool Changed = false;
1114496f8e5bSHamilton Tobon Mosquera     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1115496f8e5bSHamilton Tobon Mosquera       auto *RTCall = getCallIfRegularCall(U, &RFI);
1116496f8e5bSHamilton Tobon Mosquera       if (!RTCall)
1117496f8e5bSHamilton Tobon Mosquera         return false;
1118496f8e5bSHamilton Tobon Mosquera 
11198931add6SHamilton Tobon Mosquera       OffloadArray OffloadArrays[3];
11208931add6SHamilton Tobon Mosquera       if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
11218931add6SHamilton Tobon Mosquera         return false;
11228931add6SHamilton Tobon Mosquera 
11238931add6SHamilton Tobon Mosquera       LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
11248931add6SHamilton Tobon Mosquera 
1125bd2fa181SHamilton Tobon Mosquera       // TODO: Check if can be moved upwards.
1126bd2fa181SHamilton Tobon Mosquera       bool WasSplit = false;
1127bd2fa181SHamilton Tobon Mosquera       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1128bd2fa181SHamilton Tobon Mosquera       if (WaitMovementPoint)
1129bd2fa181SHamilton Tobon Mosquera         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1130bd2fa181SHamilton Tobon Mosquera 
1131496f8e5bSHamilton Tobon Mosquera       Changed |= WasSplit;
1132496f8e5bSHamilton Tobon Mosquera       return WasSplit;
1133496f8e5bSHamilton Tobon Mosquera     };
1134496f8e5bSHamilton Tobon Mosquera     RFI.foreachUse(SCC, SplitMemTransfers);
1135496f8e5bSHamilton Tobon Mosquera 
1136496f8e5bSHamilton Tobon Mosquera     return Changed;
1137496f8e5bSHamilton Tobon Mosquera   }
1138496f8e5bSHamilton Tobon Mosquera 
1139a2281419SJoseph Huber   void analysisGlobalization() {
11406fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
114182453e75SJoseph Huber 
114282453e75SJoseph Huber     auto CheckGlobalization = [&](Use &U, Function &Decl) {
1143a2281419SJoseph Huber       if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
114444feacc7SJoseph Huber         auto Remark = [&](OptimizationRemarkMissed ORM) {
114544feacc7SJoseph Huber           return ORM
1146a2281419SJoseph Huber                  << "Found thread data sharing on the GPU. "
1147a2281419SJoseph Huber                  << "Expect degraded performance due to data globalization.";
1148a2281419SJoseph Huber         };
114944feacc7SJoseph Huber         emitRemark<OptimizationRemarkMissed>(CI, "OpenMPGlobalization", Remark);
1150a2281419SJoseph Huber       }
1151a2281419SJoseph Huber 
1152a2281419SJoseph Huber       return false;
1153a2281419SJoseph Huber     };
1154a2281419SJoseph Huber 
115582453e75SJoseph Huber     RFI.foreachUse(SCC, CheckGlobalization);
115682453e75SJoseph Huber   }
1157a2281419SJoseph Huber 
11588931add6SHamilton Tobon Mosquera   /// Maps the values stored in the offload arrays passed as arguments to
11598931add6SHamilton Tobon Mosquera   /// \p RuntimeCall into the offload arrays in \p OAs.
11608931add6SHamilton Tobon Mosquera   bool getValuesInOffloadArrays(CallInst &RuntimeCall,
11618931add6SHamilton Tobon Mosquera                                 MutableArrayRef<OffloadArray> OAs) {
11628931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "Need space for three offload arrays!");
11638931add6SHamilton Tobon Mosquera 
11648931add6SHamilton Tobon Mosquera     // A runtime call that involves memory offloading looks something like:
11658931add6SHamilton Tobon Mosquera     // call void @__tgt_target_data_begin_mapper(arg0, arg1,
11668931add6SHamilton Tobon Mosquera     //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
11678931add6SHamilton Tobon Mosquera     // ...)
11688931add6SHamilton Tobon Mosquera     // So, the idea is to access the allocas that allocate space for these
11698931add6SHamilton Tobon Mosquera     // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
11708931add6SHamilton Tobon Mosquera     // Therefore:
11718931add6SHamilton Tobon Mosquera     // i8** %offload_baseptrs.
11721d3d9b9cSHamilton Tobon Mosquera     Value *BasePtrsArg =
11731d3d9b9cSHamilton Tobon Mosquera         RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
11748931add6SHamilton Tobon Mosquera     // i8** %offload_ptrs.
11751d3d9b9cSHamilton Tobon Mosquera     Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
11768931add6SHamilton Tobon Mosquera     // i8** %offload_sizes.
11771d3d9b9cSHamilton Tobon Mosquera     Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
11788931add6SHamilton Tobon Mosquera 
11798931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11808931add6SHamilton Tobon Mosquera     auto *V = getUnderlyingObject(BasePtrsArg);
11818931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11828931add6SHamilton Tobon Mosquera       return false;
11838931add6SHamilton Tobon Mosquera     auto *BasePtrsArray = cast<AllocaInst>(V);
11848931add6SHamilton Tobon Mosquera     if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
11858931add6SHamilton Tobon Mosquera       return false;
11868931add6SHamilton Tobon Mosquera 
11878931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
11888931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(PtrsArg);
11898931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
11908931add6SHamilton Tobon Mosquera       return false;
11918931add6SHamilton Tobon Mosquera     auto *PtrsArray = cast<AllocaInst>(V);
11928931add6SHamilton Tobon Mosquera     if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
11938931add6SHamilton Tobon Mosquera       return false;
11948931add6SHamilton Tobon Mosquera 
11958931add6SHamilton Tobon Mosquera     // Get values stored in **offload_sizes.
11968931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(SizesArg);
11978931add6SHamilton Tobon Mosquera     // If it's a [constant] global array don't analyze it.
11988931add6SHamilton Tobon Mosquera     if (isa<GlobalValue>(V))
11998931add6SHamilton Tobon Mosquera       return isa<Constant>(V);
12008931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
12018931add6SHamilton Tobon Mosquera       return false;
12028931add6SHamilton Tobon Mosquera 
12038931add6SHamilton Tobon Mosquera     auto *SizesArray = cast<AllocaInst>(V);
12048931add6SHamilton Tobon Mosquera     if (!OAs[2].initialize(*SizesArray, RuntimeCall))
12058931add6SHamilton Tobon Mosquera       return false;
12068931add6SHamilton Tobon Mosquera 
12078931add6SHamilton Tobon Mosquera     return true;
12088931add6SHamilton Tobon Mosquera   }
12098931add6SHamilton Tobon Mosquera 
12108931add6SHamilton Tobon Mosquera   /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
12118931add6SHamilton Tobon Mosquera   /// For now this is a way to test that the function getValuesInOffloadArrays
12128931add6SHamilton Tobon Mosquera   /// is working properly.
12138931add6SHamilton Tobon Mosquera   /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
12148931add6SHamilton Tobon Mosquera   void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
12158931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "There are three offload arrays to debug!");
12168931add6SHamilton Tobon Mosquera 
12178931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
12188931add6SHamilton Tobon Mosquera     std::string ValuesStr;
12198931add6SHamilton Tobon Mosquera     raw_string_ostream Printer(ValuesStr);
12208931add6SHamilton Tobon Mosquera     std::string Separator = " --- ";
12218931add6SHamilton Tobon Mosquera 
12228931add6SHamilton Tobon Mosquera     for (auto *BP : OAs[0].StoredValues) {
12238931add6SHamilton Tobon Mosquera       BP->print(Printer);
12248931add6SHamilton Tobon Mosquera       Printer << Separator;
12258931add6SHamilton Tobon Mosquera     }
12268931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
12278931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12288931add6SHamilton Tobon Mosquera 
12298931add6SHamilton Tobon Mosquera     for (auto *P : OAs[1].StoredValues) {
12308931add6SHamilton Tobon Mosquera       P->print(Printer);
12318931add6SHamilton Tobon Mosquera       Printer << Separator;
12328931add6SHamilton Tobon Mosquera     }
12338931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
12348931add6SHamilton Tobon Mosquera     ValuesStr.clear();
12358931add6SHamilton Tobon Mosquera 
12368931add6SHamilton Tobon Mosquera     for (auto *S : OAs[2].StoredValues) {
12378931add6SHamilton Tobon Mosquera       S->print(Printer);
12388931add6SHamilton Tobon Mosquera       Printer << Separator;
12398931add6SHamilton Tobon Mosquera     }
12408931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
12418931add6SHamilton Tobon Mosquera   }
12428931add6SHamilton Tobon Mosquera 
1243bd2fa181SHamilton Tobon Mosquera   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1244bd2fa181SHamilton Tobon Mosquera   /// moved. Returns nullptr if the movement is not possible, or not worth it.
1245bd2fa181SHamilton Tobon Mosquera   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1246bd2fa181SHamilton Tobon Mosquera     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1247bd2fa181SHamilton Tobon Mosquera     //  Make it traverse the CFG.
1248bd2fa181SHamilton Tobon Mosquera 
1249bd2fa181SHamilton Tobon Mosquera     Instruction *CurrentI = &RuntimeCall;
1250bd2fa181SHamilton Tobon Mosquera     bool IsWorthIt = false;
1251bd2fa181SHamilton Tobon Mosquera     while ((CurrentI = CurrentI->getNextNode())) {
1252bd2fa181SHamilton Tobon Mosquera 
1253bd2fa181SHamilton Tobon Mosquera       // TODO: Once we detect the regions to be offloaded we should use the
1254bd2fa181SHamilton Tobon Mosquera       //  alias analysis manager to check if CurrentI may modify one of
1255bd2fa181SHamilton Tobon Mosquera       //  the offloaded regions.
1256bd2fa181SHamilton Tobon Mosquera       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1257bd2fa181SHamilton Tobon Mosquera         if (IsWorthIt)
1258bd2fa181SHamilton Tobon Mosquera           return CurrentI;
1259bd2fa181SHamilton Tobon Mosquera 
1260bd2fa181SHamilton Tobon Mosquera         return nullptr;
1261bd2fa181SHamilton Tobon Mosquera       }
1262bd2fa181SHamilton Tobon Mosquera 
1263bd2fa181SHamilton Tobon Mosquera       // FIXME: For now if we move it over anything without side effect
1264bd2fa181SHamilton Tobon Mosquera       //  is worth it.
1265bd2fa181SHamilton Tobon Mosquera       IsWorthIt = true;
1266bd2fa181SHamilton Tobon Mosquera     }
1267bd2fa181SHamilton Tobon Mosquera 
1268bd2fa181SHamilton Tobon Mosquera     // Return end of BasicBlock.
1269bd2fa181SHamilton Tobon Mosquera     return RuntimeCall.getParent()->getTerminator();
1270bd2fa181SHamilton Tobon Mosquera   }
1271bd2fa181SHamilton Tobon Mosquera 
1272496f8e5bSHamilton Tobon Mosquera   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
1273bd2fa181SHamilton Tobon Mosquera   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1274bd2fa181SHamilton Tobon Mosquera                                Instruction &WaitMovementPoint) {
1275bd31abc1SHamilton Tobon Mosquera     // Create stack allocated handle (__tgt_async_info) at the beginning of the
1276bd31abc1SHamilton Tobon Mosquera     // function. Used for storing information of the async transfer, allowing to
1277bd31abc1SHamilton Tobon Mosquera     // wait on it later.
1278496f8e5bSHamilton Tobon Mosquera     auto &IRBuilder = OMPInfoCache.OMPBuilder;
1279bd31abc1SHamilton Tobon Mosquera     auto *F = RuntimeCall.getCaller();
1280bd31abc1SHamilton Tobon Mosquera     Instruction *FirstInst = &(F->getEntryBlock().front());
1281bd31abc1SHamilton Tobon Mosquera     AllocaInst *Handle = new AllocaInst(
1282bd31abc1SHamilton Tobon Mosquera         IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1283bd31abc1SHamilton Tobon Mosquera 
1284496f8e5bSHamilton Tobon Mosquera     // Add "issue" runtime call declaration:
1285496f8e5bSHamilton Tobon Mosquera     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1286496f8e5bSHamilton Tobon Mosquera     //   i8**, i8**, i64*, i64*)
1287496f8e5bSHamilton Tobon Mosquera     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1288496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_issue);
1289496f8e5bSHamilton Tobon Mosquera 
1290496f8e5bSHamilton Tobon Mosquera     // Change RuntimeCall call site for its asynchronous version.
129197e55cfeSJoseph Huber     SmallVector<Value *, 16> Args;
1292bd2fa181SHamilton Tobon Mosquera     for (auto &Arg : RuntimeCall.args())
1293496f8e5bSHamilton Tobon Mosquera       Args.push_back(Arg.get());
1294bd31abc1SHamilton Tobon Mosquera     Args.push_back(Handle);
1295496f8e5bSHamilton Tobon Mosquera 
1296496f8e5bSHamilton Tobon Mosquera     CallInst *IssueCallsite =
1297bd31abc1SHamilton Tobon Mosquera         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
1298bd2fa181SHamilton Tobon Mosquera     RuntimeCall.eraseFromParent();
1299496f8e5bSHamilton Tobon Mosquera 
1300496f8e5bSHamilton Tobon Mosquera     // Add "wait" runtime call declaration:
1301496f8e5bSHamilton Tobon Mosquera     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1302496f8e5bSHamilton Tobon Mosquera     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1303496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_wait);
1304496f8e5bSHamilton Tobon Mosquera 
1305496f8e5bSHamilton Tobon Mosquera     Value *WaitParams[2] = {
1306da8bec47SJoseph Huber         IssueCallsite->getArgOperand(
1307da8bec47SJoseph Huber             OffloadArray::DeviceIDArgNum), // device_id.
1308bd31abc1SHamilton Tobon Mosquera         Handle                             // handle to wait on.
1309496f8e5bSHamilton Tobon Mosquera     };
1310bd2fa181SHamilton Tobon Mosquera     CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
1311496f8e5bSHamilton Tobon Mosquera 
1312496f8e5bSHamilton Tobon Mosquera     return true;
1313496f8e5bSHamilton Tobon Mosquera   }
1314496f8e5bSHamilton Tobon Mosquera 
1315dc3b5b00SJohannes Doerfert   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1316dc3b5b00SJohannes Doerfert                                     bool GlobalOnly, bool &SingleChoice) {
1317dc3b5b00SJohannes Doerfert     if (CurrentIdent == NextIdent)
1318dc3b5b00SJohannes Doerfert       return CurrentIdent;
1319dc3b5b00SJohannes Doerfert 
1320396b7253SJohannes Doerfert     // TODO: Figure out how to actually combine multiple debug locations. For
1321dc3b5b00SJohannes Doerfert     //       now we just keep an existing one if there is a single choice.
1322dc3b5b00SJohannes Doerfert     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1323dc3b5b00SJohannes Doerfert       SingleChoice = !CurrentIdent;
1324dc3b5b00SJohannes Doerfert       return NextIdent;
1325dc3b5b00SJohannes Doerfert     }
1326396b7253SJohannes Doerfert     return nullptr;
1327396b7253SJohannes Doerfert   }
1328396b7253SJohannes Doerfert 
1329396b7253SJohannes Doerfert   /// Return an `struct ident_t*` value that represents the ones used in the
1330396b7253SJohannes Doerfert   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1331396b7253SJohannes Doerfert   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1332396b7253SJohannes Doerfert   /// return value we create one from scratch. We also do not yet combine
1333396b7253SJohannes Doerfert   /// information, e.g., the source locations, see combinedIdentStruct.
13347cfd267cSsstefan1   Value *
13357cfd267cSsstefan1   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
13367cfd267cSsstefan1                                  Function &F, bool GlobalOnly) {
1337dc3b5b00SJohannes Doerfert     bool SingleChoice = true;
1338396b7253SJohannes Doerfert     Value *Ident = nullptr;
1339396b7253SJohannes Doerfert     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1340396b7253SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
1341396b7253SJohannes Doerfert       if (!CI || &F != &Caller)
1342396b7253SJohannes Doerfert         return false;
1343396b7253SJohannes Doerfert       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1344dc3b5b00SJohannes Doerfert                                   /* GlobalOnly */ true, SingleChoice);
1345396b7253SJohannes Doerfert       return false;
1346396b7253SJohannes Doerfert     };
1347624d34afSJohannes Doerfert     RFI.foreachUse(SCC, CombineIdentStruct);
1348396b7253SJohannes Doerfert 
1349dc3b5b00SJohannes Doerfert     if (!Ident || !SingleChoice) {
1350396b7253SJohannes Doerfert       // The IRBuilder uses the insertion block to get to the module, this is
1351396b7253SJohannes Doerfert       // unfortunate but we work around it for now.
13527cfd267cSsstefan1       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
13537cfd267cSsstefan1         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1354396b7253SJohannes Doerfert             &F.getEntryBlock(), F.getEntryBlock().begin()));
1355396b7253SJohannes Doerfert       // Create a fallback location if non was found.
1356396b7253SJohannes Doerfert       // TODO: Use the debug locations of the calls instead.
13577cfd267cSsstefan1       Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
13587cfd267cSsstefan1       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
1359396b7253SJohannes Doerfert     }
1360396b7253SJohannes Doerfert     return Ident;
1361396b7253SJohannes Doerfert   }
1362396b7253SJohannes Doerfert 
1363b726c557SJohannes Doerfert   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
13649548b74aSJohannes Doerfert   /// \p ReplVal if given.
13657cfd267cSsstefan1   bool deduplicateRuntimeCalls(Function &F,
13667cfd267cSsstefan1                                OMPInformationCache::RuntimeFunctionInfo &RFI,
13679548b74aSJohannes Doerfert                                Value *ReplVal = nullptr) {
13688855fec3SJohannes Doerfert     auto *UV = RFI.getUseVector(F);
13698855fec3SJohannes Doerfert     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1370b1fbf438SRoman Lebedev       return false;
1371b1fbf438SRoman Lebedev 
13727cfd267cSsstefan1     LLVM_DEBUG(
13737cfd267cSsstefan1         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
13747cfd267cSsstefan1                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
13757cfd267cSsstefan1 
1376ab3da5ddSMichael Liao     assert((!ReplVal || (isa<Argument>(ReplVal) &&
1377ab3da5ddSMichael Liao                          cast<Argument>(ReplVal)->getParent() == &F)) &&
13789548b74aSJohannes Doerfert            "Unexpected replacement value!");
1379396b7253SJohannes Doerfert 
1380396b7253SJohannes Doerfert     // TODO: Use dominance to find a good position instead.
13816aab27baSsstefan1     auto CanBeMoved = [this](CallBase &CB) {
1382396b7253SJohannes Doerfert       unsigned NumArgs = CB.getNumArgOperands();
1383396b7253SJohannes Doerfert       if (NumArgs == 0)
1384396b7253SJohannes Doerfert         return true;
13856aab27baSsstefan1       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1386396b7253SJohannes Doerfert         return false;
1387396b7253SJohannes Doerfert       for (unsigned u = 1; u < NumArgs; ++u)
1388396b7253SJohannes Doerfert         if (isa<Instruction>(CB.getArgOperand(u)))
1389396b7253SJohannes Doerfert           return false;
1390396b7253SJohannes Doerfert       return true;
1391396b7253SJohannes Doerfert     };
1392396b7253SJohannes Doerfert 
13939548b74aSJohannes Doerfert     if (!ReplVal) {
13948855fec3SJohannes Doerfert       for (Use *U : *UV)
13959548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1396396b7253SJohannes Doerfert           if (!CanBeMoved(*CI))
1397396b7253SJohannes Doerfert             continue;
13984d4ea9acSHuber, Joseph 
13994d4ea9acSHuber, Joseph           auto Remark = [&](OptimizationRemark OR) {
14004d4ea9acSHuber, Joseph             return OR << "OpenMP runtime call "
14012db182ffSJoseph Huber                       << ore::NV("OpenMPOptRuntime", RFI.Name)
14022db182ffSJoseph Huber                       << " moved to beginning of OpenMP region";
14034d4ea9acSHuber, Joseph           };
14042db182ffSJoseph Huber           emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark);
14054d4ea9acSHuber, Joseph 
14069548b74aSJohannes Doerfert           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
14079548b74aSJohannes Doerfert           ReplVal = CI;
14089548b74aSJohannes Doerfert           break;
14099548b74aSJohannes Doerfert         }
14109548b74aSJohannes Doerfert       if (!ReplVal)
14119548b74aSJohannes Doerfert         return false;
14129548b74aSJohannes Doerfert     }
14139548b74aSJohannes Doerfert 
1414396b7253SJohannes Doerfert     // If we use a call as a replacement value we need to make sure the ident is
1415396b7253SJohannes Doerfert     // valid at the new location. For now we just pick a global one, either
1416396b7253SJohannes Doerfert     // existing and used by one of the calls, or created from scratch.
1417396b7253SJohannes Doerfert     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1418396b7253SJohannes Doerfert       if (CI->getNumArgOperands() > 0 &&
14196aab27baSsstefan1           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1420396b7253SJohannes Doerfert         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1421396b7253SJohannes Doerfert                                                       /* GlobalOnly */ true);
1422396b7253SJohannes Doerfert         CI->setArgOperand(0, Ident);
1423396b7253SJohannes Doerfert       }
1424396b7253SJohannes Doerfert     }
1425396b7253SJohannes Doerfert 
14269548b74aSJohannes Doerfert     bool Changed = false;
14279548b74aSJohannes Doerfert     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
14289548b74aSJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
14299548b74aSJohannes Doerfert       if (!CI || CI == ReplVal || &F != &Caller)
14309548b74aSJohannes Doerfert         return false;
14319548b74aSJohannes Doerfert       assert(CI->getCaller() == &F && "Unexpected call!");
14324d4ea9acSHuber, Joseph 
14334d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
14344d4ea9acSHuber, Joseph         return OR << "OpenMP runtime call "
14354d4ea9acSHuber, Joseph                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
14364d4ea9acSHuber, Joseph       };
14372db182ffSJoseph Huber       emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark);
14384d4ea9acSHuber, Joseph 
14399548b74aSJohannes Doerfert       CGUpdater.removeCallSite(*CI);
14409548b74aSJohannes Doerfert       CI->replaceAllUsesWith(ReplVal);
14419548b74aSJohannes Doerfert       CI->eraseFromParent();
14429548b74aSJohannes Doerfert       ++NumOpenMPRuntimeCallsDeduplicated;
14439548b74aSJohannes Doerfert       Changed = true;
14449548b74aSJohannes Doerfert       return true;
14459548b74aSJohannes Doerfert     };
1446624d34afSJohannes Doerfert     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
14479548b74aSJohannes Doerfert 
14489548b74aSJohannes Doerfert     return Changed;
14499548b74aSJohannes Doerfert   }
14509548b74aSJohannes Doerfert 
14519548b74aSJohannes Doerfert   /// Collect arguments that represent the global thread id in \p GTIdArgs.
14529548b74aSJohannes Doerfert   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
14539548b74aSJohannes Doerfert     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
14549548b74aSJohannes Doerfert     //       initialization. We could define an AbstractAttribute instead and
14559548b74aSJohannes Doerfert     //       run the Attributor here once it can be run as an SCC pass.
14569548b74aSJohannes Doerfert 
14579548b74aSJohannes Doerfert     // Helper to check the argument \p ArgNo at all call sites of \p F for
14589548b74aSJohannes Doerfert     // a GTId.
14599548b74aSJohannes Doerfert     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
14609548b74aSJohannes Doerfert       if (!F.hasLocalLinkage())
14619548b74aSJohannes Doerfert         return false;
14629548b74aSJohannes Doerfert       for (Use &U : F.uses()) {
14639548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(U)) {
14649548b74aSJohannes Doerfert           Value *ArgOp = CI->getArgOperand(ArgNo);
14659548b74aSJohannes Doerfert           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
14667cfd267cSsstefan1               getCallIfRegularCall(
14677cfd267cSsstefan1                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
14689548b74aSJohannes Doerfert             continue;
14699548b74aSJohannes Doerfert         }
14709548b74aSJohannes Doerfert         return false;
14719548b74aSJohannes Doerfert       }
14729548b74aSJohannes Doerfert       return true;
14739548b74aSJohannes Doerfert     };
14749548b74aSJohannes Doerfert 
14759548b74aSJohannes Doerfert     // Helper to identify uses of a GTId as GTId arguments.
14769548b74aSJohannes Doerfert     auto AddUserArgs = [&](Value &GTId) {
14779548b74aSJohannes Doerfert       for (Use &U : GTId.uses())
14789548b74aSJohannes Doerfert         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
14799548b74aSJohannes Doerfert           if (CI->isArgOperand(&U))
14809548b74aSJohannes Doerfert             if (Function *Callee = CI->getCalledFunction())
14819548b74aSJohannes Doerfert               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
14829548b74aSJohannes Doerfert                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
14839548b74aSJohannes Doerfert     };
14849548b74aSJohannes Doerfert 
14859548b74aSJohannes Doerfert     // The argument users of __kmpc_global_thread_num calls are GTIds.
14867cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
14877cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
14887cfd267cSsstefan1 
1489624d34afSJohannes Doerfert     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
14908855fec3SJohannes Doerfert       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
14919548b74aSJohannes Doerfert         AddUserArgs(*CI);
14928855fec3SJohannes Doerfert       return false;
14938855fec3SJohannes Doerfert     });
14949548b74aSJohannes Doerfert 
14959548b74aSJohannes Doerfert     // Transitively search for more arguments by looking at the users of the
14969548b74aSJohannes Doerfert     // ones we know already. During the search the GTIdArgs vector is extended
14979548b74aSJohannes Doerfert     // so we cannot cache the size nor can we use a range based for.
14989548b74aSJohannes Doerfert     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
14999548b74aSJohannes Doerfert       AddUserArgs(*GTIdArgs[u]);
15009548b74aSJohannes Doerfert   }
15019548b74aSJohannes Doerfert 
15025b0581aeSJohannes Doerfert   /// Kernel (=GPU) optimizations and utility functions
15035b0581aeSJohannes Doerfert   ///
15045b0581aeSJohannes Doerfert   ///{{
15055b0581aeSJohannes Doerfert 
15065b0581aeSJohannes Doerfert   /// Check if \p F is a kernel, hence entry point for target offloading.
15075b0581aeSJohannes Doerfert   bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
15085b0581aeSJohannes Doerfert 
15095b0581aeSJohannes Doerfert   /// Cache to remember the unique kernel for a function.
15105b0581aeSJohannes Doerfert   DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
15115b0581aeSJohannes Doerfert 
15125b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p F, if any.
15135b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Function &F);
15145b0581aeSJohannes Doerfert 
15155b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p I, if any.
15165b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Instruction &I) {
15175b0581aeSJohannes Doerfert     return getUniqueKernelFor(*I.getFunction());
15185b0581aeSJohannes Doerfert   }
15195b0581aeSJohannes Doerfert 
15205b0581aeSJohannes Doerfert   /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in
15215b0581aeSJohannes Doerfert   /// the cases we can avoid taking the address of a function.
15225b0581aeSJohannes Doerfert   bool rewriteDeviceCodeStateMachine();
15235b0581aeSJohannes Doerfert 
15245b0581aeSJohannes Doerfert   ///
15255b0581aeSJohannes Doerfert   ///}}
15265b0581aeSJohannes Doerfert 
15274d4ea9acSHuber, Joseph   /// Emit a remark generically
15284d4ea9acSHuber, Joseph   ///
15294d4ea9acSHuber, Joseph   /// This template function can be used to generically emit a remark. The
15304d4ea9acSHuber, Joseph   /// RemarkKind should be one of the following:
15314d4ea9acSHuber, Joseph   ///   - OptimizationRemark to indicate a successful optimization attempt
15324d4ea9acSHuber, Joseph   ///   - OptimizationRemarkMissed to report a failed optimization attempt
15334d4ea9acSHuber, Joseph   ///   - OptimizationRemarkAnalysis to provide additional information about an
15344d4ea9acSHuber, Joseph   ///     optimization attempt
15354d4ea9acSHuber, Joseph   ///
15364d4ea9acSHuber, Joseph   /// The remark is built using a callback function provided by the caller that
15374d4ea9acSHuber, Joseph   /// takes a RemarkKind as input and returns a RemarkKind.
15382db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15392db182ffSJoseph Huber   void emitRemark(Instruction *I, StringRef RemarkName,
1540e8039ad4SJohannes Doerfert                   RemarkCallBack &&RemarkCB) const {
15412db182ffSJoseph Huber     Function *F = I->getParent()->getParent();
15424d4ea9acSHuber, Joseph     auto &ORE = OREGetter(F);
15434d4ea9acSHuber, Joseph 
15442db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
15454d4ea9acSHuber, Joseph   }
15464d4ea9acSHuber, Joseph 
15472db182ffSJoseph Huber   /// Emit a remark on a function.
15482db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
15492db182ffSJoseph Huber   void emitRemark(Function *F, StringRef RemarkName,
15502db182ffSJoseph Huber                   RemarkCallBack &&RemarkCB) const {
15510f426935Ssstefan1     auto &ORE = OREGetter(F);
15520f426935Ssstefan1 
15532db182ffSJoseph Huber     ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
15540f426935Ssstefan1   }
15550f426935Ssstefan1 
1556b726c557SJohannes Doerfert   /// The underlying module.
15579548b74aSJohannes Doerfert   Module &M;
15589548b74aSJohannes Doerfert 
15599548b74aSJohannes Doerfert   /// The SCC we are operating on.
1560ee17263aSJohannes Doerfert   SmallVectorImpl<Function *> &SCC;
15619548b74aSJohannes Doerfert 
15629548b74aSJohannes Doerfert   /// Updater used to keep the call graph in sync with IR changes, e.g., it is
15639548b74aSJohannes Doerfert   /// notified when a call site is removed.
15649548b74aSJohannes Doerfert   CallGraphUpdater &CGUpdater;
15659548b74aSJohannes Doerfert 
15664d4ea9acSHuber, Joseph   /// Callback to get an OptimizationRemarkEmitter from a Function *.
15674d4ea9acSHuber, Joseph   OptimizationRemarkGetter OREGetter;
15684d4ea9acSHuber, Joseph 
15697cfd267cSsstefan1   /// OpenMP-specific information cache. Also used for Attributor runs.
15707cfd267cSsstefan1   OMPInformationCache &OMPInfoCache;
1571b8235d2bSsstefan1 
1572b8235d2bSsstefan1   /// Attributor instance, run by runAttributor().
1573b8235d2bSsstefan1   Attributor &A;
1574b8235d2bSsstefan1 
1575b8235d2bSsstefan1   /// Helper function to run Attributor on SCC.
1576d3e74913SNico Weber   bool runAttributor() {
1577b8235d2bSsstefan1     if (SCC.empty())
1578b8235d2bSsstefan1       return false;
1579b8235d2bSsstefan1 
1580d3e74913SNico Weber     registerAAs();
1581b8235d2bSsstefan1 
1582b8235d2bSsstefan1     ChangeStatus Changed = A.run();
1583b8235d2bSsstefan1 
1584b8235d2bSsstefan1     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
1585b8235d2bSsstefan1                       << " functions, result: " << Changed << ".\n");
1586b8235d2bSsstefan1 
1587b8235d2bSsstefan1     return Changed == ChangeStatus::CHANGED;
1588b8235d2bSsstefan1   }
1589b8235d2bSsstefan1 
1590b8235d2bSsstefan1   /// Populate the Attributor with abstract attribute opportunities in the
1591b8235d2bSsstefan1   /// function.
1592d3e74913SNico Weber   void registerAAs() {
1593d3e74913SNico Weber     if (SCC.empty())
1594d3e74913SNico Weber       return;
1595d3e74913SNico Weber 
1596d3e74913SNico Weber     // Create CallSite AA for all Getters.
1597d3e74913SNico Weber     for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
1598d3e74913SNico Weber       auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
1599d3e74913SNico Weber 
1600d3e74913SNico Weber       auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
1601d3e74913SNico Weber 
1602d3e74913SNico Weber       auto CreateAA = [&](Use &U, Function &Caller) {
1603d3e74913SNico Weber         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
1604d3e74913SNico Weber         if (!CI)
1605d3e74913SNico Weber           return false;
1606d3e74913SNico Weber 
1607d3e74913SNico Weber         auto &CB = cast<CallBase>(*CI);
1608d3e74913SNico Weber 
1609d3e74913SNico Weber         IRPosition CBPos = IRPosition::callsite_function(CB);
1610d3e74913SNico Weber         A.getOrCreateAAFor<AAICVTracker>(CBPos);
1611d3e74913SNico Weber         return false;
1612d3e74913SNico Weber       };
1613d3e74913SNico Weber 
1614d3e74913SNico Weber       GetterRFI.foreachUse(SCC, CreateAA);
1615d3e74913SNico Weber     }
1616d3e74913SNico Weber     auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
1617d3e74913SNico Weber     auto CreateAA = [&](Use &U, Function &F) {
1618d3e74913SNico Weber       A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
1619d3e74913SNico Weber       return false;
1620d3e74913SNico Weber     };
1621d3e74913SNico Weber     GlobalizationRFI.foreachUse(SCC, CreateAA);
1622d3e74913SNico Weber 
1623d3e74913SNico Weber     // Create an ExecutionDomain AA for every function and a HeapToStack AA for
1624d3e74913SNico Weber     // every function if there is a device kernel.
1625d3e74913SNico Weber     for (auto *F : SCC) {
1626d3e74913SNico Weber       if (!F->isDeclaration())
1627d3e74913SNico Weber         A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
1628d3e74913SNico Weber       if (isOpenMPDevice(M))
1629d3e74913SNico Weber         A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
1630d3e74913SNico Weber     }
1631d3e74913SNico Weber   }
1632b8235d2bSsstefan1 };
1633b8235d2bSsstefan1 
16345b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
16355b0581aeSJohannes Doerfert   if (!OMPInfoCache.ModuleSlice.count(&F))
16365b0581aeSJohannes Doerfert     return nullptr;
16375b0581aeSJohannes Doerfert 
16385b0581aeSJohannes Doerfert   // Use a scope to keep the lifetime of the CachedKernel short.
16395b0581aeSJohannes Doerfert   {
16405b0581aeSJohannes Doerfert     Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
16415b0581aeSJohannes Doerfert     if (CachedKernel)
16425b0581aeSJohannes Doerfert       return *CachedKernel;
16435b0581aeSJohannes Doerfert 
16445b0581aeSJohannes Doerfert     // TODO: We should use an AA to create an (optimistic and callback
16455b0581aeSJohannes Doerfert     //       call-aware) call graph. For now we stick to simple patterns that
16465b0581aeSJohannes Doerfert     //       are less powerful, basically the worst fixpoint.
16475b0581aeSJohannes Doerfert     if (isKernel(F)) {
16485b0581aeSJohannes Doerfert       CachedKernel = Kernel(&F);
16495b0581aeSJohannes Doerfert       return *CachedKernel;
16505b0581aeSJohannes Doerfert     }
16515b0581aeSJohannes Doerfert 
16525b0581aeSJohannes Doerfert     CachedKernel = nullptr;
1653994bb6ebSJohannes Doerfert     if (!F.hasLocalLinkage()) {
1654994bb6ebSJohannes Doerfert 
1655994bb6ebSJohannes Doerfert       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
16562db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
16572db182ffSJoseph Huber         return ORA
16582db182ffSJoseph Huber                << "[OMP100] Potentially unknown OpenMP target region caller";
1659994bb6ebSJohannes Doerfert       };
16602db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
1661994bb6ebSJohannes Doerfert 
16625b0581aeSJohannes Doerfert       return nullptr;
16635b0581aeSJohannes Doerfert     }
1664994bb6ebSJohannes Doerfert   }
16655b0581aeSJohannes Doerfert 
16665b0581aeSJohannes Doerfert   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
16675b0581aeSJohannes Doerfert     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
16685b0581aeSJohannes Doerfert       // Allow use in equality comparisons.
16695b0581aeSJohannes Doerfert       if (Cmp->isEquality())
16705b0581aeSJohannes Doerfert         return getUniqueKernelFor(*Cmp);
16715b0581aeSJohannes Doerfert       return nullptr;
16725b0581aeSJohannes Doerfert     }
16735b0581aeSJohannes Doerfert     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
16745b0581aeSJohannes Doerfert       // Allow direct calls.
16755b0581aeSJohannes Doerfert       if (CB->isCallee(&U))
16765b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
1677a2dbfb6bSGiorgis Georgakoudis 
1678a2dbfb6bSGiorgis Georgakoudis       OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1679a2dbfb6bSGiorgis Georgakoudis           OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1680a2dbfb6bSGiorgis Georgakoudis       // Allow the use in __kmpc_parallel_51 calls.
1681a2dbfb6bSGiorgis Georgakoudis       if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
16825b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
16835b0581aeSJohannes Doerfert       return nullptr;
16845b0581aeSJohannes Doerfert     }
16855b0581aeSJohannes Doerfert     // Disallow every other use.
16865b0581aeSJohannes Doerfert     return nullptr;
16875b0581aeSJohannes Doerfert   };
16885b0581aeSJohannes Doerfert 
16895b0581aeSJohannes Doerfert   // TODO: In the future we want to track more than just a unique kernel.
16905b0581aeSJohannes Doerfert   SmallPtrSet<Kernel, 2> PotentialKernels;
16918d8ce85bSsstefan1   OMPInformationCache::foreachUse(F, [&](const Use &U) {
16925b0581aeSJohannes Doerfert     PotentialKernels.insert(GetUniqueKernelForUse(U));
16935b0581aeSJohannes Doerfert   });
16945b0581aeSJohannes Doerfert 
16955b0581aeSJohannes Doerfert   Kernel K = nullptr;
16965b0581aeSJohannes Doerfert   if (PotentialKernels.size() == 1)
16975b0581aeSJohannes Doerfert     K = *PotentialKernels.begin();
16985b0581aeSJohannes Doerfert 
16995b0581aeSJohannes Doerfert   // Cache the result.
17005b0581aeSJohannes Doerfert   UniqueKernelMap[&F] = K;
17015b0581aeSJohannes Doerfert 
17025b0581aeSJohannes Doerfert   return K;
17035b0581aeSJohannes Doerfert }
17045b0581aeSJohannes Doerfert 
17055b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
1706a2dbfb6bSGiorgis Georgakoudis   OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1707a2dbfb6bSGiorgis Georgakoudis       OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
17085b0581aeSJohannes Doerfert 
17095b0581aeSJohannes Doerfert   bool Changed = false;
1710a2dbfb6bSGiorgis Georgakoudis   if (!KernelParallelRFI)
17115b0581aeSJohannes Doerfert     return Changed;
17125b0581aeSJohannes Doerfert 
17135b0581aeSJohannes Doerfert   for (Function *F : SCC) {
17145b0581aeSJohannes Doerfert 
1715a2dbfb6bSGiorgis Georgakoudis     // Check if the function is a use in a __kmpc_parallel_51 call at
17165b0581aeSJohannes Doerfert     // all.
17175b0581aeSJohannes Doerfert     bool UnknownUse = false;
1718a2dbfb6bSGiorgis Georgakoudis     bool KernelParallelUse = false;
17195b0581aeSJohannes Doerfert     unsigned NumDirectCalls = 0;
17205b0581aeSJohannes Doerfert 
17215b0581aeSJohannes Doerfert     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
17228d8ce85bSsstefan1     OMPInformationCache::foreachUse(*F, [&](Use &U) {
17235b0581aeSJohannes Doerfert       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
17245b0581aeSJohannes Doerfert         if (CB->isCallee(&U)) {
17255b0581aeSJohannes Doerfert           ++NumDirectCalls;
17265b0581aeSJohannes Doerfert           return;
17275b0581aeSJohannes Doerfert         }
17285b0581aeSJohannes Doerfert 
172981db6144SMichael Liao       if (isa<ICmpInst>(U.getUser())) {
17305b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17315b0581aeSJohannes Doerfert         return;
17325b0581aeSJohannes Doerfert       }
1733a2dbfb6bSGiorgis Georgakoudis 
1734a2dbfb6bSGiorgis Georgakoudis       // Find wrapper functions that represent parallel kernels.
1735a2dbfb6bSGiorgis Georgakoudis       CallInst *CI =
1736a2dbfb6bSGiorgis Georgakoudis           OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
1737a2dbfb6bSGiorgis Georgakoudis       const unsigned int WrapperFunctionArgNo = 6;
1738a2dbfb6bSGiorgis Georgakoudis       if (!KernelParallelUse && CI &&
1739a2dbfb6bSGiorgis Georgakoudis           CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
1740a2dbfb6bSGiorgis Georgakoudis         KernelParallelUse = true;
17415b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
17425b0581aeSJohannes Doerfert         return;
17435b0581aeSJohannes Doerfert       }
17445b0581aeSJohannes Doerfert       UnknownUse = true;
17455b0581aeSJohannes Doerfert     });
17465b0581aeSJohannes Doerfert 
1747a2dbfb6bSGiorgis Georgakoudis     // Do not emit a remark if we haven't seen a __kmpc_parallel_51
1748fec1f210SJohannes Doerfert     // use.
1749a2dbfb6bSGiorgis Georgakoudis     if (!KernelParallelUse)
17505b0581aeSJohannes Doerfert       continue;
17515b0581aeSJohannes Doerfert 
1752fec1f210SJohannes Doerfert     {
17532db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17542db182ffSJoseph Huber         return ORA << "Found a parallel region that is called in a target "
1755fec1f210SJohannes Doerfert                       "region but not part of a combined target construct nor "
1756a2dbfb6bSGiorgis Georgakoudis                       "nested inside a target construct without intermediate "
1757fec1f210SJohannes Doerfert                       "code. This can lead to excessive register usage for "
1758fec1f210SJohannes Doerfert                       "unrelated target regions in the same translation unit "
1759fec1f210SJohannes Doerfert                       "due to spurious call edges assumed by ptxas.";
1760fec1f210SJohannes Doerfert       };
17612db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
17622db182ffSJoseph Huber                                              Remark);
1763fec1f210SJohannes Doerfert     }
1764fec1f210SJohannes Doerfert 
1765fec1f210SJohannes Doerfert     // If this ever hits, we should investigate.
1766fec1f210SJohannes Doerfert     // TODO: Checking the number of uses is not a necessary restriction and
1767fec1f210SJohannes Doerfert     // should be lifted.
1768fec1f210SJohannes Doerfert     if (UnknownUse || NumDirectCalls != 1 ||
1769d3e74913SNico Weber         ToBeReplacedStateMachineUses.size() != 2) {
1770fec1f210SJohannes Doerfert       {
17712db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17722db182ffSJoseph Huber           return ORA << "Parallel region is used in "
1773fec1f210SJohannes Doerfert                      << (UnknownUse ? "unknown" : "unexpected")
1774fec1f210SJohannes Doerfert                      << " ways; will not attempt to rewrite the state machine.";
1775fec1f210SJohannes Doerfert         };
17762db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17772db182ffSJoseph Huber             F, "OpenMPParallelRegionInNonSPMD", Remark);
1778fec1f210SJohannes Doerfert       }
17795b0581aeSJohannes Doerfert       continue;
1780fec1f210SJohannes Doerfert     }
17815b0581aeSJohannes Doerfert 
1782a2dbfb6bSGiorgis Georgakoudis     // Even if we have __kmpc_parallel_51 calls, we (for now) give
17835b0581aeSJohannes Doerfert     // up if the function is not called from a unique kernel.
17845b0581aeSJohannes Doerfert     Kernel K = getUniqueKernelFor(*F);
1785fec1f210SJohannes Doerfert     if (!K) {
1786fec1f210SJohannes Doerfert       {
17872db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
17882db182ffSJoseph Huber           return ORA << "Parallel region is not known to be called from a "
1789fec1f210SJohannes Doerfert                         "unique single target region, maybe the surrounding "
1790fec1f210SJohannes Doerfert                         "function has external linkage?; will not attempt to "
1791fec1f210SJohannes Doerfert                         "rewrite the state machine use.";
1792fec1f210SJohannes Doerfert         };
17932db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(
17942db182ffSJoseph Huber             F, "OpenMPParallelRegionInMultipleKernesl", Remark);
1795fec1f210SJohannes Doerfert       }
17965b0581aeSJohannes Doerfert       continue;
1797fec1f210SJohannes Doerfert     }
17985b0581aeSJohannes Doerfert 
17995b0581aeSJohannes Doerfert     // We now know F is a parallel body function called only from the kernel K.
18005b0581aeSJohannes Doerfert     // We also identified the state machine uses in which we replace the
18015b0581aeSJohannes Doerfert     // function pointer by a new global symbol for identification purposes. This
18025b0581aeSJohannes Doerfert     // ensures only direct calls to the function are left.
18035b0581aeSJohannes Doerfert 
1804fec1f210SJohannes Doerfert     {
18052db182ffSJoseph Huber       auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) {
18062db182ffSJoseph Huber         return ORA << "Specialize parallel region that is only reached from a "
1807fec1f210SJohannes Doerfert                       "single target region to avoid spurious call edges and "
1808fec1f210SJohannes Doerfert                       "excessive register usage in other target regions. "
1809fec1f210SJohannes Doerfert                       "(parallel region ID: "
1810fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1811fec1f210SJohannes Doerfert                    << ", kernel ID: "
1812fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1813fec1f210SJohannes Doerfert       };
18142db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD",
1815fec1f210SJohannes Doerfert                                              RemarkParalleRegion);
18162db182ffSJoseph Huber       auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) {
18172db182ffSJoseph Huber         return ORA << "Target region containing the parallel region that is "
1818fec1f210SJohannes Doerfert                       "specialized. (parallel region ID: "
1819fec1f210SJohannes Doerfert                    << ore::NV("OpenMPParallelRegion", F->getName())
1820fec1f210SJohannes Doerfert                    << ", kernel ID: "
1821fec1f210SJohannes Doerfert                    << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
1822fec1f210SJohannes Doerfert       };
18232db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD",
18242db182ffSJoseph Huber                                              RemarkKernel);
1825fec1f210SJohannes Doerfert     }
1826fec1f210SJohannes Doerfert 
18275b0581aeSJohannes Doerfert     Module &M = *F->getParent();
18285b0581aeSJohannes Doerfert     Type *Int8Ty = Type::getInt8Ty(M.getContext());
18295b0581aeSJohannes Doerfert 
18305b0581aeSJohannes Doerfert     auto *ID = new GlobalVariable(
18315b0581aeSJohannes Doerfert         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
18325b0581aeSJohannes Doerfert         UndefValue::get(Int8Ty), F->getName() + ".ID");
18335b0581aeSJohannes Doerfert 
18345b0581aeSJohannes Doerfert     for (Use *U : ToBeReplacedStateMachineUses)
18355b0581aeSJohannes Doerfert       U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
18365b0581aeSJohannes Doerfert 
18375b0581aeSJohannes Doerfert     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
18385b0581aeSJohannes Doerfert 
18395b0581aeSJohannes Doerfert     Changed = true;
18405b0581aeSJohannes Doerfert   }
18415b0581aeSJohannes Doerfert 
18425b0581aeSJohannes Doerfert   return Changed;
18435b0581aeSJohannes Doerfert }
18445b0581aeSJohannes Doerfert 
1845b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
1846b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
1847b8235d2bSsstefan1   using Base = StateWrapper<BooleanState, AbstractAttribute>;
1848b8235d2bSsstefan1   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1849b8235d2bSsstefan1 
18505dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
18515dfd7cc4Ssstefan1     Function *F = getAnchorScope();
18525dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
18535dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
18545dfd7cc4Ssstefan1   }
18555dfd7cc4Ssstefan1 
1856b8235d2bSsstefan1   /// Returns true if value is assumed to be tracked.
1857b8235d2bSsstefan1   bool isAssumedTracked() const { return getAssumed(); }
1858b8235d2bSsstefan1 
1859b8235d2bSsstefan1   /// Returns true if value is known to be tracked.
1860b8235d2bSsstefan1   bool isKnownTracked() const { return getAssumed(); }
1861b8235d2bSsstefan1 
1862b8235d2bSsstefan1   /// Create an abstract attribute biew for the position \p IRP.
1863b8235d2bSsstefan1   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
1864b8235d2bSsstefan1 
1865b8235d2bSsstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
18665dfd7cc4Ssstefan1   virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
18675dfd7cc4Ssstefan1                                                 const Instruction *I,
18685dfd7cc4Ssstefan1                                                 Attributor &A) const {
18695dfd7cc4Ssstefan1     return None;
18705dfd7cc4Ssstefan1   }
18715dfd7cc4Ssstefan1 
18725dfd7cc4Ssstefan1   /// Return an assumed unique ICV value if a single candidate is found. If
18735dfd7cc4Ssstefan1   /// there cannot be one, return a nullptr. If it is not clear yet, return the
18745dfd7cc4Ssstefan1   /// Optional::NoneType.
18755dfd7cc4Ssstefan1   virtual Optional<Value *>
18765dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
18775dfd7cc4Ssstefan1 
18785dfd7cc4Ssstefan1   // Currently only nthreads is being tracked.
18795dfd7cc4Ssstefan1   // this array will only grow with time.
18805dfd7cc4Ssstefan1   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
1881b8235d2bSsstefan1 
1882b8235d2bSsstefan1   /// See AbstractAttribute::getName()
1883b8235d2bSsstefan1   const std::string getName() const override { return "AAICVTracker"; }
1884b8235d2bSsstefan1 
1885233af895SLuofan Chen   /// See AbstractAttribute::getIdAddr()
1886233af895SLuofan Chen   const char *getIdAddr() const override { return &ID; }
1887233af895SLuofan Chen 
1888233af895SLuofan Chen   /// This function should return true if the type of the \p AA is AAICVTracker
1889233af895SLuofan Chen   static bool classof(const AbstractAttribute *AA) {
1890233af895SLuofan Chen     return (AA->getIdAddr() == &ID);
1891233af895SLuofan Chen   }
1892233af895SLuofan Chen 
1893b8235d2bSsstefan1   static const char ID;
1894b8235d2bSsstefan1 };
1895b8235d2bSsstefan1 
1896b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
1897b8235d2bSsstefan1   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
1898b8235d2bSsstefan1       : AAICVTracker(IRP, A) {}
1899b8235d2bSsstefan1 
1900b8235d2bSsstefan1   // FIXME: come up with better string.
19015dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerFunction"; }
1902b8235d2bSsstefan1 
1903b8235d2bSsstefan1   // FIXME: come up with some stats.
1904b8235d2bSsstefan1   void trackStatistics() const override {}
1905b8235d2bSsstefan1 
19065dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
1907b8235d2bSsstefan1   ChangeStatus manifest(Attributor &A) override {
19085dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
1909b8235d2bSsstefan1   }
1910b8235d2bSsstefan1 
1911b8235d2bSsstefan1   // Map of ICV to their values at specific program point.
19125dfd7cc4Ssstefan1   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
1913b8235d2bSsstefan1                   InternalControlVar::ICV___last>
19145dfd7cc4Ssstefan1       ICVReplacementValuesMap;
1915b8235d2bSsstefan1 
1916b8235d2bSsstefan1   ChangeStatus updateImpl(Attributor &A) override {
1917b8235d2bSsstefan1     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
1918b8235d2bSsstefan1 
1919b8235d2bSsstefan1     Function *F = getAnchorScope();
1920b8235d2bSsstefan1 
1921b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1922b8235d2bSsstefan1 
1923b8235d2bSsstefan1     for (InternalControlVar ICV : TrackableICVs) {
1924b8235d2bSsstefan1       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
1925b8235d2bSsstefan1 
19265dfd7cc4Ssstefan1       auto &ValuesMap = ICVReplacementValuesMap[ICV];
1927b8235d2bSsstefan1       auto TrackValues = [&](Use &U, Function &) {
1928b8235d2bSsstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
1929b8235d2bSsstefan1         if (!CI)
1930b8235d2bSsstefan1           return false;
1931b8235d2bSsstefan1 
1932b8235d2bSsstefan1         // FIXME: handle setters with more that 1 arguments.
1933b8235d2bSsstefan1         /// Track new value.
19345dfd7cc4Ssstefan1         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
1935b8235d2bSsstefan1           HasChanged = ChangeStatus::CHANGED;
1936b8235d2bSsstefan1 
1937b8235d2bSsstefan1         return false;
1938b8235d2bSsstefan1       };
1939b8235d2bSsstefan1 
19405dfd7cc4Ssstefan1       auto CallCheck = [&](Instruction &I) {
19415dfd7cc4Ssstefan1         Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
19425dfd7cc4Ssstefan1         if (ReplVal.hasValue() &&
19435dfd7cc4Ssstefan1             ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
19445dfd7cc4Ssstefan1           HasChanged = ChangeStatus::CHANGED;
19455dfd7cc4Ssstefan1 
19465dfd7cc4Ssstefan1         return true;
19475dfd7cc4Ssstefan1       };
19485dfd7cc4Ssstefan1 
19495dfd7cc4Ssstefan1       // Track all changes of an ICV.
1950b8235d2bSsstefan1       SetterRFI.foreachUse(TrackValues, F);
19515dfd7cc4Ssstefan1 
19525dfd7cc4Ssstefan1       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
19535dfd7cc4Ssstefan1                                 /* CheckBBLivenessOnly */ true);
19545dfd7cc4Ssstefan1 
19555dfd7cc4Ssstefan1       /// TODO: Figure out a way to avoid adding entry in
19565dfd7cc4Ssstefan1       /// ICVReplacementValuesMap
19575dfd7cc4Ssstefan1       Instruction *Entry = &F->getEntryBlock().front();
19585dfd7cc4Ssstefan1       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
19595dfd7cc4Ssstefan1         ValuesMap.insert(std::make_pair(Entry, nullptr));
1960b8235d2bSsstefan1     }
1961b8235d2bSsstefan1 
1962b8235d2bSsstefan1     return HasChanged;
1963b8235d2bSsstefan1   }
1964b8235d2bSsstefan1 
19655dfd7cc4Ssstefan1   /// Hepler to check if \p I is a call and get the value for it if it is
19665dfd7cc4Ssstefan1   /// unique.
19675dfd7cc4Ssstefan1   Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
19685dfd7cc4Ssstefan1                                     InternalControlVar &ICV) const {
1969b8235d2bSsstefan1 
19705dfd7cc4Ssstefan1     const auto *CB = dyn_cast<CallBase>(I);
1971dcaec812SJohannes Doerfert     if (!CB || CB->hasFnAttr("no_openmp") ||
1972dcaec812SJohannes Doerfert         CB->hasFnAttr("no_openmp_routines"))
19735dfd7cc4Ssstefan1       return None;
19745dfd7cc4Ssstefan1 
1975b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
1976b8235d2bSsstefan1     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
19775dfd7cc4Ssstefan1     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
19785dfd7cc4Ssstefan1     Function *CalledFunction = CB->getCalledFunction();
1979b8235d2bSsstefan1 
19804eef14f9SWei Wang     // Indirect call, assume ICV changes.
19814eef14f9SWei Wang     if (CalledFunction == nullptr)
19824eef14f9SWei Wang       return nullptr;
19835dfd7cc4Ssstefan1     if (CalledFunction == GetterRFI.Declaration)
19845dfd7cc4Ssstefan1       return None;
19855dfd7cc4Ssstefan1     if (CalledFunction == SetterRFI.Declaration) {
19865dfd7cc4Ssstefan1       if (ICVReplacementValuesMap[ICV].count(I))
19875dfd7cc4Ssstefan1         return ICVReplacementValuesMap[ICV].lookup(I);
19885dfd7cc4Ssstefan1 
19895dfd7cc4Ssstefan1       return nullptr;
19905dfd7cc4Ssstefan1     }
19915dfd7cc4Ssstefan1 
19925dfd7cc4Ssstefan1     // Since we don't know, assume it changes the ICV.
19935dfd7cc4Ssstefan1     if (CalledFunction->isDeclaration())
19945dfd7cc4Ssstefan1       return nullptr;
19955dfd7cc4Ssstefan1 
19965b70c12fSJohannes Doerfert     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
19975b70c12fSJohannes Doerfert         *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
19985dfd7cc4Ssstefan1 
19995dfd7cc4Ssstefan1     if (ICVTrackingAA.isAssumedTracked())
20005dfd7cc4Ssstefan1       return ICVTrackingAA.getUniqueReplacementValue(ICV);
20015dfd7cc4Ssstefan1 
20025dfd7cc4Ssstefan1     // If we don't know, assume it changes.
20035dfd7cc4Ssstefan1     return nullptr;
20045dfd7cc4Ssstefan1   }
20055dfd7cc4Ssstefan1 
20065dfd7cc4Ssstefan1   // We don't check unique value for a function, so return None.
20075dfd7cc4Ssstefan1   Optional<Value *>
20085dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
20095dfd7cc4Ssstefan1     return None;
20105dfd7cc4Ssstefan1   }
20115dfd7cc4Ssstefan1 
20125dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
20135dfd7cc4Ssstefan1   Optional<Value *> getReplacementValue(InternalControlVar ICV,
20145dfd7cc4Ssstefan1                                         const Instruction *I,
20155dfd7cc4Ssstefan1                                         Attributor &A) const override {
20165dfd7cc4Ssstefan1     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
20175dfd7cc4Ssstefan1     if (ValuesMap.count(I))
20185dfd7cc4Ssstefan1       return ValuesMap.lookup(I);
20195dfd7cc4Ssstefan1 
20205dfd7cc4Ssstefan1     SmallVector<const Instruction *, 16> Worklist;
20215dfd7cc4Ssstefan1     SmallPtrSet<const Instruction *, 16> Visited;
20225dfd7cc4Ssstefan1     Worklist.push_back(I);
20235dfd7cc4Ssstefan1 
20245dfd7cc4Ssstefan1     Optional<Value *> ReplVal;
20255dfd7cc4Ssstefan1 
20265dfd7cc4Ssstefan1     while (!Worklist.empty()) {
20275dfd7cc4Ssstefan1       const Instruction *CurrInst = Worklist.pop_back_val();
20285dfd7cc4Ssstefan1       if (!Visited.insert(CurrInst).second)
2029b8235d2bSsstefan1         continue;
2030b8235d2bSsstefan1 
20315dfd7cc4Ssstefan1       const BasicBlock *CurrBB = CurrInst->getParent();
20325dfd7cc4Ssstefan1 
20335dfd7cc4Ssstefan1       // Go up and look for all potential setters/calls that might change the
20345dfd7cc4Ssstefan1       // ICV.
20355dfd7cc4Ssstefan1       while ((CurrInst = CurrInst->getPrevNode())) {
20365dfd7cc4Ssstefan1         if (ValuesMap.count(CurrInst)) {
20375dfd7cc4Ssstefan1           Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
20385dfd7cc4Ssstefan1           // Unknown value, track new.
20395dfd7cc4Ssstefan1           if (!ReplVal.hasValue()) {
20405dfd7cc4Ssstefan1             ReplVal = NewReplVal;
20415dfd7cc4Ssstefan1             break;
20425dfd7cc4Ssstefan1           }
20435dfd7cc4Ssstefan1 
20445dfd7cc4Ssstefan1           // If we found a new value, we can't know the icv value anymore.
20455dfd7cc4Ssstefan1           if (NewReplVal.hasValue())
20465dfd7cc4Ssstefan1             if (ReplVal != NewReplVal)
2047b8235d2bSsstefan1               return nullptr;
2048b8235d2bSsstefan1 
20495dfd7cc4Ssstefan1           break;
2050b8235d2bSsstefan1         }
2051b8235d2bSsstefan1 
20525dfd7cc4Ssstefan1         Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
20535dfd7cc4Ssstefan1         if (!NewReplVal.hasValue())
20545dfd7cc4Ssstefan1           continue;
20555dfd7cc4Ssstefan1 
20565dfd7cc4Ssstefan1         // Unknown value, track new.
20575dfd7cc4Ssstefan1         if (!ReplVal.hasValue()) {
20585dfd7cc4Ssstefan1           ReplVal = NewReplVal;
20595dfd7cc4Ssstefan1           break;
2060b8235d2bSsstefan1         }
2061b8235d2bSsstefan1 
20625dfd7cc4Ssstefan1         // if (NewReplVal.hasValue())
20635dfd7cc4Ssstefan1         // We found a new value, we can't know the icv value anymore.
20645dfd7cc4Ssstefan1         if (ReplVal != NewReplVal)
2065b8235d2bSsstefan1           return nullptr;
2066b8235d2bSsstefan1       }
20675dfd7cc4Ssstefan1 
20685dfd7cc4Ssstefan1       // If we are in the same BB and we have a value, we are done.
20695dfd7cc4Ssstefan1       if (CurrBB == I->getParent() && ReplVal.hasValue())
20705dfd7cc4Ssstefan1         return ReplVal;
20715dfd7cc4Ssstefan1 
20725dfd7cc4Ssstefan1       // Go through all predecessors and add terminators for analysis.
20735dfd7cc4Ssstefan1       for (const BasicBlock *Pred : predecessors(CurrBB))
20745dfd7cc4Ssstefan1         if (const Instruction *Terminator = Pred->getTerminator())
20755dfd7cc4Ssstefan1           Worklist.push_back(Terminator);
20765dfd7cc4Ssstefan1     }
20775dfd7cc4Ssstefan1 
20785dfd7cc4Ssstefan1     return ReplVal;
20795dfd7cc4Ssstefan1   }
20805dfd7cc4Ssstefan1 };
20815dfd7cc4Ssstefan1 
20825dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
20835dfd7cc4Ssstefan1   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
20845dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
20855dfd7cc4Ssstefan1 
20865dfd7cc4Ssstefan1   // FIXME: come up with better string.
20875dfd7cc4Ssstefan1   const std::string getAsStr() const override {
20885dfd7cc4Ssstefan1     return "ICVTrackerFunctionReturned";
20895dfd7cc4Ssstefan1   }
20905dfd7cc4Ssstefan1 
20915dfd7cc4Ssstefan1   // FIXME: come up with some stats.
20925dfd7cc4Ssstefan1   void trackStatistics() const override {}
20935dfd7cc4Ssstefan1 
20945dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
20955dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
20965dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
20975dfd7cc4Ssstefan1   }
20985dfd7cc4Ssstefan1 
20995dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
21005dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
21015dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
21025dfd7cc4Ssstefan1       ICVReplacementValuesMap;
21035dfd7cc4Ssstefan1 
21045dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
21055dfd7cc4Ssstefan1   Optional<Value *>
21065dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
21075dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
21085dfd7cc4Ssstefan1   }
21095dfd7cc4Ssstefan1 
21105dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21115dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
21125dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
21135b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
21145dfd7cc4Ssstefan1 
21155dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
21165dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
21175dfd7cc4Ssstefan1 
21185dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21195dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
21205dfd7cc4Ssstefan1       Optional<Value *> UniqueICVValue;
21215dfd7cc4Ssstefan1 
21225dfd7cc4Ssstefan1       auto CheckReturnInst = [&](Instruction &I) {
21235dfd7cc4Ssstefan1         Optional<Value *> NewReplVal =
21245dfd7cc4Ssstefan1             ICVTrackingAA.getReplacementValue(ICV, &I, A);
21255dfd7cc4Ssstefan1 
21265dfd7cc4Ssstefan1         // If we found a second ICV value there is no unique returned value.
21275dfd7cc4Ssstefan1         if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
21285dfd7cc4Ssstefan1           return false;
21295dfd7cc4Ssstefan1 
21305dfd7cc4Ssstefan1         UniqueICVValue = NewReplVal;
21315dfd7cc4Ssstefan1 
21325dfd7cc4Ssstefan1         return true;
21335dfd7cc4Ssstefan1       };
21345dfd7cc4Ssstefan1 
21355dfd7cc4Ssstefan1       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
21365dfd7cc4Ssstefan1                                      /* CheckBBLivenessOnly */ true))
21375dfd7cc4Ssstefan1         UniqueICVValue = nullptr;
21385dfd7cc4Ssstefan1 
21395dfd7cc4Ssstefan1       if (UniqueICVValue == ReplVal)
21405dfd7cc4Ssstefan1         continue;
21415dfd7cc4Ssstefan1 
21425dfd7cc4Ssstefan1       ReplVal = UniqueICVValue;
21435dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
21445dfd7cc4Ssstefan1     }
21455dfd7cc4Ssstefan1 
21465dfd7cc4Ssstefan1     return Changed;
21475dfd7cc4Ssstefan1   }
21485dfd7cc4Ssstefan1 };
21495dfd7cc4Ssstefan1 
21505dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
21515dfd7cc4Ssstefan1   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
21525dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
21535dfd7cc4Ssstefan1 
21545dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
21555dfd7cc4Ssstefan1     Function *F = getAnchorScope();
21565dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
21575dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
21585dfd7cc4Ssstefan1 
21595dfd7cc4Ssstefan1     // We only initialize this AA for getters, so we need to know which ICV it
21605dfd7cc4Ssstefan1     // gets.
21615dfd7cc4Ssstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
21625dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
21635dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[ICV];
21645dfd7cc4Ssstefan1       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
21655dfd7cc4Ssstefan1       if (Getter.Declaration == getAssociatedFunction()) {
21665dfd7cc4Ssstefan1         AssociatedICV = ICVInfo.Kind;
21675dfd7cc4Ssstefan1         return;
21685dfd7cc4Ssstefan1       }
21695dfd7cc4Ssstefan1     }
21705dfd7cc4Ssstefan1 
21715dfd7cc4Ssstefan1     /// Unknown ICV.
21725dfd7cc4Ssstefan1     indicatePessimisticFixpoint();
21735dfd7cc4Ssstefan1   }
21745dfd7cc4Ssstefan1 
21755dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
21765dfd7cc4Ssstefan1     if (!ReplVal.hasValue() || !ReplVal.getValue())
21775dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
21785dfd7cc4Ssstefan1 
21795dfd7cc4Ssstefan1     A.changeValueAfterManifest(*getCtxI(), **ReplVal);
21805dfd7cc4Ssstefan1     A.deleteAfterManifest(*getCtxI());
21815dfd7cc4Ssstefan1 
21825dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
21835dfd7cc4Ssstefan1   }
21845dfd7cc4Ssstefan1 
21855dfd7cc4Ssstefan1   // FIXME: come up with better string.
21865dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
21875dfd7cc4Ssstefan1 
21885dfd7cc4Ssstefan1   // FIXME: come up with some stats.
21895dfd7cc4Ssstefan1   void trackStatistics() const override {}
21905dfd7cc4Ssstefan1 
21915dfd7cc4Ssstefan1   InternalControlVar AssociatedICV;
21925dfd7cc4Ssstefan1   Optional<Value *> ReplVal;
21935dfd7cc4Ssstefan1 
21945dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
21955dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
21965b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
21975dfd7cc4Ssstefan1 
21985dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
21995dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22005dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22015dfd7cc4Ssstefan1 
22025dfd7cc4Ssstefan1     Optional<Value *> NewReplVal =
22035dfd7cc4Ssstefan1         ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
22045dfd7cc4Ssstefan1 
22055dfd7cc4Ssstefan1     if (ReplVal == NewReplVal)
22065dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
22075dfd7cc4Ssstefan1 
22085dfd7cc4Ssstefan1     ReplVal = NewReplVal;
22095dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
22105dfd7cc4Ssstefan1   }
22115dfd7cc4Ssstefan1 
22125dfd7cc4Ssstefan1   // Return the value with which associated value can be replaced for specific
22135dfd7cc4Ssstefan1   // \p ICV.
22145dfd7cc4Ssstefan1   Optional<Value *>
22155dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22165dfd7cc4Ssstefan1     return ReplVal;
22175dfd7cc4Ssstefan1   }
22185dfd7cc4Ssstefan1 };
22195dfd7cc4Ssstefan1 
22205dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
22215dfd7cc4Ssstefan1   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
22225dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
22235dfd7cc4Ssstefan1 
22245dfd7cc4Ssstefan1   // FIXME: come up with better string.
22255dfd7cc4Ssstefan1   const std::string getAsStr() const override {
22265dfd7cc4Ssstefan1     return "ICVTrackerCallSiteReturned";
22275dfd7cc4Ssstefan1   }
22285dfd7cc4Ssstefan1 
22295dfd7cc4Ssstefan1   // FIXME: come up with some stats.
22305dfd7cc4Ssstefan1   void trackStatistics() const override {}
22315dfd7cc4Ssstefan1 
22325dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
22335dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
22345dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
22355dfd7cc4Ssstefan1   }
22365dfd7cc4Ssstefan1 
22375dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
22385dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
22395dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
22405dfd7cc4Ssstefan1       ICVReplacementValuesMap;
22415dfd7cc4Ssstefan1 
22425dfd7cc4Ssstefan1   /// Return the value with which associated value can be replaced for specific
22435dfd7cc4Ssstefan1   /// \p ICV.
22445dfd7cc4Ssstefan1   Optional<Value *>
22455dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
22465dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
22475dfd7cc4Ssstefan1   }
22485dfd7cc4Ssstefan1 
22495dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
22505dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
22515dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
22525b70c12fSJohannes Doerfert         *this, IRPosition::returned(*getAssociatedFunction()),
22535b70c12fSJohannes Doerfert         DepClassTy::REQUIRED);
22545dfd7cc4Ssstefan1 
22555dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
22565dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
22575dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
22585dfd7cc4Ssstefan1 
22595dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
22605dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
22615dfd7cc4Ssstefan1       Optional<Value *> NewReplVal =
22625dfd7cc4Ssstefan1           ICVTrackingAA.getUniqueReplacementValue(ICV);
22635dfd7cc4Ssstefan1 
22645dfd7cc4Ssstefan1       if (ReplVal == NewReplVal)
22655dfd7cc4Ssstefan1         continue;
22665dfd7cc4Ssstefan1 
22675dfd7cc4Ssstefan1       ReplVal = NewReplVal;
22685dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
22695dfd7cc4Ssstefan1     }
22705dfd7cc4Ssstefan1     return Changed;
22715dfd7cc4Ssstefan1   }
22729548b74aSJohannes Doerfert };
227318283125SJoseph Huber 
227418283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain {
227518283125SJoseph Huber   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
227618283125SJoseph Huber       : AAExecutionDomain(IRP, A) {}
227718283125SJoseph Huber 
227818283125SJoseph Huber   const std::string getAsStr() const override {
227918283125SJoseph Huber     return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
228018283125SJoseph Huber            "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
228118283125SJoseph Huber   }
228218283125SJoseph Huber 
228318283125SJoseph Huber   /// See AbstractAttribute::trackStatistics().
228418283125SJoseph Huber   void trackStatistics() const override {}
228518283125SJoseph Huber 
228618283125SJoseph Huber   void initialize(Attributor &A) override {
228718283125SJoseph Huber     Function *F = getAnchorScope();
228818283125SJoseph Huber     for (const auto &BB : *F)
228918283125SJoseph Huber       SingleThreadedBBs.insert(&BB);
229018283125SJoseph Huber     NumBBs = SingleThreadedBBs.size();
229118283125SJoseph Huber   }
229218283125SJoseph Huber 
229318283125SJoseph Huber   ChangeStatus manifest(Attributor &A) override {
229418283125SJoseph Huber     LLVM_DEBUG({
229518283125SJoseph Huber       for (const BasicBlock *BB : SingleThreadedBBs)
229618283125SJoseph Huber         dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
229718283125SJoseph Huber                << BB->getName() << " is executed by a single thread.\n";
229818283125SJoseph Huber     });
229918283125SJoseph Huber     return ChangeStatus::UNCHANGED;
230018283125SJoseph Huber   }
230118283125SJoseph Huber 
230218283125SJoseph Huber   ChangeStatus updateImpl(Attributor &A) override;
230318283125SJoseph Huber 
230418283125SJoseph Huber   /// Check if an instruction is executed by a single thread.
23059a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
23069a23e673SJohannes Doerfert     return isExecutedByInitialThreadOnly(*I.getParent());
230718283125SJoseph Huber   }
230818283125SJoseph Huber 
23099a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
23101cfdcae6SJoseph Huber     return isValidState() && SingleThreadedBBs.contains(&BB);
231118283125SJoseph Huber   }
231218283125SJoseph Huber 
231318283125SJoseph Huber   /// Set of basic blocks that are executed by a single thread.
231418283125SJoseph Huber   DenseSet<const BasicBlock *> SingleThreadedBBs;
231518283125SJoseph Huber 
231618283125SJoseph Huber   /// Total number of basic blocks in this function.
231718283125SJoseph Huber   long unsigned NumBBs;
231818283125SJoseph Huber };
231918283125SJoseph Huber 
232018283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
232118283125SJoseph Huber   Function *F = getAnchorScope();
232218283125SJoseph Huber   ReversePostOrderTraversal<Function *> RPOT(F);
232318283125SJoseph Huber   auto NumSingleThreadedBBs = SingleThreadedBBs.size();
232418283125SJoseph Huber 
232518283125SJoseph Huber   bool AllCallSitesKnown;
232618283125SJoseph Huber   auto PredForCallSite = [&](AbstractCallSite ACS) {
232718283125SJoseph Huber     const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
232818283125SJoseph Huber         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
232918283125SJoseph Huber         DepClassTy::REQUIRED);
23301cfdcae6SJoseph Huber     return ACS.isDirectCall() &&
23311cfdcae6SJoseph Huber            ExecutionDomainAA.isExecutedByInitialThreadOnly(
23329a23e673SJohannes Doerfert                *ACS.getInstruction());
233318283125SJoseph Huber   };
233418283125SJoseph Huber 
233518283125SJoseph Huber   if (!A.checkForAllCallSites(PredForCallSite, *this,
233618283125SJoseph Huber                               /* RequiresAllCallSites */ true,
233718283125SJoseph Huber                               AllCallSitesKnown))
233818283125SJoseph Huber     SingleThreadedBBs.erase(&F->getEntryBlock());
233918283125SJoseph Huber 
2340*e2cfbfccSJohannes Doerfert   auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2341*e2cfbfccSJohannes Doerfert   auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2342*e2cfbfccSJohannes Doerfert 
2343*e2cfbfccSJohannes Doerfert   // Check if the edge into the successor block compares the __kmpc_target_init
2344*e2cfbfccSJohannes Doerfert   // result with -1. If we are in non-SPMD-mode that signals only the main
2345*e2cfbfccSJohannes Doerfert   // thread will execute the edge.
23466fc51c9fSJoseph Huber   auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
234718283125SJoseph Huber     if (!Edge || !Edge->isConditional())
234818283125SJoseph Huber       return false;
234918283125SJoseph Huber     if (Edge->getSuccessor(0) != SuccessorBB)
235018283125SJoseph Huber       return false;
235118283125SJoseph Huber 
235218283125SJoseph Huber     auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
235318283125SJoseph Huber     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
235418283125SJoseph Huber       return false;
235518283125SJoseph Huber 
235618283125SJoseph Huber     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2357*e2cfbfccSJohannes Doerfert     if (!C)
235818283125SJoseph Huber       return false;
235918283125SJoseph Huber 
2360*e2cfbfccSJohannes Doerfert     // Match:  -1 == __kmpc_target_init (for non-SPMD kernels only!)
2361*e2cfbfccSJohannes Doerfert     if (C->isAllOnesValue()) {
2362*e2cfbfccSJohannes Doerfert       auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2363*e2cfbfccSJohannes Doerfert       if (!CB || CB->getCalledFunction() != RFI.Declaration)
2364*e2cfbfccSJohannes Doerfert         return false;
2365*e2cfbfccSJohannes Doerfert       const int InitIsSPMDArgNo = 1;
2366*e2cfbfccSJohannes Doerfert       auto *IsSPMDModeCI =
2367*e2cfbfccSJohannes Doerfert           dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
2368*e2cfbfccSJohannes Doerfert       return IsSPMDModeCI && IsSPMDModeCI->isZero();
2369*e2cfbfccSJohannes Doerfert     }
237018283125SJoseph Huber 
237118283125SJoseph Huber     return false;
237218283125SJoseph Huber   };
237318283125SJoseph Huber 
237418283125SJoseph Huber   // Merge all the predecessor states into the current basic block. A basic
237518283125SJoseph Huber   // block is executed by a single thread if all of its predecessors are.
237618283125SJoseph Huber   auto MergePredecessorStates = [&](BasicBlock *BB) {
237718283125SJoseph Huber     if (pred_begin(BB) == pred_end(BB))
237818283125SJoseph Huber       return SingleThreadedBBs.contains(BB);
237918283125SJoseph Huber 
23806fc51c9fSJoseph Huber     bool IsInitialThread = true;
238118283125SJoseph Huber     for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
238218283125SJoseph Huber          PredBB != PredEndBB; ++PredBB) {
23836fc51c9fSJoseph Huber       if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
238418283125SJoseph Huber                                BB))
23856fc51c9fSJoseph Huber         IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
238618283125SJoseph Huber     }
238718283125SJoseph Huber 
23886fc51c9fSJoseph Huber     return IsInitialThread;
238918283125SJoseph Huber   };
239018283125SJoseph Huber 
239118283125SJoseph Huber   for (auto *BB : RPOT) {
239218283125SJoseph Huber     if (!MergePredecessorStates(BB))
239318283125SJoseph Huber       SingleThreadedBBs.erase(BB);
239418283125SJoseph Huber   }
239518283125SJoseph Huber 
239618283125SJoseph Huber   return (NumSingleThreadedBBs == SingleThreadedBBs.size())
239718283125SJoseph Huber              ? ChangeStatus::UNCHANGED
239818283125SJoseph Huber              : ChangeStatus::CHANGED;
239918283125SJoseph Huber }
240018283125SJoseph Huber 
24016fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a
24026fc51c9fSJoseph Huber /// static buffer of shared memory.
24036fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
24046fc51c9fSJoseph Huber   using Base = StateWrapper<BooleanState, AbstractAttribute>;
24056fc51c9fSJoseph Huber   AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
24066fc51c9fSJoseph Huber 
24076fc51c9fSJoseph Huber   /// Create an abstract attribute view for the position \p IRP.
24086fc51c9fSJoseph Huber   static AAHeapToShared &createForPosition(const IRPosition &IRP,
24096fc51c9fSJoseph Huber                                            Attributor &A);
24106fc51c9fSJoseph Huber 
24116fc51c9fSJoseph Huber   /// See AbstractAttribute::getName().
24126fc51c9fSJoseph Huber   const std::string getName() const override { return "AAHeapToShared"; }
24136fc51c9fSJoseph Huber 
24146fc51c9fSJoseph Huber   /// See AbstractAttribute::getIdAddr().
24156fc51c9fSJoseph Huber   const char *getIdAddr() const override { return &ID; }
24166fc51c9fSJoseph Huber 
24176fc51c9fSJoseph Huber   /// This function should return true if the type of the \p AA is
24186fc51c9fSJoseph Huber   /// AAHeapToShared.
24196fc51c9fSJoseph Huber   static bool classof(const AbstractAttribute *AA) {
24206fc51c9fSJoseph Huber     return (AA->getIdAddr() == &ID);
24216fc51c9fSJoseph Huber   }
24226fc51c9fSJoseph Huber 
24236fc51c9fSJoseph Huber   /// Unique ID (due to the unique address)
24246fc51c9fSJoseph Huber   static const char ID;
24256fc51c9fSJoseph Huber };
24266fc51c9fSJoseph Huber 
24276fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared {
24286fc51c9fSJoseph Huber   AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
24296fc51c9fSJoseph Huber       : AAHeapToShared(IRP, A) {}
24306fc51c9fSJoseph Huber 
24316fc51c9fSJoseph Huber   const std::string getAsStr() const override {
24326fc51c9fSJoseph Huber     return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
24336fc51c9fSJoseph Huber            " malloc calls eligible.";
24346fc51c9fSJoseph Huber   }
24356fc51c9fSJoseph Huber 
24366fc51c9fSJoseph Huber   /// See AbstractAttribute::trackStatistics().
24376fc51c9fSJoseph Huber   void trackStatistics() const override {}
24386fc51c9fSJoseph Huber 
24396fc51c9fSJoseph Huber   void initialize(Attributor &A) override {
24406fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24416fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
24426fc51c9fSJoseph Huber 
24436fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users())
24446fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
24456fc51c9fSJoseph Huber         MallocCalls.insert(CB);
24466fc51c9fSJoseph Huber   }
24476fc51c9fSJoseph Huber 
24486fc51c9fSJoseph Huber   ChangeStatus manifest(Attributor &A) override {
24496fc51c9fSJoseph Huber     if (MallocCalls.empty())
24506fc51c9fSJoseph Huber       return ChangeStatus::UNCHANGED;
24516fc51c9fSJoseph Huber 
24526fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
24536fc51c9fSJoseph Huber     auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
24546fc51c9fSJoseph Huber 
24556fc51c9fSJoseph Huber     Function *F = getAnchorScope();
24566fc51c9fSJoseph Huber     auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
24576fc51c9fSJoseph Huber                                             DepClassTy::OPTIONAL);
24586fc51c9fSJoseph Huber 
24596fc51c9fSJoseph Huber     ChangeStatus Changed = ChangeStatus::UNCHANGED;
24606fc51c9fSJoseph Huber     for (CallBase *CB : MallocCalls) {
24616fc51c9fSJoseph Huber       // Skip replacing this if HeapToStack has already claimed it.
2462c1c1fe93SJohannes Doerfert       if (HS && HS->isAssumedHeapToStack(*CB))
24636fc51c9fSJoseph Huber         continue;
24646fc51c9fSJoseph Huber 
24656fc51c9fSJoseph Huber       // Find the unique free call to remove it.
24666fc51c9fSJoseph Huber       SmallVector<CallBase *, 4> FreeCalls;
24676fc51c9fSJoseph Huber       for (auto *U : CB->users()) {
24686fc51c9fSJoseph Huber         CallBase *C = dyn_cast<CallBase>(U);
24696fc51c9fSJoseph Huber         if (C && C->getCalledFunction() == FreeCall.Declaration)
24706fc51c9fSJoseph Huber           FreeCalls.push_back(C);
24716fc51c9fSJoseph Huber       }
24726fc51c9fSJoseph Huber       if (FreeCalls.size() != 1)
24736fc51c9fSJoseph Huber         continue;
24746fc51c9fSJoseph Huber 
24756fc51c9fSJoseph Huber       ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
24766fc51c9fSJoseph Huber 
24776fc51c9fSJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
24786fc51c9fSJoseph Huber                         << CB->getCaller()->getName() << " with "
24796fc51c9fSJoseph Huber                         << AllocSize->getZExtValue()
24806fc51c9fSJoseph Huber                         << " bytes of shared memory\n");
24816fc51c9fSJoseph Huber 
24826fc51c9fSJoseph Huber       // Create a new shared memory buffer of the same size as the allocation
24836fc51c9fSJoseph Huber       // and replace all the uses of the original allocation with it.
24846fc51c9fSJoseph Huber       Module *M = CB->getModule();
24856fc51c9fSJoseph Huber       Type *Int8Ty = Type::getInt8Ty(M->getContext());
24866fc51c9fSJoseph Huber       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
24876fc51c9fSJoseph Huber       auto *SharedMem = new GlobalVariable(
24886fc51c9fSJoseph Huber           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
24896fc51c9fSJoseph Huber           UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
24906fc51c9fSJoseph Huber           GlobalValue::NotThreadLocal,
24916fc51c9fSJoseph Huber           static_cast<unsigned>(AddressSpace::Shared));
24926fc51c9fSJoseph Huber       auto *NewBuffer =
24936fc51c9fSJoseph Huber           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
24946fc51c9fSJoseph Huber 
249530e36c9bSJoseph Huber       auto Remark = [&](OptimizationRemark OR) {
249630e36c9bSJoseph Huber         return OR << "Replaced globalized variable with "
249730e36c9bSJoseph Huber                   << ore::NV("SharedMemory", AllocSize->getZExtValue())
249830e36c9bSJoseph Huber                   << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
249930e36c9bSJoseph Huber                   << "of shared memory";
250030e36c9bSJoseph Huber       };
250130e36c9bSJoseph Huber       A.emitRemark<OptimizationRemark>(CB, "OpenMPReplaceGlobalization",
250230e36c9bSJoseph Huber                                        Remark);
250330e36c9bSJoseph Huber 
25046fc51c9fSJoseph Huber       SharedMem->setAlignment(MaybeAlign(32));
25056fc51c9fSJoseph Huber 
25066fc51c9fSJoseph Huber       A.changeValueAfterManifest(*CB, *NewBuffer);
25076fc51c9fSJoseph Huber       A.deleteAfterManifest(*CB);
25086fc51c9fSJoseph Huber       A.deleteAfterManifest(*FreeCalls.front());
25096fc51c9fSJoseph Huber 
25106fc51c9fSJoseph Huber       NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
25116fc51c9fSJoseph Huber       Changed = ChangeStatus::CHANGED;
25126fc51c9fSJoseph Huber     }
25136fc51c9fSJoseph Huber 
25146fc51c9fSJoseph Huber     return Changed;
25156fc51c9fSJoseph Huber   }
25166fc51c9fSJoseph Huber 
25176fc51c9fSJoseph Huber   ChangeStatus updateImpl(Attributor &A) override {
25186fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
25196fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
25206fc51c9fSJoseph Huber     Function *F = getAnchorScope();
25216fc51c9fSJoseph Huber 
25226fc51c9fSJoseph Huber     auto NumMallocCalls = MallocCalls.size();
25236fc51c9fSJoseph Huber 
25246fc51c9fSJoseph Huber     // Only consider malloc calls executed by a single thread with a constant.
25256fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users()) {
25266fc51c9fSJoseph Huber       const auto &ED = A.getAAFor<AAExecutionDomain>(
25276fc51c9fSJoseph Huber           *this, IRPosition::function(*F), DepClassTy::REQUIRED);
25286fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
25296fc51c9fSJoseph Huber         if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
25306fc51c9fSJoseph Huber             !ED.isExecutedByInitialThreadOnly(*CB))
25316fc51c9fSJoseph Huber           MallocCalls.erase(CB);
25326fc51c9fSJoseph Huber     }
25336fc51c9fSJoseph Huber 
25346fc51c9fSJoseph Huber     if (NumMallocCalls != MallocCalls.size())
25356fc51c9fSJoseph Huber       return ChangeStatus::CHANGED;
25366fc51c9fSJoseph Huber 
25376fc51c9fSJoseph Huber     return ChangeStatus::UNCHANGED;
25386fc51c9fSJoseph Huber   }
25396fc51c9fSJoseph Huber 
25406fc51c9fSJoseph Huber   /// Collection of all malloc calls in a function.
25416fc51c9fSJoseph Huber   SmallPtrSet<CallBase *, 4> MallocCalls;
25426fc51c9fSJoseph Huber };
25436fc51c9fSJoseph Huber 
25449548b74aSJohannes Doerfert } // namespace
25459548b74aSJohannes Doerfert 
2546b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
254718283125SJoseph Huber const char AAExecutionDomain::ID = 0;
25486fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0;
2549b8235d2bSsstefan1 
2550b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
2551b8235d2bSsstefan1                                               Attributor &A) {
2552b8235d2bSsstefan1   AAICVTracker *AA = nullptr;
2553b8235d2bSsstefan1   switch (IRP.getPositionKind()) {
2554b8235d2bSsstefan1   case IRPosition::IRP_INVALID:
2555b8235d2bSsstefan1   case IRPosition::IRP_FLOAT:
2556b8235d2bSsstefan1   case IRPosition::IRP_ARGUMENT:
2557b8235d2bSsstefan1   case IRPosition::IRP_CALL_SITE_ARGUMENT:
25581de70a72SJohannes Doerfert     llvm_unreachable("ICVTracker can only be created for function position!");
25595dfd7cc4Ssstefan1   case IRPosition::IRP_RETURNED:
25605dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
25615dfd7cc4Ssstefan1     break;
25625dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE_RETURNED:
25635dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
25645dfd7cc4Ssstefan1     break;
25655dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE:
25665dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
25675dfd7cc4Ssstefan1     break;
2568b8235d2bSsstefan1   case IRPosition::IRP_FUNCTION:
2569b8235d2bSsstefan1     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
2570b8235d2bSsstefan1     break;
2571b8235d2bSsstefan1   }
2572b8235d2bSsstefan1 
2573b8235d2bSsstefan1   return *AA;
2574b8235d2bSsstefan1 }
2575b8235d2bSsstefan1 
257618283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
257718283125SJoseph Huber                                                         Attributor &A) {
257818283125SJoseph Huber   AAExecutionDomainFunction *AA = nullptr;
257918283125SJoseph Huber   switch (IRP.getPositionKind()) {
258018283125SJoseph Huber   case IRPosition::IRP_INVALID:
258118283125SJoseph Huber   case IRPosition::IRP_FLOAT:
258218283125SJoseph Huber   case IRPosition::IRP_ARGUMENT:
258318283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
258418283125SJoseph Huber   case IRPosition::IRP_RETURNED:
258518283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
258618283125SJoseph Huber   case IRPosition::IRP_CALL_SITE:
258718283125SJoseph Huber     llvm_unreachable(
258818283125SJoseph Huber         "AAExecutionDomain can only be created for function position!");
258918283125SJoseph Huber   case IRPosition::IRP_FUNCTION:
259018283125SJoseph Huber     AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
259118283125SJoseph Huber     break;
259218283125SJoseph Huber   }
259318283125SJoseph Huber 
259418283125SJoseph Huber   return *AA;
259518283125SJoseph Huber }
259618283125SJoseph Huber 
25976fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
25986fc51c9fSJoseph Huber                                                   Attributor &A) {
25996fc51c9fSJoseph Huber   AAHeapToSharedFunction *AA = nullptr;
26006fc51c9fSJoseph Huber   switch (IRP.getPositionKind()) {
26016fc51c9fSJoseph Huber   case IRPosition::IRP_INVALID:
26026fc51c9fSJoseph Huber   case IRPosition::IRP_FLOAT:
26036fc51c9fSJoseph Huber   case IRPosition::IRP_ARGUMENT:
26046fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
26056fc51c9fSJoseph Huber   case IRPosition::IRP_RETURNED:
26066fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
26076fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE:
26086fc51c9fSJoseph Huber     llvm_unreachable(
26096fc51c9fSJoseph Huber         "AAHeapToShared can only be created for function position!");
26106fc51c9fSJoseph Huber   case IRPosition::IRP_FUNCTION:
26116fc51c9fSJoseph Huber     AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
26126fc51c9fSJoseph Huber     break;
26136fc51c9fSJoseph Huber   }
26146fc51c9fSJoseph Huber 
26156fc51c9fSJoseph Huber   return *AA;
26166fc51c9fSJoseph Huber }
26176fc51c9fSJoseph Huber 
2618b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
26195ccb7424SJoseph Huber   if (!containsOpenMP(M))
2620b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2621b2ad63d3SJoseph Huber   if (DisableOpenMPOptimizations)
2622b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2623b2ad63d3SJoseph Huber 
26240edb8777SJoseph Huber   FunctionAnalysisManager &FAM =
26250edb8777SJoseph Huber       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
26265ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
26275ccb7424SJoseph Huber 
262857ad2e10SJoseph Huber   auto IsCalled = [&](Function &F) {
262957ad2e10SJoseph Huber     if (Kernels.contains(&F))
263057ad2e10SJoseph Huber       return true;
263157ad2e10SJoseph Huber     for (const User *U : F.users())
263257ad2e10SJoseph Huber       if (!isa<BlockAddress>(U))
263357ad2e10SJoseph Huber         return true;
263457ad2e10SJoseph Huber     return false;
263557ad2e10SJoseph Huber   };
263657ad2e10SJoseph Huber 
26370edb8777SJoseph Huber   auto EmitRemark = [&](Function &F) {
26380edb8777SJoseph Huber     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
26390edb8777SJoseph Huber     ORE.emit([&]() {
2640ecabc668SJoseph Huber       OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "InternalizationFailure", &F);
2641ecabc668SJoseph Huber       return ORA << "Could not internalize function. "
26420edb8777SJoseph Huber                  << "Some optimizations may not be possible.";
26430edb8777SJoseph Huber     });
26440edb8777SJoseph Huber   };
26450edb8777SJoseph Huber 
264657ad2e10SJoseph Huber   // Create internal copies of each function if this is a kernel Module. This
264757ad2e10SJoseph Huber   // allows iterprocedural passes to see every call edge.
264803d7e61cSJoseph Huber   DenseSet<const Function *> InternalizedFuncs;
26495ccb7424SJoseph Huber   if (isOpenMPDevice(M))
265003d7e61cSJoseph Huber     for (Function &F : M)
26510edb8777SJoseph Huber       if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) {
26520edb8777SJoseph Huber         if (Attributor::internalizeFunction(F, /* Force */ true)) {
265303d7e61cSJoseph Huber           InternalizedFuncs.insert(&F);
2654ecabc668SJoseph Huber         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
26550edb8777SJoseph Huber           EmitRemark(F);
26560edb8777SJoseph Huber         }
26570edb8777SJoseph Huber       }
265803d7e61cSJoseph Huber 
265957ad2e10SJoseph Huber   // Look at every function in the Module unless it was internalized.
2660b2ad63d3SJoseph Huber   SmallVector<Function *, 16> SCC;
266103d7e61cSJoseph Huber   for (Function &F : M)
266203d7e61cSJoseph Huber     if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
266303d7e61cSJoseph Huber       SCC.push_back(&F);
2664b2ad63d3SJoseph Huber 
2665b2ad63d3SJoseph Huber   if (SCC.empty())
2666b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
2667b2ad63d3SJoseph Huber 
2668b2ad63d3SJoseph Huber   AnalysisGetter AG(FAM);
2669b2ad63d3SJoseph Huber 
2670b2ad63d3SJoseph Huber   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
2671b2ad63d3SJoseph Huber     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
2672b2ad63d3SJoseph Huber   };
2673b2ad63d3SJoseph Huber 
2674b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
2675b2ad63d3SJoseph Huber   CallGraphUpdater CGUpdater;
2676b2ad63d3SJoseph Huber 
2677b2ad63d3SJoseph Huber   SetVector<Function *> Functions(SCC.begin(), SCC.end());
26785ccb7424SJoseph Huber   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
2679b2ad63d3SJoseph Huber 
268013b2fba2SJoseph Huber   unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
26814a6bd8e3SJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
268213b2fba2SJoseph Huber                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2683b2ad63d3SJoseph Huber 
2684b2ad63d3SJoseph Huber   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2685b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(true);
2686b2ad63d3SJoseph Huber   if (Changed)
2687b2ad63d3SJoseph Huber     return PreservedAnalyses::none();
2688b2ad63d3SJoseph Huber 
2689b2ad63d3SJoseph Huber   return PreservedAnalyses::all();
2690b2ad63d3SJoseph Huber }
2691b2ad63d3SJoseph Huber 
2692b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
26939548b74aSJohannes Doerfert                                           CGSCCAnalysisManager &AM,
2694b2ad63d3SJoseph Huber                                           LazyCallGraph &CG,
2695b2ad63d3SJoseph Huber                                           CGSCCUpdateResult &UR) {
26965ccb7424SJoseph Huber   if (!containsOpenMP(*C.begin()->getFunction().getParent()))
26979548b74aSJohannes Doerfert     return PreservedAnalyses::all();
26989548b74aSJohannes Doerfert   if (DisableOpenMPOptimizations)
26999548b74aSJohannes Doerfert     return PreservedAnalyses::all();
27009548b74aSJohannes Doerfert 
2701ee17263aSJohannes Doerfert   SmallVector<Function *, 16> SCC;
2702351d234dSRoman Lebedev   // If there are kernels in the module, we have to run on all SCC's.
2703351d234dSRoman Lebedev   for (LazyCallGraph::Node &N : C) {
2704351d234dSRoman Lebedev     Function *Fn = &N.getFunction();
2705351d234dSRoman Lebedev     SCC.push_back(Fn);
2706351d234dSRoman Lebedev   }
2707351d234dSRoman Lebedev 
27085ccb7424SJoseph Huber   if (SCC.empty())
27099548b74aSJohannes Doerfert     return PreservedAnalyses::all();
27109548b74aSJohannes Doerfert 
27115ccb7424SJoseph Huber   Module &M = *C.begin()->getFunction().getParent();
27125ccb7424SJoseph Huber 
27135ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
27145ccb7424SJoseph Huber 
27154d4ea9acSHuber, Joseph   FunctionAnalysisManager &FAM =
27164d4ea9acSHuber, Joseph       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
27177cfd267cSsstefan1 
27187cfd267cSsstefan1   AnalysisGetter AG(FAM);
27197cfd267cSsstefan1 
27207cfd267cSsstefan1   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
27214d4ea9acSHuber, Joseph     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
27224d4ea9acSHuber, Joseph   };
27234d4ea9acSHuber, Joseph 
2724b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
27259548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27269548b74aSJohannes Doerfert   CGUpdater.initialize(CG, C, AM, UR);
27277cfd267cSsstefan1 
27287cfd267cSsstefan1   SetVector<Function *> Functions(SCC.begin(), SCC.end());
27297cfd267cSsstefan1   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
27305ccb7424SJoseph Huber                                 /*CGSCC*/ Functions, Kernels);
27317cfd267cSsstefan1 
273213b2fba2SJoseph Huber   unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
27334a6bd8e3SJoseph Huber   Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
273413b2fba2SJoseph Huber                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2735b8235d2bSsstefan1 
2736b8235d2bSsstefan1   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2737b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(false);
2738694ded37SGiorgis Georgakoudis   if (Changed)
2739694ded37SGiorgis Georgakoudis     return PreservedAnalyses::none();
2740694ded37SGiorgis Georgakoudis 
27419548b74aSJohannes Doerfert   return PreservedAnalyses::all();
27429548b74aSJohannes Doerfert }
27438b57ed09SJoseph Huber 
27449548b74aSJohannes Doerfert namespace {
27459548b74aSJohannes Doerfert 
2746b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27479548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
27489548b74aSJohannes Doerfert   static char ID;
27499548b74aSJohannes Doerfert 
2750b2ad63d3SJoseph Huber   OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
2751b2ad63d3SJoseph Huber     initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
27529548b74aSJohannes Doerfert   }
27539548b74aSJohannes Doerfert 
27549548b74aSJohannes Doerfert   void getAnalysisUsage(AnalysisUsage &AU) const override {
27559548b74aSJohannes Doerfert     CallGraphSCCPass::getAnalysisUsage(AU);
27569548b74aSJohannes Doerfert   }
27579548b74aSJohannes Doerfert 
27589548b74aSJohannes Doerfert   bool runOnSCC(CallGraphSCC &CGSCC) override {
27595ccb7424SJoseph Huber     if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
27609548b74aSJohannes Doerfert       return false;
27619548b74aSJohannes Doerfert     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
27629548b74aSJohannes Doerfert       return false;
27639548b74aSJohannes Doerfert 
2764ee17263aSJohannes Doerfert     SmallVector<Function *, 16> SCC;
2765351d234dSRoman Lebedev     // If there are kernels in the module, we have to run on all SCC's.
2766351d234dSRoman Lebedev     for (CallGraphNode *CGN : CGSCC) {
2767351d234dSRoman Lebedev       Function *Fn = CGN->getFunction();
2768351d234dSRoman Lebedev       if (!Fn || Fn->isDeclaration())
2769351d234dSRoman Lebedev         continue;
2770ee17263aSJohannes Doerfert       SCC.push_back(Fn);
2771351d234dSRoman Lebedev     }
2772351d234dSRoman Lebedev 
27735ccb7424SJoseph Huber     if (SCC.empty())
27749548b74aSJohannes Doerfert       return false;
27759548b74aSJohannes Doerfert 
27765ccb7424SJoseph Huber     Module &M = CGSCC.getCallGraph().getModule();
27775ccb7424SJoseph Huber     KernelSet Kernels = getDeviceKernels(M);
27785ccb7424SJoseph Huber 
27799548b74aSJohannes Doerfert     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
27809548b74aSJohannes Doerfert     CGUpdater.initialize(CG, CGSCC);
27819548b74aSJohannes Doerfert 
27824d4ea9acSHuber, Joseph     // Maintain a map of functions to avoid rebuilding the ORE
27834d4ea9acSHuber, Joseph     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
27844d4ea9acSHuber, Joseph     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
27854d4ea9acSHuber, Joseph       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
27864d4ea9acSHuber, Joseph       if (!ORE)
27874d4ea9acSHuber, Joseph         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
27884d4ea9acSHuber, Joseph       return *ORE;
27894d4ea9acSHuber, Joseph     };
27904d4ea9acSHuber, Joseph 
27917cfd267cSsstefan1     AnalysisGetter AG;
27927cfd267cSsstefan1     SetVector<Function *> Functions(SCC.begin(), SCC.end());
27937cfd267cSsstefan1     BumpPtrAllocator Allocator;
27945ccb7424SJoseph Huber     OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
27955ccb7424SJoseph Huber                                   Allocator,
27965ccb7424SJoseph Huber                                   /*CGSCC*/ Functions, Kernels);
27977cfd267cSsstefan1 
279813b2fba2SJoseph Huber     unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
279930e36c9bSJoseph Huber     Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
280013b2fba2SJoseph Huber                  MaxFixpointIterations, OREGetter, DEBUG_TYPE);
2801b8235d2bSsstefan1 
2802b8235d2bSsstefan1     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
2803b2ad63d3SJoseph Huber     return OMPOpt.run(false);
28049548b74aSJohannes Doerfert   }
28059548b74aSJohannes Doerfert 
28069548b74aSJohannes Doerfert   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
28079548b74aSJohannes Doerfert };
28089548b74aSJohannes Doerfert 
28099548b74aSJohannes Doerfert } // end anonymous namespace
28109548b74aSJohannes Doerfert 
28115ccb7424SJoseph Huber KernelSet llvm::omp::getDeviceKernels(Module &M) {
28125ccb7424SJoseph Huber   // TODO: Create a more cross-platform way of determining device kernels.
2813e8039ad4SJohannes Doerfert   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
28145ccb7424SJoseph Huber   KernelSet Kernels;
28155ccb7424SJoseph Huber 
2816e8039ad4SJohannes Doerfert   if (!MD)
28175ccb7424SJoseph Huber     return Kernels;
2818e8039ad4SJohannes Doerfert 
2819e8039ad4SJohannes Doerfert   for (auto *Op : MD->operands()) {
2820e8039ad4SJohannes Doerfert     if (Op->getNumOperands() < 2)
2821e8039ad4SJohannes Doerfert       continue;
2822e8039ad4SJohannes Doerfert     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
2823e8039ad4SJohannes Doerfert     if (!KindID || KindID->getString() != "kernel")
2824e8039ad4SJohannes Doerfert       continue;
2825e8039ad4SJohannes Doerfert 
2826e8039ad4SJohannes Doerfert     Function *KernelFn =
2827e8039ad4SJohannes Doerfert         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
2828e8039ad4SJohannes Doerfert     if (!KernelFn)
2829e8039ad4SJohannes Doerfert       continue;
2830e8039ad4SJohannes Doerfert 
2831e8039ad4SJohannes Doerfert     ++NumOpenMPTargetRegionKernels;
2832e8039ad4SJohannes Doerfert 
2833e8039ad4SJohannes Doerfert     Kernels.insert(KernelFn);
2834e8039ad4SJohannes Doerfert   }
28355ccb7424SJoseph Huber 
28365ccb7424SJoseph Huber   return Kernels;
2837e8039ad4SJohannes Doerfert }
2838e8039ad4SJohannes Doerfert 
28395ccb7424SJoseph Huber bool llvm::omp::containsOpenMP(Module &M) {
28405ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp");
28415ccb7424SJoseph Huber   if (!MD)
28425ccb7424SJoseph Huber     return false;
2843dce6bc18SJohannes Doerfert 
2844e8039ad4SJohannes Doerfert   return true;
2845e8039ad4SJohannes Doerfert }
2846e8039ad4SJohannes Doerfert 
28475ccb7424SJoseph Huber bool llvm::omp::isOpenMPDevice(Module &M) {
28485ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp-device");
28495ccb7424SJoseph Huber   if (!MD)
28505ccb7424SJoseph Huber     return false;
28515ccb7424SJoseph Huber 
28525ccb7424SJoseph Huber   return true;
28539548b74aSJohannes Doerfert }
28549548b74aSJohannes Doerfert 
2855b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0;
28569548b74aSJohannes Doerfert 
2857b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28589548b74aSJohannes Doerfert                       "OpenMP specific optimizations", false, false)
28599548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
2860b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
28619548b74aSJohannes Doerfert                     "OpenMP specific optimizations", false, false)
28629548b74aSJohannes Doerfert 
2863b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
2864b2ad63d3SJoseph Huber   return new OpenMPOptCGSCCLegacyPass();
2865b2ad63d3SJoseph Huber }
2866