19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
29548b74aSJohannes Doerfert //
39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69548b74aSJohannes Doerfert //
79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
89548b74aSJohannes Doerfert //
99548b74aSJohannes Doerfert // OpenMP specific optimizations:
109548b74aSJohannes Doerfert //
119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12ca1560daSJoseph Huber // - Replacing globalized device memory with stack memory.
13ca1560daSJoseph Huber // - Replacing globalized device memory with shared memory.
14b910a109SJoseph Huber // - Parallel region merging.
15b910a109SJoseph Huber // - Transforming generic-mode device kernels to SPMD mode.
16b910a109SJoseph Huber // - Specializing the state machine for generic-mode device kernels.
179548b74aSJohannes Doerfert //
189548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
199548b74aSJohannes Doerfert 
209548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
219548b74aSJohannes Doerfert 
229548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
2318283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h"
249f04a0eaSJohannes Doerfert #include "llvm/ADT/SetVector.h"
259548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
26e6e440aeSJohannes Doerfert #include "llvm/ADT/StringRef.h"
279548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
289548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
293c8a4c6fSJohannes Doerfert #include "llvm/Analysis/MemoryLocation.h"
304d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
313a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h"
329548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
33e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
34d9659bf6SJohannes Doerfert #include "llvm/IR/Assumptions.h"
353c8a4c6fSJohannes Doerfert #include "llvm/IR/Constants.h"
36d9659bf6SJohannes Doerfert #include "llvm/IR/DiagnosticInfo.h"
37514c033dSJohannes Doerfert #include "llvm/IR/GlobalValue.h"
383c8a4c6fSJohannes Doerfert #include "llvm/IR/GlobalVariable.h"
39d9659bf6SJohannes Doerfert #include "llvm/IR/Instruction.h"
403c8a4c6fSJohannes Doerfert #include "llvm/IR/Instructions.h"
4168abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h"
4227905eebSJoseph Huber #include "llvm/IR/IntrinsicsAMDGPU.h"
4327905eebSJoseph Huber #include "llvm/IR/IntrinsicsNVPTX.h"
443c8a4c6fSJohannes Doerfert #include "llvm/IR/LLVMContext.h"
459548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
469548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
473c8a4c6fSJohannes Doerfert #include "llvm/Support/Debug.h"
489548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
497cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
503a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h"
519548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
529548b74aSJohannes Doerfert 
53e6e440aeSJohannes Doerfert #include <algorithm>
54e6e440aeSJohannes Doerfert 
559548b74aSJohannes Doerfert using namespace llvm;
569548b74aSJohannes Doerfert using namespace omp;
579548b74aSJohannes Doerfert 
589548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
599548b74aSJohannes Doerfert 
609548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
61557efc9aSFangrui Song     "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
62557efc9aSFangrui Song     cl::Hidden, cl::init(false));
639548b74aSJohannes Doerfert 
643a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging(
65557efc9aSFangrui Song     "openmp-opt-enable-merging",
663a6bfcf2SGiorgis Georgakoudis     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
673a6bfcf2SGiorgis Georgakoudis     cl::init(false));
683a6bfcf2SGiorgis Georgakoudis 
694a668604SJoseph Huber static cl::opt<bool>
70557efc9aSFangrui Song     DisableInternalization("openmp-opt-disable-internalization",
714a668604SJoseph Huber                            cl::desc("Disable function internalization."),
724a668604SJoseph Huber                            cl::Hidden, cl::init(false));
734a668604SJoseph Huber 
740f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
750f426935Ssstefan1                                     cl::Hidden);
76e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
77e8039ad4SJohannes Doerfert                                         cl::init(false), cl::Hidden);
780f426935Ssstefan1 
79496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
80496f8e5bSHamilton Tobon Mosquera     "openmp-hide-memory-transfer-latency",
81496f8e5bSHamilton Tobon Mosquera     cl::desc("[WIP] Tries to hide the latency of host to device memory"
82496f8e5bSHamilton Tobon Mosquera              " transfers"),
83496f8e5bSHamilton Tobon Mosquera     cl::Hidden, cl::init(false));
84496f8e5bSHamilton Tobon Mosquera 
85cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptDeglobalization(
86557efc9aSFangrui Song     "openmp-opt-disable-deglobalization",
87cd0dd8ecSJoseph Huber     cl::desc("Disable OpenMP optimizations involving deglobalization."),
88cd0dd8ecSJoseph Huber     cl::Hidden, cl::init(false));
89cd0dd8ecSJoseph Huber 
90cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptSPMDization(
91557efc9aSFangrui Song     "openmp-opt-disable-spmdization",
92cd0dd8ecSJoseph Huber     cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
93cd0dd8ecSJoseph Huber     cl::Hidden, cl::init(false));
94cd0dd8ecSJoseph Huber 
95cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptFolding(
96557efc9aSFangrui Song     "openmp-opt-disable-folding",
97cd0dd8ecSJoseph Huber     cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
98cd0dd8ecSJoseph Huber     cl::init(false));
99cd0dd8ecSJoseph Huber 
100cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
101557efc9aSFangrui Song     "openmp-opt-disable-state-machine-rewrite",
102cd0dd8ecSJoseph Huber     cl::desc("Disable OpenMP optimizations that replace the state machine."),
103cd0dd8ecSJoseph Huber     cl::Hidden, cl::init(false));
104cd0dd8ecSJoseph Huber 
1053c8a4c6fSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptBarrierElimination(
106557efc9aSFangrui Song     "openmp-opt-disable-barrier-elimination",
1073c8a4c6fSJohannes Doerfert     cl::desc("Disable OpenMP optimizations that eliminate barriers."),
1083c8a4c6fSJohannes Doerfert     cl::Hidden, cl::init(false));
1093c8a4c6fSJohannes Doerfert 
110339aa765SJoseph Huber static cl::opt<bool> PrintModuleAfterOptimizations(
111557efc9aSFangrui Song     "openmp-opt-print-module-after",
112339aa765SJoseph Huber     cl::desc("Print the current module after OpenMP optimizations."),
113339aa765SJoseph Huber     cl::Hidden, cl::init(false));
114339aa765SJoseph Huber 
11566321807SJoseph Huber static cl::opt<bool> PrintModuleBeforeOptimizations(
116557efc9aSFangrui Song     "openmp-opt-print-module-before",
11766321807SJoseph Huber     cl::desc("Print the current module before OpenMP optimizations."),
11866321807SJoseph Huber     cl::Hidden, cl::init(false));
11966321807SJoseph Huber 
12029a74a39SJoseph Huber static cl::opt<bool> AlwaysInlineDeviceFunctions(
121557efc9aSFangrui Song     "openmp-opt-inline-device",
12229a74a39SJoseph Huber     cl::desc("Inline all applicible functions on the device."), cl::Hidden,
12329a74a39SJoseph Huber     cl::init(false));
12429a74a39SJoseph Huber 
1257eb899cbSJoseph Huber static cl::opt<bool>
126557efc9aSFangrui Song     EnableVerboseRemarks("openmp-opt-verbose-remarks",
1277eb899cbSJoseph Huber                          cl::desc("Enables more verbose remarks."), cl::Hidden,
1287eb899cbSJoseph Huber                          cl::init(false));
1297eb899cbSJoseph Huber 
130f074a6a0SJoseph Huber static cl::opt<unsigned>
131f074a6a0SJoseph Huber     SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
132f074a6a0SJoseph Huber                           cl::desc("Maximal number of attributor iterations."),
133f074a6a0SJoseph Huber                           cl::init(256));
134f074a6a0SJoseph Huber 
1350136a440SJoseph Huber static cl::opt<unsigned>
1360136a440SJoseph Huber     SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
1370136a440SJoseph Huber                       cl::desc("Maximum amount of shared memory to use."),
1380136a440SJoseph Huber                       cl::init(std::numeric_limits<unsigned>::max()));
1390136a440SJoseph Huber 
1409548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
1419548b74aSJohannes Doerfert           "Number of OpenMP runtime calls deduplicated");
14255eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
14355eb714aSRoman Lebedev           "Number of OpenMP parallel regions deleted");
1449548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
1459548b74aSJohannes Doerfert           "Number of OpenMP runtime functions identified");
1469548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
1479548b74aSJohannes Doerfert           "Number of OpenMP runtime function uses identified");
148e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
149e8039ad4SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) identified");
150514c033dSJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
151514c033dSJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) executed in "
152514c033dSJohannes Doerfert           "SPMD-mode instead of generic-mode");
153d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
154d9659bf6SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) executed in "
155d9659bf6SJohannes Doerfert           "generic-mode without a state machines");
156d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
157d9659bf6SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) executed in "
158d9659bf6SJohannes Doerfert           "generic-mode with customized state machines with fallback");
159d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
160d9659bf6SJohannes Doerfert           "Number of OpenMP target region entry points (=kernels) executed in "
161d9659bf6SJohannes Doerfert           "generic-mode with customized state machines without fallback");
1625b0581aeSJohannes Doerfert STATISTIC(
1635b0581aeSJohannes Doerfert     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
1645b0581aeSJohannes Doerfert     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
1653a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged,
1663a6bfcf2SGiorgis Georgakoudis           "Number of OpenMP parallel regions merged");
1676fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory,
1686fc51c9fSJoseph Huber           "Amount of memory pushed to shared memory");
1693c8a4c6fSJohannes Doerfert STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
1709548b74aSJohannes Doerfert 
171263c4a3cSrathod-sahaab #if !defined(NDEBUG)
1729548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
173a50c0b0dSMikael Holmen #endif
1749548b74aSJohannes Doerfert 
1759548b74aSJohannes Doerfert namespace {
1769548b74aSJohannes Doerfert 
// Forward declarations, so both types can be named before a definition is
// visible.
struct AAHeapToShared;
struct AAICVTracker;
180b8235d2bSsstefan1 
1817cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
1827cfd267cSsstefan1 /// Attributor runs.
1837cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
OMPInformationCache__anon23c38c770111::OMPInformationCache1847cfd267cSsstefan1   OMPInformationCache(Module &M, AnalysisGetter &AG,
185624d34afSJohannes Doerfert                       BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
18686cdff0eSEli Friedman                       KernelSet &Kernels)
187624d34afSJohannes Doerfert       : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
188624d34afSJohannes Doerfert         Kernels(Kernels) {
189624d34afSJohannes Doerfert 
19061238d26Ssstefan1     OMPBuilder.initialize();
1919548b74aSJohannes Doerfert     initializeRuntimeFunctions();
1920f426935Ssstefan1     initializeInternalControlVars();
1939548b74aSJohannes Doerfert   }
1949548b74aSJohannes Doerfert 
1950f426935Ssstefan1   /// Generic information that describes an internal control variable.
1960f426935Ssstefan1   struct InternalControlVarInfo {
1970f426935Ssstefan1     /// The kind, as described by InternalControlVar enum.
1980f426935Ssstefan1     InternalControlVar Kind;
1990f426935Ssstefan1 
2000f426935Ssstefan1     /// The name of the ICV.
2010f426935Ssstefan1     StringRef Name;
2020f426935Ssstefan1 
2030f426935Ssstefan1     /// Environment variable associated with this ICV.
2040f426935Ssstefan1     StringRef EnvVarName;
2050f426935Ssstefan1 
2060f426935Ssstefan1     /// Initial value kind.
2070f426935Ssstefan1     ICVInitValue InitKind;
2080f426935Ssstefan1 
2090f426935Ssstefan1     /// Initial value.
2100f426935Ssstefan1     ConstantInt *InitValue;
2110f426935Ssstefan1 
2120f426935Ssstefan1     /// Setter RTL function associated with this ICV.
2130f426935Ssstefan1     RuntimeFunction Setter;
2140f426935Ssstefan1 
2150f426935Ssstefan1     /// Getter RTL function associated with this ICV.
2160f426935Ssstefan1     RuntimeFunction Getter;
2170f426935Ssstefan1 
2180f426935Ssstefan1     /// RTL Function corresponding to the override clause of this ICV
2190f426935Ssstefan1     RuntimeFunction Clause;
2200f426935Ssstefan1   };
2210f426935Ssstefan1 
2229548b74aSJohannes Doerfert   /// Generic information that describes a runtime function
2239548b74aSJohannes Doerfert   struct RuntimeFunctionInfo {
2248855fec3SJohannes Doerfert 
2259548b74aSJohannes Doerfert     /// The kind, as described by the RuntimeFunction enum.
2269548b74aSJohannes Doerfert     RuntimeFunction Kind;
2279548b74aSJohannes Doerfert 
2289548b74aSJohannes Doerfert     /// The name of the function.
2299548b74aSJohannes Doerfert     StringRef Name;
2309548b74aSJohannes Doerfert 
2319548b74aSJohannes Doerfert     /// Flag to indicate a variadic function.
2329548b74aSJohannes Doerfert     bool IsVarArg;
2339548b74aSJohannes Doerfert 
2349548b74aSJohannes Doerfert     /// The return type of the function.
2359548b74aSJohannes Doerfert     Type *ReturnType;
2369548b74aSJohannes Doerfert 
2379548b74aSJohannes Doerfert     /// The argument types of the function.
2389548b74aSJohannes Doerfert     SmallVector<Type *, 8> ArgumentTypes;
2399548b74aSJohannes Doerfert 
2409548b74aSJohannes Doerfert     /// The declaration if available.
241f09f4b26SJohannes Doerfert     Function *Declaration = nullptr;
2429548b74aSJohannes Doerfert 
2439548b74aSJohannes Doerfert     /// Uses of this runtime function per function containing the use.
2448855fec3SJohannes Doerfert     using UseVector = SmallVector<Use *, 16>;
2458855fec3SJohannes Doerfert 
246b8235d2bSsstefan1     /// Clear UsesMap for runtime function.
clearUsesMap__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo247b8235d2bSsstefan1     void clearUsesMap() { UsesMap.clear(); }
248b8235d2bSsstefan1 
24954bd3751SJohannes Doerfert     /// Boolean conversion that is true if the runtime function was found.
operator bool__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo25054bd3751SJohannes Doerfert     operator bool() const { return Declaration; }
25154bd3751SJohannes Doerfert 
2528855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F.
getOrCreateUseVector__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2538855fec3SJohannes Doerfert     UseVector &getOrCreateUseVector(Function *F) {
254b8235d2bSsstefan1       std::shared_ptr<UseVector> &UV = UsesMap[F];
2558855fec3SJohannes Doerfert       if (!UV)
256b8235d2bSsstefan1         UV = std::make_shared<UseVector>();
2578855fec3SJohannes Doerfert       return *UV;
2588855fec3SJohannes Doerfert     }
2598855fec3SJohannes Doerfert 
2608855fec3SJohannes Doerfert     /// Return the vector of uses in function \p F or `nullptr` if there are
2618855fec3SJohannes Doerfert     /// none.
getUseVector__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2628855fec3SJohannes Doerfert     const UseVector *getUseVector(Function &F) const {
26395e57072SDavid Blaikie       auto I = UsesMap.find(&F);
26495e57072SDavid Blaikie       if (I != UsesMap.end())
26595e57072SDavid Blaikie         return I->second.get();
26695e57072SDavid Blaikie       return nullptr;
2678855fec3SJohannes Doerfert     }
2688855fec3SJohannes Doerfert 
2698855fec3SJohannes Doerfert     /// Return how many functions contain uses of this runtime function.
getNumFunctionsWithUses__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2708855fec3SJohannes Doerfert     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
2719548b74aSJohannes Doerfert 
2729548b74aSJohannes Doerfert     /// Return the number of arguments (or the minimal number for variadic
2739548b74aSJohannes Doerfert     /// functions).
getNumArgs__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2749548b74aSJohannes Doerfert     size_t getNumArgs() const { return ArgumentTypes.size(); }
2759548b74aSJohannes Doerfert 
2769548b74aSJohannes Doerfert     /// Run the callback \p CB on each use and forget the use if the result is
2779548b74aSJohannes Doerfert     /// true. The callback will be fed the function in which the use was
2789548b74aSJohannes Doerfert     /// encountered as second argument.
foreachUse__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo279624d34afSJohannes Doerfert     void foreachUse(SmallVectorImpl<Function *> &SCC,
280624d34afSJohannes Doerfert                     function_ref<bool(Use &, Function &)> CB) {
281624d34afSJohannes Doerfert       for (Function *F : SCC)
282624d34afSJohannes Doerfert         foreachUse(CB, F);
283e099c7b6Ssstefan1     }
284e099c7b6Ssstefan1 
285e099c7b6Ssstefan1     /// Run the callback \p CB on each use within the function \p F and forget
286e099c7b6Ssstefan1     /// the use if the result is true.
foreachUse__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo287624d34afSJohannes Doerfert     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
2888855fec3SJohannes Doerfert       SmallVector<unsigned, 8> ToBeDeleted;
2899548b74aSJohannes Doerfert       ToBeDeleted.clear();
290e099c7b6Ssstefan1 
2918855fec3SJohannes Doerfert       unsigned Idx = 0;
292624d34afSJohannes Doerfert       UseVector &UV = getOrCreateUseVector(F);
293e099c7b6Ssstefan1 
2948855fec3SJohannes Doerfert       for (Use *U : UV) {
295e099c7b6Ssstefan1         if (CB(*U, *F))
2968855fec3SJohannes Doerfert           ToBeDeleted.push_back(Idx);
2978855fec3SJohannes Doerfert         ++Idx;
2988855fec3SJohannes Doerfert       }
2998855fec3SJohannes Doerfert 
3008855fec3SJohannes Doerfert       // Remove the to-be-deleted indices in reverse order as prior
301b726c557SJohannes Doerfert       // modifications will not modify the smaller indices.
3028855fec3SJohannes Doerfert       while (!ToBeDeleted.empty()) {
3038855fec3SJohannes Doerfert         unsigned Idx = ToBeDeleted.pop_back_val();
3048855fec3SJohannes Doerfert         UV[Idx] = UV.back();
3058855fec3SJohannes Doerfert         UV.pop_back();
3069548b74aSJohannes Doerfert       }
3079548b74aSJohannes Doerfert     }
3088855fec3SJohannes Doerfert 
3098855fec3SJohannes Doerfert   private:
3108855fec3SJohannes Doerfert     /// Map from functions to all uses of this runtime function contained in
3118855fec3SJohannes Doerfert     /// them.
312b8235d2bSsstefan1     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
313d9659bf6SJohannes Doerfert 
314d9659bf6SJohannes Doerfert   public:
315d9659bf6SJohannes Doerfert     /// Iterators for the uses of this runtime function.
begin__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo316d9659bf6SJohannes Doerfert     decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
end__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo317d9659bf6SJohannes Doerfert     decltype(UsesMap)::iterator end() { return UsesMap.end(); }
3189548b74aSJohannes Doerfert   };
3199548b74aSJohannes Doerfert 
3207cfd267cSsstefan1   /// An OpenMP-IR-Builder instance
3217cfd267cSsstefan1   OpenMPIRBuilder OMPBuilder;
3227cfd267cSsstefan1 
3237cfd267cSsstefan1   /// Map from runtime function kind to the runtime function description.
3247cfd267cSsstefan1   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
3257cfd267cSsstefan1                   RuntimeFunction::OMPRTL___last>
3267cfd267cSsstefan1       RFIs;
3277cfd267cSsstefan1 
328d9659bf6SJohannes Doerfert   /// Map from function declarations/definitions to their runtime enum type.
329d9659bf6SJohannes Doerfert   DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
330d9659bf6SJohannes Doerfert 
3310f426935Ssstefan1   /// Map from ICV kind to the ICV description.
3320f426935Ssstefan1   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
3330f426935Ssstefan1                   InternalControlVar::ICV___last>
3340f426935Ssstefan1       ICVs;
3350f426935Ssstefan1 
3360f426935Ssstefan1   /// Helper to initialize all internal control variable information for those
3370f426935Ssstefan1   /// defined in OMPKinds.def.
initializeInternalControlVars__anon23c38c770111::OMPInformationCache3380f426935Ssstefan1   void initializeInternalControlVars() {
3390f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL)                                                 \
3400f426935Ssstefan1   {                                                                            \
3410f426935Ssstefan1     auto &ICV = ICVs[_Name];                                                   \
3420f426935Ssstefan1     ICV.Setter = RTL;                                                          \
3430f426935Ssstefan1   }
3440f426935Ssstefan1 #define ICV_RT_GET(Name, RTL)                                                  \
3450f426935Ssstefan1   {                                                                            \
3460f426935Ssstefan1     auto &ICV = ICVs[Name];                                                    \
3470f426935Ssstefan1     ICV.Getter = RTL;                                                          \
3480f426935Ssstefan1   }
3490f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
3500f426935Ssstefan1   {                                                                            \
3510f426935Ssstefan1     auto &ICV = ICVs[Enum];                                                    \
3520f426935Ssstefan1     ICV.Name = _Name;                                                          \
3530f426935Ssstefan1     ICV.Kind = Enum;                                                           \
3540f426935Ssstefan1     ICV.InitKind = Init;                                                       \
3550f426935Ssstefan1     ICV.EnvVarName = _EnvVarName;                                              \
3560f426935Ssstefan1     switch (ICV.InitKind) {                                                    \
357951e43f3Ssstefan1     case ICV_IMPLEMENTATION_DEFINED:                                           \
3580f426935Ssstefan1       ICV.InitValue = nullptr;                                                 \
3590f426935Ssstefan1       break;                                                                   \
360951e43f3Ssstefan1     case ICV_ZERO:                                                             \
3616aab27baSsstefan1       ICV.InitValue = ConstantInt::get(                                        \
3626aab27baSsstefan1           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
3630f426935Ssstefan1       break;                                                                   \
364951e43f3Ssstefan1     case ICV_FALSE:                                                            \
3656aab27baSsstefan1       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
3660f426935Ssstefan1       break;                                                                   \
367951e43f3Ssstefan1     case ICV_LAST:                                                             \
3680f426935Ssstefan1       break;                                                                   \
3690f426935Ssstefan1     }                                                                          \
3700f426935Ssstefan1   }
3710f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3720f426935Ssstefan1   }
3730f426935Ssstefan1 
3747cfd267cSsstefan1   /// Returns true if the function declaration \p F matches the runtime
3757cfd267cSsstefan1   /// function types, that is, return type \p RTFRetType, and argument types
3767cfd267cSsstefan1   /// \p RTFArgTypes.
declMatchesRTFTypes__anon23c38c770111::OMPInformationCache3777cfd267cSsstefan1   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
3787cfd267cSsstefan1                                   SmallVector<Type *, 8> &RTFArgTypes) {
3797cfd267cSsstefan1     // TODO: We should output information to the user (under debug output
3807cfd267cSsstefan1     //       and via remarks).
3817cfd267cSsstefan1 
3827cfd267cSsstefan1     if (!F)
3837cfd267cSsstefan1       return false;
3847cfd267cSsstefan1     if (F->getReturnType() != RTFRetType)
3857cfd267cSsstefan1       return false;
3867cfd267cSsstefan1     if (F->arg_size() != RTFArgTypes.size())
3877cfd267cSsstefan1       return false;
3887cfd267cSsstefan1 
389c11ebfeaSJoseph Huber     auto *RTFTyIt = RTFArgTypes.begin();
3907cfd267cSsstefan1     for (Argument &Arg : F->args()) {
3917cfd267cSsstefan1       if (Arg.getType() != *RTFTyIt)
3927cfd267cSsstefan1         return false;
3937cfd267cSsstefan1 
3947cfd267cSsstefan1       ++RTFTyIt;
3957cfd267cSsstefan1     }
3967cfd267cSsstefan1 
3977cfd267cSsstefan1     return true;
3987cfd267cSsstefan1   }
3997cfd267cSsstefan1 
400b726c557SJohannes Doerfert   // Helper to collect all uses of the declaration in the UsesMap.
collectUses__anon23c38c770111::OMPInformationCache401b8235d2bSsstefan1   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
4027cfd267cSsstefan1     unsigned NumUses = 0;
4037cfd267cSsstefan1     if (!RFI.Declaration)
4047cfd267cSsstefan1       return NumUses;
4057cfd267cSsstefan1     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
4067cfd267cSsstefan1 
407b8235d2bSsstefan1     if (CollectStats) {
4087cfd267cSsstefan1       NumOpenMPRuntimeFunctionsIdentified += 1;
4097cfd267cSsstefan1       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
410b8235d2bSsstefan1     }
4117cfd267cSsstefan1 
4127cfd267cSsstefan1     // TODO: We directly convert uses into proper calls and unknown uses.
4137cfd267cSsstefan1     for (Use &U : RFI.Declaration->uses()) {
4147cfd267cSsstefan1       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
4157cfd267cSsstefan1         if (ModuleSlice.count(UserI->getFunction())) {
4167cfd267cSsstefan1           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
4177cfd267cSsstefan1           ++NumUses;
4187cfd267cSsstefan1         }
4197cfd267cSsstefan1       } else {
4207cfd267cSsstefan1         RFI.getOrCreateUseVector(nullptr).push_back(&U);
4217cfd267cSsstefan1         ++NumUses;
4227cfd267cSsstefan1       }
4237cfd267cSsstefan1     }
4247cfd267cSsstefan1     return NumUses;
425b8235d2bSsstefan1   }
4267cfd267cSsstefan1 
42797517055SGiorgis Georgakoudis   // Helper function to recollect uses of a runtime function.
recollectUsesForFunction__anon23c38c770111::OMPInformationCache42897517055SGiorgis Georgakoudis   void recollectUsesForFunction(RuntimeFunction RTF) {
42997517055SGiorgis Georgakoudis     auto &RFI = RFIs[RTF];
430b8235d2bSsstefan1     RFI.clearUsesMap();
431b8235d2bSsstefan1     collectUses(RFI, /*CollectStats*/ false);
432b8235d2bSsstefan1   }
43397517055SGiorgis Georgakoudis 
43497517055SGiorgis Georgakoudis   // Helper function to recollect uses of all runtime functions.
recollectUses__anon23c38c770111::OMPInformationCache43597517055SGiorgis Georgakoudis   void recollectUses() {
43697517055SGiorgis Georgakoudis     for (int Idx = 0; Idx < RFIs.size(); ++Idx)
43797517055SGiorgis Georgakoudis       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
438b8235d2bSsstefan1   }
439b8235d2bSsstefan1 
44006cfdd52SJoseph Huber   // Helper function to inherit the calling convention of the function callee.
setCallingConvention__anon23c38c770111::OMPInformationCache44106cfdd52SJoseph Huber   void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
44206cfdd52SJoseph Huber     if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
44306cfdd52SJoseph Huber       CI->setCallingConv(Fn->getCallingConv());
44406cfdd52SJoseph Huber   }
44506cfdd52SJoseph Huber 
446b8235d2bSsstefan1   /// Helper to initialize all runtime function information for those defined
447b8235d2bSsstefan1   /// in OpenMPKinds.def.
  /// Populate the runtime function information (RFIs) for every runtime
  /// function listed in OMPKinds.def, using the module that contains the
  /// first function of the module slice.
  ///
  /// For each OMP_RTL entry the function is looked up by name in the module,
  /// recorded in RTLFunctions, and, if declMatchesRTFTypes accepts it, its
  /// RuntimeFunctionInfo is filled in and its uses are collected. On OpenMP
  /// device modules `noinline` is additionally dropped from runtime-like
  /// functions (see below).
  void initializeRuntimeFunctions() {
    Module &M = *((*ModuleSlice.begin())->getParent());

    // Helper macros for handling __VA_ARGS__ in OMP_RTL
    // Each of the type macros below declares local variables that mirror the
    // equally named OMPBuilder members, so that the type names appearing in
    // the OMP_RTL argument lists resolve. The `(void)` uses presumably only
    // silence unused-variable warnings.
#define OMP_TYPE(VarName, ...)                                                 \
  Type *VarName = OMPBuilder.VarName;                                          \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...)                                           \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
  (void)VarName##Ty;                                                           \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
  FunctionType *VarName = OMPBuilder.VarName;                                  \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...)                                          \
  StructType *VarName = OMPBuilder.VarName;                                    \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

    // Per runtime function: look the function up by name, remember the result
    // in RTLFunctions (NOTE(review): this happens before the type check, so
    // the inserted pointer is presumably null when the module has no such
    // function -- confirm that users of RTLFunctions expect this), and only
    // if the declaration matches the expected signature register it in
    // RuntimeFunctionIDMap, fill in its RFI entry, and collect its uses.
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    RTLFunctions.insert(F);                                                    \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
      RuntimeFunctionIDMap[F] = _Enum;                                         \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = OMPBuilder._ReturnType;                                 \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = collectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
    // Including the .def file here expands the macros defined above in place,
    // inside this function body.
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
    // functions, except if `optnone` is present.
    // (`_ZN4_OMP` is the prefix the `_OMP::` names are matched by.)
    if (isOpenMPDevice(M)) {
      for (Function &F : M) {
        for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
          if (F.hasFnAttribute(Attribute::NoInline) &&
              F.getName().startswith(Prefix) &&
              !F.hasFnAttribute(Attribute::OptimizeNone))
            F.removeFnAttr(Attribute::NoInline);
      }
    }

    // TODO: We should attach the attributes defined in OMPKinds.def.
  }
516e8039ad4SJohannes Doerfert 
517e8039ad4SJohannes Doerfert   /// Collection of known kernels (\see Kernel) in the module.
51886cdff0eSEli Friedman   KernelSet &Kernels;
519eef6601bSJoseph Huber 
  /// Collection of known OpenMP runtime functions.
521eef6601bSJoseph Huber   DenseSet<const Function *> RTLFunctions;
5227cfd267cSsstefan1 };
5237cfd267cSsstefan1 
524d9659bf6SJohannes Doerfert template <typename Ty, bool InsertInvalidates = true>
5251a7f7790SShilei Tian struct BooleanStateWithSetVector : public BooleanState {
contains__anon23c38c770111::BooleanStateWithSetVector5261a7f7790SShilei Tian   bool contains(const Ty &Elem) const { return Set.contains(Elem); }
insert__anon23c38c770111::BooleanStateWithSetVector5271a7f7790SShilei Tian   bool insert(const Ty &Elem) {
528d9659bf6SJohannes Doerfert     if (InsertInvalidates)
529d9659bf6SJohannes Doerfert       BooleanState::indicatePessimisticFixpoint();
530d9659bf6SJohannes Doerfert     return Set.insert(Elem);
531d9659bf6SJohannes Doerfert   }
532d9659bf6SJohannes Doerfert 
operator []__anon23c38c770111::BooleanStateWithSetVector5331a7f7790SShilei Tian   const Ty &operator[](int Idx) const { return Set[Idx]; }
operator ==__anon23c38c770111::BooleanStateWithSetVector5341a7f7790SShilei Tian   bool operator==(const BooleanStateWithSetVector &RHS) const {
535d9659bf6SJohannes Doerfert     return BooleanState::operator==(RHS) && Set == RHS.Set;
536d9659bf6SJohannes Doerfert   }
operator !=__anon23c38c770111::BooleanStateWithSetVector5371a7f7790SShilei Tian   bool operator!=(const BooleanStateWithSetVector &RHS) const {
538d9659bf6SJohannes Doerfert     return !(*this == RHS);
539d9659bf6SJohannes Doerfert   }
540d9659bf6SJohannes Doerfert 
empty__anon23c38c770111::BooleanStateWithSetVector541d9659bf6SJohannes Doerfert   bool empty() const { return Set.empty(); }
size__anon23c38c770111::BooleanStateWithSetVector542d9659bf6SJohannes Doerfert   size_t size() const { return Set.size(); }
543d9659bf6SJohannes Doerfert 
544d9659bf6SJohannes Doerfert   /// "Clamp" this state with \p RHS.
operator ^=__anon23c38c770111::BooleanStateWithSetVector5451a7f7790SShilei Tian   BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
546d9659bf6SJohannes Doerfert     BooleanState::operator^=(RHS);
547d9659bf6SJohannes Doerfert     Set.insert(RHS.Set.begin(), RHS.Set.end());
548d9659bf6SJohannes Doerfert     return *this;
549d9659bf6SJohannes Doerfert   }
550d9659bf6SJohannes Doerfert 
551d9659bf6SJohannes Doerfert private:
552d9659bf6SJohannes Doerfert   /// A set to keep track of elements.
5531a7f7790SShilei Tian   SetVector<Ty> Set;
554d9659bf6SJohannes Doerfert 
555d9659bf6SJohannes Doerfert public:
begin__anon23c38c770111::BooleanStateWithSetVector556d9659bf6SJohannes Doerfert   typename decltype(Set)::iterator begin() { return Set.begin(); }
end__anon23c38c770111::BooleanStateWithSetVector557d9659bf6SJohannes Doerfert   typename decltype(Set)::iterator end() { return Set.end(); }
begin__anon23c38c770111::BooleanStateWithSetVector558d9659bf6SJohannes Doerfert   typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
end__anon23c38c770111::BooleanStateWithSetVector559d9659bf6SJohannes Doerfert   typename decltype(Set)::const_iterator end() const { return Set.end(); }
560d9659bf6SJohannes Doerfert };
561d9659bf6SJohannes Doerfert 
/// Convenience alias of BooleanStateWithSetVector for the common case in
/// which the tracked elements are pointers to \p Ty.
template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
    BooleanStateWithSetVector<Ty *, InsertInvalidates>;
5651a7f7790SShilei Tian 
566d9659bf6SJohannes Doerfert struct KernelInfoState : AbstractState {
567d9659bf6SJohannes Doerfert   /// Flag to track if we reached a fixpoint.
568d9659bf6SJohannes Doerfert   bool IsAtFixpoint = false;
569d9659bf6SJohannes Doerfert 
570d9659bf6SJohannes Doerfert   /// The parallel regions (identified by the outlined parallel functions) that
571d9659bf6SJohannes Doerfert   /// can be reached from the associated function.
572d9659bf6SJohannes Doerfert   BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
573d9659bf6SJohannes Doerfert       ReachedKnownParallelRegions;
574d9659bf6SJohannes Doerfert 
575d9659bf6SJohannes Doerfert   /// State to track what parallel region we might reach.
576d9659bf6SJohannes Doerfert   BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
577d9659bf6SJohannes Doerfert 
578514c033dSJohannes Doerfert   /// State to track if we are in SPMD-mode, assumed or know, and why we decided
579e8439ec8SGiorgis Georgakoudis   /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
580e8439ec8SGiorgis Georgakoudis   /// false.
58129a3e3ddSGiorgis Georgakoudis   BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
582514c033dSJohannes Doerfert 
583d9659bf6SJohannes Doerfert   /// The __kmpc_target_init call in this kernel, if any. If we find more than
584d9659bf6SJohannes Doerfert   /// one we abort as the kernel is malformed.
585d9659bf6SJohannes Doerfert   CallBase *KernelInitCB = nullptr;
586d9659bf6SJohannes Doerfert 
587d9659bf6SJohannes Doerfert   /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
588d9659bf6SJohannes Doerfert   /// one we abort as the kernel is malformed.
589d9659bf6SJohannes Doerfert   CallBase *KernelDeinitCB = nullptr;
590d9659bf6SJohannes Doerfert 
591ca662297SShilei Tian   /// Flag to indicate if the associated function is a kernel entry.
592ca662297SShilei Tian   bool IsKernelEntry = false;
593ca662297SShilei Tian 
594ca662297SShilei Tian   /// State to track what kernel entries can reach the associated function.
595ca662297SShilei Tian   BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
596ca662297SShilei Tian 
597e97e0a4fSShilei Tian   /// State to indicate if we can track parallel level of the associated
598e97e0a4fSShilei Tian   /// function. We will give up tracking if we encounter unknown caller or the
599e97e0a4fSShilei Tian   /// caller is __kmpc_parallel_51.
600e97e0a4fSShilei Tian   BooleanStateWithSetVector<uint8_t> ParallelLevels;
601e97e0a4fSShilei Tian 
602d9659bf6SJohannes Doerfert   /// Abstract State interface
603d9659bf6SJohannes Doerfert   ///{
604d9659bf6SJohannes Doerfert 
6053a3cb929SKazu Hirata   KernelInfoState() = default;
KernelInfoState__anon23c38c770111::KernelInfoState606d9659bf6SJohannes Doerfert   KernelInfoState(bool BestState) {
607d9659bf6SJohannes Doerfert     if (!BestState)
608d9659bf6SJohannes Doerfert       indicatePessimisticFixpoint();
609d9659bf6SJohannes Doerfert   }
610d9659bf6SJohannes Doerfert 
611d9659bf6SJohannes Doerfert   /// See AbstractState::isValidState(...)
isValidState__anon23c38c770111::KernelInfoState612d9659bf6SJohannes Doerfert   bool isValidState() const override { return true; }
613d9659bf6SJohannes Doerfert 
614d9659bf6SJohannes Doerfert   /// See AbstractState::isAtFixpoint(...)
isAtFixpoint__anon23c38c770111::KernelInfoState615d9659bf6SJohannes Doerfert   bool isAtFixpoint() const override { return IsAtFixpoint; }
616d9659bf6SJohannes Doerfert 
617d9659bf6SJohannes Doerfert   /// See AbstractState::indicatePessimisticFixpoint(...)
indicatePessimisticFixpoint__anon23c38c770111::KernelInfoState618d9659bf6SJohannes Doerfert   ChangeStatus indicatePessimisticFixpoint() override {
619d9659bf6SJohannes Doerfert     IsAtFixpoint = true;
620c6457dcaSJohannes Doerfert     ReachingKernelEntries.indicatePessimisticFixpoint();
621514c033dSJohannes Doerfert     SPMDCompatibilityTracker.indicatePessimisticFixpoint();
622c6457dcaSJohannes Doerfert     ReachedKnownParallelRegions.indicatePessimisticFixpoint();
623d9659bf6SJohannes Doerfert     ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
624d9659bf6SJohannes Doerfert     return ChangeStatus::CHANGED;
625d9659bf6SJohannes Doerfert   }
626d9659bf6SJohannes Doerfert 
627d9659bf6SJohannes Doerfert   /// See AbstractState::indicateOptimisticFixpoint(...)
indicateOptimisticFixpoint__anon23c38c770111::KernelInfoState628d9659bf6SJohannes Doerfert   ChangeStatus indicateOptimisticFixpoint() override {
629d9659bf6SJohannes Doerfert     IsAtFixpoint = true;
630d61aac76SJohannes Doerfert     ReachingKernelEntries.indicateOptimisticFixpoint();
631d61aac76SJohannes Doerfert     SPMDCompatibilityTracker.indicateOptimisticFixpoint();
632d61aac76SJohannes Doerfert     ReachedKnownParallelRegions.indicateOptimisticFixpoint();
633d61aac76SJohannes Doerfert     ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
634d9659bf6SJohannes Doerfert     return ChangeStatus::UNCHANGED;
635d9659bf6SJohannes Doerfert   }
636d9659bf6SJohannes Doerfert 
637d9659bf6SJohannes Doerfert   /// Return the assumed state
getAssumed__anon23c38c770111::KernelInfoState638d9659bf6SJohannes Doerfert   KernelInfoState &getAssumed() { return *this; }
getAssumed__anon23c38c770111::KernelInfoState639d9659bf6SJohannes Doerfert   const KernelInfoState &getAssumed() const { return *this; }
640d9659bf6SJohannes Doerfert 
operator ==__anon23c38c770111::KernelInfoState641d9659bf6SJohannes Doerfert   bool operator==(const KernelInfoState &RHS) const {
642514c033dSJohannes Doerfert     if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
643514c033dSJohannes Doerfert       return false;
644d9659bf6SJohannes Doerfert     if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
645d9659bf6SJohannes Doerfert       return false;
646d9659bf6SJohannes Doerfert     if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
647d9659bf6SJohannes Doerfert       return false;
648ca662297SShilei Tian     if (ReachingKernelEntries != RHS.ReachingKernelEntries)
649ca662297SShilei Tian       return false;
650d9659bf6SJohannes Doerfert     return true;
651d9659bf6SJohannes Doerfert   }
652d9659bf6SJohannes Doerfert 
6536b9a3ec3SJoseph Huber   /// Returns true if this kernel contains any OpenMP parallel regions.
mayContainParallelRegion__anon23c38c770111::KernelInfoState6546b9a3ec3SJoseph Huber   bool mayContainParallelRegion() {
6556b9a3ec3SJoseph Huber     return !ReachedKnownParallelRegions.empty() ||
6566b9a3ec3SJoseph Huber            !ReachedUnknownParallelRegions.empty();
6576b9a3ec3SJoseph Huber   }
6586b9a3ec3SJoseph Huber 
659d9659bf6SJohannes Doerfert   /// Return empty set as the best state of potential values.
getBestState__anon23c38c770111::KernelInfoState660d9659bf6SJohannes Doerfert   static KernelInfoState getBestState() { return KernelInfoState(true); }
661d9659bf6SJohannes Doerfert 
getBestState__anon23c38c770111::KernelInfoState662d9659bf6SJohannes Doerfert   static KernelInfoState getBestState(KernelInfoState &KIS) {
663d9659bf6SJohannes Doerfert     return getBestState();
664d9659bf6SJohannes Doerfert   }
665d9659bf6SJohannes Doerfert 
666d9659bf6SJohannes Doerfert   /// Return full set as the worst state of potential values.
getWorstState__anon23c38c770111::KernelInfoState667d9659bf6SJohannes Doerfert   static KernelInfoState getWorstState() { return KernelInfoState(false); }
668d9659bf6SJohannes Doerfert 
669d9659bf6SJohannes Doerfert   /// "Clamp" this state with \p KIS.
operator ^=__anon23c38c770111::KernelInfoState670d9659bf6SJohannes Doerfert   KernelInfoState operator^=(const KernelInfoState &KIS) {
671d9659bf6SJohannes Doerfert     // Do not merge two different _init and _deinit call sites.
672d9659bf6SJohannes Doerfert     if (KIS.KernelInitCB) {
673d9659bf6SJohannes Doerfert       if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
674e6e440aeSJohannes Doerfert         llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
675e6e440aeSJohannes Doerfert                          "assumptions.");
676d9659bf6SJohannes Doerfert       KernelInitCB = KIS.KernelInitCB;
677d9659bf6SJohannes Doerfert     }
678d9659bf6SJohannes Doerfert     if (KIS.KernelDeinitCB) {
679d9659bf6SJohannes Doerfert       if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
680e6e440aeSJohannes Doerfert         llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
681e6e440aeSJohannes Doerfert                          "assumptions.");
682d9659bf6SJohannes Doerfert       KernelDeinitCB = KIS.KernelDeinitCB;
683d9659bf6SJohannes Doerfert     }
684514c033dSJohannes Doerfert     SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
685d9659bf6SJohannes Doerfert     ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
686d9659bf6SJohannes Doerfert     ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
687d9659bf6SJohannes Doerfert     return *this;
688d9659bf6SJohannes Doerfert   }
689d9659bf6SJohannes Doerfert 
operator &=__anon23c38c770111::KernelInfoState690d9659bf6SJohannes Doerfert   KernelInfoState operator&=(const KernelInfoState &KIS) {
691d9659bf6SJohannes Doerfert     return (*this ^= KIS);
692d9659bf6SJohannes Doerfert   }
693d9659bf6SJohannes Doerfert 
694d9659bf6SJohannes Doerfert   ///}
695d9659bf6SJohannes Doerfert };
696d9659bf6SJohannes Doerfert 
6978931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload
6988931add6SHamilton Tobon Mosquera /// array, to a vector in memory.
6998931add6SHamilton Tobon Mosquera struct OffloadArray {
7008931add6SHamilton Tobon Mosquera   /// Physical array (in the IR).
7018931add6SHamilton Tobon Mosquera   AllocaInst *Array = nullptr;
7028931add6SHamilton Tobon Mosquera   /// Mapped values.
7038931add6SHamilton Tobon Mosquera   SmallVector<Value *, 8> StoredValues;
7048931add6SHamilton Tobon Mosquera   /// Last stores made in the offload array.
7058931add6SHamilton Tobon Mosquera   SmallVector<StoreInst *, 8> LastAccesses;
7068931add6SHamilton Tobon Mosquera 
7078931add6SHamilton Tobon Mosquera   OffloadArray() = default;
7088931add6SHamilton Tobon Mosquera 
7098931add6SHamilton Tobon Mosquera   /// Initializes the OffloadArray with the values stored in \p Array before
7108931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached. Returns false if the initialization
7118931add6SHamilton Tobon Mosquera   /// fails.
7128931add6SHamilton Tobon Mosquera   /// This MUST be used immediately after the construction of the object.
initialize__anon23c38c770111::OffloadArray7138931add6SHamilton Tobon Mosquera   bool initialize(AllocaInst &Array, Instruction &Before) {
7148931add6SHamilton Tobon Mosquera     if (!Array.getAllocatedType()->isArrayTy())
7158931add6SHamilton Tobon Mosquera       return false;
7168931add6SHamilton Tobon Mosquera 
7178931add6SHamilton Tobon Mosquera     if (!getValues(Array, Before))
7188931add6SHamilton Tobon Mosquera       return false;
7198931add6SHamilton Tobon Mosquera 
7208931add6SHamilton Tobon Mosquera     this->Array = &Array;
7218931add6SHamilton Tobon Mosquera     return true;
7228931add6SHamilton Tobon Mosquera   }
7238931add6SHamilton Tobon Mosquera 
724da8bec47SJoseph Huber   static const unsigned DeviceIDArgNum = 1;
725da8bec47SJoseph Huber   static const unsigned BasePtrsArgNum = 3;
726da8bec47SJoseph Huber   static const unsigned PtrsArgNum = 4;
727da8bec47SJoseph Huber   static const unsigned SizesArgNum = 5;
7281d3d9b9cSHamilton Tobon Mosquera 
7298931add6SHamilton Tobon Mosquera private:
7308931add6SHamilton Tobon Mosquera   /// Traverses the BasicBlock where \p Array is, collecting the stores made to
7318931add6SHamilton Tobon Mosquera   /// \p Array, leaving StoredValues with the values stored before the
7328931add6SHamilton Tobon Mosquera   /// instruction \p Before is reached.
getValues__anon23c38c770111::OffloadArray7338931add6SHamilton Tobon Mosquera   bool getValues(AllocaInst &Array, Instruction &Before) {
7348931add6SHamilton Tobon Mosquera     // Initialize container.
735d08d490aSJohannes Doerfert     const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
7368931add6SHamilton Tobon Mosquera     StoredValues.assign(NumValues, nullptr);
7378931add6SHamilton Tobon Mosquera     LastAccesses.assign(NumValues, nullptr);
7388931add6SHamilton Tobon Mosquera 
7398931add6SHamilton Tobon Mosquera     // TODO: This assumes the instruction \p Before is in the same
7408931add6SHamilton Tobon Mosquera     //  BasicBlock as Array. Make it general, for any control flow graph.
7418931add6SHamilton Tobon Mosquera     BasicBlock *BB = Array.getParent();
7428931add6SHamilton Tobon Mosquera     if (BB != Before.getParent())
7438931add6SHamilton Tobon Mosquera       return false;
7448931add6SHamilton Tobon Mosquera 
7458931add6SHamilton Tobon Mosquera     const DataLayout &DL = Array.getModule()->getDataLayout();
7468931add6SHamilton Tobon Mosquera     const unsigned int PointerSize = DL.getPointerSize();
7478931add6SHamilton Tobon Mosquera 
7488931add6SHamilton Tobon Mosquera     for (Instruction &I : *BB) {
7498931add6SHamilton Tobon Mosquera       if (&I == &Before)
7508931add6SHamilton Tobon Mosquera         break;
7518931add6SHamilton Tobon Mosquera 
7528931add6SHamilton Tobon Mosquera       if (!isa<StoreInst>(&I))
7538931add6SHamilton Tobon Mosquera         continue;
7548931add6SHamilton Tobon Mosquera 
7558931add6SHamilton Tobon Mosquera       auto *S = cast<StoreInst>(&I);
7568931add6SHamilton Tobon Mosquera       int64_t Offset = -1;
757d08d490aSJohannes Doerfert       auto *Dst =
758d08d490aSJohannes Doerfert           GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
7598931add6SHamilton Tobon Mosquera       if (Dst == &Array) {
7608931add6SHamilton Tobon Mosquera         int64_t Idx = Offset / PointerSize;
7618931add6SHamilton Tobon Mosquera         StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
7628931add6SHamilton Tobon Mosquera         LastAccesses[Idx] = S;
7638931add6SHamilton Tobon Mosquera       }
7648931add6SHamilton Tobon Mosquera     }
7658931add6SHamilton Tobon Mosquera 
7668931add6SHamilton Tobon Mosquera     return isFilled();
7678931add6SHamilton Tobon Mosquera   }
7688931add6SHamilton Tobon Mosquera 
7698931add6SHamilton Tobon Mosquera   /// Returns true if all values in StoredValues and
7708931add6SHamilton Tobon Mosquera   /// LastAccesses are not nullptrs.
isFilled__anon23c38c770111::OffloadArray7718931add6SHamilton Tobon Mosquera   bool isFilled() {
7728931add6SHamilton Tobon Mosquera     const unsigned NumValues = StoredValues.size();
7738931add6SHamilton Tobon Mosquera     for (unsigned I = 0; I < NumValues; ++I) {
7748931add6SHamilton Tobon Mosquera       if (!StoredValues[I] || !LastAccesses[I])
7758931add6SHamilton Tobon Mosquera         return false;
7768931add6SHamilton Tobon Mosquera     }
7778931add6SHamilton Tobon Mosquera 
7788931add6SHamilton Tobon Mosquera     return true;
7798931add6SHamilton Tobon Mosquera   }
7808931add6SHamilton Tobon Mosquera };
7818931add6SHamilton Tobon Mosquera 
7827cfd267cSsstefan1 struct OpenMPOpt {
7837cfd267cSsstefan1 
7847cfd267cSsstefan1   using OptimizationRemarkGetter =
7857cfd267cSsstefan1       function_ref<OptimizationRemarkEmitter &(Function *)>;
7867cfd267cSsstefan1 
  /// Set up the optimization for the functions in \p SCC. The module is taken
  /// from the first function of \p SCC; the remaining arguments are stored
  /// for later use.
  /// NOTE(review): the first element of \p SCC is dereferenced here, before
  /// run() gets a chance to check for an empty SCC -- confirm callers never
  /// pass an empty one.
  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
7927cfd267cSsstefan1 
793a2281419SJoseph Huber   /// Check if any remarks are enabled for openmp-opt
remarksEnabled__anon23c38c770111::OpenMPOpt794a2281419SJoseph Huber   bool remarksEnabled() {
795a2281419SJoseph Huber     auto &Ctx = M.getContext();
796a2281419SJoseph Huber     return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
797a2281419SJoseph Huber   }
798a2281419SJoseph Huber 
7999548b74aSJohannes Doerfert   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
run__anon23c38c770111::OpenMPOpt800b2ad63d3SJoseph Huber   bool run(bool IsModulePass) {
80154bd3751SJohannes Doerfert     if (SCC.empty())
80254bd3751SJohannes Doerfert       return false;
80354bd3751SJohannes Doerfert 
8049548b74aSJohannes Doerfert     bool Changed = false;
8059548b74aSJohannes Doerfert 
8069548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
80777b79d79SMehdi Amini                       << " functions in a slice with "
80877b79d79SMehdi Amini                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
8099548b74aSJohannes Doerfert 
810b2ad63d3SJoseph Huber     if (IsModulePass) {
811d9659bf6SJohannes Doerfert       Changed |= runAttributor(IsModulePass);
81218283125SJoseph Huber 
8136fc51c9fSJoseph Huber       // Recollect uses, in case Attributor deleted any.
8146fc51c9fSJoseph Huber       OMPInfoCache.recollectUses();
8156fc51c9fSJoseph Huber 
816be2b5696SJohannes Doerfert       // TODO: This should be folded into buildCustomStateMachine.
817be2b5696SJohannes Doerfert       Changed |= rewriteDeviceCodeStateMachine();
818be2b5696SJohannes Doerfert 
819b2ad63d3SJoseph Huber       if (remarksEnabled())
820b2ad63d3SJoseph Huber         analysisGlobalization();
8213c8a4c6fSJohannes Doerfert 
8223c8a4c6fSJohannes Doerfert       Changed |= eliminateBarriers();
823b2ad63d3SJoseph Huber     } else {
824e8039ad4SJohannes Doerfert       if (PrintICVValues)
825e8039ad4SJohannes Doerfert         printICVs();
826e8039ad4SJohannes Doerfert       if (PrintOpenMPKernels)
827e8039ad4SJohannes Doerfert         printKernels();
828e8039ad4SJohannes Doerfert 
829d9659bf6SJohannes Doerfert       Changed |= runAttributor(IsModulePass);
830e8039ad4SJohannes Doerfert 
831e8039ad4SJohannes Doerfert       // Recollect uses, in case Attributor deleted any.
832e8039ad4SJohannes Doerfert       OMPInfoCache.recollectUses();
833e8039ad4SJohannes Doerfert 
834e8039ad4SJohannes Doerfert       Changed |= deleteParallelRegions();
835d9659bf6SJohannes Doerfert 
836496f8e5bSHamilton Tobon Mosquera       if (HideMemoryTransferLatency)
837496f8e5bSHamilton Tobon Mosquera         Changed |= hideMemTransfersLatency();
8383a6bfcf2SGiorgis Georgakoudis       Changed |= deduplicateRuntimeCalls();
8393a6bfcf2SGiorgis Georgakoudis       if (EnableParallelRegionMerging) {
8403a6bfcf2SGiorgis Georgakoudis         if (mergeParallelRegions()) {
8413a6bfcf2SGiorgis Georgakoudis           deduplicateRuntimeCalls();
8423a6bfcf2SGiorgis Georgakoudis           Changed = true;
8433a6bfcf2SGiorgis Georgakoudis         }
8443a6bfcf2SGiorgis Georgakoudis       }
8453c8a4c6fSJohannes Doerfert 
8463c8a4c6fSJohannes Doerfert       Changed |= eliminateBarriers();
847b2ad63d3SJoseph Huber     }
848e8039ad4SJohannes Doerfert 
849e8039ad4SJohannes Doerfert     return Changed;
850e8039ad4SJohannes Doerfert   }
851e8039ad4SJohannes Doerfert 
8520f426935Ssstefan1   /// Print initial ICV values for testing.
8530f426935Ssstefan1   /// FIXME: This should be done from the Attributor once it is added.
printICVs__anon23c38c770111::OpenMPOpt854e8039ad4SJohannes Doerfert   void printICVs() const {
855cb9cfa0dSsstefan1     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
856cb9cfa0dSsstefan1                                  ICV_proc_bind};
8570f426935Ssstefan1 
8580f426935Ssstefan1     for (Function *F : OMPInfoCache.ModuleSlice) {
8590f426935Ssstefan1       for (auto ICV : ICVs) {
8600f426935Ssstefan1         auto ICVInfo = OMPInfoCache.ICVs[ICV];
8612db182ffSJoseph Huber         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
8622db182ffSJoseph Huber           return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
8630f426935Ssstefan1                      << " Value: "
8640f426935Ssstefan1                      << (ICVInfo.InitValue
86561cdaf66SSimon Pilgrim                              ? toString(ICVInfo.InitValue->getValue(), 10, true)
8660f426935Ssstefan1                              : "IMPLEMENTATION_DEFINED");
8670f426935Ssstefan1         };
8680f426935Ssstefan1 
8692db182ffSJoseph Huber         emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
8700f426935Ssstefan1       }
8710f426935Ssstefan1     }
8720f426935Ssstefan1   }
8730f426935Ssstefan1 
874e8039ad4SJohannes Doerfert   /// Print OpenMP GPU kernels for testing.
printKernels__anon23c38c770111::OpenMPOpt875e8039ad4SJohannes Doerfert   void printKernels() const {
876e8039ad4SJohannes Doerfert     for (Function *F : SCC) {
877e8039ad4SJohannes Doerfert       if (!OMPInfoCache.Kernels.count(F))
878e8039ad4SJohannes Doerfert         continue;
879b8235d2bSsstefan1 
8802db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
8812db182ffSJoseph Huber         return ORA << "OpenMP GPU kernel "
882e8039ad4SJohannes Doerfert                    << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
883e8039ad4SJohannes Doerfert       };
884b8235d2bSsstefan1 
8852db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
886e8039ad4SJohannes Doerfert     }
8879548b74aSJohannes Doerfert   }
8889548b74aSJohannes Doerfert 
8897cfd267cSsstefan1   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
8907cfd267cSsstefan1   /// given it has to be the callee or a nullptr is returned.
getCallIfRegularCall__anon23c38c770111::OpenMPOpt8917cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
8927cfd267cSsstefan1       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
8937cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(U.getUser());
8947cfd267cSsstefan1     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
895c4b1fe05SJohannes Doerfert         (!RFI ||
896c4b1fe05SJohannes Doerfert          (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
8977cfd267cSsstefan1       return CI;
8987cfd267cSsstefan1     return nullptr;
8997cfd267cSsstefan1   }
9007cfd267cSsstefan1 
9017cfd267cSsstefan1   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
9027cfd267cSsstefan1   /// the callee or a nullptr is returned.
getCallIfRegularCall__anon23c38c770111::OpenMPOpt9037cfd267cSsstefan1   static CallInst *getCallIfRegularCall(
9047cfd267cSsstefan1       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
9057cfd267cSsstefan1     CallInst *CI = dyn_cast<CallInst>(&V);
9067cfd267cSsstefan1     if (CI && !CI->hasOperandBundles() &&
907c4b1fe05SJohannes Doerfert         (!RFI ||
908c4b1fe05SJohannes Doerfert          (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
9097cfd267cSsstefan1       return CI;
9107cfd267cSsstefan1     return nullptr;
9117cfd267cSsstefan1   }
9127cfd267cSsstefan1 
9139548b74aSJohannes Doerfert private:
9143a6bfcf2SGiorgis Georgakoudis   /// Merge parallel regions when it is safe.
mergeParallelRegions__anon23c38c770111::OpenMPOpt9153a6bfcf2SGiorgis Georgakoudis   bool mergeParallelRegions() {
9163a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackCalleeOperand = 2;
9173a6bfcf2SGiorgis Georgakoudis     const unsigned CallbackFirstArgOperand = 3;
9183a6bfcf2SGiorgis Georgakoudis     using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
9193a6bfcf2SGiorgis Georgakoudis 
9203a6bfcf2SGiorgis Georgakoudis     // Check if there are any __kmpc_fork_call calls to merge.
9213a6bfcf2SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo &RFI =
9223a6bfcf2SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
9233a6bfcf2SGiorgis Georgakoudis 
9243a6bfcf2SGiorgis Georgakoudis     if (!RFI.Declaration)
9253a6bfcf2SGiorgis Georgakoudis       return false;
9263a6bfcf2SGiorgis Georgakoudis 
92797517055SGiorgis Georgakoudis     // Unmergable calls that prevent merging a parallel region.
92897517055SGiorgis Georgakoudis     OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
92997517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
93097517055SGiorgis Georgakoudis         OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
93197517055SGiorgis Georgakoudis     };
9323a6bfcf2SGiorgis Georgakoudis 
9333a6bfcf2SGiorgis Georgakoudis     bool Changed = false;
9343a6bfcf2SGiorgis Georgakoudis     LoopInfo *LI = nullptr;
9353a6bfcf2SGiorgis Georgakoudis     DominatorTree *DT = nullptr;
9363a6bfcf2SGiorgis Georgakoudis 
9373a6bfcf2SGiorgis Georgakoudis     SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
9383a6bfcf2SGiorgis Georgakoudis 
9393a6bfcf2SGiorgis Georgakoudis     BasicBlock *StartBB = nullptr, *EndBB = nullptr;
940ff289feeSMichael Kruse     auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
9413a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGStartBB = CodeGenIP.getBlock();
9423a6bfcf2SGiorgis Georgakoudis       BasicBlock *CGEndBB =
9433a6bfcf2SGiorgis Georgakoudis           SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
9443a6bfcf2SGiorgis Georgakoudis       assert(StartBB != nullptr && "StartBB should not be null");
9453a6bfcf2SGiorgis Georgakoudis       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
9463a6bfcf2SGiorgis Georgakoudis       assert(EndBB != nullptr && "EndBB should not be null");
9473a6bfcf2SGiorgis Georgakoudis       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
9483a6bfcf2SGiorgis Georgakoudis     };
9493a6bfcf2SGiorgis Georgakoudis 
950240dd924SAlex Zinenko     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
951240dd924SAlex Zinenko                       Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
952240dd924SAlex Zinenko       ReplacementValue = &Inner;
9533a6bfcf2SGiorgis Georgakoudis       return CodeGenIP;
9543a6bfcf2SGiorgis Georgakoudis     };
9553a6bfcf2SGiorgis Georgakoudis 
9563a6bfcf2SGiorgis Georgakoudis     auto FiniCB = [&](InsertPointTy CodeGenIP) {};
9573a6bfcf2SGiorgis Georgakoudis 
95897517055SGiorgis Georgakoudis     /// Create a sequential execution region within a merged parallel region,
95997517055SGiorgis Georgakoudis     /// encapsulated in a master construct with a barrier for synchronization.
96097517055SGiorgis Georgakoudis     auto CreateSequentialRegion = [&](Function *OuterFn,
96197517055SGiorgis Georgakoudis                                       BasicBlock *OuterPredBB,
96297517055SGiorgis Georgakoudis                                       Instruction *SeqStartI,
96397517055SGiorgis Georgakoudis                                       Instruction *SeqEndI) {
96497517055SGiorgis Georgakoudis       // Isolate the instructions of the sequential region to a separate
96597517055SGiorgis Georgakoudis       // block.
96697517055SGiorgis Georgakoudis       BasicBlock *ParentBB = SeqStartI->getParent();
96797517055SGiorgis Georgakoudis       BasicBlock *SeqEndBB =
96897517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
96997517055SGiorgis Georgakoudis       BasicBlock *SeqAfterBB =
97097517055SGiorgis Georgakoudis           SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
97197517055SGiorgis Georgakoudis       BasicBlock *SeqStartBB =
97297517055SGiorgis Georgakoudis           SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
97397517055SGiorgis Georgakoudis 
97497517055SGiorgis Georgakoudis       assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
97597517055SGiorgis Georgakoudis              "Expected a different CFG");
97697517055SGiorgis Georgakoudis       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
97797517055SGiorgis Georgakoudis       ParentBB->getTerminator()->eraseFromParent();
97897517055SGiorgis Georgakoudis 
979ff289feeSMichael Kruse       auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
98097517055SGiorgis Georgakoudis         BasicBlock *CGStartBB = CodeGenIP.getBlock();
98197517055SGiorgis Georgakoudis         BasicBlock *CGEndBB =
98297517055SGiorgis Georgakoudis             SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
98397517055SGiorgis Georgakoudis         assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
98497517055SGiorgis Georgakoudis         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
98597517055SGiorgis Georgakoudis         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
98697517055SGiorgis Georgakoudis         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
98797517055SGiorgis Georgakoudis       };
98897517055SGiorgis Georgakoudis       auto FiniCB = [&](InsertPointTy CodeGenIP) {};
98997517055SGiorgis Georgakoudis 
99097517055SGiorgis Georgakoudis       // Find outputs from the sequential region to outside users and
99197517055SGiorgis Georgakoudis       // broadcast their values to them.
99297517055SGiorgis Georgakoudis       for (Instruction &I : *SeqStartBB) {
99397517055SGiorgis Georgakoudis         SmallPtrSet<Instruction *, 4> OutsideUsers;
99497517055SGiorgis Georgakoudis         for (User *Usr : I.users()) {
99597517055SGiorgis Georgakoudis           Instruction &UsrI = *cast<Instruction>(Usr);
99697517055SGiorgis Georgakoudis           // Ignore outputs to LT intrinsics, code extraction for the merged
99797517055SGiorgis Georgakoudis           // parallel region will fix them.
99897517055SGiorgis Georgakoudis           if (UsrI.isLifetimeStartOrEnd())
99997517055SGiorgis Georgakoudis             continue;
100097517055SGiorgis Georgakoudis 
100197517055SGiorgis Georgakoudis           if (UsrI.getParent() != SeqStartBB)
100297517055SGiorgis Georgakoudis             OutsideUsers.insert(&UsrI);
100397517055SGiorgis Georgakoudis         }
100497517055SGiorgis Georgakoudis 
100597517055SGiorgis Georgakoudis         if (OutsideUsers.empty())
100697517055SGiorgis Georgakoudis           continue;
100797517055SGiorgis Georgakoudis 
100897517055SGiorgis Georgakoudis         // Emit an alloca in the outer region to store the broadcasted
100997517055SGiorgis Georgakoudis         // value.
101097517055SGiorgis Georgakoudis         const DataLayout &DL = M.getDataLayout();
101197517055SGiorgis Georgakoudis         AllocaInst *AllocaI = new AllocaInst(
101297517055SGiorgis Georgakoudis             I.getType(), DL.getAllocaAddrSpace(), nullptr,
101397517055SGiorgis Georgakoudis             I.getName() + ".seq.output.alloc", &OuterFn->front().front());
101497517055SGiorgis Georgakoudis 
101597517055SGiorgis Georgakoudis         // Emit a store instruction in the sequential BB to update the
101697517055SGiorgis Georgakoudis         // value.
101797517055SGiorgis Georgakoudis         new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
101897517055SGiorgis Georgakoudis 
101997517055SGiorgis Georgakoudis         // Emit a load instruction and replace the use of the output value
102097517055SGiorgis Georgakoudis         // with it.
102197517055SGiorgis Georgakoudis         for (Instruction *UsrI : OutsideUsers) {
10225b70c12fSJohannes Doerfert           LoadInst *LoadI = new LoadInst(
10235b70c12fSJohannes Doerfert               I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
102497517055SGiorgis Georgakoudis           UsrI->replaceUsesOfWith(&I, LoadI);
102597517055SGiorgis Georgakoudis         }
102697517055SGiorgis Georgakoudis       }
102797517055SGiorgis Georgakoudis 
102897517055SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(
102997517055SGiorgis Georgakoudis           InsertPointTy(ParentBB, ParentBB->end()), DL);
103097517055SGiorgis Georgakoudis       InsertPointTy SeqAfterIP =
103197517055SGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
103297517055SGiorgis Georgakoudis 
103397517055SGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
103497517055SGiorgis Georgakoudis 
103597517055SGiorgis Georgakoudis       BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
103697517055SGiorgis Georgakoudis 
103797517055SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
103897517055SGiorgis Georgakoudis                         << "\n");
103997517055SGiorgis Georgakoudis     };
104097517055SGiorgis Georgakoudis 
10413a6bfcf2SGiorgis Georgakoudis     // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
10423a6bfcf2SGiorgis Georgakoudis     // contained in BB and only separated by instructions that can be
10433a6bfcf2SGiorgis Georgakoudis     // redundantly executed in parallel. The block BB is split before the first
10443a6bfcf2SGiorgis Georgakoudis     // call (in MergableCIs) and after the last so the entire region we merge
10453a6bfcf2SGiorgis Georgakoudis     // into a single parallel region is contained in a single basic block
10463a6bfcf2SGiorgis Georgakoudis     // without any other instructions. We use the OpenMPIRBuilder to outline
10473a6bfcf2SGiorgis Georgakoudis     // that block and call the resulting function via __kmpc_fork_call.
10489e7a2bfcSNikita Popov     auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
10499e7a2bfcSNikita Popov                      BasicBlock *BB) {
10503a6bfcf2SGiorgis Georgakoudis       // TODO: Change the interface to allow single CIs expanded, e.g, to
10513a6bfcf2SGiorgis Georgakoudis       // include an outer loop.
10523a6bfcf2SGiorgis Georgakoudis       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
10533a6bfcf2SGiorgis Georgakoudis 
10543a6bfcf2SGiorgis Georgakoudis       auto Remark = [&](OptimizationRemark OR) {
1055eef6601bSJoseph Huber         OR << "Parallel region merged with parallel region"
1056eef6601bSJoseph Huber            << (MergableCIs.size() > 2 ? "s" : "") << " at ";
105723b0ab2aSKazu Hirata         for (auto *CI : llvm::drop_begin(MergableCIs)) {
10583a6bfcf2SGiorgis Georgakoudis           OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
10593a6bfcf2SGiorgis Georgakoudis           if (CI != MergableCIs.back())
10603a6bfcf2SGiorgis Georgakoudis             OR << ", ";
10613a6bfcf2SGiorgis Georgakoudis         }
1062eef6601bSJoseph Huber         return OR << ".";
10633a6bfcf2SGiorgis Georgakoudis       };
10643a6bfcf2SGiorgis Georgakoudis 
10652c31d5ebSJoseph Huber       emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
10663a6bfcf2SGiorgis Georgakoudis 
10673a6bfcf2SGiorgis Georgakoudis       Function *OriginalFn = BB->getParent();
10683a6bfcf2SGiorgis Georgakoudis       LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
10693a6bfcf2SGiorgis Georgakoudis                         << " parallel regions in " << OriginalFn->getName()
10703a6bfcf2SGiorgis Georgakoudis                         << "\n");
10713a6bfcf2SGiorgis Georgakoudis 
10723a6bfcf2SGiorgis Georgakoudis       // Isolate the calls to merge in a separate block.
10733a6bfcf2SGiorgis Georgakoudis       EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
10743a6bfcf2SGiorgis Georgakoudis       BasicBlock *AfterBB =
10753a6bfcf2SGiorgis Georgakoudis           SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
10763a6bfcf2SGiorgis Georgakoudis       StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
10773a6bfcf2SGiorgis Georgakoudis                            "omp.par.merged");
10783a6bfcf2SGiorgis Georgakoudis 
10793a6bfcf2SGiorgis Georgakoudis       assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
10803a6bfcf2SGiorgis Georgakoudis       const DebugLoc DL = BB->getTerminator()->getDebugLoc();
10813a6bfcf2SGiorgis Georgakoudis       BB->getTerminator()->eraseFromParent();
10823a6bfcf2SGiorgis Georgakoudis 
108397517055SGiorgis Georgakoudis       // Create sequential regions for sequential instructions that are
108497517055SGiorgis Georgakoudis       // in-between mergable parallel regions.
108597517055SGiorgis Georgakoudis       for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
108697517055SGiorgis Georgakoudis            It != End; ++It) {
108797517055SGiorgis Georgakoudis         Instruction *ForkCI = *It;
108897517055SGiorgis Georgakoudis         Instruction *NextForkCI = *(It + 1);
108997517055SGiorgis Georgakoudis 
109097517055SGiorgis Georgakoudis         // Continue if there are not in-between instructions.
109197517055SGiorgis Georgakoudis         if (ForkCI->getNextNode() == NextForkCI)
109297517055SGiorgis Georgakoudis           continue;
109397517055SGiorgis Georgakoudis 
109497517055SGiorgis Georgakoudis         CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
109597517055SGiorgis Georgakoudis                                NextForkCI->getPrevNode());
109697517055SGiorgis Georgakoudis       }
109797517055SGiorgis Georgakoudis 
10983a6bfcf2SGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
10993a6bfcf2SGiorgis Georgakoudis                                                DL);
11003a6bfcf2SGiorgis Georgakoudis       IRBuilder<>::InsertPoint AllocaIP(
11013a6bfcf2SGiorgis Georgakoudis           &OriginalFn->getEntryBlock(),
11023a6bfcf2SGiorgis Georgakoudis           OriginalFn->getEntryBlock().getFirstInsertionPt());
11033a6bfcf2SGiorgis Georgakoudis       // Create the merged parallel region with default proc binding, to
11043a6bfcf2SGiorgis Georgakoudis       // avoid overriding binding settings, and without explicit cancellation.
1105e5dba2d7SMichael Kruse       InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
11063a6bfcf2SGiorgis Georgakoudis           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
11073a6bfcf2SGiorgis Georgakoudis           OMP_PROC_BIND_default, /* IsCancellable */ false);
11083a6bfcf2SGiorgis Georgakoudis       BranchInst::Create(AfterBB, AfterIP.getBlock());
11093a6bfcf2SGiorgis Georgakoudis 
11103a6bfcf2SGiorgis Georgakoudis       // Perform the actual outlining.
11117cb4c261SGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.finalize(OriginalFn);
11123a6bfcf2SGiorgis Georgakoudis 
11133a6bfcf2SGiorgis Georgakoudis       Function *OutlinedFn = MergableCIs.front()->getCaller();
11143a6bfcf2SGiorgis Georgakoudis 
11153a6bfcf2SGiorgis Georgakoudis       // Replace the __kmpc_fork_call calls with direct calls to the outlined
11163a6bfcf2SGiorgis Georgakoudis       // callbacks.
11173a6bfcf2SGiorgis Georgakoudis       SmallVector<Value *, 8> Args;
11183a6bfcf2SGiorgis Georgakoudis       for (auto *CI : MergableCIs) {
1119875782bdSNikita Popov         Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
1120875782bdSNikita Popov         FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
11213a6bfcf2SGiorgis Georgakoudis         Args.clear();
11223a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(0));
11233a6bfcf2SGiorgis Georgakoudis         Args.push_back(OutlinedFn->getArg(1));
11244f0225f6SKazu Hirata         for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
11254f0225f6SKazu Hirata              ++U)
11263a6bfcf2SGiorgis Georgakoudis           Args.push_back(CI->getArgOperand(U));
11273a6bfcf2SGiorgis Georgakoudis 
11283a6bfcf2SGiorgis Georgakoudis         CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
11293a6bfcf2SGiorgis Georgakoudis         if (CI->getDebugLoc())
11303a6bfcf2SGiorgis Georgakoudis           NewCI->setDebugLoc(CI->getDebugLoc());
11313a6bfcf2SGiorgis Georgakoudis 
11323a6bfcf2SGiorgis Georgakoudis         // Forward parameter attributes from the callback to the callee.
11334f0225f6SKazu Hirata         for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
11344f0225f6SKazu Hirata              ++U)
113580ea2bb5SArthur Eubanks           for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
11363a6bfcf2SGiorgis Georgakoudis             NewCI->addParamAttr(
11373a6bfcf2SGiorgis Georgakoudis                 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
11383a6bfcf2SGiorgis Georgakoudis 
11393a6bfcf2SGiorgis Georgakoudis         // Emit an explicit barrier to replace the implicit fork-join barrier.
11403a6bfcf2SGiorgis Georgakoudis         if (CI != MergableCIs.back()) {
11413a6bfcf2SGiorgis Georgakoudis           // TODO: Remove barrier if the merged parallel region includes the
11423a6bfcf2SGiorgis Georgakoudis           // 'nowait' clause.
1143e5dba2d7SMichael Kruse           OMPInfoCache.OMPBuilder.createBarrier(
11443a6bfcf2SGiorgis Georgakoudis               InsertPointTy(NewCI->getParent(),
11453a6bfcf2SGiorgis Georgakoudis                             NewCI->getNextNode()->getIterator()),
11463a6bfcf2SGiorgis Georgakoudis               OMPD_parallel);
11473a6bfcf2SGiorgis Georgakoudis         }
11483a6bfcf2SGiorgis Georgakoudis 
11493a6bfcf2SGiorgis Georgakoudis         CI->eraseFromParent();
11503a6bfcf2SGiorgis Georgakoudis       }
11513a6bfcf2SGiorgis Georgakoudis 
11523a6bfcf2SGiorgis Georgakoudis       assert(OutlinedFn != OriginalFn && "Outlining failed");
11537fea561eSArthur Eubanks       CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
11543a6bfcf2SGiorgis Georgakoudis       CGUpdater.reanalyzeFunction(*OriginalFn);
11553a6bfcf2SGiorgis Georgakoudis 
11563a6bfcf2SGiorgis Georgakoudis       NumOpenMPParallelRegionsMerged += MergableCIs.size();
11573a6bfcf2SGiorgis Georgakoudis 
11583a6bfcf2SGiorgis Georgakoudis       return true;
11593a6bfcf2SGiorgis Georgakoudis     };
11603a6bfcf2SGiorgis Georgakoudis 
11613a6bfcf2SGiorgis Georgakoudis     // Helper function that identifes sequences of
11623a6bfcf2SGiorgis Georgakoudis     // __kmpc_fork_call uses in a basic block.
11633a6bfcf2SGiorgis Georgakoudis     auto DetectPRsCB = [&](Use &U, Function &F) {
11643a6bfcf2SGiorgis Georgakoudis       CallInst *CI = getCallIfRegularCall(U, &RFI);
11653a6bfcf2SGiorgis Georgakoudis       BB2PRMap[CI->getParent()].insert(CI);
11663a6bfcf2SGiorgis Georgakoudis 
11673a6bfcf2SGiorgis Georgakoudis       return false;
11683a6bfcf2SGiorgis Georgakoudis     };
11693a6bfcf2SGiorgis Georgakoudis 
11703a6bfcf2SGiorgis Georgakoudis     BB2PRMap.clear();
11713a6bfcf2SGiorgis Georgakoudis     RFI.foreachUse(SCC, DetectPRsCB);
11723a6bfcf2SGiorgis Georgakoudis     SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
11733a6bfcf2SGiorgis Georgakoudis     // Find mergable parallel regions within a basic block that are
11743a6bfcf2SGiorgis Georgakoudis     // safe to merge, that is any in-between instructions can safely
11753a6bfcf2SGiorgis Georgakoudis     // execute in parallel after merging.
11763a6bfcf2SGiorgis Georgakoudis     // TODO: support merging across basic-blocks.
11773a6bfcf2SGiorgis Georgakoudis     for (auto &It : BB2PRMap) {
11783a6bfcf2SGiorgis Georgakoudis       auto &CIs = It.getSecond();
11793a6bfcf2SGiorgis Georgakoudis       if (CIs.size() < 2)
11803a6bfcf2SGiorgis Georgakoudis         continue;
11813a6bfcf2SGiorgis Georgakoudis 
11823a6bfcf2SGiorgis Georgakoudis       BasicBlock *BB = It.getFirst();
11833a6bfcf2SGiorgis Georgakoudis       SmallVector<CallInst *, 4> MergableCIs;
11843a6bfcf2SGiorgis Georgakoudis 
118597517055SGiorgis Georgakoudis       /// Returns true if the instruction is mergable, false otherwise.
118697517055SGiorgis Georgakoudis       /// A terminator instruction is unmergable by definition since merging
118797517055SGiorgis Georgakoudis       /// works within a BB. Instructions before the mergable region are
118897517055SGiorgis Georgakoudis       /// mergable if they are not calls to OpenMP runtime functions that may
118997517055SGiorgis Georgakoudis       /// set different execution parameters for subsequent parallel regions.
119097517055SGiorgis Georgakoudis       /// Instructions in-between parallel regions are mergable if they are not
119197517055SGiorgis Georgakoudis       /// calls to any non-intrinsic function since that may call a non-mergable
119297517055SGiorgis Georgakoudis       /// OpenMP runtime function.
119397517055SGiorgis Georgakoudis       auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
119497517055SGiorgis Georgakoudis         // We do not merge across BBs, hence return false (unmergable) if the
119597517055SGiorgis Georgakoudis         // instruction is a terminator.
119697517055SGiorgis Georgakoudis         if (I.isTerminator())
119797517055SGiorgis Georgakoudis           return false;
119897517055SGiorgis Georgakoudis 
119997517055SGiorgis Georgakoudis         if (!isa<CallInst>(&I))
120097517055SGiorgis Georgakoudis           return true;
120197517055SGiorgis Georgakoudis 
120297517055SGiorgis Georgakoudis         CallInst *CI = cast<CallInst>(&I);
120397517055SGiorgis Georgakoudis         if (IsBeforeMergableRegion) {
120497517055SGiorgis Georgakoudis           Function *CalledFunction = CI->getCalledFunction();
120597517055SGiorgis Georgakoudis           if (!CalledFunction)
120697517055SGiorgis Georgakoudis             return false;
120797517055SGiorgis Georgakoudis           // Return false (unmergable) if the call before the parallel
120897517055SGiorgis Georgakoudis           // region calls an explicit affinity (proc_bind) or number of
120997517055SGiorgis Georgakoudis           // threads (num_threads) compiler-generated function. Those settings
121097517055SGiorgis Georgakoudis           // may be incompatible with following parallel regions.
121197517055SGiorgis Georgakoudis           // TODO: ICV tracking to detect compatibility.
121297517055SGiorgis Georgakoudis           for (const auto &RFI : UnmergableCallsInfo) {
121397517055SGiorgis Georgakoudis             if (CalledFunction == RFI.Declaration)
121497517055SGiorgis Georgakoudis               return false;
121597517055SGiorgis Georgakoudis           }
121697517055SGiorgis Georgakoudis         } else {
121797517055SGiorgis Georgakoudis           // Return false (unmergable) if there is a call instruction
121897517055SGiorgis Georgakoudis           // in-between parallel regions when it is not an intrinsic. It
121997517055SGiorgis Georgakoudis           // may call an unmergable OpenMP runtime function in its callpath.
122097517055SGiorgis Georgakoudis           // TODO: Keep track of possible OpenMP calls in the callpath.
122197517055SGiorgis Georgakoudis           if (!isa<IntrinsicInst>(CI))
122297517055SGiorgis Georgakoudis             return false;
122397517055SGiorgis Georgakoudis         }
122497517055SGiorgis Georgakoudis 
122597517055SGiorgis Georgakoudis         return true;
122697517055SGiorgis Georgakoudis       };
12273a6bfcf2SGiorgis Georgakoudis       // Find maximal number of parallel region CIs that are safe to merge.
122897517055SGiorgis Georgakoudis       for (auto It = BB->begin(), End = BB->end(); It != End;) {
122997517055SGiorgis Georgakoudis         Instruction &I = *It;
123097517055SGiorgis Georgakoudis         ++It;
123197517055SGiorgis Georgakoudis 
12323a6bfcf2SGiorgis Georgakoudis         if (CIs.count(&I)) {
12333a6bfcf2SGiorgis Georgakoudis           MergableCIs.push_back(cast<CallInst>(&I));
12343a6bfcf2SGiorgis Georgakoudis           continue;
12353a6bfcf2SGiorgis Georgakoudis         }
12363a6bfcf2SGiorgis Georgakoudis 
123797517055SGiorgis Georgakoudis         // Continue expanding if the instruction is mergable.
123897517055SGiorgis Georgakoudis         if (IsMergable(I, MergableCIs.empty()))
12393a6bfcf2SGiorgis Georgakoudis           continue;
12403a6bfcf2SGiorgis Georgakoudis 
124197517055SGiorgis Georgakoudis         // Forward the instruction iterator to skip the next parallel region
124297517055SGiorgis Georgakoudis         // since there is an unmergable instruction which can affect it.
124397517055SGiorgis Georgakoudis         for (; It != End; ++It) {
124497517055SGiorgis Georgakoudis           Instruction &SkipI = *It;
124597517055SGiorgis Georgakoudis           if (CIs.count(&SkipI)) {
124697517055SGiorgis Georgakoudis             LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
124797517055SGiorgis Georgakoudis                               << " due to " << I << "\n");
124897517055SGiorgis Georgakoudis             ++It;
124997517055SGiorgis Georgakoudis             break;
125097517055SGiorgis Georgakoudis           }
125197517055SGiorgis Georgakoudis         }
125297517055SGiorgis Georgakoudis 
125397517055SGiorgis Georgakoudis         // Store mergable regions found.
12543a6bfcf2SGiorgis Georgakoudis         if (MergableCIs.size() > 1) {
12553a6bfcf2SGiorgis Georgakoudis           MergableCIsVector.push_back(MergableCIs);
12563a6bfcf2SGiorgis Georgakoudis           LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
12573a6bfcf2SGiorgis Georgakoudis                             << " parallel regions in block " << BB->getName()
12583a6bfcf2SGiorgis Georgakoudis                             << " of function " << BB->getParent()->getName()
12593a6bfcf2SGiorgis Georgakoudis                             << "\n";);
12603a6bfcf2SGiorgis Georgakoudis         }
12613a6bfcf2SGiorgis Georgakoudis 
12623a6bfcf2SGiorgis Georgakoudis         MergableCIs.clear();
12633a6bfcf2SGiorgis Georgakoudis       }
12643a6bfcf2SGiorgis Georgakoudis 
12653a6bfcf2SGiorgis Georgakoudis       if (!MergableCIsVector.empty()) {
12663a6bfcf2SGiorgis Georgakoudis         Changed = true;
12673a6bfcf2SGiorgis Georgakoudis 
12683a6bfcf2SGiorgis Georgakoudis         for (auto &MergableCIs : MergableCIsVector)
12693a6bfcf2SGiorgis Georgakoudis           Merge(MergableCIs, BB);
1270b2ad63d3SJoseph Huber         MergableCIsVector.clear();
12713a6bfcf2SGiorgis Georgakoudis       }
12723a6bfcf2SGiorgis Georgakoudis     }
12733a6bfcf2SGiorgis Georgakoudis 
12743a6bfcf2SGiorgis Georgakoudis     if (Changed) {
127597517055SGiorgis Georgakoudis       /// Re-collect use for fork calls, emitted barrier calls, and
127697517055SGiorgis Georgakoudis       /// any emitted master/end_master calls.
127797517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
127897517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
127997517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
128097517055SGiorgis Georgakoudis       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
12813a6bfcf2SGiorgis Georgakoudis     }
12823a6bfcf2SGiorgis Georgakoudis 
12833a6bfcf2SGiorgis Georgakoudis     return Changed;
12843a6bfcf2SGiorgis Georgakoudis   }
12853a6bfcf2SGiorgis Georgakoudis 
12869d38f98dSJohannes Doerfert   /// Try to delete parallel regions if possible.
deleteParallelRegions__anon23c38c770111::OpenMPOpt1287e565db49SJohannes Doerfert   bool deleteParallelRegions() {
1288e565db49SJohannes Doerfert     const unsigned CallbackCalleeOperand = 2;
1289e565db49SJohannes Doerfert 
12907cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &RFI =
12917cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
12927cfd267cSsstefan1 
1293e565db49SJohannes Doerfert     if (!RFI.Declaration)
1294e565db49SJohannes Doerfert       return false;
1295e565db49SJohannes Doerfert 
1296e565db49SJohannes Doerfert     bool Changed = false;
1297e565db49SJohannes Doerfert     auto DeleteCallCB = [&](Use &U, Function &) {
1298e565db49SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U);
1299e565db49SJohannes Doerfert       if (!CI)
1300e565db49SJohannes Doerfert         return false;
1301e565db49SJohannes Doerfert       auto *Fn = dyn_cast<Function>(
1302e565db49SJohannes Doerfert           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1303e565db49SJohannes Doerfert       if (!Fn)
1304e565db49SJohannes Doerfert         return false;
1305e565db49SJohannes Doerfert       if (!Fn->onlyReadsMemory())
1306e565db49SJohannes Doerfert         return false;
1307e565db49SJohannes Doerfert       if (!Fn->hasFnAttribute(Attribute::WillReturn))
1308e565db49SJohannes Doerfert         return false;
1309e565db49SJohannes Doerfert 
1310e565db49SJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1311e565db49SJohannes Doerfert                         << CI->getCaller()->getName() << "\n");
13124d4ea9acSHuber, Joseph 
13134d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
1314eef6601bSJoseph Huber         return OR << "Removing parallel region with no side-effects.";
13154d4ea9acSHuber, Joseph       };
13162c31d5ebSJoseph Huber       emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
13174d4ea9acSHuber, Joseph 
1318e565db49SJohannes Doerfert       CGUpdater.removeCallSite(*CI);
1319e565db49SJohannes Doerfert       CI->eraseFromParent();
1320e565db49SJohannes Doerfert       Changed = true;
132155eb714aSRoman Lebedev       ++NumOpenMPParallelRegionsDeleted;
1322e565db49SJohannes Doerfert       return true;
1323e565db49SJohannes Doerfert     };
1324e565db49SJohannes Doerfert 
1325624d34afSJohannes Doerfert     RFI.foreachUse(SCC, DeleteCallCB);
1326e565db49SJohannes Doerfert 
1327e565db49SJohannes Doerfert     return Changed;
1328e565db49SJohannes Doerfert   }
1329e565db49SJohannes Doerfert 
1330b726c557SJohannes Doerfert   /// Try to eliminate runtime calls by reusing existing ones.
deduplicateRuntimeCalls__anon23c38c770111::OpenMPOpt13319548b74aSJohannes Doerfert   bool deduplicateRuntimeCalls() {
13329548b74aSJohannes Doerfert     bool Changed = false;
13339548b74aSJohannes Doerfert 
1334e28936f6SJohannes Doerfert     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1335e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_threads,
1336e28936f6SJohannes Doerfert         OMPRTL_omp_in_parallel,
1337e28936f6SJohannes Doerfert         OMPRTL_omp_get_cancellation,
1338e28936f6SJohannes Doerfert         OMPRTL_omp_get_thread_limit,
1339e28936f6SJohannes Doerfert         OMPRTL_omp_get_supported_active_levels,
1340e28936f6SJohannes Doerfert         OMPRTL_omp_get_level,
1341e28936f6SJohannes Doerfert         OMPRTL_omp_get_ancestor_thread_num,
1342e28936f6SJohannes Doerfert         OMPRTL_omp_get_team_size,
1343e28936f6SJohannes Doerfert         OMPRTL_omp_get_active_level,
1344e28936f6SJohannes Doerfert         OMPRTL_omp_in_final,
1345e28936f6SJohannes Doerfert         OMPRTL_omp_get_proc_bind,
1346e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_places,
1347e28936f6SJohannes Doerfert         OMPRTL_omp_get_num_procs,
1348e28936f6SJohannes Doerfert         OMPRTL_omp_get_place_num,
1349e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_num_places,
1350e28936f6SJohannes Doerfert         OMPRTL_omp_get_partition_place_nums};
1351e28936f6SJohannes Doerfert 
1352bc93c2d7SMarek Kurdej     // Global-tid is handled separately.
13539548b74aSJohannes Doerfert     SmallSetVector<Value *, 16> GTIdArgs;
13549548b74aSJohannes Doerfert     collectGlobalThreadIdArguments(GTIdArgs);
13559548b74aSJohannes Doerfert     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
13569548b74aSJohannes Doerfert                       << " global thread ID arguments\n");
13579548b74aSJohannes Doerfert 
13589548b74aSJohannes Doerfert     for (Function *F : SCC) {
1359e28936f6SJohannes Doerfert       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
13604e29d256Sserge-sans-paille         Changed |= deduplicateRuntimeCalls(
13614e29d256Sserge-sans-paille             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1362e28936f6SJohannes Doerfert 
1363e28936f6SJohannes Doerfert       // __kmpc_global_thread_num is special as we can replace it with an
1364e28936f6SJohannes Doerfert       // argument in enough cases to make it worth trying.
13659548b74aSJohannes Doerfert       Value *GTIdArg = nullptr;
13669548b74aSJohannes Doerfert       for (Argument &Arg : F->args())
13679548b74aSJohannes Doerfert         if (GTIdArgs.count(&Arg)) {
13689548b74aSJohannes Doerfert           GTIdArg = &Arg;
13699548b74aSJohannes Doerfert           break;
13709548b74aSJohannes Doerfert         }
13719548b74aSJohannes Doerfert       Changed |= deduplicateRuntimeCalls(
13727cfd267cSsstefan1           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
13739548b74aSJohannes Doerfert     }
13749548b74aSJohannes Doerfert 
13759548b74aSJohannes Doerfert     return Changed;
13769548b74aSJohannes Doerfert   }
13779548b74aSJohannes Doerfert 
1378496f8e5bSHamilton Tobon Mosquera   /// Tries to hide the latency of runtime calls that involve host to
1379496f8e5bSHamilton Tobon Mosquera   /// device memory transfers by splitting them into their "issue" and "wait"
1380496f8e5bSHamilton Tobon Mosquera   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1381496f8e5bSHamilton Tobon Mosquera   /// moved downards as much as possible. The "issue" issues the memory transfer
1382496f8e5bSHamilton Tobon Mosquera   /// asynchronously, returning a handle. The "wait" waits in the returned
1383496f8e5bSHamilton Tobon Mosquera   /// handle for the memory transfer to finish.
hideMemTransfersLatency__anon23c38c770111::OpenMPOpt1384496f8e5bSHamilton Tobon Mosquera   bool hideMemTransfersLatency() {
1385496f8e5bSHamilton Tobon Mosquera     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1386496f8e5bSHamilton Tobon Mosquera     bool Changed = false;
1387496f8e5bSHamilton Tobon Mosquera     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1388496f8e5bSHamilton Tobon Mosquera       auto *RTCall = getCallIfRegularCall(U, &RFI);
1389496f8e5bSHamilton Tobon Mosquera       if (!RTCall)
1390496f8e5bSHamilton Tobon Mosquera         return false;
1391496f8e5bSHamilton Tobon Mosquera 
13928931add6SHamilton Tobon Mosquera       OffloadArray OffloadArrays[3];
13938931add6SHamilton Tobon Mosquera       if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
13948931add6SHamilton Tobon Mosquera         return false;
13958931add6SHamilton Tobon Mosquera 
13968931add6SHamilton Tobon Mosquera       LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
13978931add6SHamilton Tobon Mosquera 
1398bd2fa181SHamilton Tobon Mosquera       // TODO: Check if can be moved upwards.
1399bd2fa181SHamilton Tobon Mosquera       bool WasSplit = false;
1400bd2fa181SHamilton Tobon Mosquera       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1401bd2fa181SHamilton Tobon Mosquera       if (WaitMovementPoint)
1402bd2fa181SHamilton Tobon Mosquera         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1403bd2fa181SHamilton Tobon Mosquera 
1404496f8e5bSHamilton Tobon Mosquera       Changed |= WasSplit;
1405496f8e5bSHamilton Tobon Mosquera       return WasSplit;
1406496f8e5bSHamilton Tobon Mosquera     };
1407496f8e5bSHamilton Tobon Mosquera     RFI.foreachUse(SCC, SplitMemTransfers);
1408496f8e5bSHamilton Tobon Mosquera 
1409496f8e5bSHamilton Tobon Mosquera     return Changed;
1410496f8e5bSHamilton Tobon Mosquera   }
1411496f8e5bSHamilton Tobon Mosquera 
14123c8a4c6fSJohannes Doerfert   /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
14133c8a4c6fSJohannes Doerfert   /// TODO: Make this an AA and expand it to work across blocks and functions.
eliminateBarriers__anon23c38c770111::OpenMPOpt14143c8a4c6fSJohannes Doerfert   bool eliminateBarriers() {
14153c8a4c6fSJohannes Doerfert     bool Changed = false;
14163c8a4c6fSJohannes Doerfert 
14173c8a4c6fSJohannes Doerfert     if (DisableOpenMPOptBarrierElimination)
14183c8a4c6fSJohannes Doerfert       return /*Changed=*/false;
14193c8a4c6fSJohannes Doerfert 
14203c8a4c6fSJohannes Doerfert     if (OMPInfoCache.Kernels.empty())
14213c8a4c6fSJohannes Doerfert       return /*Changed=*/false;
14223c8a4c6fSJohannes Doerfert 
14233c8a4c6fSJohannes Doerfert     enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
14243c8a4c6fSJohannes Doerfert 
14253c8a4c6fSJohannes Doerfert     class BarrierInfo {
14263c8a4c6fSJohannes Doerfert       Instruction *I;
14273c8a4c6fSJohannes Doerfert       enum ImplicitBarrierType Type;
14283c8a4c6fSJohannes Doerfert 
14293c8a4c6fSJohannes Doerfert     public:
14303c8a4c6fSJohannes Doerfert       BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
14313c8a4c6fSJohannes Doerfert       BarrierInfo(Instruction &I) : I(&I) {}
14323c8a4c6fSJohannes Doerfert 
14333c8a4c6fSJohannes Doerfert       bool isImplicit() { return !I; }
14343c8a4c6fSJohannes Doerfert 
14353c8a4c6fSJohannes Doerfert       bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
14363c8a4c6fSJohannes Doerfert 
14373c8a4c6fSJohannes Doerfert       bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
14383c8a4c6fSJohannes Doerfert 
14393c8a4c6fSJohannes Doerfert       Instruction *getInstruction() { return I; }
14403c8a4c6fSJohannes Doerfert     };
14413c8a4c6fSJohannes Doerfert 
14423c8a4c6fSJohannes Doerfert     for (Function *Kernel : OMPInfoCache.Kernels) {
14433c8a4c6fSJohannes Doerfert       for (BasicBlock &BB : *Kernel) {
14443c8a4c6fSJohannes Doerfert         SmallVector<BarrierInfo, 8> BarriersInBlock;
14453c8a4c6fSJohannes Doerfert         SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
14463c8a4c6fSJohannes Doerfert 
14473c8a4c6fSJohannes Doerfert         // Add the kernel entry implicit barrier.
14483c8a4c6fSJohannes Doerfert         if (&Kernel->getEntryBlock() == &BB)
14493c8a4c6fSJohannes Doerfert           BarriersInBlock.push_back(IBT_ENTRY);
14503c8a4c6fSJohannes Doerfert 
14513c8a4c6fSJohannes Doerfert         // Find implicit and explicit aligned barriers in the same basic block.
14523c8a4c6fSJohannes Doerfert         for (Instruction &I : BB) {
14533c8a4c6fSJohannes Doerfert           if (isa<ReturnInst>(I)) {
14543c8a4c6fSJohannes Doerfert             // Add the implicit barrier when exiting the kernel.
14553c8a4c6fSJohannes Doerfert             BarriersInBlock.push_back(IBT_EXIT);
14563c8a4c6fSJohannes Doerfert             continue;
14573c8a4c6fSJohannes Doerfert           }
14583c8a4c6fSJohannes Doerfert           CallBase *CB = dyn_cast<CallBase>(&I);
14593c8a4c6fSJohannes Doerfert           if (!CB)
14603c8a4c6fSJohannes Doerfert             continue;
14613c8a4c6fSJohannes Doerfert 
14623c8a4c6fSJohannes Doerfert           auto IsAlignBarrierCB = [&](CallBase &CB) {
14633c8a4c6fSJohannes Doerfert             switch (CB.getIntrinsicID()) {
14643c8a4c6fSJohannes Doerfert             case Intrinsic::nvvm_barrier0:
14653c8a4c6fSJohannes Doerfert             case Intrinsic::nvvm_barrier0_and:
14663c8a4c6fSJohannes Doerfert             case Intrinsic::nvvm_barrier0_or:
14673c8a4c6fSJohannes Doerfert             case Intrinsic::nvvm_barrier0_popc:
14683c8a4c6fSJohannes Doerfert               return true;
14693c8a4c6fSJohannes Doerfert             default:
14703c8a4c6fSJohannes Doerfert               break;
14713c8a4c6fSJohannes Doerfert             }
14723c8a4c6fSJohannes Doerfert             return hasAssumption(CB,
14733c8a4c6fSJohannes Doerfert                                  KnownAssumptionString("ompx_aligned_barrier"));
14743c8a4c6fSJohannes Doerfert           };
14753c8a4c6fSJohannes Doerfert 
14763c8a4c6fSJohannes Doerfert           if (IsAlignBarrierCB(*CB)) {
14773c8a4c6fSJohannes Doerfert             // Add an explicit aligned barrier.
14783c8a4c6fSJohannes Doerfert             BarriersInBlock.push_back(I);
14793c8a4c6fSJohannes Doerfert           }
14803c8a4c6fSJohannes Doerfert         }
14813c8a4c6fSJohannes Doerfert 
14823c8a4c6fSJohannes Doerfert         if (BarriersInBlock.size() <= 1)
14833c8a4c6fSJohannes Doerfert           continue;
14843c8a4c6fSJohannes Doerfert 
14853c8a4c6fSJohannes Doerfert         // A barrier in a barrier pair is removeable if all instructions
14863c8a4c6fSJohannes Doerfert         // between the barriers in the pair are side-effect free modulo the
14873c8a4c6fSJohannes Doerfert         // barrier operation.
14883c8a4c6fSJohannes Doerfert         auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
14893c8a4c6fSJohannes Doerfert                                              BarrierInfo *EndBI) {
14903c8a4c6fSJohannes Doerfert           assert(
14913c8a4c6fSJohannes Doerfert               !StartBI->isImplicitExit() &&
14923c8a4c6fSJohannes Doerfert               "Expected start barrier to be other than a kernel exit barrier");
14933c8a4c6fSJohannes Doerfert           assert(
14943c8a4c6fSJohannes Doerfert               !EndBI->isImplicitEntry() &&
14953c8a4c6fSJohannes Doerfert               "Expected end barrier to be other than a kernel entry barrier");
14963c8a4c6fSJohannes Doerfert           // If StarBI instructions is null then this the implicit
14973c8a4c6fSJohannes Doerfert           // kernel entry barrier, so iterate from the first instruction in the
14983c8a4c6fSJohannes Doerfert           // entry block.
14993c8a4c6fSJohannes Doerfert           Instruction *I = (StartBI->isImplicitEntry())
15003c8a4c6fSJohannes Doerfert                                ? &Kernel->getEntryBlock().front()
15013c8a4c6fSJohannes Doerfert                                : StartBI->getInstruction()->getNextNode();
15023c8a4c6fSJohannes Doerfert           assert(I && "Expected non-null start instruction");
15033c8a4c6fSJohannes Doerfert           Instruction *E = (EndBI->isImplicitExit())
15043c8a4c6fSJohannes Doerfert                                ? I->getParent()->getTerminator()
15053c8a4c6fSJohannes Doerfert                                : EndBI->getInstruction();
15063c8a4c6fSJohannes Doerfert           assert(E && "Expected non-null end instruction");
15073c8a4c6fSJohannes Doerfert 
15083c8a4c6fSJohannes Doerfert           for (; I != E; I = I->getNextNode()) {
15093c8a4c6fSJohannes Doerfert             if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
15103c8a4c6fSJohannes Doerfert               continue;
15113c8a4c6fSJohannes Doerfert 
15123c8a4c6fSJohannes Doerfert             auto IsPotentiallyAffectedByBarrier =
15133c8a4c6fSJohannes Doerfert                 [](Optional<MemoryLocation> Loc) {
15143c8a4c6fSJohannes Doerfert                   const Value *Obj = (Loc && Loc->Ptr)
15153c8a4c6fSJohannes Doerfert                                          ? getUnderlyingObject(Loc->Ptr)
15163c8a4c6fSJohannes Doerfert                                          : nullptr;
15173c8a4c6fSJohannes Doerfert                   if (!Obj) {
15183c8a4c6fSJohannes Doerfert                     LLVM_DEBUG(
15193c8a4c6fSJohannes Doerfert                         dbgs()
15203c8a4c6fSJohannes Doerfert                         << "Access to unknown location requires barriers\n");
15213c8a4c6fSJohannes Doerfert                     return true;
15223c8a4c6fSJohannes Doerfert                   }
15233c8a4c6fSJohannes Doerfert                   if (isa<UndefValue>(Obj))
15243c8a4c6fSJohannes Doerfert                     return false;
15253c8a4c6fSJohannes Doerfert                   if (isa<AllocaInst>(Obj))
15263c8a4c6fSJohannes Doerfert                     return false;
15273c8a4c6fSJohannes Doerfert                   if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
15283c8a4c6fSJohannes Doerfert                     if (GV->isConstant())
15293c8a4c6fSJohannes Doerfert                       return false;
15303c8a4c6fSJohannes Doerfert                     if (GV->isThreadLocal())
15313c8a4c6fSJohannes Doerfert                       return false;
15323c8a4c6fSJohannes Doerfert                     if (GV->getAddressSpace() == (int)AddressSpace::Local)
15333c8a4c6fSJohannes Doerfert                       return false;
15343c8a4c6fSJohannes Doerfert                     if (GV->getAddressSpace() == (int)AddressSpace::Constant)
15353c8a4c6fSJohannes Doerfert                       return false;
15363c8a4c6fSJohannes Doerfert                   }
15373c8a4c6fSJohannes Doerfert                   LLVM_DEBUG(dbgs() << "Access to '" << *Obj
15383c8a4c6fSJohannes Doerfert                                     << "' requires barriers\n");
15393c8a4c6fSJohannes Doerfert                   return true;
15403c8a4c6fSJohannes Doerfert                 };
15413c8a4c6fSJohannes Doerfert 
15423c8a4c6fSJohannes Doerfert             if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
15433c8a4c6fSJohannes Doerfert               Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
15443c8a4c6fSJohannes Doerfert               if (IsPotentiallyAffectedByBarrier(Loc))
15453c8a4c6fSJohannes Doerfert                 return false;
15463c8a4c6fSJohannes Doerfert               if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
15473c8a4c6fSJohannes Doerfert                 Optional<MemoryLocation> Loc =
15483c8a4c6fSJohannes Doerfert                     MemoryLocation::getForSource(MTI);
15493c8a4c6fSJohannes Doerfert                 if (IsPotentiallyAffectedByBarrier(Loc))
15503c8a4c6fSJohannes Doerfert                   return false;
15513c8a4c6fSJohannes Doerfert               }
15523c8a4c6fSJohannes Doerfert               continue;
15533c8a4c6fSJohannes Doerfert             }
15543c8a4c6fSJohannes Doerfert 
15553c8a4c6fSJohannes Doerfert             if (auto *LI = dyn_cast<LoadInst>(I))
15563c8a4c6fSJohannes Doerfert               if (LI->hasMetadata(LLVMContext::MD_invariant_load))
15573c8a4c6fSJohannes Doerfert                 continue;
15583c8a4c6fSJohannes Doerfert 
15593c8a4c6fSJohannes Doerfert             Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
15603c8a4c6fSJohannes Doerfert             if (IsPotentiallyAffectedByBarrier(Loc))
15613c8a4c6fSJohannes Doerfert               return false;
15623c8a4c6fSJohannes Doerfert           }
15633c8a4c6fSJohannes Doerfert 
15643c8a4c6fSJohannes Doerfert           return true;
15653c8a4c6fSJohannes Doerfert         };
15663c8a4c6fSJohannes Doerfert 
15673c8a4c6fSJohannes Doerfert         // Iterate barrier pairs and remove an explicit barrier if analysis
15683c8a4c6fSJohannes Doerfert         // deems it removeable.
15693c8a4c6fSJohannes Doerfert         for (auto *It = BarriersInBlock.begin(),
15703c8a4c6fSJohannes Doerfert                   *End = BarriersInBlock.end() - 1;
15713c8a4c6fSJohannes Doerfert              It != End; ++It) {
15723c8a4c6fSJohannes Doerfert 
15733c8a4c6fSJohannes Doerfert           BarrierInfo *StartBI = It;
15743c8a4c6fSJohannes Doerfert           BarrierInfo *EndBI = (It + 1);
15753c8a4c6fSJohannes Doerfert 
15763c8a4c6fSJohannes Doerfert           // Cannot remove when both are implicit barriers, continue.
15773c8a4c6fSJohannes Doerfert           if (StartBI->isImplicit() && EndBI->isImplicit())
15783c8a4c6fSJohannes Doerfert             continue;
15793c8a4c6fSJohannes Doerfert 
15803c8a4c6fSJohannes Doerfert           if (!IsBarrierRemoveable(StartBI, EndBI))
15813c8a4c6fSJohannes Doerfert             continue;
15823c8a4c6fSJohannes Doerfert 
15833c8a4c6fSJohannes Doerfert           assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
15843c8a4c6fSJohannes Doerfert                  "Expected at least one explicit barrier to remove.");
15853c8a4c6fSJohannes Doerfert 
15863c8a4c6fSJohannes Doerfert           // Remove an explicit barrier, check first, then second.
15873c8a4c6fSJohannes Doerfert           if (!StartBI->isImplicit()) {
15883c8a4c6fSJohannes Doerfert             LLVM_DEBUG(dbgs() << "Remove start barrier "
15893c8a4c6fSJohannes Doerfert                               << *StartBI->getInstruction() << "\n");
15903c8a4c6fSJohannes Doerfert             BarriersToBeDeleted.insert(StartBI->getInstruction());
15913c8a4c6fSJohannes Doerfert           } else {
15923c8a4c6fSJohannes Doerfert             LLVM_DEBUG(dbgs() << "Remove end barrier "
15933c8a4c6fSJohannes Doerfert                               << *EndBI->getInstruction() << "\n");
15943c8a4c6fSJohannes Doerfert             BarriersToBeDeleted.insert(EndBI->getInstruction());
15953c8a4c6fSJohannes Doerfert           }
15963c8a4c6fSJohannes Doerfert         }
15973c8a4c6fSJohannes Doerfert 
15983c8a4c6fSJohannes Doerfert         if (BarriersToBeDeleted.empty())
15993c8a4c6fSJohannes Doerfert           continue;
16003c8a4c6fSJohannes Doerfert 
16013c8a4c6fSJohannes Doerfert         Changed = true;
16023c8a4c6fSJohannes Doerfert         for (Instruction *I : BarriersToBeDeleted) {
16033c8a4c6fSJohannes Doerfert           ++NumBarriersEliminated;
16043c8a4c6fSJohannes Doerfert           auto Remark = [&](OptimizationRemark OR) {
16053c8a4c6fSJohannes Doerfert             return OR << "Redundant barrier eliminated.";
16063c8a4c6fSJohannes Doerfert           };
16073c8a4c6fSJohannes Doerfert 
16083c8a4c6fSJohannes Doerfert           if (EnableVerboseRemarks)
16093c8a4c6fSJohannes Doerfert             emitRemark<OptimizationRemark>(I, "OMP190", Remark);
16103c8a4c6fSJohannes Doerfert           I->eraseFromParent();
16113c8a4c6fSJohannes Doerfert         }
16123c8a4c6fSJohannes Doerfert       }
16133c8a4c6fSJohannes Doerfert     }
16143c8a4c6fSJohannes Doerfert 
16153c8a4c6fSJohannes Doerfert     return Changed;
16163c8a4c6fSJohannes Doerfert   }
16173c8a4c6fSJohannes Doerfert 
analysisGlobalization__anon23c38c770111::OpenMPOpt1618a2281419SJoseph Huber   void analysisGlobalization() {
16196fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
162082453e75SJoseph Huber 
162182453e75SJoseph Huber     auto CheckGlobalization = [&](Use &U, Function &Decl) {
1622a2281419SJoseph Huber       if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
162344feacc7SJoseph Huber         auto Remark = [&](OptimizationRemarkMissed ORM) {
162444feacc7SJoseph Huber           return ORM
1625a2281419SJoseph Huber                  << "Found thread data sharing on the GPU. "
1626a2281419SJoseph Huber                  << "Expect degraded performance due to data globalization.";
1627a2281419SJoseph Huber         };
16282c31d5ebSJoseph Huber         emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1629a2281419SJoseph Huber       }
1630a2281419SJoseph Huber 
1631a2281419SJoseph Huber       return false;
1632a2281419SJoseph Huber     };
1633a2281419SJoseph Huber 
163482453e75SJoseph Huber     RFI.foreachUse(SCC, CheckGlobalization);
163582453e75SJoseph Huber   }
1636a2281419SJoseph Huber 
16378931add6SHamilton Tobon Mosquera   /// Maps the values stored in the offload arrays passed as arguments to
16388931add6SHamilton Tobon Mosquera   /// \p RuntimeCall into the offload arrays in \p OAs.
getValuesInOffloadArrays__anon23c38c770111::OpenMPOpt16398931add6SHamilton Tobon Mosquera   bool getValuesInOffloadArrays(CallInst &RuntimeCall,
16408931add6SHamilton Tobon Mosquera                                 MutableArrayRef<OffloadArray> OAs) {
16418931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "Need space for three offload arrays!");
16428931add6SHamilton Tobon Mosquera 
16438931add6SHamilton Tobon Mosquera     // A runtime call that involves memory offloading looks something like:
16448931add6SHamilton Tobon Mosquera     // call void @__tgt_target_data_begin_mapper(arg0, arg1,
16458931add6SHamilton Tobon Mosquera     //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
16468931add6SHamilton Tobon Mosquera     // ...)
16478931add6SHamilton Tobon Mosquera     // So, the idea is to access the allocas that allocate space for these
16488931add6SHamilton Tobon Mosquera     // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
16498931add6SHamilton Tobon Mosquera     // Therefore:
16508931add6SHamilton Tobon Mosquera     // i8** %offload_baseptrs.
16511d3d9b9cSHamilton Tobon Mosquera     Value *BasePtrsArg =
16521d3d9b9cSHamilton Tobon Mosquera         RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
16538931add6SHamilton Tobon Mosquera     // i8** %offload_ptrs.
16541d3d9b9cSHamilton Tobon Mosquera     Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
16558931add6SHamilton Tobon Mosquera     // i8** %offload_sizes.
16561d3d9b9cSHamilton Tobon Mosquera     Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
16578931add6SHamilton Tobon Mosquera 
16588931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
16598931add6SHamilton Tobon Mosquera     auto *V = getUnderlyingObject(BasePtrsArg);
16608931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
16618931add6SHamilton Tobon Mosquera       return false;
16628931add6SHamilton Tobon Mosquera     auto *BasePtrsArray = cast<AllocaInst>(V);
16638931add6SHamilton Tobon Mosquera     if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
16648931add6SHamilton Tobon Mosquera       return false;
16658931add6SHamilton Tobon Mosquera 
16668931add6SHamilton Tobon Mosquera     // Get values stored in **offload_baseptrs.
16678931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(PtrsArg);
16688931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
16698931add6SHamilton Tobon Mosquera       return false;
16708931add6SHamilton Tobon Mosquera     auto *PtrsArray = cast<AllocaInst>(V);
16718931add6SHamilton Tobon Mosquera     if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
16728931add6SHamilton Tobon Mosquera       return false;
16738931add6SHamilton Tobon Mosquera 
16748931add6SHamilton Tobon Mosquera     // Get values stored in **offload_sizes.
16758931add6SHamilton Tobon Mosquera     V = getUnderlyingObject(SizesArg);
16768931add6SHamilton Tobon Mosquera     // If it's a [constant] global array don't analyze it.
16778931add6SHamilton Tobon Mosquera     if (isa<GlobalValue>(V))
16788931add6SHamilton Tobon Mosquera       return isa<Constant>(V);
16798931add6SHamilton Tobon Mosquera     if (!isa<AllocaInst>(V))
16808931add6SHamilton Tobon Mosquera       return false;
16818931add6SHamilton Tobon Mosquera 
16828931add6SHamilton Tobon Mosquera     auto *SizesArray = cast<AllocaInst>(V);
16838931add6SHamilton Tobon Mosquera     if (!OAs[2].initialize(*SizesArray, RuntimeCall))
16848931add6SHamilton Tobon Mosquera       return false;
16858931add6SHamilton Tobon Mosquera 
16868931add6SHamilton Tobon Mosquera     return true;
16878931add6SHamilton Tobon Mosquera   }
16888931add6SHamilton Tobon Mosquera 
16898931add6SHamilton Tobon Mosquera   /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
16908931add6SHamilton Tobon Mosquera   /// For now this is a way to test that the function getValuesInOffloadArrays
16918931add6SHamilton Tobon Mosquera   /// is working properly.
16928931add6SHamilton Tobon Mosquera   /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
dumpValuesInOffloadArrays__anon23c38c770111::OpenMPOpt16938931add6SHamilton Tobon Mosquera   void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
16948931add6SHamilton Tobon Mosquera     assert(OAs.size() == 3 && "There are three offload arrays to debug!");
16958931add6SHamilton Tobon Mosquera 
16968931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
16978931add6SHamilton Tobon Mosquera     std::string ValuesStr;
16988931add6SHamilton Tobon Mosquera     raw_string_ostream Printer(ValuesStr);
16998931add6SHamilton Tobon Mosquera     std::string Separator = " --- ";
17008931add6SHamilton Tobon Mosquera 
17018931add6SHamilton Tobon Mosquera     for (auto *BP : OAs[0].StoredValues) {
17028931add6SHamilton Tobon Mosquera       BP->print(Printer);
17038931add6SHamilton Tobon Mosquera       Printer << Separator;
17048931add6SHamilton Tobon Mosquera     }
17058931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
17068931add6SHamilton Tobon Mosquera     ValuesStr.clear();
17078931add6SHamilton Tobon Mosquera 
17088931add6SHamilton Tobon Mosquera     for (auto *P : OAs[1].StoredValues) {
17098931add6SHamilton Tobon Mosquera       P->print(Printer);
17108931add6SHamilton Tobon Mosquera       Printer << Separator;
17118931add6SHamilton Tobon Mosquera     }
17128931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
17138931add6SHamilton Tobon Mosquera     ValuesStr.clear();
17148931add6SHamilton Tobon Mosquera 
17158931add6SHamilton Tobon Mosquera     for (auto *S : OAs[2].StoredValues) {
17168931add6SHamilton Tobon Mosquera       S->print(Printer);
17178931add6SHamilton Tobon Mosquera       Printer << Separator;
17188931add6SHamilton Tobon Mosquera     }
17198931add6SHamilton Tobon Mosquera     LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
17208931add6SHamilton Tobon Mosquera   }
17218931add6SHamilton Tobon Mosquera 
1722bd2fa181SHamilton Tobon Mosquera   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1723bd2fa181SHamilton Tobon Mosquera   /// moved. Returns nullptr if the movement is not possible, or not worth it.
canBeMovedDownwards__anon23c38c770111::OpenMPOpt1724bd2fa181SHamilton Tobon Mosquera   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1725bd2fa181SHamilton Tobon Mosquera     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1726bd2fa181SHamilton Tobon Mosquera     //  Make it traverse the CFG.
1727bd2fa181SHamilton Tobon Mosquera 
1728bd2fa181SHamilton Tobon Mosquera     Instruction *CurrentI = &RuntimeCall;
1729bd2fa181SHamilton Tobon Mosquera     bool IsWorthIt = false;
1730bd2fa181SHamilton Tobon Mosquera     while ((CurrentI = CurrentI->getNextNode())) {
1731bd2fa181SHamilton Tobon Mosquera 
1732bd2fa181SHamilton Tobon Mosquera       // TODO: Once we detect the regions to be offloaded we should use the
1733bd2fa181SHamilton Tobon Mosquera       //  alias analysis manager to check if CurrentI may modify one of
1734bd2fa181SHamilton Tobon Mosquera       //  the offloaded regions.
1735bd2fa181SHamilton Tobon Mosquera       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1736bd2fa181SHamilton Tobon Mosquera         if (IsWorthIt)
1737bd2fa181SHamilton Tobon Mosquera           return CurrentI;
1738bd2fa181SHamilton Tobon Mosquera 
1739bd2fa181SHamilton Tobon Mosquera         return nullptr;
1740bd2fa181SHamilton Tobon Mosquera       }
1741bd2fa181SHamilton Tobon Mosquera 
1742bd2fa181SHamilton Tobon Mosquera       // FIXME: For now if we move it over anything without side effect
1743bd2fa181SHamilton Tobon Mosquera       //  is worth it.
1744bd2fa181SHamilton Tobon Mosquera       IsWorthIt = true;
1745bd2fa181SHamilton Tobon Mosquera     }
1746bd2fa181SHamilton Tobon Mosquera 
1747bd2fa181SHamilton Tobon Mosquera     // Return end of BasicBlock.
1748bd2fa181SHamilton Tobon Mosquera     return RuntimeCall.getParent()->getTerminator();
1749bd2fa181SHamilton Tobon Mosquera   }
1750bd2fa181SHamilton Tobon Mosquera 
1751496f8e5bSHamilton Tobon Mosquera   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
splitTargetDataBeginRTC__anon23c38c770111::OpenMPOpt1752bd2fa181SHamilton Tobon Mosquera   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1753bd2fa181SHamilton Tobon Mosquera                                Instruction &WaitMovementPoint) {
1754bd31abc1SHamilton Tobon Mosquera     // Create stack allocated handle (__tgt_async_info) at the beginning of the
1755bd31abc1SHamilton Tobon Mosquera     // function. Used for storing information of the async transfer, allowing to
1756bd31abc1SHamilton Tobon Mosquera     // wait on it later.
1757496f8e5bSHamilton Tobon Mosquera     auto &IRBuilder = OMPInfoCache.OMPBuilder;
1758bd31abc1SHamilton Tobon Mosquera     auto *F = RuntimeCall.getCaller();
1759bd31abc1SHamilton Tobon Mosquera     Instruction *FirstInst = &(F->getEntryBlock().front());
1760bd31abc1SHamilton Tobon Mosquera     AllocaInst *Handle = new AllocaInst(
1761bd31abc1SHamilton Tobon Mosquera         IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1762bd31abc1SHamilton Tobon Mosquera 
1763496f8e5bSHamilton Tobon Mosquera     // Add "issue" runtime call declaration:
1764496f8e5bSHamilton Tobon Mosquera     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1765496f8e5bSHamilton Tobon Mosquera     //   i8**, i8**, i64*, i64*)
1766496f8e5bSHamilton Tobon Mosquera     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1767496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_issue);
1768496f8e5bSHamilton Tobon Mosquera 
1769496f8e5bSHamilton Tobon Mosquera     // Change RuntimeCall call site for its asynchronous version.
177097e55cfeSJoseph Huber     SmallVector<Value *, 16> Args;
1771bd2fa181SHamilton Tobon Mosquera     for (auto &Arg : RuntimeCall.args())
1772496f8e5bSHamilton Tobon Mosquera       Args.push_back(Arg.get());
1773bd31abc1SHamilton Tobon Mosquera     Args.push_back(Handle);
1774496f8e5bSHamilton Tobon Mosquera 
1775496f8e5bSHamilton Tobon Mosquera     CallInst *IssueCallsite =
1776bd31abc1SHamilton Tobon Mosquera         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
177706cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
1778bd2fa181SHamilton Tobon Mosquera     RuntimeCall.eraseFromParent();
1779496f8e5bSHamilton Tobon Mosquera 
1780496f8e5bSHamilton Tobon Mosquera     // Add "wait" runtime call declaration:
1781496f8e5bSHamilton Tobon Mosquera     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1782496f8e5bSHamilton Tobon Mosquera     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1783496f8e5bSHamilton Tobon Mosquera         M, OMPRTL___tgt_target_data_begin_mapper_wait);
1784496f8e5bSHamilton Tobon Mosquera 
1785496f8e5bSHamilton Tobon Mosquera     Value *WaitParams[2] = {
1786da8bec47SJoseph Huber         IssueCallsite->getArgOperand(
1787da8bec47SJoseph Huber             OffloadArray::DeviceIDArgNum), // device_id.
1788bd31abc1SHamilton Tobon Mosquera         Handle                             // handle to wait on.
1789496f8e5bSHamilton Tobon Mosquera     };
179006cfdd52SJoseph Huber     CallInst *WaitCallsite = CallInst::Create(
179106cfdd52SJoseph Huber         WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
179206cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
1793496f8e5bSHamilton Tobon Mosquera 
1794496f8e5bSHamilton Tobon Mosquera     return true;
1795496f8e5bSHamilton Tobon Mosquera   }
1796496f8e5bSHamilton Tobon Mosquera 
combinedIdentStruct__anon23c38c770111::OpenMPOpt1797dc3b5b00SJohannes Doerfert   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1798dc3b5b00SJohannes Doerfert                                     bool GlobalOnly, bool &SingleChoice) {
1799dc3b5b00SJohannes Doerfert     if (CurrentIdent == NextIdent)
1800dc3b5b00SJohannes Doerfert       return CurrentIdent;
1801dc3b5b00SJohannes Doerfert 
1802396b7253SJohannes Doerfert     // TODO: Figure out how to actually combine multiple debug locations. For
1803dc3b5b00SJohannes Doerfert     //       now we just keep an existing one if there is a single choice.
1804dc3b5b00SJohannes Doerfert     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1805dc3b5b00SJohannes Doerfert       SingleChoice = !CurrentIdent;
1806dc3b5b00SJohannes Doerfert       return NextIdent;
1807dc3b5b00SJohannes Doerfert     }
1808396b7253SJohannes Doerfert     return nullptr;
1809396b7253SJohannes Doerfert   }
1810396b7253SJohannes Doerfert 
1811396b7253SJohannes Doerfert   /// Return an `struct ident_t*` value that represents the ones used in the
1812396b7253SJohannes Doerfert   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1813396b7253SJohannes Doerfert   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1814396b7253SJohannes Doerfert   /// return value we create one from scratch. We also do not yet combine
1815396b7253SJohannes Doerfert   /// information, e.g., the source locations, see combinedIdentStruct.
18167cfd267cSsstefan1   Value *
getCombinedIdentFromCallUsesIn__anon23c38c770111::OpenMPOpt18177cfd267cSsstefan1   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
18187cfd267cSsstefan1                                  Function &F, bool GlobalOnly) {
1819dc3b5b00SJohannes Doerfert     bool SingleChoice = true;
1820396b7253SJohannes Doerfert     Value *Ident = nullptr;
1821396b7253SJohannes Doerfert     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1822396b7253SJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
1823396b7253SJohannes Doerfert       if (!CI || &F != &Caller)
1824396b7253SJohannes Doerfert         return false;
1825396b7253SJohannes Doerfert       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1826dc3b5b00SJohannes Doerfert                                   /* GlobalOnly */ true, SingleChoice);
1827396b7253SJohannes Doerfert       return false;
1828396b7253SJohannes Doerfert     };
1829624d34afSJohannes Doerfert     RFI.foreachUse(SCC, CombineIdentStruct);
1830396b7253SJohannes Doerfert 
1831dc3b5b00SJohannes Doerfert     if (!Ident || !SingleChoice) {
1832396b7253SJohannes Doerfert       // The IRBuilder uses the insertion block to get to the module, this is
1833396b7253SJohannes Doerfert       // unfortunate but we work around it for now.
18347cfd267cSsstefan1       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
18357cfd267cSsstefan1         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1836396b7253SJohannes Doerfert             &F.getEntryBlock(), F.getEntryBlock().begin()));
1837396b7253SJohannes Doerfert       // Create a fallback location if non was found.
1838396b7253SJohannes Doerfert       // TODO: Use the debug locations of the calls instead.
1839944aa042SJohannes Doerfert       uint32_t SrcLocStrSize;
1840944aa042SJohannes Doerfert       Constant *Loc =
1841944aa042SJohannes Doerfert           OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1842944aa042SJohannes Doerfert       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
1843396b7253SJohannes Doerfert     }
1844396b7253SJohannes Doerfert     return Ident;
1845396b7253SJohannes Doerfert   }
1846396b7253SJohannes Doerfert 
1847b726c557SJohannes Doerfert   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
18489548b74aSJohannes Doerfert   /// \p ReplVal if given.
deduplicateRuntimeCalls__anon23c38c770111::OpenMPOpt18497cfd267cSsstefan1   bool deduplicateRuntimeCalls(Function &F,
18507cfd267cSsstefan1                                OMPInformationCache::RuntimeFunctionInfo &RFI,
18519548b74aSJohannes Doerfert                                Value *ReplVal = nullptr) {
18528855fec3SJohannes Doerfert     auto *UV = RFI.getUseVector(F);
18538855fec3SJohannes Doerfert     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1854b1fbf438SRoman Lebedev       return false;
1855b1fbf438SRoman Lebedev 
18567cfd267cSsstefan1     LLVM_DEBUG(
18577cfd267cSsstefan1         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
18587cfd267cSsstefan1                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
18597cfd267cSsstefan1 
1860ab3da5ddSMichael Liao     assert((!ReplVal || (isa<Argument>(ReplVal) &&
1861ab3da5ddSMichael Liao                          cast<Argument>(ReplVal)->getParent() == &F)) &&
18629548b74aSJohannes Doerfert            "Unexpected replacement value!");
1863396b7253SJohannes Doerfert 
1864396b7253SJohannes Doerfert     // TODO: Use dominance to find a good position instead.
18656aab27baSsstefan1     auto CanBeMoved = [this](CallBase &CB) {
18664f0225f6SKazu Hirata       unsigned NumArgs = CB.arg_size();
1867396b7253SJohannes Doerfert       if (NumArgs == 0)
1868396b7253SJohannes Doerfert         return true;
18696aab27baSsstefan1       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1870396b7253SJohannes Doerfert         return false;
1871c11ebfeaSJoseph Huber       for (unsigned U = 1; U < NumArgs; ++U)
1872c11ebfeaSJoseph Huber         if (isa<Instruction>(CB.getArgOperand(U)))
1873396b7253SJohannes Doerfert           return false;
1874396b7253SJohannes Doerfert       return true;
1875396b7253SJohannes Doerfert     };
1876396b7253SJohannes Doerfert 
18779548b74aSJohannes Doerfert     if (!ReplVal) {
18788855fec3SJohannes Doerfert       for (Use *U : *UV)
18799548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1880396b7253SJohannes Doerfert           if (!CanBeMoved(*CI))
1881396b7253SJohannes Doerfert             continue;
18824d4ea9acSHuber, Joseph 
1883f97de4cbSGiorgis Georgakoudis           // If the function is a kernel, dedup will move
1884f97de4cbSGiorgis Georgakoudis           // the runtime call right after the kernel init callsite. Otherwise,
1885f97de4cbSGiorgis Georgakoudis           // it will move it to the beginning of the caller function.
1886f97de4cbSGiorgis Georgakoudis           if (isKernel(F)) {
1887f97de4cbSGiorgis Georgakoudis             auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
1888f97de4cbSGiorgis Georgakoudis             auto *KernelInitUV = KernelInitRFI.getUseVector(F);
1889f97de4cbSGiorgis Georgakoudis 
1890f97de4cbSGiorgis Georgakoudis             if (KernelInitUV->empty())
1891f97de4cbSGiorgis Georgakoudis               continue;
1892f97de4cbSGiorgis Georgakoudis 
1893f97de4cbSGiorgis Georgakoudis             assert(KernelInitUV->size() == 1 &&
1894f97de4cbSGiorgis Georgakoudis                    "Expected a single __kmpc_target_init in kernel\n");
1895f97de4cbSGiorgis Georgakoudis 
1896f97de4cbSGiorgis Georgakoudis             CallInst *KernelInitCI =
1897f97de4cbSGiorgis Georgakoudis                 getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
1898f97de4cbSGiorgis Georgakoudis             assert(KernelInitCI &&
1899f97de4cbSGiorgis Georgakoudis                    "Expected a call to __kmpc_target_init in kernel\n");
1900f97de4cbSGiorgis Georgakoudis 
1901f97de4cbSGiorgis Georgakoudis             CI->moveAfter(KernelInitCI);
1902f97de4cbSGiorgis Georgakoudis           } else
19039548b74aSJohannes Doerfert             CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
19049548b74aSJohannes Doerfert           ReplVal = CI;
19059548b74aSJohannes Doerfert           break;
19069548b74aSJohannes Doerfert         }
19079548b74aSJohannes Doerfert       if (!ReplVal)
19089548b74aSJohannes Doerfert         return false;
19099548b74aSJohannes Doerfert     }
19109548b74aSJohannes Doerfert 
1911396b7253SJohannes Doerfert     // If we use a call as a replacement value we need to make sure the ident is
1912396b7253SJohannes Doerfert     // valid at the new location. For now we just pick a global one, either
1913396b7253SJohannes Doerfert     // existing and used by one of the calls, or created from scratch.
1914396b7253SJohannes Doerfert     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
191592c9ff6dSKazu Hirata       if (!CI->arg_empty() &&
19166aab27baSsstefan1           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1917396b7253SJohannes Doerfert         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1918396b7253SJohannes Doerfert                                                       /* GlobalOnly */ true);
1919396b7253SJohannes Doerfert         CI->setArgOperand(0, Ident);
1920396b7253SJohannes Doerfert       }
1921396b7253SJohannes Doerfert     }
1922396b7253SJohannes Doerfert 
19239548b74aSJohannes Doerfert     bool Changed = false;
19249548b74aSJohannes Doerfert     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
19259548b74aSJohannes Doerfert       CallInst *CI = getCallIfRegularCall(U, &RFI);
19269548b74aSJohannes Doerfert       if (!CI || CI == ReplVal || &F != &Caller)
19279548b74aSJohannes Doerfert         return false;
19289548b74aSJohannes Doerfert       assert(CI->getCaller() == &F && "Unexpected call!");
19294d4ea9acSHuber, Joseph 
19304d4ea9acSHuber, Joseph       auto Remark = [&](OptimizationRemark OR) {
19314d4ea9acSHuber, Joseph         return OR << "OpenMP runtime call "
1932eef6601bSJoseph Huber                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
19334d4ea9acSHuber, Joseph       };
1934eef6601bSJoseph Huber       if (CI->getDebugLoc())
19352c31d5ebSJoseph Huber         emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
1936eef6601bSJoseph Huber       else
19372c31d5ebSJoseph Huber         emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
19384d4ea9acSHuber, Joseph 
19399548b74aSJohannes Doerfert       CGUpdater.removeCallSite(*CI);
19409548b74aSJohannes Doerfert       CI->replaceAllUsesWith(ReplVal);
19419548b74aSJohannes Doerfert       CI->eraseFromParent();
19429548b74aSJohannes Doerfert       ++NumOpenMPRuntimeCallsDeduplicated;
19439548b74aSJohannes Doerfert       Changed = true;
19449548b74aSJohannes Doerfert       return true;
19459548b74aSJohannes Doerfert     };
1946624d34afSJohannes Doerfert     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
19479548b74aSJohannes Doerfert 
19489548b74aSJohannes Doerfert     return Changed;
19499548b74aSJohannes Doerfert   }
19509548b74aSJohannes Doerfert 
19519548b74aSJohannes Doerfert   /// Collect arguments that represent the global thread id in \p GTIdArgs.
collectGlobalThreadIdArguments__anon23c38c770111::OpenMPOpt19529548b74aSJohannes Doerfert   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
19539548b74aSJohannes Doerfert     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
19549548b74aSJohannes Doerfert     //       initialization. We could define an AbstractAttribute instead and
19559548b74aSJohannes Doerfert     //       run the Attributor here once it can be run as an SCC pass.
19569548b74aSJohannes Doerfert 
19579548b74aSJohannes Doerfert     // Helper to check the argument \p ArgNo at all call sites of \p F for
19589548b74aSJohannes Doerfert     // a GTId.
19599548b74aSJohannes Doerfert     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
19609548b74aSJohannes Doerfert       if (!F.hasLocalLinkage())
19619548b74aSJohannes Doerfert         return false;
19629548b74aSJohannes Doerfert       for (Use &U : F.uses()) {
19639548b74aSJohannes Doerfert         if (CallInst *CI = getCallIfRegularCall(U)) {
19649548b74aSJohannes Doerfert           Value *ArgOp = CI->getArgOperand(ArgNo);
19659548b74aSJohannes Doerfert           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
19667cfd267cSsstefan1               getCallIfRegularCall(
19677cfd267cSsstefan1                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
19689548b74aSJohannes Doerfert             continue;
19699548b74aSJohannes Doerfert         }
19709548b74aSJohannes Doerfert         return false;
19719548b74aSJohannes Doerfert       }
19729548b74aSJohannes Doerfert       return true;
19739548b74aSJohannes Doerfert     };
19749548b74aSJohannes Doerfert 
19759548b74aSJohannes Doerfert     // Helper to identify uses of a GTId as GTId arguments.
19769548b74aSJohannes Doerfert     auto AddUserArgs = [&](Value &GTId) {
19779548b74aSJohannes Doerfert       for (Use &U : GTId.uses())
19789548b74aSJohannes Doerfert         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
19799548b74aSJohannes Doerfert           if (CI->isArgOperand(&U))
19809548b74aSJohannes Doerfert             if (Function *Callee = CI->getCalledFunction())
19819548b74aSJohannes Doerfert               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
19829548b74aSJohannes Doerfert                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
19839548b74aSJohannes Doerfert     };
19849548b74aSJohannes Doerfert 
19859548b74aSJohannes Doerfert     // The argument users of __kmpc_global_thread_num calls are GTIds.
19867cfd267cSsstefan1     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
19877cfd267cSsstefan1         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
19887cfd267cSsstefan1 
1989624d34afSJohannes Doerfert     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
19908855fec3SJohannes Doerfert       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
19919548b74aSJohannes Doerfert         AddUserArgs(*CI);
19928855fec3SJohannes Doerfert       return false;
19938855fec3SJohannes Doerfert     });
19949548b74aSJohannes Doerfert 
19959548b74aSJohannes Doerfert     // Transitively search for more arguments by looking at the users of the
19969548b74aSJohannes Doerfert     // ones we know already. During the search the GTIdArgs vector is extended
19979548b74aSJohannes Doerfert     // so we cannot cache the size nor can we use a range based for.
1998c11ebfeaSJoseph Huber     for (unsigned U = 0; U < GTIdArgs.size(); ++U)
1999c11ebfeaSJoseph Huber       AddUserArgs(*GTIdArgs[U]);
20009548b74aSJohannes Doerfert   }
20019548b74aSJohannes Doerfert 
20025b0581aeSJohannes Doerfert   /// Kernel (=GPU) optimizations and utility functions
20035b0581aeSJohannes Doerfert   ///
20045b0581aeSJohannes Doerfert   ///{{
20055b0581aeSJohannes Doerfert 
20065b0581aeSJohannes Doerfert   /// Check if \p F is a kernel, hence entry point for target offloading.
isKernel__anon23c38c770111::OpenMPOpt20075b0581aeSJohannes Doerfert   bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
20085b0581aeSJohannes Doerfert 
20095b0581aeSJohannes Doerfert   /// Cache to remember the unique kernel for a function.
20105b0581aeSJohannes Doerfert   DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
20115b0581aeSJohannes Doerfert 
20125b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p F, if any.
20135b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Function &F);
20145b0581aeSJohannes Doerfert 
20155b0581aeSJohannes Doerfert   /// Find the unique kernel that will execute \p I, if any.
getUniqueKernelFor__anon23c38c770111::OpenMPOpt20165b0581aeSJohannes Doerfert   Kernel getUniqueKernelFor(Instruction &I) {
20175b0581aeSJohannes Doerfert     return getUniqueKernelFor(*I.getFunction());
20185b0581aeSJohannes Doerfert   }
20195b0581aeSJohannes Doerfert 
20205b0581aeSJohannes Doerfert   /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
20215b0581aeSJohannes Doerfert   /// the cases where we can avoid taking the address of a function.
20225b0581aeSJohannes Doerfert   bool rewriteDeviceCodeStateMachine();
20235b0581aeSJohannes Doerfert 
20245b0581aeSJohannes Doerfert   ///
20255b0581aeSJohannes Doerfert   ///}}
20265b0581aeSJohannes Doerfert 
20274d4ea9acSHuber, Joseph   /// Emit a remark generically
20284d4ea9acSHuber, Joseph   ///
20294d4ea9acSHuber, Joseph   /// This template function can be used to generically emit a remark. The
20304d4ea9acSHuber, Joseph   /// RemarkKind should be one of the following:
20314d4ea9acSHuber, Joseph   ///   - OptimizationRemark to indicate a successful optimization attempt
20324d4ea9acSHuber, Joseph   ///   - OptimizationRemarkMissed to report a failed optimization attempt
20334d4ea9acSHuber, Joseph   ///   - OptimizationRemarkAnalysis to provide additional information about an
20344d4ea9acSHuber, Joseph   ///     optimization attempt
20354d4ea9acSHuber, Joseph   ///
20364d4ea9acSHuber, Joseph   /// The remark is built using a callback function provided by the caller that
20374d4ea9acSHuber, Joseph   /// takes a RemarkKind as input and returns a RemarkKind.
20382db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon23c38c770111::OpenMPOpt20392db182ffSJoseph Huber   void emitRemark(Instruction *I, StringRef RemarkName,
2040e8039ad4SJohannes Doerfert                   RemarkCallBack &&RemarkCB) const {
20412db182ffSJoseph Huber     Function *F = I->getParent()->getParent();
20424d4ea9acSHuber, Joseph     auto &ORE = OREGetter(F);
20434d4ea9acSHuber, Joseph 
20442c31d5ebSJoseph Huber     if (RemarkName.startswith("OMP"))
20452c31d5ebSJoseph Huber       ORE.emit([&]() {
20462c31d5ebSJoseph Huber         return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
20472c31d5ebSJoseph Huber                << " [" << RemarkName << "]";
20482c31d5ebSJoseph Huber       });
20492c31d5ebSJoseph Huber     else
20502c31d5ebSJoseph Huber       ORE.emit(
20512c31d5ebSJoseph Huber           [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
20524d4ea9acSHuber, Joseph   }
20534d4ea9acSHuber, Joseph 
20542db182ffSJoseph Huber   /// Emit a remark on a function.
20552db182ffSJoseph Huber   template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon23c38c770111::OpenMPOpt20562db182ffSJoseph Huber   void emitRemark(Function *F, StringRef RemarkName,
20572db182ffSJoseph Huber                   RemarkCallBack &&RemarkCB) const {
20580f426935Ssstefan1     auto &ORE = OREGetter(F);
20590f426935Ssstefan1 
20602c31d5ebSJoseph Huber     if (RemarkName.startswith("OMP"))
20612c31d5ebSJoseph Huber       ORE.emit([&]() {
20622c31d5ebSJoseph Huber         return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
20632c31d5ebSJoseph Huber                << " [" << RemarkName << "]";
20642c31d5ebSJoseph Huber       });
20652c31d5ebSJoseph Huber     else
20662c31d5ebSJoseph Huber       ORE.emit(
20672c31d5ebSJoseph Huber           [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
20680f426935Ssstefan1   }
20690f426935Ssstefan1 
207058725c12SJoseph Huber   /// RAII struct to temporarily change an RTL function's linkage to external.
207158725c12SJoseph Huber   /// This prevents it from being mistakenly removed by other optimizations.
207258725c12SJoseph Huber   struct ExternalizationRAII {
ExternalizationRAII__anon23c38c770111::OpenMPOpt::ExternalizationRAII207358725c12SJoseph Huber     ExternalizationRAII(OMPInformationCache &OMPInfoCache,
207458725c12SJoseph Huber                         RuntimeFunction RFKind)
2075e757a3b0SJoseph Huber         : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
207658725c12SJoseph Huber       if (!Declaration)
207758725c12SJoseph Huber         return;
207858725c12SJoseph Huber 
207958725c12SJoseph Huber       LinkageType = Declaration->getLinkage();
208058725c12SJoseph Huber       Declaration->setLinkage(GlobalValue::ExternalLinkage);
208158725c12SJoseph Huber     }
208258725c12SJoseph Huber 
~ExternalizationRAII__anon23c38c770111::OpenMPOpt::ExternalizationRAII208358725c12SJoseph Huber     ~ExternalizationRAII() {
208458725c12SJoseph Huber       if (!Declaration)
208558725c12SJoseph Huber         return;
208658725c12SJoseph Huber 
208758725c12SJoseph Huber       Declaration->setLinkage(LinkageType);
208858725c12SJoseph Huber     }
208958725c12SJoseph Huber 
209058725c12SJoseph Huber     Function *Declaration;
209158725c12SJoseph Huber     GlobalValue::LinkageTypes LinkageType;
209258725c12SJoseph Huber   };
209358725c12SJoseph Huber 
2094b726c557SJohannes Doerfert   /// The underlying module.
20959548b74aSJohannes Doerfert   Module &M;
20969548b74aSJohannes Doerfert 
20979548b74aSJohannes Doerfert   /// The SCC we are operating on.
2098ee17263aSJohannes Doerfert   SmallVectorImpl<Function *> &SCC;
20999548b74aSJohannes Doerfert 
21009548b74aSJohannes Doerfert   /// Callback to update the call graph, the first argument is a removed call,
21019548b74aSJohannes Doerfert   /// the second an optional replacement call.
21029548b74aSJohannes Doerfert   CallGraphUpdater &CGUpdater;
21039548b74aSJohannes Doerfert 
21044d4ea9acSHuber, Joseph   /// Callback to get an OptimizationRemarkEmitter from a Function *
21054d4ea9acSHuber, Joseph   OptimizationRemarkGetter OREGetter;
21064d4ea9acSHuber, Joseph 
21077cfd267cSsstefan1   /// OpenMP-specific information cache. Also used for Attributor runs.
21087cfd267cSsstefan1   OMPInformationCache &OMPInfoCache;
2109b8235d2bSsstefan1 
2110b8235d2bSsstefan1   /// Attributor instance.
2111b8235d2bSsstefan1   Attributor &A;
2112b8235d2bSsstefan1 
2113b8235d2bSsstefan1   /// Helper function to run Attributor on SCC.
runAttributor__anon23c38c770111::OpenMPOpt2114d9659bf6SJohannes Doerfert   bool runAttributor(bool IsModulePass) {
2115b8235d2bSsstefan1     if (SCC.empty())
2116b8235d2bSsstefan1       return false;
2117b8235d2bSsstefan1 
211858725c12SJoseph Huber     // Temporarily make these function have external linkage so the Attributor
211958725c12SJoseph Huber     // doesn't remove them when we try to look them up later.
212058725c12SJoseph Huber     ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
212158725c12SJoseph Huber     ExternalizationRAII EndParallel(OMPInfoCache,
212258725c12SJoseph Huber                                     OMPRTL___kmpc_kernel_end_parallel);
212358725c12SJoseph Huber     ExternalizationRAII BarrierSPMD(OMPInfoCache,
212458725c12SJoseph Huber                                     OMPRTL___kmpc_barrier_simple_spmd);
212573720c80SJohannes Doerfert     ExternalizationRAII BarrierGeneric(OMPInfoCache,
212673720c80SJohannes Doerfert                                        OMPRTL___kmpc_barrier_simple_generic);
21271cf86df8SJoseph Huber     ExternalizationRAII ThreadId(OMPInfoCache,
21281cf86df8SJoseph Huber                                  OMPRTL___kmpc_get_hardware_thread_id_in_block);
212974cacf21SJoseph Huber     ExternalizationRAII NumThreads(
213074cacf21SJoseph Huber         OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
21317986a5f2SJoseph Huber     ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
213258725c12SJoseph Huber 
2133d9659bf6SJohannes Doerfert     registerAAs(IsModulePass);
2134b8235d2bSsstefan1 
2135b8235d2bSsstefan1     ChangeStatus Changed = A.run();
2136b8235d2bSsstefan1 
2137b8235d2bSsstefan1     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
2138b8235d2bSsstefan1                       << " functions, result: " << Changed << ".\n");
2139b8235d2bSsstefan1 
2140b8235d2bSsstefan1     return Changed == ChangeStatus::CHANGED;
2141b8235d2bSsstefan1   }
2142b8235d2bSsstefan1 
21435ab6aeddSJose M Monsalve Diaz   void registerFoldRuntimeCall(RuntimeFunction RF);
21445ab6aeddSJose M Monsalve Diaz 
2145b8235d2bSsstefan1   /// Populate the Attributor with abstract attribute opportunities in the
2146b8235d2bSsstefan1   /// function.
2147d9659bf6SJohannes Doerfert   void registerAAs(bool IsModulePass);
2148b8235d2bSsstefan1 };
2149b8235d2bSsstefan1 
getUniqueKernelFor(Function & F)21505b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
21515b0581aeSJohannes Doerfert   if (!OMPInfoCache.ModuleSlice.count(&F))
21525b0581aeSJohannes Doerfert     return nullptr;
21535b0581aeSJohannes Doerfert 
21545b0581aeSJohannes Doerfert   // Use a scope to keep the lifetime of the CachedKernel short.
21555b0581aeSJohannes Doerfert   {
21565b0581aeSJohannes Doerfert     Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
21575b0581aeSJohannes Doerfert     if (CachedKernel)
21585b0581aeSJohannes Doerfert       return *CachedKernel;
21595b0581aeSJohannes Doerfert 
21605b0581aeSJohannes Doerfert     // TODO: We should use an AA to create an (optimistic and callback
21615b0581aeSJohannes Doerfert     //       call-aware) call graph. For now we stick to simple patterns that
21625b0581aeSJohannes Doerfert     //       are less powerful, basically the worst fixpoint.
21635b0581aeSJohannes Doerfert     if (isKernel(F)) {
21645b0581aeSJohannes Doerfert       CachedKernel = Kernel(&F);
21655b0581aeSJohannes Doerfert       return *CachedKernel;
21665b0581aeSJohannes Doerfert     }
21675b0581aeSJohannes Doerfert 
21685b0581aeSJohannes Doerfert     CachedKernel = nullptr;
2169994bb6ebSJohannes Doerfert     if (!F.hasLocalLinkage()) {
2170994bb6ebSJohannes Doerfert 
2171994bb6ebSJohannes Doerfert       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
21722db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2173eef6601bSJoseph Huber         return ORA << "Potentially unknown OpenMP target region caller.";
2174994bb6ebSJohannes Doerfert       };
21752db182ffSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
2176994bb6ebSJohannes Doerfert 
21775b0581aeSJohannes Doerfert       return nullptr;
21785b0581aeSJohannes Doerfert     }
2179994bb6ebSJohannes Doerfert   }
21805b0581aeSJohannes Doerfert 
21815b0581aeSJohannes Doerfert   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
21825b0581aeSJohannes Doerfert     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
21835b0581aeSJohannes Doerfert       // Allow use in equality comparisons.
21845b0581aeSJohannes Doerfert       if (Cmp->isEquality())
21855b0581aeSJohannes Doerfert         return getUniqueKernelFor(*Cmp);
21865b0581aeSJohannes Doerfert       return nullptr;
21875b0581aeSJohannes Doerfert     }
21885b0581aeSJohannes Doerfert     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
21895b0581aeSJohannes Doerfert       // Allow direct calls.
21905b0581aeSJohannes Doerfert       if (CB->isCallee(&U))
21915b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
2192a2dbfb6bSGiorgis Georgakoudis 
2193a2dbfb6bSGiorgis Georgakoudis       OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2194a2dbfb6bSGiorgis Georgakoudis           OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
2195a2dbfb6bSGiorgis Georgakoudis       // Allow the use in __kmpc_parallel_51 calls.
2196a2dbfb6bSGiorgis Georgakoudis       if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
21975b0581aeSJohannes Doerfert         return getUniqueKernelFor(*CB);
21985b0581aeSJohannes Doerfert       return nullptr;
21995b0581aeSJohannes Doerfert     }
22005b0581aeSJohannes Doerfert     // Disallow every other use.
22015b0581aeSJohannes Doerfert     return nullptr;
22025b0581aeSJohannes Doerfert   };
22035b0581aeSJohannes Doerfert 
22045b0581aeSJohannes Doerfert   // TODO: In the future we want to track more than just a unique kernel.
22055b0581aeSJohannes Doerfert   SmallPtrSet<Kernel, 2> PotentialKernels;
22068d8ce85bSsstefan1   OMPInformationCache::foreachUse(F, [&](const Use &U) {
22075b0581aeSJohannes Doerfert     PotentialKernels.insert(GetUniqueKernelForUse(U));
22085b0581aeSJohannes Doerfert   });
22095b0581aeSJohannes Doerfert 
22105b0581aeSJohannes Doerfert   Kernel K = nullptr;
22115b0581aeSJohannes Doerfert   if (PotentialKernels.size() == 1)
22125b0581aeSJohannes Doerfert     K = *PotentialKernels.begin();
22135b0581aeSJohannes Doerfert 
22145b0581aeSJohannes Doerfert   // Cache the result.
22155b0581aeSJohannes Doerfert   UniqueKernelMap[&F] = K;
22165b0581aeSJohannes Doerfert 
22175b0581aeSJohannes Doerfert   return K;
22185b0581aeSJohannes Doerfert }
22195b0581aeSJohannes Doerfert 
rewriteDeviceCodeStateMachine()22205b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
2221a2dbfb6bSGiorgis Georgakoudis   OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2222a2dbfb6bSGiorgis Georgakoudis       OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
22235b0581aeSJohannes Doerfert 
22245b0581aeSJohannes Doerfert   bool Changed = false;
2225a2dbfb6bSGiorgis Georgakoudis   if (!KernelParallelRFI)
22265b0581aeSJohannes Doerfert     return Changed;
22275b0581aeSJohannes Doerfert 
2228cd0dd8ecSJoseph Huber   // If we have disabled state machine changes, exit
2229cd0dd8ecSJoseph Huber   if (DisableOpenMPOptStateMachineRewrite)
2230cd0dd8ecSJoseph Huber     return Changed;
2231cd0dd8ecSJoseph Huber 
22325b0581aeSJohannes Doerfert   for (Function *F : SCC) {
22335b0581aeSJohannes Doerfert 
2234a2dbfb6bSGiorgis Georgakoudis     // Check if the function is a use in a __kmpc_parallel_51 call at
22355b0581aeSJohannes Doerfert     // all.
22365b0581aeSJohannes Doerfert     bool UnknownUse = false;
2237a2dbfb6bSGiorgis Georgakoudis     bool KernelParallelUse = false;
22385b0581aeSJohannes Doerfert     unsigned NumDirectCalls = 0;
22395b0581aeSJohannes Doerfert 
22405b0581aeSJohannes Doerfert     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
22418d8ce85bSsstefan1     OMPInformationCache::foreachUse(*F, [&](Use &U) {
22425b0581aeSJohannes Doerfert       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
22435b0581aeSJohannes Doerfert         if (CB->isCallee(&U)) {
22445b0581aeSJohannes Doerfert           ++NumDirectCalls;
22455b0581aeSJohannes Doerfert           return;
22465b0581aeSJohannes Doerfert         }
22475b0581aeSJohannes Doerfert 
224881db6144SMichael Liao       if (isa<ICmpInst>(U.getUser())) {
22495b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
22505b0581aeSJohannes Doerfert         return;
22515b0581aeSJohannes Doerfert       }
2252a2dbfb6bSGiorgis Georgakoudis 
2253a2dbfb6bSGiorgis Georgakoudis       // Find wrapper functions that represent parallel kernels.
2254a2dbfb6bSGiorgis Georgakoudis       CallInst *CI =
2255a2dbfb6bSGiorgis Georgakoudis           OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
2256a2dbfb6bSGiorgis Georgakoudis       const unsigned int WrapperFunctionArgNo = 6;
2257a2dbfb6bSGiorgis Georgakoudis       if (!KernelParallelUse && CI &&
2258a2dbfb6bSGiorgis Georgakoudis           CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
2259a2dbfb6bSGiorgis Georgakoudis         KernelParallelUse = true;
22605b0581aeSJohannes Doerfert         ToBeReplacedStateMachineUses.push_back(&U);
22615b0581aeSJohannes Doerfert         return;
22625b0581aeSJohannes Doerfert       }
22635b0581aeSJohannes Doerfert       UnknownUse = true;
22645b0581aeSJohannes Doerfert     });
22655b0581aeSJohannes Doerfert 
2266a2dbfb6bSGiorgis Georgakoudis     // Do not emit a remark if we haven't seen a __kmpc_parallel_51
2267fec1f210SJohannes Doerfert     // use.
2268a2dbfb6bSGiorgis Georgakoudis     if (!KernelParallelUse)
22695b0581aeSJohannes Doerfert       continue;
22705b0581aeSJohannes Doerfert 
2271fec1f210SJohannes Doerfert     // If this ever hits, we should investigate.
2272fec1f210SJohannes Doerfert     // TODO: Checking the number of uses is not a necessary restriction and
2273fec1f210SJohannes Doerfert     // should be lifted.
2274fec1f210SJohannes Doerfert     if (UnknownUse || NumDirectCalls != 1 ||
2275d9659bf6SJohannes Doerfert         ToBeReplacedStateMachineUses.size() > 2) {
22762db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
22772db182ffSJoseph Huber         return ORA << "Parallel region is used in "
2278fec1f210SJohannes Doerfert                    << (UnknownUse ? "unknown" : "unexpected")
2279eef6601bSJoseph Huber                    << " ways. Will not attempt to rewrite the state machine.";
2280fec1f210SJohannes Doerfert       };
22812c31d5ebSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
22825b0581aeSJohannes Doerfert       continue;
2283fec1f210SJohannes Doerfert     }
22845b0581aeSJohannes Doerfert 
2285a2dbfb6bSGiorgis Georgakoudis     // Even if we have __kmpc_parallel_51 calls, we (for now) give
22865b0581aeSJohannes Doerfert     // up if the function is not called from a unique kernel.
22875b0581aeSJohannes Doerfert     Kernel K = getUniqueKernelFor(*F);
2288fec1f210SJohannes Doerfert     if (!K) {
22892db182ffSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2290eef6601bSJoseph Huber         return ORA << "Parallel region is not called from a unique kernel. "
2291eef6601bSJoseph Huber                       "Will not attempt to rewrite the state machine.";
2292fec1f210SJohannes Doerfert       };
22932c31d5ebSJoseph Huber       emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
22945b0581aeSJohannes Doerfert       continue;
2295fec1f210SJohannes Doerfert     }
22965b0581aeSJohannes Doerfert 
22975b0581aeSJohannes Doerfert     // We now know F is a parallel body function called only from the kernel K.
22985b0581aeSJohannes Doerfert     // We also identified the state machine uses in which we replace the
22995b0581aeSJohannes Doerfert     // function pointer by a new global symbol for identification purposes. This
23005b0581aeSJohannes Doerfert     // ensures only direct calls to the function are left.
23015b0581aeSJohannes Doerfert 
23025b0581aeSJohannes Doerfert     Module &M = *F->getParent();
23035b0581aeSJohannes Doerfert     Type *Int8Ty = Type::getInt8Ty(M.getContext());
23045b0581aeSJohannes Doerfert 
23055b0581aeSJohannes Doerfert     auto *ID = new GlobalVariable(
23065b0581aeSJohannes Doerfert         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
23075b0581aeSJohannes Doerfert         UndefValue::get(Int8Ty), F->getName() + ".ID");
23085b0581aeSJohannes Doerfert 
23095b0581aeSJohannes Doerfert     for (Use *U : ToBeReplacedStateMachineUses)
231071052ea1SJon Chesterfield       U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
231171052ea1SJon Chesterfield           ID, U->get()->getType()));
23125b0581aeSJohannes Doerfert 
23135b0581aeSJohannes Doerfert     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
23145b0581aeSJohannes Doerfert 
23155b0581aeSJohannes Doerfert     Changed = true;
23165b0581aeSJohannes Doerfert   }
23175b0581aeSJohannes Doerfert 
23185b0581aeSJohannes Doerfert   return Changed;
23195b0581aeSJohannes Doerfert }
23205b0581aeSJohannes Doerfert 
2321b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
2322b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
2323b8235d2bSsstefan1   using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker__anon23c38c770111::AAICVTracker2324b8235d2bSsstefan1   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2325b8235d2bSsstefan1 
initialize__anon23c38c770111::AAICVTracker23265dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
23275dfd7cc4Ssstefan1     Function *F = getAnchorScope();
23285dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
23295dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
23305dfd7cc4Ssstefan1   }
23315dfd7cc4Ssstefan1 
2332b8235d2bSsstefan1   /// Returns true if value is assumed to be tracked.
isAssumedTracked__anon23c38c770111::AAICVTracker2333b8235d2bSsstefan1   bool isAssumedTracked() const { return getAssumed(); }
2334b8235d2bSsstefan1 
2335b8235d2bSsstefan1   /// Returns true if value is known to be tracked.
isKnownTracked__anon23c38c770111::AAICVTracker2336b8235d2bSsstefan1   bool isKnownTracked() const { return getAssumed(); }
2337b8235d2bSsstefan1 
2338b8235d2bSsstefan1   /// Create an abstract attribute biew for the position \p IRP.
2339b8235d2bSsstefan1   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
2340b8235d2bSsstefan1 
2341b8235d2bSsstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon23c38c770111::AAICVTracker23425dfd7cc4Ssstefan1   virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
23435dfd7cc4Ssstefan1                                                 const Instruction *I,
23445dfd7cc4Ssstefan1                                                 Attributor &A) const {
23455dfd7cc4Ssstefan1     return None;
23465dfd7cc4Ssstefan1   }
23475dfd7cc4Ssstefan1 
23485dfd7cc4Ssstefan1   /// Return an assumed unique ICV value if a single candidate is found. If
23495dfd7cc4Ssstefan1   /// there cannot be one, return a nullptr. If it is not clear yet, return the
23505dfd7cc4Ssstefan1   /// Optional::NoneType.
23515dfd7cc4Ssstefan1   virtual Optional<Value *>
23525dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
23535dfd7cc4Ssstefan1 
23545dfd7cc4Ssstefan1   // Currently only nthreads is being tracked.
23555dfd7cc4Ssstefan1   // this array will only grow with time.
23565dfd7cc4Ssstefan1   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
2357b8235d2bSsstefan1 
2358b8235d2bSsstefan1   /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAICVTracker2359b8235d2bSsstefan1   const std::string getName() const override { return "AAICVTracker"; }
2360b8235d2bSsstefan1 
2361233af895SLuofan Chen   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAICVTracker2362233af895SLuofan Chen   const char *getIdAddr() const override { return &ID; }
2363233af895SLuofan Chen 
2364233af895SLuofan Chen   /// This function should return true if the type of the \p AA is AAICVTracker
classof__anon23c38c770111::AAICVTracker2365233af895SLuofan Chen   static bool classof(const AbstractAttribute *AA) {
2366233af895SLuofan Chen     return (AA->getIdAddr() == &ID);
2367233af895SLuofan Chen   }
2368233af895SLuofan Chen 
2369b8235d2bSsstefan1   static const char ID;
2370b8235d2bSsstefan1 };
2371b8235d2bSsstefan1 
2372b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
AAICVTrackerFunction__anon23c38c770111::AAICVTrackerFunction2373b8235d2bSsstefan1   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
2374b8235d2bSsstefan1       : AAICVTracker(IRP, A) {}
2375b8235d2bSsstefan1 
2376b8235d2bSsstefan1   // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerFunction23775dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerFunction"; }
2378b8235d2bSsstefan1 
2379b8235d2bSsstefan1   // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerFunction2380b8235d2bSsstefan1   void trackStatistics() const override {}
2381b8235d2bSsstefan1 
23825dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerFunction2383b8235d2bSsstefan1   ChangeStatus manifest(Attributor &A) override {
23845dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
2385b8235d2bSsstefan1   }
2386b8235d2bSsstefan1 
2387b8235d2bSsstefan1   // Map of ICV to their values at specific program point.
23885dfd7cc4Ssstefan1   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
2389b8235d2bSsstefan1                   InternalControlVar::ICV___last>
23905dfd7cc4Ssstefan1       ICVReplacementValuesMap;
2391b8235d2bSsstefan1 
updateImpl__anon23c38c770111::AAICVTrackerFunction2392b8235d2bSsstefan1   ChangeStatus updateImpl(Attributor &A) override {
2393b8235d2bSsstefan1     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
2394b8235d2bSsstefan1 
2395b8235d2bSsstefan1     Function *F = getAnchorScope();
2396b8235d2bSsstefan1 
2397b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2398b8235d2bSsstefan1 
2399b8235d2bSsstefan1     for (InternalControlVar ICV : TrackableICVs) {
2400b8235d2bSsstefan1       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2401b8235d2bSsstefan1 
24025dfd7cc4Ssstefan1       auto &ValuesMap = ICVReplacementValuesMap[ICV];
2403b8235d2bSsstefan1       auto TrackValues = [&](Use &U, Function &) {
2404b8235d2bSsstefan1         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
2405b8235d2bSsstefan1         if (!CI)
2406b8235d2bSsstefan1           return false;
2407b8235d2bSsstefan1 
2408b8235d2bSsstefan1         // FIXME: handle setters with more that 1 arguments.
2409b8235d2bSsstefan1         /// Track new value.
24105dfd7cc4Ssstefan1         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
2411b8235d2bSsstefan1           HasChanged = ChangeStatus::CHANGED;
2412b8235d2bSsstefan1 
2413b8235d2bSsstefan1         return false;
2414b8235d2bSsstefan1       };
2415b8235d2bSsstefan1 
24165dfd7cc4Ssstefan1       auto CallCheck = [&](Instruction &I) {
2417b4a75598SJohannes Doerfert         Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
2418ad7ce1e7SKazu Hirata         if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
24195dfd7cc4Ssstefan1           HasChanged = ChangeStatus::CHANGED;
24205dfd7cc4Ssstefan1 
24215dfd7cc4Ssstefan1         return true;
24225dfd7cc4Ssstefan1       };
24235dfd7cc4Ssstefan1 
24245dfd7cc4Ssstefan1       // Track all changes of an ICV.
2425b8235d2bSsstefan1       SetterRFI.foreachUse(TrackValues, F);
24265dfd7cc4Ssstefan1 
2427792aac98SJohannes Doerfert       bool UsedAssumedInformation = false;
24285dfd7cc4Ssstefan1       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2429792aac98SJohannes Doerfert                                 UsedAssumedInformation,
24305dfd7cc4Ssstefan1                                 /* CheckBBLivenessOnly */ true);
24315dfd7cc4Ssstefan1 
24325dfd7cc4Ssstefan1       /// TODO: Figure out a way to avoid adding entry in
24335dfd7cc4Ssstefan1       /// ICVReplacementValuesMap
24345dfd7cc4Ssstefan1       Instruction *Entry = &F->getEntryBlock().front();
24355dfd7cc4Ssstefan1       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
24365dfd7cc4Ssstefan1         ValuesMap.insert(std::make_pair(Entry, nullptr));
2437b8235d2bSsstefan1     }
2438b8235d2bSsstefan1 
2439b8235d2bSsstefan1     return HasChanged;
2440b8235d2bSsstefan1   }
2441b8235d2bSsstefan1 
2442b4a75598SJohannes Doerfert   /// Helper to check if \p I is a call and get the value for it if it is
24435dfd7cc4Ssstefan1   /// unique.
getValueForCall__anon23c38c770111::AAICVTrackerFunction2444b4a75598SJohannes Doerfert   Optional<Value *> getValueForCall(Attributor &A, const Instruction &I,
24455dfd7cc4Ssstefan1                                     InternalControlVar &ICV) const {
2446b8235d2bSsstefan1 
2447b4a75598SJohannes Doerfert     const auto *CB = dyn_cast<CallBase>(&I);
2448dcaec812SJohannes Doerfert     if (!CB || CB->hasFnAttr("no_openmp") ||
2449dcaec812SJohannes Doerfert         CB->hasFnAttr("no_openmp_routines"))
24505dfd7cc4Ssstefan1       return None;
24515dfd7cc4Ssstefan1 
2452b8235d2bSsstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2453b8235d2bSsstefan1     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
24545dfd7cc4Ssstefan1     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
24555dfd7cc4Ssstefan1     Function *CalledFunction = CB->getCalledFunction();
2456b8235d2bSsstefan1 
24574eef14f9SWei Wang     // Indirect call, assume ICV changes.
24584eef14f9SWei Wang     if (CalledFunction == nullptr)
24594eef14f9SWei Wang       return nullptr;
24605dfd7cc4Ssstefan1     if (CalledFunction == GetterRFI.Declaration)
24615dfd7cc4Ssstefan1       return None;
24625dfd7cc4Ssstefan1     if (CalledFunction == SetterRFI.Declaration) {
2463b4a75598SJohannes Doerfert       if (ICVReplacementValuesMap[ICV].count(&I))
2464b4a75598SJohannes Doerfert         return ICVReplacementValuesMap[ICV].lookup(&I);
24655dfd7cc4Ssstefan1 
24665dfd7cc4Ssstefan1       return nullptr;
24675dfd7cc4Ssstefan1     }
24685dfd7cc4Ssstefan1 
24695dfd7cc4Ssstefan1     // Since we don't know, assume it changes the ICV.
24705dfd7cc4Ssstefan1     if (CalledFunction->isDeclaration())
24715dfd7cc4Ssstefan1       return nullptr;
24725dfd7cc4Ssstefan1 
24735b70c12fSJohannes Doerfert     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
24745b70c12fSJohannes Doerfert         *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
24755dfd7cc4Ssstefan1 
2476b4a75598SJohannes Doerfert     if (ICVTrackingAA.isAssumedTracked()) {
2477b4a75598SJohannes Doerfert       Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
2478481b8f31SJohannes Doerfert       if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
2479481b8f31SJohannes Doerfert                                                  OMPInfoCache)))
2480b4a75598SJohannes Doerfert         return URV;
2481b4a75598SJohannes Doerfert     }
24825dfd7cc4Ssstefan1 
24835dfd7cc4Ssstefan1     // If we don't know, assume it changes.
24845dfd7cc4Ssstefan1     return nullptr;
24855dfd7cc4Ssstefan1   }
24865dfd7cc4Ssstefan1 
24875dfd7cc4Ssstefan1   // We don't check unique value for a function, so return None.
24885dfd7cc4Ssstefan1   Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerFunction24895dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
24905dfd7cc4Ssstefan1     return None;
24915dfd7cc4Ssstefan1   }
24925dfd7cc4Ssstefan1 
24935dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon23c38c770111::AAICVTrackerFunction24945dfd7cc4Ssstefan1   Optional<Value *> getReplacementValue(InternalControlVar ICV,
24955dfd7cc4Ssstefan1                                         const Instruction *I,
24965dfd7cc4Ssstefan1                                         Attributor &A) const override {
24975dfd7cc4Ssstefan1     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
24985dfd7cc4Ssstefan1     if (ValuesMap.count(I))
24995dfd7cc4Ssstefan1       return ValuesMap.lookup(I);
25005dfd7cc4Ssstefan1 
25015dfd7cc4Ssstefan1     SmallVector<const Instruction *, 16> Worklist;
25025dfd7cc4Ssstefan1     SmallPtrSet<const Instruction *, 16> Visited;
25035dfd7cc4Ssstefan1     Worklist.push_back(I);
25045dfd7cc4Ssstefan1 
25055dfd7cc4Ssstefan1     Optional<Value *> ReplVal;
25065dfd7cc4Ssstefan1 
25075dfd7cc4Ssstefan1     while (!Worklist.empty()) {
25085dfd7cc4Ssstefan1       const Instruction *CurrInst = Worklist.pop_back_val();
25095dfd7cc4Ssstefan1       if (!Visited.insert(CurrInst).second)
2510b8235d2bSsstefan1         continue;
2511b8235d2bSsstefan1 
25125dfd7cc4Ssstefan1       const BasicBlock *CurrBB = CurrInst->getParent();
25135dfd7cc4Ssstefan1 
25145dfd7cc4Ssstefan1       // Go up and look for all potential setters/calls that might change the
25155dfd7cc4Ssstefan1       // ICV.
25165dfd7cc4Ssstefan1       while ((CurrInst = CurrInst->getPrevNode())) {
25175dfd7cc4Ssstefan1         if (ValuesMap.count(CurrInst)) {
25185dfd7cc4Ssstefan1           Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
25195dfd7cc4Ssstefan1           // Unknown value, track new.
2520a7938c74SKazu Hirata           if (!ReplVal) {
25215dfd7cc4Ssstefan1             ReplVal = NewReplVal;
25225dfd7cc4Ssstefan1             break;
25235dfd7cc4Ssstefan1           }
25245dfd7cc4Ssstefan1 
25255dfd7cc4Ssstefan1           // If we found a new value, we can't know the icv value anymore.
2526a7938c74SKazu Hirata           if (NewReplVal)
25275dfd7cc4Ssstefan1             if (ReplVal != NewReplVal)
2528b8235d2bSsstefan1               return nullptr;
2529b8235d2bSsstefan1 
25305dfd7cc4Ssstefan1           break;
2531b8235d2bSsstefan1         }
2532b8235d2bSsstefan1 
2533b4a75598SJohannes Doerfert         Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
2534a7938c74SKazu Hirata         if (!NewReplVal)
25355dfd7cc4Ssstefan1           continue;
25365dfd7cc4Ssstefan1 
25375dfd7cc4Ssstefan1         // Unknown value, track new.
2538a7938c74SKazu Hirata         if (!ReplVal) {
25395dfd7cc4Ssstefan1           ReplVal = NewReplVal;
25405dfd7cc4Ssstefan1           break;
2541b8235d2bSsstefan1         }
2542b8235d2bSsstefan1 
25435dfd7cc4Ssstefan1         // if (NewReplVal.hasValue())
25445dfd7cc4Ssstefan1         // We found a new value, we can't know the icv value anymore.
25455dfd7cc4Ssstefan1         if (ReplVal != NewReplVal)
2546b8235d2bSsstefan1           return nullptr;
2547b8235d2bSsstefan1       }
25485dfd7cc4Ssstefan1 
25495dfd7cc4Ssstefan1       // If we are in the same BB and we have a value, we are done.
2550e0e687a6SKazu Hirata       if (CurrBB == I->getParent() && ReplVal)
25515dfd7cc4Ssstefan1         return ReplVal;
25525dfd7cc4Ssstefan1 
25535dfd7cc4Ssstefan1       // Go through all predecessors and add terminators for analysis.
25545dfd7cc4Ssstefan1       for (const BasicBlock *Pred : predecessors(CurrBB))
25555dfd7cc4Ssstefan1         if (const Instruction *Terminator = Pred->getTerminator())
25565dfd7cc4Ssstefan1           Worklist.push_back(Terminator);
25575dfd7cc4Ssstefan1     }
25585dfd7cc4Ssstefan1 
25595dfd7cc4Ssstefan1     return ReplVal;
25605dfd7cc4Ssstefan1   }
25615dfd7cc4Ssstefan1 };
25625dfd7cc4Ssstefan1 
25635dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned__anon23c38c770111::AAICVTrackerFunctionReturned25645dfd7cc4Ssstefan1   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
25655dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
25665dfd7cc4Ssstefan1 
25675dfd7cc4Ssstefan1   // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerFunctionReturned25685dfd7cc4Ssstefan1   const std::string getAsStr() const override {
25695dfd7cc4Ssstefan1     return "ICVTrackerFunctionReturned";
25705dfd7cc4Ssstefan1   }
25715dfd7cc4Ssstefan1 
25725dfd7cc4Ssstefan1   // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerFunctionReturned25735dfd7cc4Ssstefan1   void trackStatistics() const override {}
25745dfd7cc4Ssstefan1 
25755dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerFunctionReturned25765dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
25775dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
25785dfd7cc4Ssstefan1   }
25795dfd7cc4Ssstefan1 
25805dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
25815dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
25825dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
25835dfd7cc4Ssstefan1       ICVReplacementValuesMap;
25845dfd7cc4Ssstefan1 
25855dfd7cc4Ssstefan1   /// Return the value with which \p I can be replaced for specific \p ICV.
25865dfd7cc4Ssstefan1   Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerFunctionReturned25875dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
25885dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
25895dfd7cc4Ssstefan1   }
25905dfd7cc4Ssstefan1 
updateImpl__anon23c38c770111::AAICVTrackerFunctionReturned25915dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
25925dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
25935dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
25945b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
25955dfd7cc4Ssstefan1 
25965dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
25975dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
25985dfd7cc4Ssstefan1 
25995dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
26005dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
26015dfd7cc4Ssstefan1       Optional<Value *> UniqueICVValue;
26025dfd7cc4Ssstefan1 
26035dfd7cc4Ssstefan1       auto CheckReturnInst = [&](Instruction &I) {
26045dfd7cc4Ssstefan1         Optional<Value *> NewReplVal =
26055dfd7cc4Ssstefan1             ICVTrackingAA.getReplacementValue(ICV, &I, A);
26065dfd7cc4Ssstefan1 
26075dfd7cc4Ssstefan1         // If we found a second ICV value there is no unique returned value.
2608e0e687a6SKazu Hirata         if (UniqueICVValue && UniqueICVValue != NewReplVal)
26095dfd7cc4Ssstefan1           return false;
26105dfd7cc4Ssstefan1 
26115dfd7cc4Ssstefan1         UniqueICVValue = NewReplVal;
26125dfd7cc4Ssstefan1 
26135dfd7cc4Ssstefan1         return true;
26145dfd7cc4Ssstefan1       };
26155dfd7cc4Ssstefan1 
2616792aac98SJohannes Doerfert       bool UsedAssumedInformation = false;
26175dfd7cc4Ssstefan1       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2618792aac98SJohannes Doerfert                                      UsedAssumedInformation,
26195dfd7cc4Ssstefan1                                      /* CheckBBLivenessOnly */ true))
26205dfd7cc4Ssstefan1         UniqueICVValue = nullptr;
26215dfd7cc4Ssstefan1 
26225dfd7cc4Ssstefan1       if (UniqueICVValue == ReplVal)
26235dfd7cc4Ssstefan1         continue;
26245dfd7cc4Ssstefan1 
26255dfd7cc4Ssstefan1       ReplVal = UniqueICVValue;
26265dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
26275dfd7cc4Ssstefan1     }
26285dfd7cc4Ssstefan1 
26295dfd7cc4Ssstefan1     return Changed;
26305dfd7cc4Ssstefan1   }
26315dfd7cc4Ssstefan1 };
26325dfd7cc4Ssstefan1 
26335dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite__anon23c38c770111::AAICVTrackerCallSite26345dfd7cc4Ssstefan1   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
26355dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
26365dfd7cc4Ssstefan1 
initialize__anon23c38c770111::AAICVTrackerCallSite26375dfd7cc4Ssstefan1   void initialize(Attributor &A) override {
26385dfd7cc4Ssstefan1     Function *F = getAnchorScope();
26395dfd7cc4Ssstefan1     if (!F || !A.isFunctionIPOAmendable(*F))
26405dfd7cc4Ssstefan1       indicatePessimisticFixpoint();
26415dfd7cc4Ssstefan1 
26425dfd7cc4Ssstefan1     // We only initialize this AA for getters, so we need to know which ICV it
26435dfd7cc4Ssstefan1     // gets.
26445dfd7cc4Ssstefan1     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
26455dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
26465dfd7cc4Ssstefan1       auto ICVInfo = OMPInfoCache.ICVs[ICV];
26475dfd7cc4Ssstefan1       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
26485dfd7cc4Ssstefan1       if (Getter.Declaration == getAssociatedFunction()) {
26495dfd7cc4Ssstefan1         AssociatedICV = ICVInfo.Kind;
26505dfd7cc4Ssstefan1         return;
26515dfd7cc4Ssstefan1       }
26525dfd7cc4Ssstefan1     }
26535dfd7cc4Ssstefan1 
26545dfd7cc4Ssstefan1     /// Unknown ICV.
26555dfd7cc4Ssstefan1     indicatePessimisticFixpoint();
26565dfd7cc4Ssstefan1   }
26575dfd7cc4Ssstefan1 
manifest__anon23c38c770111::AAICVTrackerCallSite26585dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
2659e0e687a6SKazu Hirata     if (!ReplVal || !*ReplVal)
26605dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
26615dfd7cc4Ssstefan1 
26627a07b88fSJohannes Doerfert     A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
26635dfd7cc4Ssstefan1     A.deleteAfterManifest(*getCtxI());
26645dfd7cc4Ssstefan1 
26655dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
26665dfd7cc4Ssstefan1   }
26675dfd7cc4Ssstefan1 
26685dfd7cc4Ssstefan1   // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerCallSite26695dfd7cc4Ssstefan1   const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
26705dfd7cc4Ssstefan1 
26715dfd7cc4Ssstefan1   // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerCallSite26725dfd7cc4Ssstefan1   void trackStatistics() const override {}
26735dfd7cc4Ssstefan1 
26745dfd7cc4Ssstefan1   InternalControlVar AssociatedICV;
26755dfd7cc4Ssstefan1   Optional<Value *> ReplVal;
26765dfd7cc4Ssstefan1 
updateImpl__anon23c38c770111::AAICVTrackerCallSite26775dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
26785dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
26795b70c12fSJohannes Doerfert         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
26805dfd7cc4Ssstefan1 
26815dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
26825dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
26835dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
26845dfd7cc4Ssstefan1 
26855dfd7cc4Ssstefan1     Optional<Value *> NewReplVal =
26865dfd7cc4Ssstefan1         ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
26875dfd7cc4Ssstefan1 
26885dfd7cc4Ssstefan1     if (ReplVal == NewReplVal)
26895dfd7cc4Ssstefan1       return ChangeStatus::UNCHANGED;
26905dfd7cc4Ssstefan1 
26915dfd7cc4Ssstefan1     ReplVal = NewReplVal;
26925dfd7cc4Ssstefan1     return ChangeStatus::CHANGED;
26935dfd7cc4Ssstefan1   }
26945dfd7cc4Ssstefan1 
26955dfd7cc4Ssstefan1   // Return the value with which associated value can be replaced for specific
26965dfd7cc4Ssstefan1   // \p ICV.
26975dfd7cc4Ssstefan1   Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerCallSite26985dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
26995dfd7cc4Ssstefan1     return ReplVal;
27005dfd7cc4Ssstefan1   }
27015dfd7cc4Ssstefan1 };
27025dfd7cc4Ssstefan1 
27035dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned__anon23c38c770111::AAICVTrackerCallSiteReturned27045dfd7cc4Ssstefan1   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
27055dfd7cc4Ssstefan1       : AAICVTracker(IRP, A) {}
27065dfd7cc4Ssstefan1 
27075dfd7cc4Ssstefan1   // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerCallSiteReturned27085dfd7cc4Ssstefan1   const std::string getAsStr() const override {
27095dfd7cc4Ssstefan1     return "ICVTrackerCallSiteReturned";
27105dfd7cc4Ssstefan1   }
27115dfd7cc4Ssstefan1 
27125dfd7cc4Ssstefan1   // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerCallSiteReturned27135dfd7cc4Ssstefan1   void trackStatistics() const override {}
27145dfd7cc4Ssstefan1 
27155dfd7cc4Ssstefan1   /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerCallSiteReturned27165dfd7cc4Ssstefan1   ChangeStatus manifest(Attributor &A) override {
27175dfd7cc4Ssstefan1     return ChangeStatus::UNCHANGED;
27185dfd7cc4Ssstefan1   }
27195dfd7cc4Ssstefan1 
27205dfd7cc4Ssstefan1   // Map of ICV to their values at specific program point.
27215dfd7cc4Ssstefan1   EnumeratedArray<Optional<Value *>, InternalControlVar,
27225dfd7cc4Ssstefan1                   InternalControlVar::ICV___last>
27235dfd7cc4Ssstefan1       ICVReplacementValuesMap;
27245dfd7cc4Ssstefan1 
27255dfd7cc4Ssstefan1   /// Return the value with which associated value can be replaced for specific
27265dfd7cc4Ssstefan1   /// \p ICV.
27275dfd7cc4Ssstefan1   Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerCallSiteReturned27285dfd7cc4Ssstefan1   getUniqueReplacementValue(InternalControlVar ICV) const override {
27295dfd7cc4Ssstefan1     return ICVReplacementValuesMap[ICV];
27305dfd7cc4Ssstefan1   }
27315dfd7cc4Ssstefan1 
updateImpl__anon23c38c770111::AAICVTrackerCallSiteReturned27325dfd7cc4Ssstefan1   ChangeStatus updateImpl(Attributor &A) override {
27335dfd7cc4Ssstefan1     ChangeStatus Changed = ChangeStatus::UNCHANGED;
27345dfd7cc4Ssstefan1     const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
27355b70c12fSJohannes Doerfert         *this, IRPosition::returned(*getAssociatedFunction()),
27365b70c12fSJohannes Doerfert         DepClassTy::REQUIRED);
27375dfd7cc4Ssstefan1 
27385dfd7cc4Ssstefan1     // We don't have any information, so we assume it changes the ICV.
27395dfd7cc4Ssstefan1     if (!ICVTrackingAA.isAssumedTracked())
27405dfd7cc4Ssstefan1       return indicatePessimisticFixpoint();
27415dfd7cc4Ssstefan1 
27425dfd7cc4Ssstefan1     for (InternalControlVar ICV : TrackableICVs) {
27435dfd7cc4Ssstefan1       Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
27445dfd7cc4Ssstefan1       Optional<Value *> NewReplVal =
27455dfd7cc4Ssstefan1           ICVTrackingAA.getUniqueReplacementValue(ICV);
27465dfd7cc4Ssstefan1 
27475dfd7cc4Ssstefan1       if (ReplVal == NewReplVal)
27485dfd7cc4Ssstefan1         continue;
27495dfd7cc4Ssstefan1 
27505dfd7cc4Ssstefan1       ReplVal = NewReplVal;
27515dfd7cc4Ssstefan1       Changed = ChangeStatus::CHANGED;
27525dfd7cc4Ssstefan1     }
27535dfd7cc4Ssstefan1     return Changed;
27545dfd7cc4Ssstefan1   }
27559548b74aSJohannes Doerfert };
275618283125SJoseph Huber 
275718283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction__anon23c38c770111::AAExecutionDomainFunction275818283125SJoseph Huber   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
275918283125SJoseph Huber       : AAExecutionDomain(IRP, A) {}
276018283125SJoseph Huber 
getAsStr__anon23c38c770111::AAExecutionDomainFunction276118283125SJoseph Huber   const std::string getAsStr() const override {
276218283125SJoseph Huber     return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
276318283125SJoseph Huber            "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
276418283125SJoseph Huber   }
276518283125SJoseph Huber 
276618283125SJoseph Huber   /// See AbstractAttribute::trackStatistics().
trackStatistics__anon23c38c770111::AAExecutionDomainFunction276718283125SJoseph Huber   void trackStatistics() const override {}
276818283125SJoseph Huber 
initialize__anon23c38c770111::AAExecutionDomainFunction276918283125SJoseph Huber   void initialize(Attributor &A) override {
277018283125SJoseph Huber     Function *F = getAnchorScope();
277118283125SJoseph Huber     for (const auto &BB : *F)
277218283125SJoseph Huber       SingleThreadedBBs.insert(&BB);
277318283125SJoseph Huber     NumBBs = SingleThreadedBBs.size();
277418283125SJoseph Huber   }
277518283125SJoseph Huber 
manifest__anon23c38c770111::AAExecutionDomainFunction277618283125SJoseph Huber   ChangeStatus manifest(Attributor &A) override {
277718283125SJoseph Huber     LLVM_DEBUG({
277818283125SJoseph Huber       for (const BasicBlock *BB : SingleThreadedBBs)
277918283125SJoseph Huber         dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
278018283125SJoseph Huber                << BB->getName() << " is executed by a single thread.\n";
278118283125SJoseph Huber     });
278218283125SJoseph Huber     return ChangeStatus::UNCHANGED;
278318283125SJoseph Huber   }
278418283125SJoseph Huber 
278518283125SJoseph Huber   ChangeStatus updateImpl(Attributor &A) override;
278618283125SJoseph Huber 
278718283125SJoseph Huber   /// Check if an instruction is executed by a single thread.
isExecutedByInitialThreadOnly__anon23c38c770111::AAExecutionDomainFunction27889a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
27899a23e673SJohannes Doerfert     return isExecutedByInitialThreadOnly(*I.getParent());
279018283125SJoseph Huber   }
279118283125SJoseph Huber 
isExecutedByInitialThreadOnly__anon23c38c770111::AAExecutionDomainFunction27929a23e673SJohannes Doerfert   bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
27931cfdcae6SJoseph Huber     return isValidState() && SingleThreadedBBs.contains(&BB);
279418283125SJoseph Huber   }
279518283125SJoseph Huber 
279618283125SJoseph Huber   /// Set of basic blocks that are executed by a single thread.
27979f04a0eaSJohannes Doerfert   SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
279818283125SJoseph Huber 
279918283125SJoseph Huber   /// Total number of basic blocks in this function.
280059a6b668SJohannes Doerfert   long unsigned NumBBs = 0;
280118283125SJoseph Huber };
280218283125SJoseph Huber 
updateImpl(Attributor & A)280318283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
280418283125SJoseph Huber   Function *F = getAnchorScope();
280518283125SJoseph Huber   ReversePostOrderTraversal<Function *> RPOT(F);
280618283125SJoseph Huber   auto NumSingleThreadedBBs = SingleThreadedBBs.size();
280718283125SJoseph Huber 
280818283125SJoseph Huber   bool AllCallSitesKnown;
280918283125SJoseph Huber   auto PredForCallSite = [&](AbstractCallSite ACS) {
281018283125SJoseph Huber     const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
281118283125SJoseph Huber         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
281218283125SJoseph Huber         DepClassTy::REQUIRED);
28131cfdcae6SJoseph Huber     return ACS.isDirectCall() &&
28141cfdcae6SJoseph Huber            ExecutionDomainAA.isExecutedByInitialThreadOnly(
28159a23e673SJohannes Doerfert                *ACS.getInstruction());
281618283125SJoseph Huber   };
281718283125SJoseph Huber 
281818283125SJoseph Huber   if (!A.checkForAllCallSites(PredForCallSite, *this,
281918283125SJoseph Huber                               /* RequiresAllCallSites */ true,
282018283125SJoseph Huber                               AllCallSitesKnown))
28219f04a0eaSJohannes Doerfert     SingleThreadedBBs.remove(&F->getEntryBlock());
282218283125SJoseph Huber 
2823e2cfbfccSJohannes Doerfert   auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2824e2cfbfccSJohannes Doerfert   auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2825e2cfbfccSJohannes Doerfert 
282627905eebSJoseph Huber   // Check if the edge into the successor block contains a condition that only
282727905eebSJoseph Huber   // lets the main thread execute it.
28286fc51c9fSJoseph Huber   auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
282918283125SJoseph Huber     if (!Edge || !Edge->isConditional())
283018283125SJoseph Huber       return false;
283118283125SJoseph Huber     if (Edge->getSuccessor(0) != SuccessorBB)
283218283125SJoseph Huber       return false;
283318283125SJoseph Huber 
283418283125SJoseph Huber     auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
283518283125SJoseph Huber     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
283618283125SJoseph Huber       return false;
283718283125SJoseph Huber 
283818283125SJoseph Huber     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2839e2cfbfccSJohannes Doerfert     if (!C)
284018283125SJoseph Huber       return false;
284118283125SJoseph Huber 
2842e2cfbfccSJohannes Doerfert     // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2843e2cfbfccSJohannes Doerfert     if (C->isAllOnesValue()) {
2844e2cfbfccSJohannes Doerfert       auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2845c4b1fe05SJohannes Doerfert       CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2846c4b1fe05SJohannes Doerfert       if (!CB)
2847e2cfbfccSJohannes Doerfert         return false;
2848423d34f7SShilei Tian       const int InitModeArgNo = 1;
2849423d34f7SShilei Tian       auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
2850423d34f7SShilei Tian       return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
2851e2cfbfccSJohannes Doerfert     }
285218283125SJoseph Huber 
285327905eebSJoseph Huber     if (C->isZero()) {
285427905eebSJoseph Huber       // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
285527905eebSJoseph Huber       if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
285627905eebSJoseph Huber         if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
285727905eebSJoseph Huber           return true;
285827905eebSJoseph Huber 
285927905eebSJoseph Huber       // Match: 0 == llvm.amdgcn.workitem.id.x()
286027905eebSJoseph Huber       if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
286127905eebSJoseph Huber         if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
286227905eebSJoseph Huber           return true;
286327905eebSJoseph Huber     }
286427905eebSJoseph Huber 
286518283125SJoseph Huber     return false;
286618283125SJoseph Huber   };
286718283125SJoseph Huber 
286818283125SJoseph Huber   // Merge all the predecessor states into the current basic block. A basic
286918283125SJoseph Huber   // block is executed by a single thread if all of its predecessors are.
287018283125SJoseph Huber   auto MergePredecessorStates = [&](BasicBlock *BB) {
287198007313SKazu Hirata     if (pred_empty(BB))
287218283125SJoseph Huber       return SingleThreadedBBs.contains(BB);
287318283125SJoseph Huber 
28746fc51c9fSJoseph Huber     bool IsInitialThread = true;
287598007313SKazu Hirata     for (BasicBlock *PredBB : predecessors(BB)) {
287698007313SKazu Hirata       if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
287718283125SJoseph Huber                                BB))
287898007313SKazu Hirata         IsInitialThread &= SingleThreadedBBs.contains(PredBB);
287918283125SJoseph Huber     }
288018283125SJoseph Huber 
28816fc51c9fSJoseph Huber     return IsInitialThread;
288218283125SJoseph Huber   };
288318283125SJoseph Huber 
288418283125SJoseph Huber   for (auto *BB : RPOT) {
288518283125SJoseph Huber     if (!MergePredecessorStates(BB))
28869f04a0eaSJohannes Doerfert       SingleThreadedBBs.remove(BB);
288718283125SJoseph Huber   }
288818283125SJoseph Huber 
288918283125SJoseph Huber   return (NumSingleThreadedBBs == SingleThreadedBBs.size())
289018283125SJoseph Huber              ? ChangeStatus::UNCHANGED
289118283125SJoseph Huber              : ChangeStatus::CHANGED;
289218283125SJoseph Huber }
289318283125SJoseph Huber 
28946fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a
28956fc51c9fSJoseph Huber /// static buffer of shared memory.
28966fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
28976fc51c9fSJoseph Huber   using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToShared__anon23c38c770111::AAHeapToShared28986fc51c9fSJoseph Huber   AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
28996fc51c9fSJoseph Huber 
29006fc51c9fSJoseph Huber   /// Create an abstract attribute view for the position \p IRP.
29016fc51c9fSJoseph Huber   static AAHeapToShared &createForPosition(const IRPosition &IRP,
29026fc51c9fSJoseph Huber                                            Attributor &A);
29036fc51c9fSJoseph Huber 
2904f8c40ed8SGiorgis Georgakoudis   /// Returns true if HeapToShared conversion is assumed to be possible.
2905f8c40ed8SGiorgis Georgakoudis   virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
2906f8c40ed8SGiorgis Georgakoudis 
2907f8c40ed8SGiorgis Georgakoudis   /// Returns true if HeapToShared conversion is assumed and the CB is a
2908f8c40ed8SGiorgis Georgakoudis   /// callsite to a free operation to be removed.
2909f8c40ed8SGiorgis Georgakoudis   virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
2910f8c40ed8SGiorgis Georgakoudis 
29116fc51c9fSJoseph Huber   /// See AbstractAttribute::getName().
getName__anon23c38c770111::AAHeapToShared29126fc51c9fSJoseph Huber   const std::string getName() const override { return "AAHeapToShared"; }
29136fc51c9fSJoseph Huber 
29146fc51c9fSJoseph Huber   /// See AbstractAttribute::getIdAddr().
getIdAddr__anon23c38c770111::AAHeapToShared29156fc51c9fSJoseph Huber   const char *getIdAddr() const override { return &ID; }
29166fc51c9fSJoseph Huber 
29176fc51c9fSJoseph Huber   /// This function should return true if the type of the \p AA is
29186fc51c9fSJoseph Huber   /// AAHeapToShared.
classof__anon23c38c770111::AAHeapToShared29196fc51c9fSJoseph Huber   static bool classof(const AbstractAttribute *AA) {
29206fc51c9fSJoseph Huber     return (AA->getIdAddr() == &ID);
29216fc51c9fSJoseph Huber   }
29226fc51c9fSJoseph Huber 
29236fc51c9fSJoseph Huber   /// Unique ID (due to the unique address)
29246fc51c9fSJoseph Huber   static const char ID;
29256fc51c9fSJoseph Huber };
29266fc51c9fSJoseph Huber 
29276fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction__anon23c38c770111::AAHeapToSharedFunction29286fc51c9fSJoseph Huber   AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
29296fc51c9fSJoseph Huber       : AAHeapToShared(IRP, A) {}
29306fc51c9fSJoseph Huber 
getAsStr__anon23c38c770111::AAHeapToSharedFunction29316fc51c9fSJoseph Huber   const std::string getAsStr() const override {
29326fc51c9fSJoseph Huber     return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
29336fc51c9fSJoseph Huber            " malloc calls eligible.";
29346fc51c9fSJoseph Huber   }
29356fc51c9fSJoseph Huber 
29366fc51c9fSJoseph Huber   /// See AbstractAttribute::trackStatistics().
trackStatistics__anon23c38c770111::AAHeapToSharedFunction29376fc51c9fSJoseph Huber   void trackStatistics() const override {}
29386fc51c9fSJoseph Huber 
2939f8c40ed8SGiorgis Georgakoudis   /// This functions finds free calls that will be removed by the
2940f8c40ed8SGiorgis Georgakoudis   /// HeapToShared transformation.
findPotentialRemovedFreeCalls__anon23c38c770111::AAHeapToSharedFunction2941f8c40ed8SGiorgis Georgakoudis   void findPotentialRemovedFreeCalls(Attributor &A) {
2942f8c40ed8SGiorgis Georgakoudis     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2943f8c40ed8SGiorgis Georgakoudis     auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2944f8c40ed8SGiorgis Georgakoudis 
2945f8c40ed8SGiorgis Georgakoudis     PotentialRemovedFreeCalls.clear();
2946f8c40ed8SGiorgis Georgakoudis     // Update free call users of found malloc calls.
2947f8c40ed8SGiorgis Georgakoudis     for (CallBase *CB : MallocCalls) {
2948f8c40ed8SGiorgis Georgakoudis       SmallVector<CallBase *, 4> FreeCalls;
2949f8c40ed8SGiorgis Georgakoudis       for (auto *U : CB->users()) {
2950f8c40ed8SGiorgis Georgakoudis         CallBase *C = dyn_cast<CallBase>(U);
2951f8c40ed8SGiorgis Georgakoudis         if (C && C->getCalledFunction() == FreeRFI.Declaration)
2952f8c40ed8SGiorgis Georgakoudis           FreeCalls.push_back(C);
2953f8c40ed8SGiorgis Georgakoudis       }
2954f8c40ed8SGiorgis Georgakoudis 
2955f8c40ed8SGiorgis Georgakoudis       if (FreeCalls.size() != 1)
2956f8c40ed8SGiorgis Georgakoudis         continue;
2957f8c40ed8SGiorgis Georgakoudis 
2958f8c40ed8SGiorgis Georgakoudis       PotentialRemovedFreeCalls.insert(FreeCalls.front());
2959f8c40ed8SGiorgis Georgakoudis     }
2960f8c40ed8SGiorgis Georgakoudis   }
2961f8c40ed8SGiorgis Georgakoudis 
initialize__anon23c38c770111::AAHeapToSharedFunction29626fc51c9fSJoseph Huber   void initialize(Attributor &A) override {
29635b4acb20SJohannes Doerfert     if (DisableOpenMPOptDeglobalization) {
29645b4acb20SJohannes Doerfert       indicatePessimisticFixpoint();
29655b4acb20SJohannes Doerfert       return;
29665b4acb20SJohannes Doerfert     }
29675b4acb20SJohannes Doerfert 
29686fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29696fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
29706fc51c9fSJoseph Huber 
2971192a34ddSJohannes Doerfert     Attributor::SimplifictionCallbackTy SCB =
2972192a34ddSJohannes Doerfert         [](const IRPosition &, const AbstractAttribute *,
2973192a34ddSJohannes Doerfert            bool &) -> Optional<Value *> { return nullptr; };
29746fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users())
2975192a34ddSJohannes Doerfert       if (CallBase *CB = dyn_cast<CallBase>(U)) {
29766fc51c9fSJoseph Huber         MallocCalls.insert(CB);
2977192a34ddSJohannes Doerfert         A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
2978192a34ddSJohannes Doerfert                                          SCB);
2979192a34ddSJohannes Doerfert       }
2980f8c40ed8SGiorgis Georgakoudis 
2981f8c40ed8SGiorgis Georgakoudis     findPotentialRemovedFreeCalls(A);
2982f8c40ed8SGiorgis Georgakoudis   }
2983f8c40ed8SGiorgis Georgakoudis 
isAssumedHeapToShared__anon23c38c770111::AAHeapToSharedFunction2984eaab880eSGiorgis Georgakoudis   bool isAssumedHeapToShared(CallBase &CB) const override {
2985f8c40ed8SGiorgis Georgakoudis     return isValidState() && MallocCalls.count(&CB);
2986f8c40ed8SGiorgis Georgakoudis   }
2987f8c40ed8SGiorgis Georgakoudis 
isAssumedHeapToSharedRemovedFree__anon23c38c770111::AAHeapToSharedFunction2988eaab880eSGiorgis Georgakoudis   bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
2989f8c40ed8SGiorgis Georgakoudis     return isValidState() && PotentialRemovedFreeCalls.count(&CB);
29906fc51c9fSJoseph Huber   }
29916fc51c9fSJoseph Huber 
manifest__anon23c38c770111::AAHeapToSharedFunction29926fc51c9fSJoseph Huber   ChangeStatus manifest(Attributor &A) override {
29936fc51c9fSJoseph Huber     if (MallocCalls.empty())
29946fc51c9fSJoseph Huber       return ChangeStatus::UNCHANGED;
29956fc51c9fSJoseph Huber 
29966fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29976fc51c9fSJoseph Huber     auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
29986fc51c9fSJoseph Huber 
29996fc51c9fSJoseph Huber     Function *F = getAnchorScope();
30006fc51c9fSJoseph Huber     auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
30016fc51c9fSJoseph Huber                                             DepClassTy::OPTIONAL);
30026fc51c9fSJoseph Huber 
30036fc51c9fSJoseph Huber     ChangeStatus Changed = ChangeStatus::UNCHANGED;
30046fc51c9fSJoseph Huber     for (CallBase *CB : MallocCalls) {
30056fc51c9fSJoseph Huber       // Skip replacing this if HeapToStack has already claimed it.
3006c1c1fe93SJohannes Doerfert       if (HS && HS->isAssumedHeapToStack(*CB))
30076fc51c9fSJoseph Huber         continue;
30086fc51c9fSJoseph Huber 
30096fc51c9fSJoseph Huber       // Find the unique free call to remove it.
30106fc51c9fSJoseph Huber       SmallVector<CallBase *, 4> FreeCalls;
30116fc51c9fSJoseph Huber       for (auto *U : CB->users()) {
30126fc51c9fSJoseph Huber         CallBase *C = dyn_cast<CallBase>(U);
30136fc51c9fSJoseph Huber         if (C && C->getCalledFunction() == FreeCall.Declaration)
30146fc51c9fSJoseph Huber           FreeCalls.push_back(C);
30156fc51c9fSJoseph Huber       }
30166fc51c9fSJoseph Huber       if (FreeCalls.size() != 1)
30176fc51c9fSJoseph Huber         continue;
30186fc51c9fSJoseph Huber 
3019274359cfSSimon Pilgrim       auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
30206fc51c9fSJoseph Huber 
30210136a440SJoseph Huber       if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
30220136a440SJoseph Huber         LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
30230136a440SJoseph Huber                           << " with shared memory."
30240136a440SJoseph Huber                           << " Shared memory usage is limited to "
30250136a440SJoseph Huber                           << SharedMemoryLimit << " bytes\n");
30260136a440SJoseph Huber         continue;
30270136a440SJoseph Huber       }
30280136a440SJoseph Huber 
302913d8f000SJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
303013d8f000SJoseph Huber                         << " with " << AllocSize->getZExtValue()
30316fc51c9fSJoseph Huber                         << " bytes of shared memory\n");
30326fc51c9fSJoseph Huber 
30336fc51c9fSJoseph Huber       // Create a new shared memory buffer of the same size as the allocation
30346fc51c9fSJoseph Huber       // and replace all the uses of the original allocation with it.
30356fc51c9fSJoseph Huber       Module *M = CB->getModule();
30366fc51c9fSJoseph Huber       Type *Int8Ty = Type::getInt8Ty(M->getContext());
30376fc51c9fSJoseph Huber       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
30386fc51c9fSJoseph Huber       auto *SharedMem = new GlobalVariable(
30396fc51c9fSJoseph Huber           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
30405eb49009SJoseph Huber           UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
30416fc51c9fSJoseph Huber           GlobalValue::NotThreadLocal,
30426fc51c9fSJoseph Huber           static_cast<unsigned>(AddressSpace::Shared));
30436fc51c9fSJoseph Huber       auto *NewBuffer =
30446fc51c9fSJoseph Huber           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
30456fc51c9fSJoseph Huber 
304630e36c9bSJoseph Huber       auto Remark = [&](OptimizationRemark OR) {
304730e36c9bSJoseph Huber         return OR << "Replaced globalized variable with "
304830e36c9bSJoseph Huber                   << ore::NV("SharedMemory", AllocSize->getZExtValue())
304930e36c9bSJoseph Huber                   << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
3050eef6601bSJoseph Huber                   << "of shared memory.";
305130e36c9bSJoseph Huber       };
30522c31d5ebSJoseph Huber       A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
305330e36c9bSJoseph Huber 
30546e220296SJoseph Huber       MaybeAlign Alignment = CB->getRetAlign();
30556e220296SJoseph Huber       assert(Alignment &&
30566e220296SJoseph Huber              "HeapToShared on allocation without alignment attribute");
30576e220296SJoseph Huber       SharedMem->setAlignment(MaybeAlign(Alignment));
30586fc51c9fSJoseph Huber 
30597a07b88fSJohannes Doerfert       A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
30606fc51c9fSJoseph Huber       A.deleteAfterManifest(*CB);
30616fc51c9fSJoseph Huber       A.deleteAfterManifest(*FreeCalls.front());
30626fc51c9fSJoseph Huber 
30630136a440SJoseph Huber       SharedMemoryUsed += AllocSize->getZExtValue();
30640136a440SJoseph Huber       NumBytesMovedToSharedMemory = SharedMemoryUsed;
30656fc51c9fSJoseph Huber       Changed = ChangeStatus::CHANGED;
30666fc51c9fSJoseph Huber     }
30676fc51c9fSJoseph Huber 
30686fc51c9fSJoseph Huber     return Changed;
30696fc51c9fSJoseph Huber   }
30706fc51c9fSJoseph Huber 
updateImpl__anon23c38c770111::AAHeapToSharedFunction30716fc51c9fSJoseph Huber   ChangeStatus updateImpl(Attributor &A) override {
30726fc51c9fSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
30736fc51c9fSJoseph Huber     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
30746fc51c9fSJoseph Huber     Function *F = getAnchorScope();
30756fc51c9fSJoseph Huber 
30766fc51c9fSJoseph Huber     auto NumMallocCalls = MallocCalls.size();
30776fc51c9fSJoseph Huber 
30786fc51c9fSJoseph Huber     // Only consider malloc calls executed by a single thread with a constant.
30796fc51c9fSJoseph Huber     for (User *U : RFI.Declaration->users()) {
30806fc51c9fSJoseph Huber       const auto &ED = A.getAAFor<AAExecutionDomain>(
30816fc51c9fSJoseph Huber           *this, IRPosition::function(*F), DepClassTy::REQUIRED);
30826fc51c9fSJoseph Huber       if (CallBase *CB = dyn_cast<CallBase>(U))
3083d243cbf8SKazu Hirata         if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
30846fc51c9fSJoseph Huber             !ED.isExecutedByInitialThreadOnly(*CB))
3085ba70f3a5SJohannes Doerfert           MallocCalls.remove(CB);
30866fc51c9fSJoseph Huber     }
30876fc51c9fSJoseph Huber 
3088f8c40ed8SGiorgis Georgakoudis     findPotentialRemovedFreeCalls(A);
3089f8c40ed8SGiorgis Georgakoudis 
30906fc51c9fSJoseph Huber     if (NumMallocCalls != MallocCalls.size())
30916fc51c9fSJoseph Huber       return ChangeStatus::CHANGED;
30926fc51c9fSJoseph Huber 
30936fc51c9fSJoseph Huber     return ChangeStatus::UNCHANGED;
30946fc51c9fSJoseph Huber   }
30956fc51c9fSJoseph Huber 
30966fc51c9fSJoseph Huber   /// Collection of all malloc calls in a function.
3097ba70f3a5SJohannes Doerfert   SmallSetVector<CallBase *, 4> MallocCalls;
3098f8c40ed8SGiorgis Georgakoudis   /// Collection of potentially removed free calls in a function.
3099f8c40ed8SGiorgis Georgakoudis   SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
31000136a440SJoseph Huber   /// The total amount of shared memory that has been used for HeapToShared.
31010136a440SJoseph Huber   unsigned SharedMemoryUsed = 0;
31026fc51c9fSJoseph Huber };
31036fc51c9fSJoseph Huber 
3104d9659bf6SJohannes Doerfert struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
3105d9659bf6SJohannes Doerfert   using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo__anon23c38c770111::AAKernelInfo3106d9659bf6SJohannes Doerfert   AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3107d9659bf6SJohannes Doerfert 
3108d9659bf6SJohannes Doerfert   /// Statistics are tracked as part of manifest for now.
trackStatistics__anon23c38c770111::AAKernelInfo3109d9659bf6SJohannes Doerfert   void trackStatistics() const override {}
3110d9659bf6SJohannes Doerfert 
3111d9659bf6SJohannes Doerfert   /// See AbstractAttribute::getAsStr()
getAsStr__anon23c38c770111::AAKernelInfo3112d9659bf6SJohannes Doerfert   const std::string getAsStr() const override {
3113d9659bf6SJohannes Doerfert     if (!isValidState())
3114d9659bf6SJohannes Doerfert       return "<invalid>";
3115514c033dSJohannes Doerfert     return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
3116514c033dSJohannes Doerfert                                                             : "generic") +
3117514c033dSJohannes Doerfert            std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
3118514c033dSJohannes Doerfert                                                                : "") +
3119d9659bf6SJohannes Doerfert            std::string(" #PRs: ") +
3120c6457dcaSJohannes Doerfert            (ReachedKnownParallelRegions.isValidState()
3121c6457dcaSJohannes Doerfert                 ? std::to_string(ReachedKnownParallelRegions.size())
3122c6457dcaSJohannes Doerfert                 : "<invalid>") +
3123d9659bf6SJohannes Doerfert            ", #Unknown PRs: " +
3124c6457dcaSJohannes Doerfert            (ReachedUnknownParallelRegions.isValidState()
3125c6457dcaSJohannes Doerfert                 ? std::to_string(ReachedUnknownParallelRegions.size())
3126c6457dcaSJohannes Doerfert                 : "<invalid>") +
31270a16c560SJohannes Doerfert            ", #Reaching Kernels: " +
31280a16c560SJohannes Doerfert            (ReachingKernelEntries.isValidState()
31290a16c560SJohannes Doerfert                 ? std::to_string(ReachingKernelEntries.size())
31300a16c560SJohannes Doerfert                 : "<invalid>");
3131d9659bf6SJohannes Doerfert   }
3132d9659bf6SJohannes Doerfert 
3133d9659bf6SJohannes Doerfert   /// Create an abstract attribute biew for the position \p IRP.
3134d9659bf6SJohannes Doerfert   static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
3135d9659bf6SJohannes Doerfert 
3136d9659bf6SJohannes Doerfert   /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAKernelInfo3137d9659bf6SJohannes Doerfert   const std::string getName() const override { return "AAKernelInfo"; }
3138d9659bf6SJohannes Doerfert 
3139d9659bf6SJohannes Doerfert   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAKernelInfo3140d9659bf6SJohannes Doerfert   const char *getIdAddr() const override { return &ID; }
3141d9659bf6SJohannes Doerfert 
3142d9659bf6SJohannes Doerfert   /// This function should return true if the type of the \p AA is AAKernelInfo
classof__anon23c38c770111::AAKernelInfo3143d9659bf6SJohannes Doerfert   static bool classof(const AbstractAttribute *AA) {
3144d9659bf6SJohannes Doerfert     return (AA->getIdAddr() == &ID);
3145d9659bf6SJohannes Doerfert   }
3146d9659bf6SJohannes Doerfert 
3147d9659bf6SJohannes Doerfert   static const char ID;
3148d9659bf6SJohannes Doerfert };
3149d9659bf6SJohannes Doerfert 
3150d9659bf6SJohannes Doerfert /// The function kernel info abstract attribute, basically, what can we say
3151d9659bf6SJohannes Doerfert /// about a function with regards to the KernelInfoState.
3152d9659bf6SJohannes Doerfert struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction__anon23c38c770111::AAKernelInfoFunction3153d9659bf6SJohannes Doerfert   AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
3154d9659bf6SJohannes Doerfert       : AAKernelInfo(IRP, A) {}
3155d9659bf6SJohannes Doerfert 
315629a3e3ddSGiorgis Georgakoudis   SmallPtrSet<Instruction *, 4> GuardedInstructions;
315729a3e3ddSGiorgis Georgakoudis 
getGuardedInstructions__anon23c38c770111::AAKernelInfoFunction315829a3e3ddSGiorgis Georgakoudis   SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
315929a3e3ddSGiorgis Georgakoudis     return GuardedInstructions;
316029a3e3ddSGiorgis Georgakoudis   }
316129a3e3ddSGiorgis Georgakoudis 
3162d9659bf6SJohannes Doerfert   /// See AbstractAttribute::initialize(...).
initialize__anon23c38c770111::AAKernelInfoFunction3163d9659bf6SJohannes Doerfert   void initialize(Attributor &A) override {
3164d9659bf6SJohannes Doerfert     // This is a high-level transform that might change the constant arguments
3165d9659bf6SJohannes Doerfert     // of the init and dinit calls. We need to tell the Attributor about this
3166d9659bf6SJohannes Doerfert     // to avoid other parts using the current constant value for simpliication.
3167d9659bf6SJohannes Doerfert     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3168d9659bf6SJohannes Doerfert 
3169d9659bf6SJohannes Doerfert     Function *Fn = getAnchorScope();
3170ca662297SShilei Tian 
3171d9659bf6SJohannes Doerfert     OMPInformationCache::RuntimeFunctionInfo &InitRFI =
3172d9659bf6SJohannes Doerfert         OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
3173d9659bf6SJohannes Doerfert     OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
3174d9659bf6SJohannes Doerfert         OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
3175d9659bf6SJohannes Doerfert 
3176d9659bf6SJohannes Doerfert     // For kernels we perform more initialization work, first we find the init
3177d9659bf6SJohannes Doerfert     // and deinit calls.
3178d9659bf6SJohannes Doerfert     auto StoreCallBase = [](Use &U,
3179d9659bf6SJohannes Doerfert                             OMPInformationCache::RuntimeFunctionInfo &RFI,
3180d9659bf6SJohannes Doerfert                             CallBase *&Storage) {
3181d9659bf6SJohannes Doerfert       CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3182d9659bf6SJohannes Doerfert       assert(CB &&
3183d9659bf6SJohannes Doerfert              "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
3184d9659bf6SJohannes Doerfert       assert(!Storage &&
3185d9659bf6SJohannes Doerfert              "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
3186d9659bf6SJohannes Doerfert       Storage = CB;
3187d9659bf6SJohannes Doerfert       return false;
3188d9659bf6SJohannes Doerfert     };
3189d9659bf6SJohannes Doerfert     InitRFI.foreachUse(
3190d9659bf6SJohannes Doerfert         [&](Use &U, Function &) {
3191d9659bf6SJohannes Doerfert           StoreCallBase(U, InitRFI, KernelInitCB);
3192d9659bf6SJohannes Doerfert           return false;
3193d9659bf6SJohannes Doerfert         },
3194d9659bf6SJohannes Doerfert         Fn);
3195d9659bf6SJohannes Doerfert     DeinitRFI.foreachUse(
3196d9659bf6SJohannes Doerfert         [&](Use &U, Function &) {
3197d9659bf6SJohannes Doerfert           StoreCallBase(U, DeinitRFI, KernelDeinitCB);
3198d9659bf6SJohannes Doerfert           return false;
3199d9659bf6SJohannes Doerfert         },
3200d9659bf6SJohannes Doerfert         Fn);
3201d9659bf6SJohannes Doerfert 
320258f93264SJoseph Huber     // Ignore kernels without initializers such as global constructors.
32034166738cSJohannes Doerfert     if (!KernelInitCB || !KernelDeinitCB)
320458f93264SJoseph Huber       return;
32054166738cSJohannes Doerfert 
32064166738cSJohannes Doerfert     // Add itself to the reaching kernel and set IsKernelEntry.
32074166738cSJohannes Doerfert     ReachingKernelEntries.insert(Fn);
32084166738cSJohannes Doerfert     IsKernelEntry = true;
3209d9659bf6SJohannes Doerfert 
3210514c033dSJohannes Doerfert     // For kernels we might need to initialize/finalize the IsSPMD state and
3211514c033dSJohannes Doerfert     // we need to register a simplification callback so that the Attributor
3212514c033dSJohannes Doerfert     // knows the constant arguments to __kmpc_target_init and
3213d9659bf6SJohannes Doerfert     // __kmpc_target_deinit might actually change.
3214d9659bf6SJohannes Doerfert 
3215d9659bf6SJohannes Doerfert     Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
3216d9659bf6SJohannes Doerfert         [&](const IRPosition &IRP, const AbstractAttribute *AA,
3217d9659bf6SJohannes Doerfert             bool &UsedAssumedInformation) -> Optional<Value *> {
3218d9659bf6SJohannes Doerfert       // IRP represents the "use generic state machine" argument of an
3219d9659bf6SJohannes Doerfert       // __kmpc_target_init call. We will answer this one with the internal
3220d9659bf6SJohannes Doerfert       // state. As long as we are not in an invalid state, we will create a
3221d9659bf6SJohannes Doerfert       // custom state machine so the value should be a `i1 false`. If we are
3222d9659bf6SJohannes Doerfert       // in an invalid state, we won't change the value that is in the IR.
3223e6e440aeSJohannes Doerfert       if (!ReachedKnownParallelRegions.isValidState())
3224d9659bf6SJohannes Doerfert         return nullptr;
3225e0c5d83aSJohannes Doerfert       // If we have disabled state machine rewrites, don't make a custom one.
3226e0c5d83aSJohannes Doerfert       if (DisableOpenMPOptStateMachineRewrite)
3227e0c5d83aSJohannes Doerfert         return nullptr;
3228d9659bf6SJohannes Doerfert       if (AA)
3229d9659bf6SJohannes Doerfert         A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3230d9659bf6SJohannes Doerfert       UsedAssumedInformation = !isAtFixpoint();
3231d9659bf6SJohannes Doerfert       auto *FalseVal =
32322aed0813SKazu Hirata           ConstantInt::getBool(IRP.getAnchorValue().getContext(), false);
3233d9659bf6SJohannes Doerfert       return FalseVal;
3234d9659bf6SJohannes Doerfert     };
3235d9659bf6SJohannes Doerfert 
3236423d34f7SShilei Tian     Attributor::SimplifictionCallbackTy ModeSimplifyCB =
3237514c033dSJohannes Doerfert         [&](const IRPosition &IRP, const AbstractAttribute *AA,
3238514c033dSJohannes Doerfert             bool &UsedAssumedInformation) -> Optional<Value *> {
3239514c033dSJohannes Doerfert       // IRP represents the "SPMDCompatibilityTracker" argument of an
3240514c033dSJohannes Doerfert       // __kmpc_target_init or
3241514c033dSJohannes Doerfert       // __kmpc_target_deinit call. We will answer this one with the internal
3242514c033dSJohannes Doerfert       // state.
324397387fdfSJohannes Doerfert       if (!SPMDCompatibilityTracker.isValidState())
3244514c033dSJohannes Doerfert         return nullptr;
3245514c033dSJohannes Doerfert       if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3246514c033dSJohannes Doerfert         if (AA)
3247514c033dSJohannes Doerfert           A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3248514c033dSJohannes Doerfert         UsedAssumedInformation = true;
3249514c033dSJohannes Doerfert       } else {
3250514c033dSJohannes Doerfert         UsedAssumedInformation = false;
3251514c033dSJohannes Doerfert       }
3252423d34f7SShilei Tian       auto *Val = ConstantInt::getSigned(
3253423d34f7SShilei Tian           IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
3254423d34f7SShilei Tian           SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
3255423d34f7SShilei Tian                                                : OMP_TGT_EXEC_MODE_GENERIC);
3256514c033dSJohannes Doerfert       return Val;
3257514c033dSJohannes Doerfert     };
3258514c033dSJohannes Doerfert 
3259e8439ec8SGiorgis Georgakoudis     Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
3260e8439ec8SGiorgis Georgakoudis         [&](const IRPosition &IRP, const AbstractAttribute *AA,
3261e8439ec8SGiorgis Georgakoudis             bool &UsedAssumedInformation) -> Optional<Value *> {
3262e8439ec8SGiorgis Georgakoudis       // IRP represents the "RequiresFullRuntime" argument of an
3263e8439ec8SGiorgis Georgakoudis       // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
3264e8439ec8SGiorgis Georgakoudis       // one with the internal state of the SPMDCompatibilityTracker, so if
3265e8439ec8SGiorgis Georgakoudis       // generic then true, if SPMD then false.
3266e8439ec8SGiorgis Georgakoudis       if (!SPMDCompatibilityTracker.isValidState())
3267e8439ec8SGiorgis Georgakoudis         return nullptr;
3268e8439ec8SGiorgis Georgakoudis       if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3269e8439ec8SGiorgis Georgakoudis         if (AA)
3270e8439ec8SGiorgis Georgakoudis           A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3271e8439ec8SGiorgis Georgakoudis         UsedAssumedInformation = true;
3272e8439ec8SGiorgis Georgakoudis       } else {
3273e8439ec8SGiorgis Georgakoudis         UsedAssumedInformation = false;
3274e8439ec8SGiorgis Georgakoudis       }
3275e8439ec8SGiorgis Georgakoudis       auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
3276e8439ec8SGiorgis Georgakoudis                                        !SPMDCompatibilityTracker.isAssumed());
3277e8439ec8SGiorgis Georgakoudis       return Val;
3278e8439ec8SGiorgis Georgakoudis     };
3279e8439ec8SGiorgis Georgakoudis 
3280423d34f7SShilei Tian     constexpr const int InitModeArgNo = 1;
3281423d34f7SShilei Tian     constexpr const int DeinitModeArgNo = 1;
3282d9659bf6SJohannes Doerfert     constexpr const int InitUseStateMachineArgNo = 2;
3283e8439ec8SGiorgis Georgakoudis     constexpr const int InitRequiresFullRuntimeArgNo = 3;
3284e8439ec8SGiorgis Georgakoudis     constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
3285d9659bf6SJohannes Doerfert     A.registerSimplificationCallback(
3286d9659bf6SJohannes Doerfert         IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
3287d9659bf6SJohannes Doerfert         StateMachineSimplifyCB);
3288514c033dSJohannes Doerfert     A.registerSimplificationCallback(
3289423d34f7SShilei Tian         IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
3290423d34f7SShilei Tian         ModeSimplifyCB);
3291514c033dSJohannes Doerfert     A.registerSimplificationCallback(
3292423d34f7SShilei Tian         IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
3293423d34f7SShilei Tian         ModeSimplifyCB);
3294e8439ec8SGiorgis Georgakoudis     A.registerSimplificationCallback(
3295e8439ec8SGiorgis Georgakoudis         IRPosition::callsite_argument(*KernelInitCB,
3296e8439ec8SGiorgis Georgakoudis                                       InitRequiresFullRuntimeArgNo),
3297e8439ec8SGiorgis Georgakoudis         IsGenericModeSimplifyCB);
3298e8439ec8SGiorgis Georgakoudis     A.registerSimplificationCallback(
3299e8439ec8SGiorgis Georgakoudis         IRPosition::callsite_argument(*KernelDeinitCB,
3300e8439ec8SGiorgis Georgakoudis                                       DeinitRequiresFullRuntimeArgNo),
3301e8439ec8SGiorgis Georgakoudis         IsGenericModeSimplifyCB);
3302514c033dSJohannes Doerfert 
3303514c033dSJohannes Doerfert     // Check if we know we are in SPMD-mode already.
3304423d34f7SShilei Tian     ConstantInt *ModeArg =
3305423d34f7SShilei Tian         dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3306423d34f7SShilei Tian     if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3307514c033dSJohannes Doerfert       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
330860e643feSGiorgis Georgakoudis     // This is a generic region but SPMDization is disabled so stop tracking.
330960e643feSGiorgis Georgakoudis     else if (DisableOpenMPOptSPMDization)
331060e643feSGiorgis Georgakoudis       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3311d9659bf6SJohannes Doerfert   }
3312d9659bf6SJohannes Doerfert 
/// Sanitize the string \p S such that it is a suitable global symbol name.
///
/// Every character of \p S that is not an ASCII letter, an ASCII digit, or an
/// underscore is replaced by a '.'; all other characters are kept as they
/// are. The length of the string never changes.
///
/// \param S The candidate name; taken by value and modified in place.
/// \returns The sanitized copy of \p S.
static std::string sanitizeForGlobalName(std::string S) {
  // Characters that are kept verbatim in the resulting name.
  auto IsKeptChar = [](const char C) {
    return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
           (C >= '0' && C <= '9') || C == '_';
  };
  std::replace_if(
      S.begin(), S.end(), [&](const char C) { return !IsKeptChar(C); }, '.');
  return S;
}
3324e6e440aeSJohannes Doerfert 
3325d9659bf6SJohannes Doerfert   /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
3326d9659bf6SJohannes Doerfert   /// finished now.
manifest__anon23c38c770111::AAKernelInfoFunction3327d9659bf6SJohannes Doerfert   ChangeStatus manifest(Attributor &A) override {
3328d9659bf6SJohannes Doerfert     // If we are not looking at a kernel with __kmpc_target_init and
3329d9659bf6SJohannes Doerfert     // __kmpc_target_deinit call we cannot actually manifest the information.
3330d9659bf6SJohannes Doerfert     if (!KernelInitCB || !KernelDeinitCB)
3331d9659bf6SJohannes Doerfert       return ChangeStatus::UNCHANGED;
3332d9659bf6SJohannes Doerfert 
3333514c033dSJohannes Doerfert     // If we can we change the execution mode to SPMD-mode otherwise we build a
3334514c033dSJohannes Doerfert     // custom state machine.
3335d61aac76SJohannes Doerfert     ChangeStatus Changed = ChangeStatus::UNCHANGED;
3336d61aac76SJohannes Doerfert     if (!changeToSPMDMode(A, Changed))
3337c6457dcaSJohannes Doerfert       return buildCustomStateMachine(A);
3338d9659bf6SJohannes Doerfert 
3339d61aac76SJohannes Doerfert     return Changed;
3340d9659bf6SJohannes Doerfert   }
3341d9659bf6SJohannes Doerfert 
changeToSPMDMode__anon23c38c770111::AAKernelInfoFunction3342d61aac76SJohannes Doerfert   bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
3343c771eaf0SJohannes Doerfert     if (!mayContainParallelRegion())
3344c771eaf0SJohannes Doerfert       return false;
3345c771eaf0SJohannes Doerfert 
3346eef6601bSJoseph Huber     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3347eef6601bSJoseph Huber 
3348514c033dSJohannes Doerfert     if (!SPMDCompatibilityTracker.isAssumed()) {
3349514c033dSJohannes Doerfert       for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
3350514c033dSJohannes Doerfert         if (!NonCompatibleI)
3351514c033dSJohannes Doerfert           continue;
3352eef6601bSJoseph Huber 
3353eef6601bSJoseph Huber         // Skip diagnostics on calls to known OpenMP runtime functions for now.
3354eef6601bSJoseph Huber         if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
3355eef6601bSJoseph Huber           if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
3356eef6601bSJoseph Huber             continue;
3357eef6601bSJoseph Huber 
3358514c033dSJohannes Doerfert         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3359eef6601bSJoseph Huber           ORA << "Value has potential side effects preventing SPMD-mode "
3360eef6601bSJoseph Huber                  "execution";
3361eef6601bSJoseph Huber           if (isa<CallBase>(NonCompatibleI)) {
3362eef6601bSJoseph Huber             ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
3363eef6601bSJoseph Huber                    "the called function to override";
3364514c033dSJohannes Doerfert           }
3365514c033dSJohannes Doerfert           return ORA << ".";
3366514c033dSJohannes Doerfert         };
33672c31d5ebSJoseph Huber         A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
33682c31d5ebSJoseph Huber                                                  Remark);
3369514c033dSJohannes Doerfert 
3370514c033dSJohannes Doerfert         LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
3371514c033dSJohannes Doerfert                           << *NonCompatibleI << "\n");
3372514c033dSJohannes Doerfert       }
3373514c033dSJohannes Doerfert 
3374514c033dSJohannes Doerfert       return false;
3375514c033dSJohannes Doerfert     }
3376514c033dSJohannes Doerfert 
33774166738cSJohannes Doerfert     // Get the actual kernel, could be the caller of the anchor scope if we have
33784166738cSJohannes Doerfert     // a debug wrapper.
3379d61aac76SJohannes Doerfert     Function *Kernel = getAnchorScope();
33804166738cSJohannes Doerfert     if (Kernel->hasLocalLinkage()) {
33814166738cSJohannes Doerfert       assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
33824166738cSJohannes Doerfert       auto *CB = cast<CallBase>(Kernel->user_back());
33834166738cSJohannes Doerfert       Kernel = CB->getCaller();
33844166738cSJohannes Doerfert     }
33854166738cSJohannes Doerfert     assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
33864166738cSJohannes Doerfert 
33874166738cSJohannes Doerfert     // Check if the kernel is already in SPMD mode, if so, return success.
3388d61aac76SJohannes Doerfert     GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
3389d61aac76SJohannes Doerfert         (Kernel->getName() + "_exec_mode").str());
3390d61aac76SJohannes Doerfert     assert(ExecMode && "Kernel without exec mode?");
3391d61aac76SJohannes Doerfert     assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
3392d61aac76SJohannes Doerfert 
3393d61aac76SJohannes Doerfert     // Set the global exec mode flag to indicate SPMD-Generic mode.
3394d61aac76SJohannes Doerfert     assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
3395d61aac76SJohannes Doerfert            "ExecMode is not an integer!");
3396d61aac76SJohannes Doerfert     const int8_t ExecModeVal =
3397d61aac76SJohannes Doerfert         cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
3398d61aac76SJohannes Doerfert     if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
3399d61aac76SJohannes Doerfert       return true;
3400d61aac76SJohannes Doerfert 
3401d61aac76SJohannes Doerfert     // We will now unconditionally modify the IR, indicate a change.
3402d61aac76SJohannes Doerfert     Changed = ChangeStatus::CHANGED;
3403d61aac76SJohannes Doerfert 
340429a3e3ddSGiorgis Georgakoudis     auto CreateGuardedRegion = [&](Instruction *RegionStartI,
340529a3e3ddSGiorgis Georgakoudis                                    Instruction *RegionEndI) {
340629a3e3ddSGiorgis Georgakoudis       LoopInfo *LI = nullptr;
340729a3e3ddSGiorgis Georgakoudis       DominatorTree *DT = nullptr;
340829a3e3ddSGiorgis Georgakoudis       MemorySSAUpdater *MSU = nullptr;
340929a3e3ddSGiorgis Georgakoudis       using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
341029a3e3ddSGiorgis Georgakoudis 
341129a3e3ddSGiorgis Georgakoudis       BasicBlock *ParentBB = RegionStartI->getParent();
341229a3e3ddSGiorgis Georgakoudis       Function *Fn = ParentBB->getParent();
341329a3e3ddSGiorgis Georgakoudis       Module &M = *Fn->getParent();
341429a3e3ddSGiorgis Georgakoudis 
341529a3e3ddSGiorgis Georgakoudis       // Create all the blocks and logic.
341629a3e3ddSGiorgis Georgakoudis       // ParentBB:
341729a3e3ddSGiorgis Georgakoudis       //    goto RegionCheckTidBB
341829a3e3ddSGiorgis Georgakoudis       // RegionCheckTidBB:
341929a3e3ddSGiorgis Georgakoudis       //    Tid = __kmpc_hardware_thread_id()
342029a3e3ddSGiorgis Georgakoudis       //    if (Tid != 0)
342129a3e3ddSGiorgis Georgakoudis       //        goto RegionBarrierBB
342229a3e3ddSGiorgis Georgakoudis       // RegionStartBB:
342329a3e3ddSGiorgis Georgakoudis       //    <execute instructions guarded>
342429a3e3ddSGiorgis Georgakoudis       //    goto RegionEndBB
342529a3e3ddSGiorgis Georgakoudis       // RegionEndBB:
342629a3e3ddSGiorgis Georgakoudis       //    <store escaping values to shared mem>
342729a3e3ddSGiorgis Georgakoudis       //    goto RegionBarrierBB
342829a3e3ddSGiorgis Georgakoudis       //  RegionBarrierBB:
342929a3e3ddSGiorgis Georgakoudis       //    __kmpc_simple_barrier_spmd()
343029a3e3ddSGiorgis Georgakoudis       //    // second barrier is omitted if lacking escaping values.
343129a3e3ddSGiorgis Georgakoudis       //    <load escaping values from shared mem>
343229a3e3ddSGiorgis Georgakoudis       //    __kmpc_simple_barrier_spmd()
343329a3e3ddSGiorgis Georgakoudis       //    goto RegionExitBB
343429a3e3ddSGiorgis Georgakoudis       // RegionExitBB:
343529a3e3ddSGiorgis Georgakoudis       //    <execute rest of instructions>
343629a3e3ddSGiorgis Georgakoudis 
343729a3e3ddSGiorgis Georgakoudis       BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
343829a3e3ddSGiorgis Georgakoudis                                            DT, LI, MSU, "region.guarded.end");
343929a3e3ddSGiorgis Georgakoudis       BasicBlock *RegionBarrierBB =
344029a3e3ddSGiorgis Georgakoudis           SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
344129a3e3ddSGiorgis Georgakoudis                      MSU, "region.barrier");
344229a3e3ddSGiorgis Georgakoudis       BasicBlock *RegionExitBB =
344329a3e3ddSGiorgis Georgakoudis           SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
344429a3e3ddSGiorgis Georgakoudis                      DT, LI, MSU, "region.exit");
344529a3e3ddSGiorgis Georgakoudis       BasicBlock *RegionStartBB =
344629a3e3ddSGiorgis Georgakoudis           SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
344729a3e3ddSGiorgis Georgakoudis 
344829a3e3ddSGiorgis Georgakoudis       assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
344929a3e3ddSGiorgis Georgakoudis              "Expected a different CFG");
345029a3e3ddSGiorgis Georgakoudis 
345129a3e3ddSGiorgis Georgakoudis       BasicBlock *RegionCheckTidBB = SplitBlock(
345229a3e3ddSGiorgis Georgakoudis           ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
345329a3e3ddSGiorgis Georgakoudis 
345429a3e3ddSGiorgis Georgakoudis       // Register basic blocks with the Attributor.
345529a3e3ddSGiorgis Georgakoudis       A.registerManifestAddedBasicBlock(*RegionEndBB);
345629a3e3ddSGiorgis Georgakoudis       A.registerManifestAddedBasicBlock(*RegionBarrierBB);
345729a3e3ddSGiorgis Georgakoudis       A.registerManifestAddedBasicBlock(*RegionExitBB);
345829a3e3ddSGiorgis Georgakoudis       A.registerManifestAddedBasicBlock(*RegionStartBB);
345929a3e3ddSGiorgis Georgakoudis       A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
346029a3e3ddSGiorgis Georgakoudis 
346129a3e3ddSGiorgis Georgakoudis       bool HasBroadcastValues = false;
346229a3e3ddSGiorgis Georgakoudis       // Find escaping outputs from the guarded region to outside users and
346329a3e3ddSGiorgis Georgakoudis       // broadcast their values to them.
346429a3e3ddSGiorgis Georgakoudis       for (Instruction &I : *RegionStartBB) {
346529a3e3ddSGiorgis Georgakoudis         SmallPtrSet<Instruction *, 4> OutsideUsers;
346629a3e3ddSGiorgis Georgakoudis         for (User *Usr : I.users()) {
346729a3e3ddSGiorgis Georgakoudis           Instruction &UsrI = *cast<Instruction>(Usr);
346829a3e3ddSGiorgis Georgakoudis           if (UsrI.getParent() != RegionStartBB)
346929a3e3ddSGiorgis Georgakoudis             OutsideUsers.insert(&UsrI);
347029a3e3ddSGiorgis Georgakoudis         }
347129a3e3ddSGiorgis Georgakoudis 
347229a3e3ddSGiorgis Georgakoudis         if (OutsideUsers.empty())
347329a3e3ddSGiorgis Georgakoudis           continue;
347429a3e3ddSGiorgis Georgakoudis 
347529a3e3ddSGiorgis Georgakoudis         HasBroadcastValues = true;
347629a3e3ddSGiorgis Georgakoudis 
347729a3e3ddSGiorgis Georgakoudis         // Emit a global variable in shared memory to store the broadcasted
347829a3e3ddSGiorgis Georgakoudis         // value.
347929a3e3ddSGiorgis Georgakoudis         auto *SharedMem = new GlobalVariable(
348029a3e3ddSGiorgis Georgakoudis             M, I.getType(), /* IsConstant */ false,
348129a3e3ddSGiorgis Georgakoudis             GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
3482e6e440aeSJohannes Doerfert             sanitizeForGlobalName(
3483e6e440aeSJohannes Doerfert                 (I.getName() + ".guarded.output.alloc").str()),
3484e6e440aeSJohannes Doerfert             nullptr, GlobalValue::NotThreadLocal,
348529a3e3ddSGiorgis Georgakoudis             static_cast<unsigned>(AddressSpace::Shared));
348629a3e3ddSGiorgis Georgakoudis 
348729a3e3ddSGiorgis Georgakoudis         // Emit a store instruction to update the value.
348829a3e3ddSGiorgis Georgakoudis         new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
348929a3e3ddSGiorgis Georgakoudis 
349029a3e3ddSGiorgis Georgakoudis         LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
349129a3e3ddSGiorgis Georgakoudis                                        I.getName() + ".guarded.output.load",
349229a3e3ddSGiorgis Georgakoudis                                        RegionBarrierBB->getTerminator());
349329a3e3ddSGiorgis Georgakoudis 
349429a3e3ddSGiorgis Georgakoudis         // Emit a load instruction and replace uses of the output value.
3495e6e440aeSJohannes Doerfert         for (Instruction *UsrI : OutsideUsers)
349629a3e3ddSGiorgis Georgakoudis           UsrI->replaceUsesOfWith(&I, LoadI);
349729a3e3ddSGiorgis Georgakoudis       }
349829a3e3ddSGiorgis Georgakoudis 
349929a3e3ddSGiorgis Georgakoudis       auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
350029a3e3ddSGiorgis Georgakoudis 
350129a3e3ddSGiorgis Georgakoudis       // Go to tid check BB in ParentBB.
350229a3e3ddSGiorgis Georgakoudis       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
350329a3e3ddSGiorgis Georgakoudis       ParentBB->getTerminator()->eraseFromParent();
350429a3e3ddSGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription Loc(
350529a3e3ddSGiorgis Georgakoudis           InsertPointTy(ParentBB, ParentBB->end()), DL);
350629a3e3ddSGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.updateToLocation(Loc);
3507944aa042SJohannes Doerfert       uint32_t SrcLocStrSize;
3508944aa042SJohannes Doerfert       auto *SrcLocStr =
3509944aa042SJohannes Doerfert           OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3510944aa042SJohannes Doerfert       Value *Ident =
3511944aa042SJohannes Doerfert           OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
351229a3e3ddSGiorgis Georgakoudis       BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
351329a3e3ddSGiorgis Georgakoudis 
351429a3e3ddSGiorgis Georgakoudis       // Add check for Tid in RegionCheckTidBB
351529a3e3ddSGiorgis Georgakoudis       RegionCheckTidBB->getTerminator()->eraseFromParent();
351629a3e3ddSGiorgis Georgakoudis       OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
351729a3e3ddSGiorgis Georgakoudis           InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
351829a3e3ddSGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
351929a3e3ddSGiorgis Georgakoudis       FunctionCallee HardwareTidFn =
352029a3e3ddSGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
352129a3e3ddSGiorgis Georgakoudis               M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
352206cfdd52SJoseph Huber       CallInst *Tid =
352329a3e3ddSGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
352406cfdd52SJoseph Huber       Tid->setDebugLoc(DL);
352506cfdd52SJoseph Huber       OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
352629a3e3ddSGiorgis Georgakoudis       Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
352729a3e3ddSGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.Builder
352829a3e3ddSGiorgis Georgakoudis           .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
352929a3e3ddSGiorgis Georgakoudis           ->setDebugLoc(DL);
353029a3e3ddSGiorgis Georgakoudis 
353129a3e3ddSGiorgis Georgakoudis       // First barrier for synchronization, ensures main thread has updated
353229a3e3ddSGiorgis Georgakoudis       // values.
353329a3e3ddSGiorgis Georgakoudis       FunctionCallee BarrierFn =
353429a3e3ddSGiorgis Georgakoudis           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
353529a3e3ddSGiorgis Georgakoudis               M, OMPRTL___kmpc_barrier_simple_spmd);
353629a3e3ddSGiorgis Georgakoudis       OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
353729a3e3ddSGiorgis Georgakoudis           RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
353806cfdd52SJoseph Huber       CallInst *Barrier =
353906cfdd52SJoseph Huber           OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
354006cfdd52SJoseph Huber       Barrier->setDebugLoc(DL);
354106cfdd52SJoseph Huber       OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
354229a3e3ddSGiorgis Georgakoudis 
354329a3e3ddSGiorgis Georgakoudis       // Second barrier ensures workers have read broadcast values.
354406cfdd52SJoseph Huber       if (HasBroadcastValues) {
354506cfdd52SJoseph Huber         CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "",
354606cfdd52SJoseph Huber                                              RegionBarrierBB->getTerminator());
354706cfdd52SJoseph Huber         Barrier->setDebugLoc(DL);
354806cfdd52SJoseph Huber         OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
354906cfdd52SJoseph Huber       }
355029a3e3ddSGiorgis Georgakoudis     };
355129a3e3ddSGiorgis Georgakoudis 
355299ea8ac9SJohannes Doerfert     auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
355399ea8ac9SJohannes Doerfert     SmallPtrSet<BasicBlock *, 8> Visited;
355499ea8ac9SJohannes Doerfert     for (Instruction *GuardedI : SPMDCompatibilityTracker) {
355599ea8ac9SJohannes Doerfert       BasicBlock *BB = GuardedI->getParent();
355699ea8ac9SJohannes Doerfert       if (!Visited.insert(BB).second)
355799ea8ac9SJohannes Doerfert         continue;
355899ea8ac9SJohannes Doerfert 
355999ea8ac9SJohannes Doerfert       SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
356099ea8ac9SJohannes Doerfert       Instruction *LastEffect = nullptr;
356199ea8ac9SJohannes Doerfert       BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
356299ea8ac9SJohannes Doerfert       while (++IP != IPEnd) {
356399ea8ac9SJohannes Doerfert         if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
356499ea8ac9SJohannes Doerfert           continue;
356599ea8ac9SJohannes Doerfert         Instruction *I = &*IP;
356699ea8ac9SJohannes Doerfert         if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
356799ea8ac9SJohannes Doerfert           continue;
356899ea8ac9SJohannes Doerfert         if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
356999ea8ac9SJohannes Doerfert           LastEffect = nullptr;
357099ea8ac9SJohannes Doerfert           continue;
357199ea8ac9SJohannes Doerfert         }
357299ea8ac9SJohannes Doerfert         if (LastEffect)
357399ea8ac9SJohannes Doerfert           Reorders.push_back({I, LastEffect});
357499ea8ac9SJohannes Doerfert         LastEffect = &*IP;
357599ea8ac9SJohannes Doerfert       }
357699ea8ac9SJohannes Doerfert       for (auto &Reorder : Reorders)
357799ea8ac9SJohannes Doerfert         Reorder.first->moveBefore(Reorder.second);
357899ea8ac9SJohannes Doerfert     }
357999ea8ac9SJohannes Doerfert 
358029a3e3ddSGiorgis Georgakoudis     SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
358129a3e3ddSGiorgis Georgakoudis 
358229a3e3ddSGiorgis Georgakoudis     for (Instruction *GuardedI : SPMDCompatibilityTracker) {
358329a3e3ddSGiorgis Georgakoudis       BasicBlock *BB = GuardedI->getParent();
358429a3e3ddSGiorgis Georgakoudis       auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
358529a3e3ddSGiorgis Georgakoudis           IRPosition::function(*GuardedI->getFunction()), nullptr,
358629a3e3ddSGiorgis Georgakoudis           DepClassTy::NONE);
358729a3e3ddSGiorgis Georgakoudis       assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
358829a3e3ddSGiorgis Georgakoudis       auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
358929a3e3ddSGiorgis Georgakoudis       // Continue if instruction is already guarded.
359029a3e3ddSGiorgis Georgakoudis       if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
359129a3e3ddSGiorgis Georgakoudis         continue;
359229a3e3ddSGiorgis Georgakoudis 
359329a3e3ddSGiorgis Georgakoudis       Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
359429a3e3ddSGiorgis Georgakoudis       for (Instruction &I : *BB) {
359529a3e3ddSGiorgis Georgakoudis         // If instruction I needs to be guarded update the guarded region
359629a3e3ddSGiorgis Georgakoudis         // bounds.
359729a3e3ddSGiorgis Georgakoudis         if (SPMDCompatibilityTracker.contains(&I)) {
359829a3e3ddSGiorgis Georgakoudis           CalleeAAFunction.getGuardedInstructions().insert(&I);
359929a3e3ddSGiorgis Georgakoudis           if (GuardedRegionStart)
360029a3e3ddSGiorgis Georgakoudis             GuardedRegionEnd = &I;
360129a3e3ddSGiorgis Georgakoudis           else
360229a3e3ddSGiorgis Georgakoudis             GuardedRegionStart = GuardedRegionEnd = &I;
360329a3e3ddSGiorgis Georgakoudis 
360429a3e3ddSGiorgis Georgakoudis           continue;
360529a3e3ddSGiorgis Georgakoudis         }
360629a3e3ddSGiorgis Georgakoudis 
360729a3e3ddSGiorgis Georgakoudis         // Instruction I does not need guarding, store
360829a3e3ddSGiorgis Georgakoudis         // any region found and reset bounds.
360929a3e3ddSGiorgis Georgakoudis         if (GuardedRegionStart) {
361029a3e3ddSGiorgis Georgakoudis           GuardedRegions.push_back(
361129a3e3ddSGiorgis Georgakoudis               std::make_pair(GuardedRegionStart, GuardedRegionEnd));
361229a3e3ddSGiorgis Georgakoudis           GuardedRegionStart = nullptr;
361329a3e3ddSGiorgis Georgakoudis           GuardedRegionEnd = nullptr;
361429a3e3ddSGiorgis Georgakoudis         }
361529a3e3ddSGiorgis Georgakoudis       }
361629a3e3ddSGiorgis Georgakoudis     }
361729a3e3ddSGiorgis Georgakoudis 
361829a3e3ddSGiorgis Georgakoudis     for (auto &GR : GuardedRegions)
361929a3e3ddSGiorgis Georgakoudis       CreateGuardedRegion(GR.first, GR.second);
362029a3e3ddSGiorgis Georgakoudis 
3621514c033dSJohannes Doerfert     // Adjust the global exec mode flag that tells the runtime what mode this
3622514c033dSJohannes Doerfert     // kernel is executed in.
3623ca999f71SShilei Tian     assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
3624ca999f71SShilei Tian            "Initially non-SPMD kernel has SPMD exec mode!");
3625514c033dSJohannes Doerfert     ExecMode->setInitializer(
3626ca999f71SShilei Tian         ConstantInt::get(ExecMode->getInitializer()->getType(),
3627ca999f71SShilei Tian                          ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
3628514c033dSJohannes Doerfert 
3629514c033dSJohannes Doerfert     // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
3630423d34f7SShilei Tian     const int InitModeArgNo = 1;
3631423d34f7SShilei Tian     const int DeinitModeArgNo = 1;
3632514c033dSJohannes Doerfert     const int InitUseStateMachineArgNo = 2;
3633e8439ec8SGiorgis Georgakoudis     const int InitRequiresFullRuntimeArgNo = 3;
3634e8439ec8SGiorgis Georgakoudis     const int DeinitRequiresFullRuntimeArgNo = 2;
3635514c033dSJohannes Doerfert 
3636514c033dSJohannes Doerfert     auto &Ctx = getAnchorValue().getContext();
3637423d34f7SShilei Tian     A.changeUseAfterManifest(
3638423d34f7SShilei Tian         KernelInitCB->getArgOperandUse(InitModeArgNo),
3639423d34f7SShilei Tian         *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3640423d34f7SShilei Tian                                 OMP_TGT_EXEC_MODE_SPMD));
3641514c033dSJohannes Doerfert     A.changeUseAfterManifest(
3642514c033dSJohannes Doerfert         KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
36432aed0813SKazu Hirata         *ConstantInt::getBool(Ctx, false));
3644514c033dSJohannes Doerfert     A.changeUseAfterManifest(
3645423d34f7SShilei Tian         KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
3646423d34f7SShilei Tian         *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3647423d34f7SShilei Tian                                 OMP_TGT_EXEC_MODE_SPMD));
3648e8439ec8SGiorgis Georgakoudis     A.changeUseAfterManifest(
3649e8439ec8SGiorgis Georgakoudis         KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
36502aed0813SKazu Hirata         *ConstantInt::getBool(Ctx, false));
3651e8439ec8SGiorgis Georgakoudis     A.changeUseAfterManifest(
3652e8439ec8SGiorgis Georgakoudis         KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
36532aed0813SKazu Hirata         *ConstantInt::getBool(Ctx, false));
3654e8439ec8SGiorgis Georgakoudis 
3655514c033dSJohannes Doerfert     ++NumOpenMPTargetRegionKernelsSPMD;
3656514c033dSJohannes Doerfert 
3657514c033dSJohannes Doerfert     auto Remark = [&](OptimizationRemark OR) {
3658eef6601bSJoseph Huber       return OR << "Transformed generic-mode kernel to SPMD-mode.";
3659514c033dSJohannes Doerfert     };
36602c31d5ebSJoseph Huber     A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
3661514c033dSJohannes Doerfert     return true;
3662514c033dSJohannes Doerfert   };
3663514c033dSJohannes Doerfert 
buildCustomStateMachine__anon23c38c770111::AAKernelInfoFunction3664d9659bf6SJohannes Doerfert   ChangeStatus buildCustomStateMachine(Attributor &A) {
3665cd0dd8ecSJoseph Huber     // If we have disabled state machine rewrites, don't make a custom one
3666cd0dd8ecSJoseph Huber     if (DisableOpenMPOptStateMachineRewrite)
3667c6457dcaSJohannes Doerfert       return ChangeStatus::UNCHANGED;
3668cd0dd8ecSJoseph Huber 
3669f074a6a0SJoseph Huber     // Don't rewrite the state machine if we are not in a valid state.
3670f074a6a0SJoseph Huber     if (!ReachedKnownParallelRegions.isValidState())
3671f074a6a0SJoseph Huber       return ChangeStatus::UNCHANGED;
3672d9659bf6SJohannes Doerfert 
3673423d34f7SShilei Tian     const int InitModeArgNo = 1;
3674d9659bf6SJohannes Doerfert     const int InitUseStateMachineArgNo = 2;
3675d9659bf6SJohannes Doerfert 
3676d9659bf6SJohannes Doerfert     // Check if the current configuration is non-SPMD and generic state machine.
3677d9659bf6SJohannes Doerfert     // If we already have SPMD mode or a custom state machine we do not need to
3678d9659bf6SJohannes Doerfert     // go any further. If it is anything but a constant something is weird and
3679d9659bf6SJohannes Doerfert     // we give up.
3680d9659bf6SJohannes Doerfert     ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
3681d9659bf6SJohannes Doerfert         KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
3682423d34f7SShilei Tian     ConstantInt *Mode =
3683423d34f7SShilei Tian         dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3684d9659bf6SJohannes Doerfert 
3685d9659bf6SJohannes Doerfert     // If we are stuck with generic mode, try to create a custom device (=GPU)
3686d9659bf6SJohannes Doerfert     // state machine which is specialized for the parallel regions that are
3687d9659bf6SJohannes Doerfert     // reachable by the kernel.
3688423d34f7SShilei Tian     if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
3689423d34f7SShilei Tian         (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3690d9659bf6SJohannes Doerfert       return ChangeStatus::UNCHANGED;
3691d9659bf6SJohannes Doerfert 
3692514c033dSJohannes Doerfert     // If not SPMD mode, indicate we use a custom state machine now.
3693d9659bf6SJohannes Doerfert     auto &Ctx = getAnchorValue().getContext();
36942aed0813SKazu Hirata     auto *FalseVal = ConstantInt::getBool(Ctx, false);
3695d9659bf6SJohannes Doerfert     A.changeUseAfterManifest(
3696d9659bf6SJohannes Doerfert         KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
3697d9659bf6SJohannes Doerfert 
3698d9659bf6SJohannes Doerfert     // If we don't actually need a state machine we are done here. This can
3699d9659bf6SJohannes Doerfert     // happen if there simply are no parallel regions. In the resulting kernel
3700d9659bf6SJohannes Doerfert     // all worker threads will simply exit right away, leaving the main thread
3701d9659bf6SJohannes Doerfert     // to do the work alone.
37026b9a3ec3SJoseph Huber     if (!mayContainParallelRegion()) {
3703d9659bf6SJohannes Doerfert       ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
3704d9659bf6SJohannes Doerfert 
3705d9659bf6SJohannes Doerfert       auto Remark = [&](OptimizationRemark OR) {
3706eef6601bSJoseph Huber         return OR << "Removing unused state machine from generic-mode kernel.";
3707d9659bf6SJohannes Doerfert       };
37082c31d5ebSJoseph Huber       A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
3709d9659bf6SJohannes Doerfert 
3710d9659bf6SJohannes Doerfert       return ChangeStatus::CHANGED;
3711d9659bf6SJohannes Doerfert     }
3712d9659bf6SJohannes Doerfert 
3713d9659bf6SJohannes Doerfert     // Keep track in the statistics of our new shiny custom state machine.
3714d9659bf6SJohannes Doerfert     if (ReachedUnknownParallelRegions.empty()) {
3715d9659bf6SJohannes Doerfert       ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
3716d9659bf6SJohannes Doerfert 
3717d9659bf6SJohannes Doerfert       auto Remark = [&](OptimizationRemark OR) {
3718eef6601bSJoseph Huber         return OR << "Rewriting generic-mode kernel with a customized state "
3719eef6601bSJoseph Huber                      "machine.";
3720d9659bf6SJohannes Doerfert       };
37212c31d5ebSJoseph Huber       A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
3722d9659bf6SJohannes Doerfert     } else {
3723d9659bf6SJohannes Doerfert       ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
3724d9659bf6SJohannes Doerfert 
3725eef6601bSJoseph Huber       auto Remark = [&](OptimizationRemarkAnalysis OR) {
3726d9659bf6SJohannes Doerfert         return OR << "Generic-mode kernel is executed with a customized state "
3727eef6601bSJoseph Huber                      "machine that requires a fallback.";
3728d9659bf6SJohannes Doerfert       };
37292c31d5ebSJoseph Huber       A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
3730d9659bf6SJohannes Doerfert 
3731d9659bf6SJohannes Doerfert       // Tell the user why we ended up with a fallback.
3732d9659bf6SJohannes Doerfert       for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
3733d9659bf6SJohannes Doerfert         if (!UnknownParallelRegionCB)
3734d9659bf6SJohannes Doerfert           continue;
3735d9659bf6SJohannes Doerfert         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3736eef6601bSJoseph Huber           return ORA << "Call may contain unknown parallel regions. Use "
3737eef6601bSJoseph Huber                      << "`__attribute__((assume(\"omp_no_parallelism\")))` to "
3738eef6601bSJoseph Huber                         "override.";
3739d9659bf6SJohannes Doerfert         };
37402c31d5ebSJoseph Huber         A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
37412c31d5ebSJoseph Huber                                                  "OMP133", Remark);
3742d9659bf6SJohannes Doerfert       }
3743d9659bf6SJohannes Doerfert     }
3744d9659bf6SJohannes Doerfert 
3745d9659bf6SJohannes Doerfert     // Create all the blocks:
3746d9659bf6SJohannes Doerfert     //
3747d9659bf6SJohannes Doerfert     //                       InitCB = __kmpc_target_init(...)
3748c9dfe322SJoel E. Denny     //                       BlockHwSize =
3749c9dfe322SJoel E. Denny     //                         __kmpc_get_hardware_num_threads_in_block();
3750c9dfe322SJoel E. Denny     //                       WarpSize = __kmpc_get_warp_size();
3751c9dfe322SJoel E. Denny     //                       BlockSize = BlockHwSize - WarpSize;
3752f9c2d600SJohannes Doerfert     // IsWorkerCheckBB:      bool IsWorker = InitCB != -1;
3753d9659bf6SJohannes Doerfert     //                       if (IsWorker) {
3754f9c2d600SJohannes Doerfert     //                         if (InitCB >= BlockSize) return;
375573720c80SJohannes Doerfert     // SMBeginBB:               __kmpc_barrier_simple_generic(...);
3756d9659bf6SJohannes Doerfert     //                         void *WorkFn;
3757d9659bf6SJohannes Doerfert     //                         bool Active = __kmpc_kernel_parallel(&WorkFn);
3758d9659bf6SJohannes Doerfert     //                         if (!WorkFn) return;
3759d9659bf6SJohannes Doerfert     // SMIsActiveCheckBB:       if (Active) {
3760d9659bf6SJohannes Doerfert     // SMIfCascadeCurrentBB:      if      (WorkFn == <ParFn0>)
3761d9659bf6SJohannes Doerfert     //                              ParFn0(...);
3762d9659bf6SJohannes Doerfert     // SMIfCascadeCurrentBB:      else if (WorkFn == <ParFn1>)
3763d9659bf6SJohannes Doerfert     //                              ParFn1(...);
3764d9659bf6SJohannes Doerfert     //                            ...
3765d9659bf6SJohannes Doerfert     // SMIfCascadeCurrentBB:      else
3766d9659bf6SJohannes Doerfert     //                              ((WorkFnTy*)WorkFn)(...);
3767d9659bf6SJohannes Doerfert     // SMEndParallelBB:           __kmpc_kernel_end_parallel(...);
3768d9659bf6SJohannes Doerfert     //                          }
376973720c80SJohannes Doerfert     // SMDoneBB:                __kmpc_barrier_simple_generic(...);
3770d9659bf6SJohannes Doerfert     //                          goto SMBeginBB;
3771d9659bf6SJohannes Doerfert     //                       }
3772d9659bf6SJohannes Doerfert     // UserCodeEntryBB:      // user code
3773d9659bf6SJohannes Doerfert     //                       __kmpc_target_deinit(...)
3774d9659bf6SJohannes Doerfert     //
3775d9659bf6SJohannes Doerfert     Function *Kernel = getAssociatedFunction();
3776d9659bf6SJohannes Doerfert     assert(Kernel && "Expected an associated function!");
3777d9659bf6SJohannes Doerfert 
3778d9659bf6SJohannes Doerfert     BasicBlock *InitBB = KernelInitCB->getParent();
3779d9659bf6SJohannes Doerfert     BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
3780d9659bf6SJohannes Doerfert         KernelInitCB->getNextNode(), "thread.user_code.check");
3781c9dfe322SJoel E. Denny     BasicBlock *IsWorkerCheckBB =
3782c9dfe322SJoel E. Denny         BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
3783d9659bf6SJohannes Doerfert     BasicBlock *StateMachineBeginBB = BasicBlock::Create(
3784d9659bf6SJohannes Doerfert         Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
3785d9659bf6SJohannes Doerfert     BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
3786d9659bf6SJohannes Doerfert         Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
3787d9659bf6SJohannes Doerfert     BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
3788d9659bf6SJohannes Doerfert         Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
3789d9659bf6SJohannes Doerfert     BasicBlock *StateMachineIfCascadeCurrentBB =
3790d9659bf6SJohannes Doerfert         BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3791d9659bf6SJohannes Doerfert                            Kernel, UserCodeEntryBB);
3792d9659bf6SJohannes Doerfert     BasicBlock *StateMachineEndParallelBB =
3793d9659bf6SJohannes Doerfert         BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
3794d9659bf6SJohannes Doerfert                            Kernel, UserCodeEntryBB);
3795d9659bf6SJohannes Doerfert     BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
3796d9659bf6SJohannes Doerfert         Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
37973f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*InitBB);
37983f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
3799c9dfe322SJoel E. Denny     A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
38003f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
38013f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
38023f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
38033f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
38043f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
38053f71b425SGiorgis Georgakoudis     A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
3806d9659bf6SJohannes Doerfert 
3807d9659bf6SJohannes Doerfert     const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
3808d9659bf6SJohannes Doerfert     ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
3809d9659bf6SJohannes Doerfert     InitBB->getTerminator()->eraseFromParent();
3810c9dfe322SJoel E. Denny 
3811f9c2d600SJohannes Doerfert     Instruction *IsWorker =
3812f9c2d600SJohannes Doerfert         ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
3813f9c2d600SJohannes Doerfert                          ConstantInt::get(KernelInitCB->getType(), -1),
3814f9c2d600SJohannes Doerfert                          "thread.is_worker", InitBB);
3815f9c2d600SJohannes Doerfert     IsWorker->setDebugLoc(DLoc);
3816f9c2d600SJohannes Doerfert     BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
3817f9c2d600SJohannes Doerfert 
3818c9dfe322SJoel E. Denny     Module &M = *Kernel->getParent();
3819c9dfe322SJoel E. Denny     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3820c9dfe322SJoel E. Denny     FunctionCallee BlockHwSizeFn =
3821c9dfe322SJoel E. Denny         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3822c9dfe322SJoel E. Denny             M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
3823c9dfe322SJoel E. Denny     FunctionCallee WarpSizeFn =
3824c9dfe322SJoel E. Denny         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3825c9dfe322SJoel E. Denny             M, OMPRTL___kmpc_get_warp_size);
382606cfdd52SJoseph Huber     CallInst *BlockHwSize =
3827f9c2d600SJohannes Doerfert         CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
382806cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
3829c9dfe322SJoel E. Denny     BlockHwSize->setDebugLoc(DLoc);
3830f9c2d600SJohannes Doerfert     CallInst *WarpSize =
3831f9c2d600SJohannes Doerfert         CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
383206cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
3833c9dfe322SJoel E. Denny     WarpSize->setDebugLoc(DLoc);
3834f9c2d600SJohannes Doerfert     Instruction *BlockSize = BinaryOperator::CreateSub(
3835f9c2d600SJohannes Doerfert         BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
3836c9dfe322SJoel E. Denny     BlockSize->setDebugLoc(DLoc);
3837f9c2d600SJohannes Doerfert     Instruction *IsMainOrWorker = ICmpInst::Create(
3838f9c2d600SJohannes Doerfert         ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
3839f9c2d600SJohannes Doerfert         "thread.is_main_or_worker", IsWorkerCheckBB);
3840c9dfe322SJoel E. Denny     IsMainOrWorker->setDebugLoc(DLoc);
3841f9c2d600SJohannes Doerfert     BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
3842f9c2d600SJohannes Doerfert                        IsMainOrWorker, IsWorkerCheckBB);
384371052ea1SJon Chesterfield 
3844d9659bf6SJohannes Doerfert     // Create local storage for the work function pointer.
384571052ea1SJon Chesterfield     const DataLayout &DL = M.getDataLayout();
3846d9659bf6SJohannes Doerfert     Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
384771052ea1SJon Chesterfield     Instruction *WorkFnAI =
384871052ea1SJon Chesterfield         new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
384971052ea1SJon Chesterfield                        "worker.work_fn.addr", &Kernel->getEntryBlock().front());
3850d9659bf6SJohannes Doerfert     WorkFnAI->setDebugLoc(DLoc);
3851d9659bf6SJohannes Doerfert 
3852d9659bf6SJohannes Doerfert     OMPInfoCache.OMPBuilder.updateToLocation(
3853d9659bf6SJohannes Doerfert         OpenMPIRBuilder::LocationDescription(
3854d9659bf6SJohannes Doerfert             IRBuilder<>::InsertPoint(StateMachineBeginBB,
3855d9659bf6SJohannes Doerfert                                      StateMachineBeginBB->end()),
3856d9659bf6SJohannes Doerfert             DLoc));
3857d9659bf6SJohannes Doerfert 
3858d9659bf6SJohannes Doerfert     Value *Ident = KernelInitCB->getArgOperand(0);
3859d9659bf6SJohannes Doerfert     Value *GTid = KernelInitCB;
3860d9659bf6SJohannes Doerfert 
3861d9659bf6SJohannes Doerfert     FunctionCallee BarrierFn =
3862d9659bf6SJohannes Doerfert         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
386373720c80SJohannes Doerfert             M, OMPRTL___kmpc_barrier_simple_generic);
386406cfdd52SJoseph Huber     CallInst *Barrier =
386506cfdd52SJoseph Huber         CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
386606cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
386706cfdd52SJoseph Huber     Barrier->setDebugLoc(DLoc);
3868d9659bf6SJohannes Doerfert 
386971052ea1SJon Chesterfield     if (WorkFnAI->getType()->getPointerAddressSpace() !=
387071052ea1SJon Chesterfield         (unsigned int)AddressSpace::Generic) {
387171052ea1SJon Chesterfield       WorkFnAI = new AddrSpaceCastInst(
387271052ea1SJon Chesterfield           WorkFnAI,
387371052ea1SJon Chesterfield           PointerType::getWithSamePointeeType(
387471052ea1SJon Chesterfield               cast<PointerType>(WorkFnAI->getType()),
387571052ea1SJon Chesterfield               (unsigned int)AddressSpace::Generic),
387671052ea1SJon Chesterfield           WorkFnAI->getName() + ".generic", StateMachineBeginBB);
387771052ea1SJon Chesterfield       WorkFnAI->setDebugLoc(DLoc);
387871052ea1SJon Chesterfield     }
387971052ea1SJon Chesterfield 
3880d9659bf6SJohannes Doerfert     FunctionCallee KernelParallelFn =
3881d9659bf6SJohannes Doerfert         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3882d9659bf6SJohannes Doerfert             M, OMPRTL___kmpc_kernel_parallel);
388306cfdd52SJoseph Huber     CallInst *IsActiveWorker = CallInst::Create(
3884d9659bf6SJohannes Doerfert         KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
388506cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
3886d9659bf6SJohannes Doerfert     IsActiveWorker->setDebugLoc(DLoc);
3887d9659bf6SJohannes Doerfert     Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
3888d9659bf6SJohannes Doerfert                                        StateMachineBeginBB);
3889d9659bf6SJohannes Doerfert     WorkFn->setDebugLoc(DLoc);
3890d9659bf6SJohannes Doerfert 
3891d9659bf6SJohannes Doerfert     FunctionType *ParallelRegionFnTy = FunctionType::get(
3892d9659bf6SJohannes Doerfert         Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
3893d9659bf6SJohannes Doerfert         false);
3894d9659bf6SJohannes Doerfert     Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
3895d9659bf6SJohannes Doerfert         WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
3896d9659bf6SJohannes Doerfert         StateMachineBeginBB);
3897d9659bf6SJohannes Doerfert 
3898d9659bf6SJohannes Doerfert     Instruction *IsDone =
3899d9659bf6SJohannes Doerfert         ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
3900d9659bf6SJohannes Doerfert                          Constant::getNullValue(VoidPtrTy), "worker.is_done",
3901d9659bf6SJohannes Doerfert                          StateMachineBeginBB);
3902d9659bf6SJohannes Doerfert     IsDone->setDebugLoc(DLoc);
3903d9659bf6SJohannes Doerfert     BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
3904d9659bf6SJohannes Doerfert                        IsDone, StateMachineBeginBB)
3905d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3906d9659bf6SJohannes Doerfert 
3907d9659bf6SJohannes Doerfert     BranchInst::Create(StateMachineIfCascadeCurrentBB,
3908d9659bf6SJohannes Doerfert                        StateMachineDoneBarrierBB, IsActiveWorker,
3909d9659bf6SJohannes Doerfert                        StateMachineIsActiveCheckBB)
3910d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3911d9659bf6SJohannes Doerfert 
3912d9659bf6SJohannes Doerfert     Value *ZeroArg =
3913d9659bf6SJohannes Doerfert         Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
3914d9659bf6SJohannes Doerfert 
3915d9659bf6SJohannes Doerfert     // Now that we have most of the CFG skeleton it is time for the if-cascade
3916d9659bf6SJohannes Doerfert     // that checks the function pointer we got from the runtime against the
3917d9659bf6SJohannes Doerfert     // parallel regions we expect, if there are any.
3918c11ebfeaSJoseph Huber     for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
3919c11ebfeaSJoseph Huber       auto *ParallelRegion = ReachedKnownParallelRegions[I];
3920d9659bf6SJohannes Doerfert       BasicBlock *PRExecuteBB = BasicBlock::Create(
3921d9659bf6SJohannes Doerfert           Ctx, "worker_state_machine.parallel_region.execute", Kernel,
3922d9659bf6SJohannes Doerfert           StateMachineEndParallelBB);
3923d9659bf6SJohannes Doerfert       CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
3924d9659bf6SJohannes Doerfert           ->setDebugLoc(DLoc);
3925d9659bf6SJohannes Doerfert       BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
3926d9659bf6SJohannes Doerfert           ->setDebugLoc(DLoc);
3927d9659bf6SJohannes Doerfert 
3928d9659bf6SJohannes Doerfert       BasicBlock *PRNextBB =
3929d9659bf6SJohannes Doerfert           BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3930d9659bf6SJohannes Doerfert                              Kernel, StateMachineEndParallelBB);
3931d9659bf6SJohannes Doerfert 
3932d9659bf6SJohannes Doerfert       // Check if we need to compare the pointer at all or if we can just
3933d9659bf6SJohannes Doerfert       // call the parallel region function.
3934d9659bf6SJohannes Doerfert       Value *IsPR;
3935c11ebfeaSJoseph Huber       if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
3936d9659bf6SJohannes Doerfert         Instruction *CmpI = ICmpInst::Create(
3937d9659bf6SJohannes Doerfert             ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
3938d9659bf6SJohannes Doerfert             "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
3939d9659bf6SJohannes Doerfert         CmpI->setDebugLoc(DLoc);
3940d9659bf6SJohannes Doerfert         IsPR = CmpI;
3941d9659bf6SJohannes Doerfert       } else {
3942d9659bf6SJohannes Doerfert         IsPR = ConstantInt::getTrue(Ctx);
3943d9659bf6SJohannes Doerfert       }
3944d9659bf6SJohannes Doerfert 
3945d9659bf6SJohannes Doerfert       BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
3946d9659bf6SJohannes Doerfert                          StateMachineIfCascadeCurrentBB)
3947d9659bf6SJohannes Doerfert           ->setDebugLoc(DLoc);
3948d9659bf6SJohannes Doerfert       StateMachineIfCascadeCurrentBB = PRNextBB;
3949d9659bf6SJohannes Doerfert     }
3950d9659bf6SJohannes Doerfert 
3951d9659bf6SJohannes Doerfert     // At the end of the if-cascade we place the indirect function pointer call
3952d9659bf6SJohannes Doerfert     // in case we might need it, that is if there can be parallel regions we
3953d9659bf6SJohannes Doerfert     // have not handled in the if-cascade above.
3954d9659bf6SJohannes Doerfert     if (!ReachedUnknownParallelRegions.empty()) {
3955d9659bf6SJohannes Doerfert       StateMachineIfCascadeCurrentBB->setName(
3956d9659bf6SJohannes Doerfert           "worker_state_machine.parallel_region.fallback.execute");
3957d9659bf6SJohannes Doerfert       CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
3958d9659bf6SJohannes Doerfert                        StateMachineIfCascadeCurrentBB)
3959d9659bf6SJohannes Doerfert           ->setDebugLoc(DLoc);
3960d9659bf6SJohannes Doerfert     }
3961d9659bf6SJohannes Doerfert     BranchInst::Create(StateMachineEndParallelBB,
3962d9659bf6SJohannes Doerfert                        StateMachineIfCascadeCurrentBB)
3963d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3964d9659bf6SJohannes Doerfert 
396506cfdd52SJoseph Huber     FunctionCallee EndParallelFn =
396606cfdd52SJoseph Huber         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
396706cfdd52SJoseph Huber             M, OMPRTL___kmpc_kernel_end_parallel);
396806cfdd52SJoseph Huber     CallInst *EndParallel =
396906cfdd52SJoseph Huber         CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
397006cfdd52SJoseph Huber     OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
397106cfdd52SJoseph Huber     EndParallel->setDebugLoc(DLoc);
3972d9659bf6SJohannes Doerfert     BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
3973d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3974d9659bf6SJohannes Doerfert 
3975d9659bf6SJohannes Doerfert     CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
3976d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3977d9659bf6SJohannes Doerfert     BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
3978d9659bf6SJohannes Doerfert         ->setDebugLoc(DLoc);
3979d9659bf6SJohannes Doerfert 
3980d9659bf6SJohannes Doerfert     return ChangeStatus::CHANGED;
3981d9659bf6SJohannes Doerfert   }
3982d9659bf6SJohannes Doerfert 
3983d9659bf6SJohannes Doerfert   /// Fixpoint iteration update function. Will be called every time a dependence
3984d9659bf6SJohannes Doerfert   /// changed its state (and in the beginning).
updateImpl__anon23c38c770111::AAKernelInfoFunction3985d9659bf6SJohannes Doerfert   ChangeStatus updateImpl(Attributor &A) override {
3986d9659bf6SJohannes Doerfert     KernelInfoState StateBefore = getState();
3987d9659bf6SJohannes Doerfert 
3988514c033dSJohannes Doerfert     // Callback to check a read/write instruction.
3989514c033dSJohannes Doerfert     auto CheckRWInst = [&](Instruction &I) {
3990514c033dSJohannes Doerfert       // We handle calls later.
3991514c033dSJohannes Doerfert       if (isa<CallBase>(I))
3992514c033dSJohannes Doerfert         return true;
3993514c033dSJohannes Doerfert       // We only care about write effects.
3994514c033dSJohannes Doerfert       if (!I.mayWriteToMemory())
3995514c033dSJohannes Doerfert         return true;
3996514c033dSJohannes Doerfert       if (auto *SI = dyn_cast<StoreInst>(&I)) {
3997514c033dSJohannes Doerfert         SmallVector<const Value *> Objects;
3998514c033dSJohannes Doerfert         getUnderlyingObjects(SI->getPointerOperand(), Objects);
3999514c033dSJohannes Doerfert         if (llvm::all_of(Objects,
4000514c033dSJohannes Doerfert                          [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
4001514c033dSJohannes Doerfert           return true;
400229a3e3ddSGiorgis Georgakoudis         // Check for AAHeapToStack moved objects which must not be guarded.
400329a3e3ddSGiorgis Georgakoudis         auto &HS = A.getAAFor<AAHeapToStack>(
400429a3e3ddSGiorgis Georgakoudis             *this, IRPosition::function(*I.getFunction()),
4005e6e440aeSJohannes Doerfert             DepClassTy::OPTIONAL);
400629a3e3ddSGiorgis Georgakoudis         if (llvm::all_of(Objects, [&HS](const Value *Obj) {
400729a3e3ddSGiorgis Georgakoudis               auto *CB = dyn_cast<CallBase>(Obj);
400829a3e3ddSGiorgis Georgakoudis               if (!CB)
400929a3e3ddSGiorgis Georgakoudis                 return false;
401029a3e3ddSGiorgis Georgakoudis               return HS.isAssumedHeapToStack(*CB);
401129a3e3ddSGiorgis Georgakoudis             })) {
401229a3e3ddSGiorgis Georgakoudis           return true;
4013514c033dSJohannes Doerfert         }
401429a3e3ddSGiorgis Georgakoudis       }
401529a3e3ddSGiorgis Georgakoudis 
401629a3e3ddSGiorgis Georgakoudis       // Insert instruction that needs guarding.
4017514c033dSJohannes Doerfert       SPMDCompatibilityTracker.insert(&I);
4018514c033dSJohannes Doerfert       return true;
4019514c033dSJohannes Doerfert     };
4020792aac98SJohannes Doerfert 
4021792aac98SJohannes Doerfert     bool UsedAssumedInformationInCheckRWInst = false;
402297387fdfSJohannes Doerfert     if (!SPMDCompatibilityTracker.isAtFixpoint())
4023792aac98SJohannes Doerfert       if (!A.checkForAllReadWriteInstructions(
4024792aac98SJohannes Doerfert               CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
4025514c033dSJohannes Doerfert         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4026514c033dSJohannes Doerfert 
4027058c312aSJoseph Huber     bool UsedAssumedInformationFromReachingKernels = false;
4028e97e0a4fSShilei Tian     if (!IsKernelEntry) {
4029e97e0a4fSShilei Tian       updateParallelLevels(A);
403029a3e3ddSGiorgis Georgakoudis 
4031058c312aSJoseph Huber       bool AllReachingKernelsKnown = true;
4032058c312aSJoseph Huber       updateReachingKernelEntries(A, AllReachingKernelsKnown);
4033058c312aSJoseph Huber       UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
4034058c312aSJoseph Huber 
403529a3e3ddSGiorgis Georgakoudis       if (!ParallelLevels.isValidState())
403629a3e3ddSGiorgis Georgakoudis         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4037058c312aSJoseph Huber       else if (!ReachingKernelEntries.isValidState())
4038058c312aSJoseph Huber         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4039058c312aSJoseph Huber       else if (!SPMDCompatibilityTracker.empty()) {
4040058c312aSJoseph Huber         // Check if all reaching kernels agree on the mode as we can otherwise
4041058c312aSJoseph Huber         // not guard instructions. We might not be sure about the mode so we
4042058c312aSJoseph Huber         // we cannot fix the internal spmd-zation state either.
4043058c312aSJoseph Huber         int SPMD = 0, Generic = 0;
4044058c312aSJoseph Huber         for (auto *Kernel : ReachingKernelEntries) {
4045058c312aSJoseph Huber           auto &CBAA = A.getAAFor<AAKernelInfo>(
4046058c312aSJoseph Huber               *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
4047058c312aSJoseph Huber           if (CBAA.SPMDCompatibilityTracker.isValidState() &&
4048058c312aSJoseph Huber               CBAA.SPMDCompatibilityTracker.isAssumed())
4049058c312aSJoseph Huber             ++SPMD;
4050058c312aSJoseph Huber           else
4051058c312aSJoseph Huber             ++Generic;
4052058c312aSJoseph Huber           if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
4053058c312aSJoseph Huber             UsedAssumedInformationFromReachingKernels = true;
4054058c312aSJoseph Huber         }
4055058c312aSJoseph Huber         if (SPMD != 0 && Generic != 0)
4056058c312aSJoseph Huber           SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4057058c312aSJoseph Huber       }
4058e97e0a4fSShilei Tian     }
4059ca662297SShilei Tian 
4060d9659bf6SJohannes Doerfert     // Callback to check a call instruction.
4061d61aac76SJohannes Doerfert     bool AllParallelRegionStatesWereFixed = true;
406297387fdfSJohannes Doerfert     bool AllSPMDStatesWereFixed = true;
4063d9659bf6SJohannes Doerfert     auto CheckCallInst = [&](Instruction &I) {
4064d9659bf6SJohannes Doerfert       auto &CB = cast<CallBase>(I);
4065d9659bf6SJohannes Doerfert       auto &CBAA = A.getAAFor<AAKernelInfo>(
4066d9659bf6SJohannes Doerfert           *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4067d9659bf6SJohannes Doerfert       getState() ^= CBAA.getState();
406897387fdfSJohannes Doerfert       AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
4069d61aac76SJohannes Doerfert       AllParallelRegionStatesWereFixed &=
4070d61aac76SJohannes Doerfert           CBAA.ReachedKnownParallelRegions.isAtFixpoint();
4071d61aac76SJohannes Doerfert       AllParallelRegionStatesWereFixed &=
4072d61aac76SJohannes Doerfert           CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
4073d9659bf6SJohannes Doerfert       return true;
4074d9659bf6SJohannes Doerfert     };
4075d9659bf6SJohannes Doerfert 
4076792aac98SJohannes Doerfert     bool UsedAssumedInformationInCheckCallInst = false;
4077792aac98SJohannes Doerfert     if (!A.checkForAllCallLikeInstructions(
4078c6457dcaSJohannes Doerfert             CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
4079e6e440aeSJohannes Doerfert       LLVM_DEBUG(dbgs() << TAG
4080e6e440aeSJohannes Doerfert                         << "Failed to visit all call-like instructions!\n";);
4081d9659bf6SJohannes Doerfert       return indicatePessimisticFixpoint();
4082c6457dcaSJohannes Doerfert     }
4083d9659bf6SJohannes Doerfert 
4084d61aac76SJohannes Doerfert     // If we haven't used any assumed information for the reached parallel
4085d61aac76SJohannes Doerfert     // region states we can fix it.
4086d61aac76SJohannes Doerfert     if (!UsedAssumedInformationInCheckCallInst &&
4087d61aac76SJohannes Doerfert         AllParallelRegionStatesWereFixed) {
4088d61aac76SJohannes Doerfert       ReachedKnownParallelRegions.indicateOptimisticFixpoint();
4089d61aac76SJohannes Doerfert       ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
4090d61aac76SJohannes Doerfert     }
4091d61aac76SJohannes Doerfert 
4092d61aac76SJohannes Doerfert     // If we are sure there are no parallel regions in the kernel we do not
4093d61aac76SJohannes Doerfert     // want SPMD mode.
4094d61aac76SJohannes Doerfert     if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
4095d61aac76SJohannes Doerfert         ReachedKnownParallelRegions.isAtFixpoint() &&
4096d61aac76SJohannes Doerfert         ReachedUnknownParallelRegions.isValidState() &&
4097d61aac76SJohannes Doerfert         ReachedKnownParallelRegions.isValidState() &&
4098d61aac76SJohannes Doerfert         !mayContainParallelRegion())
4099d61aac76SJohannes Doerfert       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4100d61aac76SJohannes Doerfert 
410197387fdfSJohannes Doerfert     // If we haven't used any assumed information for the SPMD state we can fix
410297387fdfSJohannes Doerfert     // it.
410397387fdfSJohannes Doerfert     if (!UsedAssumedInformationInCheckRWInst &&
4104058c312aSJoseph Huber         !UsedAssumedInformationInCheckCallInst &&
4105058c312aSJoseph Huber         !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
410697387fdfSJohannes Doerfert       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
410797387fdfSJohannes Doerfert 
4108d9659bf6SJohannes Doerfert     return StateBefore == getState() ? ChangeStatus::UNCHANGED
4109d9659bf6SJohannes Doerfert                                      : ChangeStatus::CHANGED;
4110d9659bf6SJohannes Doerfert   }
4111ca662297SShilei Tian 
4112ca662297SShilei Tian private:
4113ca662297SShilei Tian   /// Update info regarding reaching kernels.
updateReachingKernelEntries__anon23c38c770111::AAKernelInfoFunction4114058c312aSJoseph Huber   void updateReachingKernelEntries(Attributor &A,
4115058c312aSJoseph Huber                                    bool &AllReachingKernelsKnown) {
4116ca662297SShilei Tian     auto PredCallSite = [&](AbstractCallSite ACS) {
4117ca662297SShilei Tian       Function *Caller = ACS.getInstruction()->getFunction();
4118ca662297SShilei Tian 
4119ca662297SShilei Tian       assert(Caller && "Caller is nullptr");
4120ca662297SShilei Tian 
4121d3454ee8SShilei Tian       auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
4122d3454ee8SShilei Tian           IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
4123ca662297SShilei Tian       if (CAA.ReachingKernelEntries.isValidState()) {
4124ca662297SShilei Tian         ReachingKernelEntries ^= CAA.ReachingKernelEntries;
4125ca662297SShilei Tian         return true;
4126ca662297SShilei Tian       }
4127ca662297SShilei Tian 
4128ca662297SShilei Tian       // We lost track of the caller of the associated function, any kernel
4129ca662297SShilei Tian       // could reach now.
4130ca662297SShilei Tian       ReachingKernelEntries.indicatePessimisticFixpoint();
4131ca662297SShilei Tian 
4132ca662297SShilei Tian       return true;
4133ca662297SShilei Tian     };
4134ca662297SShilei Tian 
4135ca662297SShilei Tian     if (!A.checkForAllCallSites(PredCallSite, *this,
4136ca662297SShilei Tian                                 true /* RequireAllCallSites */,
4137058c312aSJoseph Huber                                 AllReachingKernelsKnown))
4138ca662297SShilei Tian       ReachingKernelEntries.indicatePessimisticFixpoint();
4139ca662297SShilei Tian   }
4140e97e0a4fSShilei Tian 
4141e97e0a4fSShilei Tian   /// Update info regarding parallel levels.
updateParallelLevels__anon23c38c770111::AAKernelInfoFunction4142e97e0a4fSShilei Tian   void updateParallelLevels(Attributor &A) {
4143e97e0a4fSShilei Tian     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4144e97e0a4fSShilei Tian     OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4145e97e0a4fSShilei Tian         OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4146e97e0a4fSShilei Tian 
4147e97e0a4fSShilei Tian     auto PredCallSite = [&](AbstractCallSite ACS) {
4148e97e0a4fSShilei Tian       Function *Caller = ACS.getInstruction()->getFunction();
4149e97e0a4fSShilei Tian 
4150e97e0a4fSShilei Tian       assert(Caller && "Caller is nullptr");
4151e97e0a4fSShilei Tian 
4152e97e0a4fSShilei Tian       auto &CAA =
4153e97e0a4fSShilei Tian           A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
4154e97e0a4fSShilei Tian       if (CAA.ParallelLevels.isValidState()) {
4155e97e0a4fSShilei Tian         // Any function that is called by `__kmpc_parallel_51` will not be
4156e97e0a4fSShilei Tian         // folded as the parallel level in the function is updated. In order to
4157e97e0a4fSShilei Tian         // get it right, all the analysis would depend on the implentation. That
4158e97e0a4fSShilei Tian         // said, if in the future any change to the implementation, the analysis
4159e97e0a4fSShilei Tian         // could be wrong. As a consequence, we are just conservative here.
4160e97e0a4fSShilei Tian         if (Caller == Parallel51RFI.Declaration) {
4161e97e0a4fSShilei Tian           ParallelLevels.indicatePessimisticFixpoint();
4162e97e0a4fSShilei Tian           return true;
4163e97e0a4fSShilei Tian         }
4164e97e0a4fSShilei Tian 
4165e97e0a4fSShilei Tian         ParallelLevels ^= CAA.ParallelLevels;
4166e97e0a4fSShilei Tian 
4167e97e0a4fSShilei Tian         return true;
4168e97e0a4fSShilei Tian       }
4169e97e0a4fSShilei Tian 
4170e97e0a4fSShilei Tian       // We lost track of the caller of the associated function, any kernel
4171e97e0a4fSShilei Tian       // could reach now.
4172e97e0a4fSShilei Tian       ParallelLevels.indicatePessimisticFixpoint();
4173e97e0a4fSShilei Tian 
4174e97e0a4fSShilei Tian       return true;
4175e97e0a4fSShilei Tian     };
4176e97e0a4fSShilei Tian 
4177e97e0a4fSShilei Tian     bool AllCallSitesKnown = true;
4178e97e0a4fSShilei Tian     if (!A.checkForAllCallSites(PredCallSite, *this,
4179e97e0a4fSShilei Tian                                 true /* RequireAllCallSites */,
4180e97e0a4fSShilei Tian                                 AllCallSitesKnown))
4181e97e0a4fSShilei Tian       ParallelLevels.indicatePessimisticFixpoint();
4182e97e0a4fSShilei Tian   }
4183d9659bf6SJohannes Doerfert };
4184d9659bf6SJohannes Doerfert 
4185d9659bf6SJohannes Doerfert /// The call site kernel info abstract attribute, basically, what can we say
4186d9659bf6SJohannes Doerfert /// about a call site with regards to the KernelInfoState. For now this simply
4187d9659bf6SJohannes Doerfert /// forwards the information from the callee.
4188d9659bf6SJohannes Doerfert struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite__anon23c38c770111::AAKernelInfoCallSite4189d9659bf6SJohannes Doerfert   AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
4190d9659bf6SJohannes Doerfert       : AAKernelInfo(IRP, A) {}
4191d9659bf6SJohannes Doerfert 
4192d9659bf6SJohannes Doerfert   /// See AbstractAttribute::initialize(...).
initialize__anon23c38c770111::AAKernelInfoCallSite4193d9659bf6SJohannes Doerfert   void initialize(Attributor &A) override {
4194d9659bf6SJohannes Doerfert     AAKernelInfo::initialize(A);
4195d9659bf6SJohannes Doerfert 
4196d9659bf6SJohannes Doerfert     CallBase &CB = cast<CallBase>(getAssociatedValue());
4197d9659bf6SJohannes Doerfert     Function *Callee = getAssociatedFunction();
4198d9659bf6SJohannes Doerfert 
4199e52937ebSJoseph Huber     auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
4200e52937ebSJoseph Huber         *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4201d9659bf6SJohannes Doerfert 
4202514c033dSJohannes Doerfert     // Check for SPMD-mode assumptions.
4203e52937ebSJoseph Huber     if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
4204514c033dSJohannes Doerfert       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
42059e2fc0baSJoseph Huber       indicateOptimisticFixpoint();
42069e2fc0baSJoseph Huber     }
4207514c033dSJohannes Doerfert 
4208d9659bf6SJohannes Doerfert     // First weed out calls we do not care about, that is readonly/readnone
4209d9659bf6SJohannes Doerfert     // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
4210d9659bf6SJohannes Doerfert     // parallel region or anything else we are looking for.
4211d9659bf6SJohannes Doerfert     if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
4212d9659bf6SJohannes Doerfert       indicateOptimisticFixpoint();
4213d9659bf6SJohannes Doerfert       return;
4214d9659bf6SJohannes Doerfert     }
4215d9659bf6SJohannes Doerfert 
4216d9659bf6SJohannes Doerfert     // Next we check if we know the callee. If it is a known OpenMP function
4217d9659bf6SJohannes Doerfert     // we will handle them explicitly in the switch below. If it is not, we
4218d9659bf6SJohannes Doerfert     // will use an AAKernelInfo object on the callee to gather information and
4219d9659bf6SJohannes Doerfert     // merge that into the current state. The latter happens in the updateImpl.
4220d9659bf6SJohannes Doerfert     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4221d9659bf6SJohannes Doerfert     const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4222d9659bf6SJohannes Doerfert     if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4223d9659bf6SJohannes Doerfert       // Unknown caller or declarations are not analyzable, we give up.
4224d9659bf6SJohannes Doerfert       if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
4225d9659bf6SJohannes Doerfert 
4226d9659bf6SJohannes Doerfert         // Unknown callees might contain parallel regions, except if they have
4227d9659bf6SJohannes Doerfert         // an appropriate assumption attached.
4228e52937ebSJoseph Huber         if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
4229e52937ebSJoseph Huber               AssumptionAA.hasAssumption("omp_no_parallelism")))
4230d9659bf6SJohannes Doerfert           ReachedUnknownParallelRegions.insert(&CB);
4231d9659bf6SJohannes Doerfert 
4232514c033dSJohannes Doerfert         // If SPMDCompatibilityTracker is not fixed, we need to give up on the
4233514c033dSJohannes Doerfert         // idea we can run something unknown in SPMD-mode.
423429a3e3ddSGiorgis Georgakoudis         if (!SPMDCompatibilityTracker.isAtFixpoint()) {
423529a3e3ddSGiorgis Georgakoudis           SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4236514c033dSJohannes Doerfert           SPMDCompatibilityTracker.insert(&CB);
423729a3e3ddSGiorgis Georgakoudis         }
4238514c033dSJohannes Doerfert 
4239d9659bf6SJohannes Doerfert         // We have updated the state for this unknown call properly, there won't
4240d9659bf6SJohannes Doerfert         // be any change so we indicate a fixpoint.
4241d9659bf6SJohannes Doerfert         indicateOptimisticFixpoint();
4242d9659bf6SJohannes Doerfert       }
4243d9659bf6SJohannes Doerfert       // If the callee is known and can be used in IPO, we will update the state
4244d9659bf6SJohannes Doerfert       // based on the callee state in updateImpl.
4245d9659bf6SJohannes Doerfert       return;
4246d9659bf6SJohannes Doerfert     }
4247d9659bf6SJohannes Doerfert 
4248d9659bf6SJohannes Doerfert     const unsigned int WrapperFunctionArgNo = 6;
4249d9659bf6SJohannes Doerfert     RuntimeFunction RF = It->getSecond();
4250d9659bf6SJohannes Doerfert     switch (RF) {
4251514c033dSJohannes Doerfert     // All the functions we know are compatible with SPMD mode.
4252514c033dSJohannes Doerfert     case OMPRTL___kmpc_is_spmd_exec_mode:
425387ce7e65SJoseph Huber     case OMPRTL___kmpc_distribute_static_fini:
4254514c033dSJohannes Doerfert     case OMPRTL___kmpc_for_static_fini:
4255514c033dSJohannes Doerfert     case OMPRTL___kmpc_global_thread_num:
42565ab6aeddSJose M Monsalve Diaz     case OMPRTL___kmpc_get_hardware_num_threads_in_block:
42575ab6aeddSJose M Monsalve Diaz     case OMPRTL___kmpc_get_hardware_num_blocks:
4258514c033dSJohannes Doerfert     case OMPRTL___kmpc_single:
4259514c033dSJohannes Doerfert     case OMPRTL___kmpc_end_single:
4260514c033dSJohannes Doerfert     case OMPRTL___kmpc_master:
4261514c033dSJohannes Doerfert     case OMPRTL___kmpc_end_master:
4262514c033dSJohannes Doerfert     case OMPRTL___kmpc_barrier:
4263744aa09fSJoseph Huber     case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
4264744aa09fSJoseph Huber     case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
4265744aa09fSJoseph Huber     case OMPRTL___kmpc_nvptx_end_reduce_nowait:
4266514c033dSJohannes Doerfert       break;
426787ce7e65SJoseph Huber     case OMPRTL___kmpc_distribute_static_init_4:
426887ce7e65SJoseph Huber     case OMPRTL___kmpc_distribute_static_init_4u:
426987ce7e65SJoseph Huber     case OMPRTL___kmpc_distribute_static_init_8:
427087ce7e65SJoseph Huber     case OMPRTL___kmpc_distribute_static_init_8u:
4271514c033dSJohannes Doerfert     case OMPRTL___kmpc_for_static_init_4:
4272514c033dSJohannes Doerfert     case OMPRTL___kmpc_for_static_init_4u:
4273514c033dSJohannes Doerfert     case OMPRTL___kmpc_for_static_init_8:
4274514c033dSJohannes Doerfert     case OMPRTL___kmpc_for_static_init_8u: {
4275514c033dSJohannes Doerfert       // Check the schedule and allow static schedule in SPMD mode.
4276514c033dSJohannes Doerfert       unsigned ScheduleArgOpNo = 2;
4277514c033dSJohannes Doerfert       auto *ScheduleTypeCI =
4278514c033dSJohannes Doerfert           dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
4279514c033dSJohannes Doerfert       unsigned ScheduleTypeVal =
4280514c033dSJohannes Doerfert           ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
4281514c033dSJohannes Doerfert       switch (OMPScheduleType(ScheduleTypeVal)) {
42822d92ee97SMichael Kruse       case OMPScheduleType::UnorderedStatic:
42832d92ee97SMichael Kruse       case OMPScheduleType::UnorderedStaticChunked:
42842d92ee97SMichael Kruse       case OMPScheduleType::OrderedDistribute:
42852d92ee97SMichael Kruse       case OMPScheduleType::OrderedDistributeChunked:
4286514c033dSJohannes Doerfert         break;
4287514c033dSJohannes Doerfert       default:
428829a3e3ddSGiorgis Georgakoudis         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4289514c033dSJohannes Doerfert         SPMDCompatibilityTracker.insert(&CB);
4290514c033dSJohannes Doerfert         break;
4291514c033dSJohannes Doerfert       };
4292514c033dSJohannes Doerfert     } break;
4293d9659bf6SJohannes Doerfert     case OMPRTL___kmpc_target_init:
4294d9659bf6SJohannes Doerfert       KernelInitCB = &CB;
4295d9659bf6SJohannes Doerfert       break;
4296d9659bf6SJohannes Doerfert     case OMPRTL___kmpc_target_deinit:
4297d9659bf6SJohannes Doerfert       KernelDeinitCB = &CB;
4298d9659bf6SJohannes Doerfert       break;
4299d9659bf6SJohannes Doerfert     case OMPRTL___kmpc_parallel_51:
4300d9659bf6SJohannes Doerfert       if (auto *ParallelRegion = dyn_cast<Function>(
4301d9659bf6SJohannes Doerfert               CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
4302d9659bf6SJohannes Doerfert         ReachedKnownParallelRegions.insert(ParallelRegion);
4303d9659bf6SJohannes Doerfert         break;
4304d9659bf6SJohannes Doerfert       }
4305d9659bf6SJohannes Doerfert       // The condition above should usually get the parallel region function
4306d9659bf6SJohannes Doerfert       // pointer and record it. In the off chance it doesn't we assume the
4307d9659bf6SJohannes Doerfert       // worst.
4308d9659bf6SJohannes Doerfert       ReachedUnknownParallelRegions.insert(&CB);
4309d9659bf6SJohannes Doerfert       break;
4310d9659bf6SJohannes Doerfert     case OMPRTL___kmpc_omp_task:
4311d9659bf6SJohannes Doerfert       // We do not look into tasks right now, just give up.
43129ea5b972SJoseph Huber       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4313514c033dSJohannes Doerfert       SPMDCompatibilityTracker.insert(&CB);
4314d9659bf6SJohannes Doerfert       ReachedUnknownParallelRegions.insert(&CB);
4315c6457dcaSJohannes Doerfert       break;
4316f8c40ed8SGiorgis Georgakoudis     case OMPRTL___kmpc_alloc_shared:
4317f8c40ed8SGiorgis Georgakoudis     case OMPRTL___kmpc_free_shared:
4318f8c40ed8SGiorgis Georgakoudis       // Return without setting a fixpoint, to be resolved in updateImpl.
4319f8c40ed8SGiorgis Georgakoudis       return;
4320d9659bf6SJohannes Doerfert     default:
4321514c033dSJohannes Doerfert       // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
4322c6457dcaSJohannes Doerfert       // generally. However, they do not hide parallel regions.
43239ea5b972SJoseph Huber       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4324514c033dSJohannes Doerfert       SPMDCompatibilityTracker.insert(&CB);
4325c6457dcaSJohannes Doerfert       break;
4326d9659bf6SJohannes Doerfert     }
4327d9659bf6SJohannes Doerfert     // All other OpenMP runtime calls will not reach parallel regions so they
4328d9659bf6SJohannes Doerfert     // can be safely ignored for now. Since it is a known OpenMP runtime call we
4329d9659bf6SJohannes Doerfert     // have now modeled all effects and there is no need for any update.
4330d9659bf6SJohannes Doerfert     indicateOptimisticFixpoint();
4331d9659bf6SJohannes Doerfert   }
4332d9659bf6SJohannes Doerfert 
updateImpl__anon23c38c770111::AAKernelInfoCallSite4333d9659bf6SJohannes Doerfert   ChangeStatus updateImpl(Attributor &A) override {
4334d9659bf6SJohannes Doerfert     // TODO: Once we have call site specific value information we can provide
4335d9659bf6SJohannes Doerfert     //       call site specific liveness information and then it makes
4336d9659bf6SJohannes Doerfert     //       sense to specialize attributes for call sites arguments instead of
4337d9659bf6SJohannes Doerfert     //       redirecting requests to the callee argument.
4338d9659bf6SJohannes Doerfert     Function *F = getAssociatedFunction();
4339f8c40ed8SGiorgis Georgakoudis 
4340f8c40ed8SGiorgis Georgakoudis     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4341f8c40ed8SGiorgis Georgakoudis     const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
4342f8c40ed8SGiorgis Georgakoudis 
4343f8c40ed8SGiorgis Georgakoudis     // If F is not a runtime function, propagate the AAKernelInfo of the callee.
4344f8c40ed8SGiorgis Georgakoudis     if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4345d9659bf6SJohannes Doerfert       const IRPosition &FnPos = IRPosition::function(*F);
4346d9659bf6SJohannes Doerfert       auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
4347d9659bf6SJohannes Doerfert       if (getState() == FnAA.getState())
4348d9659bf6SJohannes Doerfert         return ChangeStatus::UNCHANGED;
4349d9659bf6SJohannes Doerfert       getState() = FnAA.getState();
4350d9659bf6SJohannes Doerfert       return ChangeStatus::CHANGED;
4351d9659bf6SJohannes Doerfert     }
4352f8c40ed8SGiorgis Georgakoudis 
4353f8c40ed8SGiorgis Georgakoudis     // F is a runtime function that allocates or frees memory, check
4354f8c40ed8SGiorgis Georgakoudis     // AAHeapToStack and AAHeapToShared.
4355f8c40ed8SGiorgis Georgakoudis     KernelInfoState StateBefore = getState();
4356f8c40ed8SGiorgis Georgakoudis     assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||
4357f8c40ed8SGiorgis Georgakoudis             It->getSecond() == OMPRTL___kmpc_free_shared) &&
4358f8c40ed8SGiorgis Georgakoudis            "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
4359f8c40ed8SGiorgis Georgakoudis 
4360f8c40ed8SGiorgis Georgakoudis     CallBase &CB = cast<CallBase>(getAssociatedValue());
4361f8c40ed8SGiorgis Georgakoudis 
4362f8c40ed8SGiorgis Georgakoudis     auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
4363f8c40ed8SGiorgis Georgakoudis         *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4364f8c40ed8SGiorgis Georgakoudis     auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
4365f8c40ed8SGiorgis Georgakoudis         *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4366f8c40ed8SGiorgis Georgakoudis 
4367f8c40ed8SGiorgis Georgakoudis     RuntimeFunction RF = It->getSecond();
4368f8c40ed8SGiorgis Georgakoudis 
4369f8c40ed8SGiorgis Georgakoudis     switch (RF) {
4370f8c40ed8SGiorgis Georgakoudis     // If neither HeapToStack nor HeapToShared assume the call is removed,
4371f8c40ed8SGiorgis Georgakoudis     // assume SPMD incompatibility.
4372f8c40ed8SGiorgis Georgakoudis     case OMPRTL___kmpc_alloc_shared:
4373f8c40ed8SGiorgis Georgakoudis       if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
4374f8c40ed8SGiorgis Georgakoudis           !HeapToSharedAA.isAssumedHeapToShared(CB))
4375f8c40ed8SGiorgis Georgakoudis         SPMDCompatibilityTracker.insert(&CB);
4376f8c40ed8SGiorgis Georgakoudis       break;
4377f8c40ed8SGiorgis Georgakoudis     case OMPRTL___kmpc_free_shared:
4378f8c40ed8SGiorgis Georgakoudis       if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
4379f8c40ed8SGiorgis Georgakoudis           !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
4380f8c40ed8SGiorgis Georgakoudis         SPMDCompatibilityTracker.insert(&CB);
4381f8c40ed8SGiorgis Georgakoudis       break;
4382f8c40ed8SGiorgis Georgakoudis     default:
43839ea5b972SJoseph Huber       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4384f8c40ed8SGiorgis Georgakoudis       SPMDCompatibilityTracker.insert(&CB);
4385f8c40ed8SGiorgis Georgakoudis     }
4386f8c40ed8SGiorgis Georgakoudis 
4387f8c40ed8SGiorgis Georgakoudis     return StateBefore == getState() ? ChangeStatus::UNCHANGED
4388f8c40ed8SGiorgis Georgakoudis                                      : ChangeStatus::CHANGED;
4389f8c40ed8SGiorgis Georgakoudis   }
4390d9659bf6SJohannes Doerfert };
4391d9659bf6SJohannes Doerfert 
4392ca662297SShilei Tian struct AAFoldRuntimeCall
4393ca662297SShilei Tian     : public StateWrapper<BooleanState, AbstractAttribute> {
4394ca662297SShilei Tian   using Base = StateWrapper<BooleanState, AbstractAttribute>;
4395ca662297SShilei Tian 
AAFoldRuntimeCall__anon23c38c770111::AAFoldRuntimeCall4396ca662297SShilei Tian   AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4397ca662297SShilei Tian 
4398ca662297SShilei Tian   /// Statistics are tracked as part of manifest for now.
trackStatistics__anon23c38c770111::AAFoldRuntimeCall4399ca662297SShilei Tian   void trackStatistics() const override {}
4400ca662297SShilei Tian 
4401ca662297SShilei Tian   /// Create an abstract attribute biew for the position \p IRP.
4402ca662297SShilei Tian   static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
4403ca662297SShilei Tian                                               Attributor &A);
4404ca662297SShilei Tian 
4405ca662297SShilei Tian   /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAFoldRuntimeCall4406ca662297SShilei Tian   const std::string getName() const override { return "AAFoldRuntimeCall"; }
4407ca662297SShilei Tian 
4408ca662297SShilei Tian   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAFoldRuntimeCall4409ca662297SShilei Tian   const char *getIdAddr() const override { return &ID; }
4410ca662297SShilei Tian 
4411ca662297SShilei Tian   /// This function should return true if the type of the \p AA is
4412ca662297SShilei Tian   /// AAFoldRuntimeCall
classof__anon23c38c770111::AAFoldRuntimeCall4413ca662297SShilei Tian   static bool classof(const AbstractAttribute *AA) {
4414ca662297SShilei Tian     return (AA->getIdAddr() == &ID);
4415ca662297SShilei Tian   }
4416ca662297SShilei Tian 
4417ca662297SShilei Tian   static const char ID;
4418ca662297SShilei Tian };
4419ca662297SShilei Tian 
4420ca662297SShilei Tian struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4421ca662297SShilei Tian   AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
4422ca662297SShilei Tian       : AAFoldRuntimeCall(IRP, A) {}
4423ca662297SShilei Tian 
4424ca662297SShilei Tian   /// See AbstractAttribute::getAsStr()
getAsStr__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4425ca662297SShilei Tian   const std::string getAsStr() const override {
4426ca662297SShilei Tian     if (!isValidState())
4427ca662297SShilei Tian       return "<invalid>";
4428ca662297SShilei Tian 
4429ca662297SShilei Tian     std::string Str("simplified value: ");
4430ca662297SShilei Tian 
4431a7938c74SKazu Hirata     if (!SimplifiedValue)
4432ca662297SShilei Tian       return Str + std::string("none");
4433ca662297SShilei Tian 
4434611ffcf4SKazu Hirata     if (!SimplifiedValue.value())
4435ca662297SShilei Tian       return Str + std::string("nullptr");
4436ca662297SShilei Tian 
4437611ffcf4SKazu Hirata     if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.value()))
4438ca662297SShilei Tian       return Str + std::to_string(CI->getSExtValue());
4439ca662297SShilei Tian 
4440ca662297SShilei Tian     return Str + std::string("unknown");
4441ca662297SShilei Tian   }
4442ca662297SShilei Tian 
initialize__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4443ca662297SShilei Tian   void initialize(Attributor &A) override {
4444cd0dd8ecSJoseph Huber     if (DisableOpenMPOptFolding)
4445cd0dd8ecSJoseph Huber       indicatePessimisticFixpoint();
4446cd0dd8ecSJoseph Huber 
4447ca662297SShilei Tian     Function *Callee = getAssociatedFunction();
4448ca662297SShilei Tian 
4449ca662297SShilei Tian     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4450ca662297SShilei Tian     const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4451ca662297SShilei Tian     assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
4452ca662297SShilei Tian            "Expected a known OpenMP runtime function");
4453ca662297SShilei Tian 
4454ca662297SShilei Tian     RFKind = It->getSecond();
4455ca662297SShilei Tian 
4456ca662297SShilei Tian     CallBase &CB = cast<CallBase>(getAssociatedValue());
4457ca662297SShilei Tian     A.registerSimplificationCallback(
4458ca662297SShilei Tian         IRPosition::callsite_returned(CB),
4459ca662297SShilei Tian         [&](const IRPosition &IRP, const AbstractAttribute *AA,
4460ca662297SShilei Tian             bool &UsedAssumedInformation) -> Optional<Value *> {
4461a7938c74SKazu Hirata           assert((isValidState() ||
4462611ffcf4SKazu Hirata                   (SimplifiedValue && SimplifiedValue.value() == nullptr)) &&
4463ca662297SShilei Tian                  "Unexpected invalid state!");
4464ca662297SShilei Tian 
4465ca662297SShilei Tian           if (!isAtFixpoint()) {
4466ca662297SShilei Tian             UsedAssumedInformation = true;
4467ca662297SShilei Tian             if (AA)
4468ca662297SShilei Tian               A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
4469ca662297SShilei Tian           }
4470ca662297SShilei Tian           return SimplifiedValue;
4471ca662297SShilei Tian         });
4472ca662297SShilei Tian   }
4473ca662297SShilei Tian 
updateImpl__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4474ca662297SShilei Tian   ChangeStatus updateImpl(Attributor &A) override {
4475ca662297SShilei Tian     ChangeStatus Changed = ChangeStatus::UNCHANGED;
4476ca662297SShilei Tian     switch (RFKind) {
4477ca662297SShilei Tian     case OMPRTL___kmpc_is_spmd_exec_mode:
4478c23da666SShilei Tian       Changed |= foldIsSPMDExecMode(A);
4479ca662297SShilei Tian       break;
4480196fe994SJoseph Huber     case OMPRTL___kmpc_is_generic_main_thread_id:
4481196fe994SJoseph Huber       Changed |= foldIsGenericMainThread(A);
4482196fe994SJoseph Huber       break;
4483e97e0a4fSShilei Tian     case OMPRTL___kmpc_parallel_level:
4484e97e0a4fSShilei Tian       Changed |= foldParallelLevel(A);
4485e97e0a4fSShilei Tian       break;
44865ab6aeddSJose M Monsalve Diaz     case OMPRTL___kmpc_get_hardware_num_threads_in_block:
44875ab6aeddSJose M Monsalve Diaz       Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
44885ab6aeddSJose M Monsalve Diaz       break;
44895ab6aeddSJose M Monsalve Diaz     case OMPRTL___kmpc_get_hardware_num_blocks:
44905ab6aeddSJose M Monsalve Diaz       Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
44915ab6aeddSJose M Monsalve Diaz       break;
4492ca662297SShilei Tian     default:
4493ca662297SShilei Tian       llvm_unreachable("Unhandled OpenMP runtime function!");
4494ca662297SShilei Tian     }
4495ca662297SShilei Tian 
4496ca662297SShilei Tian     return Changed;
4497ca662297SShilei Tian   }
4498ca662297SShilei Tian 
manifest__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4499ca662297SShilei Tian   ChangeStatus manifest(Attributor &A) override {
4500ca662297SShilei Tian     ChangeStatus Changed = ChangeStatus::UNCHANGED;
4501ca662297SShilei Tian 
4502e0e687a6SKazu Hirata     if (SimplifiedValue && *SimplifiedValue) {
45037eb899cbSJoseph Huber       Instruction &I = *getCtxI();
45047a07b88fSJohannes Doerfert       A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
45057eb899cbSJoseph Huber       A.deleteAfterManifest(I);
4506196fe994SJoseph Huber 
45077eb899cbSJoseph Huber       CallBase *CB = dyn_cast<CallBase>(&I);
45087eb899cbSJoseph Huber       auto Remark = [&](OptimizationRemark OR) {
45097eb899cbSJoseph Huber         if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
45107eb899cbSJoseph Huber           return OR << "Replacing OpenMP runtime call "
45117eb899cbSJoseph Huber                     << CB->getCalledFunction()->getName() << " with "
45127eb899cbSJoseph Huber                     << ore::NV("FoldedValue", C->getZExtValue()) << ".";
45137eb899cbSJoseph Huber         return OR << "Replacing OpenMP runtime call "
45147eb899cbSJoseph Huber                   << CB->getCalledFunction()->getName() << ".";
45157eb899cbSJoseph Huber       };
45167eb899cbSJoseph Huber 
45177eb899cbSJoseph Huber       if (CB && EnableVerboseRemarks)
45187eb899cbSJoseph Huber         A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
45197eb899cbSJoseph Huber 
45207eb899cbSJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
4521196fe994SJoseph Huber                         << **SimplifiedValue << "\n");
4522196fe994SJoseph Huber 
4523ca662297SShilei Tian       Changed = ChangeStatus::CHANGED;
4524ca662297SShilei Tian     }
4525ca662297SShilei Tian 
4526ca662297SShilei Tian     return Changed;
4527ca662297SShilei Tian   }
4528ca662297SShilei Tian 
indicatePessimisticFixpoint__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4529ca662297SShilei Tian   ChangeStatus indicatePessimisticFixpoint() override {
4530ca662297SShilei Tian     SimplifiedValue = nullptr;
4531ca662297SShilei Tian     return AAFoldRuntimeCall::indicatePessimisticFixpoint();
4532ca662297SShilei Tian   }
4533ca662297SShilei Tian 
4534ca662297SShilei Tian private:
4535ca662297SShilei Tian   /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
foldIsSPMDExecMode__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4536ca662297SShilei Tian   ChangeStatus foldIsSPMDExecMode(Attributor &A) {
4537ca662297SShilei Tian     Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4538ca662297SShilei Tian 
4539ca662297SShilei Tian     unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4540ca662297SShilei Tian     unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4541ca662297SShilei Tian     auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4542ca662297SShilei Tian         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4543ca662297SShilei Tian 
4544ca662297SShilei Tian     if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4545ca662297SShilei Tian       return indicatePessimisticFixpoint();
4546ca662297SShilei Tian 
4547ca662297SShilei Tian     for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4548ca662297SShilei Tian       auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4549ca662297SShilei Tian                                           DepClassTy::REQUIRED);
4550ca662297SShilei Tian 
4551ca662297SShilei Tian       if (!AA.isValidState()) {
4552ca662297SShilei Tian         SimplifiedValue = nullptr;
4553ca662297SShilei Tian         return indicatePessimisticFixpoint();
4554ca662297SShilei Tian       }
4555ca662297SShilei Tian 
4556ca662297SShilei Tian       if (AA.SPMDCompatibilityTracker.isAssumed()) {
4557ca662297SShilei Tian         if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4558ca662297SShilei Tian           ++KnownSPMDCount;
4559ca662297SShilei Tian         else
4560ca662297SShilei Tian           ++AssumedSPMDCount;
4561ca662297SShilei Tian       } else {
4562ca662297SShilei Tian         if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4563ca662297SShilei Tian           ++KnownNonSPMDCount;
4564ca662297SShilei Tian         else
4565ca662297SShilei Tian           ++AssumedNonSPMDCount;
4566ca662297SShilei Tian       }
4567ca662297SShilei Tian     }
4568ca662297SShilei Tian 
4569ae69f468SShilei Tian     if ((AssumedSPMDCount + KnownSPMDCount) &&
4570ae69f468SShilei Tian         (AssumedNonSPMDCount + KnownNonSPMDCount))
4571ca662297SShilei Tian       return indicatePessimisticFixpoint();
4572ca662297SShilei Tian 
4573ca662297SShilei Tian     auto &Ctx = getAnchorValue().getContext();
4574ca662297SShilei Tian     if (KnownSPMDCount || AssumedSPMDCount) {
4575ca662297SShilei Tian       assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
4576ca662297SShilei Tian              "Expected only SPMD kernels!");
4577ca662297SShilei Tian       // All reaching kernels are in SPMD mode. Update all function calls to
4578ca662297SShilei Tian       // __kmpc_is_spmd_exec_mode to 1.
4579ca662297SShilei Tian       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4580d3454ee8SShilei Tian     } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
4581ca662297SShilei Tian       assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
4582ca662297SShilei Tian              "Expected only non-SPMD kernels!");
4583ca662297SShilei Tian       // All reaching kernels are in non-SPMD mode. Update all function
4584ca662297SShilei Tian       // calls to __kmpc_is_spmd_exec_mode to 0.
4585ca662297SShilei Tian       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
4586d3454ee8SShilei Tian     } else {
4587d3454ee8SShilei Tian       // We have empty reaching kernels, therefore we cannot tell if the
4588d3454ee8SShilei Tian       // associated call site can be folded. At this moment, SimplifiedValue
4589d3454ee8SShilei Tian       // must be none.
45905413bf1bSKazu Hirata       assert(!SimplifiedValue && "SimplifiedValue should be none");
4591ca662297SShilei Tian     }
4592ca662297SShilei Tian 
4593ca662297SShilei Tian     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4594ca662297SShilei Tian                                                     : ChangeStatus::CHANGED;
4595ca662297SShilei Tian   }
4596ca662297SShilei Tian 
4597196fe994SJoseph Huber   /// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
foldIsGenericMainThread__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4598196fe994SJoseph Huber   ChangeStatus foldIsGenericMainThread(Attributor &A) {
4599196fe994SJoseph Huber     Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4600196fe994SJoseph Huber 
4601196fe994SJoseph Huber     CallBase &CB = cast<CallBase>(getAssociatedValue());
4602196fe994SJoseph Huber     Function *F = CB.getFunction();
4603196fe994SJoseph Huber     const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
4604196fe994SJoseph Huber         *this, IRPosition::function(*F), DepClassTy::REQUIRED);
4605196fe994SJoseph Huber 
4606196fe994SJoseph Huber     if (!ExecutionDomainAA.isValidState())
4607196fe994SJoseph Huber       return indicatePessimisticFixpoint();
4608196fe994SJoseph Huber 
4609196fe994SJoseph Huber     auto &Ctx = getAnchorValue().getContext();
4610196fe994SJoseph Huber     if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
4611196fe994SJoseph Huber       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4612196fe994SJoseph Huber     else
4613196fe994SJoseph Huber       return indicatePessimisticFixpoint();
4614196fe994SJoseph Huber 
4615196fe994SJoseph Huber     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4616196fe994SJoseph Huber                                                     : ChangeStatus::CHANGED;
4617196fe994SJoseph Huber   }
4618196fe994SJoseph Huber 
4619e97e0a4fSShilei Tian   /// Fold __kmpc_parallel_level into a constant if possible.
foldParallelLevel__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4620e97e0a4fSShilei Tian   ChangeStatus foldParallelLevel(Attributor &A) {
4621e97e0a4fSShilei Tian     Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4622e97e0a4fSShilei Tian 
4623e97e0a4fSShilei Tian     auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4624e97e0a4fSShilei Tian         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4625e97e0a4fSShilei Tian 
4626e97e0a4fSShilei Tian     if (!CallerKernelInfoAA.ParallelLevels.isValidState())
4627e97e0a4fSShilei Tian       return indicatePessimisticFixpoint();
4628e97e0a4fSShilei Tian 
4629e97e0a4fSShilei Tian     if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4630e97e0a4fSShilei Tian       return indicatePessimisticFixpoint();
4631e97e0a4fSShilei Tian 
4632e97e0a4fSShilei Tian     if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
46335413bf1bSKazu Hirata       assert(!SimplifiedValue &&
4634e97e0a4fSShilei Tian              "SimplifiedValue should keep none at this point");
4635e97e0a4fSShilei Tian       return ChangeStatus::UNCHANGED;
4636e97e0a4fSShilei Tian     }
4637e97e0a4fSShilei Tian 
4638e97e0a4fSShilei Tian     unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4639e97e0a4fSShilei Tian     unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4640e97e0a4fSShilei Tian     for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4641e97e0a4fSShilei Tian       auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4642e97e0a4fSShilei Tian                                           DepClassTy::REQUIRED);
4643e97e0a4fSShilei Tian       if (!AA.SPMDCompatibilityTracker.isValidState())
4644e97e0a4fSShilei Tian         return indicatePessimisticFixpoint();
4645e97e0a4fSShilei Tian 
4646e97e0a4fSShilei Tian       if (AA.SPMDCompatibilityTracker.isAssumed()) {
4647e97e0a4fSShilei Tian         if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4648e97e0a4fSShilei Tian           ++KnownSPMDCount;
4649e97e0a4fSShilei Tian         else
4650e97e0a4fSShilei Tian           ++AssumedSPMDCount;
4651e97e0a4fSShilei Tian       } else {
4652e97e0a4fSShilei Tian         if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4653e97e0a4fSShilei Tian           ++KnownNonSPMDCount;
4654e97e0a4fSShilei Tian         else
4655e97e0a4fSShilei Tian           ++AssumedNonSPMDCount;
4656e97e0a4fSShilei Tian       }
4657e97e0a4fSShilei Tian     }
4658e97e0a4fSShilei Tian 
4659e97e0a4fSShilei Tian     if ((AssumedSPMDCount + KnownSPMDCount) &&
4660e97e0a4fSShilei Tian         (AssumedNonSPMDCount + KnownNonSPMDCount))
4661e97e0a4fSShilei Tian       return indicatePessimisticFixpoint();
4662e97e0a4fSShilei Tian 
4663e97e0a4fSShilei Tian     auto &Ctx = getAnchorValue().getContext();
4664e97e0a4fSShilei Tian     // If the caller can only be reached by SPMD kernel entries, the parallel
4665e97e0a4fSShilei Tian     // level is 1. Similarly, if the caller can only be reached by non-SPMD
4666e97e0a4fSShilei Tian     // kernel entries, it is 0.
4667e97e0a4fSShilei Tian     if (AssumedSPMDCount || KnownSPMDCount) {
4668e97e0a4fSShilei Tian       assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
4669e97e0a4fSShilei Tian              "Expected only SPMD kernels!");
4670e97e0a4fSShilei Tian       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
4671e97e0a4fSShilei Tian     } else {
4672e97e0a4fSShilei Tian       assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
4673e97e0a4fSShilei Tian              "Expected only non-SPMD kernels!");
4674e97e0a4fSShilei Tian       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
4675e97e0a4fSShilei Tian     }
46765ab6aeddSJose M Monsalve Diaz     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
46775ab6aeddSJose M Monsalve Diaz                                                     : ChangeStatus::CHANGED;
46785ab6aeddSJose M Monsalve Diaz   }
4679e97e0a4fSShilei Tian 
foldKernelFnAttribute__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned46805ab6aeddSJose M Monsalve Diaz   ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
46815ab6aeddSJose M Monsalve Diaz     // Specialize only if all the calls agree with the attribute constant value
46825ab6aeddSJose M Monsalve Diaz     int32_t CurrentAttrValue = -1;
46835ab6aeddSJose M Monsalve Diaz     Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
46845ab6aeddSJose M Monsalve Diaz 
46855ab6aeddSJose M Monsalve Diaz     auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
46865ab6aeddSJose M Monsalve Diaz         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
46875ab6aeddSJose M Monsalve Diaz 
46885ab6aeddSJose M Monsalve Diaz     if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
46895ab6aeddSJose M Monsalve Diaz       return indicatePessimisticFixpoint();
46905ab6aeddSJose M Monsalve Diaz 
46915ab6aeddSJose M Monsalve Diaz     // Iterate over the kernels that reach this function
46925ab6aeddSJose M Monsalve Diaz     for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
46935ab6aeddSJose M Monsalve Diaz       int32_t NextAttrVal = -1;
46945ab6aeddSJose M Monsalve Diaz       if (K->hasFnAttribute(Attr))
46955ab6aeddSJose M Monsalve Diaz         NextAttrVal =
46965ab6aeddSJose M Monsalve Diaz             std::stoi(K->getFnAttribute(Attr).getValueAsString().str());
46975ab6aeddSJose M Monsalve Diaz 
46985ab6aeddSJose M Monsalve Diaz       if (NextAttrVal == -1 ||
46995ab6aeddSJose M Monsalve Diaz           (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
47005ab6aeddSJose M Monsalve Diaz         return indicatePessimisticFixpoint();
47015ab6aeddSJose M Monsalve Diaz       CurrentAttrValue = NextAttrVal;
47025ab6aeddSJose M Monsalve Diaz     }
47035ab6aeddSJose M Monsalve Diaz 
47045ab6aeddSJose M Monsalve Diaz     if (CurrentAttrValue != -1) {
47055ab6aeddSJose M Monsalve Diaz       auto &Ctx = getAnchorValue().getContext();
47065ab6aeddSJose M Monsalve Diaz       SimplifiedValue =
47075ab6aeddSJose M Monsalve Diaz           ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
47085ab6aeddSJose M Monsalve Diaz     }
4709e97e0a4fSShilei Tian     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4710e97e0a4fSShilei Tian                                                     : ChangeStatus::CHANGED;
4711e97e0a4fSShilei Tian   }
4712e97e0a4fSShilei Tian 
4713ca662297SShilei Tian   /// An optional value the associated value is assumed to fold to. That is, we
4714ca662297SShilei Tian   /// assume the associated value (which is a call) can be replaced by this
4715ca662297SShilei Tian   /// simplified value.
4716ca662297SShilei Tian   Optional<Value *> SimplifiedValue;
4717ca662297SShilei Tian 
4718ca662297SShilei Tian   /// The runtime function kind of the callee of the associated call site.
4719ca662297SShilei Tian   RuntimeFunction RFKind;
4720ca662297SShilei Tian };
4721ca662297SShilei Tian 
47229548b74aSJohannes Doerfert } // namespace
47239548b74aSJohannes Doerfert 
47245ab6aeddSJose M Monsalve Diaz /// Register folding callsite
registerFoldRuntimeCall(RuntimeFunction RF)47255ab6aeddSJose M Monsalve Diaz void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
47265ab6aeddSJose M Monsalve Diaz   auto &RFI = OMPInfoCache.RFIs[RF];
47275ab6aeddSJose M Monsalve Diaz   RFI.foreachUse(SCC, [&](Use &U, Function &F) {
47285ab6aeddSJose M Monsalve Diaz     CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
47295ab6aeddSJose M Monsalve Diaz     if (!CI)
47305ab6aeddSJose M Monsalve Diaz       return false;
47315ab6aeddSJose M Monsalve Diaz     A.getOrCreateAAFor<AAFoldRuntimeCall>(
47325ab6aeddSJose M Monsalve Diaz         IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
47335ab6aeddSJose M Monsalve Diaz         DepClassTy::NONE, /* ForceUpdate */ false,
47345ab6aeddSJose M Monsalve Diaz         /* UpdateAfterInit */ false);
47355ab6aeddSJose M Monsalve Diaz     return false;
47365ab6aeddSJose M Monsalve Diaz   });
47375ab6aeddSJose M Monsalve Diaz }
47385ab6aeddSJose M Monsalve Diaz 
registerAAs(bool IsModulePass)4739d9659bf6SJohannes Doerfert void OpenMPOpt::registerAAs(bool IsModulePass) {
4740d9659bf6SJohannes Doerfert   if (SCC.empty())
4741d9659bf6SJohannes Doerfert     return;
47424166738cSJohannes Doerfert 
4743d9659bf6SJohannes Doerfert   if (IsModulePass) {
4744d9659bf6SJohannes Doerfert     // Ensure we create the AAKernelInfo AAs first and without triggering an
4745d9659bf6SJohannes Doerfert     // update. This will make sure we register all value simplification
4746d9659bf6SJohannes Doerfert     // callbacks before any other AA has the chance to create an AAValueSimplify
4747d9659bf6SJohannes Doerfert     // or similar.
47484166738cSJohannes Doerfert     auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
4749d9659bf6SJohannes Doerfert       A.getOrCreateAAFor<AAKernelInfo>(
47504166738cSJohannes Doerfert           IRPosition::function(Kernel), /* QueryingAA */ nullptr,
4751d9659bf6SJohannes Doerfert           DepClassTy::NONE, /* ForceUpdate */ false,
4752d9659bf6SJohannes Doerfert           /* UpdateAfterInit */ false);
47534166738cSJohannes Doerfert       return false;
47544166738cSJohannes Doerfert     };
47554166738cSJohannes Doerfert     OMPInformationCache::RuntimeFunctionInfo &InitRFI =
47564166738cSJohannes Doerfert         OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
47574166738cSJohannes Doerfert     InitRFI.foreachUse(SCC, CreateKernelInfoCB);
4758ca662297SShilei Tian 
47595ab6aeddSJose M Monsalve Diaz     registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
47605ab6aeddSJose M Monsalve Diaz     registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
47615ab6aeddSJose M Monsalve Diaz     registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
47625ab6aeddSJose M Monsalve Diaz     registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
47635ab6aeddSJose M Monsalve Diaz     registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
4764d9659bf6SJohannes Doerfert   }
4765d9659bf6SJohannes Doerfert 
4766d9659bf6SJohannes Doerfert   // Create CallSite AA for all Getters.
4767d9659bf6SJohannes Doerfert   for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
4768d9659bf6SJohannes Doerfert     auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
4769d9659bf6SJohannes Doerfert 
4770d9659bf6SJohannes Doerfert     auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
4771d9659bf6SJohannes Doerfert 
4772d9659bf6SJohannes Doerfert     auto CreateAA = [&](Use &U, Function &Caller) {
4773d9659bf6SJohannes Doerfert       CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
4774d9659bf6SJohannes Doerfert       if (!CI)
4775d9659bf6SJohannes Doerfert         return false;
4776d9659bf6SJohannes Doerfert 
4777d9659bf6SJohannes Doerfert       auto &CB = cast<CallBase>(*CI);
4778d9659bf6SJohannes Doerfert 
4779d9659bf6SJohannes Doerfert       IRPosition CBPos = IRPosition::callsite_function(CB);
4780d9659bf6SJohannes Doerfert       A.getOrCreateAAFor<AAICVTracker>(CBPos);
4781d9659bf6SJohannes Doerfert       return false;
4782d9659bf6SJohannes Doerfert     };
4783d9659bf6SJohannes Doerfert 
4784d9659bf6SJohannes Doerfert     GetterRFI.foreachUse(SCC, CreateAA);
4785d9659bf6SJohannes Doerfert   }
4786d9659bf6SJohannes Doerfert   auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
4787d9659bf6SJohannes Doerfert   auto CreateAA = [&](Use &U, Function &F) {
4788d9659bf6SJohannes Doerfert     A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
4789d9659bf6SJohannes Doerfert     return false;
4790d9659bf6SJohannes Doerfert   };
4791cd0dd8ecSJoseph Huber   if (!DisableOpenMPOptDeglobalization)
4792d9659bf6SJohannes Doerfert     GlobalizationRFI.foreachUse(SCC, CreateAA);
4793d9659bf6SJohannes Doerfert 
4794d9659bf6SJohannes Doerfert   // Create an ExecutionDomain AA for every function and a HeapToStack AA for
4795d9659bf6SJohannes Doerfert   // every function if there is a device kernel.
479670b75f62SJohannes Doerfert   if (!isOpenMPDevice(M))
479770b75f62SJohannes Doerfert     return;
479870b75f62SJohannes Doerfert 
4799d9659bf6SJohannes Doerfert   for (auto *F : SCC) {
480070b75f62SJohannes Doerfert     if (F->isDeclaration())
480170b75f62SJohannes Doerfert       continue;
480270b75f62SJohannes Doerfert 
4803d9659bf6SJohannes Doerfert     A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4804cd0dd8ecSJoseph Huber     if (!DisableOpenMPOptDeglobalization)
4805d9659bf6SJohannes Doerfert       A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
480670b75f62SJohannes Doerfert 
480770b75f62SJohannes Doerfert     for (auto &I : instructions(*F)) {
480870b75f62SJohannes Doerfert       if (auto *LI = dyn_cast<LoadInst>(&I)) {
480970b75f62SJohannes Doerfert         bool UsedAssumedInformation = false;
481070b75f62SJohannes Doerfert         A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4811bf789b19SJohannes Doerfert                                UsedAssumedInformation, AA::Interprocedural);
48123e0c512cSJohannes Doerfert       } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
48133e0c512cSJohannes Doerfert         A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
481470b75f62SJohannes Doerfert       }
481570b75f62SJohannes Doerfert     }
4816d9659bf6SJohannes Doerfert   }
4817d9659bf6SJohannes Doerfert }
4818d9659bf6SJohannes Doerfert 
4819b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
4820d9659bf6SJohannes Doerfert const char AAKernelInfo::ID = 0;
482118283125SJoseph Huber const char AAExecutionDomain::ID = 0;
48226fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0;
4823ca662297SShilei Tian const char AAFoldRuntimeCall::ID = 0;
4824b8235d2bSsstefan1 
createForPosition(const IRPosition & IRP,Attributor & A)4825b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
4826b8235d2bSsstefan1                                               Attributor &A) {
4827b8235d2bSsstefan1   AAICVTracker *AA = nullptr;
4828b8235d2bSsstefan1   switch (IRP.getPositionKind()) {
4829b8235d2bSsstefan1   case IRPosition::IRP_INVALID:
4830b8235d2bSsstefan1   case IRPosition::IRP_FLOAT:
4831b8235d2bSsstefan1   case IRPosition::IRP_ARGUMENT:
4832b8235d2bSsstefan1   case IRPosition::IRP_CALL_SITE_ARGUMENT:
48331de70a72SJohannes Doerfert     llvm_unreachable("ICVTracker can only be created for function position!");
48345dfd7cc4Ssstefan1   case IRPosition::IRP_RETURNED:
48355dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
48365dfd7cc4Ssstefan1     break;
48375dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE_RETURNED:
48385dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
48395dfd7cc4Ssstefan1     break;
48405dfd7cc4Ssstefan1   case IRPosition::IRP_CALL_SITE:
48415dfd7cc4Ssstefan1     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
48425dfd7cc4Ssstefan1     break;
4843b8235d2bSsstefan1   case IRPosition::IRP_FUNCTION:
4844b8235d2bSsstefan1     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
4845b8235d2bSsstefan1     break;
4846b8235d2bSsstefan1   }
4847b8235d2bSsstefan1 
4848b8235d2bSsstefan1   return *AA;
4849b8235d2bSsstefan1 }
4850b8235d2bSsstefan1 
createForPosition(const IRPosition & IRP,Attributor & A)485118283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
485218283125SJoseph Huber                                                         Attributor &A) {
485318283125SJoseph Huber   AAExecutionDomainFunction *AA = nullptr;
485418283125SJoseph Huber   switch (IRP.getPositionKind()) {
485518283125SJoseph Huber   case IRPosition::IRP_INVALID:
485618283125SJoseph Huber   case IRPosition::IRP_FLOAT:
485718283125SJoseph Huber   case IRPosition::IRP_ARGUMENT:
485818283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
485918283125SJoseph Huber   case IRPosition::IRP_RETURNED:
486018283125SJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
486118283125SJoseph Huber   case IRPosition::IRP_CALL_SITE:
486218283125SJoseph Huber     llvm_unreachable(
486318283125SJoseph Huber         "AAExecutionDomain can only be created for function position!");
486418283125SJoseph Huber   case IRPosition::IRP_FUNCTION:
486518283125SJoseph Huber     AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
486618283125SJoseph Huber     break;
486718283125SJoseph Huber   }
486818283125SJoseph Huber 
486918283125SJoseph Huber   return *AA;
487018283125SJoseph Huber }
487118283125SJoseph Huber 
createForPosition(const IRPosition & IRP,Attributor & A)48726fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
48736fc51c9fSJoseph Huber                                                   Attributor &A) {
48746fc51c9fSJoseph Huber   AAHeapToSharedFunction *AA = nullptr;
48756fc51c9fSJoseph Huber   switch (IRP.getPositionKind()) {
48766fc51c9fSJoseph Huber   case IRPosition::IRP_INVALID:
48776fc51c9fSJoseph Huber   case IRPosition::IRP_FLOAT:
48786fc51c9fSJoseph Huber   case IRPosition::IRP_ARGUMENT:
48796fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_ARGUMENT:
48806fc51c9fSJoseph Huber   case IRPosition::IRP_RETURNED:
48816fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE_RETURNED:
48826fc51c9fSJoseph Huber   case IRPosition::IRP_CALL_SITE:
48836fc51c9fSJoseph Huber     llvm_unreachable(
48846fc51c9fSJoseph Huber         "AAHeapToShared can only be created for function position!");
48856fc51c9fSJoseph Huber   case IRPosition::IRP_FUNCTION:
48866fc51c9fSJoseph Huber     AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
48876fc51c9fSJoseph Huber     break;
48886fc51c9fSJoseph Huber   }
48896fc51c9fSJoseph Huber 
48906fc51c9fSJoseph Huber   return *AA;
48916fc51c9fSJoseph Huber }
48926fc51c9fSJoseph Huber 
createForPosition(const IRPosition & IRP,Attributor & A)4893d9659bf6SJohannes Doerfert AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
4894d9659bf6SJohannes Doerfert                                               Attributor &A) {
4895d9659bf6SJohannes Doerfert   AAKernelInfo *AA = nullptr;
4896d9659bf6SJohannes Doerfert   switch (IRP.getPositionKind()) {
4897d9659bf6SJohannes Doerfert   case IRPosition::IRP_INVALID:
4898d9659bf6SJohannes Doerfert   case IRPosition::IRP_FLOAT:
4899d9659bf6SJohannes Doerfert   case IRPosition::IRP_ARGUMENT:
4900d9659bf6SJohannes Doerfert   case IRPosition::IRP_RETURNED:
4901d9659bf6SJohannes Doerfert   case IRPosition::IRP_CALL_SITE_RETURNED:
4902d9659bf6SJohannes Doerfert   case IRPosition::IRP_CALL_SITE_ARGUMENT:
4903d9659bf6SJohannes Doerfert     llvm_unreachable("KernelInfo can only be created for function position!");
4904d9659bf6SJohannes Doerfert   case IRPosition::IRP_CALL_SITE:
4905d9659bf6SJohannes Doerfert     AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
4906d9659bf6SJohannes Doerfert     break;
4907d9659bf6SJohannes Doerfert   case IRPosition::IRP_FUNCTION:
4908d9659bf6SJohannes Doerfert     AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
4909d9659bf6SJohannes Doerfert     break;
4910d9659bf6SJohannes Doerfert   }
4911d9659bf6SJohannes Doerfert 
4912d9659bf6SJohannes Doerfert   return *AA;
4913d9659bf6SJohannes Doerfert }
4914d9659bf6SJohannes Doerfert 
createForPosition(const IRPosition & IRP,Attributor & A)4915ca662297SShilei Tian AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
4916ca662297SShilei Tian                                                         Attributor &A) {
4917ca662297SShilei Tian   AAFoldRuntimeCall *AA = nullptr;
4918ca662297SShilei Tian   switch (IRP.getPositionKind()) {
4919ca662297SShilei Tian   case IRPosition::IRP_INVALID:
4920ca662297SShilei Tian   case IRPosition::IRP_FLOAT:
4921ca662297SShilei Tian   case IRPosition::IRP_ARGUMENT:
4922ca662297SShilei Tian   case IRPosition::IRP_RETURNED:
4923ca662297SShilei Tian   case IRPosition::IRP_FUNCTION:
4924ca662297SShilei Tian   case IRPosition::IRP_CALL_SITE:
4925ca662297SShilei Tian   case IRPosition::IRP_CALL_SITE_ARGUMENT:
4926ca662297SShilei Tian     llvm_unreachable("KernelInfo can only be created for call site position!");
4927ca662297SShilei Tian   case IRPosition::IRP_CALL_SITE_RETURNED:
4928ca662297SShilei Tian     AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
4929ca662297SShilei Tian     break;
4930ca662297SShilei Tian   }
4931ca662297SShilei Tian 
4932ca662297SShilei Tian   return *AA;
4933ca662297SShilei Tian }
4934ca662297SShilei Tian 
run(Module & M,ModuleAnalysisManager & AM)4935b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
49365ccb7424SJoseph Huber   if (!containsOpenMP(M))
4937b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
4938b2ad63d3SJoseph Huber   if (DisableOpenMPOptimizations)
4939b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
4940b2ad63d3SJoseph Huber 
49410edb8777SJoseph Huber   FunctionAnalysisManager &FAM =
49420edb8777SJoseph Huber       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
49435ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
49445ccb7424SJoseph Huber 
494566321807SJoseph Huber   if (PrintModuleBeforeOptimizations)
494666321807SJoseph Huber     LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
494766321807SJoseph Huber 
494857ad2e10SJoseph Huber   auto IsCalled = [&](Function &F) {
494957ad2e10SJoseph Huber     if (Kernels.contains(&F))
495057ad2e10SJoseph Huber       return true;
495157ad2e10SJoseph Huber     for (const User *U : F.users())
495257ad2e10SJoseph Huber       if (!isa<BlockAddress>(U))
495357ad2e10SJoseph Huber         return true;
495457ad2e10SJoseph Huber     return false;
495557ad2e10SJoseph Huber   };
495657ad2e10SJoseph Huber 
49570edb8777SJoseph Huber   auto EmitRemark = [&](Function &F) {
49580edb8777SJoseph Huber     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
49590edb8777SJoseph Huber     ORE.emit([&]() {
49602c31d5ebSJoseph Huber       OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
4961ecabc668SJoseph Huber       return ORA << "Could not internalize function. "
4962adbaa39dSJoseph Huber                  << "Some optimizations may not be possible. [OMP140]";
49630edb8777SJoseph Huber     });
49640edb8777SJoseph Huber   };
49650edb8777SJoseph Huber 
496657ad2e10SJoseph Huber   // Create internal copies of each function if this is a kernel Module. This
496757ad2e10SJoseph Huber   // allows iterprocedural passes to see every call edge.
4968adbaa39dSJoseph Huber   DenseMap<Function *, Function *> InternalizedMap;
4969adbaa39dSJoseph Huber   if (isOpenMPDevice(M)) {
4970adbaa39dSJoseph Huber     SmallPtrSet<Function *, 16> InternalizeFns;
497103d7e61cSJoseph Huber     for (Function &F : M)
49724a668604SJoseph Huber       if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
49734a668604SJoseph Huber           !DisableInternalization) {
4974adbaa39dSJoseph Huber         if (Attributor::isInternalizable(F)) {
4975adbaa39dSJoseph Huber           InternalizeFns.insert(&F);
4976ecabc668SJoseph Huber         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
49770edb8777SJoseph Huber           EmitRemark(F);
49780edb8777SJoseph Huber         }
49790edb8777SJoseph Huber       }
498003d7e61cSJoseph Huber 
4981adbaa39dSJoseph Huber     Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
4982adbaa39dSJoseph Huber   }
4983adbaa39dSJoseph Huber 
498457ad2e10SJoseph Huber   // Look at every function in the Module unless it was internalized.
4985b2ad63d3SJoseph Huber   SmallVector<Function *, 16> SCC;
498603d7e61cSJoseph Huber   for (Function &F : M)
4987adbaa39dSJoseph Huber     if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
498803d7e61cSJoseph Huber       SCC.push_back(&F);
4989b2ad63d3SJoseph Huber 
4990b2ad63d3SJoseph Huber   if (SCC.empty())
4991b2ad63d3SJoseph Huber     return PreservedAnalyses::all();
4992b2ad63d3SJoseph Huber 
4993b2ad63d3SJoseph Huber   AnalysisGetter AG(FAM);
4994b2ad63d3SJoseph Huber 
4995b2ad63d3SJoseph Huber   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4996b2ad63d3SJoseph Huber     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4997b2ad63d3SJoseph Huber   };
4998b2ad63d3SJoseph Huber 
4999b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
5000b2ad63d3SJoseph Huber   CallGraphUpdater CGUpdater;
5001b2ad63d3SJoseph Huber 
5002b2ad63d3SJoseph Huber   SetVector<Function *> Functions(SCC.begin(), SCC.end());
50035ccb7424SJoseph Huber   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
5004b2ad63d3SJoseph Huber 
5005f074a6a0SJoseph Huber   unsigned MaxFixpointIterations =
5006f074a6a0SJoseph Huber       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
50073be3b401SJohannes Doerfert 
50083be3b401SJohannes Doerfert   AttributorConfig AC(CGUpdater);
50093be3b401SJohannes Doerfert   AC.DefaultInitializeLiveInternals = false;
50103be3b401SJohannes Doerfert   AC.RewriteSignatures = false;
50113be3b401SJohannes Doerfert   AC.MaxFixpointIterations = MaxFixpointIterations;
50123be3b401SJohannes Doerfert   AC.OREGetter = OREGetter;
50133be3b401SJohannes Doerfert   AC.PassName = DEBUG_TYPE;
50143be3b401SJohannes Doerfert 
50153be3b401SJohannes Doerfert   Attributor A(Functions, InfoCache, AC);
5016b2ad63d3SJoseph Huber 
5017b2ad63d3SJoseph Huber   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5018b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(true);
5019339aa765SJoseph Huber 
502029a74a39SJoseph Huber   // Optionally inline device functions for potentially better performance.
502129a74a39SJoseph Huber   if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
502229a74a39SJoseph Huber     for (Function &F : M)
502329a74a39SJoseph Huber       if (!F.isDeclaration() && !Kernels.contains(&F) &&
502429a74a39SJoseph Huber           !F.hasFnAttribute(Attribute::NoInline))
502529a74a39SJoseph Huber         F.addFnAttr(Attribute::AlwaysInline);
502629a74a39SJoseph Huber 
5027339aa765SJoseph Huber   if (PrintModuleAfterOptimizations)
5028339aa765SJoseph Huber     LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
5029339aa765SJoseph Huber 
5030b2ad63d3SJoseph Huber   if (Changed)
5031b2ad63d3SJoseph Huber     return PreservedAnalyses::none();
5032b2ad63d3SJoseph Huber 
5033b2ad63d3SJoseph Huber   return PreservedAnalyses::all();
5034b2ad63d3SJoseph Huber }
5035b2ad63d3SJoseph Huber 
run(LazyCallGraph::SCC & C,CGSCCAnalysisManager & AM,LazyCallGraph & CG,CGSCCUpdateResult & UR)5036b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
50379548b74aSJohannes Doerfert                                           CGSCCAnalysisManager &AM,
5038b2ad63d3SJoseph Huber                                           LazyCallGraph &CG,
5039b2ad63d3SJoseph Huber                                           CGSCCUpdateResult &UR) {
50405ccb7424SJoseph Huber   if (!containsOpenMP(*C.begin()->getFunction().getParent()))
50419548b74aSJohannes Doerfert     return PreservedAnalyses::all();
50429548b74aSJohannes Doerfert   if (DisableOpenMPOptimizations)
50439548b74aSJohannes Doerfert     return PreservedAnalyses::all();
50449548b74aSJohannes Doerfert 
5045ee17263aSJohannes Doerfert   SmallVector<Function *, 16> SCC;
5046351d234dSRoman Lebedev   // If there are kernels in the module, we have to run on all SCC's.
5047351d234dSRoman Lebedev   for (LazyCallGraph::Node &N : C) {
5048351d234dSRoman Lebedev     Function *Fn = &N.getFunction();
5049351d234dSRoman Lebedev     SCC.push_back(Fn);
5050351d234dSRoman Lebedev   }
5051351d234dSRoman Lebedev 
50525ccb7424SJoseph Huber   if (SCC.empty())
50539548b74aSJohannes Doerfert     return PreservedAnalyses::all();
50549548b74aSJohannes Doerfert 
50555ccb7424SJoseph Huber   Module &M = *C.begin()->getFunction().getParent();
50565ccb7424SJoseph Huber 
505766321807SJoseph Huber   if (PrintModuleBeforeOptimizations)
505866321807SJoseph Huber     LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
505966321807SJoseph Huber 
50605ccb7424SJoseph Huber   KernelSet Kernels = getDeviceKernels(M);
50615ccb7424SJoseph Huber 
50624d4ea9acSHuber, Joseph   FunctionAnalysisManager &FAM =
50634d4ea9acSHuber, Joseph       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
50647cfd267cSsstefan1 
50657cfd267cSsstefan1   AnalysisGetter AG(FAM);
50667cfd267cSsstefan1 
50677cfd267cSsstefan1   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
50684d4ea9acSHuber, Joseph     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
50694d4ea9acSHuber, Joseph   };
50704d4ea9acSHuber, Joseph 
5071b2ad63d3SJoseph Huber   BumpPtrAllocator Allocator;
50729548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
50739548b74aSJohannes Doerfert   CGUpdater.initialize(CG, C, AM, UR);
50747cfd267cSsstefan1 
50757cfd267cSsstefan1   SetVector<Function *> Functions(SCC.begin(), SCC.end());
50767cfd267cSsstefan1   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
50775ccb7424SJoseph Huber                                 /*CGSCC*/ Functions, Kernels);
50787cfd267cSsstefan1 
5079f074a6a0SJoseph Huber   unsigned MaxFixpointIterations =
5080f074a6a0SJoseph Huber       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
50813be3b401SJohannes Doerfert 
50823be3b401SJohannes Doerfert   AttributorConfig AC(CGUpdater);
50833be3b401SJohannes Doerfert   AC.DefaultInitializeLiveInternals = false;
50843be3b401SJohannes Doerfert   AC.IsModulePass = false;
50853be3b401SJohannes Doerfert   AC.RewriteSignatures = false;
50863be3b401SJohannes Doerfert   AC.MaxFixpointIterations = MaxFixpointIterations;
50873be3b401SJohannes Doerfert   AC.OREGetter = OREGetter;
50883be3b401SJohannes Doerfert   AC.PassName = DEBUG_TYPE;
50893be3b401SJohannes Doerfert 
50903be3b401SJohannes Doerfert   Attributor A(Functions, InfoCache, AC);
5091b8235d2bSsstefan1 
5092b8235d2bSsstefan1   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5093b2ad63d3SJoseph Huber   bool Changed = OMPOpt.run(false);
5094339aa765SJoseph Huber 
5095339aa765SJoseph Huber   if (PrintModuleAfterOptimizations)
5096339aa765SJoseph Huber     LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5097339aa765SJoseph Huber 
5098694ded37SGiorgis Georgakoudis   if (Changed)
5099694ded37SGiorgis Georgakoudis     return PreservedAnalyses::none();
5100694ded37SGiorgis Georgakoudis 
51019548b74aSJohannes Doerfert   return PreservedAnalyses::all();
51029548b74aSJohannes Doerfert }
51038b57ed09SJoseph Huber 
51049548b74aSJohannes Doerfert namespace {
51059548b74aSJohannes Doerfert 
5106b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
51079548b74aSJohannes Doerfert   CallGraphUpdater CGUpdater;
51089548b74aSJohannes Doerfert   static char ID;
51099548b74aSJohannes Doerfert 
OpenMPOptCGSCCLegacyPass__anon23c38c774f11::OpenMPOptCGSCCLegacyPass5110b2ad63d3SJoseph Huber   OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
5111b2ad63d3SJoseph Huber     initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
51129548b74aSJohannes Doerfert   }
51139548b74aSJohannes Doerfert 
getAnalysisUsage__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51149548b74aSJohannes Doerfert   void getAnalysisUsage(AnalysisUsage &AU) const override {
51159548b74aSJohannes Doerfert     CallGraphSCCPass::getAnalysisUsage(AU);
51169548b74aSJohannes Doerfert   }
51179548b74aSJohannes Doerfert 
runOnSCC__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51189548b74aSJohannes Doerfert   bool runOnSCC(CallGraphSCC &CGSCC) override {
51195ccb7424SJoseph Huber     if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
51209548b74aSJohannes Doerfert       return false;
51219548b74aSJohannes Doerfert     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
51229548b74aSJohannes Doerfert       return false;
51239548b74aSJohannes Doerfert 
5124ee17263aSJohannes Doerfert     SmallVector<Function *, 16> SCC;
5125351d234dSRoman Lebedev     // If there are kernels in the module, we have to run on all SCC's.
5126351d234dSRoman Lebedev     for (CallGraphNode *CGN : CGSCC) {
5127351d234dSRoman Lebedev       Function *Fn = CGN->getFunction();
5128351d234dSRoman Lebedev       if (!Fn || Fn->isDeclaration())
5129351d234dSRoman Lebedev         continue;
5130ee17263aSJohannes Doerfert       SCC.push_back(Fn);
5131351d234dSRoman Lebedev     }
5132351d234dSRoman Lebedev 
51335ccb7424SJoseph Huber     if (SCC.empty())
51349548b74aSJohannes Doerfert       return false;
51359548b74aSJohannes Doerfert 
51365ccb7424SJoseph Huber     Module &M = CGSCC.getCallGraph().getModule();
51375ccb7424SJoseph Huber     KernelSet Kernels = getDeviceKernels(M);
51385ccb7424SJoseph Huber 
51399548b74aSJohannes Doerfert     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
51409548b74aSJohannes Doerfert     CGUpdater.initialize(CG, CGSCC);
51419548b74aSJohannes Doerfert 
51424d4ea9acSHuber, Joseph     // Maintain a map of functions to avoid rebuilding the ORE
51434d4ea9acSHuber, Joseph     DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
51444d4ea9acSHuber, Joseph     auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
51454d4ea9acSHuber, Joseph       std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
51464d4ea9acSHuber, Joseph       if (!ORE)
51474d4ea9acSHuber, Joseph         ORE = std::make_unique<OptimizationRemarkEmitter>(F);
51484d4ea9acSHuber, Joseph       return *ORE;
51494d4ea9acSHuber, Joseph     };
51504d4ea9acSHuber, Joseph 
51517cfd267cSsstefan1     AnalysisGetter AG;
51527cfd267cSsstefan1     SetVector<Function *> Functions(SCC.begin(), SCC.end());
51537cfd267cSsstefan1     BumpPtrAllocator Allocator;
51545ccb7424SJoseph Huber     OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
51555ccb7424SJoseph Huber                                   Allocator,
51565ccb7424SJoseph Huber                                   /*CGSCC*/ Functions, Kernels);
51577cfd267cSsstefan1 
5158f074a6a0SJoseph Huber     unsigned MaxFixpointIterations =
5159f074a6a0SJoseph Huber         (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
51603be3b401SJohannes Doerfert 
51613be3b401SJohannes Doerfert     AttributorConfig AC(CGUpdater);
51623be3b401SJohannes Doerfert     AC.DefaultInitializeLiveInternals = false;
51633be3b401SJohannes Doerfert     AC.IsModulePass = false;
51643be3b401SJohannes Doerfert     AC.RewriteSignatures = false;
51653be3b401SJohannes Doerfert     AC.MaxFixpointIterations = MaxFixpointIterations;
51663be3b401SJohannes Doerfert     AC.OREGetter = OREGetter;
51673be3b401SJohannes Doerfert     AC.PassName = DEBUG_TYPE;
51683be3b401SJohannes Doerfert 
51693be3b401SJohannes Doerfert     Attributor A(Functions, InfoCache, AC);
5170b8235d2bSsstefan1 
5171b8235d2bSsstefan1     OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5172339aa765SJoseph Huber     bool Result = OMPOpt.run(false);
5173339aa765SJoseph Huber 
5174339aa765SJoseph Huber     if (PrintModuleAfterOptimizations)
5175339aa765SJoseph Huber       LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5176339aa765SJoseph Huber 
5177339aa765SJoseph Huber     return Result;
51789548b74aSJohannes Doerfert   }
51799548b74aSJohannes Doerfert 
doFinalization__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51809548b74aSJohannes Doerfert   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
51819548b74aSJohannes Doerfert };
51829548b74aSJohannes Doerfert 
51839548b74aSJohannes Doerfert } // end anonymous namespace
51849548b74aSJohannes Doerfert 
getDeviceKernels(Module & M)51855ccb7424SJoseph Huber KernelSet llvm::omp::getDeviceKernels(Module &M) {
51865ccb7424SJoseph Huber   // TODO: Create a more cross-platform way of determining device kernels.
5187e8039ad4SJohannes Doerfert   NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
51885ccb7424SJoseph Huber   KernelSet Kernels;
51895ccb7424SJoseph Huber 
5190e8039ad4SJohannes Doerfert   if (!MD)
51915ccb7424SJoseph Huber     return Kernels;
5192e8039ad4SJohannes Doerfert 
5193e8039ad4SJohannes Doerfert   for (auto *Op : MD->operands()) {
5194e8039ad4SJohannes Doerfert     if (Op->getNumOperands() < 2)
5195e8039ad4SJohannes Doerfert       continue;
5196e8039ad4SJohannes Doerfert     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
5197e8039ad4SJohannes Doerfert     if (!KindID || KindID->getString() != "kernel")
5198e8039ad4SJohannes Doerfert       continue;
5199e8039ad4SJohannes Doerfert 
5200e8039ad4SJohannes Doerfert     Function *KernelFn =
5201e8039ad4SJohannes Doerfert         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
5202e8039ad4SJohannes Doerfert     if (!KernelFn)
5203e8039ad4SJohannes Doerfert       continue;
5204e8039ad4SJohannes Doerfert 
5205e8039ad4SJohannes Doerfert     ++NumOpenMPTargetRegionKernels;
5206e8039ad4SJohannes Doerfert 
5207e8039ad4SJohannes Doerfert     Kernels.insert(KernelFn);
5208e8039ad4SJohannes Doerfert   }
52095ccb7424SJoseph Huber 
52105ccb7424SJoseph Huber   return Kernels;
5211e8039ad4SJohannes Doerfert }
5212e8039ad4SJohannes Doerfert 
containsOpenMP(Module & M)52135ccb7424SJoseph Huber bool llvm::omp::containsOpenMP(Module &M) {
52145ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp");
52155ccb7424SJoseph Huber   if (!MD)
52165ccb7424SJoseph Huber     return false;
5217dce6bc18SJohannes Doerfert 
5218e8039ad4SJohannes Doerfert   return true;
5219e8039ad4SJohannes Doerfert }
5220e8039ad4SJohannes Doerfert 
isOpenMPDevice(Module & M)52215ccb7424SJoseph Huber bool llvm::omp::isOpenMPDevice(Module &M) {
52225ccb7424SJoseph Huber   Metadata *MD = M.getModuleFlag("openmp-device");
52235ccb7424SJoseph Huber   if (!MD)
52245ccb7424SJoseph Huber     return false;
52255ccb7424SJoseph Huber 
52265ccb7424SJoseph Huber   return true;
52279548b74aSJohannes Doerfert }
52289548b74aSJohannes Doerfert 
5229b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0;
52309548b74aSJohannes Doerfert 
5231b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
52329548b74aSJohannes Doerfert                       "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)52339548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
5234b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
52359548b74aSJohannes Doerfert                     "OpenMP specific optimizations", false, false)
52369548b74aSJohannes Doerfert 
5237b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
5238b2ad63d3SJoseph Huber   return new OpenMPOptCGSCCLegacyPass();
5239b2ad63d3SJoseph Huber }
5240