19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
29548b74aSJohannes Doerfert //
39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69548b74aSJohannes Doerfert //
79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
89548b74aSJohannes Doerfert //
99548b74aSJohannes Doerfert // OpenMP specific optimizations:
109548b74aSJohannes Doerfert //
119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12ca1560daSJoseph Huber // - Replacing globalized device memory with stack memory.
13ca1560daSJoseph Huber // - Replacing globalized device memory with shared memory.
14b910a109SJoseph Huber // - Parallel region merging.
15b910a109SJoseph Huber // - Transforming generic-mode device kernels to SPMD mode.
16b910a109SJoseph Huber // - Specializing the state machine for generic-mode device kernels.
179548b74aSJohannes Doerfert //
189548b74aSJohannes Doerfert //===----------------------------------------------------------------------===//
199548b74aSJohannes Doerfert
209548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h"
219548b74aSJohannes Doerfert
229548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h"
2318283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h"
249f04a0eaSJohannes Doerfert #include "llvm/ADT/SetVector.h"
259548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h"
26e6e440aeSJohannes Doerfert #include "llvm/ADT/StringRef.h"
279548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h"
289548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h"
293c8a4c6fSJohannes Doerfert #include "llvm/Analysis/MemoryLocation.h"
304d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h"
313a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h"
329548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h"
33e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
34d9659bf6SJohannes Doerfert #include "llvm/IR/Assumptions.h"
353c8a4c6fSJohannes Doerfert #include "llvm/IR/Constants.h"
36d9659bf6SJohannes Doerfert #include "llvm/IR/DiagnosticInfo.h"
37514c033dSJohannes Doerfert #include "llvm/IR/GlobalValue.h"
383c8a4c6fSJohannes Doerfert #include "llvm/IR/GlobalVariable.h"
39d9659bf6SJohannes Doerfert #include "llvm/IR/Instruction.h"
403c8a4c6fSJohannes Doerfert #include "llvm/IR/Instructions.h"
4168abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h"
4227905eebSJoseph Huber #include "llvm/IR/IntrinsicsAMDGPU.h"
4327905eebSJoseph Huber #include "llvm/IR/IntrinsicsNVPTX.h"
443c8a4c6fSJohannes Doerfert #include "llvm/IR/LLVMContext.h"
459548b74aSJohannes Doerfert #include "llvm/InitializePasses.h"
469548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h"
473c8a4c6fSJohannes Doerfert #include "llvm/Support/Debug.h"
489548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h"
497cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h"
503a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h"
519548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h"
529548b74aSJohannes Doerfert
53e6e440aeSJohannes Doerfert #include <algorithm>
54e6e440aeSJohannes Doerfert
559548b74aSJohannes Doerfert using namespace llvm;
569548b74aSJohannes Doerfert using namespace omp;
579548b74aSJohannes Doerfert
589548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt"
599548b74aSJohannes Doerfert
609548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations(
61557efc9aSFangrui Song "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
62557efc9aSFangrui Song cl::Hidden, cl::init(false));
639548b74aSJohannes Doerfert
643a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging(
65557efc9aSFangrui Song "openmp-opt-enable-merging",
663a6bfcf2SGiorgis Georgakoudis cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
673a6bfcf2SGiorgis Georgakoudis cl::init(false));
683a6bfcf2SGiorgis Georgakoudis
694a668604SJoseph Huber static cl::opt<bool>
70557efc9aSFangrui Song DisableInternalization("openmp-opt-disable-internalization",
714a668604SJoseph Huber cl::desc("Disable function internalization."),
724a668604SJoseph Huber cl::Hidden, cl::init(false));
734a668604SJoseph Huber
740f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
750f426935Ssstefan1 cl::Hidden);
76e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
77e8039ad4SJohannes Doerfert cl::init(false), cl::Hidden);
780f426935Ssstefan1
79496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency(
80496f8e5bSHamilton Tobon Mosquera "openmp-hide-memory-transfer-latency",
81496f8e5bSHamilton Tobon Mosquera cl::desc("[WIP] Tries to hide the latency of host to device memory"
82496f8e5bSHamilton Tobon Mosquera " transfers"),
83496f8e5bSHamilton Tobon Mosquera cl::Hidden, cl::init(false));
84496f8e5bSHamilton Tobon Mosquera
85cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptDeglobalization(
86557efc9aSFangrui Song "openmp-opt-disable-deglobalization",
87cd0dd8ecSJoseph Huber cl::desc("Disable OpenMP optimizations involving deglobalization."),
88cd0dd8ecSJoseph Huber cl::Hidden, cl::init(false));
89cd0dd8ecSJoseph Huber
90cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptSPMDization(
91557efc9aSFangrui Song "openmp-opt-disable-spmdization",
92cd0dd8ecSJoseph Huber cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
93cd0dd8ecSJoseph Huber cl::Hidden, cl::init(false));
94cd0dd8ecSJoseph Huber
95cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptFolding(
96557efc9aSFangrui Song "openmp-opt-disable-folding",
97cd0dd8ecSJoseph Huber cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
98cd0dd8ecSJoseph Huber cl::init(false));
99cd0dd8ecSJoseph Huber
100cd0dd8ecSJoseph Huber static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
101557efc9aSFangrui Song "openmp-opt-disable-state-machine-rewrite",
102cd0dd8ecSJoseph Huber cl::desc("Disable OpenMP optimizations that replace the state machine."),
103cd0dd8ecSJoseph Huber cl::Hidden, cl::init(false));
104cd0dd8ecSJoseph Huber
1053c8a4c6fSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptBarrierElimination(
106557efc9aSFangrui Song "openmp-opt-disable-barrier-elimination",
1073c8a4c6fSJohannes Doerfert cl::desc("Disable OpenMP optimizations that eliminate barriers."),
1083c8a4c6fSJohannes Doerfert cl::Hidden, cl::init(false));
1093c8a4c6fSJohannes Doerfert
110339aa765SJoseph Huber static cl::opt<bool> PrintModuleAfterOptimizations(
111557efc9aSFangrui Song "openmp-opt-print-module-after",
112339aa765SJoseph Huber cl::desc("Print the current module after OpenMP optimizations."),
113339aa765SJoseph Huber cl::Hidden, cl::init(false));
114339aa765SJoseph Huber
11566321807SJoseph Huber static cl::opt<bool> PrintModuleBeforeOptimizations(
116557efc9aSFangrui Song "openmp-opt-print-module-before",
11766321807SJoseph Huber cl::desc("Print the current module before OpenMP optimizations."),
11866321807SJoseph Huber cl::Hidden, cl::init(false));
11966321807SJoseph Huber
12029a74a39SJoseph Huber static cl::opt<bool> AlwaysInlineDeviceFunctions(
121557efc9aSFangrui Song "openmp-opt-inline-device",
12229a74a39SJoseph Huber cl::desc("Inline all applicible functions on the device."), cl::Hidden,
12329a74a39SJoseph Huber cl::init(false));
12429a74a39SJoseph Huber
1257eb899cbSJoseph Huber static cl::opt<bool>
126557efc9aSFangrui Song EnableVerboseRemarks("openmp-opt-verbose-remarks",
1277eb899cbSJoseph Huber cl::desc("Enables more verbose remarks."), cl::Hidden,
1287eb899cbSJoseph Huber cl::init(false));
1297eb899cbSJoseph Huber
130f074a6a0SJoseph Huber static cl::opt<unsigned>
131f074a6a0SJoseph Huber SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
132f074a6a0SJoseph Huber cl::desc("Maximal number of attributor iterations."),
133f074a6a0SJoseph Huber cl::init(256));
134f074a6a0SJoseph Huber
1350136a440SJoseph Huber static cl::opt<unsigned>
1360136a440SJoseph Huber SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
1370136a440SJoseph Huber cl::desc("Maximum amount of shared memory to use."),
1380136a440SJoseph Huber cl::init(std::numeric_limits<unsigned>::max()));
1390136a440SJoseph Huber
1409548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
1419548b74aSJohannes Doerfert "Number of OpenMP runtime calls deduplicated");
14255eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted,
14355eb714aSRoman Lebedev "Number of OpenMP parallel regions deleted");
1449548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
1459548b74aSJohannes Doerfert "Number of OpenMP runtime functions identified");
1469548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
1479548b74aSJohannes Doerfert "Number of OpenMP runtime function uses identified");
148e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels,
149e8039ad4SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) identified");
150514c033dSJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
151514c033dSJohannes Doerfert "Number of OpenMP target region entry points (=kernels) executed in "
152514c033dSJohannes Doerfert "SPMD-mode instead of generic-mode");
153d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
154d9659bf6SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) executed in "
155d9659bf6SJohannes Doerfert "generic-mode without a state machines");
156d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
157d9659bf6SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) executed in "
158d9659bf6SJohannes Doerfert "generic-mode with customized state machines with fallback");
159d9659bf6SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
160d9659bf6SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) executed in "
161d9659bf6SJohannes Doerfert "generic-mode with customized state machines without fallback");
1625b0581aeSJohannes Doerfert STATISTIC(
1635b0581aeSJohannes Doerfert NumOpenMPParallelRegionsReplacedInGPUStateMachine,
1645b0581aeSJohannes Doerfert "Number of OpenMP parallel regions replaced with ID in GPU state machines");
1653a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged,
1663a6bfcf2SGiorgis Georgakoudis "Number of OpenMP parallel regions merged");
1676fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory,
1686fc51c9fSJoseph Huber "Amount of memory pushed to shared memory");
1693c8a4c6fSJohannes Doerfert STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
1709548b74aSJohannes Doerfert
171263c4a3cSrathod-sahaab #if !defined(NDEBUG)
1729548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]";
173a50c0b0dSMikael Holmen #endif
1749548b74aSJohannes Doerfert
1759548b74aSJohannes Doerfert namespace {
1769548b74aSJohannes Doerfert
1776fc51c9fSJoseph Huber struct AAHeapToShared;
1786fc51c9fSJoseph Huber
179b8235d2bSsstefan1 struct AAICVTracker;
180b8235d2bSsstefan1
1817cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
1827cfd267cSsstefan1 /// Attributor runs.
1837cfd267cSsstefan1 struct OMPInformationCache : public InformationCache {
OMPInformationCache__anon23c38c770111::OMPInformationCache1847cfd267cSsstefan1 OMPInformationCache(Module &M, AnalysisGetter &AG,
185624d34afSJohannes Doerfert BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
18686cdff0eSEli Friedman KernelSet &Kernels)
187624d34afSJohannes Doerfert : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
188624d34afSJohannes Doerfert Kernels(Kernels) {
189624d34afSJohannes Doerfert
19061238d26Ssstefan1 OMPBuilder.initialize();
1919548b74aSJohannes Doerfert initializeRuntimeFunctions();
1920f426935Ssstefan1 initializeInternalControlVars();
1939548b74aSJohannes Doerfert }
1949548b74aSJohannes Doerfert
1950f426935Ssstefan1 /// Generic information that describes an internal control variable.
1960f426935Ssstefan1 struct InternalControlVarInfo {
1970f426935Ssstefan1 /// The kind, as described by InternalControlVar enum.
1980f426935Ssstefan1 InternalControlVar Kind;
1990f426935Ssstefan1
2000f426935Ssstefan1 /// The name of the ICV.
2010f426935Ssstefan1 StringRef Name;
2020f426935Ssstefan1
2030f426935Ssstefan1 /// Environment variable associated with this ICV.
2040f426935Ssstefan1 StringRef EnvVarName;
2050f426935Ssstefan1
2060f426935Ssstefan1 /// Initial value kind.
2070f426935Ssstefan1 ICVInitValue InitKind;
2080f426935Ssstefan1
2090f426935Ssstefan1 /// Initial value.
2100f426935Ssstefan1 ConstantInt *InitValue;
2110f426935Ssstefan1
2120f426935Ssstefan1 /// Setter RTL function associated with this ICV.
2130f426935Ssstefan1 RuntimeFunction Setter;
2140f426935Ssstefan1
2150f426935Ssstefan1 /// Getter RTL function associated with this ICV.
2160f426935Ssstefan1 RuntimeFunction Getter;
2170f426935Ssstefan1
2180f426935Ssstefan1 /// RTL Function corresponding to the override clause of this ICV
2190f426935Ssstefan1 RuntimeFunction Clause;
2200f426935Ssstefan1 };
2210f426935Ssstefan1
2229548b74aSJohannes Doerfert /// Generic information that describes a runtime function
2239548b74aSJohannes Doerfert struct RuntimeFunctionInfo {
2248855fec3SJohannes Doerfert
2259548b74aSJohannes Doerfert /// The kind, as described by the RuntimeFunction enum.
2269548b74aSJohannes Doerfert RuntimeFunction Kind;
2279548b74aSJohannes Doerfert
2289548b74aSJohannes Doerfert /// The name of the function.
2299548b74aSJohannes Doerfert StringRef Name;
2309548b74aSJohannes Doerfert
2319548b74aSJohannes Doerfert /// Flag to indicate a variadic function.
2329548b74aSJohannes Doerfert bool IsVarArg;
2339548b74aSJohannes Doerfert
2349548b74aSJohannes Doerfert /// The return type of the function.
2359548b74aSJohannes Doerfert Type *ReturnType;
2369548b74aSJohannes Doerfert
2379548b74aSJohannes Doerfert /// The argument types of the function.
2389548b74aSJohannes Doerfert SmallVector<Type *, 8> ArgumentTypes;
2399548b74aSJohannes Doerfert
2409548b74aSJohannes Doerfert /// The declaration if available.
241f09f4b26SJohannes Doerfert Function *Declaration = nullptr;
2429548b74aSJohannes Doerfert
2439548b74aSJohannes Doerfert /// Uses of this runtime function per function containing the use.
2448855fec3SJohannes Doerfert using UseVector = SmallVector<Use *, 16>;
2458855fec3SJohannes Doerfert
246b8235d2bSsstefan1 /// Clear UsesMap for runtime function.
clearUsesMap__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo247b8235d2bSsstefan1 void clearUsesMap() { UsesMap.clear(); }
248b8235d2bSsstefan1
24954bd3751SJohannes Doerfert /// Boolean conversion that is true if the runtime function was found.
operator bool__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo25054bd3751SJohannes Doerfert operator bool() const { return Declaration; }
25154bd3751SJohannes Doerfert
2528855fec3SJohannes Doerfert /// Return the vector of uses in function \p F.
getOrCreateUseVector__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2538855fec3SJohannes Doerfert UseVector &getOrCreateUseVector(Function *F) {
254b8235d2bSsstefan1 std::shared_ptr<UseVector> &UV = UsesMap[F];
2558855fec3SJohannes Doerfert if (!UV)
256b8235d2bSsstefan1 UV = std::make_shared<UseVector>();
2578855fec3SJohannes Doerfert return *UV;
2588855fec3SJohannes Doerfert }
2598855fec3SJohannes Doerfert
2608855fec3SJohannes Doerfert /// Return the vector of uses in function \p F or `nullptr` if there are
2618855fec3SJohannes Doerfert /// none.
getUseVector__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2628855fec3SJohannes Doerfert const UseVector *getUseVector(Function &F) const {
26395e57072SDavid Blaikie auto I = UsesMap.find(&F);
26495e57072SDavid Blaikie if (I != UsesMap.end())
26595e57072SDavid Blaikie return I->second.get();
26695e57072SDavid Blaikie return nullptr;
2678855fec3SJohannes Doerfert }
2688855fec3SJohannes Doerfert
2698855fec3SJohannes Doerfert /// Return how many functions contain uses of this runtime function.
getNumFunctionsWithUses__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2708855fec3SJohannes Doerfert size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
2719548b74aSJohannes Doerfert
2729548b74aSJohannes Doerfert /// Return the number of arguments (or the minimal number for variadic
2739548b74aSJohannes Doerfert /// functions).
getNumArgs__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo2749548b74aSJohannes Doerfert size_t getNumArgs() const { return ArgumentTypes.size(); }
2759548b74aSJohannes Doerfert
2769548b74aSJohannes Doerfert /// Run the callback \p CB on each use and forget the use if the result is
2779548b74aSJohannes Doerfert /// true. The callback will be fed the function in which the use was
2789548b74aSJohannes Doerfert /// encountered as second argument.
foreachUse__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo279624d34afSJohannes Doerfert void foreachUse(SmallVectorImpl<Function *> &SCC,
280624d34afSJohannes Doerfert function_ref<bool(Use &, Function &)> CB) {
281624d34afSJohannes Doerfert for (Function *F : SCC)
282624d34afSJohannes Doerfert foreachUse(CB, F);
283e099c7b6Ssstefan1 }
284e099c7b6Ssstefan1
285e099c7b6Ssstefan1 /// Run the callback \p CB on each use within the function \p F and forget
286e099c7b6Ssstefan1 /// the use if the result is true.
foreachUse__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo287624d34afSJohannes Doerfert void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
2888855fec3SJohannes Doerfert SmallVector<unsigned, 8> ToBeDeleted;
2899548b74aSJohannes Doerfert ToBeDeleted.clear();
290e099c7b6Ssstefan1
2918855fec3SJohannes Doerfert unsigned Idx = 0;
292624d34afSJohannes Doerfert UseVector &UV = getOrCreateUseVector(F);
293e099c7b6Ssstefan1
2948855fec3SJohannes Doerfert for (Use *U : UV) {
295e099c7b6Ssstefan1 if (CB(*U, *F))
2968855fec3SJohannes Doerfert ToBeDeleted.push_back(Idx);
2978855fec3SJohannes Doerfert ++Idx;
2988855fec3SJohannes Doerfert }
2998855fec3SJohannes Doerfert
3008855fec3SJohannes Doerfert // Remove the to-be-deleted indices in reverse order as prior
301b726c557SJohannes Doerfert // modifications will not modify the smaller indices.
3028855fec3SJohannes Doerfert while (!ToBeDeleted.empty()) {
3038855fec3SJohannes Doerfert unsigned Idx = ToBeDeleted.pop_back_val();
3048855fec3SJohannes Doerfert UV[Idx] = UV.back();
3058855fec3SJohannes Doerfert UV.pop_back();
3069548b74aSJohannes Doerfert }
3079548b74aSJohannes Doerfert }
3088855fec3SJohannes Doerfert
3098855fec3SJohannes Doerfert private:
3108855fec3SJohannes Doerfert /// Map from functions to all uses of this runtime function contained in
3118855fec3SJohannes Doerfert /// them.
312b8235d2bSsstefan1 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
313d9659bf6SJohannes Doerfert
314d9659bf6SJohannes Doerfert public:
315d9659bf6SJohannes Doerfert /// Iterators for the uses of this runtime function.
begin__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo316d9659bf6SJohannes Doerfert decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
end__anon23c38c770111::OMPInformationCache::RuntimeFunctionInfo317d9659bf6SJohannes Doerfert decltype(UsesMap)::iterator end() { return UsesMap.end(); }
3189548b74aSJohannes Doerfert };
3199548b74aSJohannes Doerfert
3207cfd267cSsstefan1 /// An OpenMP-IR-Builder instance
3217cfd267cSsstefan1 OpenMPIRBuilder OMPBuilder;
3227cfd267cSsstefan1
3237cfd267cSsstefan1 /// Map from runtime function kind to the runtime function description.
3247cfd267cSsstefan1 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
3257cfd267cSsstefan1 RuntimeFunction::OMPRTL___last>
3267cfd267cSsstefan1 RFIs;
3277cfd267cSsstefan1
328d9659bf6SJohannes Doerfert /// Map from function declarations/definitions to their runtime enum type.
329d9659bf6SJohannes Doerfert DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
330d9659bf6SJohannes Doerfert
3310f426935Ssstefan1 /// Map from ICV kind to the ICV description.
3320f426935Ssstefan1 EnumeratedArray<InternalControlVarInfo, InternalControlVar,
3330f426935Ssstefan1 InternalControlVar::ICV___last>
3340f426935Ssstefan1 ICVs;
3350f426935Ssstefan1
3360f426935Ssstefan1 /// Helper to initialize all internal control variable information for those
3370f426935Ssstefan1 /// defined in OMPKinds.def.
initializeInternalControlVars__anon23c38c770111::OMPInformationCache3380f426935Ssstefan1 void initializeInternalControlVars() {
3390f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL) \
3400f426935Ssstefan1 { \
3410f426935Ssstefan1 auto &ICV = ICVs[_Name]; \
3420f426935Ssstefan1 ICV.Setter = RTL; \
3430f426935Ssstefan1 }
3440f426935Ssstefan1 #define ICV_RT_GET(Name, RTL) \
3450f426935Ssstefan1 { \
3460f426935Ssstefan1 auto &ICV = ICVs[Name]; \
3470f426935Ssstefan1 ICV.Getter = RTL; \
3480f426935Ssstefan1 }
3490f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
3500f426935Ssstefan1 { \
3510f426935Ssstefan1 auto &ICV = ICVs[Enum]; \
3520f426935Ssstefan1 ICV.Name = _Name; \
3530f426935Ssstefan1 ICV.Kind = Enum; \
3540f426935Ssstefan1 ICV.InitKind = Init; \
3550f426935Ssstefan1 ICV.EnvVarName = _EnvVarName; \
3560f426935Ssstefan1 switch (ICV.InitKind) { \
357951e43f3Ssstefan1 case ICV_IMPLEMENTATION_DEFINED: \
3580f426935Ssstefan1 ICV.InitValue = nullptr; \
3590f426935Ssstefan1 break; \
360951e43f3Ssstefan1 case ICV_ZERO: \
3616aab27baSsstefan1 ICV.InitValue = ConstantInt::get( \
3626aab27baSsstefan1 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
3630f426935Ssstefan1 break; \
364951e43f3Ssstefan1 case ICV_FALSE: \
3656aab27baSsstefan1 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
3660f426935Ssstefan1 break; \
367951e43f3Ssstefan1 case ICV_LAST: \
3680f426935Ssstefan1 break; \
3690f426935Ssstefan1 } \
3700f426935Ssstefan1 }
3710f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3720f426935Ssstefan1 }
3730f426935Ssstefan1
3747cfd267cSsstefan1 /// Returns true if the function declaration \p F matches the runtime
3757cfd267cSsstefan1 /// function types, that is, return type \p RTFRetType, and argument types
3767cfd267cSsstefan1 /// \p RTFArgTypes.
declMatchesRTFTypes__anon23c38c770111::OMPInformationCache3777cfd267cSsstefan1 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
3787cfd267cSsstefan1 SmallVector<Type *, 8> &RTFArgTypes) {
3797cfd267cSsstefan1 // TODO: We should output information to the user (under debug output
3807cfd267cSsstefan1 // and via remarks).
3817cfd267cSsstefan1
3827cfd267cSsstefan1 if (!F)
3837cfd267cSsstefan1 return false;
3847cfd267cSsstefan1 if (F->getReturnType() != RTFRetType)
3857cfd267cSsstefan1 return false;
3867cfd267cSsstefan1 if (F->arg_size() != RTFArgTypes.size())
3877cfd267cSsstefan1 return false;
3887cfd267cSsstefan1
389c11ebfeaSJoseph Huber auto *RTFTyIt = RTFArgTypes.begin();
3907cfd267cSsstefan1 for (Argument &Arg : F->args()) {
3917cfd267cSsstefan1 if (Arg.getType() != *RTFTyIt)
3927cfd267cSsstefan1 return false;
3937cfd267cSsstefan1
3947cfd267cSsstefan1 ++RTFTyIt;
3957cfd267cSsstefan1 }
3967cfd267cSsstefan1
3977cfd267cSsstefan1 return true;
3987cfd267cSsstefan1 }
3997cfd267cSsstefan1
400b726c557SJohannes Doerfert // Helper to collect all uses of the declaration in the UsesMap.
collectUses__anon23c38c770111::OMPInformationCache401b8235d2bSsstefan1 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
4027cfd267cSsstefan1 unsigned NumUses = 0;
4037cfd267cSsstefan1 if (!RFI.Declaration)
4047cfd267cSsstefan1 return NumUses;
4057cfd267cSsstefan1 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
4067cfd267cSsstefan1
407b8235d2bSsstefan1 if (CollectStats) {
4087cfd267cSsstefan1 NumOpenMPRuntimeFunctionsIdentified += 1;
4097cfd267cSsstefan1 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
410b8235d2bSsstefan1 }
4117cfd267cSsstefan1
4127cfd267cSsstefan1 // TODO: We directly convert uses into proper calls and unknown uses.
4137cfd267cSsstefan1 for (Use &U : RFI.Declaration->uses()) {
4147cfd267cSsstefan1 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
4157cfd267cSsstefan1 if (ModuleSlice.count(UserI->getFunction())) {
4167cfd267cSsstefan1 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
4177cfd267cSsstefan1 ++NumUses;
4187cfd267cSsstefan1 }
4197cfd267cSsstefan1 } else {
4207cfd267cSsstefan1 RFI.getOrCreateUseVector(nullptr).push_back(&U);
4217cfd267cSsstefan1 ++NumUses;
4227cfd267cSsstefan1 }
4237cfd267cSsstefan1 }
4247cfd267cSsstefan1 return NumUses;
425b8235d2bSsstefan1 }
4267cfd267cSsstefan1
42797517055SGiorgis Georgakoudis // Helper function to recollect uses of a runtime function.
recollectUsesForFunction__anon23c38c770111::OMPInformationCache42897517055SGiorgis Georgakoudis void recollectUsesForFunction(RuntimeFunction RTF) {
42997517055SGiorgis Georgakoudis auto &RFI = RFIs[RTF];
430b8235d2bSsstefan1 RFI.clearUsesMap();
431b8235d2bSsstefan1 collectUses(RFI, /*CollectStats*/ false);
432b8235d2bSsstefan1 }
43397517055SGiorgis Georgakoudis
43497517055SGiorgis Georgakoudis // Helper function to recollect uses of all runtime functions.
recollectUses__anon23c38c770111::OMPInformationCache43597517055SGiorgis Georgakoudis void recollectUses() {
43697517055SGiorgis Georgakoudis for (int Idx = 0; Idx < RFIs.size(); ++Idx)
43797517055SGiorgis Georgakoudis recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
438b8235d2bSsstefan1 }
439b8235d2bSsstefan1
44006cfdd52SJoseph Huber // Helper function to inherit the calling convention of the function callee.
setCallingConvention__anon23c38c770111::OMPInformationCache44106cfdd52SJoseph Huber void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
44206cfdd52SJoseph Huber if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
44306cfdd52SJoseph Huber CI->setCallingConv(Fn->getCallingConv());
44406cfdd52SJoseph Huber }
44506cfdd52SJoseph Huber
446b8235d2bSsstefan1 /// Helper to initialize all runtime function information for those defined
447b8235d2bSsstefan1 /// in OpenMPKinds.def.
initializeRuntimeFunctions__anon23c38c770111::OMPInformationCache448b8235d2bSsstefan1 void initializeRuntimeFunctions() {
4497cfd267cSsstefan1 Module &M = *((*ModuleSlice.begin())->getParent());
4507cfd267cSsstefan1
4516aab27baSsstefan1 // Helper macros for handling __VA_ARGS__ in OMP_RTL
4526aab27baSsstefan1 #define OMP_TYPE(VarName, ...) \
4536aab27baSsstefan1 Type *VarName = OMPBuilder.VarName; \
4546aab27baSsstefan1 (void)VarName;
4556aab27baSsstefan1
4566aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...) \
4576aab27baSsstefan1 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
4586aab27baSsstefan1 (void)VarName##Ty; \
4596aab27baSsstefan1 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
4606aab27baSsstefan1 (void)VarName##PtrTy;
4616aab27baSsstefan1
4626aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...) \
4636aab27baSsstefan1 FunctionType *VarName = OMPBuilder.VarName; \
4646aab27baSsstefan1 (void)VarName; \
4656aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
4666aab27baSsstefan1 (void)VarName##Ptr;
4676aab27baSsstefan1
4686aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...) \
4696aab27baSsstefan1 StructType *VarName = OMPBuilder.VarName; \
4706aab27baSsstefan1 (void)VarName; \
4716aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
4726aab27baSsstefan1 (void)VarName##Ptr;
4736aab27baSsstefan1
4747cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
4757cfd267cSsstefan1 { \
4767cfd267cSsstefan1 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
4777cfd267cSsstefan1 Function *F = M.getFunction(_Name); \
478eef6601bSJoseph Huber RTLFunctions.insert(F); \
4796aab27baSsstefan1 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
480d9659bf6SJohannes Doerfert RuntimeFunctionIDMap[F] = _Enum; \
4817cfd267cSsstefan1 auto &RFI = RFIs[_Enum]; \
4827cfd267cSsstefan1 RFI.Kind = _Enum; \
4837cfd267cSsstefan1 RFI.Name = _Name; \
4847cfd267cSsstefan1 RFI.IsVarArg = _IsVarArg; \
4856aab27baSsstefan1 RFI.ReturnType = OMPBuilder._ReturnType; \
4867cfd267cSsstefan1 RFI.ArgumentTypes = std::move(ArgsTypes); \
4877cfd267cSsstefan1 RFI.Declaration = F; \
488b8235d2bSsstefan1 unsigned NumUses = collectUses(RFI); \
4897cfd267cSsstefan1 (void)NumUses; \
4907cfd267cSsstefan1 LLVM_DEBUG({ \
4917cfd267cSsstefan1 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
4927cfd267cSsstefan1 << " found\n"; \
4937cfd267cSsstefan1 if (RFI.Declaration) \
4947cfd267cSsstefan1 dbgs() << TAG << "-> got " << NumUses << " uses in " \
4957cfd267cSsstefan1 << RFI.getNumFunctionsWithUses() \
4967cfd267cSsstefan1 << " different functions.\n"; \
4977cfd267cSsstefan1 }); \
4987cfd267cSsstefan1 } \
4997cfd267cSsstefan1 }
5007cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def"
5017cfd267cSsstefan1
502*fd8fd9e5SJoseph Huber // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
503*fd8fd9e5SJoseph Huber // functions, except if `optnone` is present.
504*fd8fd9e5SJoseph Huber if (isOpenMPDevice(M)) {
505*fd8fd9e5SJoseph Huber for (Function &F : M) {
506*fd8fd9e5SJoseph Huber for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
507*fd8fd9e5SJoseph Huber if (F.hasFnAttribute(Attribute::NoInline) &&
508*fd8fd9e5SJoseph Huber F.getName().startswith(Prefix) &&
509*fd8fd9e5SJoseph Huber !F.hasFnAttribute(Attribute::OptimizeNone))
510*fd8fd9e5SJoseph Huber F.removeFnAttr(Attribute::NoInline);
511*fd8fd9e5SJoseph Huber }
512*fd8fd9e5SJoseph Huber }
513*fd8fd9e5SJoseph Huber
5147cfd267cSsstefan1 // TODO: We should attach the attributes defined in OMPKinds.def.
5157cfd267cSsstefan1 }
516e8039ad4SJohannes Doerfert
517e8039ad4SJohannes Doerfert /// Collection of known kernels (\see Kernel) in the module.
51886cdff0eSEli Friedman KernelSet &Kernels;
519eef6601bSJoseph Huber
520eef6601bSJoseph Huber /// Collection of known OpenMP runtime functions..
521eef6601bSJoseph Huber DenseSet<const Function *> RTLFunctions;
5227cfd267cSsstefan1 };
5237cfd267cSsstefan1
524d9659bf6SJohannes Doerfert template <typename Ty, bool InsertInvalidates = true>
5251a7f7790SShilei Tian struct BooleanStateWithSetVector : public BooleanState {
contains__anon23c38c770111::BooleanStateWithSetVector5261a7f7790SShilei Tian bool contains(const Ty &Elem) const { return Set.contains(Elem); }
insert__anon23c38c770111::BooleanStateWithSetVector5271a7f7790SShilei Tian bool insert(const Ty &Elem) {
528d9659bf6SJohannes Doerfert if (InsertInvalidates)
529d9659bf6SJohannes Doerfert BooleanState::indicatePessimisticFixpoint();
530d9659bf6SJohannes Doerfert return Set.insert(Elem);
531d9659bf6SJohannes Doerfert }
532d9659bf6SJohannes Doerfert
operator []__anon23c38c770111::BooleanStateWithSetVector5331a7f7790SShilei Tian const Ty &operator[](int Idx) const { return Set[Idx]; }
operator ==__anon23c38c770111::BooleanStateWithSetVector5341a7f7790SShilei Tian bool operator==(const BooleanStateWithSetVector &RHS) const {
535d9659bf6SJohannes Doerfert return BooleanState::operator==(RHS) && Set == RHS.Set;
536d9659bf6SJohannes Doerfert }
operator !=__anon23c38c770111::BooleanStateWithSetVector5371a7f7790SShilei Tian bool operator!=(const BooleanStateWithSetVector &RHS) const {
538d9659bf6SJohannes Doerfert return !(*this == RHS);
539d9659bf6SJohannes Doerfert }
540d9659bf6SJohannes Doerfert
empty__anon23c38c770111::BooleanStateWithSetVector541d9659bf6SJohannes Doerfert bool empty() const { return Set.empty(); }
size__anon23c38c770111::BooleanStateWithSetVector542d9659bf6SJohannes Doerfert size_t size() const { return Set.size(); }
543d9659bf6SJohannes Doerfert
544d9659bf6SJohannes Doerfert /// "Clamp" this state with \p RHS.
operator ^=__anon23c38c770111::BooleanStateWithSetVector5451a7f7790SShilei Tian BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
546d9659bf6SJohannes Doerfert BooleanState::operator^=(RHS);
547d9659bf6SJohannes Doerfert Set.insert(RHS.Set.begin(), RHS.Set.end());
548d9659bf6SJohannes Doerfert return *this;
549d9659bf6SJohannes Doerfert }
550d9659bf6SJohannes Doerfert
551d9659bf6SJohannes Doerfert private:
552d9659bf6SJohannes Doerfert /// A set to keep track of elements.
5531a7f7790SShilei Tian SetVector<Ty> Set;
554d9659bf6SJohannes Doerfert
555d9659bf6SJohannes Doerfert public:
begin__anon23c38c770111::BooleanStateWithSetVector556d9659bf6SJohannes Doerfert typename decltype(Set)::iterator begin() { return Set.begin(); }
end__anon23c38c770111::BooleanStateWithSetVector557d9659bf6SJohannes Doerfert typename decltype(Set)::iterator end() { return Set.end(); }
begin__anon23c38c770111::BooleanStateWithSetVector558d9659bf6SJohannes Doerfert typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
end__anon23c38c770111::BooleanStateWithSetVector559d9659bf6SJohannes Doerfert typename decltype(Set)::const_iterator end() const { return Set.end(); }
560d9659bf6SJohannes Doerfert };
561d9659bf6SJohannes Doerfert
5621a7f7790SShilei Tian template <typename Ty, bool InsertInvalidates = true>
5631a7f7790SShilei Tian using BooleanStateWithPtrSetVector =
5641a7f7790SShilei Tian BooleanStateWithSetVector<Ty *, InsertInvalidates>;
5651a7f7790SShilei Tian
566d9659bf6SJohannes Doerfert struct KernelInfoState : AbstractState {
567d9659bf6SJohannes Doerfert /// Flag to track if we reached a fixpoint.
568d9659bf6SJohannes Doerfert bool IsAtFixpoint = false;
569d9659bf6SJohannes Doerfert
570d9659bf6SJohannes Doerfert /// The parallel regions (identified by the outlined parallel functions) that
571d9659bf6SJohannes Doerfert /// can be reached from the associated function.
572d9659bf6SJohannes Doerfert BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
573d9659bf6SJohannes Doerfert ReachedKnownParallelRegions;
574d9659bf6SJohannes Doerfert
575d9659bf6SJohannes Doerfert /// State to track what parallel region we might reach.
576d9659bf6SJohannes Doerfert BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
577d9659bf6SJohannes Doerfert
578514c033dSJohannes Doerfert /// State to track if we are in SPMD-mode, assumed or know, and why we decided
579e8439ec8SGiorgis Georgakoudis /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
580e8439ec8SGiorgis Georgakoudis /// false.
58129a3e3ddSGiorgis Georgakoudis BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
582514c033dSJohannes Doerfert
583d9659bf6SJohannes Doerfert /// The __kmpc_target_init call in this kernel, if any. If we find more than
584d9659bf6SJohannes Doerfert /// one we abort as the kernel is malformed.
585d9659bf6SJohannes Doerfert CallBase *KernelInitCB = nullptr;
586d9659bf6SJohannes Doerfert
587d9659bf6SJohannes Doerfert /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
588d9659bf6SJohannes Doerfert /// one we abort as the kernel is malformed.
589d9659bf6SJohannes Doerfert CallBase *KernelDeinitCB = nullptr;
590d9659bf6SJohannes Doerfert
591ca662297SShilei Tian /// Flag to indicate if the associated function is a kernel entry.
592ca662297SShilei Tian bool IsKernelEntry = false;
593ca662297SShilei Tian
594ca662297SShilei Tian /// State to track what kernel entries can reach the associated function.
595ca662297SShilei Tian BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
596ca662297SShilei Tian
597e97e0a4fSShilei Tian /// State to indicate if we can track parallel level of the associated
598e97e0a4fSShilei Tian /// function. We will give up tracking if we encounter unknown caller or the
599e97e0a4fSShilei Tian /// caller is __kmpc_parallel_51.
600e97e0a4fSShilei Tian BooleanStateWithSetVector<uint8_t> ParallelLevels;
601e97e0a4fSShilei Tian
602d9659bf6SJohannes Doerfert /// Abstract State interface
603d9659bf6SJohannes Doerfert ///{
604d9659bf6SJohannes Doerfert
6053a3cb929SKazu Hirata KernelInfoState() = default;
KernelInfoState__anon23c38c770111::KernelInfoState606d9659bf6SJohannes Doerfert KernelInfoState(bool BestState) {
607d9659bf6SJohannes Doerfert if (!BestState)
608d9659bf6SJohannes Doerfert indicatePessimisticFixpoint();
609d9659bf6SJohannes Doerfert }
610d9659bf6SJohannes Doerfert
611d9659bf6SJohannes Doerfert /// See AbstractState::isValidState(...)
isValidState__anon23c38c770111::KernelInfoState612d9659bf6SJohannes Doerfert bool isValidState() const override { return true; }
613d9659bf6SJohannes Doerfert
614d9659bf6SJohannes Doerfert /// See AbstractState::isAtFixpoint(...)
isAtFixpoint__anon23c38c770111::KernelInfoState615d9659bf6SJohannes Doerfert bool isAtFixpoint() const override { return IsAtFixpoint; }
616d9659bf6SJohannes Doerfert
617d9659bf6SJohannes Doerfert /// See AbstractState::indicatePessimisticFixpoint(...)
indicatePessimisticFixpoint__anon23c38c770111::KernelInfoState618d9659bf6SJohannes Doerfert ChangeStatus indicatePessimisticFixpoint() override {
619d9659bf6SJohannes Doerfert IsAtFixpoint = true;
620c6457dcaSJohannes Doerfert ReachingKernelEntries.indicatePessimisticFixpoint();
621514c033dSJohannes Doerfert SPMDCompatibilityTracker.indicatePessimisticFixpoint();
622c6457dcaSJohannes Doerfert ReachedKnownParallelRegions.indicatePessimisticFixpoint();
623d9659bf6SJohannes Doerfert ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
624d9659bf6SJohannes Doerfert return ChangeStatus::CHANGED;
625d9659bf6SJohannes Doerfert }
626d9659bf6SJohannes Doerfert
627d9659bf6SJohannes Doerfert /// See AbstractState::indicateOptimisticFixpoint(...)
indicateOptimisticFixpoint__anon23c38c770111::KernelInfoState628d9659bf6SJohannes Doerfert ChangeStatus indicateOptimisticFixpoint() override {
629d9659bf6SJohannes Doerfert IsAtFixpoint = true;
630d61aac76SJohannes Doerfert ReachingKernelEntries.indicateOptimisticFixpoint();
631d61aac76SJohannes Doerfert SPMDCompatibilityTracker.indicateOptimisticFixpoint();
632d61aac76SJohannes Doerfert ReachedKnownParallelRegions.indicateOptimisticFixpoint();
633d61aac76SJohannes Doerfert ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
634d9659bf6SJohannes Doerfert return ChangeStatus::UNCHANGED;
635d9659bf6SJohannes Doerfert }
636d9659bf6SJohannes Doerfert
637d9659bf6SJohannes Doerfert /// Return the assumed state
getAssumed__anon23c38c770111::KernelInfoState638d9659bf6SJohannes Doerfert KernelInfoState &getAssumed() { return *this; }
getAssumed__anon23c38c770111::KernelInfoState639d9659bf6SJohannes Doerfert const KernelInfoState &getAssumed() const { return *this; }
640d9659bf6SJohannes Doerfert
operator ==__anon23c38c770111::KernelInfoState641d9659bf6SJohannes Doerfert bool operator==(const KernelInfoState &RHS) const {
642514c033dSJohannes Doerfert if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
643514c033dSJohannes Doerfert return false;
644d9659bf6SJohannes Doerfert if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
645d9659bf6SJohannes Doerfert return false;
646d9659bf6SJohannes Doerfert if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
647d9659bf6SJohannes Doerfert return false;
648ca662297SShilei Tian if (ReachingKernelEntries != RHS.ReachingKernelEntries)
649ca662297SShilei Tian return false;
650d9659bf6SJohannes Doerfert return true;
651d9659bf6SJohannes Doerfert }
652d9659bf6SJohannes Doerfert
6536b9a3ec3SJoseph Huber /// Returns true if this kernel contains any OpenMP parallel regions.
mayContainParallelRegion__anon23c38c770111::KernelInfoState6546b9a3ec3SJoseph Huber bool mayContainParallelRegion() {
6556b9a3ec3SJoseph Huber return !ReachedKnownParallelRegions.empty() ||
6566b9a3ec3SJoseph Huber !ReachedUnknownParallelRegions.empty();
6576b9a3ec3SJoseph Huber }
6586b9a3ec3SJoseph Huber
659d9659bf6SJohannes Doerfert /// Return empty set as the best state of potential values.
getBestState__anon23c38c770111::KernelInfoState660d9659bf6SJohannes Doerfert static KernelInfoState getBestState() { return KernelInfoState(true); }
661d9659bf6SJohannes Doerfert
getBestState__anon23c38c770111::KernelInfoState662d9659bf6SJohannes Doerfert static KernelInfoState getBestState(KernelInfoState &KIS) {
663d9659bf6SJohannes Doerfert return getBestState();
664d9659bf6SJohannes Doerfert }
665d9659bf6SJohannes Doerfert
666d9659bf6SJohannes Doerfert /// Return full set as the worst state of potential values.
getWorstState__anon23c38c770111::KernelInfoState667d9659bf6SJohannes Doerfert static KernelInfoState getWorstState() { return KernelInfoState(false); }
668d9659bf6SJohannes Doerfert
669d9659bf6SJohannes Doerfert /// "Clamp" this state with \p KIS.
operator ^=__anon23c38c770111::KernelInfoState670d9659bf6SJohannes Doerfert KernelInfoState operator^=(const KernelInfoState &KIS) {
671d9659bf6SJohannes Doerfert // Do not merge two different _init and _deinit call sites.
672d9659bf6SJohannes Doerfert if (KIS.KernelInitCB) {
673d9659bf6SJohannes Doerfert if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
674e6e440aeSJohannes Doerfert llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
675e6e440aeSJohannes Doerfert "assumptions.");
676d9659bf6SJohannes Doerfert KernelInitCB = KIS.KernelInitCB;
677d9659bf6SJohannes Doerfert }
678d9659bf6SJohannes Doerfert if (KIS.KernelDeinitCB) {
679d9659bf6SJohannes Doerfert if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
680e6e440aeSJohannes Doerfert llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
681e6e440aeSJohannes Doerfert "assumptions.");
682d9659bf6SJohannes Doerfert KernelDeinitCB = KIS.KernelDeinitCB;
683d9659bf6SJohannes Doerfert }
684514c033dSJohannes Doerfert SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
685d9659bf6SJohannes Doerfert ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
686d9659bf6SJohannes Doerfert ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
687d9659bf6SJohannes Doerfert return *this;
688d9659bf6SJohannes Doerfert }
689d9659bf6SJohannes Doerfert
operator &=__anon23c38c770111::KernelInfoState690d9659bf6SJohannes Doerfert KernelInfoState operator&=(const KernelInfoState &KIS) {
691d9659bf6SJohannes Doerfert return (*this ^= KIS);
692d9659bf6SJohannes Doerfert }
693d9659bf6SJohannes Doerfert
694d9659bf6SJohannes Doerfert ///}
695d9659bf6SJohannes Doerfert };
696d9659bf6SJohannes Doerfert
6978931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload
6988931add6SHamilton Tobon Mosquera /// array, to a vector in memory.
6998931add6SHamilton Tobon Mosquera struct OffloadArray {
7008931add6SHamilton Tobon Mosquera /// Physical array (in the IR).
7018931add6SHamilton Tobon Mosquera AllocaInst *Array = nullptr;
7028931add6SHamilton Tobon Mosquera /// Mapped values.
7038931add6SHamilton Tobon Mosquera SmallVector<Value *, 8> StoredValues;
7048931add6SHamilton Tobon Mosquera /// Last stores made in the offload array.
7058931add6SHamilton Tobon Mosquera SmallVector<StoreInst *, 8> LastAccesses;
7068931add6SHamilton Tobon Mosquera
7078931add6SHamilton Tobon Mosquera OffloadArray() = default;
7088931add6SHamilton Tobon Mosquera
7098931add6SHamilton Tobon Mosquera /// Initializes the OffloadArray with the values stored in \p Array before
7108931add6SHamilton Tobon Mosquera /// instruction \p Before is reached. Returns false if the initialization
7118931add6SHamilton Tobon Mosquera /// fails.
7128931add6SHamilton Tobon Mosquera /// This MUST be used immediately after the construction of the object.
initialize__anon23c38c770111::OffloadArray7138931add6SHamilton Tobon Mosquera bool initialize(AllocaInst &Array, Instruction &Before) {
7148931add6SHamilton Tobon Mosquera if (!Array.getAllocatedType()->isArrayTy())
7158931add6SHamilton Tobon Mosquera return false;
7168931add6SHamilton Tobon Mosquera
7178931add6SHamilton Tobon Mosquera if (!getValues(Array, Before))
7188931add6SHamilton Tobon Mosquera return false;
7198931add6SHamilton Tobon Mosquera
7208931add6SHamilton Tobon Mosquera this->Array = &Array;
7218931add6SHamilton Tobon Mosquera return true;
7228931add6SHamilton Tobon Mosquera }
7238931add6SHamilton Tobon Mosquera
724da8bec47SJoseph Huber static const unsigned DeviceIDArgNum = 1;
725da8bec47SJoseph Huber static const unsigned BasePtrsArgNum = 3;
726da8bec47SJoseph Huber static const unsigned PtrsArgNum = 4;
727da8bec47SJoseph Huber static const unsigned SizesArgNum = 5;
7281d3d9b9cSHamilton Tobon Mosquera
7298931add6SHamilton Tobon Mosquera private:
7308931add6SHamilton Tobon Mosquera /// Traverses the BasicBlock where \p Array is, collecting the stores made to
7318931add6SHamilton Tobon Mosquera /// \p Array, leaving StoredValues with the values stored before the
7328931add6SHamilton Tobon Mosquera /// instruction \p Before is reached.
getValues__anon23c38c770111::OffloadArray7338931add6SHamilton Tobon Mosquera bool getValues(AllocaInst &Array, Instruction &Before) {
7348931add6SHamilton Tobon Mosquera // Initialize container.
735d08d490aSJohannes Doerfert const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
7368931add6SHamilton Tobon Mosquera StoredValues.assign(NumValues, nullptr);
7378931add6SHamilton Tobon Mosquera LastAccesses.assign(NumValues, nullptr);
7388931add6SHamilton Tobon Mosquera
7398931add6SHamilton Tobon Mosquera // TODO: This assumes the instruction \p Before is in the same
7408931add6SHamilton Tobon Mosquera // BasicBlock as Array. Make it general, for any control flow graph.
7418931add6SHamilton Tobon Mosquera BasicBlock *BB = Array.getParent();
7428931add6SHamilton Tobon Mosquera if (BB != Before.getParent())
7438931add6SHamilton Tobon Mosquera return false;
7448931add6SHamilton Tobon Mosquera
7458931add6SHamilton Tobon Mosquera const DataLayout &DL = Array.getModule()->getDataLayout();
7468931add6SHamilton Tobon Mosquera const unsigned int PointerSize = DL.getPointerSize();
7478931add6SHamilton Tobon Mosquera
7488931add6SHamilton Tobon Mosquera for (Instruction &I : *BB) {
7498931add6SHamilton Tobon Mosquera if (&I == &Before)
7508931add6SHamilton Tobon Mosquera break;
7518931add6SHamilton Tobon Mosquera
7528931add6SHamilton Tobon Mosquera if (!isa<StoreInst>(&I))
7538931add6SHamilton Tobon Mosquera continue;
7548931add6SHamilton Tobon Mosquera
7558931add6SHamilton Tobon Mosquera auto *S = cast<StoreInst>(&I);
7568931add6SHamilton Tobon Mosquera int64_t Offset = -1;
757d08d490aSJohannes Doerfert auto *Dst =
758d08d490aSJohannes Doerfert GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
7598931add6SHamilton Tobon Mosquera if (Dst == &Array) {
7608931add6SHamilton Tobon Mosquera int64_t Idx = Offset / PointerSize;
7618931add6SHamilton Tobon Mosquera StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
7628931add6SHamilton Tobon Mosquera LastAccesses[Idx] = S;
7638931add6SHamilton Tobon Mosquera }
7648931add6SHamilton Tobon Mosquera }
7658931add6SHamilton Tobon Mosquera
7668931add6SHamilton Tobon Mosquera return isFilled();
7678931add6SHamilton Tobon Mosquera }
7688931add6SHamilton Tobon Mosquera
7698931add6SHamilton Tobon Mosquera /// Returns true if all values in StoredValues and
7708931add6SHamilton Tobon Mosquera /// LastAccesses are not nullptrs.
isFilled__anon23c38c770111::OffloadArray7718931add6SHamilton Tobon Mosquera bool isFilled() {
7728931add6SHamilton Tobon Mosquera const unsigned NumValues = StoredValues.size();
7738931add6SHamilton Tobon Mosquera for (unsigned I = 0; I < NumValues; ++I) {
7748931add6SHamilton Tobon Mosquera if (!StoredValues[I] || !LastAccesses[I])
7758931add6SHamilton Tobon Mosquera return false;
7768931add6SHamilton Tobon Mosquera }
7778931add6SHamilton Tobon Mosquera
7788931add6SHamilton Tobon Mosquera return true;
7798931add6SHamilton Tobon Mosquera }
7808931add6SHamilton Tobon Mosquera };
7818931add6SHamilton Tobon Mosquera
7827cfd267cSsstefan1 struct OpenMPOpt {
7837cfd267cSsstefan1
7847cfd267cSsstefan1 using OptimizationRemarkGetter =
7857cfd267cSsstefan1 function_ref<OptimizationRemarkEmitter &(Function *)>;
7867cfd267cSsstefan1
OpenMPOpt__anon23c38c770111::OpenMPOpt7877cfd267cSsstefan1 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
7887cfd267cSsstefan1 OptimizationRemarkGetter OREGetter,
789b8235d2bSsstefan1 OMPInformationCache &OMPInfoCache, Attributor &A)
79077b79d79SMehdi Amini : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
791b8235d2bSsstefan1 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
7927cfd267cSsstefan1
793a2281419SJoseph Huber /// Check if any remarks are enabled for openmp-opt
remarksEnabled__anon23c38c770111::OpenMPOpt794a2281419SJoseph Huber bool remarksEnabled() {
795a2281419SJoseph Huber auto &Ctx = M.getContext();
796a2281419SJoseph Huber return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
797a2281419SJoseph Huber }
798a2281419SJoseph Huber
7999548b74aSJohannes Doerfert /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
run__anon23c38c770111::OpenMPOpt800b2ad63d3SJoseph Huber bool run(bool IsModulePass) {
80154bd3751SJohannes Doerfert if (SCC.empty())
80254bd3751SJohannes Doerfert return false;
80354bd3751SJohannes Doerfert
8049548b74aSJohannes Doerfert bool Changed = false;
8059548b74aSJohannes Doerfert
8069548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
80777b79d79SMehdi Amini << " functions in a slice with "
80877b79d79SMehdi Amini << OMPInfoCache.ModuleSlice.size() << " functions\n");
8099548b74aSJohannes Doerfert
810b2ad63d3SJoseph Huber if (IsModulePass) {
811d9659bf6SJohannes Doerfert Changed |= runAttributor(IsModulePass);
81218283125SJoseph Huber
8136fc51c9fSJoseph Huber // Recollect uses, in case Attributor deleted any.
8146fc51c9fSJoseph Huber OMPInfoCache.recollectUses();
8156fc51c9fSJoseph Huber
816be2b5696SJohannes Doerfert // TODO: This should be folded into buildCustomStateMachine.
817be2b5696SJohannes Doerfert Changed |= rewriteDeviceCodeStateMachine();
818be2b5696SJohannes Doerfert
819b2ad63d3SJoseph Huber if (remarksEnabled())
820b2ad63d3SJoseph Huber analysisGlobalization();
8213c8a4c6fSJohannes Doerfert
8223c8a4c6fSJohannes Doerfert Changed |= eliminateBarriers();
823b2ad63d3SJoseph Huber } else {
824e8039ad4SJohannes Doerfert if (PrintICVValues)
825e8039ad4SJohannes Doerfert printICVs();
826e8039ad4SJohannes Doerfert if (PrintOpenMPKernels)
827e8039ad4SJohannes Doerfert printKernels();
828e8039ad4SJohannes Doerfert
829d9659bf6SJohannes Doerfert Changed |= runAttributor(IsModulePass);
830e8039ad4SJohannes Doerfert
831e8039ad4SJohannes Doerfert // Recollect uses, in case Attributor deleted any.
832e8039ad4SJohannes Doerfert OMPInfoCache.recollectUses();
833e8039ad4SJohannes Doerfert
834e8039ad4SJohannes Doerfert Changed |= deleteParallelRegions();
835d9659bf6SJohannes Doerfert
836496f8e5bSHamilton Tobon Mosquera if (HideMemoryTransferLatency)
837496f8e5bSHamilton Tobon Mosquera Changed |= hideMemTransfersLatency();
8383a6bfcf2SGiorgis Georgakoudis Changed |= deduplicateRuntimeCalls();
8393a6bfcf2SGiorgis Georgakoudis if (EnableParallelRegionMerging) {
8403a6bfcf2SGiorgis Georgakoudis if (mergeParallelRegions()) {
8413a6bfcf2SGiorgis Georgakoudis deduplicateRuntimeCalls();
8423a6bfcf2SGiorgis Georgakoudis Changed = true;
8433a6bfcf2SGiorgis Georgakoudis }
8443a6bfcf2SGiorgis Georgakoudis }
8453c8a4c6fSJohannes Doerfert
8463c8a4c6fSJohannes Doerfert Changed |= eliminateBarriers();
847b2ad63d3SJoseph Huber }
848e8039ad4SJohannes Doerfert
849e8039ad4SJohannes Doerfert return Changed;
850e8039ad4SJohannes Doerfert }
851e8039ad4SJohannes Doerfert
8520f426935Ssstefan1 /// Print initial ICV values for testing.
8530f426935Ssstefan1 /// FIXME: This should be done from the Attributor once it is added.
printICVs__anon23c38c770111::OpenMPOpt854e8039ad4SJohannes Doerfert void printICVs() const {
855cb9cfa0dSsstefan1 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
856cb9cfa0dSsstefan1 ICV_proc_bind};
8570f426935Ssstefan1
8580f426935Ssstefan1 for (Function *F : OMPInfoCache.ModuleSlice) {
8590f426935Ssstefan1 for (auto ICV : ICVs) {
8600f426935Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV];
8612db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) {
8622db182ffSJoseph Huber return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
8630f426935Ssstefan1 << " Value: "
8640f426935Ssstefan1 << (ICVInfo.InitValue
86561cdaf66SSimon Pilgrim ? toString(ICVInfo.InitValue->getValue(), 10, true)
8660f426935Ssstefan1 : "IMPLEMENTATION_DEFINED");
8670f426935Ssstefan1 };
8680f426935Ssstefan1
8692db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
8700f426935Ssstefan1 }
8710f426935Ssstefan1 }
8720f426935Ssstefan1 }
8730f426935Ssstefan1
874e8039ad4SJohannes Doerfert /// Print OpenMP GPU kernels for testing.
printKernels__anon23c38c770111::OpenMPOpt875e8039ad4SJohannes Doerfert void printKernels() const {
876e8039ad4SJohannes Doerfert for (Function *F : SCC) {
877e8039ad4SJohannes Doerfert if (!OMPInfoCache.Kernels.count(F))
878e8039ad4SJohannes Doerfert continue;
879b8235d2bSsstefan1
8802db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) {
8812db182ffSJoseph Huber return ORA << "OpenMP GPU kernel "
882e8039ad4SJohannes Doerfert << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
883e8039ad4SJohannes Doerfert };
884b8235d2bSsstefan1
8852db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
886e8039ad4SJohannes Doerfert }
8879548b74aSJohannes Doerfert }
8889548b74aSJohannes Doerfert
8897cfd267cSsstefan1 /// Return the call if \p U is a callee use in a regular call. If \p RFI is
8907cfd267cSsstefan1 /// given it has to be the callee or a nullptr is returned.
getCallIfRegularCall__anon23c38c770111::OpenMPOpt8917cfd267cSsstefan1 static CallInst *getCallIfRegularCall(
8927cfd267cSsstefan1 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
8937cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(U.getUser());
8947cfd267cSsstefan1 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
895c4b1fe05SJohannes Doerfert (!RFI ||
896c4b1fe05SJohannes Doerfert (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
8977cfd267cSsstefan1 return CI;
8987cfd267cSsstefan1 return nullptr;
8997cfd267cSsstefan1 }
9007cfd267cSsstefan1
9017cfd267cSsstefan1 /// Return the call if \p V is a regular call. If \p RFI is given it has to be
9027cfd267cSsstefan1 /// the callee or a nullptr is returned.
getCallIfRegularCall__anon23c38c770111::OpenMPOpt9037cfd267cSsstefan1 static CallInst *getCallIfRegularCall(
9047cfd267cSsstefan1 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
9057cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(&V);
9067cfd267cSsstefan1 if (CI && !CI->hasOperandBundles() &&
907c4b1fe05SJohannes Doerfert (!RFI ||
908c4b1fe05SJohannes Doerfert (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
9097cfd267cSsstefan1 return CI;
9107cfd267cSsstefan1 return nullptr;
9117cfd267cSsstefan1 }
9127cfd267cSsstefan1
9139548b74aSJohannes Doerfert private:
9143a6bfcf2SGiorgis Georgakoudis /// Merge parallel regions when it is safe.
mergeParallelRegions__anon23c38c770111::OpenMPOpt9153a6bfcf2SGiorgis Georgakoudis bool mergeParallelRegions() {
9163a6bfcf2SGiorgis Georgakoudis const unsigned CallbackCalleeOperand = 2;
9173a6bfcf2SGiorgis Georgakoudis const unsigned CallbackFirstArgOperand = 3;
9183a6bfcf2SGiorgis Georgakoudis using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
9193a6bfcf2SGiorgis Georgakoudis
9203a6bfcf2SGiorgis Georgakoudis // Check if there are any __kmpc_fork_call calls to merge.
9213a6bfcf2SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &RFI =
9223a6bfcf2SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
9233a6bfcf2SGiorgis Georgakoudis
9243a6bfcf2SGiorgis Georgakoudis if (!RFI.Declaration)
9253a6bfcf2SGiorgis Georgakoudis return false;
9263a6bfcf2SGiorgis Georgakoudis
92797517055SGiorgis Georgakoudis // Unmergable calls that prevent merging a parallel region.
92897517055SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
92997517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
93097517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
93197517055SGiorgis Georgakoudis };
9323a6bfcf2SGiorgis Georgakoudis
9333a6bfcf2SGiorgis Georgakoudis bool Changed = false;
9343a6bfcf2SGiorgis Georgakoudis LoopInfo *LI = nullptr;
9353a6bfcf2SGiorgis Georgakoudis DominatorTree *DT = nullptr;
9363a6bfcf2SGiorgis Georgakoudis
9373a6bfcf2SGiorgis Georgakoudis SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
9383a6bfcf2SGiorgis Georgakoudis
9393a6bfcf2SGiorgis Georgakoudis BasicBlock *StartBB = nullptr, *EndBB = nullptr;
940ff289feeSMichael Kruse auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
9413a6bfcf2SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock();
9423a6bfcf2SGiorgis Georgakoudis BasicBlock *CGEndBB =
9433a6bfcf2SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
9443a6bfcf2SGiorgis Georgakoudis assert(StartBB != nullptr && "StartBB should not be null");
9453a6bfcf2SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, StartBB);
9463a6bfcf2SGiorgis Georgakoudis assert(EndBB != nullptr && "EndBB should not be null");
9473a6bfcf2SGiorgis Georgakoudis EndBB->getTerminator()->setSuccessor(0, CGEndBB);
9483a6bfcf2SGiorgis Georgakoudis };
9493a6bfcf2SGiorgis Georgakoudis
950240dd924SAlex Zinenko auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
951240dd924SAlex Zinenko Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
952240dd924SAlex Zinenko ReplacementValue = &Inner;
9533a6bfcf2SGiorgis Georgakoudis return CodeGenIP;
9543a6bfcf2SGiorgis Georgakoudis };
9553a6bfcf2SGiorgis Georgakoudis
9563a6bfcf2SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy CodeGenIP) {};
9573a6bfcf2SGiorgis Georgakoudis
95897517055SGiorgis Georgakoudis /// Create a sequential execution region within a merged parallel region,
95997517055SGiorgis Georgakoudis /// encapsulated in a master construct with a barrier for synchronization.
96097517055SGiorgis Georgakoudis auto CreateSequentialRegion = [&](Function *OuterFn,
96197517055SGiorgis Georgakoudis BasicBlock *OuterPredBB,
96297517055SGiorgis Georgakoudis Instruction *SeqStartI,
96397517055SGiorgis Georgakoudis Instruction *SeqEndI) {
96497517055SGiorgis Georgakoudis // Isolate the instructions of the sequential region to a separate
96597517055SGiorgis Georgakoudis // block.
96697517055SGiorgis Georgakoudis BasicBlock *ParentBB = SeqStartI->getParent();
96797517055SGiorgis Georgakoudis BasicBlock *SeqEndBB =
96897517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
96997517055SGiorgis Georgakoudis BasicBlock *SeqAfterBB =
97097517055SGiorgis Georgakoudis SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
97197517055SGiorgis Georgakoudis BasicBlock *SeqStartBB =
97297517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
97397517055SGiorgis Georgakoudis
97497517055SGiorgis Georgakoudis assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
97597517055SGiorgis Georgakoudis "Expected a different CFG");
97697517055SGiorgis Georgakoudis const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
97797517055SGiorgis Georgakoudis ParentBB->getTerminator()->eraseFromParent();
97897517055SGiorgis Georgakoudis
979ff289feeSMichael Kruse auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
98097517055SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock();
98197517055SGiorgis Georgakoudis BasicBlock *CGEndBB =
98297517055SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
98397517055SGiorgis Georgakoudis assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
98497517055SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
98597517055SGiorgis Georgakoudis assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
98697517055SGiorgis Georgakoudis SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
98797517055SGiorgis Georgakoudis };
98897517055SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy CodeGenIP) {};
98997517055SGiorgis Georgakoudis
99097517055SGiorgis Georgakoudis // Find outputs from the sequential region to outside users and
99197517055SGiorgis Georgakoudis // broadcast their values to them.
99297517055SGiorgis Georgakoudis for (Instruction &I : *SeqStartBB) {
99397517055SGiorgis Georgakoudis SmallPtrSet<Instruction *, 4> OutsideUsers;
99497517055SGiorgis Georgakoudis for (User *Usr : I.users()) {
99597517055SGiorgis Georgakoudis Instruction &UsrI = *cast<Instruction>(Usr);
99697517055SGiorgis Georgakoudis // Ignore outputs to LT intrinsics, code extraction for the merged
99797517055SGiorgis Georgakoudis // parallel region will fix them.
99897517055SGiorgis Georgakoudis if (UsrI.isLifetimeStartOrEnd())
99997517055SGiorgis Georgakoudis continue;
100097517055SGiorgis Georgakoudis
100197517055SGiorgis Georgakoudis if (UsrI.getParent() != SeqStartBB)
100297517055SGiorgis Georgakoudis OutsideUsers.insert(&UsrI);
100397517055SGiorgis Georgakoudis }
100497517055SGiorgis Georgakoudis
100597517055SGiorgis Georgakoudis if (OutsideUsers.empty())
100697517055SGiorgis Georgakoudis continue;
100797517055SGiorgis Georgakoudis
100897517055SGiorgis Georgakoudis // Emit an alloca in the outer region to store the broadcasted
100997517055SGiorgis Georgakoudis // value.
101097517055SGiorgis Georgakoudis const DataLayout &DL = M.getDataLayout();
101197517055SGiorgis Georgakoudis AllocaInst *AllocaI = new AllocaInst(
101297517055SGiorgis Georgakoudis I.getType(), DL.getAllocaAddrSpace(), nullptr,
101397517055SGiorgis Georgakoudis I.getName() + ".seq.output.alloc", &OuterFn->front().front());
101497517055SGiorgis Georgakoudis
101597517055SGiorgis Georgakoudis // Emit a store instruction in the sequential BB to update the
101697517055SGiorgis Georgakoudis // value.
101797517055SGiorgis Georgakoudis new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
101897517055SGiorgis Georgakoudis
101997517055SGiorgis Georgakoudis // Emit a load instruction and replace the use of the output value
102097517055SGiorgis Georgakoudis // with it.
102197517055SGiorgis Georgakoudis for (Instruction *UsrI : OutsideUsers) {
10225b70c12fSJohannes Doerfert LoadInst *LoadI = new LoadInst(
10235b70c12fSJohannes Doerfert I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
102497517055SGiorgis Georgakoudis UsrI->replaceUsesOfWith(&I, LoadI);
102597517055SGiorgis Georgakoudis }
102697517055SGiorgis Georgakoudis }
102797517055SGiorgis Georgakoudis
102897517055SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc(
102997517055SGiorgis Georgakoudis InsertPointTy(ParentBB, ParentBB->end()), DL);
103097517055SGiorgis Georgakoudis InsertPointTy SeqAfterIP =
103197517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
103297517055SGiorgis Georgakoudis
103397517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
103497517055SGiorgis Georgakoudis
103597517055SGiorgis Georgakoudis BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
103697517055SGiorgis Georgakoudis
103797517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
103897517055SGiorgis Georgakoudis << "\n");
103997517055SGiorgis Georgakoudis };
104097517055SGiorgis Georgakoudis
10413a6bfcf2SGiorgis Georgakoudis // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
10423a6bfcf2SGiorgis Georgakoudis // contained in BB and only separated by instructions that can be
10433a6bfcf2SGiorgis Georgakoudis // redundantly executed in parallel. The block BB is split before the first
10443a6bfcf2SGiorgis Georgakoudis // call (in MergableCIs) and after the last so the entire region we merge
10453a6bfcf2SGiorgis Georgakoudis // into a single parallel region is contained in a single basic block
10463a6bfcf2SGiorgis Georgakoudis // without any other instructions. We use the OpenMPIRBuilder to outline
10473a6bfcf2SGiorgis Georgakoudis // that block and call the resulting function via __kmpc_fork_call.
10489e7a2bfcSNikita Popov auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
10499e7a2bfcSNikita Popov BasicBlock *BB) {
10503a6bfcf2SGiorgis Georgakoudis // TODO: Change the interface to allow single CIs expanded, e.g, to
10513a6bfcf2SGiorgis Georgakoudis // include an outer loop.
10523a6bfcf2SGiorgis Georgakoudis assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
10533a6bfcf2SGiorgis Georgakoudis
10543a6bfcf2SGiorgis Georgakoudis auto Remark = [&](OptimizationRemark OR) {
1055eef6601bSJoseph Huber OR << "Parallel region merged with parallel region"
1056eef6601bSJoseph Huber << (MergableCIs.size() > 2 ? "s" : "") << " at ";
105723b0ab2aSKazu Hirata for (auto *CI : llvm::drop_begin(MergableCIs)) {
10583a6bfcf2SGiorgis Georgakoudis OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
10593a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back())
10603a6bfcf2SGiorgis Georgakoudis OR << ", ";
10613a6bfcf2SGiorgis Georgakoudis }
1062eef6601bSJoseph Huber return OR << ".";
10633a6bfcf2SGiorgis Georgakoudis };
10643a6bfcf2SGiorgis Georgakoudis
10652c31d5ebSJoseph Huber emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
10663a6bfcf2SGiorgis Georgakoudis
10673a6bfcf2SGiorgis Georgakoudis Function *OriginalFn = BB->getParent();
10683a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
10693a6bfcf2SGiorgis Georgakoudis << " parallel regions in " << OriginalFn->getName()
10703a6bfcf2SGiorgis Georgakoudis << "\n");
10713a6bfcf2SGiorgis Georgakoudis
10723a6bfcf2SGiorgis Georgakoudis // Isolate the calls to merge in a separate block.
10733a6bfcf2SGiorgis Georgakoudis EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
10743a6bfcf2SGiorgis Georgakoudis BasicBlock *AfterBB =
10753a6bfcf2SGiorgis Georgakoudis SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
10763a6bfcf2SGiorgis Georgakoudis StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
10773a6bfcf2SGiorgis Georgakoudis "omp.par.merged");
10783a6bfcf2SGiorgis Georgakoudis
10793a6bfcf2SGiorgis Georgakoudis assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
10803a6bfcf2SGiorgis Georgakoudis const DebugLoc DL = BB->getTerminator()->getDebugLoc();
10813a6bfcf2SGiorgis Georgakoudis BB->getTerminator()->eraseFromParent();
10823a6bfcf2SGiorgis Georgakoudis
108397517055SGiorgis Georgakoudis // Create sequential regions for sequential instructions that are
108497517055SGiorgis Georgakoudis // in-between mergable parallel regions.
108597517055SGiorgis Georgakoudis for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
108697517055SGiorgis Georgakoudis It != End; ++It) {
108797517055SGiorgis Georgakoudis Instruction *ForkCI = *It;
108897517055SGiorgis Georgakoudis Instruction *NextForkCI = *(It + 1);
108997517055SGiorgis Georgakoudis
109097517055SGiorgis Georgakoudis // Continue if there are not in-between instructions.
109197517055SGiorgis Georgakoudis if (ForkCI->getNextNode() == NextForkCI)
109297517055SGiorgis Georgakoudis continue;
109397517055SGiorgis Georgakoudis
109497517055SGiorgis Georgakoudis CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
109597517055SGiorgis Georgakoudis NextForkCI->getPrevNode());
109697517055SGiorgis Georgakoudis }
109797517055SGiorgis Georgakoudis
10983a6bfcf2SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
10993a6bfcf2SGiorgis Georgakoudis DL);
11003a6bfcf2SGiorgis Georgakoudis IRBuilder<>::InsertPoint AllocaIP(
11013a6bfcf2SGiorgis Georgakoudis &OriginalFn->getEntryBlock(),
11023a6bfcf2SGiorgis Georgakoudis OriginalFn->getEntryBlock().getFirstInsertionPt());
11033a6bfcf2SGiorgis Georgakoudis // Create the merged parallel region with default proc binding, to
11043a6bfcf2SGiorgis Georgakoudis // avoid overriding binding settings, and without explicit cancellation.
1105e5dba2d7SMichael Kruse InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
11063a6bfcf2SGiorgis Georgakoudis Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
11073a6bfcf2SGiorgis Georgakoudis OMP_PROC_BIND_default, /* IsCancellable */ false);
11083a6bfcf2SGiorgis Georgakoudis BranchInst::Create(AfterBB, AfterIP.getBlock());
11093a6bfcf2SGiorgis Georgakoudis
11103a6bfcf2SGiorgis Georgakoudis // Perform the actual outlining.
11117cb4c261SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.finalize(OriginalFn);
11123a6bfcf2SGiorgis Georgakoudis
11133a6bfcf2SGiorgis Georgakoudis Function *OutlinedFn = MergableCIs.front()->getCaller();
11143a6bfcf2SGiorgis Georgakoudis
11153a6bfcf2SGiorgis Georgakoudis // Replace the __kmpc_fork_call calls with direct calls to the outlined
11163a6bfcf2SGiorgis Georgakoudis // callbacks.
11173a6bfcf2SGiorgis Georgakoudis SmallVector<Value *, 8> Args;
11183a6bfcf2SGiorgis Georgakoudis for (auto *CI : MergableCIs) {
1119875782bdSNikita Popov Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
1120875782bdSNikita Popov FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
11213a6bfcf2SGiorgis Georgakoudis Args.clear();
11223a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(0));
11233a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(1));
11244f0225f6SKazu Hirata for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
11254f0225f6SKazu Hirata ++U)
11263a6bfcf2SGiorgis Georgakoudis Args.push_back(CI->getArgOperand(U));
11273a6bfcf2SGiorgis Georgakoudis
11283a6bfcf2SGiorgis Georgakoudis CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
11293a6bfcf2SGiorgis Georgakoudis if (CI->getDebugLoc())
11303a6bfcf2SGiorgis Georgakoudis NewCI->setDebugLoc(CI->getDebugLoc());
11313a6bfcf2SGiorgis Georgakoudis
11323a6bfcf2SGiorgis Georgakoudis // Forward parameter attributes from the callback to the callee.
11334f0225f6SKazu Hirata for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
11344f0225f6SKazu Hirata ++U)
113580ea2bb5SArthur Eubanks for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
11363a6bfcf2SGiorgis Georgakoudis NewCI->addParamAttr(
11373a6bfcf2SGiorgis Georgakoudis U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
11383a6bfcf2SGiorgis Georgakoudis
11393a6bfcf2SGiorgis Georgakoudis // Emit an explicit barrier to replace the implicit fork-join barrier.
11403a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back()) {
11413a6bfcf2SGiorgis Georgakoudis // TODO: Remove barrier if the merged parallel region includes the
11423a6bfcf2SGiorgis Georgakoudis // 'nowait' clause.
1143e5dba2d7SMichael Kruse OMPInfoCache.OMPBuilder.createBarrier(
11443a6bfcf2SGiorgis Georgakoudis InsertPointTy(NewCI->getParent(),
11453a6bfcf2SGiorgis Georgakoudis NewCI->getNextNode()->getIterator()),
11463a6bfcf2SGiorgis Georgakoudis OMPD_parallel);
11473a6bfcf2SGiorgis Georgakoudis }
11483a6bfcf2SGiorgis Georgakoudis
11493a6bfcf2SGiorgis Georgakoudis CI->eraseFromParent();
11503a6bfcf2SGiorgis Georgakoudis }
11513a6bfcf2SGiorgis Georgakoudis
11523a6bfcf2SGiorgis Georgakoudis assert(OutlinedFn != OriginalFn && "Outlining failed");
11537fea561eSArthur Eubanks CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
11543a6bfcf2SGiorgis Georgakoudis CGUpdater.reanalyzeFunction(*OriginalFn);
11553a6bfcf2SGiorgis Georgakoudis
11563a6bfcf2SGiorgis Georgakoudis NumOpenMPParallelRegionsMerged += MergableCIs.size();
11573a6bfcf2SGiorgis Georgakoudis
11583a6bfcf2SGiorgis Georgakoudis return true;
11593a6bfcf2SGiorgis Georgakoudis };
11603a6bfcf2SGiorgis Georgakoudis
11613a6bfcf2SGiorgis Georgakoudis // Helper function that identifies sequences of
11623a6bfcf2SGiorgis Georgakoudis // __kmpc_fork_call uses in a basic block.
11633a6bfcf2SGiorgis Georgakoudis auto DetectPRsCB = [&](Use &U, Function &F) {
11643a6bfcf2SGiorgis Georgakoudis CallInst *CI = getCallIfRegularCall(U, &RFI);
11653a6bfcf2SGiorgis Georgakoudis BB2PRMap[CI->getParent()].insert(CI);
11663a6bfcf2SGiorgis Georgakoudis
11673a6bfcf2SGiorgis Georgakoudis return false;
11683a6bfcf2SGiorgis Georgakoudis };
11693a6bfcf2SGiorgis Georgakoudis
11703a6bfcf2SGiorgis Georgakoudis BB2PRMap.clear();
11713a6bfcf2SGiorgis Georgakoudis RFI.foreachUse(SCC, DetectPRsCB);
11723a6bfcf2SGiorgis Georgakoudis SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
11733a6bfcf2SGiorgis Georgakoudis // Find mergable parallel regions within a basic block that are
11743a6bfcf2SGiorgis Georgakoudis // safe to merge, that is any in-between instructions can safely
11753a6bfcf2SGiorgis Georgakoudis // execute in parallel after merging.
11763a6bfcf2SGiorgis Georgakoudis // TODO: support merging across basic-blocks.
11773a6bfcf2SGiorgis Georgakoudis for (auto &It : BB2PRMap) {
11783a6bfcf2SGiorgis Georgakoudis auto &CIs = It.getSecond();
11793a6bfcf2SGiorgis Georgakoudis if (CIs.size() < 2)
11803a6bfcf2SGiorgis Georgakoudis continue;
11813a6bfcf2SGiorgis Georgakoudis
11823a6bfcf2SGiorgis Georgakoudis BasicBlock *BB = It.getFirst();
11833a6bfcf2SGiorgis Georgakoudis SmallVector<CallInst *, 4> MergableCIs;
11843a6bfcf2SGiorgis Georgakoudis
118597517055SGiorgis Georgakoudis /// Returns true if the instruction is mergable, false otherwise.
118697517055SGiorgis Georgakoudis /// A terminator instruction is unmergable by definition since merging
118797517055SGiorgis Georgakoudis /// works within a BB. Instructions before the mergable region are
118897517055SGiorgis Georgakoudis /// mergable if they are not calls to OpenMP runtime functions that may
118997517055SGiorgis Georgakoudis /// set different execution parameters for subsequent parallel regions.
119097517055SGiorgis Georgakoudis /// Instructions in-between parallel regions are mergable if they are not
119197517055SGiorgis Georgakoudis /// calls to any non-intrinsic function since that may call a non-mergable
119297517055SGiorgis Georgakoudis /// OpenMP runtime function.
119397517055SGiorgis Georgakoudis auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
119497517055SGiorgis Georgakoudis // We do not merge across BBs, hence return false (unmergable) if the
119597517055SGiorgis Georgakoudis // instruction is a terminator.
119697517055SGiorgis Georgakoudis if (I.isTerminator())
119797517055SGiorgis Georgakoudis return false;
119897517055SGiorgis Georgakoudis
119997517055SGiorgis Georgakoudis if (!isa<CallInst>(&I))
120097517055SGiorgis Georgakoudis return true;
120197517055SGiorgis Georgakoudis
120297517055SGiorgis Georgakoudis CallInst *CI = cast<CallInst>(&I);
120397517055SGiorgis Georgakoudis if (IsBeforeMergableRegion) {
120497517055SGiorgis Georgakoudis Function *CalledFunction = CI->getCalledFunction();
120597517055SGiorgis Georgakoudis if (!CalledFunction)
120697517055SGiorgis Georgakoudis return false;
120797517055SGiorgis Georgakoudis // Return false (unmergable) if the call before the parallel
120897517055SGiorgis Georgakoudis // region calls an explicit affinity (proc_bind) or number of
120997517055SGiorgis Georgakoudis // threads (num_threads) compiler-generated function. Those settings
121097517055SGiorgis Georgakoudis // may be incompatible with following parallel regions.
121197517055SGiorgis Georgakoudis // TODO: ICV tracking to detect compatibility.
121297517055SGiorgis Georgakoudis for (const auto &RFI : UnmergableCallsInfo) {
121397517055SGiorgis Georgakoudis if (CalledFunction == RFI.Declaration)
121497517055SGiorgis Georgakoudis return false;
121597517055SGiorgis Georgakoudis }
121697517055SGiorgis Georgakoudis } else {
121797517055SGiorgis Georgakoudis // Return false (unmergable) if there is a call instruction
121897517055SGiorgis Georgakoudis // in-between parallel regions when it is not an intrinsic. It
121997517055SGiorgis Georgakoudis // may call an unmergable OpenMP runtime function in its callpath.
122097517055SGiorgis Georgakoudis // TODO: Keep track of possible OpenMP calls in the callpath.
122197517055SGiorgis Georgakoudis if (!isa<IntrinsicInst>(CI))
122297517055SGiorgis Georgakoudis return false;
122397517055SGiorgis Georgakoudis }
122497517055SGiorgis Georgakoudis
122597517055SGiorgis Georgakoudis return true;
122697517055SGiorgis Georgakoudis };
12273a6bfcf2SGiorgis Georgakoudis // Find maximal number of parallel region CIs that are safe to merge.
122897517055SGiorgis Georgakoudis for (auto It = BB->begin(), End = BB->end(); It != End;) {
122997517055SGiorgis Georgakoudis Instruction &I = *It;
123097517055SGiorgis Georgakoudis ++It;
123197517055SGiorgis Georgakoudis
12323a6bfcf2SGiorgis Georgakoudis if (CIs.count(&I)) {
12333a6bfcf2SGiorgis Georgakoudis MergableCIs.push_back(cast<CallInst>(&I));
12343a6bfcf2SGiorgis Georgakoudis continue;
12353a6bfcf2SGiorgis Georgakoudis }
12363a6bfcf2SGiorgis Georgakoudis
123797517055SGiorgis Georgakoudis // Continue expanding if the instruction is mergable.
123897517055SGiorgis Georgakoudis if (IsMergable(I, MergableCIs.empty()))
12393a6bfcf2SGiorgis Georgakoudis continue;
12403a6bfcf2SGiorgis Georgakoudis
124197517055SGiorgis Georgakoudis // Forward the instruction iterator to skip the next parallel region
124297517055SGiorgis Georgakoudis // since there is an unmergable instruction which can affect it.
124397517055SGiorgis Georgakoudis for (; It != End; ++It) {
124497517055SGiorgis Georgakoudis Instruction &SkipI = *It;
124597517055SGiorgis Georgakoudis if (CIs.count(&SkipI)) {
124697517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
124797517055SGiorgis Georgakoudis << " due to " << I << "\n");
124897517055SGiorgis Georgakoudis ++It;
124997517055SGiorgis Georgakoudis break;
125097517055SGiorgis Georgakoudis }
125197517055SGiorgis Georgakoudis }
125297517055SGiorgis Georgakoudis
125397517055SGiorgis Georgakoudis // Store mergable regions found.
12543a6bfcf2SGiorgis Georgakoudis if (MergableCIs.size() > 1) {
12553a6bfcf2SGiorgis Georgakoudis MergableCIsVector.push_back(MergableCIs);
12563a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
12573a6bfcf2SGiorgis Georgakoudis << " parallel regions in block " << BB->getName()
12583a6bfcf2SGiorgis Georgakoudis << " of function " << BB->getParent()->getName()
12593a6bfcf2SGiorgis Georgakoudis << "\n";);
12603a6bfcf2SGiorgis Georgakoudis }
12613a6bfcf2SGiorgis Georgakoudis
12623a6bfcf2SGiorgis Georgakoudis MergableCIs.clear();
12633a6bfcf2SGiorgis Georgakoudis }
12643a6bfcf2SGiorgis Georgakoudis
12653a6bfcf2SGiorgis Georgakoudis if (!MergableCIsVector.empty()) {
12663a6bfcf2SGiorgis Georgakoudis Changed = true;
12673a6bfcf2SGiorgis Georgakoudis
12683a6bfcf2SGiorgis Georgakoudis for (auto &MergableCIs : MergableCIsVector)
12693a6bfcf2SGiorgis Georgakoudis Merge(MergableCIs, BB);
1270b2ad63d3SJoseph Huber MergableCIsVector.clear();
12713a6bfcf2SGiorgis Georgakoudis }
12723a6bfcf2SGiorgis Georgakoudis }
12733a6bfcf2SGiorgis Georgakoudis
12743a6bfcf2SGiorgis Georgakoudis if (Changed) {
127597517055SGiorgis Georgakoudis /// Re-collect use for fork calls, emitted barrier calls, and
127697517055SGiorgis Georgakoudis /// any emitted master/end_master calls.
127797517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
127897517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
127997517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
128097517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
12813a6bfcf2SGiorgis Georgakoudis }
12823a6bfcf2SGiorgis Georgakoudis
12833a6bfcf2SGiorgis Georgakoudis return Changed;
12843a6bfcf2SGiorgis Georgakoudis }
12853a6bfcf2SGiorgis Georgakoudis
12869d38f98dSJohannes Doerfert /// Try to delete parallel regions if possible.
deleteParallelRegions__anon23c38c770111::OpenMPOpt1287e565db49SJohannes Doerfert bool deleteParallelRegions() {
1288e565db49SJohannes Doerfert const unsigned CallbackCalleeOperand = 2;
1289e565db49SJohannes Doerfert
12907cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI =
12917cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
12927cfd267cSsstefan1
1293e565db49SJohannes Doerfert if (!RFI.Declaration)
1294e565db49SJohannes Doerfert return false;
1295e565db49SJohannes Doerfert
1296e565db49SJohannes Doerfert bool Changed = false;
1297e565db49SJohannes Doerfert auto DeleteCallCB = [&](Use &U, Function &) {
1298e565db49SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U);
1299e565db49SJohannes Doerfert if (!CI)
1300e565db49SJohannes Doerfert return false;
1301e565db49SJohannes Doerfert auto *Fn = dyn_cast<Function>(
1302e565db49SJohannes Doerfert CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1303e565db49SJohannes Doerfert if (!Fn)
1304e565db49SJohannes Doerfert return false;
1305e565db49SJohannes Doerfert if (!Fn->onlyReadsMemory())
1306e565db49SJohannes Doerfert return false;
1307e565db49SJohannes Doerfert if (!Fn->hasFnAttribute(Attribute::WillReturn))
1308e565db49SJohannes Doerfert return false;
1309e565db49SJohannes Doerfert
1310e565db49SJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1311e565db49SJohannes Doerfert << CI->getCaller()->getName() << "\n");
13124d4ea9acSHuber, Joseph
13134d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) {
1314eef6601bSJoseph Huber return OR << "Removing parallel region with no side-effects.";
13154d4ea9acSHuber, Joseph };
13162c31d5ebSJoseph Huber emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
13174d4ea9acSHuber, Joseph
1318e565db49SJohannes Doerfert CGUpdater.removeCallSite(*CI);
1319e565db49SJohannes Doerfert CI->eraseFromParent();
1320e565db49SJohannes Doerfert Changed = true;
132155eb714aSRoman Lebedev ++NumOpenMPParallelRegionsDeleted;
1322e565db49SJohannes Doerfert return true;
1323e565db49SJohannes Doerfert };
1324e565db49SJohannes Doerfert
1325624d34afSJohannes Doerfert RFI.foreachUse(SCC, DeleteCallCB);
1326e565db49SJohannes Doerfert
1327e565db49SJohannes Doerfert return Changed;
1328e565db49SJohannes Doerfert }
1329e565db49SJohannes Doerfert
1330b726c557SJohannes Doerfert /// Try to eliminate runtime calls by reusing existing ones.
deduplicateRuntimeCalls__anon23c38c770111::OpenMPOpt13319548b74aSJohannes Doerfert bool deduplicateRuntimeCalls() {
13329548b74aSJohannes Doerfert bool Changed = false;
13339548b74aSJohannes Doerfert
1334e28936f6SJohannes Doerfert RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1335e28936f6SJohannes Doerfert OMPRTL_omp_get_num_threads,
1336e28936f6SJohannes Doerfert OMPRTL_omp_in_parallel,
1337e28936f6SJohannes Doerfert OMPRTL_omp_get_cancellation,
1338e28936f6SJohannes Doerfert OMPRTL_omp_get_thread_limit,
1339e28936f6SJohannes Doerfert OMPRTL_omp_get_supported_active_levels,
1340e28936f6SJohannes Doerfert OMPRTL_omp_get_level,
1341e28936f6SJohannes Doerfert OMPRTL_omp_get_ancestor_thread_num,
1342e28936f6SJohannes Doerfert OMPRTL_omp_get_team_size,
1343e28936f6SJohannes Doerfert OMPRTL_omp_get_active_level,
1344e28936f6SJohannes Doerfert OMPRTL_omp_in_final,
1345e28936f6SJohannes Doerfert OMPRTL_omp_get_proc_bind,
1346e28936f6SJohannes Doerfert OMPRTL_omp_get_num_places,
1347e28936f6SJohannes Doerfert OMPRTL_omp_get_num_procs,
1348e28936f6SJohannes Doerfert OMPRTL_omp_get_place_num,
1349e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_num_places,
1350e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_place_nums};
1351e28936f6SJohannes Doerfert
1352bc93c2d7SMarek Kurdej // Global-tid is handled separately.
13539548b74aSJohannes Doerfert SmallSetVector<Value *, 16> GTIdArgs;
13549548b74aSJohannes Doerfert collectGlobalThreadIdArguments(GTIdArgs);
13559548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
13569548b74aSJohannes Doerfert << " global thread ID arguments\n");
13579548b74aSJohannes Doerfert
13589548b74aSJohannes Doerfert for (Function *F : SCC) {
1359e28936f6SJohannes Doerfert for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
13604e29d256Sserge-sans-paille Changed |= deduplicateRuntimeCalls(
13614e29d256Sserge-sans-paille *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1362e28936f6SJohannes Doerfert
1363e28936f6SJohannes Doerfert // __kmpc_global_thread_num is special as we can replace it with an
1364e28936f6SJohannes Doerfert // argument in enough cases to make it worth trying.
13659548b74aSJohannes Doerfert Value *GTIdArg = nullptr;
13669548b74aSJohannes Doerfert for (Argument &Arg : F->args())
13679548b74aSJohannes Doerfert if (GTIdArgs.count(&Arg)) {
13689548b74aSJohannes Doerfert GTIdArg = &Arg;
13699548b74aSJohannes Doerfert break;
13709548b74aSJohannes Doerfert }
13719548b74aSJohannes Doerfert Changed |= deduplicateRuntimeCalls(
13727cfd267cSsstefan1 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
13739548b74aSJohannes Doerfert }
13749548b74aSJohannes Doerfert
13759548b74aSJohannes Doerfert return Changed;
13769548b74aSJohannes Doerfert }
13779548b74aSJohannes Doerfert
1378496f8e5bSHamilton Tobon Mosquera /// Tries to hide the latency of runtime calls that involve host to
1379496f8e5bSHamilton Tobon Mosquera /// device memory transfers by splitting them into their "issue" and "wait"
1380496f8e5bSHamilton Tobon Mosquera /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1381496f8e5bSHamilton Tobon Mosquera /// moved downards as much as possible. The "issue" issues the memory transfer
1382496f8e5bSHamilton Tobon Mosquera /// asynchronously, returning a handle. The "wait" waits in the returned
1383496f8e5bSHamilton Tobon Mosquera /// handle for the memory transfer to finish.
hideMemTransfersLatency__anon23c38c770111::OpenMPOpt1384496f8e5bSHamilton Tobon Mosquera bool hideMemTransfersLatency() {
1385496f8e5bSHamilton Tobon Mosquera auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1386496f8e5bSHamilton Tobon Mosquera bool Changed = false;
1387496f8e5bSHamilton Tobon Mosquera auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1388496f8e5bSHamilton Tobon Mosquera auto *RTCall = getCallIfRegularCall(U, &RFI);
1389496f8e5bSHamilton Tobon Mosquera if (!RTCall)
1390496f8e5bSHamilton Tobon Mosquera return false;
1391496f8e5bSHamilton Tobon Mosquera
13928931add6SHamilton Tobon Mosquera OffloadArray OffloadArrays[3];
13938931add6SHamilton Tobon Mosquera if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
13948931add6SHamilton Tobon Mosquera return false;
13958931add6SHamilton Tobon Mosquera
13968931add6SHamilton Tobon Mosquera LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
13978931add6SHamilton Tobon Mosquera
1398bd2fa181SHamilton Tobon Mosquera // TODO: Check if can be moved upwards.
1399bd2fa181SHamilton Tobon Mosquera bool WasSplit = false;
1400bd2fa181SHamilton Tobon Mosquera Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1401bd2fa181SHamilton Tobon Mosquera if (WaitMovementPoint)
1402bd2fa181SHamilton Tobon Mosquera WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1403bd2fa181SHamilton Tobon Mosquera
1404496f8e5bSHamilton Tobon Mosquera Changed |= WasSplit;
1405496f8e5bSHamilton Tobon Mosquera return WasSplit;
1406496f8e5bSHamilton Tobon Mosquera };
1407496f8e5bSHamilton Tobon Mosquera RFI.foreachUse(SCC, SplitMemTransfers);
1408496f8e5bSHamilton Tobon Mosquera
1409496f8e5bSHamilton Tobon Mosquera return Changed;
1410496f8e5bSHamilton Tobon Mosquera }
1411496f8e5bSHamilton Tobon Mosquera
14123c8a4c6fSJohannes Doerfert /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
14133c8a4c6fSJohannes Doerfert /// TODO: Make this an AA and expand it to work across blocks and functions.
eliminateBarriers__anon23c38c770111::OpenMPOpt14143c8a4c6fSJohannes Doerfert bool eliminateBarriers() {
14153c8a4c6fSJohannes Doerfert bool Changed = false;
14163c8a4c6fSJohannes Doerfert
14173c8a4c6fSJohannes Doerfert if (DisableOpenMPOptBarrierElimination)
14183c8a4c6fSJohannes Doerfert return /*Changed=*/false;
14193c8a4c6fSJohannes Doerfert
14203c8a4c6fSJohannes Doerfert if (OMPInfoCache.Kernels.empty())
14213c8a4c6fSJohannes Doerfert return /*Changed=*/false;
14223c8a4c6fSJohannes Doerfert
14233c8a4c6fSJohannes Doerfert enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
14243c8a4c6fSJohannes Doerfert
14253c8a4c6fSJohannes Doerfert class BarrierInfo {
14263c8a4c6fSJohannes Doerfert Instruction *I;
14273c8a4c6fSJohannes Doerfert enum ImplicitBarrierType Type;
14283c8a4c6fSJohannes Doerfert
14293c8a4c6fSJohannes Doerfert public:
14303c8a4c6fSJohannes Doerfert BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
14313c8a4c6fSJohannes Doerfert BarrierInfo(Instruction &I) : I(&I) {}
14323c8a4c6fSJohannes Doerfert
14333c8a4c6fSJohannes Doerfert bool isImplicit() { return !I; }
14343c8a4c6fSJohannes Doerfert
14353c8a4c6fSJohannes Doerfert bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
14363c8a4c6fSJohannes Doerfert
14373c8a4c6fSJohannes Doerfert bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
14383c8a4c6fSJohannes Doerfert
14393c8a4c6fSJohannes Doerfert Instruction *getInstruction() { return I; }
14403c8a4c6fSJohannes Doerfert };
14413c8a4c6fSJohannes Doerfert
14423c8a4c6fSJohannes Doerfert for (Function *Kernel : OMPInfoCache.Kernels) {
14433c8a4c6fSJohannes Doerfert for (BasicBlock &BB : *Kernel) {
14443c8a4c6fSJohannes Doerfert SmallVector<BarrierInfo, 8> BarriersInBlock;
14453c8a4c6fSJohannes Doerfert SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
14463c8a4c6fSJohannes Doerfert
14473c8a4c6fSJohannes Doerfert // Add the kernel entry implicit barrier.
14483c8a4c6fSJohannes Doerfert if (&Kernel->getEntryBlock() == &BB)
14493c8a4c6fSJohannes Doerfert BarriersInBlock.push_back(IBT_ENTRY);
14503c8a4c6fSJohannes Doerfert
14513c8a4c6fSJohannes Doerfert // Find implicit and explicit aligned barriers in the same basic block.
14523c8a4c6fSJohannes Doerfert for (Instruction &I : BB) {
14533c8a4c6fSJohannes Doerfert if (isa<ReturnInst>(I)) {
14543c8a4c6fSJohannes Doerfert // Add the implicit barrier when exiting the kernel.
14553c8a4c6fSJohannes Doerfert BarriersInBlock.push_back(IBT_EXIT);
14563c8a4c6fSJohannes Doerfert continue;
14573c8a4c6fSJohannes Doerfert }
14583c8a4c6fSJohannes Doerfert CallBase *CB = dyn_cast<CallBase>(&I);
14593c8a4c6fSJohannes Doerfert if (!CB)
14603c8a4c6fSJohannes Doerfert continue;
14613c8a4c6fSJohannes Doerfert
14623c8a4c6fSJohannes Doerfert auto IsAlignBarrierCB = [&](CallBase &CB) {
14633c8a4c6fSJohannes Doerfert switch (CB.getIntrinsicID()) {
14643c8a4c6fSJohannes Doerfert case Intrinsic::nvvm_barrier0:
14653c8a4c6fSJohannes Doerfert case Intrinsic::nvvm_barrier0_and:
14663c8a4c6fSJohannes Doerfert case Intrinsic::nvvm_barrier0_or:
14673c8a4c6fSJohannes Doerfert case Intrinsic::nvvm_barrier0_popc:
14683c8a4c6fSJohannes Doerfert return true;
14693c8a4c6fSJohannes Doerfert default:
14703c8a4c6fSJohannes Doerfert break;
14713c8a4c6fSJohannes Doerfert }
14723c8a4c6fSJohannes Doerfert return hasAssumption(CB,
14733c8a4c6fSJohannes Doerfert KnownAssumptionString("ompx_aligned_barrier"));
14743c8a4c6fSJohannes Doerfert };
14753c8a4c6fSJohannes Doerfert
14763c8a4c6fSJohannes Doerfert if (IsAlignBarrierCB(*CB)) {
14773c8a4c6fSJohannes Doerfert // Add an explicit aligned barrier.
14783c8a4c6fSJohannes Doerfert BarriersInBlock.push_back(I);
14793c8a4c6fSJohannes Doerfert }
14803c8a4c6fSJohannes Doerfert }
14813c8a4c6fSJohannes Doerfert
14823c8a4c6fSJohannes Doerfert if (BarriersInBlock.size() <= 1)
14833c8a4c6fSJohannes Doerfert continue;
14843c8a4c6fSJohannes Doerfert
14853c8a4c6fSJohannes Doerfert // A barrier in a barrier pair is removeable if all instructions
14863c8a4c6fSJohannes Doerfert // between the barriers in the pair are side-effect free modulo the
14873c8a4c6fSJohannes Doerfert // barrier operation.
14883c8a4c6fSJohannes Doerfert auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
14893c8a4c6fSJohannes Doerfert BarrierInfo *EndBI) {
14903c8a4c6fSJohannes Doerfert assert(
14913c8a4c6fSJohannes Doerfert !StartBI->isImplicitExit() &&
14923c8a4c6fSJohannes Doerfert "Expected start barrier to be other than a kernel exit barrier");
14933c8a4c6fSJohannes Doerfert assert(
14943c8a4c6fSJohannes Doerfert !EndBI->isImplicitEntry() &&
14953c8a4c6fSJohannes Doerfert "Expected end barrier to be other than a kernel entry barrier");
14963c8a4c6fSJohannes Doerfert // If StartBI's instruction is null then this is the implicit
14973c8a4c6fSJohannes Doerfert // kernel entry barrier, so iterate from the first instruction in the
14983c8a4c6fSJohannes Doerfert // entry block.
14993c8a4c6fSJohannes Doerfert Instruction *I = (StartBI->isImplicitEntry())
15003c8a4c6fSJohannes Doerfert ? &Kernel->getEntryBlock().front()
15013c8a4c6fSJohannes Doerfert : StartBI->getInstruction()->getNextNode();
15023c8a4c6fSJohannes Doerfert assert(I && "Expected non-null start instruction");
15033c8a4c6fSJohannes Doerfert Instruction *E = (EndBI->isImplicitExit())
15043c8a4c6fSJohannes Doerfert ? I->getParent()->getTerminator()
15053c8a4c6fSJohannes Doerfert : EndBI->getInstruction();
15063c8a4c6fSJohannes Doerfert assert(E && "Expected non-null end instruction");
15073c8a4c6fSJohannes Doerfert
15083c8a4c6fSJohannes Doerfert for (; I != E; I = I->getNextNode()) {
15093c8a4c6fSJohannes Doerfert if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
15103c8a4c6fSJohannes Doerfert continue;
15113c8a4c6fSJohannes Doerfert
15123c8a4c6fSJohannes Doerfert auto IsPotentiallyAffectedByBarrier =
15133c8a4c6fSJohannes Doerfert [](Optional<MemoryLocation> Loc) {
15143c8a4c6fSJohannes Doerfert const Value *Obj = (Loc && Loc->Ptr)
15153c8a4c6fSJohannes Doerfert ? getUnderlyingObject(Loc->Ptr)
15163c8a4c6fSJohannes Doerfert : nullptr;
15173c8a4c6fSJohannes Doerfert if (!Obj) {
15183c8a4c6fSJohannes Doerfert LLVM_DEBUG(
15193c8a4c6fSJohannes Doerfert dbgs()
15203c8a4c6fSJohannes Doerfert << "Access to unknown location requires barriers\n");
15213c8a4c6fSJohannes Doerfert return true;
15223c8a4c6fSJohannes Doerfert }
15233c8a4c6fSJohannes Doerfert if (isa<UndefValue>(Obj))
15243c8a4c6fSJohannes Doerfert return false;
15253c8a4c6fSJohannes Doerfert if (isa<AllocaInst>(Obj))
15263c8a4c6fSJohannes Doerfert return false;
15273c8a4c6fSJohannes Doerfert if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
15283c8a4c6fSJohannes Doerfert if (GV->isConstant())
15293c8a4c6fSJohannes Doerfert return false;
15303c8a4c6fSJohannes Doerfert if (GV->isThreadLocal())
15313c8a4c6fSJohannes Doerfert return false;
15323c8a4c6fSJohannes Doerfert if (GV->getAddressSpace() == (int)AddressSpace::Local)
15333c8a4c6fSJohannes Doerfert return false;
15343c8a4c6fSJohannes Doerfert if (GV->getAddressSpace() == (int)AddressSpace::Constant)
15353c8a4c6fSJohannes Doerfert return false;
15363c8a4c6fSJohannes Doerfert }
15373c8a4c6fSJohannes Doerfert LLVM_DEBUG(dbgs() << "Access to '" << *Obj
15383c8a4c6fSJohannes Doerfert << "' requires barriers\n");
15393c8a4c6fSJohannes Doerfert return true;
15403c8a4c6fSJohannes Doerfert };
15413c8a4c6fSJohannes Doerfert
15423c8a4c6fSJohannes Doerfert if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
15433c8a4c6fSJohannes Doerfert Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
15443c8a4c6fSJohannes Doerfert if (IsPotentiallyAffectedByBarrier(Loc))
15453c8a4c6fSJohannes Doerfert return false;
15463c8a4c6fSJohannes Doerfert if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
15473c8a4c6fSJohannes Doerfert Optional<MemoryLocation> Loc =
15483c8a4c6fSJohannes Doerfert MemoryLocation::getForSource(MTI);
15493c8a4c6fSJohannes Doerfert if (IsPotentiallyAffectedByBarrier(Loc))
15503c8a4c6fSJohannes Doerfert return false;
15513c8a4c6fSJohannes Doerfert }
15523c8a4c6fSJohannes Doerfert continue;
15533c8a4c6fSJohannes Doerfert }
15543c8a4c6fSJohannes Doerfert
15553c8a4c6fSJohannes Doerfert if (auto *LI = dyn_cast<LoadInst>(I))
15563c8a4c6fSJohannes Doerfert if (LI->hasMetadata(LLVMContext::MD_invariant_load))
15573c8a4c6fSJohannes Doerfert continue;
15583c8a4c6fSJohannes Doerfert
15593c8a4c6fSJohannes Doerfert Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
15603c8a4c6fSJohannes Doerfert if (IsPotentiallyAffectedByBarrier(Loc))
15613c8a4c6fSJohannes Doerfert return false;
15623c8a4c6fSJohannes Doerfert }
15633c8a4c6fSJohannes Doerfert
15643c8a4c6fSJohannes Doerfert return true;
15653c8a4c6fSJohannes Doerfert };
15663c8a4c6fSJohannes Doerfert
15673c8a4c6fSJohannes Doerfert // Iterate barrier pairs and remove an explicit barrier if analysis
15683c8a4c6fSJohannes Doerfert // deems it removeable.
15693c8a4c6fSJohannes Doerfert for (auto *It = BarriersInBlock.begin(),
15703c8a4c6fSJohannes Doerfert *End = BarriersInBlock.end() - 1;
15713c8a4c6fSJohannes Doerfert It != End; ++It) {
15723c8a4c6fSJohannes Doerfert
15733c8a4c6fSJohannes Doerfert BarrierInfo *StartBI = It;
15743c8a4c6fSJohannes Doerfert BarrierInfo *EndBI = (It + 1);
15753c8a4c6fSJohannes Doerfert
15763c8a4c6fSJohannes Doerfert // Cannot remove when both are implicit barriers, continue.
15773c8a4c6fSJohannes Doerfert if (StartBI->isImplicit() && EndBI->isImplicit())
15783c8a4c6fSJohannes Doerfert continue;
15793c8a4c6fSJohannes Doerfert
15803c8a4c6fSJohannes Doerfert if (!IsBarrierRemoveable(StartBI, EndBI))
15813c8a4c6fSJohannes Doerfert continue;
15823c8a4c6fSJohannes Doerfert
15833c8a4c6fSJohannes Doerfert assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
15843c8a4c6fSJohannes Doerfert "Expected at least one explicit barrier to remove.");
15853c8a4c6fSJohannes Doerfert
15863c8a4c6fSJohannes Doerfert // Remove an explicit barrier, check first, then second.
15873c8a4c6fSJohannes Doerfert if (!StartBI->isImplicit()) {
15883c8a4c6fSJohannes Doerfert LLVM_DEBUG(dbgs() << "Remove start barrier "
15893c8a4c6fSJohannes Doerfert << *StartBI->getInstruction() << "\n");
15903c8a4c6fSJohannes Doerfert BarriersToBeDeleted.insert(StartBI->getInstruction());
15913c8a4c6fSJohannes Doerfert } else {
15923c8a4c6fSJohannes Doerfert LLVM_DEBUG(dbgs() << "Remove end barrier "
15933c8a4c6fSJohannes Doerfert << *EndBI->getInstruction() << "\n");
15943c8a4c6fSJohannes Doerfert BarriersToBeDeleted.insert(EndBI->getInstruction());
15953c8a4c6fSJohannes Doerfert }
15963c8a4c6fSJohannes Doerfert }
15973c8a4c6fSJohannes Doerfert
15983c8a4c6fSJohannes Doerfert if (BarriersToBeDeleted.empty())
15993c8a4c6fSJohannes Doerfert continue;
16003c8a4c6fSJohannes Doerfert
16013c8a4c6fSJohannes Doerfert Changed = true;
16023c8a4c6fSJohannes Doerfert for (Instruction *I : BarriersToBeDeleted) {
16033c8a4c6fSJohannes Doerfert ++NumBarriersEliminated;
16043c8a4c6fSJohannes Doerfert auto Remark = [&](OptimizationRemark OR) {
16053c8a4c6fSJohannes Doerfert return OR << "Redundant barrier eliminated.";
16063c8a4c6fSJohannes Doerfert };
16073c8a4c6fSJohannes Doerfert
16083c8a4c6fSJohannes Doerfert if (EnableVerboseRemarks)
16093c8a4c6fSJohannes Doerfert emitRemark<OptimizationRemark>(I, "OMP190", Remark);
16103c8a4c6fSJohannes Doerfert I->eraseFromParent();
16113c8a4c6fSJohannes Doerfert }
16123c8a4c6fSJohannes Doerfert }
16133c8a4c6fSJohannes Doerfert }
16143c8a4c6fSJohannes Doerfert
16153c8a4c6fSJohannes Doerfert return Changed;
16163c8a4c6fSJohannes Doerfert }
16173c8a4c6fSJohannes Doerfert
analysisGlobalization__anon23c38c770111::OpenMPOpt1618a2281419SJoseph Huber void analysisGlobalization() {
16196fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
162082453e75SJoseph Huber
162182453e75SJoseph Huber auto CheckGlobalization = [&](Use &U, Function &Decl) {
1622a2281419SJoseph Huber if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
162344feacc7SJoseph Huber auto Remark = [&](OptimizationRemarkMissed ORM) {
162444feacc7SJoseph Huber return ORM
1625a2281419SJoseph Huber << "Found thread data sharing on the GPU. "
1626a2281419SJoseph Huber << "Expect degraded performance due to data globalization.";
1627a2281419SJoseph Huber };
16282c31d5ebSJoseph Huber emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1629a2281419SJoseph Huber }
1630a2281419SJoseph Huber
1631a2281419SJoseph Huber return false;
1632a2281419SJoseph Huber };
1633a2281419SJoseph Huber
163482453e75SJoseph Huber RFI.foreachUse(SCC, CheckGlobalization);
163582453e75SJoseph Huber }
1636a2281419SJoseph Huber
16378931add6SHamilton Tobon Mosquera /// Maps the values stored in the offload arrays passed as arguments to
16388931add6SHamilton Tobon Mosquera /// \p RuntimeCall into the offload arrays in \p OAs.
getValuesInOffloadArrays__anon23c38c770111::OpenMPOpt16398931add6SHamilton Tobon Mosquera bool getValuesInOffloadArrays(CallInst &RuntimeCall,
16408931add6SHamilton Tobon Mosquera MutableArrayRef<OffloadArray> OAs) {
16418931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "Need space for three offload arrays!");
16428931add6SHamilton Tobon Mosquera
16438931add6SHamilton Tobon Mosquera // A runtime call that involves memory offloading looks something like:
16448931add6SHamilton Tobon Mosquera // call void @__tgt_target_data_begin_mapper(arg0, arg1,
16458931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
16468931add6SHamilton Tobon Mosquera // ...)
16478931add6SHamilton Tobon Mosquera // So, the idea is to access the allocas that allocate space for these
16488931add6SHamilton Tobon Mosquera // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
16498931add6SHamilton Tobon Mosquera // Therefore:
16508931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs.
16511d3d9b9cSHamilton Tobon Mosquera Value *BasePtrsArg =
16521d3d9b9cSHamilton Tobon Mosquera RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
16538931add6SHamilton Tobon Mosquera // i8** %offload_ptrs.
16541d3d9b9cSHamilton Tobon Mosquera Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
16558931add6SHamilton Tobon Mosquera // i8** %offload_sizes.
16561d3d9b9cSHamilton Tobon Mosquera Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
16578931add6SHamilton Tobon Mosquera
16588931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs.
16598931add6SHamilton Tobon Mosquera auto *V = getUnderlyingObject(BasePtrsArg);
16608931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V))
16618931add6SHamilton Tobon Mosquera return false;
16628931add6SHamilton Tobon Mosquera auto *BasePtrsArray = cast<AllocaInst>(V);
16638931add6SHamilton Tobon Mosquera if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
16648931add6SHamilton Tobon Mosquera return false;
16658931add6SHamilton Tobon Mosquera
16668931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs.
16678931add6SHamilton Tobon Mosquera V = getUnderlyingObject(PtrsArg);
16688931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V))
16698931add6SHamilton Tobon Mosquera return false;
16708931add6SHamilton Tobon Mosquera auto *PtrsArray = cast<AllocaInst>(V);
16718931add6SHamilton Tobon Mosquera if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
16728931add6SHamilton Tobon Mosquera return false;
16738931add6SHamilton Tobon Mosquera
16748931add6SHamilton Tobon Mosquera // Get values stored in **offload_sizes.
16758931add6SHamilton Tobon Mosquera V = getUnderlyingObject(SizesArg);
16768931add6SHamilton Tobon Mosquera // If it's a [constant] global array don't analyze it.
16778931add6SHamilton Tobon Mosquera if (isa<GlobalValue>(V))
16788931add6SHamilton Tobon Mosquera return isa<Constant>(V);
16798931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V))
16808931add6SHamilton Tobon Mosquera return false;
16818931add6SHamilton Tobon Mosquera
16828931add6SHamilton Tobon Mosquera auto *SizesArray = cast<AllocaInst>(V);
16838931add6SHamilton Tobon Mosquera if (!OAs[2].initialize(*SizesArray, RuntimeCall))
16848931add6SHamilton Tobon Mosquera return false;
16858931add6SHamilton Tobon Mosquera
16868931add6SHamilton Tobon Mosquera return true;
16878931add6SHamilton Tobon Mosquera }
16888931add6SHamilton Tobon Mosquera
16898931add6SHamilton Tobon Mosquera /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
16908931add6SHamilton Tobon Mosquera /// For now this is a way to test that the function getValuesInOffloadArrays
16918931add6SHamilton Tobon Mosquera /// is working properly.
16928931add6SHamilton Tobon Mosquera /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
dumpValuesInOffloadArrays__anon23c38c770111::OpenMPOpt16938931add6SHamilton Tobon Mosquera void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
16948931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "There are three offload arrays to debug!");
16958931add6SHamilton Tobon Mosquera
16968931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
16978931add6SHamilton Tobon Mosquera std::string ValuesStr;
16988931add6SHamilton Tobon Mosquera raw_string_ostream Printer(ValuesStr);
16998931add6SHamilton Tobon Mosquera std::string Separator = " --- ";
17008931add6SHamilton Tobon Mosquera
17018931add6SHamilton Tobon Mosquera for (auto *BP : OAs[0].StoredValues) {
17028931add6SHamilton Tobon Mosquera BP->print(Printer);
17038931add6SHamilton Tobon Mosquera Printer << Separator;
17048931add6SHamilton Tobon Mosquera }
17058931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
17068931add6SHamilton Tobon Mosquera ValuesStr.clear();
17078931add6SHamilton Tobon Mosquera
17088931add6SHamilton Tobon Mosquera for (auto *P : OAs[1].StoredValues) {
17098931add6SHamilton Tobon Mosquera P->print(Printer);
17108931add6SHamilton Tobon Mosquera Printer << Separator;
17118931add6SHamilton Tobon Mosquera }
17128931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
17138931add6SHamilton Tobon Mosquera ValuesStr.clear();
17148931add6SHamilton Tobon Mosquera
17158931add6SHamilton Tobon Mosquera for (auto *S : OAs[2].StoredValues) {
17168931add6SHamilton Tobon Mosquera S->print(Printer);
17178931add6SHamilton Tobon Mosquera Printer << Separator;
17188931add6SHamilton Tobon Mosquera }
17198931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
17208931add6SHamilton Tobon Mosquera }
17218931add6SHamilton Tobon Mosquera
1722bd2fa181SHamilton Tobon Mosquera /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1723bd2fa181SHamilton Tobon Mosquera /// moved. Returns nullptr if the movement is not possible, or not worth it.
canBeMovedDownwards__anon23c38c770111::OpenMPOpt1724bd2fa181SHamilton Tobon Mosquera Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1725bd2fa181SHamilton Tobon Mosquera // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1726bd2fa181SHamilton Tobon Mosquera // Make it traverse the CFG.
1727bd2fa181SHamilton Tobon Mosquera
1728bd2fa181SHamilton Tobon Mosquera Instruction *CurrentI = &RuntimeCall;
1729bd2fa181SHamilton Tobon Mosquera bool IsWorthIt = false;
1730bd2fa181SHamilton Tobon Mosquera while ((CurrentI = CurrentI->getNextNode())) {
1731bd2fa181SHamilton Tobon Mosquera
1732bd2fa181SHamilton Tobon Mosquera // TODO: Once we detect the regions to be offloaded we should use the
1733bd2fa181SHamilton Tobon Mosquera // alias analysis manager to check if CurrentI may modify one of
1734bd2fa181SHamilton Tobon Mosquera // the offloaded regions.
1735bd2fa181SHamilton Tobon Mosquera if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1736bd2fa181SHamilton Tobon Mosquera if (IsWorthIt)
1737bd2fa181SHamilton Tobon Mosquera return CurrentI;
1738bd2fa181SHamilton Tobon Mosquera
1739bd2fa181SHamilton Tobon Mosquera return nullptr;
1740bd2fa181SHamilton Tobon Mosquera }
1741bd2fa181SHamilton Tobon Mosquera
1742bd2fa181SHamilton Tobon Mosquera // FIXME: For now if we move it over anything without side effect
1743bd2fa181SHamilton Tobon Mosquera // is worth it.
1744bd2fa181SHamilton Tobon Mosquera IsWorthIt = true;
1745bd2fa181SHamilton Tobon Mosquera }
1746bd2fa181SHamilton Tobon Mosquera
1747bd2fa181SHamilton Tobon Mosquera // Return end of BasicBlock.
1748bd2fa181SHamilton Tobon Mosquera return RuntimeCall.getParent()->getTerminator();
1749bd2fa181SHamilton Tobon Mosquera }
1750bd2fa181SHamilton Tobon Mosquera
1751496f8e5bSHamilton Tobon Mosquera /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
splitTargetDataBeginRTC__anon23c38c770111::OpenMPOpt1752bd2fa181SHamilton Tobon Mosquera bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1753bd2fa181SHamilton Tobon Mosquera Instruction &WaitMovementPoint) {
1754bd31abc1SHamilton Tobon Mosquera // Create stack allocated handle (__tgt_async_info) at the beginning of the
1755bd31abc1SHamilton Tobon Mosquera // function. Used for storing information of the async transfer, allowing to
1756bd31abc1SHamilton Tobon Mosquera // wait on it later.
1757496f8e5bSHamilton Tobon Mosquera auto &IRBuilder = OMPInfoCache.OMPBuilder;
1758bd31abc1SHamilton Tobon Mosquera auto *F = RuntimeCall.getCaller();
1759bd31abc1SHamilton Tobon Mosquera Instruction *FirstInst = &(F->getEntryBlock().front());
1760bd31abc1SHamilton Tobon Mosquera AllocaInst *Handle = new AllocaInst(
1761bd31abc1SHamilton Tobon Mosquera IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1762bd31abc1SHamilton Tobon Mosquera
1763496f8e5bSHamilton Tobon Mosquera // Add "issue" runtime call declaration:
1764496f8e5bSHamilton Tobon Mosquera // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1765496f8e5bSHamilton Tobon Mosquera // i8**, i8**, i64*, i64*)
1766496f8e5bSHamilton Tobon Mosquera FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1767496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_issue);
1768496f8e5bSHamilton Tobon Mosquera
1769496f8e5bSHamilton Tobon Mosquera // Change RuntimeCall call site for its asynchronous version.
177097e55cfeSJoseph Huber SmallVector<Value *, 16> Args;
1771bd2fa181SHamilton Tobon Mosquera for (auto &Arg : RuntimeCall.args())
1772496f8e5bSHamilton Tobon Mosquera Args.push_back(Arg.get());
1773bd31abc1SHamilton Tobon Mosquera Args.push_back(Handle);
1774496f8e5bSHamilton Tobon Mosquera
1775496f8e5bSHamilton Tobon Mosquera CallInst *IssueCallsite =
1776bd31abc1SHamilton Tobon Mosquera CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
177706cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
1778bd2fa181SHamilton Tobon Mosquera RuntimeCall.eraseFromParent();
1779496f8e5bSHamilton Tobon Mosquera
1780496f8e5bSHamilton Tobon Mosquera // Add "wait" runtime call declaration:
1781496f8e5bSHamilton Tobon Mosquera // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1782496f8e5bSHamilton Tobon Mosquera FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1783496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_wait);
1784496f8e5bSHamilton Tobon Mosquera
1785496f8e5bSHamilton Tobon Mosquera Value *WaitParams[2] = {
1786da8bec47SJoseph Huber IssueCallsite->getArgOperand(
1787da8bec47SJoseph Huber OffloadArray::DeviceIDArgNum), // device_id.
1788bd31abc1SHamilton Tobon Mosquera Handle // handle to wait on.
1789496f8e5bSHamilton Tobon Mosquera };
179006cfdd52SJoseph Huber CallInst *WaitCallsite = CallInst::Create(
179106cfdd52SJoseph Huber WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
179206cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
1793496f8e5bSHamilton Tobon Mosquera
1794496f8e5bSHamilton Tobon Mosquera return true;
1795496f8e5bSHamilton Tobon Mosquera }
1796496f8e5bSHamilton Tobon Mosquera
combinedIdentStruct__anon23c38c770111::OpenMPOpt1797dc3b5b00SJohannes Doerfert static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1798dc3b5b00SJohannes Doerfert bool GlobalOnly, bool &SingleChoice) {
1799dc3b5b00SJohannes Doerfert if (CurrentIdent == NextIdent)
1800dc3b5b00SJohannes Doerfert return CurrentIdent;
1801dc3b5b00SJohannes Doerfert
1802396b7253SJohannes Doerfert // TODO: Figure out how to actually combine multiple debug locations. For
1803dc3b5b00SJohannes Doerfert // now we just keep an existing one if there is a single choice.
1804dc3b5b00SJohannes Doerfert if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1805dc3b5b00SJohannes Doerfert SingleChoice = !CurrentIdent;
1806dc3b5b00SJohannes Doerfert return NextIdent;
1807dc3b5b00SJohannes Doerfert }
1808396b7253SJohannes Doerfert return nullptr;
1809396b7253SJohannes Doerfert }
1810396b7253SJohannes Doerfert
1811396b7253SJohannes Doerfert /// Return an `struct ident_t*` value that represents the ones used in the
1812396b7253SJohannes Doerfert /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1813396b7253SJohannes Doerfert /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1814396b7253SJohannes Doerfert /// return value we create one from scratch. We also do not yet combine
1815396b7253SJohannes Doerfert /// information, e.g., the source locations, see combinedIdentStruct.
18167cfd267cSsstefan1 Value *
getCombinedIdentFromCallUsesIn__anon23c38c770111::OpenMPOpt18177cfd267cSsstefan1 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
18187cfd267cSsstefan1 Function &F, bool GlobalOnly) {
1819dc3b5b00SJohannes Doerfert bool SingleChoice = true;
1820396b7253SJohannes Doerfert Value *Ident = nullptr;
1821396b7253SJohannes Doerfert auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1822396b7253SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI);
1823396b7253SJohannes Doerfert if (!CI || &F != &Caller)
1824396b7253SJohannes Doerfert return false;
1825396b7253SJohannes Doerfert Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1826dc3b5b00SJohannes Doerfert /* GlobalOnly */ true, SingleChoice);
1827396b7253SJohannes Doerfert return false;
1828396b7253SJohannes Doerfert };
1829624d34afSJohannes Doerfert RFI.foreachUse(SCC, CombineIdentStruct);
1830396b7253SJohannes Doerfert
1831dc3b5b00SJohannes Doerfert if (!Ident || !SingleChoice) {
1832396b7253SJohannes Doerfert // The IRBuilder uses the insertion block to get to the module, this is
1833396b7253SJohannes Doerfert // unfortunate but we work around it for now.
18347cfd267cSsstefan1 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
18357cfd267cSsstefan1 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1836396b7253SJohannes Doerfert &F.getEntryBlock(), F.getEntryBlock().begin()));
1837396b7253SJohannes Doerfert // Create a fallback location if non was found.
1838396b7253SJohannes Doerfert // TODO: Use the debug locations of the calls instead.
1839944aa042SJohannes Doerfert uint32_t SrcLocStrSize;
1840944aa042SJohannes Doerfert Constant *Loc =
1841944aa042SJohannes Doerfert OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1842944aa042SJohannes Doerfert Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
1843396b7253SJohannes Doerfert }
1844396b7253SJohannes Doerfert return Ident;
1845396b7253SJohannes Doerfert }
1846396b7253SJohannes Doerfert
1847b726c557SJohannes Doerfert /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
18489548b74aSJohannes Doerfert /// \p ReplVal if given.
deduplicateRuntimeCalls__anon23c38c770111::OpenMPOpt18497cfd267cSsstefan1 bool deduplicateRuntimeCalls(Function &F,
18507cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI,
18519548b74aSJohannes Doerfert Value *ReplVal = nullptr) {
18528855fec3SJohannes Doerfert auto *UV = RFI.getUseVector(F);
18538855fec3SJohannes Doerfert if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1854b1fbf438SRoman Lebedev return false;
1855b1fbf438SRoman Lebedev
18567cfd267cSsstefan1 LLVM_DEBUG(
18577cfd267cSsstefan1 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
18587cfd267cSsstefan1 << (ReplVal ? " with an existing value\n" : "\n") << "\n");
18597cfd267cSsstefan1
1860ab3da5ddSMichael Liao assert((!ReplVal || (isa<Argument>(ReplVal) &&
1861ab3da5ddSMichael Liao cast<Argument>(ReplVal)->getParent() == &F)) &&
18629548b74aSJohannes Doerfert "Unexpected replacement value!");
1863396b7253SJohannes Doerfert
1864396b7253SJohannes Doerfert // TODO: Use dominance to find a good position instead.
18656aab27baSsstefan1 auto CanBeMoved = [this](CallBase &CB) {
18664f0225f6SKazu Hirata unsigned NumArgs = CB.arg_size();
1867396b7253SJohannes Doerfert if (NumArgs == 0)
1868396b7253SJohannes Doerfert return true;
18696aab27baSsstefan1 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1870396b7253SJohannes Doerfert return false;
1871c11ebfeaSJoseph Huber for (unsigned U = 1; U < NumArgs; ++U)
1872c11ebfeaSJoseph Huber if (isa<Instruction>(CB.getArgOperand(U)))
1873396b7253SJohannes Doerfert return false;
1874396b7253SJohannes Doerfert return true;
1875396b7253SJohannes Doerfert };
1876396b7253SJohannes Doerfert
18779548b74aSJohannes Doerfert if (!ReplVal) {
18788855fec3SJohannes Doerfert for (Use *U : *UV)
18799548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1880396b7253SJohannes Doerfert if (!CanBeMoved(*CI))
1881396b7253SJohannes Doerfert continue;
18824d4ea9acSHuber, Joseph
1883f97de4cbSGiorgis Georgakoudis // If the function is a kernel, dedup will move
1884f97de4cbSGiorgis Georgakoudis // the runtime call right after the kernel init callsite. Otherwise,
1885f97de4cbSGiorgis Georgakoudis // it will move it to the beginning of the caller function.
1886f97de4cbSGiorgis Georgakoudis if (isKernel(F)) {
1887f97de4cbSGiorgis Georgakoudis auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
1888f97de4cbSGiorgis Georgakoudis auto *KernelInitUV = KernelInitRFI.getUseVector(F);
1889f97de4cbSGiorgis Georgakoudis
1890f97de4cbSGiorgis Georgakoudis if (KernelInitUV->empty())
1891f97de4cbSGiorgis Georgakoudis continue;
1892f97de4cbSGiorgis Georgakoudis
1893f97de4cbSGiorgis Georgakoudis assert(KernelInitUV->size() == 1 &&
1894f97de4cbSGiorgis Georgakoudis "Expected a single __kmpc_target_init in kernel\n");
1895f97de4cbSGiorgis Georgakoudis
1896f97de4cbSGiorgis Georgakoudis CallInst *KernelInitCI =
1897f97de4cbSGiorgis Georgakoudis getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
1898f97de4cbSGiorgis Georgakoudis assert(KernelInitCI &&
1899f97de4cbSGiorgis Georgakoudis "Expected a call to __kmpc_target_init in kernel\n");
1900f97de4cbSGiorgis Georgakoudis
1901f97de4cbSGiorgis Georgakoudis CI->moveAfter(KernelInitCI);
1902f97de4cbSGiorgis Georgakoudis } else
19039548b74aSJohannes Doerfert CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
19049548b74aSJohannes Doerfert ReplVal = CI;
19059548b74aSJohannes Doerfert break;
19069548b74aSJohannes Doerfert }
19079548b74aSJohannes Doerfert if (!ReplVal)
19089548b74aSJohannes Doerfert return false;
19099548b74aSJohannes Doerfert }
19109548b74aSJohannes Doerfert
1911396b7253SJohannes Doerfert // If we use a call as a replacement value we need to make sure the ident is
1912396b7253SJohannes Doerfert // valid at the new location. For now we just pick a global one, either
1913396b7253SJohannes Doerfert // existing and used by one of the calls, or created from scratch.
1914396b7253SJohannes Doerfert if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
191592c9ff6dSKazu Hirata if (!CI->arg_empty() &&
19166aab27baSsstefan1 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1917396b7253SJohannes Doerfert Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1918396b7253SJohannes Doerfert /* GlobalOnly */ true);
1919396b7253SJohannes Doerfert CI->setArgOperand(0, Ident);
1920396b7253SJohannes Doerfert }
1921396b7253SJohannes Doerfert }
1922396b7253SJohannes Doerfert
19239548b74aSJohannes Doerfert bool Changed = false;
19249548b74aSJohannes Doerfert auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
19259548b74aSJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI);
19269548b74aSJohannes Doerfert if (!CI || CI == ReplVal || &F != &Caller)
19279548b74aSJohannes Doerfert return false;
19289548b74aSJohannes Doerfert assert(CI->getCaller() == &F && "Unexpected call!");
19294d4ea9acSHuber, Joseph
19304d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) {
19314d4ea9acSHuber, Joseph return OR << "OpenMP runtime call "
1932eef6601bSJoseph Huber << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
19334d4ea9acSHuber, Joseph };
1934eef6601bSJoseph Huber if (CI->getDebugLoc())
19352c31d5ebSJoseph Huber emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
1936eef6601bSJoseph Huber else
19372c31d5ebSJoseph Huber emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
19384d4ea9acSHuber, Joseph
19399548b74aSJohannes Doerfert CGUpdater.removeCallSite(*CI);
19409548b74aSJohannes Doerfert CI->replaceAllUsesWith(ReplVal);
19419548b74aSJohannes Doerfert CI->eraseFromParent();
19429548b74aSJohannes Doerfert ++NumOpenMPRuntimeCallsDeduplicated;
19439548b74aSJohannes Doerfert Changed = true;
19449548b74aSJohannes Doerfert return true;
19459548b74aSJohannes Doerfert };
1946624d34afSJohannes Doerfert RFI.foreachUse(SCC, ReplaceAndDeleteCB);
19479548b74aSJohannes Doerfert
19489548b74aSJohannes Doerfert return Changed;
19499548b74aSJohannes Doerfert }
19509548b74aSJohannes Doerfert
19519548b74aSJohannes Doerfert /// Collect arguments that represent the global thread id in \p GTIdArgs.
collectGlobalThreadIdArguments__anon23c38c770111::OpenMPOpt19529548b74aSJohannes Doerfert void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) {
19539548b74aSJohannes Doerfert // TODO: Below we basically perform a fixpoint iteration with a pessimistic
19549548b74aSJohannes Doerfert // initialization. We could define an AbstractAttribute instead and
19559548b74aSJohannes Doerfert // run the Attributor here once it can be run as an SCC pass.
19569548b74aSJohannes Doerfert
19579548b74aSJohannes Doerfert // Helper to check the argument \p ArgNo at all call sites of \p F for
19589548b74aSJohannes Doerfert // a GTId.
19599548b74aSJohannes Doerfert auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
19609548b74aSJohannes Doerfert if (!F.hasLocalLinkage())
19619548b74aSJohannes Doerfert return false;
19629548b74aSJohannes Doerfert for (Use &U : F.uses()) {
19639548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U)) {
19649548b74aSJohannes Doerfert Value *ArgOp = CI->getArgOperand(ArgNo);
19659548b74aSJohannes Doerfert if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
19667cfd267cSsstefan1 getCallIfRegularCall(
19677cfd267cSsstefan1 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
19689548b74aSJohannes Doerfert continue;
19699548b74aSJohannes Doerfert }
19709548b74aSJohannes Doerfert return false;
19719548b74aSJohannes Doerfert }
19729548b74aSJohannes Doerfert return true;
19739548b74aSJohannes Doerfert };
19749548b74aSJohannes Doerfert
19759548b74aSJohannes Doerfert // Helper to identify uses of a GTId as GTId arguments.
19769548b74aSJohannes Doerfert auto AddUserArgs = [&](Value >Id) {
19779548b74aSJohannes Doerfert for (Use &U : GTId.uses())
19789548b74aSJohannes Doerfert if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
19799548b74aSJohannes Doerfert if (CI->isArgOperand(&U))
19809548b74aSJohannes Doerfert if (Function *Callee = CI->getCalledFunction())
19819548b74aSJohannes Doerfert if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
19829548b74aSJohannes Doerfert GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
19839548b74aSJohannes Doerfert };
19849548b74aSJohannes Doerfert
19859548b74aSJohannes Doerfert // The argument users of __kmpc_global_thread_num calls are GTIds.
19867cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
19877cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
19887cfd267cSsstefan1
1989624d34afSJohannes Doerfert GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
19908855fec3SJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
19919548b74aSJohannes Doerfert AddUserArgs(*CI);
19928855fec3SJohannes Doerfert return false;
19938855fec3SJohannes Doerfert });
19949548b74aSJohannes Doerfert
19959548b74aSJohannes Doerfert // Transitively search for more arguments by looking at the users of the
19969548b74aSJohannes Doerfert // ones we know already. During the search the GTIdArgs vector is extended
19979548b74aSJohannes Doerfert // so we cannot cache the size nor can we use a range based for.
1998c11ebfeaSJoseph Huber for (unsigned U = 0; U < GTIdArgs.size(); ++U)
1999c11ebfeaSJoseph Huber AddUserArgs(*GTIdArgs[U]);
20009548b74aSJohannes Doerfert }
20019548b74aSJohannes Doerfert
20025b0581aeSJohannes Doerfert /// Kernel (=GPU) optimizations and utility functions
20035b0581aeSJohannes Doerfert ///
20045b0581aeSJohannes Doerfert ///{{
20055b0581aeSJohannes Doerfert
20065b0581aeSJohannes Doerfert /// Check if \p F is a kernel, hence entry point for target offloading.
isKernel__anon23c38c770111::OpenMPOpt20075b0581aeSJohannes Doerfert bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
20085b0581aeSJohannes Doerfert
20095b0581aeSJohannes Doerfert /// Cache to remember the unique kernel for a function.
20105b0581aeSJohannes Doerfert DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
20115b0581aeSJohannes Doerfert
20125b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p F, if any.
20135b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Function &F);
20145b0581aeSJohannes Doerfert
20155b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p I, if any.
getUniqueKernelFor__anon23c38c770111::OpenMPOpt20165b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Instruction &I) {
20175b0581aeSJohannes Doerfert return getUniqueKernelFor(*I.getFunction());
20185b0581aeSJohannes Doerfert }
20195b0581aeSJohannes Doerfert
/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases where we can avoid taking the address of a function.
20225b0581aeSJohannes Doerfert bool rewriteDeviceCodeStateMachine();
20235b0581aeSJohannes Doerfert
20245b0581aeSJohannes Doerfert ///
20255b0581aeSJohannes Doerfert ///}}
20265b0581aeSJohannes Doerfert
20274d4ea9acSHuber, Joseph /// Emit a remark generically
20284d4ea9acSHuber, Joseph ///
20294d4ea9acSHuber, Joseph /// This template function can be used to generically emit a remark. The
20304d4ea9acSHuber, Joseph /// RemarkKind should be one of the following:
20314d4ea9acSHuber, Joseph /// - OptimizationRemark to indicate a successful optimization attempt
20324d4ea9acSHuber, Joseph /// - OptimizationRemarkMissed to report a failed optimization attempt
20334d4ea9acSHuber, Joseph /// - OptimizationRemarkAnalysis to provide additional information about an
20344d4ea9acSHuber, Joseph /// optimization attempt
20354d4ea9acSHuber, Joseph ///
20364d4ea9acSHuber, Joseph /// The remark is built using a callback function provided by the caller that
20374d4ea9acSHuber, Joseph /// takes a RemarkKind as input and returns a RemarkKind.
20382db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon23c38c770111::OpenMPOpt20392db182ffSJoseph Huber void emitRemark(Instruction *I, StringRef RemarkName,
2040e8039ad4SJohannes Doerfert RemarkCallBack &&RemarkCB) const {
20412db182ffSJoseph Huber Function *F = I->getParent()->getParent();
20424d4ea9acSHuber, Joseph auto &ORE = OREGetter(F);
20434d4ea9acSHuber, Joseph
20442c31d5ebSJoseph Huber if (RemarkName.startswith("OMP"))
20452c31d5ebSJoseph Huber ORE.emit([&]() {
20462c31d5ebSJoseph Huber return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
20472c31d5ebSJoseph Huber << " [" << RemarkName << "]";
20482c31d5ebSJoseph Huber });
20492c31d5ebSJoseph Huber else
20502c31d5ebSJoseph Huber ORE.emit(
20512c31d5ebSJoseph Huber [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
20524d4ea9acSHuber, Joseph }
20534d4ea9acSHuber, Joseph
20542db182ffSJoseph Huber /// Emit a remark on a function.
20552db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon23c38c770111::OpenMPOpt20562db182ffSJoseph Huber void emitRemark(Function *F, StringRef RemarkName,
20572db182ffSJoseph Huber RemarkCallBack &&RemarkCB) const {
20580f426935Ssstefan1 auto &ORE = OREGetter(F);
20590f426935Ssstefan1
20602c31d5ebSJoseph Huber if (RemarkName.startswith("OMP"))
20612c31d5ebSJoseph Huber ORE.emit([&]() {
20622c31d5ebSJoseph Huber return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
20632c31d5ebSJoseph Huber << " [" << RemarkName << "]";
20642c31d5ebSJoseph Huber });
20652c31d5ebSJoseph Huber else
20662c31d5ebSJoseph Huber ORE.emit(
20672c31d5ebSJoseph Huber [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
20680f426935Ssstefan1 }
20690f426935Ssstefan1
207058725c12SJoseph Huber /// RAII struct to temporarily change an RTL function's linkage to external.
207158725c12SJoseph Huber /// This prevents it from being mistakenly removed by other optimizations.
207258725c12SJoseph Huber struct ExternalizationRAII {
ExternalizationRAII__anon23c38c770111::OpenMPOpt::ExternalizationRAII207358725c12SJoseph Huber ExternalizationRAII(OMPInformationCache &OMPInfoCache,
207458725c12SJoseph Huber RuntimeFunction RFKind)
2075e757a3b0SJoseph Huber : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
207658725c12SJoseph Huber if (!Declaration)
207758725c12SJoseph Huber return;
207858725c12SJoseph Huber
207958725c12SJoseph Huber LinkageType = Declaration->getLinkage();
208058725c12SJoseph Huber Declaration->setLinkage(GlobalValue::ExternalLinkage);
208158725c12SJoseph Huber }
208258725c12SJoseph Huber
~ExternalizationRAII__anon23c38c770111::OpenMPOpt::ExternalizationRAII208358725c12SJoseph Huber ~ExternalizationRAII() {
208458725c12SJoseph Huber if (!Declaration)
208558725c12SJoseph Huber return;
208658725c12SJoseph Huber
208758725c12SJoseph Huber Declaration->setLinkage(LinkageType);
208858725c12SJoseph Huber }
208958725c12SJoseph Huber
209058725c12SJoseph Huber Function *Declaration;
209158725c12SJoseph Huber GlobalValue::LinkageTypes LinkageType;
209258725c12SJoseph Huber };
209358725c12SJoseph Huber
2094b726c557SJohannes Doerfert /// The underlying module.
20959548b74aSJohannes Doerfert Module &M;
20969548b74aSJohannes Doerfert
20979548b74aSJohannes Doerfert /// The SCC we are operating on.
2098ee17263aSJohannes Doerfert SmallVectorImpl<Function *> &SCC;
20999548b74aSJohannes Doerfert
21009548b74aSJohannes Doerfert /// Callback to update the call graph, the first argument is a removed call,
21019548b74aSJohannes Doerfert /// the second an optional replacement call.
21029548b74aSJohannes Doerfert CallGraphUpdater &CGUpdater;
21039548b74aSJohannes Doerfert
21044d4ea9acSHuber, Joseph /// Callback to get an OptimizationRemarkEmitter from a Function *
21054d4ea9acSHuber, Joseph OptimizationRemarkGetter OREGetter;
21064d4ea9acSHuber, Joseph
/// OpenMP-specific information cache. Also used for Attributor runs.
21087cfd267cSsstefan1 OMPInformationCache &OMPInfoCache;
2109b8235d2bSsstefan1
2110b8235d2bSsstefan1 /// Attributor instance.
2111b8235d2bSsstefan1 Attributor &A;
2112b8235d2bSsstefan1
2113b8235d2bSsstefan1 /// Helper function to run Attributor on SCC.
runAttributor__anon23c38c770111::OpenMPOpt2114d9659bf6SJohannes Doerfert bool runAttributor(bool IsModulePass) {
2115b8235d2bSsstefan1 if (SCC.empty())
2116b8235d2bSsstefan1 return false;
2117b8235d2bSsstefan1
211858725c12SJoseph Huber // Temporarily make these function have external linkage so the Attributor
211958725c12SJoseph Huber // doesn't remove them when we try to look them up later.
212058725c12SJoseph Huber ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
212158725c12SJoseph Huber ExternalizationRAII EndParallel(OMPInfoCache,
212258725c12SJoseph Huber OMPRTL___kmpc_kernel_end_parallel);
212358725c12SJoseph Huber ExternalizationRAII BarrierSPMD(OMPInfoCache,
212458725c12SJoseph Huber OMPRTL___kmpc_barrier_simple_spmd);
212573720c80SJohannes Doerfert ExternalizationRAII BarrierGeneric(OMPInfoCache,
212673720c80SJohannes Doerfert OMPRTL___kmpc_barrier_simple_generic);
21271cf86df8SJoseph Huber ExternalizationRAII ThreadId(OMPInfoCache,
21281cf86df8SJoseph Huber OMPRTL___kmpc_get_hardware_thread_id_in_block);
212974cacf21SJoseph Huber ExternalizationRAII NumThreads(
213074cacf21SJoseph Huber OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
21317986a5f2SJoseph Huber ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
213258725c12SJoseph Huber
2133d9659bf6SJohannes Doerfert registerAAs(IsModulePass);
2134b8235d2bSsstefan1
2135b8235d2bSsstefan1 ChangeStatus Changed = A.run();
2136b8235d2bSsstefan1
2137b8235d2bSsstefan1 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
2138b8235d2bSsstefan1 << " functions, result: " << Changed << ".\n");
2139b8235d2bSsstefan1
2140b8235d2bSsstefan1 return Changed == ChangeStatus::CHANGED;
2141b8235d2bSsstefan1 }
2142b8235d2bSsstefan1
21435ab6aeddSJose M Monsalve Diaz void registerFoldRuntimeCall(RuntimeFunction RF);
21445ab6aeddSJose M Monsalve Diaz
2145b8235d2bSsstefan1 /// Populate the Attributor with abstract attribute opportunities in the
2146b8235d2bSsstefan1 /// function.
2147d9659bf6SJohannes Doerfert void registerAAs(bool IsModulePass);
2148b8235d2bSsstefan1 };
2149b8235d2bSsstefan1
getUniqueKernelFor(Function & F)21505b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
21515b0581aeSJohannes Doerfert if (!OMPInfoCache.ModuleSlice.count(&F))
21525b0581aeSJohannes Doerfert return nullptr;
21535b0581aeSJohannes Doerfert
21545b0581aeSJohannes Doerfert // Use a scope to keep the lifetime of the CachedKernel short.
21555b0581aeSJohannes Doerfert {
21565b0581aeSJohannes Doerfert Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
21575b0581aeSJohannes Doerfert if (CachedKernel)
21585b0581aeSJohannes Doerfert return *CachedKernel;
21595b0581aeSJohannes Doerfert
21605b0581aeSJohannes Doerfert // TODO: We should use an AA to create an (optimistic and callback
21615b0581aeSJohannes Doerfert // call-aware) call graph. For now we stick to simple patterns that
21625b0581aeSJohannes Doerfert // are less powerful, basically the worst fixpoint.
21635b0581aeSJohannes Doerfert if (isKernel(F)) {
21645b0581aeSJohannes Doerfert CachedKernel = Kernel(&F);
21655b0581aeSJohannes Doerfert return *CachedKernel;
21665b0581aeSJohannes Doerfert }
21675b0581aeSJohannes Doerfert
21685b0581aeSJohannes Doerfert CachedKernel = nullptr;
2169994bb6ebSJohannes Doerfert if (!F.hasLocalLinkage()) {
2170994bb6ebSJohannes Doerfert
2171994bb6ebSJohannes Doerfert // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
21722db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2173eef6601bSJoseph Huber return ORA << "Potentially unknown OpenMP target region caller.";
2174994bb6ebSJohannes Doerfert };
21752db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
2176994bb6ebSJohannes Doerfert
21775b0581aeSJohannes Doerfert return nullptr;
21785b0581aeSJohannes Doerfert }
2179994bb6ebSJohannes Doerfert }
21805b0581aeSJohannes Doerfert
21815b0581aeSJohannes Doerfert auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
21825b0581aeSJohannes Doerfert if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
21835b0581aeSJohannes Doerfert // Allow use in equality comparisons.
21845b0581aeSJohannes Doerfert if (Cmp->isEquality())
21855b0581aeSJohannes Doerfert return getUniqueKernelFor(*Cmp);
21865b0581aeSJohannes Doerfert return nullptr;
21875b0581aeSJohannes Doerfert }
21885b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
21895b0581aeSJohannes Doerfert // Allow direct calls.
21905b0581aeSJohannes Doerfert if (CB->isCallee(&U))
21915b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB);
2192a2dbfb6bSGiorgis Georgakoudis
2193a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2194a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
2195a2dbfb6bSGiorgis Georgakoudis // Allow the use in __kmpc_parallel_51 calls.
2196a2dbfb6bSGiorgis Georgakoudis if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
21975b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB);
21985b0581aeSJohannes Doerfert return nullptr;
21995b0581aeSJohannes Doerfert }
22005b0581aeSJohannes Doerfert // Disallow every other use.
22015b0581aeSJohannes Doerfert return nullptr;
22025b0581aeSJohannes Doerfert };
22035b0581aeSJohannes Doerfert
22045b0581aeSJohannes Doerfert // TODO: In the future we want to track more than just a unique kernel.
22055b0581aeSJohannes Doerfert SmallPtrSet<Kernel, 2> PotentialKernels;
22068d8ce85bSsstefan1 OMPInformationCache::foreachUse(F, [&](const Use &U) {
22075b0581aeSJohannes Doerfert PotentialKernels.insert(GetUniqueKernelForUse(U));
22085b0581aeSJohannes Doerfert });
22095b0581aeSJohannes Doerfert
22105b0581aeSJohannes Doerfert Kernel K = nullptr;
22115b0581aeSJohannes Doerfert if (PotentialKernels.size() == 1)
22125b0581aeSJohannes Doerfert K = *PotentialKernels.begin();
22135b0581aeSJohannes Doerfert
22145b0581aeSJohannes Doerfert // Cache the result.
22155b0581aeSJohannes Doerfert UniqueKernelMap[&F] = K;
22165b0581aeSJohannes Doerfert
22175b0581aeSJohannes Doerfert return K;
22185b0581aeSJohannes Doerfert }
22195b0581aeSJohannes Doerfert
rewriteDeviceCodeStateMachine()22205b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
2221a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2222a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
22235b0581aeSJohannes Doerfert
22245b0581aeSJohannes Doerfert bool Changed = false;
2225a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelRFI)
22265b0581aeSJohannes Doerfert return Changed;
22275b0581aeSJohannes Doerfert
2228cd0dd8ecSJoseph Huber // If we have disabled state machine changes, exit
2229cd0dd8ecSJoseph Huber if (DisableOpenMPOptStateMachineRewrite)
2230cd0dd8ecSJoseph Huber return Changed;
2231cd0dd8ecSJoseph Huber
22325b0581aeSJohannes Doerfert for (Function *F : SCC) {
22335b0581aeSJohannes Doerfert
2234a2dbfb6bSGiorgis Georgakoudis // Check if the function is a use in a __kmpc_parallel_51 call at
22355b0581aeSJohannes Doerfert // all.
22365b0581aeSJohannes Doerfert bool UnknownUse = false;
2237a2dbfb6bSGiorgis Georgakoudis bool KernelParallelUse = false;
22385b0581aeSJohannes Doerfert unsigned NumDirectCalls = 0;
22395b0581aeSJohannes Doerfert
22405b0581aeSJohannes Doerfert SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
22418d8ce85bSsstefan1 OMPInformationCache::foreachUse(*F, [&](Use &U) {
22425b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser()))
22435b0581aeSJohannes Doerfert if (CB->isCallee(&U)) {
22445b0581aeSJohannes Doerfert ++NumDirectCalls;
22455b0581aeSJohannes Doerfert return;
22465b0581aeSJohannes Doerfert }
22475b0581aeSJohannes Doerfert
224881db6144SMichael Liao if (isa<ICmpInst>(U.getUser())) {
22495b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U);
22505b0581aeSJohannes Doerfert return;
22515b0581aeSJohannes Doerfert }
2252a2dbfb6bSGiorgis Georgakoudis
2253a2dbfb6bSGiorgis Georgakoudis // Find wrapper functions that represent parallel kernels.
2254a2dbfb6bSGiorgis Georgakoudis CallInst *CI =
2255a2dbfb6bSGiorgis Georgakoudis OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
2256a2dbfb6bSGiorgis Georgakoudis const unsigned int WrapperFunctionArgNo = 6;
2257a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse && CI &&
2258a2dbfb6bSGiorgis Georgakoudis CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
2259a2dbfb6bSGiorgis Georgakoudis KernelParallelUse = true;
22605b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U);
22615b0581aeSJohannes Doerfert return;
22625b0581aeSJohannes Doerfert }
22635b0581aeSJohannes Doerfert UnknownUse = true;
22645b0581aeSJohannes Doerfert });
22655b0581aeSJohannes Doerfert
2266a2dbfb6bSGiorgis Georgakoudis // Do not emit a remark if we haven't seen a __kmpc_parallel_51
2267fec1f210SJohannes Doerfert // use.
2268a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse)
22695b0581aeSJohannes Doerfert continue;
22705b0581aeSJohannes Doerfert
2271fec1f210SJohannes Doerfert // If this ever hits, we should investigate.
2272fec1f210SJohannes Doerfert // TODO: Checking the number of uses is not a necessary restriction and
2273fec1f210SJohannes Doerfert // should be lifted.
2274fec1f210SJohannes Doerfert if (UnknownUse || NumDirectCalls != 1 ||
2275d9659bf6SJohannes Doerfert ToBeReplacedStateMachineUses.size() > 2) {
22762db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) {
22772db182ffSJoseph Huber return ORA << "Parallel region is used in "
2278fec1f210SJohannes Doerfert << (UnknownUse ? "unknown" : "unexpected")
2279eef6601bSJoseph Huber << " ways. Will not attempt to rewrite the state machine.";
2280fec1f210SJohannes Doerfert };
22812c31d5ebSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
22825b0581aeSJohannes Doerfert continue;
2283fec1f210SJohannes Doerfert }
22845b0581aeSJohannes Doerfert
2285a2dbfb6bSGiorgis Georgakoudis // Even if we have __kmpc_parallel_51 calls, we (for now) give
22865b0581aeSJohannes Doerfert // up if the function is not called from a unique kernel.
22875b0581aeSJohannes Doerfert Kernel K = getUniqueKernelFor(*F);
2288fec1f210SJohannes Doerfert if (!K) {
22892db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2290eef6601bSJoseph Huber return ORA << "Parallel region is not called from a unique kernel. "
2291eef6601bSJoseph Huber "Will not attempt to rewrite the state machine.";
2292fec1f210SJohannes Doerfert };
22932c31d5ebSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
22945b0581aeSJohannes Doerfert continue;
2295fec1f210SJohannes Doerfert }
22965b0581aeSJohannes Doerfert
22975b0581aeSJohannes Doerfert // We now know F is a parallel body function called only from the kernel K.
22985b0581aeSJohannes Doerfert // We also identified the state machine uses in which we replace the
22995b0581aeSJohannes Doerfert // function pointer by a new global symbol for identification purposes. This
23005b0581aeSJohannes Doerfert // ensures only direct calls to the function are left.
23015b0581aeSJohannes Doerfert
23025b0581aeSJohannes Doerfert Module &M = *F->getParent();
23035b0581aeSJohannes Doerfert Type *Int8Ty = Type::getInt8Ty(M.getContext());
23045b0581aeSJohannes Doerfert
23055b0581aeSJohannes Doerfert auto *ID = new GlobalVariable(
23065b0581aeSJohannes Doerfert M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
23075b0581aeSJohannes Doerfert UndefValue::get(Int8Ty), F->getName() + ".ID");
23085b0581aeSJohannes Doerfert
23095b0581aeSJohannes Doerfert for (Use *U : ToBeReplacedStateMachineUses)
231071052ea1SJon Chesterfield U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
231171052ea1SJon Chesterfield ID, U->get()->getType()));
23125b0581aeSJohannes Doerfert
23135b0581aeSJohannes Doerfert ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
23145b0581aeSJohannes Doerfert
23155b0581aeSJohannes Doerfert Changed = true;
23165b0581aeSJohannes Doerfert }
23175b0581aeSJohannes Doerfert
23185b0581aeSJohannes Doerfert return Changed;
23195b0581aeSJohannes Doerfert }
23205b0581aeSJohannes Doerfert
2321b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values.
2322b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
2323b8235d2bSsstefan1 using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker__anon23c38c770111::AAICVTracker2324b8235d2bSsstefan1 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2325b8235d2bSsstefan1
initialize__anon23c38c770111::AAICVTracker23265dfd7cc4Ssstefan1 void initialize(Attributor &A) override {
23275dfd7cc4Ssstefan1 Function *F = getAnchorScope();
23285dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F))
23295dfd7cc4Ssstefan1 indicatePessimisticFixpoint();
23305dfd7cc4Ssstefan1 }
23315dfd7cc4Ssstefan1
2332b8235d2bSsstefan1 /// Returns true if value is assumed to be tracked.
isAssumedTracked__anon23c38c770111::AAICVTracker2333b8235d2bSsstefan1 bool isAssumedTracked() const { return getAssumed(); }
2334b8235d2bSsstefan1
2335b8235d2bSsstefan1 /// Returns true if value is known to be tracked.
isKnownTracked__anon23c38c770111::AAICVTracker2336b8235d2bSsstefan1 bool isKnownTracked() const { return getAssumed(); }
2337b8235d2bSsstefan1
2338b8235d2bSsstefan1 /// Create an abstract attribute biew for the position \p IRP.
2339b8235d2bSsstefan1 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
2340b8235d2bSsstefan1
2341b8235d2bSsstefan1 /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon23c38c770111::AAICVTracker23425dfd7cc4Ssstefan1 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
23435dfd7cc4Ssstefan1 const Instruction *I,
23445dfd7cc4Ssstefan1 Attributor &A) const {
23455dfd7cc4Ssstefan1 return None;
23465dfd7cc4Ssstefan1 }
23475dfd7cc4Ssstefan1
23485dfd7cc4Ssstefan1 /// Return an assumed unique ICV value if a single candidate is found. If
23495dfd7cc4Ssstefan1 /// there cannot be one, return a nullptr. If it is not clear yet, return the
23505dfd7cc4Ssstefan1 /// Optional::NoneType.
23515dfd7cc4Ssstefan1 virtual Optional<Value *>
23525dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const = 0;
23535dfd7cc4Ssstefan1
23545dfd7cc4Ssstefan1 // Currently only nthreads is being tracked.
23555dfd7cc4Ssstefan1 // this array will only grow with time.
23565dfd7cc4Ssstefan1 InternalControlVar TrackableICVs[1] = {ICV_nthreads};
2357b8235d2bSsstefan1
2358b8235d2bSsstefan1 /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAICVTracker2359b8235d2bSsstefan1 const std::string getName() const override { return "AAICVTracker"; }
2360b8235d2bSsstefan1
2361233af895SLuofan Chen /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAICVTracker2362233af895SLuofan Chen const char *getIdAddr() const override { return &ID; }
2363233af895SLuofan Chen
2364233af895SLuofan Chen /// This function should return true if the type of the \p AA is AAICVTracker
classof__anon23c38c770111::AAICVTracker2365233af895SLuofan Chen static bool classof(const AbstractAttribute *AA) {
2366233af895SLuofan Chen return (AA->getIdAddr() == &ID);
2367233af895SLuofan Chen }
2368233af895SLuofan Chen
2369b8235d2bSsstefan1 static const char ID;
2370b8235d2bSsstefan1 };
2371b8235d2bSsstefan1
2372b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker {
AAICVTrackerFunction__anon23c38c770111::AAICVTrackerFunction2373b8235d2bSsstefan1 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
2374b8235d2bSsstefan1 : AAICVTracker(IRP, A) {}
2375b8235d2bSsstefan1
2376b8235d2bSsstefan1 // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerFunction23775dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerFunction"; }
2378b8235d2bSsstefan1
2379b8235d2bSsstefan1 // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerFunction2380b8235d2bSsstefan1 void trackStatistics() const override {}
2381b8235d2bSsstefan1
23825dfd7cc4Ssstefan1 /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerFunction2383b8235d2bSsstefan1 ChangeStatus manifest(Attributor &A) override {
23845dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED;
2385b8235d2bSsstefan1 }
2386b8235d2bSsstefan1
2387b8235d2bSsstefan1 // Map of ICV to their values at specific program point.
23885dfd7cc4Ssstefan1 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
2389b8235d2bSsstefan1 InternalControlVar::ICV___last>
23905dfd7cc4Ssstefan1 ICVReplacementValuesMap;
2391b8235d2bSsstefan1
updateImpl__anon23c38c770111::AAICVTrackerFunction2392b8235d2bSsstefan1 ChangeStatus updateImpl(Attributor &A) override {
2393b8235d2bSsstefan1 ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
2394b8235d2bSsstefan1
2395b8235d2bSsstefan1 Function *F = getAnchorScope();
2396b8235d2bSsstefan1
2397b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2398b8235d2bSsstefan1
2399b8235d2bSsstefan1 for (InternalControlVar ICV : TrackableICVs) {
2400b8235d2bSsstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2401b8235d2bSsstefan1
24025dfd7cc4Ssstefan1 auto &ValuesMap = ICVReplacementValuesMap[ICV];
2403b8235d2bSsstefan1 auto TrackValues = [&](Use &U, Function &) {
2404b8235d2bSsstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
2405b8235d2bSsstefan1 if (!CI)
2406b8235d2bSsstefan1 return false;
2407b8235d2bSsstefan1
2408b8235d2bSsstefan1 // FIXME: handle setters with more that 1 arguments.
2409b8235d2bSsstefan1 /// Track new value.
24105dfd7cc4Ssstefan1 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
2411b8235d2bSsstefan1 HasChanged = ChangeStatus::CHANGED;
2412b8235d2bSsstefan1
2413b8235d2bSsstefan1 return false;
2414b8235d2bSsstefan1 };
2415b8235d2bSsstefan1
24165dfd7cc4Ssstefan1 auto CallCheck = [&](Instruction &I) {
2417b4a75598SJohannes Doerfert Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
2418ad7ce1e7SKazu Hirata if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
24195dfd7cc4Ssstefan1 HasChanged = ChangeStatus::CHANGED;
24205dfd7cc4Ssstefan1
24215dfd7cc4Ssstefan1 return true;
24225dfd7cc4Ssstefan1 };
24235dfd7cc4Ssstefan1
24245dfd7cc4Ssstefan1 // Track all changes of an ICV.
2425b8235d2bSsstefan1 SetterRFI.foreachUse(TrackValues, F);
24265dfd7cc4Ssstefan1
2427792aac98SJohannes Doerfert bool UsedAssumedInformation = false;
24285dfd7cc4Ssstefan1 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2429792aac98SJohannes Doerfert UsedAssumedInformation,
24305dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true);
24315dfd7cc4Ssstefan1
24325dfd7cc4Ssstefan1 /// TODO: Figure out a way to avoid adding entry in
24335dfd7cc4Ssstefan1 /// ICVReplacementValuesMap
24345dfd7cc4Ssstefan1 Instruction *Entry = &F->getEntryBlock().front();
24355dfd7cc4Ssstefan1 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
24365dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(Entry, nullptr));
2437b8235d2bSsstefan1 }
2438b8235d2bSsstefan1
2439b8235d2bSsstefan1 return HasChanged;
2440b8235d2bSsstefan1 }
2441b8235d2bSsstefan1
2442b4a75598SJohannes Doerfert /// Helper to check if \p I is a call and get the value for it if it is
24435dfd7cc4Ssstefan1 /// unique.
getValueForCall__anon23c38c770111::AAICVTrackerFunction2444b4a75598SJohannes Doerfert Optional<Value *> getValueForCall(Attributor &A, const Instruction &I,
24455dfd7cc4Ssstefan1 InternalControlVar &ICV) const {
2446b8235d2bSsstefan1
2447b4a75598SJohannes Doerfert const auto *CB = dyn_cast<CallBase>(&I);
2448dcaec812SJohannes Doerfert if (!CB || CB->hasFnAttr("no_openmp") ||
2449dcaec812SJohannes Doerfert CB->hasFnAttr("no_openmp_routines"))
24505dfd7cc4Ssstefan1 return None;
24515dfd7cc4Ssstefan1
2452b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2453b8235d2bSsstefan1 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
24545dfd7cc4Ssstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
24555dfd7cc4Ssstefan1 Function *CalledFunction = CB->getCalledFunction();
2456b8235d2bSsstefan1
24574eef14f9SWei Wang // Indirect call, assume ICV changes.
24584eef14f9SWei Wang if (CalledFunction == nullptr)
24594eef14f9SWei Wang return nullptr;
24605dfd7cc4Ssstefan1 if (CalledFunction == GetterRFI.Declaration)
24615dfd7cc4Ssstefan1 return None;
24625dfd7cc4Ssstefan1 if (CalledFunction == SetterRFI.Declaration) {
2463b4a75598SJohannes Doerfert if (ICVReplacementValuesMap[ICV].count(&I))
2464b4a75598SJohannes Doerfert return ICVReplacementValuesMap[ICV].lookup(&I);
24655dfd7cc4Ssstefan1
24665dfd7cc4Ssstefan1 return nullptr;
24675dfd7cc4Ssstefan1 }
24685dfd7cc4Ssstefan1
24695dfd7cc4Ssstefan1 // Since we don't know, assume it changes the ICV.
24705dfd7cc4Ssstefan1 if (CalledFunction->isDeclaration())
24715dfd7cc4Ssstefan1 return nullptr;
24725dfd7cc4Ssstefan1
24735b70c12fSJohannes Doerfert const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
24745b70c12fSJohannes Doerfert *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
24755dfd7cc4Ssstefan1
2476b4a75598SJohannes Doerfert if (ICVTrackingAA.isAssumedTracked()) {
2477b4a75598SJohannes Doerfert Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
2478481b8f31SJohannes Doerfert if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
2479481b8f31SJohannes Doerfert OMPInfoCache)))
2480b4a75598SJohannes Doerfert return URV;
2481b4a75598SJohannes Doerfert }
24825dfd7cc4Ssstefan1
24835dfd7cc4Ssstefan1 // If we don't know, assume it changes.
24845dfd7cc4Ssstefan1 return nullptr;
24855dfd7cc4Ssstefan1 }
24865dfd7cc4Ssstefan1
24875dfd7cc4Ssstefan1 // We don't check unique value for a function, so return None.
24885dfd7cc4Ssstefan1 Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerFunction24895dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override {
24905dfd7cc4Ssstefan1 return None;
24915dfd7cc4Ssstefan1 }
24925dfd7cc4Ssstefan1
24935dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon23c38c770111::AAICVTrackerFunction24945dfd7cc4Ssstefan1 Optional<Value *> getReplacementValue(InternalControlVar ICV,
24955dfd7cc4Ssstefan1 const Instruction *I,
24965dfd7cc4Ssstefan1 Attributor &A) const override {
24975dfd7cc4Ssstefan1 const auto &ValuesMap = ICVReplacementValuesMap[ICV];
24985dfd7cc4Ssstefan1 if (ValuesMap.count(I))
24995dfd7cc4Ssstefan1 return ValuesMap.lookup(I);
25005dfd7cc4Ssstefan1
25015dfd7cc4Ssstefan1 SmallVector<const Instruction *, 16> Worklist;
25025dfd7cc4Ssstefan1 SmallPtrSet<const Instruction *, 16> Visited;
25035dfd7cc4Ssstefan1 Worklist.push_back(I);
25045dfd7cc4Ssstefan1
25055dfd7cc4Ssstefan1 Optional<Value *> ReplVal;
25065dfd7cc4Ssstefan1
25075dfd7cc4Ssstefan1 while (!Worklist.empty()) {
25085dfd7cc4Ssstefan1 const Instruction *CurrInst = Worklist.pop_back_val();
25095dfd7cc4Ssstefan1 if (!Visited.insert(CurrInst).second)
2510b8235d2bSsstefan1 continue;
2511b8235d2bSsstefan1
25125dfd7cc4Ssstefan1 const BasicBlock *CurrBB = CurrInst->getParent();
25135dfd7cc4Ssstefan1
25145dfd7cc4Ssstefan1 // Go up and look for all potential setters/calls that might change the
25155dfd7cc4Ssstefan1 // ICV.
25165dfd7cc4Ssstefan1 while ((CurrInst = CurrInst->getPrevNode())) {
25175dfd7cc4Ssstefan1 if (ValuesMap.count(CurrInst)) {
25185dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
25195dfd7cc4Ssstefan1 // Unknown value, track new.
2520a7938c74SKazu Hirata if (!ReplVal) {
25215dfd7cc4Ssstefan1 ReplVal = NewReplVal;
25225dfd7cc4Ssstefan1 break;
25235dfd7cc4Ssstefan1 }
25245dfd7cc4Ssstefan1
25255dfd7cc4Ssstefan1 // If we found a new value, we can't know the icv value anymore.
2526a7938c74SKazu Hirata if (NewReplVal)
25275dfd7cc4Ssstefan1 if (ReplVal != NewReplVal)
2528b8235d2bSsstefan1 return nullptr;
2529b8235d2bSsstefan1
25305dfd7cc4Ssstefan1 break;
2531b8235d2bSsstefan1 }
2532b8235d2bSsstefan1
2533b4a75598SJohannes Doerfert Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
2534a7938c74SKazu Hirata if (!NewReplVal)
25355dfd7cc4Ssstefan1 continue;
25365dfd7cc4Ssstefan1
25375dfd7cc4Ssstefan1 // Unknown value, track new.
2538a7938c74SKazu Hirata if (!ReplVal) {
25395dfd7cc4Ssstefan1 ReplVal = NewReplVal;
25405dfd7cc4Ssstefan1 break;
2541b8235d2bSsstefan1 }
2542b8235d2bSsstefan1
25435dfd7cc4Ssstefan1 // if (NewReplVal.hasValue())
25445dfd7cc4Ssstefan1 // We found a new value, we can't know the icv value anymore.
25455dfd7cc4Ssstefan1 if (ReplVal != NewReplVal)
2546b8235d2bSsstefan1 return nullptr;
2547b8235d2bSsstefan1 }
25485dfd7cc4Ssstefan1
25495dfd7cc4Ssstefan1 // If we are in the same BB and we have a value, we are done.
2550e0e687a6SKazu Hirata if (CurrBB == I->getParent() && ReplVal)
25515dfd7cc4Ssstefan1 return ReplVal;
25525dfd7cc4Ssstefan1
25535dfd7cc4Ssstefan1 // Go through all predecessors and add terminators for analysis.
25545dfd7cc4Ssstefan1 for (const BasicBlock *Pred : predecessors(CurrBB))
25555dfd7cc4Ssstefan1 if (const Instruction *Terminator = Pred->getTerminator())
25565dfd7cc4Ssstefan1 Worklist.push_back(Terminator);
25575dfd7cc4Ssstefan1 }
25585dfd7cc4Ssstefan1
25595dfd7cc4Ssstefan1 return ReplVal;
25605dfd7cc4Ssstefan1 }
25615dfd7cc4Ssstefan1 };
25625dfd7cc4Ssstefan1
25635dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned__anon23c38c770111::AAICVTrackerFunctionReturned25645dfd7cc4Ssstefan1 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
25655dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {}
25665dfd7cc4Ssstefan1
25675dfd7cc4Ssstefan1 // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerFunctionReturned25685dfd7cc4Ssstefan1 const std::string getAsStr() const override {
25695dfd7cc4Ssstefan1 return "ICVTrackerFunctionReturned";
25705dfd7cc4Ssstefan1 }
25715dfd7cc4Ssstefan1
25725dfd7cc4Ssstefan1 // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerFunctionReturned25735dfd7cc4Ssstefan1 void trackStatistics() const override {}
25745dfd7cc4Ssstefan1
25755dfd7cc4Ssstefan1 /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerFunctionReturned25765dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override {
25775dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED;
25785dfd7cc4Ssstefan1 }
25795dfd7cc4Ssstefan1
25805dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point.
25815dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar,
25825dfd7cc4Ssstefan1 InternalControlVar::ICV___last>
25835dfd7cc4Ssstefan1 ICVReplacementValuesMap;
25845dfd7cc4Ssstefan1
25855dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV.
25865dfd7cc4Ssstefan1 Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerFunctionReturned25875dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override {
25885dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV];
25895dfd7cc4Ssstefan1 }
25905dfd7cc4Ssstefan1
updateImpl__anon23c38c770111::AAICVTrackerFunctionReturned25915dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override {
25925dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED;
25935dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
25945b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
25955dfd7cc4Ssstefan1
25965dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked())
25975dfd7cc4Ssstefan1 return indicatePessimisticFixpoint();
25985dfd7cc4Ssstefan1
25995dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) {
26005dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
26015dfd7cc4Ssstefan1 Optional<Value *> UniqueICVValue;
26025dfd7cc4Ssstefan1
26035dfd7cc4Ssstefan1 auto CheckReturnInst = [&](Instruction &I) {
26045dfd7cc4Ssstefan1 Optional<Value *> NewReplVal =
26055dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(ICV, &I, A);
26065dfd7cc4Ssstefan1
26075dfd7cc4Ssstefan1 // If we found a second ICV value there is no unique returned value.
2608e0e687a6SKazu Hirata if (UniqueICVValue && UniqueICVValue != NewReplVal)
26095dfd7cc4Ssstefan1 return false;
26105dfd7cc4Ssstefan1
26115dfd7cc4Ssstefan1 UniqueICVValue = NewReplVal;
26125dfd7cc4Ssstefan1
26135dfd7cc4Ssstefan1 return true;
26145dfd7cc4Ssstefan1 };
26155dfd7cc4Ssstefan1
2616792aac98SJohannes Doerfert bool UsedAssumedInformation = false;
26175dfd7cc4Ssstefan1 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2618792aac98SJohannes Doerfert UsedAssumedInformation,
26195dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true))
26205dfd7cc4Ssstefan1 UniqueICVValue = nullptr;
26215dfd7cc4Ssstefan1
26225dfd7cc4Ssstefan1 if (UniqueICVValue == ReplVal)
26235dfd7cc4Ssstefan1 continue;
26245dfd7cc4Ssstefan1
26255dfd7cc4Ssstefan1 ReplVal = UniqueICVValue;
26265dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED;
26275dfd7cc4Ssstefan1 }
26285dfd7cc4Ssstefan1
26295dfd7cc4Ssstefan1 return Changed;
26305dfd7cc4Ssstefan1 }
26315dfd7cc4Ssstefan1 };
26325dfd7cc4Ssstefan1
26335dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite__anon23c38c770111::AAICVTrackerCallSite26345dfd7cc4Ssstefan1 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
26355dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {}
26365dfd7cc4Ssstefan1
initialize__anon23c38c770111::AAICVTrackerCallSite26375dfd7cc4Ssstefan1 void initialize(Attributor &A) override {
26385dfd7cc4Ssstefan1 Function *F = getAnchorScope();
26395dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F))
26405dfd7cc4Ssstefan1 indicatePessimisticFixpoint();
26415dfd7cc4Ssstefan1
26425dfd7cc4Ssstefan1 // We only initialize this AA for getters, so we need to know which ICV it
26435dfd7cc4Ssstefan1 // gets.
26445dfd7cc4Ssstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
26455dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) {
26465dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV];
26475dfd7cc4Ssstefan1 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
26485dfd7cc4Ssstefan1 if (Getter.Declaration == getAssociatedFunction()) {
26495dfd7cc4Ssstefan1 AssociatedICV = ICVInfo.Kind;
26505dfd7cc4Ssstefan1 return;
26515dfd7cc4Ssstefan1 }
26525dfd7cc4Ssstefan1 }
26535dfd7cc4Ssstefan1
26545dfd7cc4Ssstefan1 /// Unknown ICV.
26555dfd7cc4Ssstefan1 indicatePessimisticFixpoint();
26565dfd7cc4Ssstefan1 }
26575dfd7cc4Ssstefan1
manifest__anon23c38c770111::AAICVTrackerCallSite26585dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override {
2659e0e687a6SKazu Hirata if (!ReplVal || !*ReplVal)
26605dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED;
26615dfd7cc4Ssstefan1
26627a07b88fSJohannes Doerfert A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
26635dfd7cc4Ssstefan1 A.deleteAfterManifest(*getCtxI());
26645dfd7cc4Ssstefan1
26655dfd7cc4Ssstefan1 return ChangeStatus::CHANGED;
26665dfd7cc4Ssstefan1 }
26675dfd7cc4Ssstefan1
26685dfd7cc4Ssstefan1 // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerCallSite26695dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
26705dfd7cc4Ssstefan1
26715dfd7cc4Ssstefan1 // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerCallSite26725dfd7cc4Ssstefan1 void trackStatistics() const override {}
26735dfd7cc4Ssstefan1
26745dfd7cc4Ssstefan1 InternalControlVar AssociatedICV;
26755dfd7cc4Ssstefan1 Optional<Value *> ReplVal;
26765dfd7cc4Ssstefan1
updateImpl__anon23c38c770111::AAICVTrackerCallSite26775dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override {
26785dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
26795b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
26805dfd7cc4Ssstefan1
26815dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV.
26825dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked())
26835dfd7cc4Ssstefan1 return indicatePessimisticFixpoint();
26845dfd7cc4Ssstefan1
26855dfd7cc4Ssstefan1 Optional<Value *> NewReplVal =
26865dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
26875dfd7cc4Ssstefan1
26885dfd7cc4Ssstefan1 if (ReplVal == NewReplVal)
26895dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED;
26905dfd7cc4Ssstefan1
26915dfd7cc4Ssstefan1 ReplVal = NewReplVal;
26925dfd7cc4Ssstefan1 return ChangeStatus::CHANGED;
26935dfd7cc4Ssstefan1 }
26945dfd7cc4Ssstefan1
26955dfd7cc4Ssstefan1 // Return the value with which associated value can be replaced for specific
26965dfd7cc4Ssstefan1 // \p ICV.
26975dfd7cc4Ssstefan1 Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerCallSite26985dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override {
26995dfd7cc4Ssstefan1 return ReplVal;
27005dfd7cc4Ssstefan1 }
27015dfd7cc4Ssstefan1 };
27025dfd7cc4Ssstefan1
27035dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned__anon23c38c770111::AAICVTrackerCallSiteReturned27045dfd7cc4Ssstefan1 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
27055dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {}
27065dfd7cc4Ssstefan1
27075dfd7cc4Ssstefan1 // FIXME: come up with better string.
getAsStr__anon23c38c770111::AAICVTrackerCallSiteReturned27085dfd7cc4Ssstefan1 const std::string getAsStr() const override {
27095dfd7cc4Ssstefan1 return "ICVTrackerCallSiteReturned";
27105dfd7cc4Ssstefan1 }
27115dfd7cc4Ssstefan1
27125dfd7cc4Ssstefan1 // FIXME: come up with some stats.
trackStatistics__anon23c38c770111::AAICVTrackerCallSiteReturned27135dfd7cc4Ssstefan1 void trackStatistics() const override {}
27145dfd7cc4Ssstefan1
27155dfd7cc4Ssstefan1 /// We don't manifest anything for this AA.
manifest__anon23c38c770111::AAICVTrackerCallSiteReturned27165dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override {
27175dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED;
27185dfd7cc4Ssstefan1 }
27195dfd7cc4Ssstefan1
27205dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point.
27215dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar,
27225dfd7cc4Ssstefan1 InternalControlVar::ICV___last>
27235dfd7cc4Ssstefan1 ICVReplacementValuesMap;
27245dfd7cc4Ssstefan1
27255dfd7cc4Ssstefan1 /// Return the value with which associated value can be replaced for specific
27265dfd7cc4Ssstefan1 /// \p ICV.
27275dfd7cc4Ssstefan1 Optional<Value *>
getUniqueReplacementValue__anon23c38c770111::AAICVTrackerCallSiteReturned27285dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override {
27295dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV];
27305dfd7cc4Ssstefan1 }
27315dfd7cc4Ssstefan1
updateImpl__anon23c38c770111::AAICVTrackerCallSiteReturned27325dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override {
27335dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED;
27345dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
27355b70c12fSJohannes Doerfert *this, IRPosition::returned(*getAssociatedFunction()),
27365b70c12fSJohannes Doerfert DepClassTy::REQUIRED);
27375dfd7cc4Ssstefan1
27385dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV.
27395dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked())
27405dfd7cc4Ssstefan1 return indicatePessimisticFixpoint();
27415dfd7cc4Ssstefan1
27425dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) {
27435dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
27445dfd7cc4Ssstefan1 Optional<Value *> NewReplVal =
27455dfd7cc4Ssstefan1 ICVTrackingAA.getUniqueReplacementValue(ICV);
27465dfd7cc4Ssstefan1
27475dfd7cc4Ssstefan1 if (ReplVal == NewReplVal)
27485dfd7cc4Ssstefan1 continue;
27495dfd7cc4Ssstefan1
27505dfd7cc4Ssstefan1 ReplVal = NewReplVal;
27515dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED;
27525dfd7cc4Ssstefan1 }
27535dfd7cc4Ssstefan1 return Changed;
27545dfd7cc4Ssstefan1 }
27559548b74aSJohannes Doerfert };
275618283125SJoseph Huber
275718283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction__anon23c38c770111::AAExecutionDomainFunction275818283125SJoseph Huber AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
275918283125SJoseph Huber : AAExecutionDomain(IRP, A) {}
276018283125SJoseph Huber
getAsStr__anon23c38c770111::AAExecutionDomainFunction276118283125SJoseph Huber const std::string getAsStr() const override {
276218283125SJoseph Huber return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
276318283125SJoseph Huber "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
276418283125SJoseph Huber }
276518283125SJoseph Huber
276618283125SJoseph Huber /// See AbstractAttribute::trackStatistics().
trackStatistics__anon23c38c770111::AAExecutionDomainFunction276718283125SJoseph Huber void trackStatistics() const override {}
276818283125SJoseph Huber
initialize__anon23c38c770111::AAExecutionDomainFunction276918283125SJoseph Huber void initialize(Attributor &A) override {
277018283125SJoseph Huber Function *F = getAnchorScope();
277118283125SJoseph Huber for (const auto &BB : *F)
277218283125SJoseph Huber SingleThreadedBBs.insert(&BB);
277318283125SJoseph Huber NumBBs = SingleThreadedBBs.size();
277418283125SJoseph Huber }
277518283125SJoseph Huber
manifest__anon23c38c770111::AAExecutionDomainFunction277618283125SJoseph Huber ChangeStatus manifest(Attributor &A) override {
277718283125SJoseph Huber LLVM_DEBUG({
277818283125SJoseph Huber for (const BasicBlock *BB : SingleThreadedBBs)
277918283125SJoseph Huber dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
278018283125SJoseph Huber << BB->getName() << " is executed by a single thread.\n";
278118283125SJoseph Huber });
278218283125SJoseph Huber return ChangeStatus::UNCHANGED;
278318283125SJoseph Huber }
278418283125SJoseph Huber
278518283125SJoseph Huber ChangeStatus updateImpl(Attributor &A) override;
278618283125SJoseph Huber
278718283125SJoseph Huber /// Check if an instruction is executed by a single thread.
isExecutedByInitialThreadOnly__anon23c38c770111::AAExecutionDomainFunction27889a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
27899a23e673SJohannes Doerfert return isExecutedByInitialThreadOnly(*I.getParent());
279018283125SJoseph Huber }
279118283125SJoseph Huber
isExecutedByInitialThreadOnly__anon23c38c770111::AAExecutionDomainFunction27929a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
27931cfdcae6SJoseph Huber return isValidState() && SingleThreadedBBs.contains(&BB);
279418283125SJoseph Huber }
279518283125SJoseph Huber
279618283125SJoseph Huber /// Set of basic blocks that are executed by a single thread.
27979f04a0eaSJohannes Doerfert SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
279818283125SJoseph Huber
279918283125SJoseph Huber /// Total number of basic blocks in this function.
280059a6b668SJohannes Doerfert long unsigned NumBBs = 0;
280118283125SJoseph Huber };
280218283125SJoseph Huber
updateImpl(Attributor & A)280318283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
280418283125SJoseph Huber Function *F = getAnchorScope();
280518283125SJoseph Huber ReversePostOrderTraversal<Function *> RPOT(F);
280618283125SJoseph Huber auto NumSingleThreadedBBs = SingleThreadedBBs.size();
280718283125SJoseph Huber
280818283125SJoseph Huber bool AllCallSitesKnown;
280918283125SJoseph Huber auto PredForCallSite = [&](AbstractCallSite ACS) {
281018283125SJoseph Huber const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
281118283125SJoseph Huber *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
281218283125SJoseph Huber DepClassTy::REQUIRED);
28131cfdcae6SJoseph Huber return ACS.isDirectCall() &&
28141cfdcae6SJoseph Huber ExecutionDomainAA.isExecutedByInitialThreadOnly(
28159a23e673SJohannes Doerfert *ACS.getInstruction());
281618283125SJoseph Huber };
281718283125SJoseph Huber
281818283125SJoseph Huber if (!A.checkForAllCallSites(PredForCallSite, *this,
281918283125SJoseph Huber /* RequiresAllCallSites */ true,
282018283125SJoseph Huber AllCallSitesKnown))
28219f04a0eaSJohannes Doerfert SingleThreadedBBs.remove(&F->getEntryBlock());
282218283125SJoseph Huber
2823e2cfbfccSJohannes Doerfert auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2824e2cfbfccSJohannes Doerfert auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2825e2cfbfccSJohannes Doerfert
282627905eebSJoseph Huber // Check if the edge into the successor block contains a condition that only
282727905eebSJoseph Huber // lets the main thread execute it.
28286fc51c9fSJoseph Huber auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
282918283125SJoseph Huber if (!Edge || !Edge->isConditional())
283018283125SJoseph Huber return false;
283118283125SJoseph Huber if (Edge->getSuccessor(0) != SuccessorBB)
283218283125SJoseph Huber return false;
283318283125SJoseph Huber
283418283125SJoseph Huber auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
283518283125SJoseph Huber if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
283618283125SJoseph Huber return false;
283718283125SJoseph Huber
283818283125SJoseph Huber ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2839e2cfbfccSJohannes Doerfert if (!C)
284018283125SJoseph Huber return false;
284118283125SJoseph Huber
2842e2cfbfccSJohannes Doerfert // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2843e2cfbfccSJohannes Doerfert if (C->isAllOnesValue()) {
2844e2cfbfccSJohannes Doerfert auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2845c4b1fe05SJohannes Doerfert CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2846c4b1fe05SJohannes Doerfert if (!CB)
2847e2cfbfccSJohannes Doerfert return false;
2848423d34f7SShilei Tian const int InitModeArgNo = 1;
2849423d34f7SShilei Tian auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
2850423d34f7SShilei Tian return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
2851e2cfbfccSJohannes Doerfert }
285218283125SJoseph Huber
285327905eebSJoseph Huber if (C->isZero()) {
285427905eebSJoseph Huber // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
285527905eebSJoseph Huber if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
285627905eebSJoseph Huber if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
285727905eebSJoseph Huber return true;
285827905eebSJoseph Huber
285927905eebSJoseph Huber // Match: 0 == llvm.amdgcn.workitem.id.x()
286027905eebSJoseph Huber if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
286127905eebSJoseph Huber if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
286227905eebSJoseph Huber return true;
286327905eebSJoseph Huber }
286427905eebSJoseph Huber
286518283125SJoseph Huber return false;
286618283125SJoseph Huber };
286718283125SJoseph Huber
286818283125SJoseph Huber // Merge all the predecessor states into the current basic block. A basic
286918283125SJoseph Huber // block is executed by a single thread if all of its predecessors are.
287018283125SJoseph Huber auto MergePredecessorStates = [&](BasicBlock *BB) {
287198007313SKazu Hirata if (pred_empty(BB))
287218283125SJoseph Huber return SingleThreadedBBs.contains(BB);
287318283125SJoseph Huber
28746fc51c9fSJoseph Huber bool IsInitialThread = true;
287598007313SKazu Hirata for (BasicBlock *PredBB : predecessors(BB)) {
287698007313SKazu Hirata if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
287718283125SJoseph Huber BB))
287898007313SKazu Hirata IsInitialThread &= SingleThreadedBBs.contains(PredBB);
287918283125SJoseph Huber }
288018283125SJoseph Huber
28816fc51c9fSJoseph Huber return IsInitialThread;
288218283125SJoseph Huber };
288318283125SJoseph Huber
288418283125SJoseph Huber for (auto *BB : RPOT) {
288518283125SJoseph Huber if (!MergePredecessorStates(BB))
28869f04a0eaSJohannes Doerfert SingleThreadedBBs.remove(BB);
288718283125SJoseph Huber }
288818283125SJoseph Huber
288918283125SJoseph Huber return (NumSingleThreadedBBs == SingleThreadedBBs.size())
289018283125SJoseph Huber ? ChangeStatus::UNCHANGED
289118283125SJoseph Huber : ChangeStatus::CHANGED;
289218283125SJoseph Huber }
289318283125SJoseph Huber
28946fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a
28956fc51c9fSJoseph Huber /// static buffer of shared memory.
28966fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
28976fc51c9fSJoseph Huber using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToShared__anon23c38c770111::AAHeapToShared28986fc51c9fSJoseph Huber AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
28996fc51c9fSJoseph Huber
29006fc51c9fSJoseph Huber /// Create an abstract attribute view for the position \p IRP.
29016fc51c9fSJoseph Huber static AAHeapToShared &createForPosition(const IRPosition &IRP,
29026fc51c9fSJoseph Huber Attributor &A);
29036fc51c9fSJoseph Huber
2904f8c40ed8SGiorgis Georgakoudis /// Returns true if HeapToShared conversion is assumed to be possible.
2905f8c40ed8SGiorgis Georgakoudis virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
2906f8c40ed8SGiorgis Georgakoudis
2907f8c40ed8SGiorgis Georgakoudis /// Returns true if HeapToShared conversion is assumed and the CB is a
2908f8c40ed8SGiorgis Georgakoudis /// callsite to a free operation to be removed.
2909f8c40ed8SGiorgis Georgakoudis virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
2910f8c40ed8SGiorgis Georgakoudis
29116fc51c9fSJoseph Huber /// See AbstractAttribute::getName().
getName__anon23c38c770111::AAHeapToShared29126fc51c9fSJoseph Huber const std::string getName() const override { return "AAHeapToShared"; }
29136fc51c9fSJoseph Huber
29146fc51c9fSJoseph Huber /// See AbstractAttribute::getIdAddr().
getIdAddr__anon23c38c770111::AAHeapToShared29156fc51c9fSJoseph Huber const char *getIdAddr() const override { return &ID; }
29166fc51c9fSJoseph Huber
29176fc51c9fSJoseph Huber /// This function should return true if the type of the \p AA is
29186fc51c9fSJoseph Huber /// AAHeapToShared.
classof__anon23c38c770111::AAHeapToShared29196fc51c9fSJoseph Huber static bool classof(const AbstractAttribute *AA) {
29206fc51c9fSJoseph Huber return (AA->getIdAddr() == &ID);
29216fc51c9fSJoseph Huber }
29226fc51c9fSJoseph Huber
29236fc51c9fSJoseph Huber /// Unique ID (due to the unique address)
29246fc51c9fSJoseph Huber static const char ID;
29256fc51c9fSJoseph Huber };
29266fc51c9fSJoseph Huber
29276fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction__anon23c38c770111::AAHeapToSharedFunction29286fc51c9fSJoseph Huber AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
29296fc51c9fSJoseph Huber : AAHeapToShared(IRP, A) {}
29306fc51c9fSJoseph Huber
getAsStr__anon23c38c770111::AAHeapToSharedFunction29316fc51c9fSJoseph Huber const std::string getAsStr() const override {
29326fc51c9fSJoseph Huber return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
29336fc51c9fSJoseph Huber " malloc calls eligible.";
29346fc51c9fSJoseph Huber }
29356fc51c9fSJoseph Huber
29366fc51c9fSJoseph Huber /// See AbstractAttribute::trackStatistics().
trackStatistics__anon23c38c770111::AAHeapToSharedFunction29376fc51c9fSJoseph Huber void trackStatistics() const override {}
29386fc51c9fSJoseph Huber
2939f8c40ed8SGiorgis Georgakoudis /// This functions finds free calls that will be removed by the
2940f8c40ed8SGiorgis Georgakoudis /// HeapToShared transformation.
findPotentialRemovedFreeCalls__anon23c38c770111::AAHeapToSharedFunction2941f8c40ed8SGiorgis Georgakoudis void findPotentialRemovedFreeCalls(Attributor &A) {
2942f8c40ed8SGiorgis Georgakoudis auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2943f8c40ed8SGiorgis Georgakoudis auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2944f8c40ed8SGiorgis Georgakoudis
2945f8c40ed8SGiorgis Georgakoudis PotentialRemovedFreeCalls.clear();
2946f8c40ed8SGiorgis Georgakoudis // Update free call users of found malloc calls.
2947f8c40ed8SGiorgis Georgakoudis for (CallBase *CB : MallocCalls) {
2948f8c40ed8SGiorgis Georgakoudis SmallVector<CallBase *, 4> FreeCalls;
2949f8c40ed8SGiorgis Georgakoudis for (auto *U : CB->users()) {
2950f8c40ed8SGiorgis Georgakoudis CallBase *C = dyn_cast<CallBase>(U);
2951f8c40ed8SGiorgis Georgakoudis if (C && C->getCalledFunction() == FreeRFI.Declaration)
2952f8c40ed8SGiorgis Georgakoudis FreeCalls.push_back(C);
2953f8c40ed8SGiorgis Georgakoudis }
2954f8c40ed8SGiorgis Georgakoudis
2955f8c40ed8SGiorgis Georgakoudis if (FreeCalls.size() != 1)
2956f8c40ed8SGiorgis Georgakoudis continue;
2957f8c40ed8SGiorgis Georgakoudis
2958f8c40ed8SGiorgis Georgakoudis PotentialRemovedFreeCalls.insert(FreeCalls.front());
2959f8c40ed8SGiorgis Georgakoudis }
2960f8c40ed8SGiorgis Georgakoudis }
2961f8c40ed8SGiorgis Georgakoudis
initialize__anon23c38c770111::AAHeapToSharedFunction29626fc51c9fSJoseph Huber void initialize(Attributor &A) override {
29635b4acb20SJohannes Doerfert if (DisableOpenMPOptDeglobalization) {
29645b4acb20SJohannes Doerfert indicatePessimisticFixpoint();
29655b4acb20SJohannes Doerfert return;
29665b4acb20SJohannes Doerfert }
29675b4acb20SJohannes Doerfert
29686fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29696fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
29706fc51c9fSJoseph Huber
2971192a34ddSJohannes Doerfert Attributor::SimplifictionCallbackTy SCB =
2972192a34ddSJohannes Doerfert [](const IRPosition &, const AbstractAttribute *,
2973192a34ddSJohannes Doerfert bool &) -> Optional<Value *> { return nullptr; };
29746fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users())
2975192a34ddSJohannes Doerfert if (CallBase *CB = dyn_cast<CallBase>(U)) {
29766fc51c9fSJoseph Huber MallocCalls.insert(CB);
2977192a34ddSJohannes Doerfert A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
2978192a34ddSJohannes Doerfert SCB);
2979192a34ddSJohannes Doerfert }
2980f8c40ed8SGiorgis Georgakoudis
2981f8c40ed8SGiorgis Georgakoudis findPotentialRemovedFreeCalls(A);
2982f8c40ed8SGiorgis Georgakoudis }
2983f8c40ed8SGiorgis Georgakoudis
isAssumedHeapToShared__anon23c38c770111::AAHeapToSharedFunction2984eaab880eSGiorgis Georgakoudis bool isAssumedHeapToShared(CallBase &CB) const override {
2985f8c40ed8SGiorgis Georgakoudis return isValidState() && MallocCalls.count(&CB);
2986f8c40ed8SGiorgis Georgakoudis }
2987f8c40ed8SGiorgis Georgakoudis
isAssumedHeapToSharedRemovedFree__anon23c38c770111::AAHeapToSharedFunction2988eaab880eSGiorgis Georgakoudis bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
2989f8c40ed8SGiorgis Georgakoudis return isValidState() && PotentialRemovedFreeCalls.count(&CB);
29906fc51c9fSJoseph Huber }
29916fc51c9fSJoseph Huber
manifest__anon23c38c770111::AAHeapToSharedFunction29926fc51c9fSJoseph Huber ChangeStatus manifest(Attributor &A) override {
29936fc51c9fSJoseph Huber if (MallocCalls.empty())
29946fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED;
29956fc51c9fSJoseph Huber
29966fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29976fc51c9fSJoseph Huber auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
29986fc51c9fSJoseph Huber
29996fc51c9fSJoseph Huber Function *F = getAnchorScope();
30006fc51c9fSJoseph Huber auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
30016fc51c9fSJoseph Huber DepClassTy::OPTIONAL);
30026fc51c9fSJoseph Huber
30036fc51c9fSJoseph Huber ChangeStatus Changed = ChangeStatus::UNCHANGED;
30046fc51c9fSJoseph Huber for (CallBase *CB : MallocCalls) {
30056fc51c9fSJoseph Huber // Skip replacing this if HeapToStack has already claimed it.
3006c1c1fe93SJohannes Doerfert if (HS && HS->isAssumedHeapToStack(*CB))
30076fc51c9fSJoseph Huber continue;
30086fc51c9fSJoseph Huber
30096fc51c9fSJoseph Huber // Find the unique free call to remove it.
30106fc51c9fSJoseph Huber SmallVector<CallBase *, 4> FreeCalls;
30116fc51c9fSJoseph Huber for (auto *U : CB->users()) {
30126fc51c9fSJoseph Huber CallBase *C = dyn_cast<CallBase>(U);
30136fc51c9fSJoseph Huber if (C && C->getCalledFunction() == FreeCall.Declaration)
30146fc51c9fSJoseph Huber FreeCalls.push_back(C);
30156fc51c9fSJoseph Huber }
30166fc51c9fSJoseph Huber if (FreeCalls.size() != 1)
30176fc51c9fSJoseph Huber continue;
30186fc51c9fSJoseph Huber
3019274359cfSSimon Pilgrim auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
30206fc51c9fSJoseph Huber
30210136a440SJoseph Huber if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
30220136a440SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
30230136a440SJoseph Huber << " with shared memory."
30240136a440SJoseph Huber << " Shared memory usage is limited to "
30250136a440SJoseph Huber << SharedMemoryLimit << " bytes\n");
30260136a440SJoseph Huber continue;
30270136a440SJoseph Huber }
30280136a440SJoseph Huber
302913d8f000SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
303013d8f000SJoseph Huber << " with " << AllocSize->getZExtValue()
30316fc51c9fSJoseph Huber << " bytes of shared memory\n");
30326fc51c9fSJoseph Huber
30336fc51c9fSJoseph Huber // Create a new shared memory buffer of the same size as the allocation
30346fc51c9fSJoseph Huber // and replace all the uses of the original allocation with it.
30356fc51c9fSJoseph Huber Module *M = CB->getModule();
30366fc51c9fSJoseph Huber Type *Int8Ty = Type::getInt8Ty(M->getContext());
30376fc51c9fSJoseph Huber Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
30386fc51c9fSJoseph Huber auto *SharedMem = new GlobalVariable(
30396fc51c9fSJoseph Huber *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
30405eb49009SJoseph Huber UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
30416fc51c9fSJoseph Huber GlobalValue::NotThreadLocal,
30426fc51c9fSJoseph Huber static_cast<unsigned>(AddressSpace::Shared));
30436fc51c9fSJoseph Huber auto *NewBuffer =
30446fc51c9fSJoseph Huber ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
30456fc51c9fSJoseph Huber
304630e36c9bSJoseph Huber auto Remark = [&](OptimizationRemark OR) {
304730e36c9bSJoseph Huber return OR << "Replaced globalized variable with "
304830e36c9bSJoseph Huber << ore::NV("SharedMemory", AllocSize->getZExtValue())
304930e36c9bSJoseph Huber << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
3050eef6601bSJoseph Huber << "of shared memory.";
305130e36c9bSJoseph Huber };
30522c31d5ebSJoseph Huber A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
305330e36c9bSJoseph Huber
30546e220296SJoseph Huber MaybeAlign Alignment = CB->getRetAlign();
30556e220296SJoseph Huber assert(Alignment &&
30566e220296SJoseph Huber "HeapToShared on allocation without alignment attribute");
30576e220296SJoseph Huber SharedMem->setAlignment(MaybeAlign(Alignment));
30586fc51c9fSJoseph Huber
30597a07b88fSJohannes Doerfert A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
30606fc51c9fSJoseph Huber A.deleteAfterManifest(*CB);
30616fc51c9fSJoseph Huber A.deleteAfterManifest(*FreeCalls.front());
30626fc51c9fSJoseph Huber
30630136a440SJoseph Huber SharedMemoryUsed += AllocSize->getZExtValue();
30640136a440SJoseph Huber NumBytesMovedToSharedMemory = SharedMemoryUsed;
30656fc51c9fSJoseph Huber Changed = ChangeStatus::CHANGED;
30666fc51c9fSJoseph Huber }
30676fc51c9fSJoseph Huber
30686fc51c9fSJoseph Huber return Changed;
30696fc51c9fSJoseph Huber }
30706fc51c9fSJoseph Huber
updateImpl__anon23c38c770111::AAHeapToSharedFunction30716fc51c9fSJoseph Huber ChangeStatus updateImpl(Attributor &A) override {
30726fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
30736fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
30746fc51c9fSJoseph Huber Function *F = getAnchorScope();
30756fc51c9fSJoseph Huber
30766fc51c9fSJoseph Huber auto NumMallocCalls = MallocCalls.size();
30776fc51c9fSJoseph Huber
30786fc51c9fSJoseph Huber // Only consider malloc calls executed by a single thread with a constant.
30796fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users()) {
30806fc51c9fSJoseph Huber const auto &ED = A.getAAFor<AAExecutionDomain>(
30816fc51c9fSJoseph Huber *this, IRPosition::function(*F), DepClassTy::REQUIRED);
30826fc51c9fSJoseph Huber if (CallBase *CB = dyn_cast<CallBase>(U))
3083d243cbf8SKazu Hirata if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
30846fc51c9fSJoseph Huber !ED.isExecutedByInitialThreadOnly(*CB))
3085ba70f3a5SJohannes Doerfert MallocCalls.remove(CB);
30866fc51c9fSJoseph Huber }
30876fc51c9fSJoseph Huber
3088f8c40ed8SGiorgis Georgakoudis findPotentialRemovedFreeCalls(A);
3089f8c40ed8SGiorgis Georgakoudis
30906fc51c9fSJoseph Huber if (NumMallocCalls != MallocCalls.size())
30916fc51c9fSJoseph Huber return ChangeStatus::CHANGED;
30926fc51c9fSJoseph Huber
30936fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED;
30946fc51c9fSJoseph Huber }
30956fc51c9fSJoseph Huber
30966fc51c9fSJoseph Huber /// Collection of all malloc calls in a function.
3097ba70f3a5SJohannes Doerfert SmallSetVector<CallBase *, 4> MallocCalls;
3098f8c40ed8SGiorgis Georgakoudis /// Collection of potentially removed free calls in a function.
3099f8c40ed8SGiorgis Georgakoudis SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
31000136a440SJoseph Huber /// The total amount of shared memory that has been used for HeapToShared.
31010136a440SJoseph Huber unsigned SharedMemoryUsed = 0;
31026fc51c9fSJoseph Huber };
31036fc51c9fSJoseph Huber
3104d9659bf6SJohannes Doerfert struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
3105d9659bf6SJohannes Doerfert using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo__anon23c38c770111::AAKernelInfo3106d9659bf6SJohannes Doerfert AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3107d9659bf6SJohannes Doerfert
3108d9659bf6SJohannes Doerfert /// Statistics are tracked as part of manifest for now.
trackStatistics__anon23c38c770111::AAKernelInfo3109d9659bf6SJohannes Doerfert void trackStatistics() const override {}
3110d9659bf6SJohannes Doerfert
3111d9659bf6SJohannes Doerfert /// See AbstractAttribute::getAsStr()
getAsStr__anon23c38c770111::AAKernelInfo3112d9659bf6SJohannes Doerfert const std::string getAsStr() const override {
3113d9659bf6SJohannes Doerfert if (!isValidState())
3114d9659bf6SJohannes Doerfert return "<invalid>";
3115514c033dSJohannes Doerfert return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
3116514c033dSJohannes Doerfert : "generic") +
3117514c033dSJohannes Doerfert std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
3118514c033dSJohannes Doerfert : "") +
3119d9659bf6SJohannes Doerfert std::string(" #PRs: ") +
3120c6457dcaSJohannes Doerfert (ReachedKnownParallelRegions.isValidState()
3121c6457dcaSJohannes Doerfert ? std::to_string(ReachedKnownParallelRegions.size())
3122c6457dcaSJohannes Doerfert : "<invalid>") +
3123d9659bf6SJohannes Doerfert ", #Unknown PRs: " +
3124c6457dcaSJohannes Doerfert (ReachedUnknownParallelRegions.isValidState()
3125c6457dcaSJohannes Doerfert ? std::to_string(ReachedUnknownParallelRegions.size())
3126c6457dcaSJohannes Doerfert : "<invalid>") +
31270a16c560SJohannes Doerfert ", #Reaching Kernels: " +
31280a16c560SJohannes Doerfert (ReachingKernelEntries.isValidState()
31290a16c560SJohannes Doerfert ? std::to_string(ReachingKernelEntries.size())
31300a16c560SJohannes Doerfert : "<invalid>");
3131d9659bf6SJohannes Doerfert }
3132d9659bf6SJohannes Doerfert
3133d9659bf6SJohannes Doerfert /// Create an abstract attribute biew for the position \p IRP.
3134d9659bf6SJohannes Doerfert static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
3135d9659bf6SJohannes Doerfert
3136d9659bf6SJohannes Doerfert /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAKernelInfo3137d9659bf6SJohannes Doerfert const std::string getName() const override { return "AAKernelInfo"; }
3138d9659bf6SJohannes Doerfert
3139d9659bf6SJohannes Doerfert /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAKernelInfo3140d9659bf6SJohannes Doerfert const char *getIdAddr() const override { return &ID; }
3141d9659bf6SJohannes Doerfert
3142d9659bf6SJohannes Doerfert /// This function should return true if the type of the \p AA is AAKernelInfo
classof__anon23c38c770111::AAKernelInfo3143d9659bf6SJohannes Doerfert static bool classof(const AbstractAttribute *AA) {
3144d9659bf6SJohannes Doerfert return (AA->getIdAddr() == &ID);
3145d9659bf6SJohannes Doerfert }
3146d9659bf6SJohannes Doerfert
3147d9659bf6SJohannes Doerfert static const char ID;
3148d9659bf6SJohannes Doerfert };
3149d9659bf6SJohannes Doerfert
3150d9659bf6SJohannes Doerfert /// The function kernel info abstract attribute, basically, what can we say
3151d9659bf6SJohannes Doerfert /// about a function with regards to the KernelInfoState.
3152d9659bf6SJohannes Doerfert struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction__anon23c38c770111::AAKernelInfoFunction3153d9659bf6SJohannes Doerfert AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
3154d9659bf6SJohannes Doerfert : AAKernelInfo(IRP, A) {}
3155d9659bf6SJohannes Doerfert
315629a3e3ddSGiorgis Georgakoudis SmallPtrSet<Instruction *, 4> GuardedInstructions;
315729a3e3ddSGiorgis Georgakoudis
getGuardedInstructions__anon23c38c770111::AAKernelInfoFunction315829a3e3ddSGiorgis Georgakoudis SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
315929a3e3ddSGiorgis Georgakoudis return GuardedInstructions;
316029a3e3ddSGiorgis Georgakoudis }
316129a3e3ddSGiorgis Georgakoudis
3162d9659bf6SJohannes Doerfert /// See AbstractAttribute::initialize(...).
initialize__anon23c38c770111::AAKernelInfoFunction3163d9659bf6SJohannes Doerfert void initialize(Attributor &A) override {
3164d9659bf6SJohannes Doerfert // This is a high-level transform that might change the constant arguments
3165d9659bf6SJohannes Doerfert // of the init and dinit calls. We need to tell the Attributor about this
3166d9659bf6SJohannes Doerfert // to avoid other parts using the current constant value for simpliication.
3167d9659bf6SJohannes Doerfert auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3168d9659bf6SJohannes Doerfert
3169d9659bf6SJohannes Doerfert Function *Fn = getAnchorScope();
3170ca662297SShilei Tian
3171d9659bf6SJohannes Doerfert OMPInformationCache::RuntimeFunctionInfo &InitRFI =
3172d9659bf6SJohannes Doerfert OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
3173d9659bf6SJohannes Doerfert OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
3174d9659bf6SJohannes Doerfert OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
3175d9659bf6SJohannes Doerfert
3176d9659bf6SJohannes Doerfert // For kernels we perform more initialization work, first we find the init
3177d9659bf6SJohannes Doerfert // and deinit calls.
3178d9659bf6SJohannes Doerfert auto StoreCallBase = [](Use &U,
3179d9659bf6SJohannes Doerfert OMPInformationCache::RuntimeFunctionInfo &RFI,
3180d9659bf6SJohannes Doerfert CallBase *&Storage) {
3181d9659bf6SJohannes Doerfert CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3182d9659bf6SJohannes Doerfert assert(CB &&
3183d9659bf6SJohannes Doerfert "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
3184d9659bf6SJohannes Doerfert assert(!Storage &&
3185d9659bf6SJohannes Doerfert "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
3186d9659bf6SJohannes Doerfert Storage = CB;
3187d9659bf6SJohannes Doerfert return false;
3188d9659bf6SJohannes Doerfert };
3189d9659bf6SJohannes Doerfert InitRFI.foreachUse(
3190d9659bf6SJohannes Doerfert [&](Use &U, Function &) {
3191d9659bf6SJohannes Doerfert StoreCallBase(U, InitRFI, KernelInitCB);
3192d9659bf6SJohannes Doerfert return false;
3193d9659bf6SJohannes Doerfert },
3194d9659bf6SJohannes Doerfert Fn);
3195d9659bf6SJohannes Doerfert DeinitRFI.foreachUse(
3196d9659bf6SJohannes Doerfert [&](Use &U, Function &) {
3197d9659bf6SJohannes Doerfert StoreCallBase(U, DeinitRFI, KernelDeinitCB);
3198d9659bf6SJohannes Doerfert return false;
3199d9659bf6SJohannes Doerfert },
3200d9659bf6SJohannes Doerfert Fn);
3201d9659bf6SJohannes Doerfert
320258f93264SJoseph Huber // Ignore kernels without initializers such as global constructors.
32034166738cSJohannes Doerfert if (!KernelInitCB || !KernelDeinitCB)
320458f93264SJoseph Huber return;
32054166738cSJohannes Doerfert
32064166738cSJohannes Doerfert // Add itself to the reaching kernel and set IsKernelEntry.
32074166738cSJohannes Doerfert ReachingKernelEntries.insert(Fn);
32084166738cSJohannes Doerfert IsKernelEntry = true;
3209d9659bf6SJohannes Doerfert
3210514c033dSJohannes Doerfert // For kernels we might need to initialize/finalize the IsSPMD state and
3211514c033dSJohannes Doerfert // we need to register a simplification callback so that the Attributor
3212514c033dSJohannes Doerfert // knows the constant arguments to __kmpc_target_init and
3213d9659bf6SJohannes Doerfert // __kmpc_target_deinit might actually change.
3214d9659bf6SJohannes Doerfert
3215d9659bf6SJohannes Doerfert Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
3216d9659bf6SJohannes Doerfert [&](const IRPosition &IRP, const AbstractAttribute *AA,
3217d9659bf6SJohannes Doerfert bool &UsedAssumedInformation) -> Optional<Value *> {
3218d9659bf6SJohannes Doerfert // IRP represents the "use generic state machine" argument of an
3219d9659bf6SJohannes Doerfert // __kmpc_target_init call. We will answer this one with the internal
3220d9659bf6SJohannes Doerfert // state. As long as we are not in an invalid state, we will create a
3221d9659bf6SJohannes Doerfert // custom state machine so the value should be a `i1 false`. If we are
3222d9659bf6SJohannes Doerfert // in an invalid state, we won't change the value that is in the IR.
3223e6e440aeSJohannes Doerfert if (!ReachedKnownParallelRegions.isValidState())
3224d9659bf6SJohannes Doerfert return nullptr;
3225e0c5d83aSJohannes Doerfert // If we have disabled state machine rewrites, don't make a custom one.
3226e0c5d83aSJohannes Doerfert if (DisableOpenMPOptStateMachineRewrite)
3227e0c5d83aSJohannes Doerfert return nullptr;
3228d9659bf6SJohannes Doerfert if (AA)
3229d9659bf6SJohannes Doerfert A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3230d9659bf6SJohannes Doerfert UsedAssumedInformation = !isAtFixpoint();
3231d9659bf6SJohannes Doerfert auto *FalseVal =
32322aed0813SKazu Hirata ConstantInt::getBool(IRP.getAnchorValue().getContext(), false);
3233d9659bf6SJohannes Doerfert return FalseVal;
3234d9659bf6SJohannes Doerfert };
3235d9659bf6SJohannes Doerfert
3236423d34f7SShilei Tian Attributor::SimplifictionCallbackTy ModeSimplifyCB =
3237514c033dSJohannes Doerfert [&](const IRPosition &IRP, const AbstractAttribute *AA,
3238514c033dSJohannes Doerfert bool &UsedAssumedInformation) -> Optional<Value *> {
3239514c033dSJohannes Doerfert // IRP represents the "SPMDCompatibilityTracker" argument of an
3240514c033dSJohannes Doerfert // __kmpc_target_init or
3241514c033dSJohannes Doerfert // __kmpc_target_deinit call. We will answer this one with the internal
3242514c033dSJohannes Doerfert // state.
324397387fdfSJohannes Doerfert if (!SPMDCompatibilityTracker.isValidState())
3244514c033dSJohannes Doerfert return nullptr;
3245514c033dSJohannes Doerfert if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3246514c033dSJohannes Doerfert if (AA)
3247514c033dSJohannes Doerfert A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3248514c033dSJohannes Doerfert UsedAssumedInformation = true;
3249514c033dSJohannes Doerfert } else {
3250514c033dSJohannes Doerfert UsedAssumedInformation = false;
3251514c033dSJohannes Doerfert }
3252423d34f7SShilei Tian auto *Val = ConstantInt::getSigned(
3253423d34f7SShilei Tian IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
3254423d34f7SShilei Tian SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
3255423d34f7SShilei Tian : OMP_TGT_EXEC_MODE_GENERIC);
3256514c033dSJohannes Doerfert return Val;
3257514c033dSJohannes Doerfert };
3258514c033dSJohannes Doerfert
3259e8439ec8SGiorgis Georgakoudis Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
3260e8439ec8SGiorgis Georgakoudis [&](const IRPosition &IRP, const AbstractAttribute *AA,
3261e8439ec8SGiorgis Georgakoudis bool &UsedAssumedInformation) -> Optional<Value *> {
3262e8439ec8SGiorgis Georgakoudis // IRP represents the "RequiresFullRuntime" argument of an
3263e8439ec8SGiorgis Georgakoudis // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
3264e8439ec8SGiorgis Georgakoudis // one with the internal state of the SPMDCompatibilityTracker, so if
3265e8439ec8SGiorgis Georgakoudis // generic then true, if SPMD then false.
3266e8439ec8SGiorgis Georgakoudis if (!SPMDCompatibilityTracker.isValidState())
3267e8439ec8SGiorgis Georgakoudis return nullptr;
3268e8439ec8SGiorgis Georgakoudis if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3269e8439ec8SGiorgis Georgakoudis if (AA)
3270e8439ec8SGiorgis Georgakoudis A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3271e8439ec8SGiorgis Georgakoudis UsedAssumedInformation = true;
3272e8439ec8SGiorgis Georgakoudis } else {
3273e8439ec8SGiorgis Georgakoudis UsedAssumedInformation = false;
3274e8439ec8SGiorgis Georgakoudis }
3275e8439ec8SGiorgis Georgakoudis auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
3276e8439ec8SGiorgis Georgakoudis !SPMDCompatibilityTracker.isAssumed());
3277e8439ec8SGiorgis Georgakoudis return Val;
3278e8439ec8SGiorgis Georgakoudis };
3279e8439ec8SGiorgis Georgakoudis
3280423d34f7SShilei Tian constexpr const int InitModeArgNo = 1;
3281423d34f7SShilei Tian constexpr const int DeinitModeArgNo = 1;
3282d9659bf6SJohannes Doerfert constexpr const int InitUseStateMachineArgNo = 2;
3283e8439ec8SGiorgis Georgakoudis constexpr const int InitRequiresFullRuntimeArgNo = 3;
3284e8439ec8SGiorgis Georgakoudis constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
3285d9659bf6SJohannes Doerfert A.registerSimplificationCallback(
3286d9659bf6SJohannes Doerfert IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
3287d9659bf6SJohannes Doerfert StateMachineSimplifyCB);
3288514c033dSJohannes Doerfert A.registerSimplificationCallback(
3289423d34f7SShilei Tian IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
3290423d34f7SShilei Tian ModeSimplifyCB);
3291514c033dSJohannes Doerfert A.registerSimplificationCallback(
3292423d34f7SShilei Tian IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
3293423d34f7SShilei Tian ModeSimplifyCB);
3294e8439ec8SGiorgis Georgakoudis A.registerSimplificationCallback(
3295e8439ec8SGiorgis Georgakoudis IRPosition::callsite_argument(*KernelInitCB,
3296e8439ec8SGiorgis Georgakoudis InitRequiresFullRuntimeArgNo),
3297e8439ec8SGiorgis Georgakoudis IsGenericModeSimplifyCB);
3298e8439ec8SGiorgis Georgakoudis A.registerSimplificationCallback(
3299e8439ec8SGiorgis Georgakoudis IRPosition::callsite_argument(*KernelDeinitCB,
3300e8439ec8SGiorgis Georgakoudis DeinitRequiresFullRuntimeArgNo),
3301e8439ec8SGiorgis Georgakoudis IsGenericModeSimplifyCB);
3302514c033dSJohannes Doerfert
3303514c033dSJohannes Doerfert // Check if we know we are in SPMD-mode already.
3304423d34f7SShilei Tian ConstantInt *ModeArg =
3305423d34f7SShilei Tian dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3306423d34f7SShilei Tian if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3307514c033dSJohannes Doerfert SPMDCompatibilityTracker.indicateOptimisticFixpoint();
330860e643feSGiorgis Georgakoudis // This is a generic region but SPMDization is disabled so stop tracking.
330960e643feSGiorgis Georgakoudis else if (DisableOpenMPOptSPMDization)
331060e643feSGiorgis Georgakoudis SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3311d9659bf6SJohannes Doerfert }
3312d9659bf6SJohannes Doerfert
/// Sanitize the string \p S such that it is a suitable global symbol name.
///
/// Every character that is not an ASCII letter, an ASCII digit, or '_' is
/// replaced by '.'; all other characters and the length of the string are
/// left untouched.
///
/// \param S the candidate name, taken by value and modified in place.
/// \returns the sanitized copy.
static std::string sanitizeForGlobalName(std::string S) {
  // True for characters that have to be replaced.
  auto NeedsReplacement = [](const char C) {
    const bool IsLower = C >= 'a' && C <= 'z';
    const bool IsUpper = C >= 'A' && C <= 'Z';
    const bool IsDigit = C >= '0' && C <= '9';
    return !(IsLower || IsUpper || IsDigit || C == '_');
  };
  std::replace_if(S.begin(), S.end(), NeedsReplacement, '.');
  return S;
}
3324e6e440aeSJohannes Doerfert
3325d9659bf6SJohannes Doerfert /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
3326d9659bf6SJohannes Doerfert /// finished now.
manifest__anon23c38c770111::AAKernelInfoFunction3327d9659bf6SJohannes Doerfert ChangeStatus manifest(Attributor &A) override {
3328d9659bf6SJohannes Doerfert // If we are not looking at a kernel with __kmpc_target_init and
3329d9659bf6SJohannes Doerfert // __kmpc_target_deinit call we cannot actually manifest the information.
3330d9659bf6SJohannes Doerfert if (!KernelInitCB || !KernelDeinitCB)
3331d9659bf6SJohannes Doerfert return ChangeStatus::UNCHANGED;
3332d9659bf6SJohannes Doerfert
3333514c033dSJohannes Doerfert // If we can we change the execution mode to SPMD-mode otherwise we build a
3334514c033dSJohannes Doerfert // custom state machine.
3335d61aac76SJohannes Doerfert ChangeStatus Changed = ChangeStatus::UNCHANGED;
3336d61aac76SJohannes Doerfert if (!changeToSPMDMode(A, Changed))
3337c6457dcaSJohannes Doerfert return buildCustomStateMachine(A);
3338d9659bf6SJohannes Doerfert
3339d61aac76SJohannes Doerfert return Changed;
3340d9659bf6SJohannes Doerfert }
3341d9659bf6SJohannes Doerfert
changeToSPMDMode__anon23c38c770111::AAKernelInfoFunction3342d61aac76SJohannes Doerfert bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
3343c771eaf0SJohannes Doerfert if (!mayContainParallelRegion())
3344c771eaf0SJohannes Doerfert return false;
3345c771eaf0SJohannes Doerfert
3346eef6601bSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3347eef6601bSJoseph Huber
3348514c033dSJohannes Doerfert if (!SPMDCompatibilityTracker.isAssumed()) {
3349514c033dSJohannes Doerfert for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
3350514c033dSJohannes Doerfert if (!NonCompatibleI)
3351514c033dSJohannes Doerfert continue;
3352eef6601bSJoseph Huber
3353eef6601bSJoseph Huber // Skip diagnostics on calls to known OpenMP runtime functions for now.
3354eef6601bSJoseph Huber if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
3355eef6601bSJoseph Huber if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
3356eef6601bSJoseph Huber continue;
3357eef6601bSJoseph Huber
3358514c033dSJohannes Doerfert auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3359eef6601bSJoseph Huber ORA << "Value has potential side effects preventing SPMD-mode "
3360eef6601bSJoseph Huber "execution";
3361eef6601bSJoseph Huber if (isa<CallBase>(NonCompatibleI)) {
3362eef6601bSJoseph Huber ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
3363eef6601bSJoseph Huber "the called function to override";
3364514c033dSJohannes Doerfert }
3365514c033dSJohannes Doerfert return ORA << ".";
3366514c033dSJohannes Doerfert };
33672c31d5ebSJoseph Huber A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
33682c31d5ebSJoseph Huber Remark);
3369514c033dSJohannes Doerfert
3370514c033dSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
3371514c033dSJohannes Doerfert << *NonCompatibleI << "\n");
3372514c033dSJohannes Doerfert }
3373514c033dSJohannes Doerfert
3374514c033dSJohannes Doerfert return false;
3375514c033dSJohannes Doerfert }
3376514c033dSJohannes Doerfert
33774166738cSJohannes Doerfert // Get the actual kernel, could be the caller of the anchor scope if we have
33784166738cSJohannes Doerfert // a debug wrapper.
3379d61aac76SJohannes Doerfert Function *Kernel = getAnchorScope();
33804166738cSJohannes Doerfert if (Kernel->hasLocalLinkage()) {
33814166738cSJohannes Doerfert assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
33824166738cSJohannes Doerfert auto *CB = cast<CallBase>(Kernel->user_back());
33834166738cSJohannes Doerfert Kernel = CB->getCaller();
33844166738cSJohannes Doerfert }
33854166738cSJohannes Doerfert assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
33864166738cSJohannes Doerfert
33874166738cSJohannes Doerfert // Check if the kernel is already in SPMD mode, if so, return success.
3388d61aac76SJohannes Doerfert GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
3389d61aac76SJohannes Doerfert (Kernel->getName() + "_exec_mode").str());
3390d61aac76SJohannes Doerfert assert(ExecMode && "Kernel without exec mode?");
3391d61aac76SJohannes Doerfert assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
3392d61aac76SJohannes Doerfert
3393d61aac76SJohannes Doerfert // Set the global exec mode flag to indicate SPMD-Generic mode.
3394d61aac76SJohannes Doerfert assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
3395d61aac76SJohannes Doerfert "ExecMode is not an integer!");
3396d61aac76SJohannes Doerfert const int8_t ExecModeVal =
3397d61aac76SJohannes Doerfert cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
3398d61aac76SJohannes Doerfert if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
3399d61aac76SJohannes Doerfert return true;
3400d61aac76SJohannes Doerfert
3401d61aac76SJohannes Doerfert // We will now unconditionally modify the IR, indicate a change.
3402d61aac76SJohannes Doerfert Changed = ChangeStatus::CHANGED;
3403d61aac76SJohannes Doerfert
340429a3e3ddSGiorgis Georgakoudis auto CreateGuardedRegion = [&](Instruction *RegionStartI,
340529a3e3ddSGiorgis Georgakoudis Instruction *RegionEndI) {
340629a3e3ddSGiorgis Georgakoudis LoopInfo *LI = nullptr;
340729a3e3ddSGiorgis Georgakoudis DominatorTree *DT = nullptr;
340829a3e3ddSGiorgis Georgakoudis MemorySSAUpdater *MSU = nullptr;
340929a3e3ddSGiorgis Georgakoudis using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
341029a3e3ddSGiorgis Georgakoudis
341129a3e3ddSGiorgis Georgakoudis BasicBlock *ParentBB = RegionStartI->getParent();
341229a3e3ddSGiorgis Georgakoudis Function *Fn = ParentBB->getParent();
341329a3e3ddSGiorgis Georgakoudis Module &M = *Fn->getParent();
341429a3e3ddSGiorgis Georgakoudis
341529a3e3ddSGiorgis Georgakoudis // Create all the blocks and logic.
341629a3e3ddSGiorgis Georgakoudis // ParentBB:
341729a3e3ddSGiorgis Georgakoudis // goto RegionCheckTidBB
341829a3e3ddSGiorgis Georgakoudis // RegionCheckTidBB:
341929a3e3ddSGiorgis Georgakoudis // Tid = __kmpc_hardware_thread_id()
342029a3e3ddSGiorgis Georgakoudis // if (Tid != 0)
342129a3e3ddSGiorgis Georgakoudis // goto RegionBarrierBB
342229a3e3ddSGiorgis Georgakoudis // RegionStartBB:
342329a3e3ddSGiorgis Georgakoudis // <execute instructions guarded>
342429a3e3ddSGiorgis Georgakoudis // goto RegionEndBB
342529a3e3ddSGiorgis Georgakoudis // RegionEndBB:
342629a3e3ddSGiorgis Georgakoudis // <store escaping values to shared mem>
342729a3e3ddSGiorgis Georgakoudis // goto RegionBarrierBB
342829a3e3ddSGiorgis Georgakoudis // RegionBarrierBB:
342929a3e3ddSGiorgis Georgakoudis // __kmpc_simple_barrier_spmd()
343029a3e3ddSGiorgis Georgakoudis // // second barrier is omitted if lacking escaping values.
343129a3e3ddSGiorgis Georgakoudis // <load escaping values from shared mem>
343229a3e3ddSGiorgis Georgakoudis // __kmpc_simple_barrier_spmd()
343329a3e3ddSGiorgis Georgakoudis // goto RegionExitBB
343429a3e3ddSGiorgis Georgakoudis // RegionExitBB:
343529a3e3ddSGiorgis Georgakoudis // <execute rest of instructions>
343629a3e3ddSGiorgis Georgakoudis
343729a3e3ddSGiorgis Georgakoudis BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
343829a3e3ddSGiorgis Georgakoudis DT, LI, MSU, "region.guarded.end");
343929a3e3ddSGiorgis Georgakoudis BasicBlock *RegionBarrierBB =
344029a3e3ddSGiorgis Georgakoudis SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
344129a3e3ddSGiorgis Georgakoudis MSU, "region.barrier");
344229a3e3ddSGiorgis Georgakoudis BasicBlock *RegionExitBB =
344329a3e3ddSGiorgis Georgakoudis SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
344429a3e3ddSGiorgis Georgakoudis DT, LI, MSU, "region.exit");
344529a3e3ddSGiorgis Georgakoudis BasicBlock *RegionStartBB =
344629a3e3ddSGiorgis Georgakoudis SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
344729a3e3ddSGiorgis Georgakoudis
344829a3e3ddSGiorgis Georgakoudis assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
344929a3e3ddSGiorgis Georgakoudis "Expected a different CFG");
345029a3e3ddSGiorgis Georgakoudis
345129a3e3ddSGiorgis Georgakoudis BasicBlock *RegionCheckTidBB = SplitBlock(
345229a3e3ddSGiorgis Georgakoudis ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
345329a3e3ddSGiorgis Georgakoudis
345429a3e3ddSGiorgis Georgakoudis // Register basic blocks with the Attributor.
345529a3e3ddSGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*RegionEndBB);
345629a3e3ddSGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*RegionBarrierBB);
345729a3e3ddSGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*RegionExitBB);
345829a3e3ddSGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*RegionStartBB);
345929a3e3ddSGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
346029a3e3ddSGiorgis Georgakoudis
346129a3e3ddSGiorgis Georgakoudis bool HasBroadcastValues = false;
346229a3e3ddSGiorgis Georgakoudis // Find escaping outputs from the guarded region to outside users and
346329a3e3ddSGiorgis Georgakoudis // broadcast their values to them.
346429a3e3ddSGiorgis Georgakoudis for (Instruction &I : *RegionStartBB) {
346529a3e3ddSGiorgis Georgakoudis SmallPtrSet<Instruction *, 4> OutsideUsers;
346629a3e3ddSGiorgis Georgakoudis for (User *Usr : I.users()) {
346729a3e3ddSGiorgis Georgakoudis Instruction &UsrI = *cast<Instruction>(Usr);
346829a3e3ddSGiorgis Georgakoudis if (UsrI.getParent() != RegionStartBB)
346929a3e3ddSGiorgis Georgakoudis OutsideUsers.insert(&UsrI);
347029a3e3ddSGiorgis Georgakoudis }
347129a3e3ddSGiorgis Georgakoudis
347229a3e3ddSGiorgis Georgakoudis if (OutsideUsers.empty())
347329a3e3ddSGiorgis Georgakoudis continue;
347429a3e3ddSGiorgis Georgakoudis
347529a3e3ddSGiorgis Georgakoudis HasBroadcastValues = true;
347629a3e3ddSGiorgis Georgakoudis
347729a3e3ddSGiorgis Georgakoudis // Emit a global variable in shared memory to store the broadcasted
347829a3e3ddSGiorgis Georgakoudis // value.
347929a3e3ddSGiorgis Georgakoudis auto *SharedMem = new GlobalVariable(
348029a3e3ddSGiorgis Georgakoudis M, I.getType(), /* IsConstant */ false,
348129a3e3ddSGiorgis Georgakoudis GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
3482e6e440aeSJohannes Doerfert sanitizeForGlobalName(
3483e6e440aeSJohannes Doerfert (I.getName() + ".guarded.output.alloc").str()),
3484e6e440aeSJohannes Doerfert nullptr, GlobalValue::NotThreadLocal,
348529a3e3ddSGiorgis Georgakoudis static_cast<unsigned>(AddressSpace::Shared));
348629a3e3ddSGiorgis Georgakoudis
348729a3e3ddSGiorgis Georgakoudis // Emit a store instruction to update the value.
348829a3e3ddSGiorgis Georgakoudis new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
348929a3e3ddSGiorgis Georgakoudis
349029a3e3ddSGiorgis Georgakoudis LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
349129a3e3ddSGiorgis Georgakoudis I.getName() + ".guarded.output.load",
349229a3e3ddSGiorgis Georgakoudis RegionBarrierBB->getTerminator());
349329a3e3ddSGiorgis Georgakoudis
349429a3e3ddSGiorgis Georgakoudis // Emit a load instruction and replace uses of the output value.
3495e6e440aeSJohannes Doerfert for (Instruction *UsrI : OutsideUsers)
349629a3e3ddSGiorgis Georgakoudis UsrI->replaceUsesOfWith(&I, LoadI);
349729a3e3ddSGiorgis Georgakoudis }
349829a3e3ddSGiorgis Georgakoudis
349929a3e3ddSGiorgis Georgakoudis auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
350029a3e3ddSGiorgis Georgakoudis
350129a3e3ddSGiorgis Georgakoudis // Go to tid check BB in ParentBB.
350229a3e3ddSGiorgis Georgakoudis const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
350329a3e3ddSGiorgis Georgakoudis ParentBB->getTerminator()->eraseFromParent();
350429a3e3ddSGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc(
350529a3e3ddSGiorgis Georgakoudis InsertPointTy(ParentBB, ParentBB->end()), DL);
350629a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.updateToLocation(Loc);
3507944aa042SJohannes Doerfert uint32_t SrcLocStrSize;
3508944aa042SJohannes Doerfert auto *SrcLocStr =
3509944aa042SJohannes Doerfert OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3510944aa042SJohannes Doerfert Value *Ident =
3511944aa042SJohannes Doerfert OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
351229a3e3ddSGiorgis Georgakoudis BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
351329a3e3ddSGiorgis Georgakoudis
351429a3e3ddSGiorgis Georgakoudis // Add check for Tid in RegionCheckTidBB
351529a3e3ddSGiorgis Georgakoudis RegionCheckTidBB->getTerminator()->eraseFromParent();
351629a3e3ddSGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
351729a3e3ddSGiorgis Georgakoudis InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
351829a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
351929a3e3ddSGiorgis Georgakoudis FunctionCallee HardwareTidFn =
352029a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
352129a3e3ddSGiorgis Georgakoudis M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
352206cfdd52SJoseph Huber CallInst *Tid =
352329a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
352406cfdd52SJoseph Huber Tid->setDebugLoc(DL);
352506cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
352629a3e3ddSGiorgis Georgakoudis Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
352729a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.Builder
352829a3e3ddSGiorgis Georgakoudis .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
352929a3e3ddSGiorgis Georgakoudis ->setDebugLoc(DL);
353029a3e3ddSGiorgis Georgakoudis
353129a3e3ddSGiorgis Georgakoudis // First barrier for synchronization, ensures main thread has updated
353229a3e3ddSGiorgis Georgakoudis // values.
353329a3e3ddSGiorgis Georgakoudis FunctionCallee BarrierFn =
353429a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
353529a3e3ddSGiorgis Georgakoudis M, OMPRTL___kmpc_barrier_simple_spmd);
353629a3e3ddSGiorgis Georgakoudis OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
353729a3e3ddSGiorgis Georgakoudis RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
353806cfdd52SJoseph Huber CallInst *Barrier =
353906cfdd52SJoseph Huber OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
354006cfdd52SJoseph Huber Barrier->setDebugLoc(DL);
354106cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
354229a3e3ddSGiorgis Georgakoudis
354329a3e3ddSGiorgis Georgakoudis // Second barrier ensures workers have read broadcast values.
354406cfdd52SJoseph Huber if (HasBroadcastValues) {
354506cfdd52SJoseph Huber CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "",
354606cfdd52SJoseph Huber RegionBarrierBB->getTerminator());
354706cfdd52SJoseph Huber Barrier->setDebugLoc(DL);
354806cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
354906cfdd52SJoseph Huber }
355029a3e3ddSGiorgis Georgakoudis };
355129a3e3ddSGiorgis Georgakoudis
355299ea8ac9SJohannes Doerfert auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
355399ea8ac9SJohannes Doerfert SmallPtrSet<BasicBlock *, 8> Visited;
355499ea8ac9SJohannes Doerfert for (Instruction *GuardedI : SPMDCompatibilityTracker) {
355599ea8ac9SJohannes Doerfert BasicBlock *BB = GuardedI->getParent();
355699ea8ac9SJohannes Doerfert if (!Visited.insert(BB).second)
355799ea8ac9SJohannes Doerfert continue;
355899ea8ac9SJohannes Doerfert
355999ea8ac9SJohannes Doerfert SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
356099ea8ac9SJohannes Doerfert Instruction *LastEffect = nullptr;
356199ea8ac9SJohannes Doerfert BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
356299ea8ac9SJohannes Doerfert while (++IP != IPEnd) {
356399ea8ac9SJohannes Doerfert if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
356499ea8ac9SJohannes Doerfert continue;
356599ea8ac9SJohannes Doerfert Instruction *I = &*IP;
356699ea8ac9SJohannes Doerfert if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
356799ea8ac9SJohannes Doerfert continue;
356899ea8ac9SJohannes Doerfert if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
356999ea8ac9SJohannes Doerfert LastEffect = nullptr;
357099ea8ac9SJohannes Doerfert continue;
357199ea8ac9SJohannes Doerfert }
357299ea8ac9SJohannes Doerfert if (LastEffect)
357399ea8ac9SJohannes Doerfert Reorders.push_back({I, LastEffect});
357499ea8ac9SJohannes Doerfert LastEffect = &*IP;
357599ea8ac9SJohannes Doerfert }
357699ea8ac9SJohannes Doerfert for (auto &Reorder : Reorders)
357799ea8ac9SJohannes Doerfert Reorder.first->moveBefore(Reorder.second);
357899ea8ac9SJohannes Doerfert }
357999ea8ac9SJohannes Doerfert
358029a3e3ddSGiorgis Georgakoudis SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
358129a3e3ddSGiorgis Georgakoudis
358229a3e3ddSGiorgis Georgakoudis for (Instruction *GuardedI : SPMDCompatibilityTracker) {
358329a3e3ddSGiorgis Georgakoudis BasicBlock *BB = GuardedI->getParent();
358429a3e3ddSGiorgis Georgakoudis auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
358529a3e3ddSGiorgis Georgakoudis IRPosition::function(*GuardedI->getFunction()), nullptr,
358629a3e3ddSGiorgis Georgakoudis DepClassTy::NONE);
358729a3e3ddSGiorgis Georgakoudis assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
358829a3e3ddSGiorgis Georgakoudis auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
358929a3e3ddSGiorgis Georgakoudis // Continue if instruction is already guarded.
359029a3e3ddSGiorgis Georgakoudis if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
359129a3e3ddSGiorgis Georgakoudis continue;
359229a3e3ddSGiorgis Georgakoudis
359329a3e3ddSGiorgis Georgakoudis Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
359429a3e3ddSGiorgis Georgakoudis for (Instruction &I : *BB) {
359529a3e3ddSGiorgis Georgakoudis // If instruction I needs to be guarded update the guarded region
359629a3e3ddSGiorgis Georgakoudis // bounds.
359729a3e3ddSGiorgis Georgakoudis if (SPMDCompatibilityTracker.contains(&I)) {
359829a3e3ddSGiorgis Georgakoudis CalleeAAFunction.getGuardedInstructions().insert(&I);
359929a3e3ddSGiorgis Georgakoudis if (GuardedRegionStart)
360029a3e3ddSGiorgis Georgakoudis GuardedRegionEnd = &I;
360129a3e3ddSGiorgis Georgakoudis else
360229a3e3ddSGiorgis Georgakoudis GuardedRegionStart = GuardedRegionEnd = &I;
360329a3e3ddSGiorgis Georgakoudis
360429a3e3ddSGiorgis Georgakoudis continue;
360529a3e3ddSGiorgis Georgakoudis }
360629a3e3ddSGiorgis Georgakoudis
360729a3e3ddSGiorgis Georgakoudis // Instruction I does not need guarding, store
360829a3e3ddSGiorgis Georgakoudis // any region found and reset bounds.
360929a3e3ddSGiorgis Georgakoudis if (GuardedRegionStart) {
361029a3e3ddSGiorgis Georgakoudis GuardedRegions.push_back(
361129a3e3ddSGiorgis Georgakoudis std::make_pair(GuardedRegionStart, GuardedRegionEnd));
361229a3e3ddSGiorgis Georgakoudis GuardedRegionStart = nullptr;
361329a3e3ddSGiorgis Georgakoudis GuardedRegionEnd = nullptr;
361429a3e3ddSGiorgis Georgakoudis }
361529a3e3ddSGiorgis Georgakoudis }
361629a3e3ddSGiorgis Georgakoudis }
361729a3e3ddSGiorgis Georgakoudis
361829a3e3ddSGiorgis Georgakoudis for (auto &GR : GuardedRegions)
361929a3e3ddSGiorgis Georgakoudis CreateGuardedRegion(GR.first, GR.second);
362029a3e3ddSGiorgis Georgakoudis
3621514c033dSJohannes Doerfert // Adjust the global exec mode flag that tells the runtime what mode this
3622514c033dSJohannes Doerfert // kernel is executed in.
3623ca999f71SShilei Tian assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
3624ca999f71SShilei Tian "Initially non-SPMD kernel has SPMD exec mode!");
3625514c033dSJohannes Doerfert ExecMode->setInitializer(
3626ca999f71SShilei Tian ConstantInt::get(ExecMode->getInitializer()->getType(),
3627ca999f71SShilei Tian ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
3628514c033dSJohannes Doerfert
3629514c033dSJohannes Doerfert // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
3630423d34f7SShilei Tian const int InitModeArgNo = 1;
3631423d34f7SShilei Tian const int DeinitModeArgNo = 1;
3632514c033dSJohannes Doerfert const int InitUseStateMachineArgNo = 2;
3633e8439ec8SGiorgis Georgakoudis const int InitRequiresFullRuntimeArgNo = 3;
3634e8439ec8SGiorgis Georgakoudis const int DeinitRequiresFullRuntimeArgNo = 2;
3635514c033dSJohannes Doerfert
3636514c033dSJohannes Doerfert auto &Ctx = getAnchorValue().getContext();
3637423d34f7SShilei Tian A.changeUseAfterManifest(
3638423d34f7SShilei Tian KernelInitCB->getArgOperandUse(InitModeArgNo),
3639423d34f7SShilei Tian *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3640423d34f7SShilei Tian OMP_TGT_EXEC_MODE_SPMD));
3641514c033dSJohannes Doerfert A.changeUseAfterManifest(
3642514c033dSJohannes Doerfert KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
36432aed0813SKazu Hirata *ConstantInt::getBool(Ctx, false));
3644514c033dSJohannes Doerfert A.changeUseAfterManifest(
3645423d34f7SShilei Tian KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
3646423d34f7SShilei Tian *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3647423d34f7SShilei Tian OMP_TGT_EXEC_MODE_SPMD));
3648e8439ec8SGiorgis Georgakoudis A.changeUseAfterManifest(
3649e8439ec8SGiorgis Georgakoudis KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
36502aed0813SKazu Hirata *ConstantInt::getBool(Ctx, false));
3651e8439ec8SGiorgis Georgakoudis A.changeUseAfterManifest(
3652e8439ec8SGiorgis Georgakoudis KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
36532aed0813SKazu Hirata *ConstantInt::getBool(Ctx, false));
3654e8439ec8SGiorgis Georgakoudis
3655514c033dSJohannes Doerfert ++NumOpenMPTargetRegionKernelsSPMD;
3656514c033dSJohannes Doerfert
3657514c033dSJohannes Doerfert auto Remark = [&](OptimizationRemark OR) {
3658eef6601bSJoseph Huber return OR << "Transformed generic-mode kernel to SPMD-mode.";
3659514c033dSJohannes Doerfert };
36602c31d5ebSJoseph Huber A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
3661514c033dSJohannes Doerfert return true;
3662514c033dSJohannes Doerfert };
3663514c033dSJohannes Doerfert
buildCustomStateMachine__anon23c38c770111::AAKernelInfoFunction3664d9659bf6SJohannes Doerfert ChangeStatus buildCustomStateMachine(Attributor &A) {
3665cd0dd8ecSJoseph Huber // If we have disabled state machine rewrites, don't make a custom one
3666cd0dd8ecSJoseph Huber if (DisableOpenMPOptStateMachineRewrite)
3667c6457dcaSJohannes Doerfert return ChangeStatus::UNCHANGED;
3668cd0dd8ecSJoseph Huber
3669f074a6a0SJoseph Huber // Don't rewrite the state machine if we are not in a valid state.
3670f074a6a0SJoseph Huber if (!ReachedKnownParallelRegions.isValidState())
3671f074a6a0SJoseph Huber return ChangeStatus::UNCHANGED;
3672d9659bf6SJohannes Doerfert
3673423d34f7SShilei Tian const int InitModeArgNo = 1;
3674d9659bf6SJohannes Doerfert const int InitUseStateMachineArgNo = 2;
3675d9659bf6SJohannes Doerfert
3676d9659bf6SJohannes Doerfert // Check if the current configuration is non-SPMD and generic state machine.
3677d9659bf6SJohannes Doerfert // If we already have SPMD mode or a custom state machine we do not need to
3678d9659bf6SJohannes Doerfert // go any further. If it is anything but a constant something is weird and
3679d9659bf6SJohannes Doerfert // we give up.
3680d9659bf6SJohannes Doerfert ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
3681d9659bf6SJohannes Doerfert KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
3682423d34f7SShilei Tian ConstantInt *Mode =
3683423d34f7SShilei Tian dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3684d9659bf6SJohannes Doerfert
3685d9659bf6SJohannes Doerfert // If we are stuck with generic mode, try to create a custom device (=GPU)
3686d9659bf6SJohannes Doerfert // state machine which is specialized for the parallel regions that are
3687d9659bf6SJohannes Doerfert // reachable by the kernel.
3688423d34f7SShilei Tian if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
3689423d34f7SShilei Tian (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3690d9659bf6SJohannes Doerfert return ChangeStatus::UNCHANGED;
3691d9659bf6SJohannes Doerfert
3692514c033dSJohannes Doerfert // If not SPMD mode, indicate we use a custom state machine now.
3693d9659bf6SJohannes Doerfert auto &Ctx = getAnchorValue().getContext();
36942aed0813SKazu Hirata auto *FalseVal = ConstantInt::getBool(Ctx, false);
3695d9659bf6SJohannes Doerfert A.changeUseAfterManifest(
3696d9659bf6SJohannes Doerfert KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
3697d9659bf6SJohannes Doerfert
3698d9659bf6SJohannes Doerfert // If we don't actually need a state machine we are done here. This can
3699d9659bf6SJohannes Doerfert // happen if there simply are no parallel regions. In the resulting kernel
3700d9659bf6SJohannes Doerfert // all worker threads will simply exit right away, leaving the main thread
3701d9659bf6SJohannes Doerfert // to do the work alone.
37026b9a3ec3SJoseph Huber if (!mayContainParallelRegion()) {
3703d9659bf6SJohannes Doerfert ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
3704d9659bf6SJohannes Doerfert
3705d9659bf6SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) {
3706eef6601bSJoseph Huber return OR << "Removing unused state machine from generic-mode kernel.";
3707d9659bf6SJohannes Doerfert };
37082c31d5ebSJoseph Huber A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
3709d9659bf6SJohannes Doerfert
3710d9659bf6SJohannes Doerfert return ChangeStatus::CHANGED;
3711d9659bf6SJohannes Doerfert }
3712d9659bf6SJohannes Doerfert
3713d9659bf6SJohannes Doerfert // Keep track in the statistics of our new shiny custom state machine.
3714d9659bf6SJohannes Doerfert if (ReachedUnknownParallelRegions.empty()) {
3715d9659bf6SJohannes Doerfert ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
3716d9659bf6SJohannes Doerfert
3717d9659bf6SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) {
3718eef6601bSJoseph Huber return OR << "Rewriting generic-mode kernel with a customized state "
3719eef6601bSJoseph Huber "machine.";
3720d9659bf6SJohannes Doerfert };
37212c31d5ebSJoseph Huber A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
3722d9659bf6SJohannes Doerfert } else {
3723d9659bf6SJohannes Doerfert ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
3724d9659bf6SJohannes Doerfert
3725eef6601bSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis OR) {
3726d9659bf6SJohannes Doerfert return OR << "Generic-mode kernel is executed with a customized state "
3727eef6601bSJoseph Huber "machine that requires a fallback.";
3728d9659bf6SJohannes Doerfert };
37292c31d5ebSJoseph Huber A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
3730d9659bf6SJohannes Doerfert
3731d9659bf6SJohannes Doerfert // Tell the user why we ended up with a fallback.
3732d9659bf6SJohannes Doerfert for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
3733d9659bf6SJohannes Doerfert if (!UnknownParallelRegionCB)
3734d9659bf6SJohannes Doerfert continue;
3735d9659bf6SJohannes Doerfert auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3736eef6601bSJoseph Huber return ORA << "Call may contain unknown parallel regions. Use "
3737eef6601bSJoseph Huber << "`__attribute__((assume(\"omp_no_parallelism\")))` to "
3738eef6601bSJoseph Huber "override.";
3739d9659bf6SJohannes Doerfert };
37402c31d5ebSJoseph Huber A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
37412c31d5ebSJoseph Huber "OMP133", Remark);
3742d9659bf6SJohannes Doerfert }
3743d9659bf6SJohannes Doerfert }
3744d9659bf6SJohannes Doerfert
3745d9659bf6SJohannes Doerfert // Create all the blocks:
3746d9659bf6SJohannes Doerfert //
3747d9659bf6SJohannes Doerfert // InitCB = __kmpc_target_init(...)
3748c9dfe322SJoel E. Denny // BlockHwSize =
3749c9dfe322SJoel E. Denny // __kmpc_get_hardware_num_threads_in_block();
3750c9dfe322SJoel E. Denny // WarpSize = __kmpc_get_warp_size();
3751c9dfe322SJoel E. Denny // BlockSize = BlockHwSize - WarpSize;
3752f9c2d600SJohannes Doerfert // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
3753d9659bf6SJohannes Doerfert // if (IsWorker) {
3754f9c2d600SJohannes Doerfert // if (InitCB >= BlockSize) return;
375573720c80SJohannes Doerfert // SMBeginBB: __kmpc_barrier_simple_generic(...);
3756d9659bf6SJohannes Doerfert // void *WorkFn;
3757d9659bf6SJohannes Doerfert // bool Active = __kmpc_kernel_parallel(&WorkFn);
3758d9659bf6SJohannes Doerfert // if (!WorkFn) return;
3759d9659bf6SJohannes Doerfert // SMIsActiveCheckBB: if (Active) {
3760d9659bf6SJohannes Doerfert // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>)
3761d9659bf6SJohannes Doerfert // ParFn0(...);
3762d9659bf6SJohannes Doerfert // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>)
3763d9659bf6SJohannes Doerfert // ParFn1(...);
3764d9659bf6SJohannes Doerfert // ...
3765d9659bf6SJohannes Doerfert // SMIfCascadeCurrentBB: else
3766d9659bf6SJohannes Doerfert // ((WorkFnTy*)WorkFn)(...);
3767d9659bf6SJohannes Doerfert // SMEndParallelBB: __kmpc_kernel_end_parallel(...);
3768d9659bf6SJohannes Doerfert // }
376973720c80SJohannes Doerfert // SMDoneBB: __kmpc_barrier_simple_generic(...);
3770d9659bf6SJohannes Doerfert // goto SMBeginBB;
3771d9659bf6SJohannes Doerfert // }
3772d9659bf6SJohannes Doerfert // UserCodeEntryBB: // user code
3773d9659bf6SJohannes Doerfert // __kmpc_target_deinit(...)
3774d9659bf6SJohannes Doerfert //
3775d9659bf6SJohannes Doerfert Function *Kernel = getAssociatedFunction();
3776d9659bf6SJohannes Doerfert assert(Kernel && "Expected an associated function!");
3777d9659bf6SJohannes Doerfert
3778d9659bf6SJohannes Doerfert BasicBlock *InitBB = KernelInitCB->getParent();
3779d9659bf6SJohannes Doerfert BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
3780d9659bf6SJohannes Doerfert KernelInitCB->getNextNode(), "thread.user_code.check");
3781c9dfe322SJoel E. Denny BasicBlock *IsWorkerCheckBB =
3782c9dfe322SJoel E. Denny BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
3783d9659bf6SJohannes Doerfert BasicBlock *StateMachineBeginBB = BasicBlock::Create(
3784d9659bf6SJohannes Doerfert Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
3785d9659bf6SJohannes Doerfert BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
3786d9659bf6SJohannes Doerfert Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
3787d9659bf6SJohannes Doerfert BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
3788d9659bf6SJohannes Doerfert Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
3789d9659bf6SJohannes Doerfert BasicBlock *StateMachineIfCascadeCurrentBB =
3790d9659bf6SJohannes Doerfert BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3791d9659bf6SJohannes Doerfert Kernel, UserCodeEntryBB);
3792d9659bf6SJohannes Doerfert BasicBlock *StateMachineEndParallelBB =
3793d9659bf6SJohannes Doerfert BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
3794d9659bf6SJohannes Doerfert Kernel, UserCodeEntryBB);
3795d9659bf6SJohannes Doerfert BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
3796d9659bf6SJohannes Doerfert Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
37973f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*InitBB);
37983f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
3799c9dfe322SJoel E. Denny A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
38003f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
38013f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
38023f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
38033f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
38043f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
38053f71b425SGiorgis Georgakoudis A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
3806d9659bf6SJohannes Doerfert
3807d9659bf6SJohannes Doerfert const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
3808d9659bf6SJohannes Doerfert ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
3809d9659bf6SJohannes Doerfert InitBB->getTerminator()->eraseFromParent();
3810c9dfe322SJoel E. Denny
3811f9c2d600SJohannes Doerfert Instruction *IsWorker =
3812f9c2d600SJohannes Doerfert ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
3813f9c2d600SJohannes Doerfert ConstantInt::get(KernelInitCB->getType(), -1),
3814f9c2d600SJohannes Doerfert "thread.is_worker", InitBB);
3815f9c2d600SJohannes Doerfert IsWorker->setDebugLoc(DLoc);
3816f9c2d600SJohannes Doerfert BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
3817f9c2d600SJohannes Doerfert
3818c9dfe322SJoel E. Denny Module &M = *Kernel->getParent();
3819c9dfe322SJoel E. Denny auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3820c9dfe322SJoel E. Denny FunctionCallee BlockHwSizeFn =
3821c9dfe322SJoel E. Denny OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3822c9dfe322SJoel E. Denny M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
3823c9dfe322SJoel E. Denny FunctionCallee WarpSizeFn =
3824c9dfe322SJoel E. Denny OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3825c9dfe322SJoel E. Denny M, OMPRTL___kmpc_get_warp_size);
382606cfdd52SJoseph Huber CallInst *BlockHwSize =
3827f9c2d600SJohannes Doerfert CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
382806cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
3829c9dfe322SJoel E. Denny BlockHwSize->setDebugLoc(DLoc);
3830f9c2d600SJohannes Doerfert CallInst *WarpSize =
3831f9c2d600SJohannes Doerfert CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
383206cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
3833c9dfe322SJoel E. Denny WarpSize->setDebugLoc(DLoc);
3834f9c2d600SJohannes Doerfert Instruction *BlockSize = BinaryOperator::CreateSub(
3835f9c2d600SJohannes Doerfert BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
3836c9dfe322SJoel E. Denny BlockSize->setDebugLoc(DLoc);
3837f9c2d600SJohannes Doerfert Instruction *IsMainOrWorker = ICmpInst::Create(
3838f9c2d600SJohannes Doerfert ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
3839f9c2d600SJohannes Doerfert "thread.is_main_or_worker", IsWorkerCheckBB);
3840c9dfe322SJoel E. Denny IsMainOrWorker->setDebugLoc(DLoc);
3841f9c2d600SJohannes Doerfert BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
3842f9c2d600SJohannes Doerfert IsMainOrWorker, IsWorkerCheckBB);
384371052ea1SJon Chesterfield
3844d9659bf6SJohannes Doerfert // Create local storage for the work function pointer.
384571052ea1SJon Chesterfield const DataLayout &DL = M.getDataLayout();
3846d9659bf6SJohannes Doerfert Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
384771052ea1SJon Chesterfield Instruction *WorkFnAI =
384871052ea1SJon Chesterfield new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
384971052ea1SJon Chesterfield "worker.work_fn.addr", &Kernel->getEntryBlock().front());
3850d9659bf6SJohannes Doerfert WorkFnAI->setDebugLoc(DLoc);
3851d9659bf6SJohannes Doerfert
3852d9659bf6SJohannes Doerfert OMPInfoCache.OMPBuilder.updateToLocation(
3853d9659bf6SJohannes Doerfert OpenMPIRBuilder::LocationDescription(
3854d9659bf6SJohannes Doerfert IRBuilder<>::InsertPoint(StateMachineBeginBB,
3855d9659bf6SJohannes Doerfert StateMachineBeginBB->end()),
3856d9659bf6SJohannes Doerfert DLoc));
3857d9659bf6SJohannes Doerfert
3858d9659bf6SJohannes Doerfert Value *Ident = KernelInitCB->getArgOperand(0);
3859d9659bf6SJohannes Doerfert Value *GTid = KernelInitCB;
3860d9659bf6SJohannes Doerfert
3861d9659bf6SJohannes Doerfert FunctionCallee BarrierFn =
3862d9659bf6SJohannes Doerfert OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
386373720c80SJohannes Doerfert M, OMPRTL___kmpc_barrier_simple_generic);
386406cfdd52SJoseph Huber CallInst *Barrier =
386506cfdd52SJoseph Huber CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
386606cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
386706cfdd52SJoseph Huber Barrier->setDebugLoc(DLoc);
3868d9659bf6SJohannes Doerfert
386971052ea1SJon Chesterfield if (WorkFnAI->getType()->getPointerAddressSpace() !=
387071052ea1SJon Chesterfield (unsigned int)AddressSpace::Generic) {
387171052ea1SJon Chesterfield WorkFnAI = new AddrSpaceCastInst(
387271052ea1SJon Chesterfield WorkFnAI,
387371052ea1SJon Chesterfield PointerType::getWithSamePointeeType(
387471052ea1SJon Chesterfield cast<PointerType>(WorkFnAI->getType()),
387571052ea1SJon Chesterfield (unsigned int)AddressSpace::Generic),
387671052ea1SJon Chesterfield WorkFnAI->getName() + ".generic", StateMachineBeginBB);
387771052ea1SJon Chesterfield WorkFnAI->setDebugLoc(DLoc);
387871052ea1SJon Chesterfield }
387971052ea1SJon Chesterfield
3880d9659bf6SJohannes Doerfert FunctionCallee KernelParallelFn =
3881d9659bf6SJohannes Doerfert OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3882d9659bf6SJohannes Doerfert M, OMPRTL___kmpc_kernel_parallel);
388306cfdd52SJoseph Huber CallInst *IsActiveWorker = CallInst::Create(
3884d9659bf6SJohannes Doerfert KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
388506cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
3886d9659bf6SJohannes Doerfert IsActiveWorker->setDebugLoc(DLoc);
3887d9659bf6SJohannes Doerfert Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
3888d9659bf6SJohannes Doerfert StateMachineBeginBB);
3889d9659bf6SJohannes Doerfert WorkFn->setDebugLoc(DLoc);
3890d9659bf6SJohannes Doerfert
3891d9659bf6SJohannes Doerfert FunctionType *ParallelRegionFnTy = FunctionType::get(
3892d9659bf6SJohannes Doerfert Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
3893d9659bf6SJohannes Doerfert false);
3894d9659bf6SJohannes Doerfert Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
3895d9659bf6SJohannes Doerfert WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
3896d9659bf6SJohannes Doerfert StateMachineBeginBB);
3897d9659bf6SJohannes Doerfert
3898d9659bf6SJohannes Doerfert Instruction *IsDone =
3899d9659bf6SJohannes Doerfert ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
3900d9659bf6SJohannes Doerfert Constant::getNullValue(VoidPtrTy), "worker.is_done",
3901d9659bf6SJohannes Doerfert StateMachineBeginBB);
3902d9659bf6SJohannes Doerfert IsDone->setDebugLoc(DLoc);
3903d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
3904d9659bf6SJohannes Doerfert IsDone, StateMachineBeginBB)
3905d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3906d9659bf6SJohannes Doerfert
3907d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineIfCascadeCurrentBB,
3908d9659bf6SJohannes Doerfert StateMachineDoneBarrierBB, IsActiveWorker,
3909d9659bf6SJohannes Doerfert StateMachineIsActiveCheckBB)
3910d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3911d9659bf6SJohannes Doerfert
3912d9659bf6SJohannes Doerfert Value *ZeroArg =
3913d9659bf6SJohannes Doerfert Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
3914d9659bf6SJohannes Doerfert
3915d9659bf6SJohannes Doerfert // Now that we have most of the CFG skeleton it is time for the if-cascade
3916d9659bf6SJohannes Doerfert // that checks the function pointer we got from the runtime against the
3917d9659bf6SJohannes Doerfert // parallel regions we expect, if there are any.
3918c11ebfeaSJoseph Huber for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
3919c11ebfeaSJoseph Huber auto *ParallelRegion = ReachedKnownParallelRegions[I];
3920d9659bf6SJohannes Doerfert BasicBlock *PRExecuteBB = BasicBlock::Create(
3921d9659bf6SJohannes Doerfert Ctx, "worker_state_machine.parallel_region.execute", Kernel,
3922d9659bf6SJohannes Doerfert StateMachineEndParallelBB);
3923d9659bf6SJohannes Doerfert CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
3924d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3925d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
3926d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3927d9659bf6SJohannes Doerfert
3928d9659bf6SJohannes Doerfert BasicBlock *PRNextBB =
3929d9659bf6SJohannes Doerfert BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3930d9659bf6SJohannes Doerfert Kernel, StateMachineEndParallelBB);
3931d9659bf6SJohannes Doerfert
3932d9659bf6SJohannes Doerfert // Check if we need to compare the pointer at all or if we can just
3933d9659bf6SJohannes Doerfert // call the parallel region function.
3934d9659bf6SJohannes Doerfert Value *IsPR;
3935c11ebfeaSJoseph Huber if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
3936d9659bf6SJohannes Doerfert Instruction *CmpI = ICmpInst::Create(
3937d9659bf6SJohannes Doerfert ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
3938d9659bf6SJohannes Doerfert "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
3939d9659bf6SJohannes Doerfert CmpI->setDebugLoc(DLoc);
3940d9659bf6SJohannes Doerfert IsPR = CmpI;
3941d9659bf6SJohannes Doerfert } else {
3942d9659bf6SJohannes Doerfert IsPR = ConstantInt::getTrue(Ctx);
3943d9659bf6SJohannes Doerfert }
3944d9659bf6SJohannes Doerfert
3945d9659bf6SJohannes Doerfert BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
3946d9659bf6SJohannes Doerfert StateMachineIfCascadeCurrentBB)
3947d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3948d9659bf6SJohannes Doerfert StateMachineIfCascadeCurrentBB = PRNextBB;
3949d9659bf6SJohannes Doerfert }
3950d9659bf6SJohannes Doerfert
3951d9659bf6SJohannes Doerfert // At the end of the if-cascade we place the indirect function pointer call
3952d9659bf6SJohannes Doerfert // in case we might need it, that is if there can be parallel regions we
3953d9659bf6SJohannes Doerfert // have not handled in the if-cascade above.
3954d9659bf6SJohannes Doerfert if (!ReachedUnknownParallelRegions.empty()) {
3955d9659bf6SJohannes Doerfert StateMachineIfCascadeCurrentBB->setName(
3956d9659bf6SJohannes Doerfert "worker_state_machine.parallel_region.fallback.execute");
3957d9659bf6SJohannes Doerfert CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
3958d9659bf6SJohannes Doerfert StateMachineIfCascadeCurrentBB)
3959d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3960d9659bf6SJohannes Doerfert }
3961d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineEndParallelBB,
3962d9659bf6SJohannes Doerfert StateMachineIfCascadeCurrentBB)
3963d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3964d9659bf6SJohannes Doerfert
396506cfdd52SJoseph Huber FunctionCallee EndParallelFn =
396606cfdd52SJoseph Huber OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
396706cfdd52SJoseph Huber M, OMPRTL___kmpc_kernel_end_parallel);
396806cfdd52SJoseph Huber CallInst *EndParallel =
396906cfdd52SJoseph Huber CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
397006cfdd52SJoseph Huber OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
397106cfdd52SJoseph Huber EndParallel->setDebugLoc(DLoc);
3972d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
3973d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3974d9659bf6SJohannes Doerfert
3975d9659bf6SJohannes Doerfert CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
3976d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3977d9659bf6SJohannes Doerfert BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
3978d9659bf6SJohannes Doerfert ->setDebugLoc(DLoc);
3979d9659bf6SJohannes Doerfert
3980d9659bf6SJohannes Doerfert return ChangeStatus::CHANGED;
3981d9659bf6SJohannes Doerfert }
3982d9659bf6SJohannes Doerfert
3983d9659bf6SJohannes Doerfert /// Fixpoint iteration update function. Will be called every time a dependence
3984d9659bf6SJohannes Doerfert /// changed its state (and in the beginning).
updateImpl__anon23c38c770111::AAKernelInfoFunction3985d9659bf6SJohannes Doerfert ChangeStatus updateImpl(Attributor &A) override {
3986d9659bf6SJohannes Doerfert KernelInfoState StateBefore = getState();
3987d9659bf6SJohannes Doerfert
3988514c033dSJohannes Doerfert // Callback to check a read/write instruction.
3989514c033dSJohannes Doerfert auto CheckRWInst = [&](Instruction &I) {
3990514c033dSJohannes Doerfert // We handle calls later.
3991514c033dSJohannes Doerfert if (isa<CallBase>(I))
3992514c033dSJohannes Doerfert return true;
3993514c033dSJohannes Doerfert // We only care about write effects.
3994514c033dSJohannes Doerfert if (!I.mayWriteToMemory())
3995514c033dSJohannes Doerfert return true;
3996514c033dSJohannes Doerfert if (auto *SI = dyn_cast<StoreInst>(&I)) {
3997514c033dSJohannes Doerfert SmallVector<const Value *> Objects;
3998514c033dSJohannes Doerfert getUnderlyingObjects(SI->getPointerOperand(), Objects);
3999514c033dSJohannes Doerfert if (llvm::all_of(Objects,
4000514c033dSJohannes Doerfert [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
4001514c033dSJohannes Doerfert return true;
400229a3e3ddSGiorgis Georgakoudis // Check for AAHeapToStack moved objects which must not be guarded.
400329a3e3ddSGiorgis Georgakoudis auto &HS = A.getAAFor<AAHeapToStack>(
400429a3e3ddSGiorgis Georgakoudis *this, IRPosition::function(*I.getFunction()),
4005e6e440aeSJohannes Doerfert DepClassTy::OPTIONAL);
400629a3e3ddSGiorgis Georgakoudis if (llvm::all_of(Objects, [&HS](const Value *Obj) {
400729a3e3ddSGiorgis Georgakoudis auto *CB = dyn_cast<CallBase>(Obj);
400829a3e3ddSGiorgis Georgakoudis if (!CB)
400929a3e3ddSGiorgis Georgakoudis return false;
401029a3e3ddSGiorgis Georgakoudis return HS.isAssumedHeapToStack(*CB);
401129a3e3ddSGiorgis Georgakoudis })) {
401229a3e3ddSGiorgis Georgakoudis return true;
4013514c033dSJohannes Doerfert }
401429a3e3ddSGiorgis Georgakoudis }
401529a3e3ddSGiorgis Georgakoudis
401629a3e3ddSGiorgis Georgakoudis // Insert instruction that needs guarding.
4017514c033dSJohannes Doerfert SPMDCompatibilityTracker.insert(&I);
4018514c033dSJohannes Doerfert return true;
4019514c033dSJohannes Doerfert };
4020792aac98SJohannes Doerfert
4021792aac98SJohannes Doerfert bool UsedAssumedInformationInCheckRWInst = false;
402297387fdfSJohannes Doerfert if (!SPMDCompatibilityTracker.isAtFixpoint())
4023792aac98SJohannes Doerfert if (!A.checkForAllReadWriteInstructions(
4024792aac98SJohannes Doerfert CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
4025514c033dSJohannes Doerfert SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4026514c033dSJohannes Doerfert
4027058c312aSJoseph Huber bool UsedAssumedInformationFromReachingKernels = false;
4028e97e0a4fSShilei Tian if (!IsKernelEntry) {
4029e97e0a4fSShilei Tian updateParallelLevels(A);
403029a3e3ddSGiorgis Georgakoudis
4031058c312aSJoseph Huber bool AllReachingKernelsKnown = true;
4032058c312aSJoseph Huber updateReachingKernelEntries(A, AllReachingKernelsKnown);
4033058c312aSJoseph Huber UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
4034058c312aSJoseph Huber
403529a3e3ddSGiorgis Georgakoudis if (!ParallelLevels.isValidState())
403629a3e3ddSGiorgis Georgakoudis SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4037058c312aSJoseph Huber else if (!ReachingKernelEntries.isValidState())
4038058c312aSJoseph Huber SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4039058c312aSJoseph Huber else if (!SPMDCompatibilityTracker.empty()) {
4040058c312aSJoseph Huber // Check if all reaching kernels agree on the mode as we can otherwise
4041058c312aSJoseph Huber // not guard instructions. We might not be sure about the mode so we
4042058c312aSJoseph Huber // cannot fix the internal spmd-zation state either.
4043058c312aSJoseph Huber int SPMD = 0, Generic = 0;
4044058c312aSJoseph Huber for (auto *Kernel : ReachingKernelEntries) {
4045058c312aSJoseph Huber auto &CBAA = A.getAAFor<AAKernelInfo>(
4046058c312aSJoseph Huber *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
4047058c312aSJoseph Huber if (CBAA.SPMDCompatibilityTracker.isValidState() &&
4048058c312aSJoseph Huber CBAA.SPMDCompatibilityTracker.isAssumed())
4049058c312aSJoseph Huber ++SPMD;
4050058c312aSJoseph Huber else
4051058c312aSJoseph Huber ++Generic;
4052058c312aSJoseph Huber if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
4053058c312aSJoseph Huber UsedAssumedInformationFromReachingKernels = true;
4054058c312aSJoseph Huber }
4055058c312aSJoseph Huber if (SPMD != 0 && Generic != 0)
4056058c312aSJoseph Huber SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4057058c312aSJoseph Huber }
4058e97e0a4fSShilei Tian }
4059ca662297SShilei Tian
4060d9659bf6SJohannes Doerfert // Callback to check a call instruction.
4061d61aac76SJohannes Doerfert bool AllParallelRegionStatesWereFixed = true;
406297387fdfSJohannes Doerfert bool AllSPMDStatesWereFixed = true;
4063d9659bf6SJohannes Doerfert auto CheckCallInst = [&](Instruction &I) {
4064d9659bf6SJohannes Doerfert auto &CB = cast<CallBase>(I);
4065d9659bf6SJohannes Doerfert auto &CBAA = A.getAAFor<AAKernelInfo>(
4066d9659bf6SJohannes Doerfert *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4067d9659bf6SJohannes Doerfert getState() ^= CBAA.getState();
406897387fdfSJohannes Doerfert AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
4069d61aac76SJohannes Doerfert AllParallelRegionStatesWereFixed &=
4070d61aac76SJohannes Doerfert CBAA.ReachedKnownParallelRegions.isAtFixpoint();
4071d61aac76SJohannes Doerfert AllParallelRegionStatesWereFixed &=
4072d61aac76SJohannes Doerfert CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
4073d9659bf6SJohannes Doerfert return true;
4074d9659bf6SJohannes Doerfert };
4075d9659bf6SJohannes Doerfert
4076792aac98SJohannes Doerfert bool UsedAssumedInformationInCheckCallInst = false;
4077792aac98SJohannes Doerfert if (!A.checkForAllCallLikeInstructions(
4078c6457dcaSJohannes Doerfert CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
4079e6e440aeSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG
4080e6e440aeSJohannes Doerfert << "Failed to visit all call-like instructions!\n";);
4081d9659bf6SJohannes Doerfert return indicatePessimisticFixpoint();
4082c6457dcaSJohannes Doerfert }
4083d9659bf6SJohannes Doerfert
4084d61aac76SJohannes Doerfert // If we haven't used any assumed information for the reached parallel
4085d61aac76SJohannes Doerfert // region states we can fix it.
4086d61aac76SJohannes Doerfert if (!UsedAssumedInformationInCheckCallInst &&
4087d61aac76SJohannes Doerfert AllParallelRegionStatesWereFixed) {
4088d61aac76SJohannes Doerfert ReachedKnownParallelRegions.indicateOptimisticFixpoint();
4089d61aac76SJohannes Doerfert ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
4090d61aac76SJohannes Doerfert }
4091d61aac76SJohannes Doerfert
4092d61aac76SJohannes Doerfert // If we are sure there are no parallel regions in the kernel we do not
4093d61aac76SJohannes Doerfert // want SPMD mode.
4094d61aac76SJohannes Doerfert if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
4095d61aac76SJohannes Doerfert ReachedKnownParallelRegions.isAtFixpoint() &&
4096d61aac76SJohannes Doerfert ReachedUnknownParallelRegions.isValidState() &&
4097d61aac76SJohannes Doerfert ReachedKnownParallelRegions.isValidState() &&
4098d61aac76SJohannes Doerfert !mayContainParallelRegion())
4099d61aac76SJohannes Doerfert SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4100d61aac76SJohannes Doerfert
410197387fdfSJohannes Doerfert // If we haven't used any assumed information for the SPMD state we can fix
410297387fdfSJohannes Doerfert // it.
410397387fdfSJohannes Doerfert if (!UsedAssumedInformationInCheckRWInst &&
4104058c312aSJoseph Huber !UsedAssumedInformationInCheckCallInst &&
4105058c312aSJoseph Huber !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
410697387fdfSJohannes Doerfert SPMDCompatibilityTracker.indicateOptimisticFixpoint();
410797387fdfSJohannes Doerfert
4108d9659bf6SJohannes Doerfert return StateBefore == getState() ? ChangeStatus::UNCHANGED
4109d9659bf6SJohannes Doerfert : ChangeStatus::CHANGED;
4110d9659bf6SJohannes Doerfert }
4111ca662297SShilei Tian
4112ca662297SShilei Tian private:
4113ca662297SShilei Tian /// Update info regarding reaching kernels.
updateReachingKernelEntries__anon23c38c770111::AAKernelInfoFunction4114058c312aSJoseph Huber void updateReachingKernelEntries(Attributor &A,
4115058c312aSJoseph Huber bool &AllReachingKernelsKnown) {
4116ca662297SShilei Tian auto PredCallSite = [&](AbstractCallSite ACS) {
4117ca662297SShilei Tian Function *Caller = ACS.getInstruction()->getFunction();
4118ca662297SShilei Tian
4119ca662297SShilei Tian assert(Caller && "Caller is nullptr");
4120ca662297SShilei Tian
4121d3454ee8SShilei Tian auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
4122d3454ee8SShilei Tian IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
4123ca662297SShilei Tian if (CAA.ReachingKernelEntries.isValidState()) {
4124ca662297SShilei Tian ReachingKernelEntries ^= CAA.ReachingKernelEntries;
4125ca662297SShilei Tian return true;
4126ca662297SShilei Tian }
4127ca662297SShilei Tian
4128ca662297SShilei Tian // We lost track of the caller of the associated function, any kernel
4129ca662297SShilei Tian // could reach now.
4130ca662297SShilei Tian ReachingKernelEntries.indicatePessimisticFixpoint();
4131ca662297SShilei Tian
4132ca662297SShilei Tian return true;
4133ca662297SShilei Tian };
4134ca662297SShilei Tian
4135ca662297SShilei Tian if (!A.checkForAllCallSites(PredCallSite, *this,
4136ca662297SShilei Tian true /* RequireAllCallSites */,
4137058c312aSJoseph Huber AllReachingKernelsKnown))
4138ca662297SShilei Tian ReachingKernelEntries.indicatePessimisticFixpoint();
4139ca662297SShilei Tian }
4140e97e0a4fSShilei Tian
4141e97e0a4fSShilei Tian /// Update info regarding parallel levels.
updateParallelLevels__anon23c38c770111::AAKernelInfoFunction4142e97e0a4fSShilei Tian void updateParallelLevels(Attributor &A) {
4143e97e0a4fSShilei Tian auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4144e97e0a4fSShilei Tian OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4145e97e0a4fSShilei Tian OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4146e97e0a4fSShilei Tian
4147e97e0a4fSShilei Tian auto PredCallSite = [&](AbstractCallSite ACS) {
4148e97e0a4fSShilei Tian Function *Caller = ACS.getInstruction()->getFunction();
4149e97e0a4fSShilei Tian
4150e97e0a4fSShilei Tian assert(Caller && "Caller is nullptr");
4151e97e0a4fSShilei Tian
4152e97e0a4fSShilei Tian auto &CAA =
4153e97e0a4fSShilei Tian A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
4154e97e0a4fSShilei Tian if (CAA.ParallelLevels.isValidState()) {
4155e97e0a4fSShilei Tian // Any function that is called by `__kmpc_parallel_51` will not be
4156e97e0a4fSShilei Tian // folded as the parallel level in the function is updated. In order to
4157e97e0a4fSShilei Tian // get it right, all the analysis would depend on the implentation. That
4158e97e0a4fSShilei Tian // said, if in the future any change to the implementation, the analysis
4159e97e0a4fSShilei Tian // could be wrong. As a consequence, we are just conservative here.
4160e97e0a4fSShilei Tian if (Caller == Parallel51RFI.Declaration) {
4161e97e0a4fSShilei Tian ParallelLevels.indicatePessimisticFixpoint();
4162e97e0a4fSShilei Tian return true;
4163e97e0a4fSShilei Tian }
4164e97e0a4fSShilei Tian
4165e97e0a4fSShilei Tian ParallelLevels ^= CAA.ParallelLevels;
4166e97e0a4fSShilei Tian
4167e97e0a4fSShilei Tian return true;
4168e97e0a4fSShilei Tian }
4169e97e0a4fSShilei Tian
4170e97e0a4fSShilei Tian // We lost track of the caller of the associated function, any kernel
4171e97e0a4fSShilei Tian // could reach now.
4172e97e0a4fSShilei Tian ParallelLevels.indicatePessimisticFixpoint();
4173e97e0a4fSShilei Tian
4174e97e0a4fSShilei Tian return true;
4175e97e0a4fSShilei Tian };
4176e97e0a4fSShilei Tian
4177e97e0a4fSShilei Tian bool AllCallSitesKnown = true;
4178e97e0a4fSShilei Tian if (!A.checkForAllCallSites(PredCallSite, *this,
4179e97e0a4fSShilei Tian true /* RequireAllCallSites */,
4180e97e0a4fSShilei Tian AllCallSitesKnown))
4181e97e0a4fSShilei Tian ParallelLevels.indicatePessimisticFixpoint();
4182e97e0a4fSShilei Tian }
4183d9659bf6SJohannes Doerfert };
4184d9659bf6SJohannes Doerfert
4185d9659bf6SJohannes Doerfert /// The call site kernel info abstract attribute, basically, what can we say
4186d9659bf6SJohannes Doerfert /// about a call site with regards to the KernelInfoState. For now this simply
4187d9659bf6SJohannes Doerfert /// forwards the information from the callee.
4188d9659bf6SJohannes Doerfert struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite__anon23c38c770111::AAKernelInfoCallSite4189d9659bf6SJohannes Doerfert AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
4190d9659bf6SJohannes Doerfert : AAKernelInfo(IRP, A) {}
4191d9659bf6SJohannes Doerfert
4192d9659bf6SJohannes Doerfert /// See AbstractAttribute::initialize(...).
initialize__anon23c38c770111::AAKernelInfoCallSite4193d9659bf6SJohannes Doerfert void initialize(Attributor &A) override {
4194d9659bf6SJohannes Doerfert AAKernelInfo::initialize(A);
4195d9659bf6SJohannes Doerfert
4196d9659bf6SJohannes Doerfert CallBase &CB = cast<CallBase>(getAssociatedValue());
4197d9659bf6SJohannes Doerfert Function *Callee = getAssociatedFunction();
4198d9659bf6SJohannes Doerfert
4199e52937ebSJoseph Huber auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
4200e52937ebSJoseph Huber *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4201d9659bf6SJohannes Doerfert
4202514c033dSJohannes Doerfert // Check for SPMD-mode assumptions.
4203e52937ebSJoseph Huber if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
4204514c033dSJohannes Doerfert SPMDCompatibilityTracker.indicateOptimisticFixpoint();
42059e2fc0baSJoseph Huber indicateOptimisticFixpoint();
42069e2fc0baSJoseph Huber }
4207514c033dSJohannes Doerfert
4208d9659bf6SJohannes Doerfert // First weed out calls we do not care about, that is readonly/readnone
4209d9659bf6SJohannes Doerfert // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
4210d9659bf6SJohannes Doerfert // parallel region or anything else we are looking for.
4211d9659bf6SJohannes Doerfert if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
4212d9659bf6SJohannes Doerfert indicateOptimisticFixpoint();
4213d9659bf6SJohannes Doerfert return;
4214d9659bf6SJohannes Doerfert }
4215d9659bf6SJohannes Doerfert
4216d9659bf6SJohannes Doerfert // Next we check if we know the callee. If it is a known OpenMP function
4217d9659bf6SJohannes Doerfert // we will handle them explicitly in the switch below. If it is not, we
4218d9659bf6SJohannes Doerfert // will use an AAKernelInfo object on the callee to gather information and
4219d9659bf6SJohannes Doerfert // merge that into the current state. The latter happens in the updateImpl.
4220d9659bf6SJohannes Doerfert auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4221d9659bf6SJohannes Doerfert const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4222d9659bf6SJohannes Doerfert if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4223d9659bf6SJohannes Doerfert // Unknown caller or declarations are not analyzable, we give up.
4224d9659bf6SJohannes Doerfert if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
4225d9659bf6SJohannes Doerfert
4226d9659bf6SJohannes Doerfert // Unknown callees might contain parallel regions, except if they have
4227d9659bf6SJohannes Doerfert // an appropriate assumption attached.
4228e52937ebSJoseph Huber if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
4229e52937ebSJoseph Huber AssumptionAA.hasAssumption("omp_no_parallelism")))
4230d9659bf6SJohannes Doerfert ReachedUnknownParallelRegions.insert(&CB);
4231d9659bf6SJohannes Doerfert
4232514c033dSJohannes Doerfert // If SPMDCompatibilityTracker is not fixed, we need to give up on the
4233514c033dSJohannes Doerfert // idea we can run something unknown in SPMD-mode.
423429a3e3ddSGiorgis Georgakoudis if (!SPMDCompatibilityTracker.isAtFixpoint()) {
423529a3e3ddSGiorgis Georgakoudis SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4236514c033dSJohannes Doerfert SPMDCompatibilityTracker.insert(&CB);
423729a3e3ddSGiorgis Georgakoudis }
4238514c033dSJohannes Doerfert
4239d9659bf6SJohannes Doerfert // We have updated the state for this unknown call properly, there won't
4240d9659bf6SJohannes Doerfert // be any change so we indicate a fixpoint.
4241d9659bf6SJohannes Doerfert indicateOptimisticFixpoint();
4242d9659bf6SJohannes Doerfert }
4243d9659bf6SJohannes Doerfert // If the callee is known and can be used in IPO, we will update the state
4244d9659bf6SJohannes Doerfert // based on the callee state in updateImpl.
4245d9659bf6SJohannes Doerfert return;
4246d9659bf6SJohannes Doerfert }
4247d9659bf6SJohannes Doerfert
4248d9659bf6SJohannes Doerfert const unsigned int WrapperFunctionArgNo = 6;
4249d9659bf6SJohannes Doerfert RuntimeFunction RF = It->getSecond();
4250d9659bf6SJohannes Doerfert switch (RF) {
4251514c033dSJohannes Doerfert // All the functions we know are compatible with SPMD mode.
4252514c033dSJohannes Doerfert case OMPRTL___kmpc_is_spmd_exec_mode:
425387ce7e65SJoseph Huber case OMPRTL___kmpc_distribute_static_fini:
4254514c033dSJohannes Doerfert case OMPRTL___kmpc_for_static_fini:
4255514c033dSJohannes Doerfert case OMPRTL___kmpc_global_thread_num:
42565ab6aeddSJose M Monsalve Diaz case OMPRTL___kmpc_get_hardware_num_threads_in_block:
42575ab6aeddSJose M Monsalve Diaz case OMPRTL___kmpc_get_hardware_num_blocks:
4258514c033dSJohannes Doerfert case OMPRTL___kmpc_single:
4259514c033dSJohannes Doerfert case OMPRTL___kmpc_end_single:
4260514c033dSJohannes Doerfert case OMPRTL___kmpc_master:
4261514c033dSJohannes Doerfert case OMPRTL___kmpc_end_master:
4262514c033dSJohannes Doerfert case OMPRTL___kmpc_barrier:
4263744aa09fSJoseph Huber case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
4264744aa09fSJoseph Huber case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
4265744aa09fSJoseph Huber case OMPRTL___kmpc_nvptx_end_reduce_nowait:
4266514c033dSJohannes Doerfert break;
426787ce7e65SJoseph Huber case OMPRTL___kmpc_distribute_static_init_4:
426887ce7e65SJoseph Huber case OMPRTL___kmpc_distribute_static_init_4u:
426987ce7e65SJoseph Huber case OMPRTL___kmpc_distribute_static_init_8:
427087ce7e65SJoseph Huber case OMPRTL___kmpc_distribute_static_init_8u:
4271514c033dSJohannes Doerfert case OMPRTL___kmpc_for_static_init_4:
4272514c033dSJohannes Doerfert case OMPRTL___kmpc_for_static_init_4u:
4273514c033dSJohannes Doerfert case OMPRTL___kmpc_for_static_init_8:
4274514c033dSJohannes Doerfert case OMPRTL___kmpc_for_static_init_8u: {
4275514c033dSJohannes Doerfert // Check the schedule and allow static schedule in SPMD mode.
4276514c033dSJohannes Doerfert unsigned ScheduleArgOpNo = 2;
4277514c033dSJohannes Doerfert auto *ScheduleTypeCI =
4278514c033dSJohannes Doerfert dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
4279514c033dSJohannes Doerfert unsigned ScheduleTypeVal =
4280514c033dSJohannes Doerfert ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
4281514c033dSJohannes Doerfert switch (OMPScheduleType(ScheduleTypeVal)) {
42822d92ee97SMichael Kruse case OMPScheduleType::UnorderedStatic:
42832d92ee97SMichael Kruse case OMPScheduleType::UnorderedStaticChunked:
42842d92ee97SMichael Kruse case OMPScheduleType::OrderedDistribute:
42852d92ee97SMichael Kruse case OMPScheduleType::OrderedDistributeChunked:
4286514c033dSJohannes Doerfert break;
4287514c033dSJohannes Doerfert default:
428829a3e3ddSGiorgis Georgakoudis SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4289514c033dSJohannes Doerfert SPMDCompatibilityTracker.insert(&CB);
4290514c033dSJohannes Doerfert break;
4291514c033dSJohannes Doerfert };
4292514c033dSJohannes Doerfert } break;
4293d9659bf6SJohannes Doerfert case OMPRTL___kmpc_target_init:
4294d9659bf6SJohannes Doerfert KernelInitCB = &CB;
4295d9659bf6SJohannes Doerfert break;
4296d9659bf6SJohannes Doerfert case OMPRTL___kmpc_target_deinit:
4297d9659bf6SJohannes Doerfert KernelDeinitCB = &CB;
4298d9659bf6SJohannes Doerfert break;
4299d9659bf6SJohannes Doerfert case OMPRTL___kmpc_parallel_51:
4300d9659bf6SJohannes Doerfert if (auto *ParallelRegion = dyn_cast<Function>(
4301d9659bf6SJohannes Doerfert CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
4302d9659bf6SJohannes Doerfert ReachedKnownParallelRegions.insert(ParallelRegion);
4303d9659bf6SJohannes Doerfert break;
4304d9659bf6SJohannes Doerfert }
4305d9659bf6SJohannes Doerfert // The condition above should usually get the parallel region function
4306d9659bf6SJohannes Doerfert // pointer and record it. In the off chance it doesn't we assume the
4307d9659bf6SJohannes Doerfert // worst.
4308d9659bf6SJohannes Doerfert ReachedUnknownParallelRegions.insert(&CB);
4309d9659bf6SJohannes Doerfert break;
4310d9659bf6SJohannes Doerfert case OMPRTL___kmpc_omp_task:
4311d9659bf6SJohannes Doerfert // We do not look into tasks right now, just give up.
43129ea5b972SJoseph Huber SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4313514c033dSJohannes Doerfert SPMDCompatibilityTracker.insert(&CB);
4314d9659bf6SJohannes Doerfert ReachedUnknownParallelRegions.insert(&CB);
4315c6457dcaSJohannes Doerfert break;
4316f8c40ed8SGiorgis Georgakoudis case OMPRTL___kmpc_alloc_shared:
4317f8c40ed8SGiorgis Georgakoudis case OMPRTL___kmpc_free_shared:
4318f8c40ed8SGiorgis Georgakoudis // Return without setting a fixpoint, to be resolved in updateImpl.
4319f8c40ed8SGiorgis Georgakoudis return;
4320d9659bf6SJohannes Doerfert default:
4321514c033dSJohannes Doerfert // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
4322c6457dcaSJohannes Doerfert // generally. However, they do not hide parallel regions.
43239ea5b972SJoseph Huber SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4324514c033dSJohannes Doerfert SPMDCompatibilityTracker.insert(&CB);
4325c6457dcaSJohannes Doerfert break;
4326d9659bf6SJohannes Doerfert }
4327d9659bf6SJohannes Doerfert // All other OpenMP runtime calls will not reach parallel regions so they
4328d9659bf6SJohannes Doerfert // can be safely ignored for now. Since it is a known OpenMP runtime call we
4329d9659bf6SJohannes Doerfert // have now modeled all effects and there is no need for any update.
4330d9659bf6SJohannes Doerfert indicateOptimisticFixpoint();
4331d9659bf6SJohannes Doerfert }
4332d9659bf6SJohannes Doerfert
updateImpl__anon23c38c770111::AAKernelInfoCallSite4333d9659bf6SJohannes Doerfert ChangeStatus updateImpl(Attributor &A) override {
4334d9659bf6SJohannes Doerfert // TODO: Once we have call site specific value information we can provide
4335d9659bf6SJohannes Doerfert // call site specific liveness information and then it makes
4336d9659bf6SJohannes Doerfert // sense to specialize attributes for call sites arguments instead of
4337d9659bf6SJohannes Doerfert // redirecting requests to the callee argument.
4338d9659bf6SJohannes Doerfert Function *F = getAssociatedFunction();
4339f8c40ed8SGiorgis Georgakoudis
4340f8c40ed8SGiorgis Georgakoudis auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4341f8c40ed8SGiorgis Georgakoudis const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
4342f8c40ed8SGiorgis Georgakoudis
4343f8c40ed8SGiorgis Georgakoudis // If F is not a runtime function, propagate the AAKernelInfo of the callee.
4344f8c40ed8SGiorgis Georgakoudis if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4345d9659bf6SJohannes Doerfert const IRPosition &FnPos = IRPosition::function(*F);
4346d9659bf6SJohannes Doerfert auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
4347d9659bf6SJohannes Doerfert if (getState() == FnAA.getState())
4348d9659bf6SJohannes Doerfert return ChangeStatus::UNCHANGED;
4349d9659bf6SJohannes Doerfert getState() = FnAA.getState();
4350d9659bf6SJohannes Doerfert return ChangeStatus::CHANGED;
4351d9659bf6SJohannes Doerfert }
4352f8c40ed8SGiorgis Georgakoudis
4353f8c40ed8SGiorgis Georgakoudis // F is a runtime function that allocates or frees memory, check
4354f8c40ed8SGiorgis Georgakoudis // AAHeapToStack and AAHeapToShared.
4355f8c40ed8SGiorgis Georgakoudis KernelInfoState StateBefore = getState();
4356f8c40ed8SGiorgis Georgakoudis assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||
4357f8c40ed8SGiorgis Georgakoudis It->getSecond() == OMPRTL___kmpc_free_shared) &&
4358f8c40ed8SGiorgis Georgakoudis "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
4359f8c40ed8SGiorgis Georgakoudis
4360f8c40ed8SGiorgis Georgakoudis CallBase &CB = cast<CallBase>(getAssociatedValue());
4361f8c40ed8SGiorgis Georgakoudis
4362f8c40ed8SGiorgis Georgakoudis auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
4363f8c40ed8SGiorgis Georgakoudis *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4364f8c40ed8SGiorgis Georgakoudis auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
4365f8c40ed8SGiorgis Georgakoudis *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4366f8c40ed8SGiorgis Georgakoudis
4367f8c40ed8SGiorgis Georgakoudis RuntimeFunction RF = It->getSecond();
4368f8c40ed8SGiorgis Georgakoudis
4369f8c40ed8SGiorgis Georgakoudis switch (RF) {
4370f8c40ed8SGiorgis Georgakoudis // If neither HeapToStack nor HeapToShared assume the call is removed,
4371f8c40ed8SGiorgis Georgakoudis // assume SPMD incompatibility.
4372f8c40ed8SGiorgis Georgakoudis case OMPRTL___kmpc_alloc_shared:
4373f8c40ed8SGiorgis Georgakoudis if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
4374f8c40ed8SGiorgis Georgakoudis !HeapToSharedAA.isAssumedHeapToShared(CB))
4375f8c40ed8SGiorgis Georgakoudis SPMDCompatibilityTracker.insert(&CB);
4376f8c40ed8SGiorgis Georgakoudis break;
4377f8c40ed8SGiorgis Georgakoudis case OMPRTL___kmpc_free_shared:
4378f8c40ed8SGiorgis Georgakoudis if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
4379f8c40ed8SGiorgis Georgakoudis !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
4380f8c40ed8SGiorgis Georgakoudis SPMDCompatibilityTracker.insert(&CB);
4381f8c40ed8SGiorgis Georgakoudis break;
4382f8c40ed8SGiorgis Georgakoudis default:
43839ea5b972SJoseph Huber SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4384f8c40ed8SGiorgis Georgakoudis SPMDCompatibilityTracker.insert(&CB);
4385f8c40ed8SGiorgis Georgakoudis }
4386f8c40ed8SGiorgis Georgakoudis
4387f8c40ed8SGiorgis Georgakoudis return StateBefore == getState() ? ChangeStatus::UNCHANGED
4388f8c40ed8SGiorgis Georgakoudis : ChangeStatus::CHANGED;
4389f8c40ed8SGiorgis Georgakoudis }
4390d9659bf6SJohannes Doerfert };
4391d9659bf6SJohannes Doerfert
4392ca662297SShilei Tian struct AAFoldRuntimeCall
4393ca662297SShilei Tian : public StateWrapper<BooleanState, AbstractAttribute> {
4394ca662297SShilei Tian using Base = StateWrapper<BooleanState, AbstractAttribute>;
4395ca662297SShilei Tian
AAFoldRuntimeCall__anon23c38c770111::AAFoldRuntimeCall4396ca662297SShilei Tian AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4397ca662297SShilei Tian
4398ca662297SShilei Tian /// Statistics are tracked as part of manifest for now.
trackStatistics__anon23c38c770111::AAFoldRuntimeCall4399ca662297SShilei Tian void trackStatistics() const override {}
4400ca662297SShilei Tian
4401ca662297SShilei Tian /// Create an abstract attribute biew for the position \p IRP.
4402ca662297SShilei Tian static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
4403ca662297SShilei Tian Attributor &A);
4404ca662297SShilei Tian
4405ca662297SShilei Tian /// See AbstractAttribute::getName()
getName__anon23c38c770111::AAFoldRuntimeCall4406ca662297SShilei Tian const std::string getName() const override { return "AAFoldRuntimeCall"; }
4407ca662297SShilei Tian
4408ca662297SShilei Tian /// See AbstractAttribute::getIdAddr()
getIdAddr__anon23c38c770111::AAFoldRuntimeCall4409ca662297SShilei Tian const char *getIdAddr() const override { return &ID; }
4410ca662297SShilei Tian
4411ca662297SShilei Tian /// This function should return true if the type of the \p AA is
4412ca662297SShilei Tian /// AAFoldRuntimeCall
classof__anon23c38c770111::AAFoldRuntimeCall4413ca662297SShilei Tian static bool classof(const AbstractAttribute *AA) {
4414ca662297SShilei Tian return (AA->getIdAddr() == &ID);
4415ca662297SShilei Tian }
4416ca662297SShilei Tian
4417ca662297SShilei Tian static const char ID;
4418ca662297SShilei Tian };
4419ca662297SShilei Tian
4420ca662297SShilei Tian struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4421ca662297SShilei Tian AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
4422ca662297SShilei Tian : AAFoldRuntimeCall(IRP, A) {}
4423ca662297SShilei Tian
4424ca662297SShilei Tian /// See AbstractAttribute::getAsStr()
getAsStr__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4425ca662297SShilei Tian const std::string getAsStr() const override {
4426ca662297SShilei Tian if (!isValidState())
4427ca662297SShilei Tian return "<invalid>";
4428ca662297SShilei Tian
4429ca662297SShilei Tian std::string Str("simplified value: ");
4430ca662297SShilei Tian
4431a7938c74SKazu Hirata if (!SimplifiedValue)
4432ca662297SShilei Tian return Str + std::string("none");
4433ca662297SShilei Tian
4434611ffcf4SKazu Hirata if (!SimplifiedValue.value())
4435ca662297SShilei Tian return Str + std::string("nullptr");
4436ca662297SShilei Tian
4437611ffcf4SKazu Hirata if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.value()))
4438ca662297SShilei Tian return Str + std::to_string(CI->getSExtValue());
4439ca662297SShilei Tian
4440ca662297SShilei Tian return Str + std::string("unknown");
4441ca662297SShilei Tian }
4442ca662297SShilei Tian
initialize__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4443ca662297SShilei Tian void initialize(Attributor &A) override {
4444cd0dd8ecSJoseph Huber if (DisableOpenMPOptFolding)
4445cd0dd8ecSJoseph Huber indicatePessimisticFixpoint();
4446cd0dd8ecSJoseph Huber
4447ca662297SShilei Tian Function *Callee = getAssociatedFunction();
4448ca662297SShilei Tian
4449ca662297SShilei Tian auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4450ca662297SShilei Tian const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4451ca662297SShilei Tian assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
4452ca662297SShilei Tian "Expected a known OpenMP runtime function");
4453ca662297SShilei Tian
4454ca662297SShilei Tian RFKind = It->getSecond();
4455ca662297SShilei Tian
4456ca662297SShilei Tian CallBase &CB = cast<CallBase>(getAssociatedValue());
4457ca662297SShilei Tian A.registerSimplificationCallback(
4458ca662297SShilei Tian IRPosition::callsite_returned(CB),
4459ca662297SShilei Tian [&](const IRPosition &IRP, const AbstractAttribute *AA,
4460ca662297SShilei Tian bool &UsedAssumedInformation) -> Optional<Value *> {
4461a7938c74SKazu Hirata assert((isValidState() ||
4462611ffcf4SKazu Hirata (SimplifiedValue && SimplifiedValue.value() == nullptr)) &&
4463ca662297SShilei Tian "Unexpected invalid state!");
4464ca662297SShilei Tian
4465ca662297SShilei Tian if (!isAtFixpoint()) {
4466ca662297SShilei Tian UsedAssumedInformation = true;
4467ca662297SShilei Tian if (AA)
4468ca662297SShilei Tian A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
4469ca662297SShilei Tian }
4470ca662297SShilei Tian return SimplifiedValue;
4471ca662297SShilei Tian });
4472ca662297SShilei Tian }
4473ca662297SShilei Tian
updateImpl__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4474ca662297SShilei Tian ChangeStatus updateImpl(Attributor &A) override {
4475ca662297SShilei Tian ChangeStatus Changed = ChangeStatus::UNCHANGED;
4476ca662297SShilei Tian switch (RFKind) {
4477ca662297SShilei Tian case OMPRTL___kmpc_is_spmd_exec_mode:
4478c23da666SShilei Tian Changed |= foldIsSPMDExecMode(A);
4479ca662297SShilei Tian break;
4480196fe994SJoseph Huber case OMPRTL___kmpc_is_generic_main_thread_id:
4481196fe994SJoseph Huber Changed |= foldIsGenericMainThread(A);
4482196fe994SJoseph Huber break;
4483e97e0a4fSShilei Tian case OMPRTL___kmpc_parallel_level:
4484e97e0a4fSShilei Tian Changed |= foldParallelLevel(A);
4485e97e0a4fSShilei Tian break;
44865ab6aeddSJose M Monsalve Diaz case OMPRTL___kmpc_get_hardware_num_threads_in_block:
44875ab6aeddSJose M Monsalve Diaz Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
44885ab6aeddSJose M Monsalve Diaz break;
44895ab6aeddSJose M Monsalve Diaz case OMPRTL___kmpc_get_hardware_num_blocks:
44905ab6aeddSJose M Monsalve Diaz Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
44915ab6aeddSJose M Monsalve Diaz break;
4492ca662297SShilei Tian default:
4493ca662297SShilei Tian llvm_unreachable("Unhandled OpenMP runtime function!");
4494ca662297SShilei Tian }
4495ca662297SShilei Tian
4496ca662297SShilei Tian return Changed;
4497ca662297SShilei Tian }
4498ca662297SShilei Tian
manifest__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4499ca662297SShilei Tian ChangeStatus manifest(Attributor &A) override {
4500ca662297SShilei Tian ChangeStatus Changed = ChangeStatus::UNCHANGED;
4501ca662297SShilei Tian
4502e0e687a6SKazu Hirata if (SimplifiedValue && *SimplifiedValue) {
45037eb899cbSJoseph Huber Instruction &I = *getCtxI();
45047a07b88fSJohannes Doerfert A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
45057eb899cbSJoseph Huber A.deleteAfterManifest(I);
4506196fe994SJoseph Huber
45077eb899cbSJoseph Huber CallBase *CB = dyn_cast<CallBase>(&I);
45087eb899cbSJoseph Huber auto Remark = [&](OptimizationRemark OR) {
45097eb899cbSJoseph Huber if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
45107eb899cbSJoseph Huber return OR << "Replacing OpenMP runtime call "
45117eb899cbSJoseph Huber << CB->getCalledFunction()->getName() << " with "
45127eb899cbSJoseph Huber << ore::NV("FoldedValue", C->getZExtValue()) << ".";
45137eb899cbSJoseph Huber return OR << "Replacing OpenMP runtime call "
45147eb899cbSJoseph Huber << CB->getCalledFunction()->getName() << ".";
45157eb899cbSJoseph Huber };
45167eb899cbSJoseph Huber
45177eb899cbSJoseph Huber if (CB && EnableVerboseRemarks)
45187eb899cbSJoseph Huber A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
45197eb899cbSJoseph Huber
45207eb899cbSJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
4521196fe994SJoseph Huber << **SimplifiedValue << "\n");
4522196fe994SJoseph Huber
4523ca662297SShilei Tian Changed = ChangeStatus::CHANGED;
4524ca662297SShilei Tian }
4525ca662297SShilei Tian
4526ca662297SShilei Tian return Changed;
4527ca662297SShilei Tian }
4528ca662297SShilei Tian
indicatePessimisticFixpoint__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4529ca662297SShilei Tian ChangeStatus indicatePessimisticFixpoint() override {
4530ca662297SShilei Tian SimplifiedValue = nullptr;
4531ca662297SShilei Tian return AAFoldRuntimeCall::indicatePessimisticFixpoint();
4532ca662297SShilei Tian }
4533ca662297SShilei Tian
4534ca662297SShilei Tian private:
4535ca662297SShilei Tian /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
foldIsSPMDExecMode__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4536ca662297SShilei Tian ChangeStatus foldIsSPMDExecMode(Attributor &A) {
4537ca662297SShilei Tian Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4538ca662297SShilei Tian
4539ca662297SShilei Tian unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4540ca662297SShilei Tian unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4541ca662297SShilei Tian auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4542ca662297SShilei Tian *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4543ca662297SShilei Tian
4544ca662297SShilei Tian if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4545ca662297SShilei Tian return indicatePessimisticFixpoint();
4546ca662297SShilei Tian
4547ca662297SShilei Tian for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4548ca662297SShilei Tian auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4549ca662297SShilei Tian DepClassTy::REQUIRED);
4550ca662297SShilei Tian
4551ca662297SShilei Tian if (!AA.isValidState()) {
4552ca662297SShilei Tian SimplifiedValue = nullptr;
4553ca662297SShilei Tian return indicatePessimisticFixpoint();
4554ca662297SShilei Tian }
4555ca662297SShilei Tian
4556ca662297SShilei Tian if (AA.SPMDCompatibilityTracker.isAssumed()) {
4557ca662297SShilei Tian if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4558ca662297SShilei Tian ++KnownSPMDCount;
4559ca662297SShilei Tian else
4560ca662297SShilei Tian ++AssumedSPMDCount;
4561ca662297SShilei Tian } else {
4562ca662297SShilei Tian if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4563ca662297SShilei Tian ++KnownNonSPMDCount;
4564ca662297SShilei Tian else
4565ca662297SShilei Tian ++AssumedNonSPMDCount;
4566ca662297SShilei Tian }
4567ca662297SShilei Tian }
4568ca662297SShilei Tian
4569ae69f468SShilei Tian if ((AssumedSPMDCount + KnownSPMDCount) &&
4570ae69f468SShilei Tian (AssumedNonSPMDCount + KnownNonSPMDCount))
4571ca662297SShilei Tian return indicatePessimisticFixpoint();
4572ca662297SShilei Tian
4573ca662297SShilei Tian auto &Ctx = getAnchorValue().getContext();
4574ca662297SShilei Tian if (KnownSPMDCount || AssumedSPMDCount) {
4575ca662297SShilei Tian assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
4576ca662297SShilei Tian "Expected only SPMD kernels!");
4577ca662297SShilei Tian // All reaching kernels are in SPMD mode. Update all function calls to
4578ca662297SShilei Tian // __kmpc_is_spmd_exec_mode to 1.
4579ca662297SShilei Tian SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4580d3454ee8SShilei Tian } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
4581ca662297SShilei Tian assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
4582ca662297SShilei Tian "Expected only non-SPMD kernels!");
4583ca662297SShilei Tian // All reaching kernels are in non-SPMD mode. Update all function
4584ca662297SShilei Tian // calls to __kmpc_is_spmd_exec_mode to 0.
4585ca662297SShilei Tian SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
4586d3454ee8SShilei Tian } else {
4587d3454ee8SShilei Tian // We have empty reaching kernels, therefore we cannot tell if the
4588d3454ee8SShilei Tian // associated call site can be folded. At this moment, SimplifiedValue
4589d3454ee8SShilei Tian // must be none.
45905413bf1bSKazu Hirata assert(!SimplifiedValue && "SimplifiedValue should be none");
4591ca662297SShilei Tian }
4592ca662297SShilei Tian
4593ca662297SShilei Tian return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4594ca662297SShilei Tian : ChangeStatus::CHANGED;
4595ca662297SShilei Tian }
4596ca662297SShilei Tian
4597196fe994SJoseph Huber /// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
foldIsGenericMainThread__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4598196fe994SJoseph Huber ChangeStatus foldIsGenericMainThread(Attributor &A) {
4599196fe994SJoseph Huber Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4600196fe994SJoseph Huber
4601196fe994SJoseph Huber CallBase &CB = cast<CallBase>(getAssociatedValue());
4602196fe994SJoseph Huber Function *F = CB.getFunction();
4603196fe994SJoseph Huber const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
4604196fe994SJoseph Huber *this, IRPosition::function(*F), DepClassTy::REQUIRED);
4605196fe994SJoseph Huber
4606196fe994SJoseph Huber if (!ExecutionDomainAA.isValidState())
4607196fe994SJoseph Huber return indicatePessimisticFixpoint();
4608196fe994SJoseph Huber
4609196fe994SJoseph Huber auto &Ctx = getAnchorValue().getContext();
4610196fe994SJoseph Huber if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
4611196fe994SJoseph Huber SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4612196fe994SJoseph Huber else
4613196fe994SJoseph Huber return indicatePessimisticFixpoint();
4614196fe994SJoseph Huber
4615196fe994SJoseph Huber return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4616196fe994SJoseph Huber : ChangeStatus::CHANGED;
4617196fe994SJoseph Huber }
4618196fe994SJoseph Huber
4619e97e0a4fSShilei Tian /// Fold __kmpc_parallel_level into a constant if possible.
foldParallelLevel__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned4620e97e0a4fSShilei Tian ChangeStatus foldParallelLevel(Attributor &A) {
4621e97e0a4fSShilei Tian Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4622e97e0a4fSShilei Tian
4623e97e0a4fSShilei Tian auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4624e97e0a4fSShilei Tian *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4625e97e0a4fSShilei Tian
4626e97e0a4fSShilei Tian if (!CallerKernelInfoAA.ParallelLevels.isValidState())
4627e97e0a4fSShilei Tian return indicatePessimisticFixpoint();
4628e97e0a4fSShilei Tian
4629e97e0a4fSShilei Tian if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4630e97e0a4fSShilei Tian return indicatePessimisticFixpoint();
4631e97e0a4fSShilei Tian
4632e97e0a4fSShilei Tian if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
46335413bf1bSKazu Hirata assert(!SimplifiedValue &&
4634e97e0a4fSShilei Tian "SimplifiedValue should keep none at this point");
4635e97e0a4fSShilei Tian return ChangeStatus::UNCHANGED;
4636e97e0a4fSShilei Tian }
4637e97e0a4fSShilei Tian
4638e97e0a4fSShilei Tian unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4639e97e0a4fSShilei Tian unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4640e97e0a4fSShilei Tian for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4641e97e0a4fSShilei Tian auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4642e97e0a4fSShilei Tian DepClassTy::REQUIRED);
4643e97e0a4fSShilei Tian if (!AA.SPMDCompatibilityTracker.isValidState())
4644e97e0a4fSShilei Tian return indicatePessimisticFixpoint();
4645e97e0a4fSShilei Tian
4646e97e0a4fSShilei Tian if (AA.SPMDCompatibilityTracker.isAssumed()) {
4647e97e0a4fSShilei Tian if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4648e97e0a4fSShilei Tian ++KnownSPMDCount;
4649e97e0a4fSShilei Tian else
4650e97e0a4fSShilei Tian ++AssumedSPMDCount;
4651e97e0a4fSShilei Tian } else {
4652e97e0a4fSShilei Tian if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4653e97e0a4fSShilei Tian ++KnownNonSPMDCount;
4654e97e0a4fSShilei Tian else
4655e97e0a4fSShilei Tian ++AssumedNonSPMDCount;
4656e97e0a4fSShilei Tian }
4657e97e0a4fSShilei Tian }
4658e97e0a4fSShilei Tian
4659e97e0a4fSShilei Tian if ((AssumedSPMDCount + KnownSPMDCount) &&
4660e97e0a4fSShilei Tian (AssumedNonSPMDCount + KnownNonSPMDCount))
4661e97e0a4fSShilei Tian return indicatePessimisticFixpoint();
4662e97e0a4fSShilei Tian
4663e97e0a4fSShilei Tian auto &Ctx = getAnchorValue().getContext();
4664e97e0a4fSShilei Tian // If the caller can only be reached by SPMD kernel entries, the parallel
4665e97e0a4fSShilei Tian // level is 1. Similarly, if the caller can only be reached by non-SPMD
4666e97e0a4fSShilei Tian // kernel entries, it is 0.
4667e97e0a4fSShilei Tian if (AssumedSPMDCount || KnownSPMDCount) {
4668e97e0a4fSShilei Tian assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
4669e97e0a4fSShilei Tian "Expected only SPMD kernels!");
4670e97e0a4fSShilei Tian SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
4671e97e0a4fSShilei Tian } else {
4672e97e0a4fSShilei Tian assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
4673e97e0a4fSShilei Tian "Expected only non-SPMD kernels!");
4674e97e0a4fSShilei Tian SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
4675e97e0a4fSShilei Tian }
46765ab6aeddSJose M Monsalve Diaz return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
46775ab6aeddSJose M Monsalve Diaz : ChangeStatus::CHANGED;
46785ab6aeddSJose M Monsalve Diaz }
4679e97e0a4fSShilei Tian
foldKernelFnAttribute__anon23c38c770111::AAFoldRuntimeCallCallSiteReturned46805ab6aeddSJose M Monsalve Diaz ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
46815ab6aeddSJose M Monsalve Diaz // Specialize only if all the calls agree with the attribute constant value
46825ab6aeddSJose M Monsalve Diaz int32_t CurrentAttrValue = -1;
46835ab6aeddSJose M Monsalve Diaz Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
46845ab6aeddSJose M Monsalve Diaz
46855ab6aeddSJose M Monsalve Diaz auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
46865ab6aeddSJose M Monsalve Diaz *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
46875ab6aeddSJose M Monsalve Diaz
46885ab6aeddSJose M Monsalve Diaz if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
46895ab6aeddSJose M Monsalve Diaz return indicatePessimisticFixpoint();
46905ab6aeddSJose M Monsalve Diaz
46915ab6aeddSJose M Monsalve Diaz // Iterate over the kernels that reach this function
46925ab6aeddSJose M Monsalve Diaz for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
46935ab6aeddSJose M Monsalve Diaz int32_t NextAttrVal = -1;
46945ab6aeddSJose M Monsalve Diaz if (K->hasFnAttribute(Attr))
46955ab6aeddSJose M Monsalve Diaz NextAttrVal =
46965ab6aeddSJose M Monsalve Diaz std::stoi(K->getFnAttribute(Attr).getValueAsString().str());
46975ab6aeddSJose M Monsalve Diaz
46985ab6aeddSJose M Monsalve Diaz if (NextAttrVal == -1 ||
46995ab6aeddSJose M Monsalve Diaz (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
47005ab6aeddSJose M Monsalve Diaz return indicatePessimisticFixpoint();
47015ab6aeddSJose M Monsalve Diaz CurrentAttrValue = NextAttrVal;
47025ab6aeddSJose M Monsalve Diaz }
47035ab6aeddSJose M Monsalve Diaz
47045ab6aeddSJose M Monsalve Diaz if (CurrentAttrValue != -1) {
47055ab6aeddSJose M Monsalve Diaz auto &Ctx = getAnchorValue().getContext();
47065ab6aeddSJose M Monsalve Diaz SimplifiedValue =
47075ab6aeddSJose M Monsalve Diaz ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
47085ab6aeddSJose M Monsalve Diaz }
4709e97e0a4fSShilei Tian return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4710e97e0a4fSShilei Tian : ChangeStatus::CHANGED;
4711e97e0a4fSShilei Tian }
4712e97e0a4fSShilei Tian
4713ca662297SShilei Tian /// An optional value the associated value is assumed to fold to. That is, we
4714ca662297SShilei Tian /// assume the associated value (which is a call) can be replaced by this
4715ca662297SShilei Tian /// simplified value.
4716ca662297SShilei Tian Optional<Value *> SimplifiedValue;
4717ca662297SShilei Tian
4718ca662297SShilei Tian /// The runtime function kind of the callee of the associated call site.
4719ca662297SShilei Tian RuntimeFunction RFKind;
4720ca662297SShilei Tian };
4721ca662297SShilei Tian
47229548b74aSJohannes Doerfert } // namespace
47239548b74aSJohannes Doerfert
47245ab6aeddSJose M Monsalve Diaz /// Register folding callsite
registerFoldRuntimeCall(RuntimeFunction RF)47255ab6aeddSJose M Monsalve Diaz void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
47265ab6aeddSJose M Monsalve Diaz auto &RFI = OMPInfoCache.RFIs[RF];
47275ab6aeddSJose M Monsalve Diaz RFI.foreachUse(SCC, [&](Use &U, Function &F) {
47285ab6aeddSJose M Monsalve Diaz CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
47295ab6aeddSJose M Monsalve Diaz if (!CI)
47305ab6aeddSJose M Monsalve Diaz return false;
47315ab6aeddSJose M Monsalve Diaz A.getOrCreateAAFor<AAFoldRuntimeCall>(
47325ab6aeddSJose M Monsalve Diaz IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
47335ab6aeddSJose M Monsalve Diaz DepClassTy::NONE, /* ForceUpdate */ false,
47345ab6aeddSJose M Monsalve Diaz /* UpdateAfterInit */ false);
47355ab6aeddSJose M Monsalve Diaz return false;
47365ab6aeddSJose M Monsalve Diaz });
47375ab6aeddSJose M Monsalve Diaz }
47385ab6aeddSJose M Monsalve Diaz
registerAAs(bool IsModulePass)4739d9659bf6SJohannes Doerfert void OpenMPOpt::registerAAs(bool IsModulePass) {
4740d9659bf6SJohannes Doerfert if (SCC.empty())
4741d9659bf6SJohannes Doerfert return;
47424166738cSJohannes Doerfert
4743d9659bf6SJohannes Doerfert if (IsModulePass) {
4744d9659bf6SJohannes Doerfert // Ensure we create the AAKernelInfo AAs first and without triggering an
4745d9659bf6SJohannes Doerfert // update. This will make sure we register all value simplification
4746d9659bf6SJohannes Doerfert // callbacks before any other AA has the chance to create an AAValueSimplify
4747d9659bf6SJohannes Doerfert // or similar.
47484166738cSJohannes Doerfert auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
4749d9659bf6SJohannes Doerfert A.getOrCreateAAFor<AAKernelInfo>(
47504166738cSJohannes Doerfert IRPosition::function(Kernel), /* QueryingAA */ nullptr,
4751d9659bf6SJohannes Doerfert DepClassTy::NONE, /* ForceUpdate */ false,
4752d9659bf6SJohannes Doerfert /* UpdateAfterInit */ false);
47534166738cSJohannes Doerfert return false;
47544166738cSJohannes Doerfert };
47554166738cSJohannes Doerfert OMPInformationCache::RuntimeFunctionInfo &InitRFI =
47564166738cSJohannes Doerfert OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
47574166738cSJohannes Doerfert InitRFI.foreachUse(SCC, CreateKernelInfoCB);
4758ca662297SShilei Tian
47595ab6aeddSJose M Monsalve Diaz registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
47605ab6aeddSJose M Monsalve Diaz registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
47615ab6aeddSJose M Monsalve Diaz registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
47625ab6aeddSJose M Monsalve Diaz registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
47635ab6aeddSJose M Monsalve Diaz registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
4764d9659bf6SJohannes Doerfert }
4765d9659bf6SJohannes Doerfert
4766d9659bf6SJohannes Doerfert // Create CallSite AA for all Getters.
4767d9659bf6SJohannes Doerfert for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
4768d9659bf6SJohannes Doerfert auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
4769d9659bf6SJohannes Doerfert
4770d9659bf6SJohannes Doerfert auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
4771d9659bf6SJohannes Doerfert
4772d9659bf6SJohannes Doerfert auto CreateAA = [&](Use &U, Function &Caller) {
4773d9659bf6SJohannes Doerfert CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
4774d9659bf6SJohannes Doerfert if (!CI)
4775d9659bf6SJohannes Doerfert return false;
4776d9659bf6SJohannes Doerfert
4777d9659bf6SJohannes Doerfert auto &CB = cast<CallBase>(*CI);
4778d9659bf6SJohannes Doerfert
4779d9659bf6SJohannes Doerfert IRPosition CBPos = IRPosition::callsite_function(CB);
4780d9659bf6SJohannes Doerfert A.getOrCreateAAFor<AAICVTracker>(CBPos);
4781d9659bf6SJohannes Doerfert return false;
4782d9659bf6SJohannes Doerfert };
4783d9659bf6SJohannes Doerfert
4784d9659bf6SJohannes Doerfert GetterRFI.foreachUse(SCC, CreateAA);
4785d9659bf6SJohannes Doerfert }
4786d9659bf6SJohannes Doerfert auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
4787d9659bf6SJohannes Doerfert auto CreateAA = [&](Use &U, Function &F) {
4788d9659bf6SJohannes Doerfert A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
4789d9659bf6SJohannes Doerfert return false;
4790d9659bf6SJohannes Doerfert };
4791cd0dd8ecSJoseph Huber if (!DisableOpenMPOptDeglobalization)
4792d9659bf6SJohannes Doerfert GlobalizationRFI.foreachUse(SCC, CreateAA);
4793d9659bf6SJohannes Doerfert
4794d9659bf6SJohannes Doerfert // Create an ExecutionDomain AA for every function and a HeapToStack AA for
4795d9659bf6SJohannes Doerfert // every function if there is a device kernel.
479670b75f62SJohannes Doerfert if (!isOpenMPDevice(M))
479770b75f62SJohannes Doerfert return;
479870b75f62SJohannes Doerfert
4799d9659bf6SJohannes Doerfert for (auto *F : SCC) {
480070b75f62SJohannes Doerfert if (F->isDeclaration())
480170b75f62SJohannes Doerfert continue;
480270b75f62SJohannes Doerfert
4803d9659bf6SJohannes Doerfert A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4804cd0dd8ecSJoseph Huber if (!DisableOpenMPOptDeglobalization)
4805d9659bf6SJohannes Doerfert A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
480670b75f62SJohannes Doerfert
480770b75f62SJohannes Doerfert for (auto &I : instructions(*F)) {
480870b75f62SJohannes Doerfert if (auto *LI = dyn_cast<LoadInst>(&I)) {
480970b75f62SJohannes Doerfert bool UsedAssumedInformation = false;
481070b75f62SJohannes Doerfert A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4811bf789b19SJohannes Doerfert UsedAssumedInformation, AA::Interprocedural);
48123e0c512cSJohannes Doerfert } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
48133e0c512cSJohannes Doerfert A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
481470b75f62SJohannes Doerfert }
481570b75f62SJohannes Doerfert }
4816d9659bf6SJohannes Doerfert }
4817d9659bf6SJohannes Doerfert }
4818d9659bf6SJohannes Doerfert
4819b8235d2bSsstefan1 const char AAICVTracker::ID = 0;
4820d9659bf6SJohannes Doerfert const char AAKernelInfo::ID = 0;
482118283125SJoseph Huber const char AAExecutionDomain::ID = 0;
48226fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0;
4823ca662297SShilei Tian const char AAFoldRuntimeCall::ID = 0;
4824b8235d2bSsstefan1
createForPosition(const IRPosition & IRP,Attributor & A)4825b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
4826b8235d2bSsstefan1 Attributor &A) {
4827b8235d2bSsstefan1 AAICVTracker *AA = nullptr;
4828b8235d2bSsstefan1 switch (IRP.getPositionKind()) {
4829b8235d2bSsstefan1 case IRPosition::IRP_INVALID:
4830b8235d2bSsstefan1 case IRPosition::IRP_FLOAT:
4831b8235d2bSsstefan1 case IRPosition::IRP_ARGUMENT:
4832b8235d2bSsstefan1 case IRPosition::IRP_CALL_SITE_ARGUMENT:
48331de70a72SJohannes Doerfert llvm_unreachable("ICVTracker can only be created for function position!");
48345dfd7cc4Ssstefan1 case IRPosition::IRP_RETURNED:
48355dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
48365dfd7cc4Ssstefan1 break;
48375dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE_RETURNED:
48385dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
48395dfd7cc4Ssstefan1 break;
48405dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE:
48415dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
48425dfd7cc4Ssstefan1 break;
4843b8235d2bSsstefan1 case IRPosition::IRP_FUNCTION:
4844b8235d2bSsstefan1 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
4845b8235d2bSsstefan1 break;
4846b8235d2bSsstefan1 }
4847b8235d2bSsstefan1
4848b8235d2bSsstefan1 return *AA;
4849b8235d2bSsstefan1 }
4850b8235d2bSsstefan1
createForPosition(const IRPosition & IRP,Attributor & A)485118283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
485218283125SJoseph Huber Attributor &A) {
485318283125SJoseph Huber AAExecutionDomainFunction *AA = nullptr;
485418283125SJoseph Huber switch (IRP.getPositionKind()) {
485518283125SJoseph Huber case IRPosition::IRP_INVALID:
485618283125SJoseph Huber case IRPosition::IRP_FLOAT:
485718283125SJoseph Huber case IRPosition::IRP_ARGUMENT:
485818283125SJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT:
485918283125SJoseph Huber case IRPosition::IRP_RETURNED:
486018283125SJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED:
486118283125SJoseph Huber case IRPosition::IRP_CALL_SITE:
486218283125SJoseph Huber llvm_unreachable(
486318283125SJoseph Huber "AAExecutionDomain can only be created for function position!");
486418283125SJoseph Huber case IRPosition::IRP_FUNCTION:
486518283125SJoseph Huber AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
486618283125SJoseph Huber break;
486718283125SJoseph Huber }
486818283125SJoseph Huber
486918283125SJoseph Huber return *AA;
487018283125SJoseph Huber }
487118283125SJoseph Huber
createForPosition(const IRPosition & IRP,Attributor & A)48726fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
48736fc51c9fSJoseph Huber Attributor &A) {
48746fc51c9fSJoseph Huber AAHeapToSharedFunction *AA = nullptr;
48756fc51c9fSJoseph Huber switch (IRP.getPositionKind()) {
48766fc51c9fSJoseph Huber case IRPosition::IRP_INVALID:
48776fc51c9fSJoseph Huber case IRPosition::IRP_FLOAT:
48786fc51c9fSJoseph Huber case IRPosition::IRP_ARGUMENT:
48796fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT:
48806fc51c9fSJoseph Huber case IRPosition::IRP_RETURNED:
48816fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED:
48826fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE:
48836fc51c9fSJoseph Huber llvm_unreachable(
48846fc51c9fSJoseph Huber "AAHeapToShared can only be created for function position!");
48856fc51c9fSJoseph Huber case IRPosition::IRP_FUNCTION:
48866fc51c9fSJoseph Huber AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
48876fc51c9fSJoseph Huber break;
48886fc51c9fSJoseph Huber }
48896fc51c9fSJoseph Huber
48906fc51c9fSJoseph Huber return *AA;
48916fc51c9fSJoseph Huber }
48926fc51c9fSJoseph Huber
createForPosition(const IRPosition & IRP,Attributor & A)4893d9659bf6SJohannes Doerfert AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
4894d9659bf6SJohannes Doerfert Attributor &A) {
4895d9659bf6SJohannes Doerfert AAKernelInfo *AA = nullptr;
4896d9659bf6SJohannes Doerfert switch (IRP.getPositionKind()) {
4897d9659bf6SJohannes Doerfert case IRPosition::IRP_INVALID:
4898d9659bf6SJohannes Doerfert case IRPosition::IRP_FLOAT:
4899d9659bf6SJohannes Doerfert case IRPosition::IRP_ARGUMENT:
4900d9659bf6SJohannes Doerfert case IRPosition::IRP_RETURNED:
4901d9659bf6SJohannes Doerfert case IRPosition::IRP_CALL_SITE_RETURNED:
4902d9659bf6SJohannes Doerfert case IRPosition::IRP_CALL_SITE_ARGUMENT:
4903d9659bf6SJohannes Doerfert llvm_unreachable("KernelInfo can only be created for function position!");
4904d9659bf6SJohannes Doerfert case IRPosition::IRP_CALL_SITE:
4905d9659bf6SJohannes Doerfert AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
4906d9659bf6SJohannes Doerfert break;
4907d9659bf6SJohannes Doerfert case IRPosition::IRP_FUNCTION:
4908d9659bf6SJohannes Doerfert AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
4909d9659bf6SJohannes Doerfert break;
4910d9659bf6SJohannes Doerfert }
4911d9659bf6SJohannes Doerfert
4912d9659bf6SJohannes Doerfert return *AA;
4913d9659bf6SJohannes Doerfert }
4914d9659bf6SJohannes Doerfert
createForPosition(const IRPosition & IRP,Attributor & A)4915ca662297SShilei Tian AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
4916ca662297SShilei Tian Attributor &A) {
4917ca662297SShilei Tian AAFoldRuntimeCall *AA = nullptr;
4918ca662297SShilei Tian switch (IRP.getPositionKind()) {
4919ca662297SShilei Tian case IRPosition::IRP_INVALID:
4920ca662297SShilei Tian case IRPosition::IRP_FLOAT:
4921ca662297SShilei Tian case IRPosition::IRP_ARGUMENT:
4922ca662297SShilei Tian case IRPosition::IRP_RETURNED:
4923ca662297SShilei Tian case IRPosition::IRP_FUNCTION:
4924ca662297SShilei Tian case IRPosition::IRP_CALL_SITE:
4925ca662297SShilei Tian case IRPosition::IRP_CALL_SITE_ARGUMENT:
4926ca662297SShilei Tian llvm_unreachable("KernelInfo can only be created for call site position!");
4927ca662297SShilei Tian case IRPosition::IRP_CALL_SITE_RETURNED:
4928ca662297SShilei Tian AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
4929ca662297SShilei Tian break;
4930ca662297SShilei Tian }
4931ca662297SShilei Tian
4932ca662297SShilei Tian return *AA;
4933ca662297SShilei Tian }
4934ca662297SShilei Tian
run(Module & M,ModuleAnalysisManager & AM)4935b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
49365ccb7424SJoseph Huber if (!containsOpenMP(M))
4937b2ad63d3SJoseph Huber return PreservedAnalyses::all();
4938b2ad63d3SJoseph Huber if (DisableOpenMPOptimizations)
4939b2ad63d3SJoseph Huber return PreservedAnalyses::all();
4940b2ad63d3SJoseph Huber
49410edb8777SJoseph Huber FunctionAnalysisManager &FAM =
49420edb8777SJoseph Huber AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
49435ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M);
49445ccb7424SJoseph Huber
494566321807SJoseph Huber if (PrintModuleBeforeOptimizations)
494666321807SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
494766321807SJoseph Huber
494857ad2e10SJoseph Huber auto IsCalled = [&](Function &F) {
494957ad2e10SJoseph Huber if (Kernels.contains(&F))
495057ad2e10SJoseph Huber return true;
495157ad2e10SJoseph Huber for (const User *U : F.users())
495257ad2e10SJoseph Huber if (!isa<BlockAddress>(U))
495357ad2e10SJoseph Huber return true;
495457ad2e10SJoseph Huber return false;
495557ad2e10SJoseph Huber };
495657ad2e10SJoseph Huber
49570edb8777SJoseph Huber auto EmitRemark = [&](Function &F) {
49580edb8777SJoseph Huber auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
49590edb8777SJoseph Huber ORE.emit([&]() {
49602c31d5ebSJoseph Huber OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
4961ecabc668SJoseph Huber return ORA << "Could not internalize function. "
4962adbaa39dSJoseph Huber << "Some optimizations may not be possible. [OMP140]";
49630edb8777SJoseph Huber });
49640edb8777SJoseph Huber };
49650edb8777SJoseph Huber
496657ad2e10SJoseph Huber // Create internal copies of each function if this is a kernel Module. This
496757ad2e10SJoseph Huber // allows iterprocedural passes to see every call edge.
4968adbaa39dSJoseph Huber DenseMap<Function *, Function *> InternalizedMap;
4969adbaa39dSJoseph Huber if (isOpenMPDevice(M)) {
4970adbaa39dSJoseph Huber SmallPtrSet<Function *, 16> InternalizeFns;
497103d7e61cSJoseph Huber for (Function &F : M)
49724a668604SJoseph Huber if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
49734a668604SJoseph Huber !DisableInternalization) {
4974adbaa39dSJoseph Huber if (Attributor::isInternalizable(F)) {
4975adbaa39dSJoseph Huber InternalizeFns.insert(&F);
4976ecabc668SJoseph Huber } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
49770edb8777SJoseph Huber EmitRemark(F);
49780edb8777SJoseph Huber }
49790edb8777SJoseph Huber }
498003d7e61cSJoseph Huber
4981adbaa39dSJoseph Huber Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
4982adbaa39dSJoseph Huber }
4983adbaa39dSJoseph Huber
498457ad2e10SJoseph Huber // Look at every function in the Module unless it was internalized.
4985b2ad63d3SJoseph Huber SmallVector<Function *, 16> SCC;
498603d7e61cSJoseph Huber for (Function &F : M)
4987adbaa39dSJoseph Huber if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
498803d7e61cSJoseph Huber SCC.push_back(&F);
4989b2ad63d3SJoseph Huber
4990b2ad63d3SJoseph Huber if (SCC.empty())
4991b2ad63d3SJoseph Huber return PreservedAnalyses::all();
4992b2ad63d3SJoseph Huber
4993b2ad63d3SJoseph Huber AnalysisGetter AG(FAM);
4994b2ad63d3SJoseph Huber
4995b2ad63d3SJoseph Huber auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4996b2ad63d3SJoseph Huber return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4997b2ad63d3SJoseph Huber };
4998b2ad63d3SJoseph Huber
4999b2ad63d3SJoseph Huber BumpPtrAllocator Allocator;
5000b2ad63d3SJoseph Huber CallGraphUpdater CGUpdater;
5001b2ad63d3SJoseph Huber
5002b2ad63d3SJoseph Huber SetVector<Function *> Functions(SCC.begin(), SCC.end());
50035ccb7424SJoseph Huber OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
5004b2ad63d3SJoseph Huber
5005f074a6a0SJoseph Huber unsigned MaxFixpointIterations =
5006f074a6a0SJoseph Huber (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
50073be3b401SJohannes Doerfert
50083be3b401SJohannes Doerfert AttributorConfig AC(CGUpdater);
50093be3b401SJohannes Doerfert AC.DefaultInitializeLiveInternals = false;
50103be3b401SJohannes Doerfert AC.RewriteSignatures = false;
50113be3b401SJohannes Doerfert AC.MaxFixpointIterations = MaxFixpointIterations;
50123be3b401SJohannes Doerfert AC.OREGetter = OREGetter;
50133be3b401SJohannes Doerfert AC.PassName = DEBUG_TYPE;
50143be3b401SJohannes Doerfert
50153be3b401SJohannes Doerfert Attributor A(Functions, InfoCache, AC);
5016b2ad63d3SJoseph Huber
5017b2ad63d3SJoseph Huber OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5018b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(true);
5019339aa765SJoseph Huber
502029a74a39SJoseph Huber // Optionally inline device functions for potentially better performance.
502129a74a39SJoseph Huber if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
502229a74a39SJoseph Huber for (Function &F : M)
502329a74a39SJoseph Huber if (!F.isDeclaration() && !Kernels.contains(&F) &&
502429a74a39SJoseph Huber !F.hasFnAttribute(Attribute::NoInline))
502529a74a39SJoseph Huber F.addFnAttr(Attribute::AlwaysInline);
502629a74a39SJoseph Huber
5027339aa765SJoseph Huber if (PrintModuleAfterOptimizations)
5028339aa765SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
5029339aa765SJoseph Huber
5030b2ad63d3SJoseph Huber if (Changed)
5031b2ad63d3SJoseph Huber return PreservedAnalyses::none();
5032b2ad63d3SJoseph Huber
5033b2ad63d3SJoseph Huber return PreservedAnalyses::all();
5034b2ad63d3SJoseph Huber }
5035b2ad63d3SJoseph Huber
run(LazyCallGraph::SCC & C,CGSCCAnalysisManager & AM,LazyCallGraph & CG,CGSCCUpdateResult & UR)5036b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
50379548b74aSJohannes Doerfert CGSCCAnalysisManager &AM,
5038b2ad63d3SJoseph Huber LazyCallGraph &CG,
5039b2ad63d3SJoseph Huber CGSCCUpdateResult &UR) {
50405ccb7424SJoseph Huber if (!containsOpenMP(*C.begin()->getFunction().getParent()))
50419548b74aSJohannes Doerfert return PreservedAnalyses::all();
50429548b74aSJohannes Doerfert if (DisableOpenMPOptimizations)
50439548b74aSJohannes Doerfert return PreservedAnalyses::all();
50449548b74aSJohannes Doerfert
5045ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC;
5046351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's.
5047351d234dSRoman Lebedev for (LazyCallGraph::Node &N : C) {
5048351d234dSRoman Lebedev Function *Fn = &N.getFunction();
5049351d234dSRoman Lebedev SCC.push_back(Fn);
5050351d234dSRoman Lebedev }
5051351d234dSRoman Lebedev
50525ccb7424SJoseph Huber if (SCC.empty())
50539548b74aSJohannes Doerfert return PreservedAnalyses::all();
50549548b74aSJohannes Doerfert
50555ccb7424SJoseph Huber Module &M = *C.begin()->getFunction().getParent();
50565ccb7424SJoseph Huber
505766321807SJoseph Huber if (PrintModuleBeforeOptimizations)
505866321807SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
505966321807SJoseph Huber
50605ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M);
50615ccb7424SJoseph Huber
50624d4ea9acSHuber, Joseph FunctionAnalysisManager &FAM =
50634d4ea9acSHuber, Joseph AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
50647cfd267cSsstefan1
50657cfd267cSsstefan1 AnalysisGetter AG(FAM);
50667cfd267cSsstefan1
50677cfd267cSsstefan1 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
50684d4ea9acSHuber, Joseph return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
50694d4ea9acSHuber, Joseph };
50704d4ea9acSHuber, Joseph
5071b2ad63d3SJoseph Huber BumpPtrAllocator Allocator;
50729548b74aSJohannes Doerfert CallGraphUpdater CGUpdater;
50739548b74aSJohannes Doerfert CGUpdater.initialize(CG, C, AM, UR);
50747cfd267cSsstefan1
50757cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end());
50767cfd267cSsstefan1 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
50775ccb7424SJoseph Huber /*CGSCC*/ Functions, Kernels);
50787cfd267cSsstefan1
5079f074a6a0SJoseph Huber unsigned MaxFixpointIterations =
5080f074a6a0SJoseph Huber (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
50813be3b401SJohannes Doerfert
50823be3b401SJohannes Doerfert AttributorConfig AC(CGUpdater);
50833be3b401SJohannes Doerfert AC.DefaultInitializeLiveInternals = false;
50843be3b401SJohannes Doerfert AC.IsModulePass = false;
50853be3b401SJohannes Doerfert AC.RewriteSignatures = false;
50863be3b401SJohannes Doerfert AC.MaxFixpointIterations = MaxFixpointIterations;
50873be3b401SJohannes Doerfert AC.OREGetter = OREGetter;
50883be3b401SJohannes Doerfert AC.PassName = DEBUG_TYPE;
50893be3b401SJohannes Doerfert
50903be3b401SJohannes Doerfert Attributor A(Functions, InfoCache, AC);
5091b8235d2bSsstefan1
5092b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5093b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(false);
5094339aa765SJoseph Huber
5095339aa765SJoseph Huber if (PrintModuleAfterOptimizations)
5096339aa765SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5097339aa765SJoseph Huber
5098694ded37SGiorgis Georgakoudis if (Changed)
5099694ded37SGiorgis Georgakoudis return PreservedAnalyses::none();
5100694ded37SGiorgis Georgakoudis
51019548b74aSJohannes Doerfert return PreservedAnalyses::all();
51029548b74aSJohannes Doerfert }
51038b57ed09SJoseph Huber
51049548b74aSJohannes Doerfert namespace {
51059548b74aSJohannes Doerfert
5106b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
51079548b74aSJohannes Doerfert CallGraphUpdater CGUpdater;
51089548b74aSJohannes Doerfert static char ID;
51099548b74aSJohannes Doerfert
OpenMPOptCGSCCLegacyPass__anon23c38c774f11::OpenMPOptCGSCCLegacyPass5110b2ad63d3SJoseph Huber OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
5111b2ad63d3SJoseph Huber initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
51129548b74aSJohannes Doerfert }
51139548b74aSJohannes Doerfert
getAnalysisUsage__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51149548b74aSJohannes Doerfert void getAnalysisUsage(AnalysisUsage &AU) const override {
51159548b74aSJohannes Doerfert CallGraphSCCPass::getAnalysisUsage(AU);
51169548b74aSJohannes Doerfert }
51179548b74aSJohannes Doerfert
runOnSCC__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51189548b74aSJohannes Doerfert bool runOnSCC(CallGraphSCC &CGSCC) override {
51195ccb7424SJoseph Huber if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
51209548b74aSJohannes Doerfert return false;
51219548b74aSJohannes Doerfert if (DisableOpenMPOptimizations || skipSCC(CGSCC))
51229548b74aSJohannes Doerfert return false;
51239548b74aSJohannes Doerfert
5124ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC;
5125351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's.
5126351d234dSRoman Lebedev for (CallGraphNode *CGN : CGSCC) {
5127351d234dSRoman Lebedev Function *Fn = CGN->getFunction();
5128351d234dSRoman Lebedev if (!Fn || Fn->isDeclaration())
5129351d234dSRoman Lebedev continue;
5130ee17263aSJohannes Doerfert SCC.push_back(Fn);
5131351d234dSRoman Lebedev }
5132351d234dSRoman Lebedev
51335ccb7424SJoseph Huber if (SCC.empty())
51349548b74aSJohannes Doerfert return false;
51359548b74aSJohannes Doerfert
51365ccb7424SJoseph Huber Module &M = CGSCC.getCallGraph().getModule();
51375ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M);
51385ccb7424SJoseph Huber
51399548b74aSJohannes Doerfert CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
51409548b74aSJohannes Doerfert CGUpdater.initialize(CG, CGSCC);
51419548b74aSJohannes Doerfert
51424d4ea9acSHuber, Joseph // Maintain a map of functions to avoid rebuilding the ORE
51434d4ea9acSHuber, Joseph DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
51444d4ea9acSHuber, Joseph auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
51454d4ea9acSHuber, Joseph std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
51464d4ea9acSHuber, Joseph if (!ORE)
51474d4ea9acSHuber, Joseph ORE = std::make_unique<OptimizationRemarkEmitter>(F);
51484d4ea9acSHuber, Joseph return *ORE;
51494d4ea9acSHuber, Joseph };
51504d4ea9acSHuber, Joseph
51517cfd267cSsstefan1 AnalysisGetter AG;
51527cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end());
51537cfd267cSsstefan1 BumpPtrAllocator Allocator;
51545ccb7424SJoseph Huber OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
51555ccb7424SJoseph Huber Allocator,
51565ccb7424SJoseph Huber /*CGSCC*/ Functions, Kernels);
51577cfd267cSsstefan1
5158f074a6a0SJoseph Huber unsigned MaxFixpointIterations =
5159f074a6a0SJoseph Huber (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
51603be3b401SJohannes Doerfert
51613be3b401SJohannes Doerfert AttributorConfig AC(CGUpdater);
51623be3b401SJohannes Doerfert AC.DefaultInitializeLiveInternals = false;
51633be3b401SJohannes Doerfert AC.IsModulePass = false;
51643be3b401SJohannes Doerfert AC.RewriteSignatures = false;
51653be3b401SJohannes Doerfert AC.MaxFixpointIterations = MaxFixpointIterations;
51663be3b401SJohannes Doerfert AC.OREGetter = OREGetter;
51673be3b401SJohannes Doerfert AC.PassName = DEBUG_TYPE;
51683be3b401SJohannes Doerfert
51693be3b401SJohannes Doerfert Attributor A(Functions, InfoCache, AC);
5170b8235d2bSsstefan1
5171b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5172339aa765SJoseph Huber bool Result = OMPOpt.run(false);
5173339aa765SJoseph Huber
5174339aa765SJoseph Huber if (PrintModuleAfterOptimizations)
5175339aa765SJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5176339aa765SJoseph Huber
5177339aa765SJoseph Huber return Result;
51789548b74aSJohannes Doerfert }
51799548b74aSJohannes Doerfert
doFinalization__anon23c38c774f11::OpenMPOptCGSCCLegacyPass51809548b74aSJohannes Doerfert bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
51819548b74aSJohannes Doerfert };
51829548b74aSJohannes Doerfert
51839548b74aSJohannes Doerfert } // end anonymous namespace
51849548b74aSJohannes Doerfert
getDeviceKernels(Module & M)51855ccb7424SJoseph Huber KernelSet llvm::omp::getDeviceKernels(Module &M) {
51865ccb7424SJoseph Huber // TODO: Create a more cross-platform way of determining device kernels.
5187e8039ad4SJohannes Doerfert NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
51885ccb7424SJoseph Huber KernelSet Kernels;
51895ccb7424SJoseph Huber
5190e8039ad4SJohannes Doerfert if (!MD)
51915ccb7424SJoseph Huber return Kernels;
5192e8039ad4SJohannes Doerfert
5193e8039ad4SJohannes Doerfert for (auto *Op : MD->operands()) {
5194e8039ad4SJohannes Doerfert if (Op->getNumOperands() < 2)
5195e8039ad4SJohannes Doerfert continue;
5196e8039ad4SJohannes Doerfert MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
5197e8039ad4SJohannes Doerfert if (!KindID || KindID->getString() != "kernel")
5198e8039ad4SJohannes Doerfert continue;
5199e8039ad4SJohannes Doerfert
5200e8039ad4SJohannes Doerfert Function *KernelFn =
5201e8039ad4SJohannes Doerfert mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
5202e8039ad4SJohannes Doerfert if (!KernelFn)
5203e8039ad4SJohannes Doerfert continue;
5204e8039ad4SJohannes Doerfert
5205e8039ad4SJohannes Doerfert ++NumOpenMPTargetRegionKernels;
5206e8039ad4SJohannes Doerfert
5207e8039ad4SJohannes Doerfert Kernels.insert(KernelFn);
5208e8039ad4SJohannes Doerfert }
52095ccb7424SJoseph Huber
52105ccb7424SJoseph Huber return Kernels;
5211e8039ad4SJohannes Doerfert }
5212e8039ad4SJohannes Doerfert
containsOpenMP(Module & M)52135ccb7424SJoseph Huber bool llvm::omp::containsOpenMP(Module &M) {
52145ccb7424SJoseph Huber Metadata *MD = M.getModuleFlag("openmp");
52155ccb7424SJoseph Huber if (!MD)
52165ccb7424SJoseph Huber return false;
5217dce6bc18SJohannes Doerfert
5218e8039ad4SJohannes Doerfert return true;
5219e8039ad4SJohannes Doerfert }
5220e8039ad4SJohannes Doerfert
isOpenMPDevice(Module & M)52215ccb7424SJoseph Huber bool llvm::omp::isOpenMPDevice(Module &M) {
52225ccb7424SJoseph Huber Metadata *MD = M.getModuleFlag("openmp-device");
52235ccb7424SJoseph Huber if (!MD)
52245ccb7424SJoseph Huber return false;
52255ccb7424SJoseph Huber
52265ccb7424SJoseph Huber return true;
52279548b74aSJohannes Doerfert }
52289548b74aSJohannes Doerfert
5229b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0;
52309548b74aSJohannes Doerfert
5231b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
52329548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)52339548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
5234b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
52359548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false)
52369548b74aSJohannes Doerfert
5237b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
5238b2ad63d3SJoseph Huber return new OpenMPOptCGSCCLegacyPass();
5239b2ad63d3SJoseph Huber }
5240