19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 29548b74aSJohannes Doerfert // 39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69548b74aSJohannes Doerfert // 79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 89548b74aSJohannes Doerfert // 99548b74aSJohannes Doerfert // OpenMP specific optimizations: 109548b74aSJohannes Doerfert // 119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num. 129548b74aSJohannes Doerfert // 139548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 149548b74aSJohannes Doerfert 159548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h" 169548b74aSJohannes Doerfert 179548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h" 189548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h" 199548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h" 209548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h" 214d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h" 229548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h" 23e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 249548b74aSJohannes Doerfert #include "llvm/InitializePasses.h" 259548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h" 269548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h" 277cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h" 289548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h" 299548b74aSJohannes Doerfert 309548b74aSJohannes Doerfert using namespace llvm; 319548b74aSJohannes Doerfert using namespace omp; 329548b74aSJohannes Doerfert 339548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt" 349548b74aSJohannes Doerfert 359548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations( 369548b74aSJohannes Doerfert "openmp-opt-disable", cl::ZeroOrMore, 379548b74aSJohannes Doerfert cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 389548b74aSJohannes Doerfert cl::init(false)); 399548b74aSJohannes Doerfert 400f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), 410f426935Ssstefan1 cl::Hidden); 42e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", 43e8039ad4SJohannes Doerfert cl::init(false), cl::Hidden); 440f426935Ssstefan1 45496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency( 46496f8e5bSHamilton Tobon Mosquera "openmp-hide-memory-transfer-latency", 47496f8e5bSHamilton Tobon Mosquera cl::desc("[WIP] Tries to hide the latency of host to device memory" 48496f8e5bSHamilton Tobon Mosquera " transfers"), 49496f8e5bSHamilton Tobon Mosquera cl::Hidden, cl::init(false)); 50496f8e5bSHamilton Tobon Mosquera 51496f8e5bSHamilton Tobon Mosquera 529548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 539548b74aSJohannes Doerfert "Number of OpenMP runtime calls deduplicated"); 5455eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted, 5555eb714aSRoman Lebedev "Number of OpenMP parallel regions deleted"); 569548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 579548b74aSJohannes Doerfert "Number of OpenMP runtime functions identified"); 589548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 599548b74aSJohannes Doerfert "Number of OpenMP runtime function uses identified"); 60e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels, 61e8039ad4SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) identified"); 625b0581aeSJohannes Doerfert STATISTIC( 635b0581aeSJohannes Doerfert NumOpenMPParallelRegionsReplacedInGPUStateMachine, 645b0581aeSJohannes Doerfert "Number of OpenMP parallel regions replaced with ID in GPU state machines"); 659548b74aSJohannes Doerfert 66263c4a3cSrathod-sahaab #if !defined(NDEBUG) 679548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]"; 68a50c0b0dSMikael Holmen #endif 699548b74aSJohannes Doerfert 709548b74aSJohannes Doerfert namespace { 719548b74aSJohannes Doerfert 72b8235d2bSsstefan1 struct AAICVTracker; 73b8235d2bSsstefan1 747cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for 757cfd267cSsstefan1 /// Attributor runs. 767cfd267cSsstefan1 struct OMPInformationCache : public InformationCache { 777cfd267cSsstefan1 OMPInformationCache(Module &M, AnalysisGetter &AG, 78624d34afSJohannes Doerfert BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, 79e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels) 80624d34afSJohannes Doerfert : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), 81624d34afSJohannes Doerfert Kernels(Kernels) { 82624d34afSJohannes Doerfert 8361238d26Ssstefan1 OMPBuilder.initialize(); 849548b74aSJohannes Doerfert initializeRuntimeFunctions(); 850f426935Ssstefan1 initializeInternalControlVars(); 869548b74aSJohannes Doerfert } 879548b74aSJohannes Doerfert 880f426935Ssstefan1 /// Generic information that describes an internal control variable. 890f426935Ssstefan1 struct InternalControlVarInfo { 900f426935Ssstefan1 /// The kind, as described by InternalControlVar enum. 910f426935Ssstefan1 InternalControlVar Kind; 920f426935Ssstefan1 930f426935Ssstefan1 /// The name of the ICV. 940f426935Ssstefan1 StringRef Name; 950f426935Ssstefan1 960f426935Ssstefan1 /// Environment variable associated with this ICV. 970f426935Ssstefan1 StringRef EnvVarName; 980f426935Ssstefan1 990f426935Ssstefan1 /// Initial value kind. 1000f426935Ssstefan1 ICVInitValue InitKind; 1010f426935Ssstefan1 1020f426935Ssstefan1 /// Initial value. 1030f426935Ssstefan1 ConstantInt *InitValue; 1040f426935Ssstefan1 1050f426935Ssstefan1 /// Setter RTL function associated with this ICV. 1060f426935Ssstefan1 RuntimeFunction Setter; 1070f426935Ssstefan1 1080f426935Ssstefan1 /// Getter RTL function associated with this ICV. 1090f426935Ssstefan1 RuntimeFunction Getter; 1100f426935Ssstefan1 1110f426935Ssstefan1 /// RTL Function corresponding to the override clause of this ICV 1120f426935Ssstefan1 RuntimeFunction Clause; 1130f426935Ssstefan1 }; 1140f426935Ssstefan1 1159548b74aSJohannes Doerfert /// Generic information that describes a runtime function 1169548b74aSJohannes Doerfert struct RuntimeFunctionInfo { 1178855fec3SJohannes Doerfert 1189548b74aSJohannes Doerfert /// The kind, as described by the RuntimeFunction enum. 1199548b74aSJohannes Doerfert RuntimeFunction Kind; 1209548b74aSJohannes Doerfert 1219548b74aSJohannes Doerfert /// The name of the function. 1229548b74aSJohannes Doerfert StringRef Name; 1239548b74aSJohannes Doerfert 1249548b74aSJohannes Doerfert /// Flag to indicate a variadic function. 1259548b74aSJohannes Doerfert bool IsVarArg; 1269548b74aSJohannes Doerfert 1279548b74aSJohannes Doerfert /// The return type of the function. 1289548b74aSJohannes Doerfert Type *ReturnType; 1299548b74aSJohannes Doerfert 1309548b74aSJohannes Doerfert /// The argument types of the function. 1319548b74aSJohannes Doerfert SmallVector<Type *, 8> ArgumentTypes; 1329548b74aSJohannes Doerfert 1339548b74aSJohannes Doerfert /// The declaration if available. 134f09f4b26SJohannes Doerfert Function *Declaration = nullptr; 1359548b74aSJohannes Doerfert 1369548b74aSJohannes Doerfert /// Uses of this runtime function per function containing the use. 1378855fec3SJohannes Doerfert using UseVector = SmallVector<Use *, 16>; 1388855fec3SJohannes Doerfert 139b8235d2bSsstefan1 /// Clear UsesMap for runtime function. 140b8235d2bSsstefan1 void clearUsesMap() { UsesMap.clear(); } 141b8235d2bSsstefan1 14254bd3751SJohannes Doerfert /// Boolean conversion that is true if the runtime function was found. 14354bd3751SJohannes Doerfert operator bool() const { return Declaration; } 14454bd3751SJohannes Doerfert 1458855fec3SJohannes Doerfert /// Return the vector of uses in function \p F. 1468855fec3SJohannes Doerfert UseVector &getOrCreateUseVector(Function *F) { 147b8235d2bSsstefan1 std::shared_ptr<UseVector> &UV = UsesMap[F]; 1488855fec3SJohannes Doerfert if (!UV) 149b8235d2bSsstefan1 UV = std::make_shared<UseVector>(); 1508855fec3SJohannes Doerfert return *UV; 1518855fec3SJohannes Doerfert } 1528855fec3SJohannes Doerfert 1538855fec3SJohannes Doerfert /// Return the vector of uses in function \p F or `nullptr` if there are 1548855fec3SJohannes Doerfert /// none. 1558855fec3SJohannes Doerfert const UseVector *getUseVector(Function &F) const { 15695e57072SDavid Blaikie auto I = UsesMap.find(&F); 15795e57072SDavid Blaikie if (I != UsesMap.end()) 15895e57072SDavid Blaikie return I->second.get(); 15995e57072SDavid Blaikie return nullptr; 1608855fec3SJohannes Doerfert } 1618855fec3SJohannes Doerfert 1628855fec3SJohannes Doerfert /// Return how many functions contain uses of this runtime function. 1638855fec3SJohannes Doerfert size_t getNumFunctionsWithUses() const { return UsesMap.size(); } 1649548b74aSJohannes Doerfert 1659548b74aSJohannes Doerfert /// Return the number of arguments (or the minimal number for variadic 1669548b74aSJohannes Doerfert /// functions). 1679548b74aSJohannes Doerfert size_t getNumArgs() const { return ArgumentTypes.size(); } 1689548b74aSJohannes Doerfert 1699548b74aSJohannes Doerfert /// Run the callback \p CB on each use and forget the use if the result is 1709548b74aSJohannes Doerfert /// true. The callback will be fed the function in which the use was 1719548b74aSJohannes Doerfert /// encountered as second argument. 172624d34afSJohannes Doerfert void foreachUse(SmallVectorImpl<Function *> &SCC, 173624d34afSJohannes Doerfert function_ref<bool(Use &, Function &)> CB) { 174624d34afSJohannes Doerfert for (Function *F : SCC) 175624d34afSJohannes Doerfert foreachUse(CB, F); 176e099c7b6Ssstefan1 } 177e099c7b6Ssstefan1 178e099c7b6Ssstefan1 /// Run the callback \p CB on each use within the function \p F and forget 179e099c7b6Ssstefan1 /// the use if the result is true. 180624d34afSJohannes Doerfert void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { 1818855fec3SJohannes Doerfert SmallVector<unsigned, 8> ToBeDeleted; 1829548b74aSJohannes Doerfert ToBeDeleted.clear(); 183e099c7b6Ssstefan1 1848855fec3SJohannes Doerfert unsigned Idx = 0; 185624d34afSJohannes Doerfert UseVector &UV = getOrCreateUseVector(F); 186e099c7b6Ssstefan1 1878855fec3SJohannes Doerfert for (Use *U : UV) { 188e099c7b6Ssstefan1 if (CB(*U, *F)) 1898855fec3SJohannes Doerfert ToBeDeleted.push_back(Idx); 1908855fec3SJohannes Doerfert ++Idx; 1918855fec3SJohannes Doerfert } 1928855fec3SJohannes Doerfert 1938855fec3SJohannes Doerfert // Remove the to-be-deleted indices in reverse order as prior 194b726c557SJohannes Doerfert // modifications will not modify the smaller indices. 1958855fec3SJohannes Doerfert while (!ToBeDeleted.empty()) { 1968855fec3SJohannes Doerfert unsigned Idx = ToBeDeleted.pop_back_val(); 1978855fec3SJohannes Doerfert UV[Idx] = UV.back(); 1988855fec3SJohannes Doerfert UV.pop_back(); 1999548b74aSJohannes Doerfert } 2009548b74aSJohannes Doerfert } 2018855fec3SJohannes Doerfert 2028855fec3SJohannes Doerfert private: 2038855fec3SJohannes Doerfert /// Map from functions to all uses of this runtime function contained in 2048855fec3SJohannes Doerfert /// them. 205b8235d2bSsstefan1 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; 2069548b74aSJohannes Doerfert }; 2079548b74aSJohannes Doerfert 2087cfd267cSsstefan1 /// An OpenMP-IR-Builder instance 2097cfd267cSsstefan1 OpenMPIRBuilder OMPBuilder; 2107cfd267cSsstefan1 2117cfd267cSsstefan1 /// Map from runtime function kind to the runtime function description. 2127cfd267cSsstefan1 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 2137cfd267cSsstefan1 RuntimeFunction::OMPRTL___last> 2147cfd267cSsstefan1 RFIs; 2157cfd267cSsstefan1 2160f426935Ssstefan1 /// Map from ICV kind to the ICV description. 2170f426935Ssstefan1 EnumeratedArray<InternalControlVarInfo, InternalControlVar, 2180f426935Ssstefan1 InternalControlVar::ICV___last> 2190f426935Ssstefan1 ICVs; 2200f426935Ssstefan1 2210f426935Ssstefan1 /// Helper to initialize all internal control variable information for those 2220f426935Ssstefan1 /// defined in OMPKinds.def. 2230f426935Ssstefan1 void initializeInternalControlVars() { 2240f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL) \ 2250f426935Ssstefan1 { \ 2260f426935Ssstefan1 auto &ICV = ICVs[_Name]; \ 2270f426935Ssstefan1 ICV.Setter = RTL; \ 2280f426935Ssstefan1 } 2290f426935Ssstefan1 #define ICV_RT_GET(Name, RTL) \ 2300f426935Ssstefan1 { \ 2310f426935Ssstefan1 auto &ICV = ICVs[Name]; \ 2320f426935Ssstefan1 ICV.Getter = RTL; \ 2330f426935Ssstefan1 } 2340f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ 2350f426935Ssstefan1 { \ 2360f426935Ssstefan1 auto &ICV = ICVs[Enum]; \ 2370f426935Ssstefan1 ICV.Name = _Name; \ 2380f426935Ssstefan1 ICV.Kind = Enum; \ 2390f426935Ssstefan1 ICV.InitKind = Init; \ 2400f426935Ssstefan1 ICV.EnvVarName = _EnvVarName; \ 2410f426935Ssstefan1 switch (ICV.InitKind) { \ 242951e43f3Ssstefan1 case ICV_IMPLEMENTATION_DEFINED: \ 2430f426935Ssstefan1 ICV.InitValue = nullptr; \ 2440f426935Ssstefan1 break; \ 245951e43f3Ssstefan1 case ICV_ZERO: \ 2466aab27baSsstefan1 ICV.InitValue = ConstantInt::get( \ 2476aab27baSsstefan1 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ 2480f426935Ssstefan1 break; \ 249951e43f3Ssstefan1 case ICV_FALSE: \ 2506aab27baSsstefan1 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ 2510f426935Ssstefan1 break; \ 252951e43f3Ssstefan1 case ICV_LAST: \ 2530f426935Ssstefan1 break; \ 2540f426935Ssstefan1 } \ 2550f426935Ssstefan1 } 2560f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2570f426935Ssstefan1 } 2580f426935Ssstefan1 2597cfd267cSsstefan1 /// Returns true if the function declaration \p F matches the runtime 2607cfd267cSsstefan1 /// function types, that is, return type \p RTFRetType, and argument types 2617cfd267cSsstefan1 /// \p RTFArgTypes. 2627cfd267cSsstefan1 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 2637cfd267cSsstefan1 SmallVector<Type *, 8> &RTFArgTypes) { 2647cfd267cSsstefan1 // TODO: We should output information to the user (under debug output 2657cfd267cSsstefan1 // and via remarks). 2667cfd267cSsstefan1 2677cfd267cSsstefan1 if (!F) 2687cfd267cSsstefan1 return false; 2697cfd267cSsstefan1 if (F->getReturnType() != RTFRetType) 2707cfd267cSsstefan1 return false; 2717cfd267cSsstefan1 if (F->arg_size() != RTFArgTypes.size()) 2727cfd267cSsstefan1 return false; 2737cfd267cSsstefan1 2747cfd267cSsstefan1 auto RTFTyIt = RTFArgTypes.begin(); 2757cfd267cSsstefan1 for (Argument &Arg : F->args()) { 2767cfd267cSsstefan1 if (Arg.getType() != *RTFTyIt) 2777cfd267cSsstefan1 return false; 2787cfd267cSsstefan1 2797cfd267cSsstefan1 ++RTFTyIt; 2807cfd267cSsstefan1 } 2817cfd267cSsstefan1 2827cfd267cSsstefan1 return true; 2837cfd267cSsstefan1 } 2847cfd267cSsstefan1 285b726c557SJohannes Doerfert // Helper to collect all uses of the declaration in the UsesMap. 286b8235d2bSsstefan1 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { 2877cfd267cSsstefan1 unsigned NumUses = 0; 2887cfd267cSsstefan1 if (!RFI.Declaration) 2897cfd267cSsstefan1 return NumUses; 2907cfd267cSsstefan1 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 2917cfd267cSsstefan1 292b8235d2bSsstefan1 if (CollectStats) { 2937cfd267cSsstefan1 NumOpenMPRuntimeFunctionsIdentified += 1; 2947cfd267cSsstefan1 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 295b8235d2bSsstefan1 } 2967cfd267cSsstefan1 2977cfd267cSsstefan1 // TODO: We directly convert uses into proper calls and unknown uses. 2987cfd267cSsstefan1 for (Use &U : RFI.Declaration->uses()) { 2997cfd267cSsstefan1 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 3007cfd267cSsstefan1 if (ModuleSlice.count(UserI->getFunction())) { 3017cfd267cSsstefan1 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); 3027cfd267cSsstefan1 ++NumUses; 3037cfd267cSsstefan1 } 3047cfd267cSsstefan1 } else { 3057cfd267cSsstefan1 RFI.getOrCreateUseVector(nullptr).push_back(&U); 3067cfd267cSsstefan1 ++NumUses; 3077cfd267cSsstefan1 } 3087cfd267cSsstefan1 } 3097cfd267cSsstefan1 return NumUses; 310b8235d2bSsstefan1 } 3117cfd267cSsstefan1 312b8235d2bSsstefan1 // Helper function to recollect uses of all runtime functions. 313b8235d2bSsstefan1 void recollectUses() { 314b8235d2bSsstefan1 for (int Idx = 0; Idx < RFIs.size(); ++Idx) { 315b8235d2bSsstefan1 auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)]; 316b8235d2bSsstefan1 RFI.clearUsesMap(); 317b8235d2bSsstefan1 collectUses(RFI, /*CollectStats*/ false); 318b8235d2bSsstefan1 } 319b8235d2bSsstefan1 } 320b8235d2bSsstefan1 321b8235d2bSsstefan1 /// Helper to initialize all runtime function information for those defined 322b8235d2bSsstefan1 /// in OpenMPKinds.def. 323b8235d2bSsstefan1 void initializeRuntimeFunctions() { 3247cfd267cSsstefan1 Module &M = *((*ModuleSlice.begin())->getParent()); 3257cfd267cSsstefan1 3266aab27baSsstefan1 // Helper macros for handling __VA_ARGS__ in OMP_RTL 3276aab27baSsstefan1 #define OMP_TYPE(VarName, ...) \ 3286aab27baSsstefan1 Type *VarName = OMPBuilder.VarName; \ 3296aab27baSsstefan1 (void)VarName; 3306aab27baSsstefan1 3316aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...) \ 3326aab27baSsstefan1 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ 3336aab27baSsstefan1 (void)VarName##Ty; \ 3346aab27baSsstefan1 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ 3356aab27baSsstefan1 (void)VarName##PtrTy; 3366aab27baSsstefan1 3376aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...) \ 3386aab27baSsstefan1 FunctionType *VarName = OMPBuilder.VarName; \ 3396aab27baSsstefan1 (void)VarName; \ 3406aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3416aab27baSsstefan1 (void)VarName##Ptr; 3426aab27baSsstefan1 3436aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...) \ 3446aab27baSsstefan1 StructType *VarName = OMPBuilder.VarName; \ 3456aab27baSsstefan1 (void)VarName; \ 3466aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3476aab27baSsstefan1 (void)VarName##Ptr; 3486aab27baSsstefan1 3497cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ 3507cfd267cSsstefan1 { \ 3517cfd267cSsstefan1 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 3527cfd267cSsstefan1 Function *F = M.getFunction(_Name); \ 3536aab27baSsstefan1 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ 3547cfd267cSsstefan1 auto &RFI = RFIs[_Enum]; \ 3557cfd267cSsstefan1 RFI.Kind = _Enum; \ 3567cfd267cSsstefan1 RFI.Name = _Name; \ 3577cfd267cSsstefan1 RFI.IsVarArg = _IsVarArg; \ 3586aab27baSsstefan1 RFI.ReturnType = OMPBuilder._ReturnType; \ 3597cfd267cSsstefan1 RFI.ArgumentTypes = std::move(ArgsTypes); \ 3607cfd267cSsstefan1 RFI.Declaration = F; \ 361b8235d2bSsstefan1 unsigned NumUses = collectUses(RFI); \ 3627cfd267cSsstefan1 (void)NumUses; \ 3637cfd267cSsstefan1 LLVM_DEBUG({ \ 3647cfd267cSsstefan1 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 3657cfd267cSsstefan1 << " found\n"; \ 3667cfd267cSsstefan1 if (RFI.Declaration) \ 3677cfd267cSsstefan1 dbgs() << TAG << "-> got " << NumUses << " uses in " \ 3687cfd267cSsstefan1 << RFI.getNumFunctionsWithUses() \ 3697cfd267cSsstefan1 << " different functions.\n"; \ 3707cfd267cSsstefan1 }); \ 3717cfd267cSsstefan1 } \ 3727cfd267cSsstefan1 } 3737cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 3747cfd267cSsstefan1 3757cfd267cSsstefan1 // TODO: We should attach the attributes defined in OMPKinds.def. 3767cfd267cSsstefan1 } 377e8039ad4SJohannes Doerfert 378e8039ad4SJohannes Doerfert /// Collection of known kernels (\see Kernel) in the module. 379e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels; 3807cfd267cSsstefan1 }; 3817cfd267cSsstefan1 3827cfd267cSsstefan1 struct OpenMPOpt { 3837cfd267cSsstefan1 3847cfd267cSsstefan1 using OptimizationRemarkGetter = 3857cfd267cSsstefan1 function_ref<OptimizationRemarkEmitter &(Function *)>; 3867cfd267cSsstefan1 3877cfd267cSsstefan1 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, 3887cfd267cSsstefan1 OptimizationRemarkGetter OREGetter, 389b8235d2bSsstefan1 OMPInformationCache &OMPInfoCache, Attributor &A) 39077b79d79SMehdi Amini : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), 391b8235d2bSsstefan1 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} 3927cfd267cSsstefan1 3939548b74aSJohannes Doerfert /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 3949548b74aSJohannes Doerfert bool run() { 39554bd3751SJohannes Doerfert if (SCC.empty()) 39654bd3751SJohannes Doerfert return false; 39754bd3751SJohannes Doerfert 3989548b74aSJohannes Doerfert bool Changed = false; 3999548b74aSJohannes Doerfert 4009548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 40177b79d79SMehdi Amini << " functions in a slice with " 40277b79d79SMehdi Amini << OMPInfoCache.ModuleSlice.size() << " functions\n"); 4039548b74aSJohannes Doerfert 404e8039ad4SJohannes Doerfert if (PrintICVValues) 405e8039ad4SJohannes Doerfert printICVs(); 406e8039ad4SJohannes Doerfert if (PrintOpenMPKernels) 407e8039ad4SJohannes Doerfert printKernels(); 408e8039ad4SJohannes Doerfert 4095b0581aeSJohannes Doerfert Changed |= rewriteDeviceCodeStateMachine(); 4105b0581aeSJohannes Doerfert 411e8039ad4SJohannes Doerfert Changed |= runAttributor(); 412e8039ad4SJohannes Doerfert 413e8039ad4SJohannes Doerfert // Recollect uses, in case Attributor deleted any. 414e8039ad4SJohannes Doerfert OMPInfoCache.recollectUses(); 415e8039ad4SJohannes Doerfert 416e8039ad4SJohannes Doerfert Changed |= deduplicateRuntimeCalls(); 417e8039ad4SJohannes Doerfert Changed |= deleteParallelRegions(); 418496f8e5bSHamilton Tobon Mosquera if (HideMemoryTransferLatency) 419496f8e5bSHamilton Tobon Mosquera Changed |= hideMemTransfersLatency(); 420e8039ad4SJohannes Doerfert 421e8039ad4SJohannes Doerfert return Changed; 422e8039ad4SJohannes Doerfert } 423e8039ad4SJohannes Doerfert 4240f426935Ssstefan1 /// Print initial ICV values for testing. 4250f426935Ssstefan1 /// FIXME: This should be done from the Attributor once it is added. 426e8039ad4SJohannes Doerfert void printICVs() const { 4270f426935Ssstefan1 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; 4280f426935Ssstefan1 4290f426935Ssstefan1 for (Function *F : OMPInfoCache.ModuleSlice) { 4300f426935Ssstefan1 for (auto ICV : ICVs) { 4310f426935Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 4320f426935Ssstefan1 auto Remark = [&](OptimizationRemark OR) { 4330f426935Ssstefan1 return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) 4340f426935Ssstefan1 << " Value: " 4350f426935Ssstefan1 << (ICVInfo.InitValue 4360f426935Ssstefan1 ? ICVInfo.InitValue->getValue().toString(10, true) 4370f426935Ssstefan1 : "IMPLEMENTATION_DEFINED"); 4380f426935Ssstefan1 }; 4390f426935Ssstefan1 4400f426935Ssstefan1 emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); 4410f426935Ssstefan1 } 4420f426935Ssstefan1 } 4430f426935Ssstefan1 } 4440f426935Ssstefan1 445e8039ad4SJohannes Doerfert /// Print OpenMP GPU kernels for testing. 446e8039ad4SJohannes Doerfert void printKernels() const { 447e8039ad4SJohannes Doerfert for (Function *F : SCC) { 448e8039ad4SJohannes Doerfert if (!OMPInfoCache.Kernels.count(F)) 449e8039ad4SJohannes Doerfert continue; 450b8235d2bSsstefan1 451e8039ad4SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) { 452e8039ad4SJohannes Doerfert return OR << "OpenMP GPU kernel " 453e8039ad4SJohannes Doerfert << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; 454e8039ad4SJohannes Doerfert }; 455b8235d2bSsstefan1 456e8039ad4SJohannes Doerfert emitRemarkOnFunction(F, "OpenMPGPU", Remark); 457e8039ad4SJohannes Doerfert } 4589548b74aSJohannes Doerfert } 4599548b74aSJohannes Doerfert 4607cfd267cSsstefan1 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 4617cfd267cSsstefan1 /// given it has to be the callee or a nullptr is returned. 4627cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 4637cfd267cSsstefan1 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 4647cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 4657cfd267cSsstefan1 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 4667cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 4677cfd267cSsstefan1 return CI; 4687cfd267cSsstefan1 return nullptr; 4697cfd267cSsstefan1 } 4707cfd267cSsstefan1 4717cfd267cSsstefan1 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 4727cfd267cSsstefan1 /// the callee or a nullptr is returned. 4737cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 4747cfd267cSsstefan1 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 4757cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(&V); 4767cfd267cSsstefan1 if (CI && !CI->hasOperandBundles() && 4777cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 4787cfd267cSsstefan1 return CI; 4797cfd267cSsstefan1 return nullptr; 4807cfd267cSsstefan1 } 4817cfd267cSsstefan1 4829548b74aSJohannes Doerfert private: 4839d38f98dSJohannes Doerfert /// Try to delete parallel regions if possible. 484e565db49SJohannes Doerfert bool deleteParallelRegions() { 485e565db49SJohannes Doerfert const unsigned CallbackCalleeOperand = 2; 486e565db49SJohannes Doerfert 4877cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI = 4887cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 4897cfd267cSsstefan1 490e565db49SJohannes Doerfert if (!RFI.Declaration) 491e565db49SJohannes Doerfert return false; 492e565db49SJohannes Doerfert 493e565db49SJohannes Doerfert bool Changed = false; 494e565db49SJohannes Doerfert auto DeleteCallCB = [&](Use &U, Function &) { 495e565db49SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U); 496e565db49SJohannes Doerfert if (!CI) 497e565db49SJohannes Doerfert return false; 498e565db49SJohannes Doerfert auto *Fn = dyn_cast<Function>( 499e565db49SJohannes Doerfert CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 500e565db49SJohannes Doerfert if (!Fn) 501e565db49SJohannes Doerfert return false; 502e565db49SJohannes Doerfert if (!Fn->onlyReadsMemory()) 503e565db49SJohannes Doerfert return false; 504e565db49SJohannes Doerfert if (!Fn->hasFnAttribute(Attribute::WillReturn)) 505e565db49SJohannes Doerfert return false; 506e565db49SJohannes Doerfert 507e565db49SJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 508e565db49SJohannes Doerfert << CI->getCaller()->getName() << "\n"); 5094d4ea9acSHuber, Joseph 5104d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 5114d4ea9acSHuber, Joseph return OR << "Parallel region in " 5124d4ea9acSHuber, Joseph << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) 5134d4ea9acSHuber, Joseph << " deleted"; 5144d4ea9acSHuber, Joseph }; 5154d4ea9acSHuber, Joseph emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", 5164d4ea9acSHuber, Joseph Remark); 5174d4ea9acSHuber, Joseph 518e565db49SJohannes Doerfert CGUpdater.removeCallSite(*CI); 519e565db49SJohannes Doerfert CI->eraseFromParent(); 520e565db49SJohannes Doerfert Changed = true; 52155eb714aSRoman Lebedev ++NumOpenMPParallelRegionsDeleted; 522e565db49SJohannes Doerfert return true; 523e565db49SJohannes Doerfert }; 524e565db49SJohannes Doerfert 525624d34afSJohannes Doerfert RFI.foreachUse(SCC, DeleteCallCB); 526e565db49SJohannes Doerfert 527e565db49SJohannes Doerfert return Changed; 528e565db49SJohannes Doerfert } 529e565db49SJohannes Doerfert 530b726c557SJohannes Doerfert /// Try to eliminate runtime calls by reusing existing ones. 5319548b74aSJohannes Doerfert bool deduplicateRuntimeCalls() { 5329548b74aSJohannes Doerfert bool Changed = false; 5339548b74aSJohannes Doerfert 534e28936f6SJohannes Doerfert RuntimeFunction DeduplicableRuntimeCallIDs[] = { 535e28936f6SJohannes Doerfert OMPRTL_omp_get_num_threads, 536e28936f6SJohannes Doerfert OMPRTL_omp_in_parallel, 537e28936f6SJohannes Doerfert OMPRTL_omp_get_cancellation, 538e28936f6SJohannes Doerfert OMPRTL_omp_get_thread_limit, 539e28936f6SJohannes Doerfert OMPRTL_omp_get_supported_active_levels, 540e28936f6SJohannes Doerfert OMPRTL_omp_get_level, 541e28936f6SJohannes Doerfert OMPRTL_omp_get_ancestor_thread_num, 542e28936f6SJohannes Doerfert OMPRTL_omp_get_team_size, 543e28936f6SJohannes Doerfert OMPRTL_omp_get_active_level, 544e28936f6SJohannes Doerfert OMPRTL_omp_in_final, 545e28936f6SJohannes Doerfert OMPRTL_omp_get_proc_bind, 546e28936f6SJohannes Doerfert OMPRTL_omp_get_num_places, 547e28936f6SJohannes Doerfert OMPRTL_omp_get_num_procs, 548e28936f6SJohannes Doerfert OMPRTL_omp_get_place_num, 549e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_num_places, 550e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_place_nums}; 551e28936f6SJohannes Doerfert 552bc93c2d7SMarek Kurdej // Global-tid is handled separately. 5539548b74aSJohannes Doerfert SmallSetVector<Value *, 16> GTIdArgs; 5549548b74aSJohannes Doerfert collectGlobalThreadIdArguments(GTIdArgs); 5559548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 5569548b74aSJohannes Doerfert << " global thread ID arguments\n"); 5579548b74aSJohannes Doerfert 5589548b74aSJohannes Doerfert for (Function *F : SCC) { 559e28936f6SJohannes Doerfert for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 5604e29d256Sserge-sans-paille Changed |= deduplicateRuntimeCalls( 5614e29d256Sserge-sans-paille *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 562e28936f6SJohannes Doerfert 563e28936f6SJohannes Doerfert // __kmpc_global_thread_num is special as we can replace it with an 564e28936f6SJohannes Doerfert // argument in enough cases to make it worth trying. 5659548b74aSJohannes Doerfert Value *GTIdArg = nullptr; 5669548b74aSJohannes Doerfert for (Argument &Arg : F->args()) 5679548b74aSJohannes Doerfert if (GTIdArgs.count(&Arg)) { 5689548b74aSJohannes Doerfert GTIdArg = &Arg; 5699548b74aSJohannes Doerfert break; 5709548b74aSJohannes Doerfert } 5719548b74aSJohannes Doerfert Changed |= deduplicateRuntimeCalls( 5727cfd267cSsstefan1 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 5739548b74aSJohannes Doerfert } 5749548b74aSJohannes Doerfert 5759548b74aSJohannes Doerfert return Changed; 5769548b74aSJohannes Doerfert } 5779548b74aSJohannes Doerfert 578496f8e5bSHamilton Tobon Mosquera /// Tries to hide the latency of runtime calls that involve host to 579496f8e5bSHamilton Tobon Mosquera /// device memory transfers by splitting them into their "issue" and "wait" 580496f8e5bSHamilton Tobon Mosquera /// versions. The "issue" is moved upwards as much as possible. The "wait" is 581496f8e5bSHamilton Tobon Mosquera /// moved downards as much as possible. The "issue" issues the memory transfer 582496f8e5bSHamilton Tobon Mosquera /// asynchronously, returning a handle. The "wait" waits in the returned 583496f8e5bSHamilton Tobon Mosquera /// handle for the memory transfer to finish. 584496f8e5bSHamilton Tobon Mosquera bool hideMemTransfersLatency() { 585496f8e5bSHamilton Tobon Mosquera auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; 586496f8e5bSHamilton Tobon Mosquera bool Changed = false; 587496f8e5bSHamilton Tobon Mosquera auto SplitMemTransfers = [&](Use &U, Function &Decl) { 588496f8e5bSHamilton Tobon Mosquera auto *RTCall = getCallIfRegularCall(U, &RFI); 589496f8e5bSHamilton Tobon Mosquera if (!RTCall) 590496f8e5bSHamilton Tobon Mosquera return false; 591496f8e5bSHamilton Tobon Mosquera 592bd2fa181SHamilton Tobon Mosquera // TODO: Check if can be moved upwards. 593bd2fa181SHamilton Tobon Mosquera bool WasSplit = false; 594bd2fa181SHamilton Tobon Mosquera Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); 595bd2fa181SHamilton Tobon Mosquera if (WaitMovementPoint) 596bd2fa181SHamilton Tobon Mosquera WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); 597bd2fa181SHamilton Tobon Mosquera 598496f8e5bSHamilton Tobon Mosquera Changed |= WasSplit; 599496f8e5bSHamilton Tobon Mosquera return WasSplit; 600496f8e5bSHamilton Tobon Mosquera }; 601496f8e5bSHamilton Tobon Mosquera RFI.foreachUse(SCC, SplitMemTransfers); 602496f8e5bSHamilton Tobon Mosquera 603496f8e5bSHamilton Tobon Mosquera return Changed; 604496f8e5bSHamilton Tobon Mosquera } 605496f8e5bSHamilton Tobon Mosquera 606bd2fa181SHamilton Tobon Mosquera /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be 607bd2fa181SHamilton Tobon Mosquera /// moved. Returns nullptr if the movement is not possible, or not worth it. 608bd2fa181SHamilton Tobon Mosquera Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { 609bd2fa181SHamilton Tobon Mosquera // FIXME: This traverses only the BasicBlock where RuntimeCall is. 610bd2fa181SHamilton Tobon Mosquera // Make it traverse the CFG. 611bd2fa181SHamilton Tobon Mosquera 612bd2fa181SHamilton Tobon Mosquera Instruction *CurrentI = &RuntimeCall; 613bd2fa181SHamilton Tobon Mosquera bool IsWorthIt = false; 614bd2fa181SHamilton Tobon Mosquera while ((CurrentI = CurrentI->getNextNode())) { 615bd2fa181SHamilton Tobon Mosquera 616bd2fa181SHamilton Tobon Mosquera // TODO: Once we detect the regions to be offloaded we should use the 617bd2fa181SHamilton Tobon Mosquera // alias analysis manager to check if CurrentI may modify one of 618bd2fa181SHamilton Tobon Mosquera // the offloaded regions. 619bd2fa181SHamilton Tobon Mosquera if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { 620bd2fa181SHamilton Tobon Mosquera if (IsWorthIt) 621bd2fa181SHamilton Tobon Mosquera return CurrentI; 622bd2fa181SHamilton Tobon Mosquera 623bd2fa181SHamilton Tobon Mosquera return nullptr; 624bd2fa181SHamilton Tobon Mosquera } 625bd2fa181SHamilton Tobon Mosquera 626bd2fa181SHamilton Tobon Mosquera // FIXME: For now if we move it over anything without side effect 627bd2fa181SHamilton Tobon Mosquera // is worth it. 628bd2fa181SHamilton Tobon Mosquera IsWorthIt = true; 629bd2fa181SHamilton Tobon Mosquera } 630bd2fa181SHamilton Tobon Mosquera 631bd2fa181SHamilton Tobon Mosquera // Return end of BasicBlock. 632bd2fa181SHamilton Tobon Mosquera return RuntimeCall.getParent()->getTerminator(); 633bd2fa181SHamilton Tobon Mosquera } 634bd2fa181SHamilton Tobon Mosquera 635496f8e5bSHamilton Tobon Mosquera /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. 636bd2fa181SHamilton Tobon Mosquera bool splitTargetDataBeginRTC(CallInst &RuntimeCall, 637bd2fa181SHamilton Tobon Mosquera Instruction &WaitMovementPoint) { 638496f8e5bSHamilton Tobon Mosquera auto &IRBuilder = OMPInfoCache.OMPBuilder; 639496f8e5bSHamilton Tobon Mosquera // Add "issue" runtime call declaration: 640496f8e5bSHamilton Tobon Mosquera // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, 641496f8e5bSHamilton Tobon Mosquera // i8**, i8**, i64*, i64*) 642496f8e5bSHamilton Tobon Mosquera FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( 643496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_issue); 644496f8e5bSHamilton Tobon Mosquera 645496f8e5bSHamilton Tobon Mosquera // Change RuntimeCall call site for its asynchronous version. 646496f8e5bSHamilton Tobon Mosquera SmallVector<Value *, 8> Args; 647bd2fa181SHamilton Tobon Mosquera for (auto &Arg : RuntimeCall.args()) 648496f8e5bSHamilton Tobon Mosquera Args.push_back(Arg.get()); 649496f8e5bSHamilton Tobon Mosquera 650496f8e5bSHamilton Tobon Mosquera CallInst *IssueCallsite = 651bd2fa181SHamilton Tobon Mosquera CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall); 652bd2fa181SHamilton Tobon Mosquera RuntimeCall.eraseFromParent(); 653496f8e5bSHamilton Tobon Mosquera 654496f8e5bSHamilton Tobon Mosquera // Add "wait" runtime call declaration: 655496f8e5bSHamilton Tobon Mosquera // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) 656496f8e5bSHamilton Tobon Mosquera FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( 657496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_wait); 658496f8e5bSHamilton Tobon Mosquera 659496f8e5bSHamilton Tobon Mosquera // Add call site to WaitDecl. 660496f8e5bSHamilton Tobon Mosquera Value *WaitParams[2] = { 661496f8e5bSHamilton Tobon Mosquera IssueCallsite->getArgOperand(0), // device_id. 662496f8e5bSHamilton Tobon Mosquera IssueCallsite // returned handle. 663496f8e5bSHamilton Tobon Mosquera }; 664bd2fa181SHamilton Tobon Mosquera CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); 665496f8e5bSHamilton Tobon Mosquera 666496f8e5bSHamilton Tobon Mosquera return true; 667496f8e5bSHamilton Tobon Mosquera } 668496f8e5bSHamilton Tobon Mosquera 669dc3b5b00SJohannes Doerfert static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, 670dc3b5b00SJohannes Doerfert bool GlobalOnly, bool &SingleChoice) { 671dc3b5b00SJohannes Doerfert if (CurrentIdent == NextIdent) 672dc3b5b00SJohannes Doerfert return CurrentIdent; 673dc3b5b00SJohannes Doerfert 674396b7253SJohannes Doerfert // TODO: Figure out how to actually combine multiple debug locations. For 675dc3b5b00SJohannes Doerfert // now we just keep an existing one if there is a single choice. 676dc3b5b00SJohannes Doerfert if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { 677dc3b5b00SJohannes Doerfert SingleChoice = !CurrentIdent; 678dc3b5b00SJohannes Doerfert return NextIdent; 679dc3b5b00SJohannes Doerfert } 680396b7253SJohannes Doerfert return nullptr; 681396b7253SJohannes Doerfert } 682396b7253SJohannes Doerfert 683396b7253SJohannes Doerfert /// Return an `struct ident_t*` value that represents the ones used in the 684396b7253SJohannes Doerfert /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 685396b7253SJohannes Doerfert /// return a local `struct ident_t*`. For now, if we cannot find a suitable 686396b7253SJohannes Doerfert /// return value we create one from scratch. We also do not yet combine 687396b7253SJohannes Doerfert /// information, e.g., the source locations, see combinedIdentStruct. 6887cfd267cSsstefan1 Value * 6897cfd267cSsstefan1 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, 6907cfd267cSsstefan1 Function &F, bool GlobalOnly) { 691dc3b5b00SJohannes Doerfert bool SingleChoice = true; 692396b7253SJohannes Doerfert Value *Ident = nullptr; 693396b7253SJohannes Doerfert auto CombineIdentStruct = [&](Use &U, Function &Caller) { 694396b7253SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 695396b7253SJohannes Doerfert if (!CI || &F != &Caller) 696396b7253SJohannes Doerfert return false; 697396b7253SJohannes Doerfert Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 698dc3b5b00SJohannes Doerfert /* GlobalOnly */ true, SingleChoice); 699396b7253SJohannes Doerfert return false; 700396b7253SJohannes Doerfert }; 701624d34afSJohannes Doerfert RFI.foreachUse(SCC, CombineIdentStruct); 702396b7253SJohannes Doerfert 703dc3b5b00SJohannes Doerfert if (!Ident || !SingleChoice) { 704396b7253SJohannes Doerfert // The IRBuilder uses the insertion block to get to the module, this is 705396b7253SJohannes Doerfert // unfortunate but we work around it for now. 7067cfd267cSsstefan1 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) 7077cfd267cSsstefan1 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 708396b7253SJohannes Doerfert &F.getEntryBlock(), F.getEntryBlock().begin())); 709396b7253SJohannes Doerfert // Create a fallback location if non was found. 710396b7253SJohannes Doerfert // TODO: Use the debug locations of the calls instead. 7117cfd267cSsstefan1 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); 7127cfd267cSsstefan1 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); 713396b7253SJohannes Doerfert } 714396b7253SJohannes Doerfert return Ident; 715396b7253SJohannes Doerfert } 716396b7253SJohannes Doerfert 717b726c557SJohannes Doerfert /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 7189548b74aSJohannes Doerfert /// \p ReplVal if given. 7197cfd267cSsstefan1 bool deduplicateRuntimeCalls(Function &F, 7207cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI, 7219548b74aSJohannes Doerfert Value *ReplVal = nullptr) { 7228855fec3SJohannes Doerfert auto *UV = RFI.getUseVector(F); 7238855fec3SJohannes Doerfert if (!UV || UV->size() + (ReplVal != nullptr) < 2) 724b1fbf438SRoman Lebedev return false; 725b1fbf438SRoman Lebedev 7267cfd267cSsstefan1 LLVM_DEBUG( 7277cfd267cSsstefan1 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 7287cfd267cSsstefan1 << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 7297cfd267cSsstefan1 730ab3da5ddSMichael Liao assert((!ReplVal || (isa<Argument>(ReplVal) && 731ab3da5ddSMichael Liao cast<Argument>(ReplVal)->getParent() == &F)) && 7329548b74aSJohannes Doerfert "Unexpected replacement value!"); 733396b7253SJohannes Doerfert 734396b7253SJohannes Doerfert // TODO: Use dominance to find a good position instead. 7356aab27baSsstefan1 auto CanBeMoved = [this](CallBase &CB) { 736396b7253SJohannes Doerfert unsigned NumArgs = CB.getNumArgOperands(); 737396b7253SJohannes Doerfert if (NumArgs == 0) 738396b7253SJohannes Doerfert return true; 7396aab27baSsstefan1 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 740396b7253SJohannes Doerfert return false; 741396b7253SJohannes Doerfert for (unsigned u = 1; u < NumArgs; ++u) 742396b7253SJohannes Doerfert if (isa<Instruction>(CB.getArgOperand(u))) 743396b7253SJohannes Doerfert return false; 744396b7253SJohannes Doerfert return true; 745396b7253SJohannes Doerfert }; 746396b7253SJohannes Doerfert 7479548b74aSJohannes Doerfert if (!ReplVal) { 7488855fec3SJohannes Doerfert for (Use *U : *UV) 7499548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 750396b7253SJohannes Doerfert if (!CanBeMoved(*CI)) 751396b7253SJohannes Doerfert continue; 7524d4ea9acSHuber, Joseph 7534d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 7544d4ea9acSHuber, Joseph auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); 7554d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 7564d4ea9acSHuber, Joseph << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " 7574d4ea9acSHuber, Joseph << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); 7584d4ea9acSHuber, Joseph }; 7594d4ea9acSHuber, Joseph emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark); 7604d4ea9acSHuber, Joseph 7619548b74aSJohannes Doerfert CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 7629548b74aSJohannes Doerfert ReplVal = CI; 7639548b74aSJohannes Doerfert break; 7649548b74aSJohannes Doerfert } 7659548b74aSJohannes Doerfert if (!ReplVal) 7669548b74aSJohannes Doerfert return false; 7679548b74aSJohannes Doerfert } 7689548b74aSJohannes Doerfert 769396b7253SJohannes Doerfert // If we use a call as a replacement value we need to make sure the ident is 770396b7253SJohannes Doerfert // valid at the new location. For now we just pick a global one, either 771396b7253SJohannes Doerfert // existing and used by one of the calls, or created from scratch. 772396b7253SJohannes Doerfert if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 773396b7253SJohannes Doerfert if (CI->getNumArgOperands() > 0 && 7746aab27baSsstefan1 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 775396b7253SJohannes Doerfert Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 776396b7253SJohannes Doerfert /* GlobalOnly */ true); 777396b7253SJohannes Doerfert CI->setArgOperand(0, Ident); 778396b7253SJohannes Doerfert } 779396b7253SJohannes Doerfert } 780396b7253SJohannes Doerfert 7819548b74aSJohannes Doerfert bool Changed = false; 7829548b74aSJohannes Doerfert auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 7839548b74aSJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 7849548b74aSJohannes Doerfert if (!CI || CI == ReplVal || &F != &Caller) 7859548b74aSJohannes Doerfert return false; 7869548b74aSJohannes Doerfert assert(CI->getCaller() == &F && "Unexpected call!"); 7874d4ea9acSHuber, Joseph 7884d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 7894d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 7904d4ea9acSHuber, Joseph << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; 7914d4ea9acSHuber, Joseph }; 7924d4ea9acSHuber, Joseph emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark); 7934d4ea9acSHuber, Joseph 7949548b74aSJohannes Doerfert CGUpdater.removeCallSite(*CI); 7959548b74aSJohannes Doerfert CI->replaceAllUsesWith(ReplVal); 7969548b74aSJohannes Doerfert CI->eraseFromParent(); 7979548b74aSJohannes Doerfert ++NumOpenMPRuntimeCallsDeduplicated; 7989548b74aSJohannes Doerfert Changed = true; 7999548b74aSJohannes Doerfert return true; 8009548b74aSJohannes Doerfert }; 801624d34afSJohannes Doerfert RFI.foreachUse(SCC, ReplaceAndDeleteCB); 8029548b74aSJohannes Doerfert 8039548b74aSJohannes Doerfert return Changed; 8049548b74aSJohannes Doerfert } 8059548b74aSJohannes Doerfert 8069548b74aSJohannes Doerfert /// Collect arguments that represent the global thread id in \p GTIdArgs. 8079548b74aSJohannes Doerfert void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 8089548b74aSJohannes Doerfert // TODO: Below we basically perform a fixpoint iteration with a pessimistic 8099548b74aSJohannes Doerfert // initialization. We could define an AbstractAttribute instead and 8109548b74aSJohannes Doerfert // run the Attributor here once it can be run as an SCC pass. 8119548b74aSJohannes Doerfert 8129548b74aSJohannes Doerfert // Helper to check the argument \p ArgNo at all call sites of \p F for 8139548b74aSJohannes Doerfert // a GTId. 8149548b74aSJohannes Doerfert auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 8159548b74aSJohannes Doerfert if (!F.hasLocalLinkage()) 8169548b74aSJohannes Doerfert return false; 8179548b74aSJohannes Doerfert for (Use &U : F.uses()) { 8189548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U)) { 8199548b74aSJohannes Doerfert Value *ArgOp = CI->getArgOperand(ArgNo); 8209548b74aSJohannes Doerfert if (CI == &RefCI || GTIdArgs.count(ArgOp) || 8217cfd267cSsstefan1 getCallIfRegularCall( 8227cfd267cSsstefan1 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 8239548b74aSJohannes Doerfert continue; 8249548b74aSJohannes Doerfert } 8259548b74aSJohannes Doerfert return false; 8269548b74aSJohannes Doerfert } 8279548b74aSJohannes Doerfert return true; 8289548b74aSJohannes Doerfert }; 8299548b74aSJohannes Doerfert 8309548b74aSJohannes Doerfert // Helper to identify uses of a GTId as GTId arguments. 8319548b74aSJohannes Doerfert auto AddUserArgs = [&](Value >Id) { 8329548b74aSJohannes Doerfert for (Use &U : GTId.uses()) 8339548b74aSJohannes Doerfert if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 8349548b74aSJohannes Doerfert if (CI->isArgOperand(&U)) 8359548b74aSJohannes Doerfert if (Function *Callee = CI->getCalledFunction()) 8369548b74aSJohannes Doerfert if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 8379548b74aSJohannes Doerfert GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 8389548b74aSJohannes Doerfert }; 8399548b74aSJohannes Doerfert 8409548b74aSJohannes Doerfert // The argument users of __kmpc_global_thread_num calls are GTIds. 8417cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 8427cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 8437cfd267cSsstefan1 844624d34afSJohannes Doerfert GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 8458855fec3SJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 8469548b74aSJohannes Doerfert AddUserArgs(*CI); 8478855fec3SJohannes Doerfert return false; 8488855fec3SJohannes Doerfert }); 8499548b74aSJohannes Doerfert 8509548b74aSJohannes Doerfert // Transitively search for more arguments by looking at the users of the 8519548b74aSJohannes Doerfert // ones we know already. During the search the GTIdArgs vector is extended 8529548b74aSJohannes Doerfert // so we cannot cache the size nor can we use a range based for. 8539548b74aSJohannes Doerfert for (unsigned u = 0; u < GTIdArgs.size(); ++u) 8549548b74aSJohannes Doerfert AddUserArgs(*GTIdArgs[u]); 8559548b74aSJohannes Doerfert } 8569548b74aSJohannes Doerfert 8575b0581aeSJohannes Doerfert /// Kernel (=GPU) optimizations and utility functions 8585b0581aeSJohannes Doerfert /// 8595b0581aeSJohannes Doerfert ///{{ 8605b0581aeSJohannes Doerfert 8615b0581aeSJohannes Doerfert /// Check if \p F is a kernel, hence entry point for target offloading. 8625b0581aeSJohannes Doerfert bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } 8635b0581aeSJohannes Doerfert 8645b0581aeSJohannes Doerfert /// Cache to remember the unique kernel for a function. 8655b0581aeSJohannes Doerfert DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; 8665b0581aeSJohannes Doerfert 8675b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p F, if any. 8685b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Function &F); 8695b0581aeSJohannes Doerfert 8705b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p I, if any. 8715b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Instruction &I) { 8725b0581aeSJohannes Doerfert return getUniqueKernelFor(*I.getFunction()); 8735b0581aeSJohannes Doerfert } 8745b0581aeSJohannes Doerfert 8755b0581aeSJohannes Doerfert /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in 8765b0581aeSJohannes Doerfert /// the cases we can avoid taking the address of a function. 8775b0581aeSJohannes Doerfert bool rewriteDeviceCodeStateMachine(); 8785b0581aeSJohannes Doerfert 8795b0581aeSJohannes Doerfert /// 8805b0581aeSJohannes Doerfert ///}} 8815b0581aeSJohannes Doerfert 8824d4ea9acSHuber, Joseph /// Emit a remark generically 8834d4ea9acSHuber, Joseph /// 8844d4ea9acSHuber, Joseph /// This template function can be used to generically emit a remark. The 8854d4ea9acSHuber, Joseph /// RemarkKind should be one of the following: 8864d4ea9acSHuber, Joseph /// - OptimizationRemark to indicate a successful optimization attempt 8874d4ea9acSHuber, Joseph /// - OptimizationRemarkMissed to report a failed optimization attempt 8884d4ea9acSHuber, Joseph /// - OptimizationRemarkAnalysis to provide additional information about an 8894d4ea9acSHuber, Joseph /// optimization attempt 8904d4ea9acSHuber, Joseph /// 8914d4ea9acSHuber, Joseph /// The remark is built using a callback function provided by the caller that 8924d4ea9acSHuber, Joseph /// takes a RemarkKind as input and returns a RemarkKind. 8934d4ea9acSHuber, Joseph template <typename RemarkKind, 8944d4ea9acSHuber, Joseph typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>> 8954d4ea9acSHuber, Joseph void emitRemark(Instruction *Inst, StringRef RemarkName, 896e8039ad4SJohannes Doerfert RemarkCallBack &&RemarkCB) const { 8974d4ea9acSHuber, Joseph Function *F = Inst->getParent()->getParent(); 8984d4ea9acSHuber, Joseph auto &ORE = OREGetter(F); 8994d4ea9acSHuber, Joseph 9007cfd267cSsstefan1 ORE.emit( 9017cfd267cSsstefan1 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); 9024d4ea9acSHuber, Joseph } 9034d4ea9acSHuber, Joseph 9040f426935Ssstefan1 /// Emit a remark on a function. Since only OptimizationRemark is supporting 9050f426935Ssstefan1 /// this, it can't be made generic. 906e8039ad4SJohannes Doerfert void 907e8039ad4SJohannes Doerfert emitRemarkOnFunction(Function *F, StringRef RemarkName, 908e8039ad4SJohannes Doerfert function_ref<OptimizationRemark(OptimizationRemark &&)> 909e8039ad4SJohannes Doerfert &&RemarkCB) const { 9100f426935Ssstefan1 auto &ORE = OREGetter(F); 9110f426935Ssstefan1 9120f426935Ssstefan1 ORE.emit([&]() { 9130f426935Ssstefan1 return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); 9140f426935Ssstefan1 }); 9150f426935Ssstefan1 } 9160f426935Ssstefan1 917b726c557SJohannes Doerfert /// The underlying module. 9189548b74aSJohannes Doerfert Module &M; 9199548b74aSJohannes Doerfert 9209548b74aSJohannes Doerfert /// The SCC we are operating on. 921ee17263aSJohannes Doerfert SmallVectorImpl<Function *> &SCC; 9229548b74aSJohannes Doerfert 9239548b74aSJohannes Doerfert /// Callback to update the call graph, the first argument is a removed call, 9249548b74aSJohannes Doerfert /// the second an optional replacement call. 9259548b74aSJohannes Doerfert CallGraphUpdater &CGUpdater; 9269548b74aSJohannes Doerfert 9274d4ea9acSHuber, Joseph /// Callback to get an OptimizationRemarkEmitter from a Function * 9284d4ea9acSHuber, Joseph OptimizationRemarkGetter OREGetter; 9294d4ea9acSHuber, Joseph 9307cfd267cSsstefan1 /// OpenMP-specific information cache. Also Used for Attributor runs. 9317cfd267cSsstefan1 OMPInformationCache &OMPInfoCache; 932b8235d2bSsstefan1 933b8235d2bSsstefan1 /// Attributor instance. 934b8235d2bSsstefan1 Attributor &A; 935b8235d2bSsstefan1 936b8235d2bSsstefan1 /// Helper function to run Attributor on SCC. 937b8235d2bSsstefan1 bool runAttributor() { 938b8235d2bSsstefan1 if (SCC.empty()) 939b8235d2bSsstefan1 return false; 940b8235d2bSsstefan1 941b8235d2bSsstefan1 registerAAs(); 942b8235d2bSsstefan1 943b8235d2bSsstefan1 ChangeStatus Changed = A.run(); 944b8235d2bSsstefan1 945b8235d2bSsstefan1 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 946b8235d2bSsstefan1 << " functions, result: " << Changed << ".\n"); 947b8235d2bSsstefan1 948b8235d2bSsstefan1 return Changed == ChangeStatus::CHANGED; 949b8235d2bSsstefan1 } 950b8235d2bSsstefan1 951b8235d2bSsstefan1 /// Populate the Attributor with abstract attribute opportunities in the 952b8235d2bSsstefan1 /// function. 953b8235d2bSsstefan1 void registerAAs() { 954*5dfd7cc4Ssstefan1 if (SCC.empty()) 955*5dfd7cc4Ssstefan1 return; 956b8235d2bSsstefan1 957*5dfd7cc4Ssstefan1 // Create CallSite AA for all Getters. 958*5dfd7cc4Ssstefan1 for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { 959*5dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; 960*5dfd7cc4Ssstefan1 961*5dfd7cc4Ssstefan1 auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 962*5dfd7cc4Ssstefan1 963*5dfd7cc4Ssstefan1 auto CreateAA = [&](Use &U, Function &Caller) { 964*5dfd7cc4Ssstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 965*5dfd7cc4Ssstefan1 if (!CI) 966*5dfd7cc4Ssstefan1 return false; 967*5dfd7cc4Ssstefan1 968*5dfd7cc4Ssstefan1 auto &CB = cast<CallBase>(*CI); 969*5dfd7cc4Ssstefan1 970*5dfd7cc4Ssstefan1 IRPosition CBPos = IRPosition::callsite_function(CB); 971*5dfd7cc4Ssstefan1 A.getOrCreateAAFor<AAICVTracker>(CBPos); 972*5dfd7cc4Ssstefan1 return false; 973*5dfd7cc4Ssstefan1 }; 974*5dfd7cc4Ssstefan1 975*5dfd7cc4Ssstefan1 GetterRFI.foreachUse(SCC, CreateAA); 976b8235d2bSsstefan1 } 977b8235d2bSsstefan1 } 978b8235d2bSsstefan1 }; 979b8235d2bSsstefan1 9805b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 9815b0581aeSJohannes Doerfert if (!OMPInfoCache.ModuleSlice.count(&F)) 9825b0581aeSJohannes Doerfert return nullptr; 9835b0581aeSJohannes Doerfert 9845b0581aeSJohannes Doerfert // Use a scope to keep the lifetime of the CachedKernel short. 9855b0581aeSJohannes Doerfert { 9865b0581aeSJohannes Doerfert Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 9875b0581aeSJohannes Doerfert if (CachedKernel) 9885b0581aeSJohannes Doerfert return *CachedKernel; 9895b0581aeSJohannes Doerfert 9905b0581aeSJohannes Doerfert // TODO: We should use an AA to create an (optimistic and callback 9915b0581aeSJohannes Doerfert // call-aware) call graph. For now we stick to simple patterns that 9925b0581aeSJohannes Doerfert // are less powerful, basically the worst fixpoint. 9935b0581aeSJohannes Doerfert if (isKernel(F)) { 9945b0581aeSJohannes Doerfert CachedKernel = Kernel(&F); 9955b0581aeSJohannes Doerfert return *CachedKernel; 9965b0581aeSJohannes Doerfert } 9975b0581aeSJohannes Doerfert 9985b0581aeSJohannes Doerfert CachedKernel = nullptr; 9995b0581aeSJohannes Doerfert if (!F.hasLocalLinkage()) 10005b0581aeSJohannes Doerfert return nullptr; 10015b0581aeSJohannes Doerfert } 10025b0581aeSJohannes Doerfert 10035b0581aeSJohannes Doerfert auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 10045b0581aeSJohannes Doerfert if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 10055b0581aeSJohannes Doerfert // Allow use in equality comparisons. 10065b0581aeSJohannes Doerfert if (Cmp->isEquality()) 10075b0581aeSJohannes Doerfert return getUniqueKernelFor(*Cmp); 10085b0581aeSJohannes Doerfert return nullptr; 10095b0581aeSJohannes Doerfert } 10105b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 10115b0581aeSJohannes Doerfert // Allow direct calls. 10125b0581aeSJohannes Doerfert if (CB->isCallee(&U)) 10135b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 10145b0581aeSJohannes Doerfert // Allow the use in __kmpc_kernel_prepare_parallel calls. 10155b0581aeSJohannes Doerfert if (Function *Callee = CB->getCalledFunction()) 10165b0581aeSJohannes Doerfert if (Callee->getName() == "__kmpc_kernel_prepare_parallel") 10175b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 10185b0581aeSJohannes Doerfert return nullptr; 10195b0581aeSJohannes Doerfert } 10205b0581aeSJohannes Doerfert // Disallow every other use. 10215b0581aeSJohannes Doerfert return nullptr; 10225b0581aeSJohannes Doerfert }; 10235b0581aeSJohannes Doerfert 10245b0581aeSJohannes Doerfert // TODO: In the future we want to track more than just a unique kernel. 10255b0581aeSJohannes Doerfert SmallPtrSet<Kernel, 2> PotentialKernels; 10268d8ce85bSsstefan1 OMPInformationCache::foreachUse(F, [&](const Use &U) { 10275b0581aeSJohannes Doerfert PotentialKernels.insert(GetUniqueKernelForUse(U)); 10285b0581aeSJohannes Doerfert }); 10295b0581aeSJohannes Doerfert 10305b0581aeSJohannes Doerfert Kernel K = nullptr; 10315b0581aeSJohannes Doerfert if (PotentialKernels.size() == 1) 10325b0581aeSJohannes Doerfert K = *PotentialKernels.begin(); 10335b0581aeSJohannes Doerfert 10345b0581aeSJohannes Doerfert // Cache the result. 10355b0581aeSJohannes Doerfert UniqueKernelMap[&F] = K; 10365b0581aeSJohannes Doerfert 10375b0581aeSJohannes Doerfert return K; 10385b0581aeSJohannes Doerfert } 10395b0581aeSJohannes Doerfert 10405b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 10415b0581aeSJohannes Doerfert OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = 10425b0581aeSJohannes Doerfert OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; 10435b0581aeSJohannes Doerfert 10445b0581aeSJohannes Doerfert bool Changed = false; 10455b0581aeSJohannes Doerfert if (!KernelPrepareParallelRFI) 10465b0581aeSJohannes Doerfert return Changed; 10475b0581aeSJohannes Doerfert 10485b0581aeSJohannes Doerfert for (Function *F : SCC) { 10495b0581aeSJohannes Doerfert 10505b0581aeSJohannes Doerfert // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at 10515b0581aeSJohannes Doerfert // all. 10525b0581aeSJohannes Doerfert bool UnknownUse = false; 1053fec1f210SJohannes Doerfert bool KernelPrepareUse = false; 10545b0581aeSJohannes Doerfert unsigned NumDirectCalls = 0; 10555b0581aeSJohannes Doerfert 10565b0581aeSJohannes Doerfert SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 10578d8ce85bSsstefan1 OMPInformationCache::foreachUse(*F, [&](Use &U) { 10585b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) 10595b0581aeSJohannes Doerfert if (CB->isCallee(&U)) { 10605b0581aeSJohannes Doerfert ++NumDirectCalls; 10615b0581aeSJohannes Doerfert return; 10625b0581aeSJohannes Doerfert } 10635b0581aeSJohannes Doerfert 106481db6144SMichael Liao if (isa<ICmpInst>(U.getUser())) { 10655b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 10665b0581aeSJohannes Doerfert return; 10675b0581aeSJohannes Doerfert } 1068fec1f210SJohannes Doerfert if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall( 1069fec1f210SJohannes Doerfert *U.getUser(), &KernelPrepareParallelRFI)) { 1070fec1f210SJohannes Doerfert KernelPrepareUse = true; 10715b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 10725b0581aeSJohannes Doerfert return; 10735b0581aeSJohannes Doerfert } 10745b0581aeSJohannes Doerfert UnknownUse = true; 10755b0581aeSJohannes Doerfert }); 10765b0581aeSJohannes Doerfert 1077fec1f210SJohannes Doerfert // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel 1078fec1f210SJohannes Doerfert // use. 1079fec1f210SJohannes Doerfert if (!KernelPrepareUse) 10805b0581aeSJohannes Doerfert continue; 10815b0581aeSJohannes Doerfert 1082fec1f210SJohannes Doerfert { 1083fec1f210SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) { 1084fec1f210SJohannes Doerfert return OR << "Found a parallel region that is called in a target " 1085fec1f210SJohannes Doerfert "region but not part of a combined target construct nor " 1086fec1f210SJohannes Doerfert "nesed inside a target construct without intermediate " 1087fec1f210SJohannes Doerfert "code. This can lead to excessive register usage for " 1088fec1f210SJohannes Doerfert "unrelated target regions in the same translation unit " 1089fec1f210SJohannes Doerfert "due to spurious call edges assumed by ptxas."; 1090fec1f210SJohannes Doerfert }; 1091fec1f210SJohannes Doerfert emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); 1092fec1f210SJohannes Doerfert } 1093fec1f210SJohannes Doerfert 1094fec1f210SJohannes Doerfert // If this ever hits, we should investigate. 1095fec1f210SJohannes Doerfert // TODO: Checking the number of uses is not a necessary restriction and 1096fec1f210SJohannes Doerfert // should be lifted. 1097fec1f210SJohannes Doerfert if (UnknownUse || NumDirectCalls != 1 || 1098fec1f210SJohannes Doerfert ToBeReplacedStateMachineUses.size() != 2) { 1099fec1f210SJohannes Doerfert { 1100fec1f210SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) { 1101fec1f210SJohannes Doerfert return OR << "Parallel region is used in " 1102fec1f210SJohannes Doerfert << (UnknownUse ? "unknown" : "unexpected") 1103fec1f210SJohannes Doerfert << " ways; will not attempt to rewrite the state machine."; 1104fec1f210SJohannes Doerfert }; 1105fec1f210SJohannes Doerfert emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); 1106fec1f210SJohannes Doerfert } 11075b0581aeSJohannes Doerfert continue; 1108fec1f210SJohannes Doerfert } 11095b0581aeSJohannes Doerfert 11105b0581aeSJohannes Doerfert // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give 11115b0581aeSJohannes Doerfert // up if the function is not called from a unique kernel. 11125b0581aeSJohannes Doerfert Kernel K = getUniqueKernelFor(*F); 1113fec1f210SJohannes Doerfert if (!K) { 1114fec1f210SJohannes Doerfert { 1115fec1f210SJohannes Doerfert auto Remark = [&](OptimizationRemark OR) { 1116fec1f210SJohannes Doerfert return OR << "Parallel region is not known to be called from a " 1117fec1f210SJohannes Doerfert "unique single target region, maybe the surrounding " 1118fec1f210SJohannes Doerfert "function has external linkage?; will not attempt to " 1119fec1f210SJohannes Doerfert "rewrite the state machine use."; 1120fec1f210SJohannes Doerfert }; 1121fec1f210SJohannes Doerfert emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl", 1122fec1f210SJohannes Doerfert Remark); 1123fec1f210SJohannes Doerfert } 11245b0581aeSJohannes Doerfert continue; 1125fec1f210SJohannes Doerfert } 11265b0581aeSJohannes Doerfert 11275b0581aeSJohannes Doerfert // We now know F is a parallel body function called only from the kernel K. 11285b0581aeSJohannes Doerfert // We also identified the state machine uses in which we replace the 11295b0581aeSJohannes Doerfert // function pointer by a new global symbol for identification purposes. This 11305b0581aeSJohannes Doerfert // ensures only direct calls to the function are left. 11315b0581aeSJohannes Doerfert 1132fec1f210SJohannes Doerfert { 1133fec1f210SJohannes Doerfert auto RemarkParalleRegion = [&](OptimizationRemark OR) { 1134fec1f210SJohannes Doerfert return OR << "Specialize parallel region that is only reached from a " 1135fec1f210SJohannes Doerfert "single target region to avoid spurious call edges and " 1136fec1f210SJohannes Doerfert "excessive register usage in other target regions. " 1137fec1f210SJohannes Doerfert "(parallel region ID: " 1138fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1139fec1f210SJohannes Doerfert << ", kernel ID: " 1140fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1141fec1f210SJohannes Doerfert }; 1142fec1f210SJohannes Doerfert emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", 1143fec1f210SJohannes Doerfert RemarkParalleRegion); 1144fec1f210SJohannes Doerfert auto RemarkKernel = [&](OptimizationRemark OR) { 1145fec1f210SJohannes Doerfert return OR << "Target region containing the parallel region that is " 1146fec1f210SJohannes Doerfert "specialized. (parallel region ID: " 1147fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1148fec1f210SJohannes Doerfert << ", kernel ID: " 1149fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1150fec1f210SJohannes Doerfert }; 1151fec1f210SJohannes Doerfert emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel); 1152fec1f210SJohannes Doerfert } 1153fec1f210SJohannes Doerfert 11545b0581aeSJohannes Doerfert Module &M = *F->getParent(); 11555b0581aeSJohannes Doerfert Type *Int8Ty = Type::getInt8Ty(M.getContext()); 11565b0581aeSJohannes Doerfert 11575b0581aeSJohannes Doerfert auto *ID = new GlobalVariable( 11585b0581aeSJohannes Doerfert M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, 11595b0581aeSJohannes Doerfert UndefValue::get(Int8Ty), F->getName() + ".ID"); 11605b0581aeSJohannes Doerfert 11615b0581aeSJohannes Doerfert for (Use *U : ToBeReplacedStateMachineUses) 11625b0581aeSJohannes Doerfert U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); 11635b0581aeSJohannes Doerfert 11645b0581aeSJohannes Doerfert ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; 11655b0581aeSJohannes Doerfert 11665b0581aeSJohannes Doerfert Changed = true; 11675b0581aeSJohannes Doerfert } 11685b0581aeSJohannes Doerfert 11695b0581aeSJohannes Doerfert return Changed; 11705b0581aeSJohannes Doerfert } 11715b0581aeSJohannes Doerfert 1172b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values. 1173b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { 1174b8235d2bSsstefan1 using Base = StateWrapper<BooleanState, AbstractAttribute>; 1175b8235d2bSsstefan1 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 1176b8235d2bSsstefan1 1177*5dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 1178*5dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 1179*5dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 1180*5dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 1181*5dfd7cc4Ssstefan1 } 1182*5dfd7cc4Ssstefan1 1183b8235d2bSsstefan1 /// Returns true if value is assumed to be tracked. 1184b8235d2bSsstefan1 bool isAssumedTracked() const { return getAssumed(); } 1185b8235d2bSsstefan1 1186b8235d2bSsstefan1 /// Returns true if value is known to be tracked. 1187b8235d2bSsstefan1 bool isKnownTracked() const { return getAssumed(); } 1188b8235d2bSsstefan1 1189b8235d2bSsstefan1 /// Create an abstract attribute biew for the position \p IRP. 1190b8235d2bSsstefan1 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); 1191b8235d2bSsstefan1 1192b8235d2bSsstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 1193*5dfd7cc4Ssstefan1 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV, 1194*5dfd7cc4Ssstefan1 const Instruction *I, 1195*5dfd7cc4Ssstefan1 Attributor &A) const { 1196*5dfd7cc4Ssstefan1 return None; 1197*5dfd7cc4Ssstefan1 } 1198*5dfd7cc4Ssstefan1 1199*5dfd7cc4Ssstefan1 /// Return an assumed unique ICV value if a single candidate is found. If 1200*5dfd7cc4Ssstefan1 /// there cannot be one, return a nullptr. If it is not clear yet, return the 1201*5dfd7cc4Ssstefan1 /// Optional::NoneType. 1202*5dfd7cc4Ssstefan1 virtual Optional<Value *> 1203*5dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const = 0; 1204*5dfd7cc4Ssstefan1 1205*5dfd7cc4Ssstefan1 // Currently only nthreads is being tracked. 1206*5dfd7cc4Ssstefan1 // this array will only grow with time. 1207*5dfd7cc4Ssstefan1 InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 1208b8235d2bSsstefan1 1209b8235d2bSsstefan1 /// See AbstractAttribute::getName() 1210b8235d2bSsstefan1 const std::string getName() const override { return "AAICVTracker"; } 1211b8235d2bSsstefan1 1212233af895SLuofan Chen /// See AbstractAttribute::getIdAddr() 1213233af895SLuofan Chen const char *getIdAddr() const override { return &ID; } 1214233af895SLuofan Chen 1215233af895SLuofan Chen /// This function should return true if the type of the \p AA is AAICVTracker 1216233af895SLuofan Chen static bool classof(const AbstractAttribute *AA) { 1217233af895SLuofan Chen return (AA->getIdAddr() == &ID); 1218233af895SLuofan Chen } 1219233af895SLuofan Chen 1220b8235d2bSsstefan1 static const char ID; 1221b8235d2bSsstefan1 }; 1222b8235d2bSsstefan1 1223b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker { 1224b8235d2bSsstefan1 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 1225b8235d2bSsstefan1 : AAICVTracker(IRP, A) {} 1226b8235d2bSsstefan1 1227b8235d2bSsstefan1 // FIXME: come up with better string. 1228*5dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerFunction"; } 1229b8235d2bSsstefan1 1230b8235d2bSsstefan1 // FIXME: come up with some stats. 1231b8235d2bSsstefan1 void trackStatistics() const override {} 1232b8235d2bSsstefan1 1233*5dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 1234b8235d2bSsstefan1 ChangeStatus manifest(Attributor &A) override { 1235*5dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1236b8235d2bSsstefan1 } 1237b8235d2bSsstefan1 1238b8235d2bSsstefan1 // Map of ICV to their values at specific program point. 1239*5dfd7cc4Ssstefan1 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar, 1240b8235d2bSsstefan1 InternalControlVar::ICV___last> 1241*5dfd7cc4Ssstefan1 ICVReplacementValuesMap; 1242b8235d2bSsstefan1 1243b8235d2bSsstefan1 ChangeStatus updateImpl(Attributor &A) override { 1244b8235d2bSsstefan1 ChangeStatus HasChanged = ChangeStatus::UNCHANGED; 1245b8235d2bSsstefan1 1246b8235d2bSsstefan1 Function *F = getAnchorScope(); 1247b8235d2bSsstefan1 1248b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1249b8235d2bSsstefan1 1250b8235d2bSsstefan1 for (InternalControlVar ICV : TrackableICVs) { 1251b8235d2bSsstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 1252b8235d2bSsstefan1 1253*5dfd7cc4Ssstefan1 auto &ValuesMap = ICVReplacementValuesMap[ICV]; 1254b8235d2bSsstefan1 auto TrackValues = [&](Use &U, Function &) { 1255b8235d2bSsstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); 1256b8235d2bSsstefan1 if (!CI) 1257b8235d2bSsstefan1 return false; 1258b8235d2bSsstefan1 1259b8235d2bSsstefan1 // FIXME: handle setters with more that 1 arguments. 1260b8235d2bSsstefan1 /// Track new value. 1261*5dfd7cc4Ssstefan1 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) 1262b8235d2bSsstefan1 HasChanged = ChangeStatus::CHANGED; 1263b8235d2bSsstefan1 1264b8235d2bSsstefan1 return false; 1265b8235d2bSsstefan1 }; 1266b8235d2bSsstefan1 1267*5dfd7cc4Ssstefan1 auto CallCheck = [&](Instruction &I) { 1268*5dfd7cc4Ssstefan1 Optional<Value *> ReplVal = getValueForCall(A, &I, ICV); 1269*5dfd7cc4Ssstefan1 if (ReplVal.hasValue() && 1270*5dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) 1271*5dfd7cc4Ssstefan1 HasChanged = ChangeStatus::CHANGED; 1272*5dfd7cc4Ssstefan1 1273*5dfd7cc4Ssstefan1 return true; 1274*5dfd7cc4Ssstefan1 }; 1275*5dfd7cc4Ssstefan1 1276*5dfd7cc4Ssstefan1 // Track all changes of an ICV. 1277b8235d2bSsstefan1 SetterRFI.foreachUse(TrackValues, F); 1278*5dfd7cc4Ssstefan1 1279*5dfd7cc4Ssstefan1 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, 1280*5dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true); 1281*5dfd7cc4Ssstefan1 1282*5dfd7cc4Ssstefan1 /// TODO: Figure out a way to avoid adding entry in 1283*5dfd7cc4Ssstefan1 /// ICVReplacementValuesMap 1284*5dfd7cc4Ssstefan1 Instruction *Entry = &F->getEntryBlock().front(); 1285*5dfd7cc4Ssstefan1 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) 1286*5dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(Entry, nullptr)); 1287b8235d2bSsstefan1 } 1288b8235d2bSsstefan1 1289b8235d2bSsstefan1 return HasChanged; 1290b8235d2bSsstefan1 } 1291b8235d2bSsstefan1 1292*5dfd7cc4Ssstefan1 /// Hepler to check if \p I is a call and get the value for it if it is 1293*5dfd7cc4Ssstefan1 /// unique. 1294*5dfd7cc4Ssstefan1 Optional<Value *> getValueForCall(Attributor &A, const Instruction *I, 1295*5dfd7cc4Ssstefan1 InternalControlVar &ICV) const { 1296b8235d2bSsstefan1 1297*5dfd7cc4Ssstefan1 const auto *CB = dyn_cast<CallBase>(I); 1298*5dfd7cc4Ssstefan1 if (!CB) 1299*5dfd7cc4Ssstefan1 return None; 1300*5dfd7cc4Ssstefan1 1301b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1302b8235d2bSsstefan1 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 1303*5dfd7cc4Ssstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 1304*5dfd7cc4Ssstefan1 Function *CalledFunction = CB->getCalledFunction(); 1305b8235d2bSsstefan1 1306*5dfd7cc4Ssstefan1 if (CalledFunction == GetterRFI.Declaration) 1307*5dfd7cc4Ssstefan1 return None; 1308*5dfd7cc4Ssstefan1 if (CalledFunction == SetterRFI.Declaration) { 1309*5dfd7cc4Ssstefan1 if (ICVReplacementValuesMap[ICV].count(I)) 1310*5dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV].lookup(I); 1311*5dfd7cc4Ssstefan1 1312*5dfd7cc4Ssstefan1 return nullptr; 1313*5dfd7cc4Ssstefan1 } 1314*5dfd7cc4Ssstefan1 1315*5dfd7cc4Ssstefan1 // Since we don't know, assume it changes the ICV. 1316*5dfd7cc4Ssstefan1 if (CalledFunction->isDeclaration()) 1317*5dfd7cc4Ssstefan1 return nullptr; 1318*5dfd7cc4Ssstefan1 1319*5dfd7cc4Ssstefan1 const auto &ICVTrackingAA = 1320*5dfd7cc4Ssstefan1 A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB)); 1321*5dfd7cc4Ssstefan1 1322*5dfd7cc4Ssstefan1 if (ICVTrackingAA.isAssumedTracked()) 1323*5dfd7cc4Ssstefan1 return ICVTrackingAA.getUniqueReplacementValue(ICV); 1324*5dfd7cc4Ssstefan1 1325*5dfd7cc4Ssstefan1 // If we don't know, assume it changes. 1326*5dfd7cc4Ssstefan1 return nullptr; 1327*5dfd7cc4Ssstefan1 } 1328*5dfd7cc4Ssstefan1 1329*5dfd7cc4Ssstefan1 // We don't check unique value for a function, so return None. 1330*5dfd7cc4Ssstefan1 Optional<Value *> 1331*5dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 1332*5dfd7cc4Ssstefan1 return None; 1333*5dfd7cc4Ssstefan1 } 1334*5dfd7cc4Ssstefan1 1335*5dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 1336*5dfd7cc4Ssstefan1 Optional<Value *> getReplacementValue(InternalControlVar ICV, 1337*5dfd7cc4Ssstefan1 const Instruction *I, 1338*5dfd7cc4Ssstefan1 Attributor &A) const override { 1339*5dfd7cc4Ssstefan1 const auto &ValuesMap = ICVReplacementValuesMap[ICV]; 1340*5dfd7cc4Ssstefan1 if (ValuesMap.count(I)) 1341*5dfd7cc4Ssstefan1 return ValuesMap.lookup(I); 1342*5dfd7cc4Ssstefan1 1343*5dfd7cc4Ssstefan1 SmallVector<const Instruction *, 16> Worklist; 1344*5dfd7cc4Ssstefan1 SmallPtrSet<const Instruction *, 16> Visited; 1345*5dfd7cc4Ssstefan1 Worklist.push_back(I); 1346*5dfd7cc4Ssstefan1 1347*5dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 1348*5dfd7cc4Ssstefan1 1349*5dfd7cc4Ssstefan1 while (!Worklist.empty()) { 1350*5dfd7cc4Ssstefan1 const Instruction *CurrInst = Worklist.pop_back_val(); 1351*5dfd7cc4Ssstefan1 if (!Visited.insert(CurrInst).second) 1352b8235d2bSsstefan1 continue; 1353b8235d2bSsstefan1 1354*5dfd7cc4Ssstefan1 const BasicBlock *CurrBB = CurrInst->getParent(); 1355*5dfd7cc4Ssstefan1 1356*5dfd7cc4Ssstefan1 // Go up and look for all potential setters/calls that might change the 1357*5dfd7cc4Ssstefan1 // ICV. 1358*5dfd7cc4Ssstefan1 while ((CurrInst = CurrInst->getPrevNode())) { 1359*5dfd7cc4Ssstefan1 if (ValuesMap.count(CurrInst)) { 1360*5dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); 1361*5dfd7cc4Ssstefan1 // Unknown value, track new. 1362*5dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 1363*5dfd7cc4Ssstefan1 ReplVal = NewReplVal; 1364*5dfd7cc4Ssstefan1 break; 1365*5dfd7cc4Ssstefan1 } 1366*5dfd7cc4Ssstefan1 1367*5dfd7cc4Ssstefan1 // If we found a new value, we can't know the icv value anymore. 1368*5dfd7cc4Ssstefan1 if (NewReplVal.hasValue()) 1369*5dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 1370b8235d2bSsstefan1 return nullptr; 1371b8235d2bSsstefan1 1372*5dfd7cc4Ssstefan1 break; 1373b8235d2bSsstefan1 } 1374b8235d2bSsstefan1 1375*5dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV); 1376*5dfd7cc4Ssstefan1 if (!NewReplVal.hasValue()) 1377*5dfd7cc4Ssstefan1 continue; 1378*5dfd7cc4Ssstefan1 1379*5dfd7cc4Ssstefan1 // Unknown value, track new. 1380*5dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 1381*5dfd7cc4Ssstefan1 ReplVal = NewReplVal; 1382*5dfd7cc4Ssstefan1 break; 1383b8235d2bSsstefan1 } 1384b8235d2bSsstefan1 1385*5dfd7cc4Ssstefan1 // if (NewReplVal.hasValue()) 1386*5dfd7cc4Ssstefan1 // We found a new value, we can't know the icv value anymore. 1387*5dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 1388b8235d2bSsstefan1 return nullptr; 1389b8235d2bSsstefan1 } 1390*5dfd7cc4Ssstefan1 1391*5dfd7cc4Ssstefan1 // If we are in the same BB and we have a value, we are done. 1392*5dfd7cc4Ssstefan1 if (CurrBB == I->getParent() && ReplVal.hasValue()) 1393*5dfd7cc4Ssstefan1 return ReplVal; 1394*5dfd7cc4Ssstefan1 1395*5dfd7cc4Ssstefan1 // Go through all predecessors and add terminators for analysis. 1396*5dfd7cc4Ssstefan1 for (const BasicBlock *Pred : predecessors(CurrBB)) 1397*5dfd7cc4Ssstefan1 if (const Instruction *Terminator = Pred->getTerminator()) 1398*5dfd7cc4Ssstefan1 Worklist.push_back(Terminator); 1399*5dfd7cc4Ssstefan1 } 1400*5dfd7cc4Ssstefan1 1401*5dfd7cc4Ssstefan1 return ReplVal; 1402*5dfd7cc4Ssstefan1 } 1403*5dfd7cc4Ssstefan1 }; 1404*5dfd7cc4Ssstefan1 1405*5dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker { 1406*5dfd7cc4Ssstefan1 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) 1407*5dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 1408*5dfd7cc4Ssstefan1 1409*5dfd7cc4Ssstefan1 // FIXME: come up with better string. 1410*5dfd7cc4Ssstefan1 const std::string getAsStr() const override { 1411*5dfd7cc4Ssstefan1 return "ICVTrackerFunctionReturned"; 1412*5dfd7cc4Ssstefan1 } 1413*5dfd7cc4Ssstefan1 1414*5dfd7cc4Ssstefan1 // FIXME: come up with some stats. 1415*5dfd7cc4Ssstefan1 void trackStatistics() const override {} 1416*5dfd7cc4Ssstefan1 1417*5dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 1418*5dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 1419*5dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1420*5dfd7cc4Ssstefan1 } 1421*5dfd7cc4Ssstefan1 1422*5dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 1423*5dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 1424*5dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 1425*5dfd7cc4Ssstefan1 ICVReplacementValuesMap; 1426*5dfd7cc4Ssstefan1 1427*5dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 1428*5dfd7cc4Ssstefan1 Optional<Value *> 1429*5dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 1430*5dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 1431*5dfd7cc4Ssstefan1 } 1432*5dfd7cc4Ssstefan1 1433*5dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 1434*5dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 1435*5dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 1436*5dfd7cc4Ssstefan1 *this, IRPosition::function(*getAnchorScope())); 1437*5dfd7cc4Ssstefan1 1438*5dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 1439*5dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 1440*5dfd7cc4Ssstefan1 1441*5dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 1442*5dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 1443*5dfd7cc4Ssstefan1 Optional<Value *> UniqueICVValue; 1444*5dfd7cc4Ssstefan1 1445*5dfd7cc4Ssstefan1 auto CheckReturnInst = [&](Instruction &I) { 1446*5dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 1447*5dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(ICV, &I, A); 1448*5dfd7cc4Ssstefan1 1449*5dfd7cc4Ssstefan1 // If we found a second ICV value there is no unique returned value. 1450*5dfd7cc4Ssstefan1 if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) 1451*5dfd7cc4Ssstefan1 return false; 1452*5dfd7cc4Ssstefan1 1453*5dfd7cc4Ssstefan1 UniqueICVValue = NewReplVal; 1454*5dfd7cc4Ssstefan1 1455*5dfd7cc4Ssstefan1 return true; 1456*5dfd7cc4Ssstefan1 }; 1457*5dfd7cc4Ssstefan1 1458*5dfd7cc4Ssstefan1 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, 1459*5dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true)) 1460*5dfd7cc4Ssstefan1 UniqueICVValue = nullptr; 1461*5dfd7cc4Ssstefan1 1462*5dfd7cc4Ssstefan1 if (UniqueICVValue == ReplVal) 1463*5dfd7cc4Ssstefan1 continue; 1464*5dfd7cc4Ssstefan1 1465*5dfd7cc4Ssstefan1 ReplVal = UniqueICVValue; 1466*5dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 1467*5dfd7cc4Ssstefan1 } 1468*5dfd7cc4Ssstefan1 1469*5dfd7cc4Ssstefan1 return Changed; 1470*5dfd7cc4Ssstefan1 } 1471*5dfd7cc4Ssstefan1 }; 1472*5dfd7cc4Ssstefan1 1473*5dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker { 1474*5dfd7cc4Ssstefan1 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) 1475*5dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 1476*5dfd7cc4Ssstefan1 1477*5dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 1478*5dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 1479*5dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 1480*5dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 1481*5dfd7cc4Ssstefan1 1482*5dfd7cc4Ssstefan1 // We only initialize this AA for getters, so we need to know which ICV it 1483*5dfd7cc4Ssstefan1 // gets. 1484*5dfd7cc4Ssstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1485*5dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 1486*5dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 1487*5dfd7cc4Ssstefan1 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; 1488*5dfd7cc4Ssstefan1 if (Getter.Declaration == getAssociatedFunction()) { 1489*5dfd7cc4Ssstefan1 AssociatedICV = ICVInfo.Kind; 1490*5dfd7cc4Ssstefan1 return; 1491*5dfd7cc4Ssstefan1 } 1492*5dfd7cc4Ssstefan1 } 1493*5dfd7cc4Ssstefan1 1494*5dfd7cc4Ssstefan1 /// Unknown ICV. 1495*5dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 1496*5dfd7cc4Ssstefan1 } 1497*5dfd7cc4Ssstefan1 1498*5dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 1499*5dfd7cc4Ssstefan1 if (!ReplVal.hasValue() || !ReplVal.getValue()) 1500*5dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1501*5dfd7cc4Ssstefan1 1502*5dfd7cc4Ssstefan1 A.changeValueAfterManifest(*getCtxI(), **ReplVal); 1503*5dfd7cc4Ssstefan1 A.deleteAfterManifest(*getCtxI()); 1504*5dfd7cc4Ssstefan1 1505*5dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 1506*5dfd7cc4Ssstefan1 } 1507*5dfd7cc4Ssstefan1 1508*5dfd7cc4Ssstefan1 // FIXME: come up with better string. 1509*5dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerCallSite"; } 1510*5dfd7cc4Ssstefan1 1511*5dfd7cc4Ssstefan1 // FIXME: come up with some stats. 1512*5dfd7cc4Ssstefan1 void trackStatistics() const override {} 1513*5dfd7cc4Ssstefan1 1514*5dfd7cc4Ssstefan1 InternalControlVar AssociatedICV; 1515*5dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 1516*5dfd7cc4Ssstefan1 1517*5dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 1518*5dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 1519*5dfd7cc4Ssstefan1 *this, IRPosition::function(*getAnchorScope())); 1520*5dfd7cc4Ssstefan1 1521*5dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 1522*5dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 1523*5dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 1524*5dfd7cc4Ssstefan1 1525*5dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 1526*5dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); 1527*5dfd7cc4Ssstefan1 1528*5dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 1529*5dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1530*5dfd7cc4Ssstefan1 1531*5dfd7cc4Ssstefan1 ReplVal = NewReplVal; 1532*5dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 1533*5dfd7cc4Ssstefan1 } 1534*5dfd7cc4Ssstefan1 1535*5dfd7cc4Ssstefan1 // Return the value with which associated value can be replaced for specific 1536*5dfd7cc4Ssstefan1 // \p ICV. 1537*5dfd7cc4Ssstefan1 Optional<Value *> 1538*5dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 1539*5dfd7cc4Ssstefan1 return ReplVal; 1540*5dfd7cc4Ssstefan1 } 1541*5dfd7cc4Ssstefan1 }; 1542*5dfd7cc4Ssstefan1 1543*5dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker { 1544*5dfd7cc4Ssstefan1 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) 1545*5dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 1546*5dfd7cc4Ssstefan1 1547*5dfd7cc4Ssstefan1 // FIXME: come up with better string. 1548*5dfd7cc4Ssstefan1 const std::string getAsStr() const override { 1549*5dfd7cc4Ssstefan1 return "ICVTrackerCallSiteReturned"; 1550*5dfd7cc4Ssstefan1 } 1551*5dfd7cc4Ssstefan1 1552*5dfd7cc4Ssstefan1 // FIXME: come up with some stats. 1553*5dfd7cc4Ssstefan1 void trackStatistics() const override {} 1554*5dfd7cc4Ssstefan1 1555*5dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 1556*5dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 1557*5dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1558*5dfd7cc4Ssstefan1 } 1559*5dfd7cc4Ssstefan1 1560*5dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 1561*5dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 1562*5dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 1563*5dfd7cc4Ssstefan1 ICVReplacementValuesMap; 1564*5dfd7cc4Ssstefan1 1565*5dfd7cc4Ssstefan1 /// Return the value with which associated value can be replaced for specific 1566*5dfd7cc4Ssstefan1 /// \p ICV. 1567*5dfd7cc4Ssstefan1 Optional<Value *> 1568*5dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 1569*5dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 1570*5dfd7cc4Ssstefan1 } 1571*5dfd7cc4Ssstefan1 1572*5dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 1573*5dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 1574*5dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 1575*5dfd7cc4Ssstefan1 *this, IRPosition::returned(*getAssociatedFunction())); 1576*5dfd7cc4Ssstefan1 1577*5dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 1578*5dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 1579*5dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 1580*5dfd7cc4Ssstefan1 1581*5dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 1582*5dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 1583*5dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 1584*5dfd7cc4Ssstefan1 ICVTrackingAA.getUniqueReplacementValue(ICV); 1585*5dfd7cc4Ssstefan1 1586*5dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 1587*5dfd7cc4Ssstefan1 continue; 1588*5dfd7cc4Ssstefan1 1589*5dfd7cc4Ssstefan1 ReplVal = NewReplVal; 1590*5dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 1591*5dfd7cc4Ssstefan1 } 1592*5dfd7cc4Ssstefan1 return Changed; 1593*5dfd7cc4Ssstefan1 } 15949548b74aSJohannes Doerfert }; 15959548b74aSJohannes Doerfert } // namespace 15969548b74aSJohannes Doerfert 1597b8235d2bSsstefan1 const char AAICVTracker::ID = 0; 1598b8235d2bSsstefan1 1599b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 1600b8235d2bSsstefan1 Attributor &A) { 1601b8235d2bSsstefan1 AAICVTracker *AA = nullptr; 1602b8235d2bSsstefan1 switch (IRP.getPositionKind()) { 1603b8235d2bSsstefan1 case IRPosition::IRP_INVALID: 1604b8235d2bSsstefan1 case IRPosition::IRP_FLOAT: 1605b8235d2bSsstefan1 case IRPosition::IRP_ARGUMENT: 1606b8235d2bSsstefan1 case IRPosition::IRP_CALL_SITE_ARGUMENT: 16071de70a72SJohannes Doerfert llvm_unreachable("ICVTracker can only be created for function position!"); 1608*5dfd7cc4Ssstefan1 case IRPosition::IRP_RETURNED: 1609*5dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); 1610*5dfd7cc4Ssstefan1 break; 1611*5dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE_RETURNED: 1612*5dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); 1613*5dfd7cc4Ssstefan1 break; 1614*5dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE: 1615*5dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); 1616*5dfd7cc4Ssstefan1 break; 1617b8235d2bSsstefan1 case IRPosition::IRP_FUNCTION: 1618b8235d2bSsstefan1 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 1619b8235d2bSsstefan1 break; 1620b8235d2bSsstefan1 } 1621b8235d2bSsstefan1 1622b8235d2bSsstefan1 return *AA; 1623b8235d2bSsstefan1 } 1624b8235d2bSsstefan1 16259548b74aSJohannes Doerfert PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, 16269548b74aSJohannes Doerfert CGSCCAnalysisManager &AM, 16279548b74aSJohannes Doerfert LazyCallGraph &CG, CGSCCUpdateResult &UR) { 16289548b74aSJohannes Doerfert if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 16299548b74aSJohannes Doerfert return PreservedAnalyses::all(); 16309548b74aSJohannes Doerfert 16319548b74aSJohannes Doerfert if (DisableOpenMPOptimizations) 16329548b74aSJohannes Doerfert return PreservedAnalyses::all(); 16339548b74aSJohannes Doerfert 1634ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 1635351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 1636351d234dSRoman Lebedev bool SCCIsInteresting = !OMPInModule.getKernels().empty(); 1637351d234dSRoman Lebedev for (LazyCallGraph::Node &N : C) { 1638351d234dSRoman Lebedev Function *Fn = &N.getFunction(); 1639351d234dSRoman Lebedev SCC.push_back(Fn); 16409548b74aSJohannes Doerfert 1641351d234dSRoman Lebedev // Do we already know that the SCC contains kernels, 1642351d234dSRoman Lebedev // or that OpenMP functions are called from this SCC? 1643351d234dSRoman Lebedev if (SCCIsInteresting) 1644351d234dSRoman Lebedev continue; 1645351d234dSRoman Lebedev // If not, let's check that. 1646351d234dSRoman Lebedev SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); 1647351d234dSRoman Lebedev } 1648351d234dSRoman Lebedev 1649351d234dSRoman Lebedev if (!SCCIsInteresting || SCC.empty()) 16509548b74aSJohannes Doerfert return PreservedAnalyses::all(); 16519548b74aSJohannes Doerfert 16524d4ea9acSHuber, Joseph FunctionAnalysisManager &FAM = 16534d4ea9acSHuber, Joseph AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 16547cfd267cSsstefan1 16557cfd267cSsstefan1 AnalysisGetter AG(FAM); 16567cfd267cSsstefan1 16577cfd267cSsstefan1 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 16584d4ea9acSHuber, Joseph return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 16594d4ea9acSHuber, Joseph }; 16604d4ea9acSHuber, Joseph 16619548b74aSJohannes Doerfert CallGraphUpdater CGUpdater; 16629548b74aSJohannes Doerfert CGUpdater.initialize(CG, C, AM, UR); 16637cfd267cSsstefan1 16647cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 16657cfd267cSsstefan1 BumpPtrAllocator Allocator; 16667cfd267cSsstefan1 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 1667624d34afSJohannes Doerfert /*CGSCC*/ Functions, OMPInModule.getKernels()); 16687cfd267cSsstefan1 16691de70a72SJohannes Doerfert Attributor A(Functions, InfoCache, CGUpdater); 1670b8235d2bSsstefan1 1671b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 16729548b74aSJohannes Doerfert bool Changed = OMPOpt.run(); 1673694ded37SGiorgis Georgakoudis if (Changed) 1674694ded37SGiorgis Georgakoudis return PreservedAnalyses::none(); 1675694ded37SGiorgis Georgakoudis 16769548b74aSJohannes Doerfert return PreservedAnalyses::all(); 16779548b74aSJohannes Doerfert } 16789548b74aSJohannes Doerfert 16799548b74aSJohannes Doerfert namespace { 16809548b74aSJohannes Doerfert 16819548b74aSJohannes Doerfert struct OpenMPOptLegacyPass : public CallGraphSCCPass { 16829548b74aSJohannes Doerfert CallGraphUpdater CGUpdater; 16839548b74aSJohannes Doerfert OpenMPInModule OMPInModule; 16849548b74aSJohannes Doerfert static char ID; 16859548b74aSJohannes Doerfert 16869548b74aSJohannes Doerfert OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { 16879548b74aSJohannes Doerfert initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); 16889548b74aSJohannes Doerfert } 16899548b74aSJohannes Doerfert 16909548b74aSJohannes Doerfert void getAnalysisUsage(AnalysisUsage &AU) const override { 16919548b74aSJohannes Doerfert CallGraphSCCPass::getAnalysisUsage(AU); 16929548b74aSJohannes Doerfert } 16939548b74aSJohannes Doerfert 16949548b74aSJohannes Doerfert bool doInitialization(CallGraph &CG) override { 16959548b74aSJohannes Doerfert // Disable the pass if there is no OpenMP (runtime call) in the module. 16969548b74aSJohannes Doerfert containsOpenMP(CG.getModule(), OMPInModule); 16979548b74aSJohannes Doerfert return false; 16989548b74aSJohannes Doerfert } 16999548b74aSJohannes Doerfert 17009548b74aSJohannes Doerfert bool runOnSCC(CallGraphSCC &CGSCC) override { 17019548b74aSJohannes Doerfert if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 17029548b74aSJohannes Doerfert return false; 17039548b74aSJohannes Doerfert if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 17049548b74aSJohannes Doerfert return false; 17059548b74aSJohannes Doerfert 1706ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 1707351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 1708351d234dSRoman Lebedev bool SCCIsInteresting = !OMPInModule.getKernels().empty(); 1709351d234dSRoman Lebedev for (CallGraphNode *CGN : CGSCC) { 1710351d234dSRoman Lebedev Function *Fn = CGN->getFunction(); 1711351d234dSRoman Lebedev if (!Fn || Fn->isDeclaration()) 1712351d234dSRoman Lebedev continue; 1713ee17263aSJohannes Doerfert SCC.push_back(Fn); 17149548b74aSJohannes Doerfert 1715351d234dSRoman Lebedev // Do we already know that the SCC contains kernels, 1716351d234dSRoman Lebedev // or that OpenMP functions are called from this SCC? 1717351d234dSRoman Lebedev if (SCCIsInteresting) 1718351d234dSRoman Lebedev continue; 1719351d234dSRoman Lebedev // If not, let's check that. 1720351d234dSRoman Lebedev SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); 1721351d234dSRoman Lebedev } 1722351d234dSRoman Lebedev 1723351d234dSRoman Lebedev if (!SCCIsInteresting || SCC.empty()) 17249548b74aSJohannes Doerfert return false; 17259548b74aSJohannes Doerfert 17269548b74aSJohannes Doerfert CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 17279548b74aSJohannes Doerfert CGUpdater.initialize(CG, CGSCC); 17289548b74aSJohannes Doerfert 17294d4ea9acSHuber, Joseph // Maintain a map of functions to avoid rebuilding the ORE 17304d4ea9acSHuber, Joseph DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; 17314d4ea9acSHuber, Joseph auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { 17324d4ea9acSHuber, Joseph std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; 17334d4ea9acSHuber, Joseph if (!ORE) 17344d4ea9acSHuber, Joseph ORE = std::make_unique<OptimizationRemarkEmitter>(F); 17354d4ea9acSHuber, Joseph return *ORE; 17364d4ea9acSHuber, Joseph }; 17374d4ea9acSHuber, Joseph 17387cfd267cSsstefan1 AnalysisGetter AG; 17397cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 17407cfd267cSsstefan1 BumpPtrAllocator Allocator; 1741e8039ad4SJohannes Doerfert OMPInformationCache InfoCache( 1742e8039ad4SJohannes Doerfert *(Functions.back()->getParent()), AG, Allocator, 1743624d34afSJohannes Doerfert /*CGSCC*/ Functions, OMPInModule.getKernels()); 17447cfd267cSsstefan1 17451de70a72SJohannes Doerfert Attributor A(Functions, InfoCache, CGUpdater); 1746b8235d2bSsstefan1 1747b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 17489548b74aSJohannes Doerfert return OMPOpt.run(); 17499548b74aSJohannes Doerfert } 17509548b74aSJohannes Doerfert 17519548b74aSJohannes Doerfert bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 17529548b74aSJohannes Doerfert }; 17539548b74aSJohannes Doerfert 17549548b74aSJohannes Doerfert } // end anonymous namespace 17559548b74aSJohannes Doerfert 1756e8039ad4SJohannes Doerfert void OpenMPInModule::identifyKernels(Module &M) { 1757e8039ad4SJohannes Doerfert 1758e8039ad4SJohannes Doerfert NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); 1759e8039ad4SJohannes Doerfert if (!MD) 1760e8039ad4SJohannes Doerfert return; 1761e8039ad4SJohannes Doerfert 1762e8039ad4SJohannes Doerfert for (auto *Op : MD->operands()) { 1763e8039ad4SJohannes Doerfert if (Op->getNumOperands() < 2) 1764e8039ad4SJohannes Doerfert continue; 1765e8039ad4SJohannes Doerfert MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); 1766e8039ad4SJohannes Doerfert if (!KindID || KindID->getString() != "kernel") 1767e8039ad4SJohannes Doerfert continue; 1768e8039ad4SJohannes Doerfert 1769e8039ad4SJohannes Doerfert Function *KernelFn = 1770e8039ad4SJohannes Doerfert mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); 1771e8039ad4SJohannes Doerfert if (!KernelFn) 1772e8039ad4SJohannes Doerfert continue; 1773e8039ad4SJohannes Doerfert 1774e8039ad4SJohannes Doerfert ++NumOpenMPTargetRegionKernels; 1775e8039ad4SJohannes Doerfert 1776e8039ad4SJohannes Doerfert Kernels.insert(KernelFn); 1777e8039ad4SJohannes Doerfert } 1778e8039ad4SJohannes Doerfert } 1779e8039ad4SJohannes Doerfert 17809548b74aSJohannes Doerfert bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 17819548b74aSJohannes Doerfert if (OMPInModule.isKnown()) 17829548b74aSJohannes Doerfert return OMPInModule; 1783dce6bc18SJohannes Doerfert 1784351d234dSRoman Lebedev auto RecordFunctionsContainingUsesOf = [&](Function *F) { 1785351d234dSRoman Lebedev for (User *U : F->users()) 1786351d234dSRoman Lebedev if (auto *I = dyn_cast<Instruction>(U)) 1787351d234dSRoman Lebedev OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction()); 1788351d234dSRoman Lebedev }; 1789351d234dSRoman Lebedev 1790dce6bc18SJohannes Doerfert // MSVC doesn't like long if-else chains for some reason and instead just 1791dce6bc18SJohannes Doerfert // issues an error. Work around it.. 1792dce6bc18SJohannes Doerfert do { 17939548b74aSJohannes Doerfert #define OMP_RTL(_Enum, _Name, ...) \ 1794351d234dSRoman Lebedev if (Function *F = M.getFunction(_Name)) { \ 1795351d234dSRoman Lebedev RecordFunctionsContainingUsesOf(F); \ 1796dce6bc18SJohannes Doerfert OMPInModule = true; \ 1797dce6bc18SJohannes Doerfert } 17989548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPKinds.def" 1799dce6bc18SJohannes Doerfert } while (false); 1800e8039ad4SJohannes Doerfert 1801e8039ad4SJohannes Doerfert // Identify kernels once. TODO: We should split the OMPInformationCache into a 1802e8039ad4SJohannes Doerfert // module and an SCC part. The kernel information, among other things, could 1803e8039ad4SJohannes Doerfert // go into the module part. 1804e8039ad4SJohannes Doerfert if (OMPInModule.isKnown() && OMPInModule) { 1805e8039ad4SJohannes Doerfert OMPInModule.identifyKernels(M); 1806e8039ad4SJohannes Doerfert return true; 1807e8039ad4SJohannes Doerfert } 1808e8039ad4SJohannes Doerfert 18099548b74aSJohannes Doerfert return OMPInModule = false; 18109548b74aSJohannes Doerfert } 18119548b74aSJohannes Doerfert 18129548b74aSJohannes Doerfert char OpenMPOptLegacyPass::ID = 0; 18139548b74aSJohannes Doerfert 18149548b74aSJohannes Doerfert INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", 18159548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 18169548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 18179548b74aSJohannes Doerfert INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", 18189548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 18199548b74aSJohannes Doerfert 18209548b74aSJohannes Doerfert Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } 1821