19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 29548b74aSJohannes Doerfert // 39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69548b74aSJohannes Doerfert // 79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 89548b74aSJohannes Doerfert // 99548b74aSJohannes Doerfert // OpenMP specific optimizations: 109548b74aSJohannes Doerfert // 119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num. 129548b74aSJohannes Doerfert // 139548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 149548b74aSJohannes Doerfert 159548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h" 169548b74aSJohannes Doerfert 179548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h" 1818283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h" 199548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h" 209548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h" 219548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h" 224d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h" 233a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h" 249548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h" 25e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 2668abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h" 2768abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsAMDGPU.h" 2868abc3d2SJoseph Huber #include "llvm/IR/IntrinsicsNVPTX.h" 296fc51c9fSJoseph Huber #include "llvm/IR/PatternMatch.h" 309548b74aSJohannes Doerfert #include "llvm/InitializePasses.h" 
319548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h" 329548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h" 337cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h" 343a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h" 359548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h" 3697517055SGiorgis Georgakoudis #include "llvm/Transforms/Utils/CodeExtractor.h" 379548b74aSJohannes Doerfert 386fc51c9fSJoseph Huber using namespace llvm::PatternMatch; 399548b74aSJohannes Doerfert using namespace llvm; 409548b74aSJohannes Doerfert using namespace omp; 419548b74aSJohannes Doerfert 429548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt" 439548b74aSJohannes Doerfert 449548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations( 459548b74aSJohannes Doerfert "openmp-opt-disable", cl::ZeroOrMore, 469548b74aSJohannes Doerfert cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 479548b74aSJohannes Doerfert cl::init(false)); 489548b74aSJohannes Doerfert 493a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging( 503a6bfcf2SGiorgis Georgakoudis "openmp-opt-enable-merging", cl::ZeroOrMore, 513a6bfcf2SGiorgis Georgakoudis cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, 523a6bfcf2SGiorgis Georgakoudis cl::init(false)); 533a6bfcf2SGiorgis Georgakoudis 540f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), 550f426935Ssstefan1 cl::Hidden); 56e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", 57e8039ad4SJohannes Doerfert cl::init(false), cl::Hidden); 580f426935Ssstefan1 59496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency( 60496f8e5bSHamilton Tobon Mosquera "openmp-hide-memory-transfer-latency", 61496f8e5bSHamilton Tobon Mosquera cl::desc("[WIP] Tries to hide the latency of host to device memory" 62496f8e5bSHamilton 
Tobon Mosquera " transfers"), 63496f8e5bSHamilton Tobon Mosquera cl::Hidden, cl::init(false)); 64496f8e5bSHamilton Tobon Mosquera 659548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 669548b74aSJohannes Doerfert "Number of OpenMP runtime calls deduplicated"); 6755eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted, 6855eb714aSRoman Lebedev "Number of OpenMP parallel regions deleted"); 699548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 709548b74aSJohannes Doerfert "Number of OpenMP runtime functions identified"); 719548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 729548b74aSJohannes Doerfert "Number of OpenMP runtime function uses identified"); 73e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels, 74e8039ad4SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) identified"); 755b0581aeSJohannes Doerfert STATISTIC( 765b0581aeSJohannes Doerfert NumOpenMPParallelRegionsReplacedInGPUStateMachine, 775b0581aeSJohannes Doerfert "Number of OpenMP parallel regions replaced with ID in GPU state machines"); 783a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged, 793a6bfcf2SGiorgis Georgakoudis "Number of OpenMP parallel regions merged"); 806fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory, 816fc51c9fSJoseph Huber "Amount of memory pushed to shared memory"); 829548b74aSJohannes Doerfert 83263c4a3cSrathod-sahaab #if !defined(NDEBUG) 849548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]"; 85a50c0b0dSMikael Holmen #endif 869548b74aSJohannes Doerfert 879548b74aSJohannes Doerfert namespace { 889548b74aSJohannes Doerfert 896fc51c9fSJoseph Huber enum class AddressSpace : unsigned { 906fc51c9fSJoseph Huber Generic = 0, 916fc51c9fSJoseph Huber Global = 1, 926fc51c9fSJoseph Huber Shared = 3, 936fc51c9fSJoseph Huber Constant = 4, 946fc51c9fSJoseph Huber Local = 5, 956fc51c9fSJoseph Huber }; 966fc51c9fSJoseph Huber 
976fc51c9fSJoseph Huber struct AAHeapToShared; 986fc51c9fSJoseph Huber 99b8235d2bSsstefan1 struct AAICVTracker; 100b8235d2bSsstefan1 1017cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for 1027cfd267cSsstefan1 /// Attributor runs. 1037cfd267cSsstefan1 struct OMPInformationCache : public InformationCache { 1047cfd267cSsstefan1 OMPInformationCache(Module &M, AnalysisGetter &AG, 105624d34afSJohannes Doerfert BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, 106e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels) 107624d34afSJohannes Doerfert : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), 108624d34afSJohannes Doerfert Kernels(Kernels) { 109624d34afSJohannes Doerfert 11061238d26Ssstefan1 OMPBuilder.initialize(); 1119548b74aSJohannes Doerfert initializeRuntimeFunctions(); 1120f426935Ssstefan1 initializeInternalControlVars(); 1139548b74aSJohannes Doerfert } 1149548b74aSJohannes Doerfert 1150f426935Ssstefan1 /// Generic information that describes an internal control variable. 1160f426935Ssstefan1 struct InternalControlVarInfo { 1170f426935Ssstefan1 /// The kind, as described by InternalControlVar enum. 1180f426935Ssstefan1 InternalControlVar Kind; 1190f426935Ssstefan1 1200f426935Ssstefan1 /// The name of the ICV. 1210f426935Ssstefan1 StringRef Name; 1220f426935Ssstefan1 1230f426935Ssstefan1 /// Environment variable associated with this ICV. 1240f426935Ssstefan1 StringRef EnvVarName; 1250f426935Ssstefan1 1260f426935Ssstefan1 /// Initial value kind. 1270f426935Ssstefan1 ICVInitValue InitKind; 1280f426935Ssstefan1 1290f426935Ssstefan1 /// Initial value. 1300f426935Ssstefan1 ConstantInt *InitValue; 1310f426935Ssstefan1 1320f426935Ssstefan1 /// Setter RTL function associated with this ICV. 1330f426935Ssstefan1 RuntimeFunction Setter; 1340f426935Ssstefan1 1350f426935Ssstefan1 /// Getter RTL function associated with this ICV. 
1360f426935Ssstefan1 RuntimeFunction Getter; 1370f426935Ssstefan1 1380f426935Ssstefan1 /// RTL Function corresponding to the override clause of this ICV 1390f426935Ssstefan1 RuntimeFunction Clause; 1400f426935Ssstefan1 }; 1410f426935Ssstefan1 1429548b74aSJohannes Doerfert /// Generic information that describes a runtime function 1439548b74aSJohannes Doerfert struct RuntimeFunctionInfo { 1448855fec3SJohannes Doerfert 1459548b74aSJohannes Doerfert /// The kind, as described by the RuntimeFunction enum. 1469548b74aSJohannes Doerfert RuntimeFunction Kind; 1479548b74aSJohannes Doerfert 1489548b74aSJohannes Doerfert /// The name of the function. 1499548b74aSJohannes Doerfert StringRef Name; 1509548b74aSJohannes Doerfert 1519548b74aSJohannes Doerfert /// Flag to indicate a variadic function. 1529548b74aSJohannes Doerfert bool IsVarArg; 1539548b74aSJohannes Doerfert 1549548b74aSJohannes Doerfert /// The return type of the function. 1559548b74aSJohannes Doerfert Type *ReturnType; 1569548b74aSJohannes Doerfert 1579548b74aSJohannes Doerfert /// The argument types of the function. 1589548b74aSJohannes Doerfert SmallVector<Type *, 8> ArgumentTypes; 1599548b74aSJohannes Doerfert 1609548b74aSJohannes Doerfert /// The declaration if available. 161f09f4b26SJohannes Doerfert Function *Declaration = nullptr; 1629548b74aSJohannes Doerfert 1639548b74aSJohannes Doerfert /// Uses of this runtime function per function containing the use. 1648855fec3SJohannes Doerfert using UseVector = SmallVector<Use *, 16>; 1658855fec3SJohannes Doerfert 166b8235d2bSsstefan1 /// Clear UsesMap for runtime function. 167b8235d2bSsstefan1 void clearUsesMap() { UsesMap.clear(); } 168b8235d2bSsstefan1 16954bd3751SJohannes Doerfert /// Boolean conversion that is true if the runtime function was found. 17054bd3751SJohannes Doerfert operator bool() const { return Declaration; } 17154bd3751SJohannes Doerfert 1728855fec3SJohannes Doerfert /// Return the vector of uses in function \p F. 
1738855fec3SJohannes Doerfert UseVector &getOrCreateUseVector(Function *F) { 174b8235d2bSsstefan1 std::shared_ptr<UseVector> &UV = UsesMap[F]; 1758855fec3SJohannes Doerfert if (!UV) 176b8235d2bSsstefan1 UV = std::make_shared<UseVector>(); 1778855fec3SJohannes Doerfert return *UV; 1788855fec3SJohannes Doerfert } 1798855fec3SJohannes Doerfert 1808855fec3SJohannes Doerfert /// Return the vector of uses in function \p F or `nullptr` if there are 1818855fec3SJohannes Doerfert /// none. 1828855fec3SJohannes Doerfert const UseVector *getUseVector(Function &F) const { 18395e57072SDavid Blaikie auto I = UsesMap.find(&F); 18495e57072SDavid Blaikie if (I != UsesMap.end()) 18595e57072SDavid Blaikie return I->second.get(); 18695e57072SDavid Blaikie return nullptr; 1878855fec3SJohannes Doerfert } 1888855fec3SJohannes Doerfert 1898855fec3SJohannes Doerfert /// Return how many functions contain uses of this runtime function. 1908855fec3SJohannes Doerfert size_t getNumFunctionsWithUses() const { return UsesMap.size(); } 1919548b74aSJohannes Doerfert 1929548b74aSJohannes Doerfert /// Return the number of arguments (or the minimal number for variadic 1939548b74aSJohannes Doerfert /// functions). 1949548b74aSJohannes Doerfert size_t getNumArgs() const { return ArgumentTypes.size(); } 1959548b74aSJohannes Doerfert 1969548b74aSJohannes Doerfert /// Run the callback \p CB on each use and forget the use if the result is 1979548b74aSJohannes Doerfert /// true. The callback will be fed the function in which the use was 1989548b74aSJohannes Doerfert /// encountered as second argument. 
199624d34afSJohannes Doerfert void foreachUse(SmallVectorImpl<Function *> &SCC, 200624d34afSJohannes Doerfert function_ref<bool(Use &, Function &)> CB) { 201624d34afSJohannes Doerfert for (Function *F : SCC) 202624d34afSJohannes Doerfert foreachUse(CB, F); 203e099c7b6Ssstefan1 } 204e099c7b6Ssstefan1 205e099c7b6Ssstefan1 /// Run the callback \p CB on each use within the function \p F and forget 206e099c7b6Ssstefan1 /// the use if the result is true. 207624d34afSJohannes Doerfert void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { 2088855fec3SJohannes Doerfert SmallVector<unsigned, 8> ToBeDeleted; 2099548b74aSJohannes Doerfert ToBeDeleted.clear(); 210e099c7b6Ssstefan1 2118855fec3SJohannes Doerfert unsigned Idx = 0; 212624d34afSJohannes Doerfert UseVector &UV = getOrCreateUseVector(F); 213e099c7b6Ssstefan1 2148855fec3SJohannes Doerfert for (Use *U : UV) { 215e099c7b6Ssstefan1 if (CB(*U, *F)) 2168855fec3SJohannes Doerfert ToBeDeleted.push_back(Idx); 2178855fec3SJohannes Doerfert ++Idx; 2188855fec3SJohannes Doerfert } 2198855fec3SJohannes Doerfert 2208855fec3SJohannes Doerfert // Remove the to-be-deleted indices in reverse order as prior 221b726c557SJohannes Doerfert // modifications will not modify the smaller indices. 2228855fec3SJohannes Doerfert while (!ToBeDeleted.empty()) { 2238855fec3SJohannes Doerfert unsigned Idx = ToBeDeleted.pop_back_val(); 2248855fec3SJohannes Doerfert UV[Idx] = UV.back(); 2258855fec3SJohannes Doerfert UV.pop_back(); 2269548b74aSJohannes Doerfert } 2279548b74aSJohannes Doerfert } 2288855fec3SJohannes Doerfert 2298855fec3SJohannes Doerfert private: 2308855fec3SJohannes Doerfert /// Map from functions to all uses of this runtime function contained in 2318855fec3SJohannes Doerfert /// them. 
232b8235d2bSsstefan1 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; 2339548b74aSJohannes Doerfert }; 2349548b74aSJohannes Doerfert 2357cfd267cSsstefan1 /// An OpenMP-IR-Builder instance 2367cfd267cSsstefan1 OpenMPIRBuilder OMPBuilder; 2377cfd267cSsstefan1 2387cfd267cSsstefan1 /// Map from runtime function kind to the runtime function description. 2397cfd267cSsstefan1 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 2407cfd267cSsstefan1 RuntimeFunction::OMPRTL___last> 2417cfd267cSsstefan1 RFIs; 2427cfd267cSsstefan1 2430f426935Ssstefan1 /// Map from ICV kind to the ICV description. 2440f426935Ssstefan1 EnumeratedArray<InternalControlVarInfo, InternalControlVar, 2450f426935Ssstefan1 InternalControlVar::ICV___last> 2460f426935Ssstefan1 ICVs; 2470f426935Ssstefan1 2480f426935Ssstefan1 /// Helper to initialize all internal control variable information for those 2490f426935Ssstefan1 /// defined in OMPKinds.def. 2500f426935Ssstefan1 void initializeInternalControlVars() { 2510f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL) \ 2520f426935Ssstefan1 { \ 2530f426935Ssstefan1 auto &ICV = ICVs[_Name]; \ 2540f426935Ssstefan1 ICV.Setter = RTL; \ 2550f426935Ssstefan1 } 2560f426935Ssstefan1 #define ICV_RT_GET(Name, RTL) \ 2570f426935Ssstefan1 { \ 2580f426935Ssstefan1 auto &ICV = ICVs[Name]; \ 2590f426935Ssstefan1 ICV.Getter = RTL; \ 2600f426935Ssstefan1 } 2610f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ 2620f426935Ssstefan1 { \ 2630f426935Ssstefan1 auto &ICV = ICVs[Enum]; \ 2640f426935Ssstefan1 ICV.Name = _Name; \ 2650f426935Ssstefan1 ICV.Kind = Enum; \ 2660f426935Ssstefan1 ICV.InitKind = Init; \ 2670f426935Ssstefan1 ICV.EnvVarName = _EnvVarName; \ 2680f426935Ssstefan1 switch (ICV.InitKind) { \ 269951e43f3Ssstefan1 case ICV_IMPLEMENTATION_DEFINED: \ 2700f426935Ssstefan1 ICV.InitValue = nullptr; \ 2710f426935Ssstefan1 break; \ 272951e43f3Ssstefan1 case ICV_ZERO: \ 2736aab27baSsstefan1 ICV.InitValue = ConstantInt::get( \ 2746aab27baSsstefan1 
Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ 2750f426935Ssstefan1 break; \ 276951e43f3Ssstefan1 case ICV_FALSE: \ 2776aab27baSsstefan1 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ 2780f426935Ssstefan1 break; \ 279951e43f3Ssstefan1 case ICV_LAST: \ 2800f426935Ssstefan1 break; \ 2810f426935Ssstefan1 } \ 2820f426935Ssstefan1 } 2830f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2840f426935Ssstefan1 } 2850f426935Ssstefan1 2867cfd267cSsstefan1 /// Returns true if the function declaration \p F matches the runtime 2877cfd267cSsstefan1 /// function types, that is, return type \p RTFRetType, and argument types 2887cfd267cSsstefan1 /// \p RTFArgTypes. 2897cfd267cSsstefan1 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 2907cfd267cSsstefan1 SmallVector<Type *, 8> &RTFArgTypes) { 2917cfd267cSsstefan1 // TODO: We should output information to the user (under debug output 2927cfd267cSsstefan1 // and via remarks). 2937cfd267cSsstefan1 2947cfd267cSsstefan1 if (!F) 2957cfd267cSsstefan1 return false; 2967cfd267cSsstefan1 if (F->getReturnType() != RTFRetType) 2977cfd267cSsstefan1 return false; 2987cfd267cSsstefan1 if (F->arg_size() != RTFArgTypes.size()) 2997cfd267cSsstefan1 return false; 3007cfd267cSsstefan1 3017cfd267cSsstefan1 auto RTFTyIt = RTFArgTypes.begin(); 3027cfd267cSsstefan1 for (Argument &Arg : F->args()) { 3037cfd267cSsstefan1 if (Arg.getType() != *RTFTyIt) 3047cfd267cSsstefan1 return false; 3057cfd267cSsstefan1 3067cfd267cSsstefan1 ++RTFTyIt; 3077cfd267cSsstefan1 } 3087cfd267cSsstefan1 3097cfd267cSsstefan1 return true; 3107cfd267cSsstefan1 } 3117cfd267cSsstefan1 312b726c557SJohannes Doerfert // Helper to collect all uses of the declaration in the UsesMap. 
313b8235d2bSsstefan1 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { 3147cfd267cSsstefan1 unsigned NumUses = 0; 3157cfd267cSsstefan1 if (!RFI.Declaration) 3167cfd267cSsstefan1 return NumUses; 3177cfd267cSsstefan1 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 3187cfd267cSsstefan1 319b8235d2bSsstefan1 if (CollectStats) { 3207cfd267cSsstefan1 NumOpenMPRuntimeFunctionsIdentified += 1; 3217cfd267cSsstefan1 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 322b8235d2bSsstefan1 } 3237cfd267cSsstefan1 3247cfd267cSsstefan1 // TODO: We directly convert uses into proper calls and unknown uses. 3257cfd267cSsstefan1 for (Use &U : RFI.Declaration->uses()) { 3267cfd267cSsstefan1 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 3277cfd267cSsstefan1 if (ModuleSlice.count(UserI->getFunction())) { 3287cfd267cSsstefan1 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); 3297cfd267cSsstefan1 ++NumUses; 3307cfd267cSsstefan1 } 3317cfd267cSsstefan1 } else { 3327cfd267cSsstefan1 RFI.getOrCreateUseVector(nullptr).push_back(&U); 3337cfd267cSsstefan1 ++NumUses; 3347cfd267cSsstefan1 } 3357cfd267cSsstefan1 } 3367cfd267cSsstefan1 return NumUses; 337b8235d2bSsstefan1 } 3387cfd267cSsstefan1 33997517055SGiorgis Georgakoudis // Helper function to recollect uses of a runtime function. 34097517055SGiorgis Georgakoudis void recollectUsesForFunction(RuntimeFunction RTF) { 34197517055SGiorgis Georgakoudis auto &RFI = RFIs[RTF]; 342b8235d2bSsstefan1 RFI.clearUsesMap(); 343b8235d2bSsstefan1 collectUses(RFI, /*CollectStats*/ false); 344b8235d2bSsstefan1 } 34597517055SGiorgis Georgakoudis 34697517055SGiorgis Georgakoudis // Helper function to recollect uses of all runtime functions. 
34797517055SGiorgis Georgakoudis void recollectUses() { 34897517055SGiorgis Georgakoudis for (int Idx = 0; Idx < RFIs.size(); ++Idx) 34997517055SGiorgis Georgakoudis recollectUsesForFunction(static_cast<RuntimeFunction>(Idx)); 350b8235d2bSsstefan1 } 351b8235d2bSsstefan1 352b8235d2bSsstefan1 /// Helper to initialize all runtime function information for those defined 353b8235d2bSsstefan1 /// in OpenMPKinds.def. 354b8235d2bSsstefan1 void initializeRuntimeFunctions() { 3557cfd267cSsstefan1 Module &M = *((*ModuleSlice.begin())->getParent()); 3567cfd267cSsstefan1 3576aab27baSsstefan1 // Helper macros for handling __VA_ARGS__ in OMP_RTL 3586aab27baSsstefan1 #define OMP_TYPE(VarName, ...) \ 3596aab27baSsstefan1 Type *VarName = OMPBuilder.VarName; \ 3606aab27baSsstefan1 (void)VarName; 3616aab27baSsstefan1 3626aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...) \ 3636aab27baSsstefan1 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ 3646aab27baSsstefan1 (void)VarName##Ty; \ 3656aab27baSsstefan1 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ 3666aab27baSsstefan1 (void)VarName##PtrTy; 3676aab27baSsstefan1 3686aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...) \ 3696aab27baSsstefan1 FunctionType *VarName = OMPBuilder.VarName; \ 3706aab27baSsstefan1 (void)VarName; \ 3716aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3726aab27baSsstefan1 (void)VarName##Ptr; 3736aab27baSsstefan1 3746aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...) \ 3756aab27baSsstefan1 StructType *VarName = OMPBuilder.VarName; \ 3766aab27baSsstefan1 (void)VarName; \ 3776aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3786aab27baSsstefan1 (void)VarName##Ptr; 3796aab27baSsstefan1 3807cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) 
\ 3817cfd267cSsstefan1 { \ 3827cfd267cSsstefan1 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 3837cfd267cSsstefan1 Function *F = M.getFunction(_Name); \ 3846aab27baSsstefan1 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ 3857cfd267cSsstefan1 auto &RFI = RFIs[_Enum]; \ 3867cfd267cSsstefan1 RFI.Kind = _Enum; \ 3877cfd267cSsstefan1 RFI.Name = _Name; \ 3887cfd267cSsstefan1 RFI.IsVarArg = _IsVarArg; \ 3896aab27baSsstefan1 RFI.ReturnType = OMPBuilder._ReturnType; \ 3907cfd267cSsstefan1 RFI.ArgumentTypes = std::move(ArgsTypes); \ 3917cfd267cSsstefan1 RFI.Declaration = F; \ 392b8235d2bSsstefan1 unsigned NumUses = collectUses(RFI); \ 3937cfd267cSsstefan1 (void)NumUses; \ 3947cfd267cSsstefan1 LLVM_DEBUG({ \ 3957cfd267cSsstefan1 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 3967cfd267cSsstefan1 << " found\n"; \ 3977cfd267cSsstefan1 if (RFI.Declaration) \ 3987cfd267cSsstefan1 dbgs() << TAG << "-> got " << NumUses << " uses in " \ 3997cfd267cSsstefan1 << RFI.getNumFunctionsWithUses() \ 4007cfd267cSsstefan1 << " different functions.\n"; \ 4017cfd267cSsstefan1 }); \ 4027cfd267cSsstefan1 } \ 4037cfd267cSsstefan1 } 4047cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 4057cfd267cSsstefan1 4067cfd267cSsstefan1 // TODO: We should attach the attributes defined in OMPKinds.def. 4077cfd267cSsstefan1 } 408e8039ad4SJohannes Doerfert 409e8039ad4SJohannes Doerfert /// Collection of known kernels (\see Kernel) in the module. 410e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels; 4117cfd267cSsstefan1 }; 4127cfd267cSsstefan1 4138931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload 4148931add6SHamilton Tobon Mosquera /// array, to a vector in memory. 4158931add6SHamilton Tobon Mosquera struct OffloadArray { 4168931add6SHamilton Tobon Mosquera /// Physical array (in the IR). 
4178931add6SHamilton Tobon Mosquera AllocaInst *Array = nullptr; 4188931add6SHamilton Tobon Mosquera /// Mapped values. 4198931add6SHamilton Tobon Mosquera SmallVector<Value *, 8> StoredValues; 4208931add6SHamilton Tobon Mosquera /// Last stores made in the offload array. 4218931add6SHamilton Tobon Mosquera SmallVector<StoreInst *, 8> LastAccesses; 4228931add6SHamilton Tobon Mosquera 4238931add6SHamilton Tobon Mosquera OffloadArray() = default; 4248931add6SHamilton Tobon Mosquera 4258931add6SHamilton Tobon Mosquera /// Initializes the OffloadArray with the values stored in \p Array before 4268931add6SHamilton Tobon Mosquera /// instruction \p Before is reached. Returns false if the initialization 4278931add6SHamilton Tobon Mosquera /// fails. 4288931add6SHamilton Tobon Mosquera /// This MUST be used immediately after the construction of the object. 4298931add6SHamilton Tobon Mosquera bool initialize(AllocaInst &Array, Instruction &Before) { 4308931add6SHamilton Tobon Mosquera if (!Array.getAllocatedType()->isArrayTy()) 4318931add6SHamilton Tobon Mosquera return false; 4328931add6SHamilton Tobon Mosquera 4338931add6SHamilton Tobon Mosquera if (!getValues(Array, Before)) 4348931add6SHamilton Tobon Mosquera return false; 4358931add6SHamilton Tobon Mosquera 4368931add6SHamilton Tobon Mosquera this->Array = &Array; 4378931add6SHamilton Tobon Mosquera return true; 4388931add6SHamilton Tobon Mosquera } 4398931add6SHamilton Tobon Mosquera 440da8bec47SJoseph Huber static const unsigned DeviceIDArgNum = 1; 441da8bec47SJoseph Huber static const unsigned BasePtrsArgNum = 3; 442da8bec47SJoseph Huber static const unsigned PtrsArgNum = 4; 443da8bec47SJoseph Huber static const unsigned SizesArgNum = 5; 4441d3d9b9cSHamilton Tobon Mosquera 4458931add6SHamilton Tobon Mosquera private: 4468931add6SHamilton Tobon Mosquera /// Traverses the BasicBlock where \p Array is, collecting the stores made to 4478931add6SHamilton Tobon Mosquera /// \p Array, leaving StoredValues with the values 
stored before the 4488931add6SHamilton Tobon Mosquera /// instruction \p Before is reached. 4498931add6SHamilton Tobon Mosquera bool getValues(AllocaInst &Array, Instruction &Before) { 4508931add6SHamilton Tobon Mosquera // Initialize container. 451d08d490aSJohannes Doerfert const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); 4528931add6SHamilton Tobon Mosquera StoredValues.assign(NumValues, nullptr); 4538931add6SHamilton Tobon Mosquera LastAccesses.assign(NumValues, nullptr); 4548931add6SHamilton Tobon Mosquera 4558931add6SHamilton Tobon Mosquera // TODO: This assumes the instruction \p Before is in the same 4568931add6SHamilton Tobon Mosquera // BasicBlock as Array. Make it general, for any control flow graph. 4578931add6SHamilton Tobon Mosquera BasicBlock *BB = Array.getParent(); 4588931add6SHamilton Tobon Mosquera if (BB != Before.getParent()) 4598931add6SHamilton Tobon Mosquera return false; 4608931add6SHamilton Tobon Mosquera 4618931add6SHamilton Tobon Mosquera const DataLayout &DL = Array.getModule()->getDataLayout(); 4628931add6SHamilton Tobon Mosquera const unsigned int PointerSize = DL.getPointerSize(); 4638931add6SHamilton Tobon Mosquera 4648931add6SHamilton Tobon Mosquera for (Instruction &I : *BB) { 4658931add6SHamilton Tobon Mosquera if (&I == &Before) 4668931add6SHamilton Tobon Mosquera break; 4678931add6SHamilton Tobon Mosquera 4688931add6SHamilton Tobon Mosquera if (!isa<StoreInst>(&I)) 4698931add6SHamilton Tobon Mosquera continue; 4708931add6SHamilton Tobon Mosquera 4718931add6SHamilton Tobon Mosquera auto *S = cast<StoreInst>(&I); 4728931add6SHamilton Tobon Mosquera int64_t Offset = -1; 473d08d490aSJohannes Doerfert auto *Dst = 474d08d490aSJohannes Doerfert GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); 4758931add6SHamilton Tobon Mosquera if (Dst == &Array) { 4768931add6SHamilton Tobon Mosquera int64_t Idx = Offset / PointerSize; 4778931add6SHamilton Tobon Mosquera StoredValues[Idx] = 
getUnderlyingObject(S->getValueOperand()); 4788931add6SHamilton Tobon Mosquera LastAccesses[Idx] = S; 4798931add6SHamilton Tobon Mosquera } 4808931add6SHamilton Tobon Mosquera } 4818931add6SHamilton Tobon Mosquera 4828931add6SHamilton Tobon Mosquera return isFilled(); 4838931add6SHamilton Tobon Mosquera } 4848931add6SHamilton Tobon Mosquera 4858931add6SHamilton Tobon Mosquera /// Returns true if all values in StoredValues and 4868931add6SHamilton Tobon Mosquera /// LastAccesses are not nullptrs. 4878931add6SHamilton Tobon Mosquera bool isFilled() { 4888931add6SHamilton Tobon Mosquera const unsigned NumValues = StoredValues.size(); 4898931add6SHamilton Tobon Mosquera for (unsigned I = 0; I < NumValues; ++I) { 4908931add6SHamilton Tobon Mosquera if (!StoredValues[I] || !LastAccesses[I]) 4918931add6SHamilton Tobon Mosquera return false; 4928931add6SHamilton Tobon Mosquera } 4938931add6SHamilton Tobon Mosquera 4948931add6SHamilton Tobon Mosquera return true; 4958931add6SHamilton Tobon Mosquera } 4968931add6SHamilton Tobon Mosquera }; 4978931add6SHamilton Tobon Mosquera 4987cfd267cSsstefan1 struct OpenMPOpt { 4997cfd267cSsstefan1 5007cfd267cSsstefan1 using OptimizationRemarkGetter = 5017cfd267cSsstefan1 function_ref<OptimizationRemarkEmitter &(Function *)>; 5027cfd267cSsstefan1 5037cfd267cSsstefan1 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, 5047cfd267cSsstefan1 OptimizationRemarkGetter OREGetter, 505b8235d2bSsstefan1 OMPInformationCache &OMPInfoCache, Attributor &A) 50677b79d79SMehdi Amini : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), 507b8235d2bSsstefan1 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} 5087cfd267cSsstefan1 509a2281419SJoseph Huber /// Check if any remarks are enabled for openmp-opt 510a2281419SJoseph Huber bool remarksEnabled() { 511a2281419SJoseph Huber auto &Ctx = M.getContext(); 512a2281419SJoseph Huber return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE); 513a2281419SJoseph Huber 
} 514a2281419SJoseph Huber 5159548b74aSJohannes Doerfert /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 516b2ad63d3SJoseph Huber bool run(bool IsModulePass) { 51754bd3751SJohannes Doerfert if (SCC.empty()) 51854bd3751SJohannes Doerfert return false; 51954bd3751SJohannes Doerfert 5209548b74aSJohannes Doerfert bool Changed = false; 5219548b74aSJohannes Doerfert 5229548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 52377b79d79SMehdi Amini << " functions in a slice with " 52477b79d79SMehdi Amini << OMPInfoCache.ModuleSlice.size() << " functions\n"); 5259548b74aSJohannes Doerfert 526b2ad63d3SJoseph Huber if (IsModulePass) { 52718283125SJoseph Huber Changed |= runAttributor(); 52818283125SJoseph Huber 5296fc51c9fSJoseph Huber // Recollect uses, in case Attributor deleted any. 5306fc51c9fSJoseph Huber OMPInfoCache.recollectUses(); 5316fc51c9fSJoseph Huber 532b2ad63d3SJoseph Huber if (remarksEnabled()) 533b2ad63d3SJoseph Huber analysisGlobalization(); 534b2ad63d3SJoseph Huber } else { 535e8039ad4SJohannes Doerfert if (PrintICVValues) 536e8039ad4SJohannes Doerfert printICVs(); 537e8039ad4SJohannes Doerfert if (PrintOpenMPKernels) 538e8039ad4SJohannes Doerfert printKernels(); 539e8039ad4SJohannes Doerfert 5405b0581aeSJohannes Doerfert Changed |= rewriteDeviceCodeStateMachine(); 5415b0581aeSJohannes Doerfert 542e8039ad4SJohannes Doerfert Changed |= runAttributor(); 543e8039ad4SJohannes Doerfert 544e8039ad4SJohannes Doerfert // Recollect uses, in case Attributor deleted any. 
545e8039ad4SJohannes Doerfert OMPInfoCache.recollectUses(); 546e8039ad4SJohannes Doerfert 547e8039ad4SJohannes Doerfert Changed |= deleteParallelRegions(); 548496f8e5bSHamilton Tobon Mosquera if (HideMemoryTransferLatency) 549496f8e5bSHamilton Tobon Mosquera Changed |= hideMemTransfersLatency(); 5503a6bfcf2SGiorgis Georgakoudis Changed |= deduplicateRuntimeCalls(); 5513a6bfcf2SGiorgis Georgakoudis if (EnableParallelRegionMerging) { 5523a6bfcf2SGiorgis Georgakoudis if (mergeParallelRegions()) { 5533a6bfcf2SGiorgis Georgakoudis deduplicateRuntimeCalls(); 5543a6bfcf2SGiorgis Georgakoudis Changed = true; 5553a6bfcf2SGiorgis Georgakoudis } 5563a6bfcf2SGiorgis Georgakoudis } 557b2ad63d3SJoseph Huber } 558e8039ad4SJohannes Doerfert 559e8039ad4SJohannes Doerfert return Changed; 560e8039ad4SJohannes Doerfert } 561e8039ad4SJohannes Doerfert 5620f426935Ssstefan1 /// Print initial ICV values for testing. 5630f426935Ssstefan1 /// FIXME: This should be done from the Attributor once it is added. 564e8039ad4SJohannes Doerfert void printICVs() const { 565cb9cfa0dSsstefan1 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, 566cb9cfa0dSsstefan1 ICV_proc_bind}; 5670f426935Ssstefan1 5680f426935Ssstefan1 for (Function *F : OMPInfoCache.ModuleSlice) { 5690f426935Ssstefan1 for (auto ICV : ICVs) { 5700f426935Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 5712db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 5722db182ffSJoseph Huber return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) 5730f426935Ssstefan1 << " Value: " 5740f426935Ssstefan1 << (ICVInfo.InitValue 57561cdaf66SSimon Pilgrim ? 
toString(ICVInfo.InitValue->getValue(), 10, true) 5760f426935Ssstefan1 : "IMPLEMENTATION_DEFINED"); 5770f426935Ssstefan1 }; 5780f426935Ssstefan1 5792db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); 5800f426935Ssstefan1 } 5810f426935Ssstefan1 } 5820f426935Ssstefan1 } 5830f426935Ssstefan1 584e8039ad4SJohannes Doerfert /// Print OpenMP GPU kernels for testing. 585e8039ad4SJohannes Doerfert void printKernels() const { 586e8039ad4SJohannes Doerfert for (Function *F : SCC) { 587e8039ad4SJohannes Doerfert if (!OMPInfoCache.Kernels.count(F)) 588e8039ad4SJohannes Doerfert continue; 589b8235d2bSsstefan1 5902db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 5912db182ffSJoseph Huber return ORA << "OpenMP GPU kernel " 592e8039ad4SJohannes Doerfert << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; 593e8039ad4SJohannes Doerfert }; 594b8235d2bSsstefan1 5952db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); 596e8039ad4SJohannes Doerfert } 5979548b74aSJohannes Doerfert } 5989548b74aSJohannes Doerfert 5997cfd267cSsstefan1 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 6007cfd267cSsstefan1 /// given it has to be the callee or a nullptr is returned. 6017cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 6027cfd267cSsstefan1 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 6037cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 6047cfd267cSsstefan1 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 6057cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 6067cfd267cSsstefan1 return CI; 6077cfd267cSsstefan1 return nullptr; 6087cfd267cSsstefan1 } 6097cfd267cSsstefan1 6107cfd267cSsstefan1 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 6117cfd267cSsstefan1 /// the callee or a nullptr is returned. 
6127cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 6137cfd267cSsstefan1 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 6147cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(&V); 6157cfd267cSsstefan1 if (CI && !CI->hasOperandBundles() && 6167cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 6177cfd267cSsstefan1 return CI; 6187cfd267cSsstefan1 return nullptr; 6197cfd267cSsstefan1 } 6207cfd267cSsstefan1 6219548b74aSJohannes Doerfert private: 6223a6bfcf2SGiorgis Georgakoudis /// Merge parallel regions when it is safe. 6233a6bfcf2SGiorgis Georgakoudis bool mergeParallelRegions() { 6243a6bfcf2SGiorgis Georgakoudis const unsigned CallbackCalleeOperand = 2; 6253a6bfcf2SGiorgis Georgakoudis const unsigned CallbackFirstArgOperand = 3; 6263a6bfcf2SGiorgis Georgakoudis using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6273a6bfcf2SGiorgis Georgakoudis 6283a6bfcf2SGiorgis Georgakoudis // Check if there are any __kmpc_fork_call calls to merge. 6293a6bfcf2SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &RFI = 6303a6bfcf2SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 6313a6bfcf2SGiorgis Georgakoudis 6323a6bfcf2SGiorgis Georgakoudis if (!RFI.Declaration) 6333a6bfcf2SGiorgis Georgakoudis return false; 6343a6bfcf2SGiorgis Georgakoudis 63597517055SGiorgis Georgakoudis // Unmergable calls that prevent merging a parallel region. 
63697517055SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { 63797517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], 63897517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], 63997517055SGiorgis Georgakoudis }; 6403a6bfcf2SGiorgis Georgakoudis 6413a6bfcf2SGiorgis Georgakoudis bool Changed = false; 6423a6bfcf2SGiorgis Georgakoudis LoopInfo *LI = nullptr; 6433a6bfcf2SGiorgis Georgakoudis DominatorTree *DT = nullptr; 6443a6bfcf2SGiorgis Georgakoudis 6453a6bfcf2SGiorgis Georgakoudis SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; 6463a6bfcf2SGiorgis Georgakoudis 6473a6bfcf2SGiorgis Georgakoudis BasicBlock *StartBB = nullptr, *EndBB = nullptr; 6483a6bfcf2SGiorgis Georgakoudis auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 6493a6bfcf2SGiorgis Georgakoudis BasicBlock &ContinuationIP) { 6503a6bfcf2SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock(); 6513a6bfcf2SGiorgis Georgakoudis BasicBlock *CGEndBB = 6523a6bfcf2SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 6533a6bfcf2SGiorgis Georgakoudis assert(StartBB != nullptr && "StartBB should not be null"); 6543a6bfcf2SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, StartBB); 6553a6bfcf2SGiorgis Georgakoudis assert(EndBB != nullptr && "EndBB should not be null"); 6563a6bfcf2SGiorgis Georgakoudis EndBB->getTerminator()->setSuccessor(0, CGEndBB); 6573a6bfcf2SGiorgis Georgakoudis }; 6583a6bfcf2SGiorgis Georgakoudis 659240dd924SAlex Zinenko auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 660240dd924SAlex Zinenko Value &Inner, Value *&ReplacementValue) -> InsertPointTy { 661240dd924SAlex Zinenko ReplacementValue = &Inner; 6623a6bfcf2SGiorgis Georgakoudis return CodeGenIP; 6633a6bfcf2SGiorgis Georgakoudis }; 6643a6bfcf2SGiorgis Georgakoudis 6653a6bfcf2SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy 
CodeGenIP) {}; 6663a6bfcf2SGiorgis Georgakoudis 66797517055SGiorgis Georgakoudis /// Create a sequential execution region within a merged parallel region, 66897517055SGiorgis Georgakoudis /// encapsulated in a master construct with a barrier for synchronization. 66997517055SGiorgis Georgakoudis auto CreateSequentialRegion = [&](Function *OuterFn, 67097517055SGiorgis Georgakoudis BasicBlock *OuterPredBB, 67197517055SGiorgis Georgakoudis Instruction *SeqStartI, 67297517055SGiorgis Georgakoudis Instruction *SeqEndI) { 67397517055SGiorgis Georgakoudis // Isolate the instructions of the sequential region to a separate 67497517055SGiorgis Georgakoudis // block. 67597517055SGiorgis Georgakoudis BasicBlock *ParentBB = SeqStartI->getParent(); 67697517055SGiorgis Georgakoudis BasicBlock *SeqEndBB = 67797517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); 67897517055SGiorgis Georgakoudis BasicBlock *SeqAfterBB = 67997517055SGiorgis Georgakoudis SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); 68097517055SGiorgis Georgakoudis BasicBlock *SeqStartBB = 68197517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); 68297517055SGiorgis Georgakoudis 68397517055SGiorgis Georgakoudis assert(ParentBB->getUniqueSuccessor() == SeqStartBB && 68497517055SGiorgis Georgakoudis "Expected a different CFG"); 68597517055SGiorgis Georgakoudis const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 68697517055SGiorgis Georgakoudis ParentBB->getTerminator()->eraseFromParent(); 68797517055SGiorgis Georgakoudis 68897517055SGiorgis Georgakoudis auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 68997517055SGiorgis Georgakoudis BasicBlock &ContinuationIP) { 69097517055SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock(); 69197517055SGiorgis Georgakoudis BasicBlock *CGEndBB = 69297517055SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 69397517055SGiorgis 
Georgakoudis assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); 69497517055SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); 69597517055SGiorgis Georgakoudis assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); 69697517055SGiorgis Georgakoudis SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); 69797517055SGiorgis Georgakoudis }; 69897517055SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 69997517055SGiorgis Georgakoudis 70097517055SGiorgis Georgakoudis // Find outputs from the sequential region to outside users and 70197517055SGiorgis Georgakoudis // broadcast their values to them. 70297517055SGiorgis Georgakoudis for (Instruction &I : *SeqStartBB) { 70397517055SGiorgis Georgakoudis SmallPtrSet<Instruction *, 4> OutsideUsers; 70497517055SGiorgis Georgakoudis for (User *Usr : I.users()) { 70597517055SGiorgis Georgakoudis Instruction &UsrI = *cast<Instruction>(Usr); 70697517055SGiorgis Georgakoudis // Ignore outputs to LT intrinsics, code extraction for the merged 70797517055SGiorgis Georgakoudis // parallel region will fix them. 70897517055SGiorgis Georgakoudis if (UsrI.isLifetimeStartOrEnd()) 70997517055SGiorgis Georgakoudis continue; 71097517055SGiorgis Georgakoudis 71197517055SGiorgis Georgakoudis if (UsrI.getParent() != SeqStartBB) 71297517055SGiorgis Georgakoudis OutsideUsers.insert(&UsrI); 71397517055SGiorgis Georgakoudis } 71497517055SGiorgis Georgakoudis 71597517055SGiorgis Georgakoudis if (OutsideUsers.empty()) 71697517055SGiorgis Georgakoudis continue; 71797517055SGiorgis Georgakoudis 71897517055SGiorgis Georgakoudis // Emit an alloca in the outer region to store the broadcasted 71997517055SGiorgis Georgakoudis // value. 
72097517055SGiorgis Georgakoudis const DataLayout &DL = M.getDataLayout(); 72197517055SGiorgis Georgakoudis AllocaInst *AllocaI = new AllocaInst( 72297517055SGiorgis Georgakoudis I.getType(), DL.getAllocaAddrSpace(), nullptr, 72397517055SGiorgis Georgakoudis I.getName() + ".seq.output.alloc", &OuterFn->front().front()); 72497517055SGiorgis Georgakoudis 72597517055SGiorgis Georgakoudis // Emit a store instruction in the sequential BB to update the 72697517055SGiorgis Georgakoudis // value. 72797517055SGiorgis Georgakoudis new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); 72897517055SGiorgis Georgakoudis 72997517055SGiorgis Georgakoudis // Emit a load instruction and replace the use of the output value 73097517055SGiorgis Georgakoudis // with it. 73197517055SGiorgis Georgakoudis for (Instruction *UsrI : OutsideUsers) { 7325b70c12fSJohannes Doerfert LoadInst *LoadI = new LoadInst( 7335b70c12fSJohannes Doerfert I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); 73497517055SGiorgis Georgakoudis UsrI->replaceUsesOfWith(&I, LoadI); 73597517055SGiorgis Georgakoudis } 73697517055SGiorgis Georgakoudis } 73797517055SGiorgis Georgakoudis 73897517055SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc( 73997517055SGiorgis Georgakoudis InsertPointTy(ParentBB, ParentBB->end()), DL); 74097517055SGiorgis Georgakoudis InsertPointTy SeqAfterIP = 74197517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); 74297517055SGiorgis Georgakoudis 74397517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); 74497517055SGiorgis Georgakoudis 74597517055SGiorgis Georgakoudis BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); 74697517055SGiorgis Georgakoudis 74797517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn 74897517055SGiorgis Georgakoudis << "\n"); 74997517055SGiorgis Georgakoudis }; 75097517055SGiorgis Georgakoudis 7513a6bfcf2SGiorgis 
Georgakoudis // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all 7523a6bfcf2SGiorgis Georgakoudis // contained in BB and only separated by instructions that can be 7533a6bfcf2SGiorgis Georgakoudis // redundantly executed in parallel. The block BB is split before the first 7543a6bfcf2SGiorgis Georgakoudis // call (in MergableCIs) and after the last so the entire region we merge 7553a6bfcf2SGiorgis Georgakoudis // into a single parallel region is contained in a single basic block 7563a6bfcf2SGiorgis Georgakoudis // without any other instructions. We use the OpenMPIRBuilder to outline 7573a6bfcf2SGiorgis Georgakoudis // that block and call the resulting function via __kmpc_fork_call. 7583a6bfcf2SGiorgis Georgakoudis auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) { 7593a6bfcf2SGiorgis Georgakoudis // TODO: Change the interface to allow single CIs expanded, e.g, to 7603a6bfcf2SGiorgis Georgakoudis // include an outer loop. 7613a6bfcf2SGiorgis Georgakoudis assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); 7623a6bfcf2SGiorgis Georgakoudis 7633a6bfcf2SGiorgis Georgakoudis auto Remark = [&](OptimizationRemark OR) { 7643a6bfcf2SGiorgis Georgakoudis OR << "Parallel region at " 7653a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMergeFront", 7663a6bfcf2SGiorgis Georgakoudis MergableCIs.front()->getDebugLoc()) 7673a6bfcf2SGiorgis Georgakoudis << " merged with parallel regions at "; 76823b0ab2aSKazu Hirata for (auto *CI : llvm::drop_begin(MergableCIs)) { 7693a6bfcf2SGiorgis Georgakoudis OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); 7703a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back()) 7713a6bfcf2SGiorgis Georgakoudis OR << ", "; 7723a6bfcf2SGiorgis Georgakoudis } 7733a6bfcf2SGiorgis Georgakoudis return OR; 7743a6bfcf2SGiorgis Georgakoudis }; 7753a6bfcf2SGiorgis Georgakoudis 7763a6bfcf2SGiorgis Georgakoudis emitRemark<OptimizationRemark>(MergableCIs.front(), 7773a6bfcf2SGiorgis 
Georgakoudis "OpenMPParallelRegionMerging", Remark); 7783a6bfcf2SGiorgis Georgakoudis 7793a6bfcf2SGiorgis Georgakoudis Function *OriginalFn = BB->getParent(); 7803a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() 7813a6bfcf2SGiorgis Georgakoudis << " parallel regions in " << OriginalFn->getName() 7823a6bfcf2SGiorgis Georgakoudis << "\n"); 7833a6bfcf2SGiorgis Georgakoudis 7843a6bfcf2SGiorgis Georgakoudis // Isolate the calls to merge in a separate block. 7853a6bfcf2SGiorgis Georgakoudis EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); 7863a6bfcf2SGiorgis Georgakoudis BasicBlock *AfterBB = 7873a6bfcf2SGiorgis Georgakoudis SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); 7883a6bfcf2SGiorgis Georgakoudis StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, 7893a6bfcf2SGiorgis Georgakoudis "omp.par.merged"); 7903a6bfcf2SGiorgis Georgakoudis 7913a6bfcf2SGiorgis Georgakoudis assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG"); 7923a6bfcf2SGiorgis Georgakoudis const DebugLoc DL = BB->getTerminator()->getDebugLoc(); 7933a6bfcf2SGiorgis Georgakoudis BB->getTerminator()->eraseFromParent(); 7943a6bfcf2SGiorgis Georgakoudis 79597517055SGiorgis Georgakoudis // Create sequential regions for sequential instructions that are 79697517055SGiorgis Georgakoudis // in-between mergable parallel regions. 79797517055SGiorgis Georgakoudis for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; 79897517055SGiorgis Georgakoudis It != End; ++It) { 79997517055SGiorgis Georgakoudis Instruction *ForkCI = *It; 80097517055SGiorgis Georgakoudis Instruction *NextForkCI = *(It + 1); 80197517055SGiorgis Georgakoudis 80297517055SGiorgis Georgakoudis // Continue if there are not in-between instructions. 
80397517055SGiorgis Georgakoudis if (ForkCI->getNextNode() == NextForkCI) 80497517055SGiorgis Georgakoudis continue; 80597517055SGiorgis Georgakoudis 80697517055SGiorgis Georgakoudis CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), 80797517055SGiorgis Georgakoudis NextForkCI->getPrevNode()); 80897517055SGiorgis Georgakoudis } 80997517055SGiorgis Georgakoudis 8103a6bfcf2SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), 8113a6bfcf2SGiorgis Georgakoudis DL); 8123a6bfcf2SGiorgis Georgakoudis IRBuilder<>::InsertPoint AllocaIP( 8133a6bfcf2SGiorgis Georgakoudis &OriginalFn->getEntryBlock(), 8143a6bfcf2SGiorgis Georgakoudis OriginalFn->getEntryBlock().getFirstInsertionPt()); 8153a6bfcf2SGiorgis Georgakoudis // Create the merged parallel region with default proc binding, to 8163a6bfcf2SGiorgis Georgakoudis // avoid overriding binding settings, and without explicit cancellation. 817e5dba2d7SMichael Kruse InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( 8183a6bfcf2SGiorgis Georgakoudis Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, 8193a6bfcf2SGiorgis Georgakoudis OMP_PROC_BIND_default, /* IsCancellable */ false); 8203a6bfcf2SGiorgis Georgakoudis BranchInst::Create(AfterBB, AfterIP.getBlock()); 8213a6bfcf2SGiorgis Georgakoudis 8223a6bfcf2SGiorgis Georgakoudis // Perform the actual outlining. 823b1191206SMichael Kruse OMPInfoCache.OMPBuilder.finalize(OriginalFn, 824b1191206SMichael Kruse /* AllowExtractorSinking */ true); 8253a6bfcf2SGiorgis Georgakoudis 8263a6bfcf2SGiorgis Georgakoudis Function *OutlinedFn = MergableCIs.front()->getCaller(); 8273a6bfcf2SGiorgis Georgakoudis 8283a6bfcf2SGiorgis Georgakoudis // Replace the __kmpc_fork_call calls with direct calls to the outlined 8293a6bfcf2SGiorgis Georgakoudis // callbacks. 
8303a6bfcf2SGiorgis Georgakoudis SmallVector<Value *, 8> Args; 8313a6bfcf2SGiorgis Georgakoudis for (auto *CI : MergableCIs) { 8323a6bfcf2SGiorgis Georgakoudis Value *Callee = 8333a6bfcf2SGiorgis Georgakoudis CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); 8343a6bfcf2SGiorgis Georgakoudis FunctionType *FT = 8353a6bfcf2SGiorgis Georgakoudis cast<FunctionType>(Callee->getType()->getPointerElementType()); 8363a6bfcf2SGiorgis Georgakoudis Args.clear(); 8373a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(0)); 8383a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(1)); 8393a6bfcf2SGiorgis Georgakoudis for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); 8403a6bfcf2SGiorgis Georgakoudis U < E; ++U) 8413a6bfcf2SGiorgis Georgakoudis Args.push_back(CI->getArgOperand(U)); 8423a6bfcf2SGiorgis Georgakoudis 8433a6bfcf2SGiorgis Georgakoudis CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); 8443a6bfcf2SGiorgis Georgakoudis if (CI->getDebugLoc()) 8453a6bfcf2SGiorgis Georgakoudis NewCI->setDebugLoc(CI->getDebugLoc()); 8463a6bfcf2SGiorgis Georgakoudis 8473a6bfcf2SGiorgis Georgakoudis // Forward parameter attributes from the callback to the callee. 8483a6bfcf2SGiorgis Georgakoudis for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); 8493a6bfcf2SGiorgis Georgakoudis U < E; ++U) 8503a6bfcf2SGiorgis Georgakoudis for (const Attribute &A : CI->getAttributes().getParamAttributes(U)) 8513a6bfcf2SGiorgis Georgakoudis NewCI->addParamAttr( 8523a6bfcf2SGiorgis Georgakoudis U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); 8533a6bfcf2SGiorgis Georgakoudis 8543a6bfcf2SGiorgis Georgakoudis // Emit an explicit barrier to replace the implicit fork-join barrier. 8553a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back()) { 8563a6bfcf2SGiorgis Georgakoudis // TODO: Remove barrier if the merged parallel region includes the 8573a6bfcf2SGiorgis Georgakoudis // 'nowait' clause. 
858e5dba2d7SMichael Kruse OMPInfoCache.OMPBuilder.createBarrier( 8593a6bfcf2SGiorgis Georgakoudis InsertPointTy(NewCI->getParent(), 8603a6bfcf2SGiorgis Georgakoudis NewCI->getNextNode()->getIterator()), 8613a6bfcf2SGiorgis Georgakoudis OMPD_parallel); 8623a6bfcf2SGiorgis Georgakoudis } 8633a6bfcf2SGiorgis Georgakoudis 8643a6bfcf2SGiorgis Georgakoudis auto Remark = [&](OptimizationRemark OR) { 8653a6bfcf2SGiorgis Georgakoudis return OR << "Parallel region at " 8663a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()) 8673a6bfcf2SGiorgis Georgakoudis << " merged with " 8683a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMergeFront", 8693a6bfcf2SGiorgis Georgakoudis MergableCIs.front()->getDebugLoc()); 8703a6bfcf2SGiorgis Georgakoudis }; 8713a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.front()) 8723a6bfcf2SGiorgis Georgakoudis emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging", 8733a6bfcf2SGiorgis Georgakoudis Remark); 8743a6bfcf2SGiorgis Georgakoudis 8753a6bfcf2SGiorgis Georgakoudis CI->eraseFromParent(); 8763a6bfcf2SGiorgis Georgakoudis } 8773a6bfcf2SGiorgis Georgakoudis 8783a6bfcf2SGiorgis Georgakoudis assert(OutlinedFn != OriginalFn && "Outlining failed"); 8797fea561eSArthur Eubanks CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); 8803a6bfcf2SGiorgis Georgakoudis CGUpdater.reanalyzeFunction(*OriginalFn); 8813a6bfcf2SGiorgis Georgakoudis 8823a6bfcf2SGiorgis Georgakoudis NumOpenMPParallelRegionsMerged += MergableCIs.size(); 8833a6bfcf2SGiorgis Georgakoudis 8843a6bfcf2SGiorgis Georgakoudis return true; 8853a6bfcf2SGiorgis Georgakoudis }; 8863a6bfcf2SGiorgis Georgakoudis 8873a6bfcf2SGiorgis Georgakoudis // Helper function that identifes sequences of 8883a6bfcf2SGiorgis Georgakoudis // __kmpc_fork_call uses in a basic block. 
8893a6bfcf2SGiorgis Georgakoudis auto DetectPRsCB = [&](Use &U, Function &F) { 8903a6bfcf2SGiorgis Georgakoudis CallInst *CI = getCallIfRegularCall(U, &RFI); 8913a6bfcf2SGiorgis Georgakoudis BB2PRMap[CI->getParent()].insert(CI); 8923a6bfcf2SGiorgis Georgakoudis 8933a6bfcf2SGiorgis Georgakoudis return false; 8943a6bfcf2SGiorgis Georgakoudis }; 8953a6bfcf2SGiorgis Georgakoudis 8963a6bfcf2SGiorgis Georgakoudis BB2PRMap.clear(); 8973a6bfcf2SGiorgis Georgakoudis RFI.foreachUse(SCC, DetectPRsCB); 8983a6bfcf2SGiorgis Georgakoudis SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; 8993a6bfcf2SGiorgis Georgakoudis // Find mergable parallel regions within a basic block that are 9003a6bfcf2SGiorgis Georgakoudis // safe to merge, that is any in-between instructions can safely 9013a6bfcf2SGiorgis Georgakoudis // execute in parallel after merging. 9023a6bfcf2SGiorgis Georgakoudis // TODO: support merging across basic-blocks. 9033a6bfcf2SGiorgis Georgakoudis for (auto &It : BB2PRMap) { 9043a6bfcf2SGiorgis Georgakoudis auto &CIs = It.getSecond(); 9053a6bfcf2SGiorgis Georgakoudis if (CIs.size() < 2) 9063a6bfcf2SGiorgis Georgakoudis continue; 9073a6bfcf2SGiorgis Georgakoudis 9083a6bfcf2SGiorgis Georgakoudis BasicBlock *BB = It.getFirst(); 9093a6bfcf2SGiorgis Georgakoudis SmallVector<CallInst *, 4> MergableCIs; 9103a6bfcf2SGiorgis Georgakoudis 91197517055SGiorgis Georgakoudis /// Returns true if the instruction is mergable, false otherwise. 91297517055SGiorgis Georgakoudis /// A terminator instruction is unmergable by definition since merging 91397517055SGiorgis Georgakoudis /// works within a BB. Instructions before the mergable region are 91497517055SGiorgis Georgakoudis /// mergable if they are not calls to OpenMP runtime functions that may 91597517055SGiorgis Georgakoudis /// set different execution parameters for subsequent parallel regions. 
91697517055SGiorgis Georgakoudis /// Instructions in-between parallel regions are mergable if they are not 91797517055SGiorgis Georgakoudis /// calls to any non-intrinsic function since that may call a non-mergable 91897517055SGiorgis Georgakoudis /// OpenMP runtime function. 91997517055SGiorgis Georgakoudis auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { 92097517055SGiorgis Georgakoudis // We do not merge across BBs, hence return false (unmergable) if the 92197517055SGiorgis Georgakoudis // instruction is a terminator. 92297517055SGiorgis Georgakoudis if (I.isTerminator()) 92397517055SGiorgis Georgakoudis return false; 92497517055SGiorgis Georgakoudis 92597517055SGiorgis Georgakoudis if (!isa<CallInst>(&I)) 92697517055SGiorgis Georgakoudis return true; 92797517055SGiorgis Georgakoudis 92897517055SGiorgis Georgakoudis CallInst *CI = cast<CallInst>(&I); 92997517055SGiorgis Georgakoudis if (IsBeforeMergableRegion) { 93097517055SGiorgis Georgakoudis Function *CalledFunction = CI->getCalledFunction(); 93197517055SGiorgis Georgakoudis if (!CalledFunction) 93297517055SGiorgis Georgakoudis return false; 93397517055SGiorgis Georgakoudis // Return false (unmergable) if the call before the parallel 93497517055SGiorgis Georgakoudis // region calls an explicit affinity (proc_bind) or number of 93597517055SGiorgis Georgakoudis // threads (num_threads) compiler-generated function. Those settings 93697517055SGiorgis Georgakoudis // may be incompatible with following parallel regions. 93797517055SGiorgis Georgakoudis // TODO: ICV tracking to detect compatibility. 
93897517055SGiorgis Georgakoudis for (const auto &RFI : UnmergableCallsInfo) { 93997517055SGiorgis Georgakoudis if (CalledFunction == RFI.Declaration) 94097517055SGiorgis Georgakoudis return false; 94197517055SGiorgis Georgakoudis } 94297517055SGiorgis Georgakoudis } else { 94397517055SGiorgis Georgakoudis // Return false (unmergable) if there is a call instruction 94497517055SGiorgis Georgakoudis // in-between parallel regions when it is not an intrinsic. It 94597517055SGiorgis Georgakoudis // may call an unmergable OpenMP runtime function in its callpath. 94697517055SGiorgis Georgakoudis // TODO: Keep track of possible OpenMP calls in the callpath. 94797517055SGiorgis Georgakoudis if (!isa<IntrinsicInst>(CI)) 94897517055SGiorgis Georgakoudis return false; 94997517055SGiorgis Georgakoudis } 95097517055SGiorgis Georgakoudis 95197517055SGiorgis Georgakoudis return true; 95297517055SGiorgis Georgakoudis }; 9533a6bfcf2SGiorgis Georgakoudis // Find maximal number of parallel region CIs that are safe to merge. 95497517055SGiorgis Georgakoudis for (auto It = BB->begin(), End = BB->end(); It != End;) { 95597517055SGiorgis Georgakoudis Instruction &I = *It; 95697517055SGiorgis Georgakoudis ++It; 95797517055SGiorgis Georgakoudis 9583a6bfcf2SGiorgis Georgakoudis if (CIs.count(&I)) { 9593a6bfcf2SGiorgis Georgakoudis MergableCIs.push_back(cast<CallInst>(&I)); 9603a6bfcf2SGiorgis Georgakoudis continue; 9613a6bfcf2SGiorgis Georgakoudis } 9623a6bfcf2SGiorgis Georgakoudis 96397517055SGiorgis Georgakoudis // Continue expanding if the instruction is mergable. 96497517055SGiorgis Georgakoudis if (IsMergable(I, MergableCIs.empty())) 9653a6bfcf2SGiorgis Georgakoudis continue; 9663a6bfcf2SGiorgis Georgakoudis 96797517055SGiorgis Georgakoudis // Forward the instruction iterator to skip the next parallel region 96897517055SGiorgis Georgakoudis // since there is an unmergable instruction which can affect it. 
96997517055SGiorgis Georgakoudis for (; It != End; ++It) { 97097517055SGiorgis Georgakoudis Instruction &SkipI = *It; 97197517055SGiorgis Georgakoudis if (CIs.count(&SkipI)) { 97297517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI 97397517055SGiorgis Georgakoudis << " due to " << I << "\n"); 97497517055SGiorgis Georgakoudis ++It; 97597517055SGiorgis Georgakoudis break; 97697517055SGiorgis Georgakoudis } 97797517055SGiorgis Georgakoudis } 97897517055SGiorgis Georgakoudis 97997517055SGiorgis Georgakoudis // Store mergable regions found. 9803a6bfcf2SGiorgis Georgakoudis if (MergableCIs.size() > 1) { 9813a6bfcf2SGiorgis Georgakoudis MergableCIsVector.push_back(MergableCIs); 9823a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() 9833a6bfcf2SGiorgis Georgakoudis << " parallel regions in block " << BB->getName() 9843a6bfcf2SGiorgis Georgakoudis << " of function " << BB->getParent()->getName() 9853a6bfcf2SGiorgis Georgakoudis << "\n";); 9863a6bfcf2SGiorgis Georgakoudis } 9873a6bfcf2SGiorgis Georgakoudis 9883a6bfcf2SGiorgis Georgakoudis MergableCIs.clear(); 9893a6bfcf2SGiorgis Georgakoudis } 9903a6bfcf2SGiorgis Georgakoudis 9913a6bfcf2SGiorgis Georgakoudis if (!MergableCIsVector.empty()) { 9923a6bfcf2SGiorgis Georgakoudis Changed = true; 9933a6bfcf2SGiorgis Georgakoudis 9943a6bfcf2SGiorgis Georgakoudis for (auto &MergableCIs : MergableCIsVector) 9953a6bfcf2SGiorgis Georgakoudis Merge(MergableCIs, BB); 996b2ad63d3SJoseph Huber MergableCIsVector.clear(); 9973a6bfcf2SGiorgis Georgakoudis } 9983a6bfcf2SGiorgis Georgakoudis } 9993a6bfcf2SGiorgis Georgakoudis 10003a6bfcf2SGiorgis Georgakoudis if (Changed) { 100197517055SGiorgis Georgakoudis /// Re-collect use for fork calls, emitted barrier calls, and 100297517055SGiorgis Georgakoudis /// any emitted master/end_master calls. 
100397517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); 100497517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); 100597517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); 100697517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); 10073a6bfcf2SGiorgis Georgakoudis } 10083a6bfcf2SGiorgis Georgakoudis 10093a6bfcf2SGiorgis Georgakoudis return Changed; 10103a6bfcf2SGiorgis Georgakoudis } 10113a6bfcf2SGiorgis Georgakoudis 10129d38f98dSJohannes Doerfert /// Try to delete parallel regions if possible. 1013e565db49SJohannes Doerfert bool deleteParallelRegions() { 1014e565db49SJohannes Doerfert const unsigned CallbackCalleeOperand = 2; 1015e565db49SJohannes Doerfert 10167cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI = 10177cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 10187cfd267cSsstefan1 1019e565db49SJohannes Doerfert if (!RFI.Declaration) 1020e565db49SJohannes Doerfert return false; 1021e565db49SJohannes Doerfert 1022e565db49SJohannes Doerfert bool Changed = false; 1023e565db49SJohannes Doerfert auto DeleteCallCB = [&](Use &U, Function &) { 1024e565db49SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U); 1025e565db49SJohannes Doerfert if (!CI) 1026e565db49SJohannes Doerfert return false; 1027e565db49SJohannes Doerfert auto *Fn = dyn_cast<Function>( 1028e565db49SJohannes Doerfert CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 1029e565db49SJohannes Doerfert if (!Fn) 1030e565db49SJohannes Doerfert return false; 1031e565db49SJohannes Doerfert if (!Fn->onlyReadsMemory()) 1032e565db49SJohannes Doerfert return false; 1033e565db49SJohannes Doerfert if (!Fn->hasFnAttribute(Attribute::WillReturn)) 1034e565db49SJohannes Doerfert return false; 1035e565db49SJohannes Doerfert 1036e565db49SJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 
1037e565db49SJohannes Doerfert << CI->getCaller()->getName() << "\n"); 10384d4ea9acSHuber, Joseph 10394d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 10404d4ea9acSHuber, Joseph return OR << "Parallel region in " 10414d4ea9acSHuber, Joseph << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) 10424d4ea9acSHuber, Joseph << " deleted"; 10434d4ea9acSHuber, Joseph }; 10444d4ea9acSHuber, Joseph emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", 10454d4ea9acSHuber, Joseph Remark); 10464d4ea9acSHuber, Joseph 1047e565db49SJohannes Doerfert CGUpdater.removeCallSite(*CI); 1048e565db49SJohannes Doerfert CI->eraseFromParent(); 1049e565db49SJohannes Doerfert Changed = true; 105055eb714aSRoman Lebedev ++NumOpenMPParallelRegionsDeleted; 1051e565db49SJohannes Doerfert return true; 1052e565db49SJohannes Doerfert }; 1053e565db49SJohannes Doerfert 1054624d34afSJohannes Doerfert RFI.foreachUse(SCC, DeleteCallCB); 1055e565db49SJohannes Doerfert 1056e565db49SJohannes Doerfert return Changed; 1057e565db49SJohannes Doerfert } 1058e565db49SJohannes Doerfert 1059b726c557SJohannes Doerfert /// Try to eliminate runtime calls by reusing existing ones. 
10609548b74aSJohannes Doerfert bool deduplicateRuntimeCalls() { 10619548b74aSJohannes Doerfert bool Changed = false; 10629548b74aSJohannes Doerfert 1063e28936f6SJohannes Doerfert RuntimeFunction DeduplicableRuntimeCallIDs[] = { 1064e28936f6SJohannes Doerfert OMPRTL_omp_get_num_threads, 1065e28936f6SJohannes Doerfert OMPRTL_omp_in_parallel, 1066e28936f6SJohannes Doerfert OMPRTL_omp_get_cancellation, 1067e28936f6SJohannes Doerfert OMPRTL_omp_get_thread_limit, 1068e28936f6SJohannes Doerfert OMPRTL_omp_get_supported_active_levels, 1069e28936f6SJohannes Doerfert OMPRTL_omp_get_level, 1070e28936f6SJohannes Doerfert OMPRTL_omp_get_ancestor_thread_num, 1071e28936f6SJohannes Doerfert OMPRTL_omp_get_team_size, 1072e28936f6SJohannes Doerfert OMPRTL_omp_get_active_level, 1073e28936f6SJohannes Doerfert OMPRTL_omp_in_final, 1074e28936f6SJohannes Doerfert OMPRTL_omp_get_proc_bind, 1075e28936f6SJohannes Doerfert OMPRTL_omp_get_num_places, 1076e28936f6SJohannes Doerfert OMPRTL_omp_get_num_procs, 1077e28936f6SJohannes Doerfert OMPRTL_omp_get_place_num, 1078e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_num_places, 1079e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_place_nums}; 1080e28936f6SJohannes Doerfert 1081bc93c2d7SMarek Kurdej // Global-tid is handled separately. 
10829548b74aSJohannes Doerfert SmallSetVector<Value *, 16> GTIdArgs; 10839548b74aSJohannes Doerfert collectGlobalThreadIdArguments(GTIdArgs); 10849548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 10859548b74aSJohannes Doerfert << " global thread ID arguments\n"); 10869548b74aSJohannes Doerfert 10879548b74aSJohannes Doerfert for (Function *F : SCC) { 1088e28936f6SJohannes Doerfert for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 10894e29d256Sserge-sans-paille Changed |= deduplicateRuntimeCalls( 10904e29d256Sserge-sans-paille *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 1091e28936f6SJohannes Doerfert 1092e28936f6SJohannes Doerfert // __kmpc_global_thread_num is special as we can replace it with an 1093e28936f6SJohannes Doerfert // argument in enough cases to make it worth trying. 10949548b74aSJohannes Doerfert Value *GTIdArg = nullptr; 10959548b74aSJohannes Doerfert for (Argument &Arg : F->args()) 10969548b74aSJohannes Doerfert if (GTIdArgs.count(&Arg)) { 10979548b74aSJohannes Doerfert GTIdArg = &Arg; 10989548b74aSJohannes Doerfert break; 10999548b74aSJohannes Doerfert } 11009548b74aSJohannes Doerfert Changed |= deduplicateRuntimeCalls( 11017cfd267cSsstefan1 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 11029548b74aSJohannes Doerfert } 11039548b74aSJohannes Doerfert 11049548b74aSJohannes Doerfert return Changed; 11059548b74aSJohannes Doerfert } 11069548b74aSJohannes Doerfert 1107496f8e5bSHamilton Tobon Mosquera /// Tries to hide the latency of runtime calls that involve host to 1108496f8e5bSHamilton Tobon Mosquera /// device memory transfers by splitting them into their "issue" and "wait" 1109496f8e5bSHamilton Tobon Mosquera /// versions. The "issue" is moved upwards as much as possible. The "wait" is 1110496f8e5bSHamilton Tobon Mosquera /// moved downards as much as possible. The "issue" issues the memory transfer 1111496f8e5bSHamilton Tobon Mosquera /// asynchronously, returning a handle. 
The "wait" waits in the returned 1112496f8e5bSHamilton Tobon Mosquera /// handle for the memory transfer to finish. 1113496f8e5bSHamilton Tobon Mosquera bool hideMemTransfersLatency() { 1114496f8e5bSHamilton Tobon Mosquera auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; 1115496f8e5bSHamilton Tobon Mosquera bool Changed = false; 1116496f8e5bSHamilton Tobon Mosquera auto SplitMemTransfers = [&](Use &U, Function &Decl) { 1117496f8e5bSHamilton Tobon Mosquera auto *RTCall = getCallIfRegularCall(U, &RFI); 1118496f8e5bSHamilton Tobon Mosquera if (!RTCall) 1119496f8e5bSHamilton Tobon Mosquera return false; 1120496f8e5bSHamilton Tobon Mosquera 11218931add6SHamilton Tobon Mosquera OffloadArray OffloadArrays[3]; 11228931add6SHamilton Tobon Mosquera if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) 11238931add6SHamilton Tobon Mosquera return false; 11248931add6SHamilton Tobon Mosquera 11258931add6SHamilton Tobon Mosquera LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays)); 11268931add6SHamilton Tobon Mosquera 1127bd2fa181SHamilton Tobon Mosquera // TODO: Check if can be moved upwards. 
1128bd2fa181SHamilton Tobon Mosquera bool WasSplit = false; 1129bd2fa181SHamilton Tobon Mosquera Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); 1130bd2fa181SHamilton Tobon Mosquera if (WaitMovementPoint) 1131bd2fa181SHamilton Tobon Mosquera WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); 1132bd2fa181SHamilton Tobon Mosquera 1133496f8e5bSHamilton Tobon Mosquera Changed |= WasSplit; 1134496f8e5bSHamilton Tobon Mosquera return WasSplit; 1135496f8e5bSHamilton Tobon Mosquera }; 1136496f8e5bSHamilton Tobon Mosquera RFI.foreachUse(SCC, SplitMemTransfers); 1137496f8e5bSHamilton Tobon Mosquera 1138496f8e5bSHamilton Tobon Mosquera return Changed; 1139496f8e5bSHamilton Tobon Mosquera } 1140496f8e5bSHamilton Tobon Mosquera 1141a2281419SJoseph Huber void analysisGlobalization() { 11426fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 114382453e75SJoseph Huber 114482453e75SJoseph Huber auto CheckGlobalization = [&](Use &U, Function &Decl) { 1145a2281419SJoseph Huber if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { 1146a2281419SJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 1147a2281419SJoseph Huber return ORA 1148a2281419SJoseph Huber << "Found thread data sharing on the GPU. " 1149a2281419SJoseph Huber << "Expect degraded performance due to data globalization."; 1150a2281419SJoseph Huber }; 1151a2281419SJoseph Huber emitRemark<OptimizationRemarkAnalysis>(CI, "OpenMPGlobalization", 1152a2281419SJoseph Huber Remark); 1153a2281419SJoseph Huber } 1154a2281419SJoseph Huber 1155a2281419SJoseph Huber return false; 1156a2281419SJoseph Huber }; 1157a2281419SJoseph Huber 115882453e75SJoseph Huber RFI.foreachUse(SCC, CheckGlobalization); 115982453e75SJoseph Huber } 1160a2281419SJoseph Huber 11618931add6SHamilton Tobon Mosquera /// Maps the values stored in the offload arrays passed as arguments to 11628931add6SHamilton Tobon Mosquera /// \p RuntimeCall into the offload arrays in \p OAs. 
11638931add6SHamilton Tobon Mosquera bool getValuesInOffloadArrays(CallInst &RuntimeCall, 11648931add6SHamilton Tobon Mosquera MutableArrayRef<OffloadArray> OAs) { 11658931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "Need space for three offload arrays!"); 11668931add6SHamilton Tobon Mosquera 11678931add6SHamilton Tobon Mosquera // A runtime call that involves memory offloading looks something like: 11688931add6SHamilton Tobon Mosquera // call void @__tgt_target_data_begin_mapper(arg0, arg1, 11698931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, 11708931add6SHamilton Tobon Mosquera // ...) 11718931add6SHamilton Tobon Mosquera // So, the idea is to access the allocas that allocate space for these 11728931add6SHamilton Tobon Mosquera // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. 11738931add6SHamilton Tobon Mosquera // Therefore: 11748931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs. 11751d3d9b9cSHamilton Tobon Mosquera Value *BasePtrsArg = 11761d3d9b9cSHamilton Tobon Mosquera RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); 11778931add6SHamilton Tobon Mosquera // i8** %offload_ptrs. 11781d3d9b9cSHamilton Tobon Mosquera Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); 11798931add6SHamilton Tobon Mosquera // i8** %offload_sizes. 11801d3d9b9cSHamilton Tobon Mosquera Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); 11818931add6SHamilton Tobon Mosquera 11828931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs. 
11838931add6SHamilton Tobon Mosquera auto *V = getUnderlyingObject(BasePtrsArg); 11848931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 11858931add6SHamilton Tobon Mosquera return false; 11868931add6SHamilton Tobon Mosquera auto *BasePtrsArray = cast<AllocaInst>(V); 11878931add6SHamilton Tobon Mosquera if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) 11888931add6SHamilton Tobon Mosquera return false; 11898931add6SHamilton Tobon Mosquera 11908931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs. 11918931add6SHamilton Tobon Mosquera V = getUnderlyingObject(PtrsArg); 11928931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 11938931add6SHamilton Tobon Mosquera return false; 11948931add6SHamilton Tobon Mosquera auto *PtrsArray = cast<AllocaInst>(V); 11958931add6SHamilton Tobon Mosquera if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) 11968931add6SHamilton Tobon Mosquera return false; 11978931add6SHamilton Tobon Mosquera 11988931add6SHamilton Tobon Mosquera // Get values stored in **offload_sizes. 11998931add6SHamilton Tobon Mosquera V = getUnderlyingObject(SizesArg); 12008931add6SHamilton Tobon Mosquera // If it's a [constant] global array don't analyze it. 12018931add6SHamilton Tobon Mosquera if (isa<GlobalValue>(V)) 12028931add6SHamilton Tobon Mosquera return isa<Constant>(V); 12038931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 12048931add6SHamilton Tobon Mosquera return false; 12058931add6SHamilton Tobon Mosquera 12068931add6SHamilton Tobon Mosquera auto *SizesArray = cast<AllocaInst>(V); 12078931add6SHamilton Tobon Mosquera if (!OAs[2].initialize(*SizesArray, RuntimeCall)) 12088931add6SHamilton Tobon Mosquera return false; 12098931add6SHamilton Tobon Mosquera 12108931add6SHamilton Tobon Mosquera return true; 12118931add6SHamilton Tobon Mosquera } 12128931add6SHamilton Tobon Mosquera 12138931add6SHamilton Tobon Mosquera /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. 
12148931add6SHamilton Tobon Mosquera /// For now this is a way to test that the function getValuesInOffloadArrays 12158931add6SHamilton Tobon Mosquera /// is working properly. 12168931add6SHamilton Tobon Mosquera /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. 12178931add6SHamilton Tobon Mosquera void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { 12188931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "There are three offload arrays to debug!"); 12198931add6SHamilton Tobon Mosquera 12208931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n"); 12218931add6SHamilton Tobon Mosquera std::string ValuesStr; 12228931add6SHamilton Tobon Mosquera raw_string_ostream Printer(ValuesStr); 12238931add6SHamilton Tobon Mosquera std::string Separator = " --- "; 12248931add6SHamilton Tobon Mosquera 12258931add6SHamilton Tobon Mosquera for (auto *BP : OAs[0].StoredValues) { 12268931add6SHamilton Tobon Mosquera BP->print(Printer); 12278931add6SHamilton Tobon Mosquera Printer << Separator; 12288931add6SHamilton Tobon Mosquera } 12298931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"); 12308931add6SHamilton Tobon Mosquera ValuesStr.clear(); 12318931add6SHamilton Tobon Mosquera 12328931add6SHamilton Tobon Mosquera for (auto *P : OAs[1].StoredValues) { 12338931add6SHamilton Tobon Mosquera P->print(Printer); 12348931add6SHamilton Tobon Mosquera Printer << Separator; 12358931add6SHamilton Tobon Mosquera } 12368931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"); 12378931add6SHamilton Tobon Mosquera ValuesStr.clear(); 12388931add6SHamilton Tobon Mosquera 12398931add6SHamilton Tobon Mosquera for (auto *S : OAs[2].StoredValues) { 12408931add6SHamilton Tobon Mosquera S->print(Printer); 12418931add6SHamilton Tobon Mosquera Printer << Separator; 12428931add6SHamilton Tobon Mosquera } 12438931add6SHamilton 
Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"); 12448931add6SHamilton Tobon Mosquera } 12458931add6SHamilton Tobon Mosquera 1246bd2fa181SHamilton Tobon Mosquera /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be 1247bd2fa181SHamilton Tobon Mosquera /// moved. Returns nullptr if the movement is not possible, or not worth it. 1248bd2fa181SHamilton Tobon Mosquera Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { 1249bd2fa181SHamilton Tobon Mosquera // FIXME: This traverses only the BasicBlock where RuntimeCall is. 1250bd2fa181SHamilton Tobon Mosquera // Make it traverse the CFG. 1251bd2fa181SHamilton Tobon Mosquera 1252bd2fa181SHamilton Tobon Mosquera Instruction *CurrentI = &RuntimeCall; 1253bd2fa181SHamilton Tobon Mosquera bool IsWorthIt = false; 1254bd2fa181SHamilton Tobon Mosquera while ((CurrentI = CurrentI->getNextNode())) { 1255bd2fa181SHamilton Tobon Mosquera 1256bd2fa181SHamilton Tobon Mosquera // TODO: Once we detect the regions to be offloaded we should use the 1257bd2fa181SHamilton Tobon Mosquera // alias analysis manager to check if CurrentI may modify one of 1258bd2fa181SHamilton Tobon Mosquera // the offloaded regions. 1259bd2fa181SHamilton Tobon Mosquera if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { 1260bd2fa181SHamilton Tobon Mosquera if (IsWorthIt) 1261bd2fa181SHamilton Tobon Mosquera return CurrentI; 1262bd2fa181SHamilton Tobon Mosquera 1263bd2fa181SHamilton Tobon Mosquera return nullptr; 1264bd2fa181SHamilton Tobon Mosquera } 1265bd2fa181SHamilton Tobon Mosquera 1266bd2fa181SHamilton Tobon Mosquera // FIXME: For now if we move it over anything without side effect 1267bd2fa181SHamilton Tobon Mosquera // is worth it. 1268bd2fa181SHamilton Tobon Mosquera IsWorthIt = true; 1269bd2fa181SHamilton Tobon Mosquera } 1270bd2fa181SHamilton Tobon Mosquera 1271bd2fa181SHamilton Tobon Mosquera // Return end of BasicBlock. 
1272bd2fa181SHamilton Tobon Mosquera return RuntimeCall.getParent()->getTerminator(); 1273bd2fa181SHamilton Tobon Mosquera } 1274bd2fa181SHamilton Tobon Mosquera 1275496f8e5bSHamilton Tobon Mosquera /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. 1276bd2fa181SHamilton Tobon Mosquera bool splitTargetDataBeginRTC(CallInst &RuntimeCall, 1277bd2fa181SHamilton Tobon Mosquera Instruction &WaitMovementPoint) { 1278bd31abc1SHamilton Tobon Mosquera // Create stack allocated handle (__tgt_async_info) at the beginning of the 1279bd31abc1SHamilton Tobon Mosquera // function. Used for storing information of the async transfer, allowing to 1280bd31abc1SHamilton Tobon Mosquera // wait on it later. 1281496f8e5bSHamilton Tobon Mosquera auto &IRBuilder = OMPInfoCache.OMPBuilder; 1282bd31abc1SHamilton Tobon Mosquera auto *F = RuntimeCall.getCaller(); 1283bd31abc1SHamilton Tobon Mosquera Instruction *FirstInst = &(F->getEntryBlock().front()); 1284bd31abc1SHamilton Tobon Mosquera AllocaInst *Handle = new AllocaInst( 1285bd31abc1SHamilton Tobon Mosquera IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); 1286bd31abc1SHamilton Tobon Mosquera 1287496f8e5bSHamilton Tobon Mosquera // Add "issue" runtime call declaration: 1288496f8e5bSHamilton Tobon Mosquera // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, 1289496f8e5bSHamilton Tobon Mosquera // i8**, i8**, i64*, i64*) 1290496f8e5bSHamilton Tobon Mosquera FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( 1291496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_issue); 1292496f8e5bSHamilton Tobon Mosquera 1293496f8e5bSHamilton Tobon Mosquera // Change RuntimeCall call site for its asynchronous version. 
129497e55cfeSJoseph Huber SmallVector<Value *, 16> Args; 1295bd2fa181SHamilton Tobon Mosquera for (auto &Arg : RuntimeCall.args()) 1296496f8e5bSHamilton Tobon Mosquera Args.push_back(Arg.get()); 1297bd31abc1SHamilton Tobon Mosquera Args.push_back(Handle); 1298496f8e5bSHamilton Tobon Mosquera 1299496f8e5bSHamilton Tobon Mosquera CallInst *IssueCallsite = 1300bd31abc1SHamilton Tobon Mosquera CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); 1301bd2fa181SHamilton Tobon Mosquera RuntimeCall.eraseFromParent(); 1302496f8e5bSHamilton Tobon Mosquera 1303496f8e5bSHamilton Tobon Mosquera // Add "wait" runtime call declaration: 1304496f8e5bSHamilton Tobon Mosquera // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) 1305496f8e5bSHamilton Tobon Mosquera FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( 1306496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_wait); 1307496f8e5bSHamilton Tobon Mosquera 1308496f8e5bSHamilton Tobon Mosquera Value *WaitParams[2] = { 1309da8bec47SJoseph Huber IssueCallsite->getArgOperand( 1310da8bec47SJoseph Huber OffloadArray::DeviceIDArgNum), // device_id. 1311bd31abc1SHamilton Tobon Mosquera Handle // handle to wait on. 1312496f8e5bSHamilton Tobon Mosquera }; 1313bd2fa181SHamilton Tobon Mosquera CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); 1314496f8e5bSHamilton Tobon Mosquera 1315496f8e5bSHamilton Tobon Mosquera return true; 1316496f8e5bSHamilton Tobon Mosquera } 1317496f8e5bSHamilton Tobon Mosquera 1318dc3b5b00SJohannes Doerfert static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, 1319dc3b5b00SJohannes Doerfert bool GlobalOnly, bool &SingleChoice) { 1320dc3b5b00SJohannes Doerfert if (CurrentIdent == NextIdent) 1321dc3b5b00SJohannes Doerfert return CurrentIdent; 1322dc3b5b00SJohannes Doerfert 1323396b7253SJohannes Doerfert // TODO: Figure out how to actually combine multiple debug locations. 
For 1324dc3b5b00SJohannes Doerfert // now we just keep an existing one if there is a single choice. 1325dc3b5b00SJohannes Doerfert if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { 1326dc3b5b00SJohannes Doerfert SingleChoice = !CurrentIdent; 1327dc3b5b00SJohannes Doerfert return NextIdent; 1328dc3b5b00SJohannes Doerfert } 1329396b7253SJohannes Doerfert return nullptr; 1330396b7253SJohannes Doerfert } 1331396b7253SJohannes Doerfert 1332396b7253SJohannes Doerfert /// Return an `struct ident_t*` value that represents the ones used in the 1333396b7253SJohannes Doerfert /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 1334396b7253SJohannes Doerfert /// return a local `struct ident_t*`. For now, if we cannot find a suitable 1335396b7253SJohannes Doerfert /// return value we create one from scratch. We also do not yet combine 1336396b7253SJohannes Doerfert /// information, e.g., the source locations, see combinedIdentStruct. 13377cfd267cSsstefan1 Value * 13387cfd267cSsstefan1 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, 13397cfd267cSsstefan1 Function &F, bool GlobalOnly) { 1340dc3b5b00SJohannes Doerfert bool SingleChoice = true; 1341396b7253SJohannes Doerfert Value *Ident = nullptr; 1342396b7253SJohannes Doerfert auto CombineIdentStruct = [&](Use &U, Function &Caller) { 1343396b7253SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 1344396b7253SJohannes Doerfert if (!CI || &F != &Caller) 1345396b7253SJohannes Doerfert return false; 1346396b7253SJohannes Doerfert Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 1347dc3b5b00SJohannes Doerfert /* GlobalOnly */ true, SingleChoice); 1348396b7253SJohannes Doerfert return false; 1349396b7253SJohannes Doerfert }; 1350624d34afSJohannes Doerfert RFI.foreachUse(SCC, CombineIdentStruct); 1351396b7253SJohannes Doerfert 1352dc3b5b00SJohannes Doerfert if (!Ident || !SingleChoice) { 1353396b7253SJohannes Doerfert // The IRBuilder uses the insertion block to 
get to the module, this is 1354396b7253SJohannes Doerfert // unfortunate but we work around it for now. 13557cfd267cSsstefan1 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) 13567cfd267cSsstefan1 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 1357396b7253SJohannes Doerfert &F.getEntryBlock(), F.getEntryBlock().begin())); 1358396b7253SJohannes Doerfert // Create a fallback location if non was found. 1359396b7253SJohannes Doerfert // TODO: Use the debug locations of the calls instead. 13607cfd267cSsstefan1 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); 13617cfd267cSsstefan1 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); 1362396b7253SJohannes Doerfert } 1363396b7253SJohannes Doerfert return Ident; 1364396b7253SJohannes Doerfert } 1365396b7253SJohannes Doerfert 1366b726c557SJohannes Doerfert /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 13679548b74aSJohannes Doerfert /// \p ReplVal if given. 13687cfd267cSsstefan1 bool deduplicateRuntimeCalls(Function &F, 13697cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI, 13709548b74aSJohannes Doerfert Value *ReplVal = nullptr) { 13718855fec3SJohannes Doerfert auto *UV = RFI.getUseVector(F); 13728855fec3SJohannes Doerfert if (!UV || UV->size() + (ReplVal != nullptr) < 2) 1373b1fbf438SRoman Lebedev return false; 1374b1fbf438SRoman Lebedev 13757cfd267cSsstefan1 LLVM_DEBUG( 13767cfd267cSsstefan1 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 13777cfd267cSsstefan1 << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 13787cfd267cSsstefan1 1379ab3da5ddSMichael Liao assert((!ReplVal || (isa<Argument>(ReplVal) && 1380ab3da5ddSMichael Liao cast<Argument>(ReplVal)->getParent() == &F)) && 13819548b74aSJohannes Doerfert "Unexpected replacement value!"); 1382396b7253SJohannes Doerfert 1383396b7253SJohannes Doerfert // TODO: Use dominance to find a good position instead. 
13846aab27baSsstefan1 auto CanBeMoved = [this](CallBase &CB) { 1385396b7253SJohannes Doerfert unsigned NumArgs = CB.getNumArgOperands(); 1386396b7253SJohannes Doerfert if (NumArgs == 0) 1387396b7253SJohannes Doerfert return true; 13886aab27baSsstefan1 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 1389396b7253SJohannes Doerfert return false; 1390396b7253SJohannes Doerfert for (unsigned u = 1; u < NumArgs; ++u) 1391396b7253SJohannes Doerfert if (isa<Instruction>(CB.getArgOperand(u))) 1392396b7253SJohannes Doerfert return false; 1393396b7253SJohannes Doerfert return true; 1394396b7253SJohannes Doerfert }; 1395396b7253SJohannes Doerfert 13969548b74aSJohannes Doerfert if (!ReplVal) { 13978855fec3SJohannes Doerfert for (Use *U : *UV) 13989548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 1399396b7253SJohannes Doerfert if (!CanBeMoved(*CI)) 1400396b7253SJohannes Doerfert continue; 14014d4ea9acSHuber, Joseph 14024d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 14034d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 14042db182ffSJoseph Huber << ore::NV("OpenMPOptRuntime", RFI.Name) 14052db182ffSJoseph Huber << " moved to beginning of OpenMP region"; 14064d4ea9acSHuber, Joseph }; 14072db182ffSJoseph Huber emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark); 14084d4ea9acSHuber, Joseph 14099548b74aSJohannes Doerfert CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 14109548b74aSJohannes Doerfert ReplVal = CI; 14119548b74aSJohannes Doerfert break; 14129548b74aSJohannes Doerfert } 14139548b74aSJohannes Doerfert if (!ReplVal) 14149548b74aSJohannes Doerfert return false; 14159548b74aSJohannes Doerfert } 14169548b74aSJohannes Doerfert 1417396b7253SJohannes Doerfert // If we use a call as a replacement value we need to make sure the ident is 1418396b7253SJohannes Doerfert // valid at the new location. 
For now we just pick a global one, either 1419396b7253SJohannes Doerfert // existing and used by one of the calls, or created from scratch. 1420396b7253SJohannes Doerfert if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 1421396b7253SJohannes Doerfert if (CI->getNumArgOperands() > 0 && 14226aab27baSsstefan1 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 1423396b7253SJohannes Doerfert Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 1424396b7253SJohannes Doerfert /* GlobalOnly */ true); 1425396b7253SJohannes Doerfert CI->setArgOperand(0, Ident); 1426396b7253SJohannes Doerfert } 1427396b7253SJohannes Doerfert } 1428396b7253SJohannes Doerfert 14299548b74aSJohannes Doerfert bool Changed = false; 14309548b74aSJohannes Doerfert auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 14319548b74aSJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 14329548b74aSJohannes Doerfert if (!CI || CI == ReplVal || &F != &Caller) 14339548b74aSJohannes Doerfert return false; 14349548b74aSJohannes Doerfert assert(CI->getCaller() == &F && "Unexpected call!"); 14354d4ea9acSHuber, Joseph 14364d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 14374d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 14384d4ea9acSHuber, Joseph << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; 14394d4ea9acSHuber, Joseph }; 14402db182ffSJoseph Huber emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark); 14414d4ea9acSHuber, Joseph 14429548b74aSJohannes Doerfert CGUpdater.removeCallSite(*CI); 14439548b74aSJohannes Doerfert CI->replaceAllUsesWith(ReplVal); 14449548b74aSJohannes Doerfert CI->eraseFromParent(); 14459548b74aSJohannes Doerfert ++NumOpenMPRuntimeCallsDeduplicated; 14469548b74aSJohannes Doerfert Changed = true; 14479548b74aSJohannes Doerfert return true; 14489548b74aSJohannes Doerfert }; 1449624d34afSJohannes Doerfert RFI.foreachUse(SCC, ReplaceAndDeleteCB); 14509548b74aSJohannes Doerfert 
14519548b74aSJohannes Doerfert return Changed; 14529548b74aSJohannes Doerfert } 14539548b74aSJohannes Doerfert 14549548b74aSJohannes Doerfert /// Collect arguments that represent the global thread id in \p GTIdArgs. 14559548b74aSJohannes Doerfert void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 14569548b74aSJohannes Doerfert // TODO: Below we basically perform a fixpoint iteration with a pessimistic 14579548b74aSJohannes Doerfert // initialization. We could define an AbstractAttribute instead and 14589548b74aSJohannes Doerfert // run the Attributor here once it can be run as an SCC pass. 14599548b74aSJohannes Doerfert 14609548b74aSJohannes Doerfert // Helper to check the argument \p ArgNo at all call sites of \p F for 14619548b74aSJohannes Doerfert // a GTId. 14629548b74aSJohannes Doerfert auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 14639548b74aSJohannes Doerfert if (!F.hasLocalLinkage()) 14649548b74aSJohannes Doerfert return false; 14659548b74aSJohannes Doerfert for (Use &U : F.uses()) { 14669548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U)) { 14679548b74aSJohannes Doerfert Value *ArgOp = CI->getArgOperand(ArgNo); 14689548b74aSJohannes Doerfert if (CI == &RefCI || GTIdArgs.count(ArgOp) || 14697cfd267cSsstefan1 getCallIfRegularCall( 14707cfd267cSsstefan1 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 14719548b74aSJohannes Doerfert continue; 14729548b74aSJohannes Doerfert } 14739548b74aSJohannes Doerfert return false; 14749548b74aSJohannes Doerfert } 14759548b74aSJohannes Doerfert return true; 14769548b74aSJohannes Doerfert }; 14779548b74aSJohannes Doerfert 14789548b74aSJohannes Doerfert // Helper to identify uses of a GTId as GTId arguments. 
14799548b74aSJohannes Doerfert auto AddUserArgs = [&](Value >Id) { 14809548b74aSJohannes Doerfert for (Use &U : GTId.uses()) 14819548b74aSJohannes Doerfert if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 14829548b74aSJohannes Doerfert if (CI->isArgOperand(&U)) 14839548b74aSJohannes Doerfert if (Function *Callee = CI->getCalledFunction()) 14849548b74aSJohannes Doerfert if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 14859548b74aSJohannes Doerfert GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 14869548b74aSJohannes Doerfert }; 14879548b74aSJohannes Doerfert 14889548b74aSJohannes Doerfert // The argument users of __kmpc_global_thread_num calls are GTIds. 14897cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 14907cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 14917cfd267cSsstefan1 1492624d34afSJohannes Doerfert GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 14938855fec3SJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 14949548b74aSJohannes Doerfert AddUserArgs(*CI); 14958855fec3SJohannes Doerfert return false; 14968855fec3SJohannes Doerfert }); 14979548b74aSJohannes Doerfert 14989548b74aSJohannes Doerfert // Transitively search for more arguments by looking at the users of the 14999548b74aSJohannes Doerfert // ones we know already. During the search the GTIdArgs vector is extended 15009548b74aSJohannes Doerfert // so we cannot cache the size nor can we use a range based for. 15019548b74aSJohannes Doerfert for (unsigned u = 0; u < GTIdArgs.size(); ++u) 15029548b74aSJohannes Doerfert AddUserArgs(*GTIdArgs[u]); 15039548b74aSJohannes Doerfert } 15049548b74aSJohannes Doerfert 15055b0581aeSJohannes Doerfert /// Kernel (=GPU) optimizations and utility functions 15065b0581aeSJohannes Doerfert /// 15075b0581aeSJohannes Doerfert ///{{ 15085b0581aeSJohannes Doerfert 15095b0581aeSJohannes Doerfert /// Check if \p F is a kernel, hence entry point for target offloading. 
bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }

/// Cache to remember the unique kernel for a function. An entry without a
/// value means the kernel has not been computed yet.
DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;

/// Find the unique kernel that will execute \p F, if any.
Kernel getUniqueKernelFor(Function &F);

/// Find the unique kernel that will execute \p I, if any. This is the unique
/// kernel of the function containing \p I.
Kernel getUniqueKernelFor(Instruction &I) {
  return getUniqueKernelFor(*I.getFunction());
}

/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases we can avoid taking the address of a function.
bool rewriteDeviceCodeStateMachine();

///
///}}

/// Emit a remark generically
///
/// This template function can be used to generically emit a remark.
The 15334d4ea9acSHuber, Joseph /// RemarkKind should be one of the following: 15344d4ea9acSHuber, Joseph /// - OptimizationRemark to indicate a successful optimization attempt 15354d4ea9acSHuber, Joseph /// - OptimizationRemarkMissed to report a failed optimization attempt 15364d4ea9acSHuber, Joseph /// - OptimizationRemarkAnalysis to provide additional information about an 15374d4ea9acSHuber, Joseph /// optimization attempt 15384d4ea9acSHuber, Joseph /// 15394d4ea9acSHuber, Joseph /// The remark is built using a callback function provided by the caller that 15404d4ea9acSHuber, Joseph /// takes a RemarkKind as input and returns a RemarkKind. 15412db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack> 15422db182ffSJoseph Huber void emitRemark(Instruction *I, StringRef RemarkName, 1543e8039ad4SJohannes Doerfert RemarkCallBack &&RemarkCB) const { 15442db182ffSJoseph Huber Function *F = I->getParent()->getParent(); 15454d4ea9acSHuber, Joseph auto &ORE = OREGetter(F); 15464d4ea9acSHuber, Joseph 15472db182ffSJoseph Huber ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); 15484d4ea9acSHuber, Joseph } 15494d4ea9acSHuber, Joseph 15502db182ffSJoseph Huber /// Emit a remark on a function. 15512db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack> 15522db182ffSJoseph Huber void emitRemark(Function *F, StringRef RemarkName, 15532db182ffSJoseph Huber RemarkCallBack &&RemarkCB) const { 15540f426935Ssstefan1 auto &ORE = OREGetter(F); 15550f426935Ssstefan1 15562db182ffSJoseph Huber ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); 15570f426935Ssstefan1 } 15580f426935Ssstefan1 1559b726c557SJohannes Doerfert /// The underlying module. 15609548b74aSJohannes Doerfert Module &M; 15619548b74aSJohannes Doerfert 15629548b74aSJohannes Doerfert /// The SCC we are operating on. 
1563ee17263aSJohannes Doerfert SmallVectorImpl<Function *> &SCC; 15649548b74aSJohannes Doerfert 15659548b74aSJohannes Doerfert /// Callback to update the call graph, the first argument is a removed call, 15669548b74aSJohannes Doerfert /// the second an optional replacement call. 15679548b74aSJohannes Doerfert CallGraphUpdater &CGUpdater; 15689548b74aSJohannes Doerfert 15694d4ea9acSHuber, Joseph /// Callback to get an OptimizationRemarkEmitter from a Function * 15704d4ea9acSHuber, Joseph OptimizationRemarkGetter OREGetter; 15714d4ea9acSHuber, Joseph 15727cfd267cSsstefan1 /// OpenMP-specific information cache. Also Used for Attributor runs. 15737cfd267cSsstefan1 OMPInformationCache &OMPInfoCache; 1574b8235d2bSsstefan1 1575b8235d2bSsstefan1 /// Attributor instance. 1576b8235d2bSsstefan1 Attributor &A; 1577b8235d2bSsstefan1 1578b8235d2bSsstefan1 /// Helper function to run Attributor on SCC. 1579b8235d2bSsstefan1 bool runAttributor() { 1580b8235d2bSsstefan1 if (SCC.empty()) 1581b8235d2bSsstefan1 return false; 1582b8235d2bSsstefan1 1583b8235d2bSsstefan1 registerAAs(); 1584b8235d2bSsstefan1 1585b8235d2bSsstefan1 ChangeStatus Changed = A.run(); 1586b8235d2bSsstefan1 1587b8235d2bSsstefan1 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 1588b8235d2bSsstefan1 << " functions, result: " << Changed << ".\n"); 1589b8235d2bSsstefan1 1590b8235d2bSsstefan1 return Changed == ChangeStatus::CHANGED; 1591b8235d2bSsstefan1 } 1592b8235d2bSsstefan1 1593b8235d2bSsstefan1 /// Populate the Attributor with abstract attribute opportunities in the 1594b8235d2bSsstefan1 /// function. 1595b8235d2bSsstefan1 void registerAAs() { 15965dfd7cc4Ssstefan1 if (SCC.empty()) 15975dfd7cc4Ssstefan1 return; 1598b8235d2bSsstefan1 15995dfd7cc4Ssstefan1 // Create CallSite AA for all Getters. 
16005dfd7cc4Ssstefan1 for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { 16015dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; 16025dfd7cc4Ssstefan1 16035dfd7cc4Ssstefan1 auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 16045dfd7cc4Ssstefan1 16055dfd7cc4Ssstefan1 auto CreateAA = [&](Use &U, Function &Caller) { 16065dfd7cc4Ssstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 16075dfd7cc4Ssstefan1 if (!CI) 16085dfd7cc4Ssstefan1 return false; 16095dfd7cc4Ssstefan1 16105dfd7cc4Ssstefan1 auto &CB = cast<CallBase>(*CI); 16115dfd7cc4Ssstefan1 16125dfd7cc4Ssstefan1 IRPosition CBPos = IRPosition::callsite_function(CB); 16135dfd7cc4Ssstefan1 A.getOrCreateAAFor<AAICVTracker>(CBPos); 16145dfd7cc4Ssstefan1 return false; 16155dfd7cc4Ssstefan1 }; 16165dfd7cc4Ssstefan1 16175dfd7cc4Ssstefan1 GetterRFI.foreachUse(SCC, CreateAA); 1618b8235d2bSsstefan1 } 16196fc51c9fSJoseph Huber auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 16206fc51c9fSJoseph Huber auto CreateAA = [&](Use &U, Function &F) { 16216fc51c9fSJoseph Huber A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); 16226fc51c9fSJoseph Huber return false; 16236fc51c9fSJoseph Huber }; 16246fc51c9fSJoseph Huber GlobalizationRFI.foreachUse(SCC, CreateAA); 162518283125SJoseph Huber 1626*7d69da71SJoseph Huber // Create an ExecutionDomain AA for every function and a HeapToStack AA for 1627*7d69da71SJoseph Huber // every function if there is a device kernel. 
162803d7e61cSJoseph Huber for (auto *F : SCC) { 162903d7e61cSJoseph Huber if (!F->isDeclaration()) 163003d7e61cSJoseph Huber A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F)); 1631*7d69da71SJoseph Huber if (!OMPInfoCache.Kernels.empty()) 1632*7d69da71SJoseph Huber A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F)); 163318283125SJoseph Huber } 1634b8235d2bSsstefan1 } 1635b8235d2bSsstefan1 }; 1636b8235d2bSsstefan1 16375b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 16385b0581aeSJohannes Doerfert if (!OMPInfoCache.ModuleSlice.count(&F)) 16395b0581aeSJohannes Doerfert return nullptr; 16405b0581aeSJohannes Doerfert 16415b0581aeSJohannes Doerfert // Use a scope to keep the lifetime of the CachedKernel short. 16425b0581aeSJohannes Doerfert { 16435b0581aeSJohannes Doerfert Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 16445b0581aeSJohannes Doerfert if (CachedKernel) 16455b0581aeSJohannes Doerfert return *CachedKernel; 16465b0581aeSJohannes Doerfert 16475b0581aeSJohannes Doerfert // TODO: We should use an AA to create an (optimistic and callback 16485b0581aeSJohannes Doerfert // call-aware) call graph. For now we stick to simple patterns that 16495b0581aeSJohannes Doerfert // are less powerful, basically the worst fixpoint. 
16505b0581aeSJohannes Doerfert if (isKernel(F)) { 16515b0581aeSJohannes Doerfert CachedKernel = Kernel(&F); 16525b0581aeSJohannes Doerfert return *CachedKernel; 16535b0581aeSJohannes Doerfert } 16545b0581aeSJohannes Doerfert 16555b0581aeSJohannes Doerfert CachedKernel = nullptr; 1656994bb6ebSJohannes Doerfert if (!F.hasLocalLinkage()) { 1657994bb6ebSJohannes Doerfert 1658994bb6ebSJohannes Doerfert // See https://openmp.llvm.org/remarks/OptimizationRemarks.html 16592db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 16602db182ffSJoseph Huber return ORA 16612db182ffSJoseph Huber << "[OMP100] Potentially unknown OpenMP target region caller"; 1662994bb6ebSJohannes Doerfert }; 16632db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); 1664994bb6ebSJohannes Doerfert 16655b0581aeSJohannes Doerfert return nullptr; 16665b0581aeSJohannes Doerfert } 1667994bb6ebSJohannes Doerfert } 16685b0581aeSJohannes Doerfert 16695b0581aeSJohannes Doerfert auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 16705b0581aeSJohannes Doerfert if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 16715b0581aeSJohannes Doerfert // Allow use in equality comparisons. 16725b0581aeSJohannes Doerfert if (Cmp->isEquality()) 16735b0581aeSJohannes Doerfert return getUniqueKernelFor(*Cmp); 16745b0581aeSJohannes Doerfert return nullptr; 16755b0581aeSJohannes Doerfert } 16765b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 16775b0581aeSJohannes Doerfert // Allow direct calls. 16785b0581aeSJohannes Doerfert if (CB->isCallee(&U)) 16795b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 1680a2dbfb6bSGiorgis Georgakoudis 1681a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 1682a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 1683a2dbfb6bSGiorgis Georgakoudis // Allow the use in __kmpc_parallel_51 calls. 
1684a2dbfb6bSGiorgis Georgakoudis if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) 16855b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 16865b0581aeSJohannes Doerfert return nullptr; 16875b0581aeSJohannes Doerfert } 16885b0581aeSJohannes Doerfert // Disallow every other use. 16895b0581aeSJohannes Doerfert return nullptr; 16905b0581aeSJohannes Doerfert }; 16915b0581aeSJohannes Doerfert 16925b0581aeSJohannes Doerfert // TODO: In the future we want to track more than just a unique kernel. 16935b0581aeSJohannes Doerfert SmallPtrSet<Kernel, 2> PotentialKernels; 16948d8ce85bSsstefan1 OMPInformationCache::foreachUse(F, [&](const Use &U) { 16955b0581aeSJohannes Doerfert PotentialKernels.insert(GetUniqueKernelForUse(U)); 16965b0581aeSJohannes Doerfert }); 16975b0581aeSJohannes Doerfert 16985b0581aeSJohannes Doerfert Kernel K = nullptr; 16995b0581aeSJohannes Doerfert if (PotentialKernels.size() == 1) 17005b0581aeSJohannes Doerfert K = *PotentialKernels.begin(); 17015b0581aeSJohannes Doerfert 17025b0581aeSJohannes Doerfert // Cache the result. 17035b0581aeSJohannes Doerfert UniqueKernelMap[&F] = K; 17045b0581aeSJohannes Doerfert 17055b0581aeSJohannes Doerfert return K; 17065b0581aeSJohannes Doerfert } 17075b0581aeSJohannes Doerfert 17085b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 1709a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 1710a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 17115b0581aeSJohannes Doerfert 17125b0581aeSJohannes Doerfert bool Changed = false; 1713a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelRFI) 17145b0581aeSJohannes Doerfert return Changed; 17155b0581aeSJohannes Doerfert 17165b0581aeSJohannes Doerfert for (Function *F : SCC) { 17175b0581aeSJohannes Doerfert 1718a2dbfb6bSGiorgis Georgakoudis // Check if the function is a use in a __kmpc_parallel_51 call at 17195b0581aeSJohannes Doerfert // all. 
17205b0581aeSJohannes Doerfert bool UnknownUse = false; 1721a2dbfb6bSGiorgis Georgakoudis bool KernelParallelUse = false; 17225b0581aeSJohannes Doerfert unsigned NumDirectCalls = 0; 17235b0581aeSJohannes Doerfert 17245b0581aeSJohannes Doerfert SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 17258d8ce85bSsstefan1 OMPInformationCache::foreachUse(*F, [&](Use &U) { 17265b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) 17275b0581aeSJohannes Doerfert if (CB->isCallee(&U)) { 17285b0581aeSJohannes Doerfert ++NumDirectCalls; 17295b0581aeSJohannes Doerfert return; 17305b0581aeSJohannes Doerfert } 17315b0581aeSJohannes Doerfert 173281db6144SMichael Liao if (isa<ICmpInst>(U.getUser())) { 17335b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 17345b0581aeSJohannes Doerfert return; 17355b0581aeSJohannes Doerfert } 1736a2dbfb6bSGiorgis Georgakoudis 1737a2dbfb6bSGiorgis Georgakoudis // Find wrapper functions that represent parallel kernels. 1738a2dbfb6bSGiorgis Georgakoudis CallInst *CI = 1739a2dbfb6bSGiorgis Georgakoudis OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); 1740a2dbfb6bSGiorgis Georgakoudis const unsigned int WrapperFunctionArgNo = 6; 1741a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse && CI && 1742a2dbfb6bSGiorgis Georgakoudis CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { 1743a2dbfb6bSGiorgis Georgakoudis KernelParallelUse = true; 17445b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 17455b0581aeSJohannes Doerfert return; 17465b0581aeSJohannes Doerfert } 17475b0581aeSJohannes Doerfert UnknownUse = true; 17485b0581aeSJohannes Doerfert }); 17495b0581aeSJohannes Doerfert 1750a2dbfb6bSGiorgis Georgakoudis // Do not emit a remark if we haven't seen a __kmpc_parallel_51 1751fec1f210SJohannes Doerfert // use. 
1752a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse) 17535b0581aeSJohannes Doerfert continue; 17545b0581aeSJohannes Doerfert 1755fec1f210SJohannes Doerfert { 17562db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17572db182ffSJoseph Huber return ORA << "Found a parallel region that is called in a target " 1758fec1f210SJohannes Doerfert "region but not part of a combined target construct nor " 1759a2dbfb6bSGiorgis Georgakoudis "nested inside a target construct without intermediate " 1760fec1f210SJohannes Doerfert "code. This can lead to excessive register usage for " 1761fec1f210SJohannes Doerfert "unrelated target regions in the same translation unit " 1762fec1f210SJohannes Doerfert "due to spurious call edges assumed by ptxas."; 1763fec1f210SJohannes Doerfert }; 17642db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", 17652db182ffSJoseph Huber Remark); 1766fec1f210SJohannes Doerfert } 1767fec1f210SJohannes Doerfert 1768fec1f210SJohannes Doerfert // If this ever hits, we should investigate. 1769fec1f210SJohannes Doerfert // TODO: Checking the number of uses is not a necessary restriction and 1770fec1f210SJohannes Doerfert // should be lifted. 1771fec1f210SJohannes Doerfert if (UnknownUse || NumDirectCalls != 1 || 1772fec1f210SJohannes Doerfert ToBeReplacedStateMachineUses.size() != 2) { 1773fec1f210SJohannes Doerfert { 17742db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17752db182ffSJoseph Huber return ORA << "Parallel region is used in " 1776fec1f210SJohannes Doerfert << (UnknownUse ? 
"unknown" : "unexpected") 1777fec1f210SJohannes Doerfert << " ways; will not attempt to rewrite the state machine."; 1778fec1f210SJohannes Doerfert }; 17792db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>( 17802db182ffSJoseph Huber F, "OpenMPParallelRegionInNonSPMD", Remark); 1781fec1f210SJohannes Doerfert } 17825b0581aeSJohannes Doerfert continue; 1783fec1f210SJohannes Doerfert } 17845b0581aeSJohannes Doerfert 1785a2dbfb6bSGiorgis Georgakoudis // Even if we have __kmpc_parallel_51 calls, we (for now) give 17865b0581aeSJohannes Doerfert // up if the function is not called from a unique kernel. 17875b0581aeSJohannes Doerfert Kernel K = getUniqueKernelFor(*F); 1788fec1f210SJohannes Doerfert if (!K) { 1789fec1f210SJohannes Doerfert { 17902db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17912db182ffSJoseph Huber return ORA << "Parallel region is not known to be called from a " 1792fec1f210SJohannes Doerfert "unique single target region, maybe the surrounding " 1793fec1f210SJohannes Doerfert "function has external linkage?; will not attempt to " 1794fec1f210SJohannes Doerfert "rewrite the state machine use."; 1795fec1f210SJohannes Doerfert }; 17962db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>( 17972db182ffSJoseph Huber F, "OpenMPParallelRegionInMultipleKernesl", Remark); 1798fec1f210SJohannes Doerfert } 17995b0581aeSJohannes Doerfert continue; 1800fec1f210SJohannes Doerfert } 18015b0581aeSJohannes Doerfert 18025b0581aeSJohannes Doerfert // We now know F is a parallel body function called only from the kernel K. 18035b0581aeSJohannes Doerfert // We also identified the state machine uses in which we replace the 18045b0581aeSJohannes Doerfert // function pointer by a new global symbol for identification purposes. This 18055b0581aeSJohannes Doerfert // ensures only direct calls to the function are left. 
18065b0581aeSJohannes Doerfert 1807fec1f210SJohannes Doerfert { 18082db182ffSJoseph Huber auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) { 18092db182ffSJoseph Huber return ORA << "Specialize parallel region that is only reached from a " 1810fec1f210SJohannes Doerfert "single target region to avoid spurious call edges and " 1811fec1f210SJohannes Doerfert "excessive register usage in other target regions. " 1812fec1f210SJohannes Doerfert "(parallel region ID: " 1813fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1814fec1f210SJohannes Doerfert << ", kernel ID: " 1815fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1816fec1f210SJohannes Doerfert }; 18172db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", 1818fec1f210SJohannes Doerfert RemarkParalleRegion); 18192db182ffSJoseph Huber auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) { 18202db182ffSJoseph Huber return ORA << "Target region containing the parallel region that is " 1821fec1f210SJohannes Doerfert "specialized. 
(parallel region ID: " 1822fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1823fec1f210SJohannes Doerfert << ", kernel ID: " 1824fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1825fec1f210SJohannes Doerfert }; 18262db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD", 18272db182ffSJoseph Huber RemarkKernel); 1828fec1f210SJohannes Doerfert } 1829fec1f210SJohannes Doerfert 18305b0581aeSJohannes Doerfert Module &M = *F->getParent(); 18315b0581aeSJohannes Doerfert Type *Int8Ty = Type::getInt8Ty(M.getContext()); 18325b0581aeSJohannes Doerfert 18335b0581aeSJohannes Doerfert auto *ID = new GlobalVariable( 18345b0581aeSJohannes Doerfert M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, 18355b0581aeSJohannes Doerfert UndefValue::get(Int8Ty), F->getName() + ".ID"); 18365b0581aeSJohannes Doerfert 18375b0581aeSJohannes Doerfert for (Use *U : ToBeReplacedStateMachineUses) 18385b0581aeSJohannes Doerfert U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); 18395b0581aeSJohannes Doerfert 18405b0581aeSJohannes Doerfert ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; 18415b0581aeSJohannes Doerfert 18425b0581aeSJohannes Doerfert Changed = true; 18435b0581aeSJohannes Doerfert } 18445b0581aeSJohannes Doerfert 18455b0581aeSJohannes Doerfert return Changed; 18465b0581aeSJohannes Doerfert } 18475b0581aeSJohannes Doerfert 1848b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values. 
1849b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { 1850b8235d2bSsstefan1 using Base = StateWrapper<BooleanState, AbstractAttribute>; 1851b8235d2bSsstefan1 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 1852b8235d2bSsstefan1 18535dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 18545dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 18555dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 18565dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 18575dfd7cc4Ssstefan1 } 18585dfd7cc4Ssstefan1 1859b8235d2bSsstefan1 /// Returns true if value is assumed to be tracked. 1860b8235d2bSsstefan1 bool isAssumedTracked() const { return getAssumed(); } 1861b8235d2bSsstefan1 1862b8235d2bSsstefan1 /// Returns true if value is known to be tracked. 1863b8235d2bSsstefan1 bool isKnownTracked() const { return getAssumed(); } 1864b8235d2bSsstefan1 1865b8235d2bSsstefan1 /// Create an abstract attribute biew for the position \p IRP. 1866b8235d2bSsstefan1 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); 1867b8235d2bSsstefan1 1868b8235d2bSsstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 18695dfd7cc4Ssstefan1 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV, 18705dfd7cc4Ssstefan1 const Instruction *I, 18715dfd7cc4Ssstefan1 Attributor &A) const { 18725dfd7cc4Ssstefan1 return None; 18735dfd7cc4Ssstefan1 } 18745dfd7cc4Ssstefan1 18755dfd7cc4Ssstefan1 /// Return an assumed unique ICV value if a single candidate is found. If 18765dfd7cc4Ssstefan1 /// there cannot be one, return a nullptr. If it is not clear yet, return the 18775dfd7cc4Ssstefan1 /// Optional::NoneType. 18785dfd7cc4Ssstefan1 virtual Optional<Value *> 18795dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const = 0; 18805dfd7cc4Ssstefan1 18815dfd7cc4Ssstefan1 // Currently only nthreads is being tracked. 
18825dfd7cc4Ssstefan1 // this array will only grow with time. 18835dfd7cc4Ssstefan1 InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 1884b8235d2bSsstefan1 1885b8235d2bSsstefan1 /// See AbstractAttribute::getName() 1886b8235d2bSsstefan1 const std::string getName() const override { return "AAICVTracker"; } 1887b8235d2bSsstefan1 1888233af895SLuofan Chen /// See AbstractAttribute::getIdAddr() 1889233af895SLuofan Chen const char *getIdAddr() const override { return &ID; } 1890233af895SLuofan Chen 1891233af895SLuofan Chen /// This function should return true if the type of the \p AA is AAICVTracker 1892233af895SLuofan Chen static bool classof(const AbstractAttribute *AA) { 1893233af895SLuofan Chen return (AA->getIdAddr() == &ID); 1894233af895SLuofan Chen } 1895233af895SLuofan Chen 1896b8235d2bSsstefan1 static const char ID; 1897b8235d2bSsstefan1 }; 1898b8235d2bSsstefan1 1899b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker { 1900b8235d2bSsstefan1 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 1901b8235d2bSsstefan1 : AAICVTracker(IRP, A) {} 1902b8235d2bSsstefan1 1903b8235d2bSsstefan1 // FIXME: come up with better string. 19045dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerFunction"; } 1905b8235d2bSsstefan1 1906b8235d2bSsstefan1 // FIXME: come up with some stats. 1907b8235d2bSsstefan1 void trackStatistics() const override {} 1908b8235d2bSsstefan1 19095dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 1910b8235d2bSsstefan1 ChangeStatus manifest(Attributor &A) override { 19115dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1912b8235d2bSsstefan1 } 1913b8235d2bSsstefan1 1914b8235d2bSsstefan1 // Map of ICV to their values at specific program point. 
19155dfd7cc4Ssstefan1 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar, 1916b8235d2bSsstefan1 InternalControlVar::ICV___last> 19175dfd7cc4Ssstefan1 ICVReplacementValuesMap; 1918b8235d2bSsstefan1 1919b8235d2bSsstefan1 ChangeStatus updateImpl(Attributor &A) override { 1920b8235d2bSsstefan1 ChangeStatus HasChanged = ChangeStatus::UNCHANGED; 1921b8235d2bSsstefan1 1922b8235d2bSsstefan1 Function *F = getAnchorScope(); 1923b8235d2bSsstefan1 1924b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1925b8235d2bSsstefan1 1926b8235d2bSsstefan1 for (InternalControlVar ICV : TrackableICVs) { 1927b8235d2bSsstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 1928b8235d2bSsstefan1 19295dfd7cc4Ssstefan1 auto &ValuesMap = ICVReplacementValuesMap[ICV]; 1930b8235d2bSsstefan1 auto TrackValues = [&](Use &U, Function &) { 1931b8235d2bSsstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); 1932b8235d2bSsstefan1 if (!CI) 1933b8235d2bSsstefan1 return false; 1934b8235d2bSsstefan1 1935b8235d2bSsstefan1 // FIXME: handle setters with more that 1 arguments. 1936b8235d2bSsstefan1 /// Track new value. 19375dfd7cc4Ssstefan1 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) 1938b8235d2bSsstefan1 HasChanged = ChangeStatus::CHANGED; 1939b8235d2bSsstefan1 1940b8235d2bSsstefan1 return false; 1941b8235d2bSsstefan1 }; 1942b8235d2bSsstefan1 19435dfd7cc4Ssstefan1 auto CallCheck = [&](Instruction &I) { 19445dfd7cc4Ssstefan1 Optional<Value *> ReplVal = getValueForCall(A, &I, ICV); 19455dfd7cc4Ssstefan1 if (ReplVal.hasValue() && 19465dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) 19475dfd7cc4Ssstefan1 HasChanged = ChangeStatus::CHANGED; 19485dfd7cc4Ssstefan1 19495dfd7cc4Ssstefan1 return true; 19505dfd7cc4Ssstefan1 }; 19515dfd7cc4Ssstefan1 19525dfd7cc4Ssstefan1 // Track all changes of an ICV. 
1953b8235d2bSsstefan1 SetterRFI.foreachUse(TrackValues, F); 19545dfd7cc4Ssstefan1 19555dfd7cc4Ssstefan1 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, 19565dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true); 19575dfd7cc4Ssstefan1 19585dfd7cc4Ssstefan1 /// TODO: Figure out a way to avoid adding entry in 19595dfd7cc4Ssstefan1 /// ICVReplacementValuesMap 19605dfd7cc4Ssstefan1 Instruction *Entry = &F->getEntryBlock().front(); 19615dfd7cc4Ssstefan1 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) 19625dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(Entry, nullptr)); 1963b8235d2bSsstefan1 } 1964b8235d2bSsstefan1 1965b8235d2bSsstefan1 return HasChanged; 1966b8235d2bSsstefan1 } 1967b8235d2bSsstefan1 19685dfd7cc4Ssstefan1 /// Hepler to check if \p I is a call and get the value for it if it is 19695dfd7cc4Ssstefan1 /// unique. 19705dfd7cc4Ssstefan1 Optional<Value *> getValueForCall(Attributor &A, const Instruction *I, 19715dfd7cc4Ssstefan1 InternalControlVar &ICV) const { 1972b8235d2bSsstefan1 19735dfd7cc4Ssstefan1 const auto *CB = dyn_cast<CallBase>(I); 1974dcaec812SJohannes Doerfert if (!CB || CB->hasFnAttr("no_openmp") || 1975dcaec812SJohannes Doerfert CB->hasFnAttr("no_openmp_routines")) 19765dfd7cc4Ssstefan1 return None; 19775dfd7cc4Ssstefan1 1978b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1979b8235d2bSsstefan1 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 19805dfd7cc4Ssstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 19815dfd7cc4Ssstefan1 Function *CalledFunction = CB->getCalledFunction(); 1982b8235d2bSsstefan1 19834eef14f9SWei Wang // Indirect call, assume ICV changes. 
19844eef14f9SWei Wang if (CalledFunction == nullptr) 19854eef14f9SWei Wang return nullptr; 19865dfd7cc4Ssstefan1 if (CalledFunction == GetterRFI.Declaration) 19875dfd7cc4Ssstefan1 return None; 19885dfd7cc4Ssstefan1 if (CalledFunction == SetterRFI.Declaration) { 19895dfd7cc4Ssstefan1 if (ICVReplacementValuesMap[ICV].count(I)) 19905dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV].lookup(I); 19915dfd7cc4Ssstefan1 19925dfd7cc4Ssstefan1 return nullptr; 19935dfd7cc4Ssstefan1 } 19945dfd7cc4Ssstefan1 19955dfd7cc4Ssstefan1 // Since we don't know, assume it changes the ICV. 19965dfd7cc4Ssstefan1 if (CalledFunction->isDeclaration()) 19975dfd7cc4Ssstefan1 return nullptr; 19985dfd7cc4Ssstefan1 19995b70c12fSJohannes Doerfert const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 20005b70c12fSJohannes Doerfert *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); 20015dfd7cc4Ssstefan1 20025dfd7cc4Ssstefan1 if (ICVTrackingAA.isAssumedTracked()) 20035dfd7cc4Ssstefan1 return ICVTrackingAA.getUniqueReplacementValue(ICV); 20045dfd7cc4Ssstefan1 20055dfd7cc4Ssstefan1 // If we don't know, assume it changes. 20065dfd7cc4Ssstefan1 return nullptr; 20075dfd7cc4Ssstefan1 } 20085dfd7cc4Ssstefan1 20095dfd7cc4Ssstefan1 // We don't check unique value for a function, so return None. 20105dfd7cc4Ssstefan1 Optional<Value *> 20115dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 20125dfd7cc4Ssstefan1 return None; 20135dfd7cc4Ssstefan1 } 20145dfd7cc4Ssstefan1 20155dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 
20165dfd7cc4Ssstefan1 Optional<Value *> getReplacementValue(InternalControlVar ICV, 20175dfd7cc4Ssstefan1 const Instruction *I, 20185dfd7cc4Ssstefan1 Attributor &A) const override { 20195dfd7cc4Ssstefan1 const auto &ValuesMap = ICVReplacementValuesMap[ICV]; 20205dfd7cc4Ssstefan1 if (ValuesMap.count(I)) 20215dfd7cc4Ssstefan1 return ValuesMap.lookup(I); 20225dfd7cc4Ssstefan1 20235dfd7cc4Ssstefan1 SmallVector<const Instruction *, 16> Worklist; 20245dfd7cc4Ssstefan1 SmallPtrSet<const Instruction *, 16> Visited; 20255dfd7cc4Ssstefan1 Worklist.push_back(I); 20265dfd7cc4Ssstefan1 20275dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 20285dfd7cc4Ssstefan1 20295dfd7cc4Ssstefan1 while (!Worklist.empty()) { 20305dfd7cc4Ssstefan1 const Instruction *CurrInst = Worklist.pop_back_val(); 20315dfd7cc4Ssstefan1 if (!Visited.insert(CurrInst).second) 2032b8235d2bSsstefan1 continue; 2033b8235d2bSsstefan1 20345dfd7cc4Ssstefan1 const BasicBlock *CurrBB = CurrInst->getParent(); 20355dfd7cc4Ssstefan1 20365dfd7cc4Ssstefan1 // Go up and look for all potential setters/calls that might change the 20375dfd7cc4Ssstefan1 // ICV. 20385dfd7cc4Ssstefan1 while ((CurrInst = CurrInst->getPrevNode())) { 20395dfd7cc4Ssstefan1 if (ValuesMap.count(CurrInst)) { 20405dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); 20415dfd7cc4Ssstefan1 // Unknown value, track new. 20425dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 20435dfd7cc4Ssstefan1 ReplVal = NewReplVal; 20445dfd7cc4Ssstefan1 break; 20455dfd7cc4Ssstefan1 } 20465dfd7cc4Ssstefan1 20475dfd7cc4Ssstefan1 // If we found a new value, we can't know the icv value anymore. 
20485dfd7cc4Ssstefan1 if (NewReplVal.hasValue()) 20495dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 2050b8235d2bSsstefan1 return nullptr; 2051b8235d2bSsstefan1 20525dfd7cc4Ssstefan1 break; 2053b8235d2bSsstefan1 } 2054b8235d2bSsstefan1 20555dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV); 20565dfd7cc4Ssstefan1 if (!NewReplVal.hasValue()) 20575dfd7cc4Ssstefan1 continue; 20585dfd7cc4Ssstefan1 20595dfd7cc4Ssstefan1 // Unknown value, track new. 20605dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 20615dfd7cc4Ssstefan1 ReplVal = NewReplVal; 20625dfd7cc4Ssstefan1 break; 2063b8235d2bSsstefan1 } 2064b8235d2bSsstefan1 20655dfd7cc4Ssstefan1 // if (NewReplVal.hasValue()) 20665dfd7cc4Ssstefan1 // We found a new value, we can't know the icv value anymore. 20675dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 2068b8235d2bSsstefan1 return nullptr; 2069b8235d2bSsstefan1 } 20705dfd7cc4Ssstefan1 20715dfd7cc4Ssstefan1 // If we are in the same BB and we have a value, we are done. 20725dfd7cc4Ssstefan1 if (CurrBB == I->getParent() && ReplVal.hasValue()) 20735dfd7cc4Ssstefan1 return ReplVal; 20745dfd7cc4Ssstefan1 20755dfd7cc4Ssstefan1 // Go through all predecessors and add terminators for analysis. 20765dfd7cc4Ssstefan1 for (const BasicBlock *Pred : predecessors(CurrBB)) 20775dfd7cc4Ssstefan1 if (const Instruction *Terminator = Pred->getTerminator()) 20785dfd7cc4Ssstefan1 Worklist.push_back(Terminator); 20795dfd7cc4Ssstefan1 } 20805dfd7cc4Ssstefan1 20815dfd7cc4Ssstefan1 return ReplVal; 20825dfd7cc4Ssstefan1 } 20835dfd7cc4Ssstefan1 }; 20845dfd7cc4Ssstefan1 20855dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker { 20865dfd7cc4Ssstefan1 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) 20875dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 20885dfd7cc4Ssstefan1 20895dfd7cc4Ssstefan1 // FIXME: come up with better string. 
20905dfd7cc4Ssstefan1 const std::string getAsStr() const override { 20915dfd7cc4Ssstefan1 return "ICVTrackerFunctionReturned"; 20925dfd7cc4Ssstefan1 } 20935dfd7cc4Ssstefan1 20945dfd7cc4Ssstefan1 // FIXME: come up with some stats. 20955dfd7cc4Ssstefan1 void trackStatistics() const override {} 20965dfd7cc4Ssstefan1 20975dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 20985dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 20995dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 21005dfd7cc4Ssstefan1 } 21015dfd7cc4Ssstefan1 21025dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 21035dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 21045dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 21055dfd7cc4Ssstefan1 ICVReplacementValuesMap; 21065dfd7cc4Ssstefan1 21075dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 21085dfd7cc4Ssstefan1 Optional<Value *> 21095dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 21105dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 21115dfd7cc4Ssstefan1 } 21125dfd7cc4Ssstefan1 21135dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 21145dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 21155dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 21165b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 21175dfd7cc4Ssstefan1 21185dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 21195dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 21205dfd7cc4Ssstefan1 21215dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 21225dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 21235dfd7cc4Ssstefan1 Optional<Value *> UniqueICVValue; 21245dfd7cc4Ssstefan1 21255dfd7cc4Ssstefan1 auto CheckReturnInst = [&](Instruction &I) { 21265dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 
21275dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(ICV, &I, A); 21285dfd7cc4Ssstefan1 21295dfd7cc4Ssstefan1 // If we found a second ICV value there is no unique returned value. 21305dfd7cc4Ssstefan1 if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) 21315dfd7cc4Ssstefan1 return false; 21325dfd7cc4Ssstefan1 21335dfd7cc4Ssstefan1 UniqueICVValue = NewReplVal; 21345dfd7cc4Ssstefan1 21355dfd7cc4Ssstefan1 return true; 21365dfd7cc4Ssstefan1 }; 21375dfd7cc4Ssstefan1 21385dfd7cc4Ssstefan1 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, 21395dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true)) 21405dfd7cc4Ssstefan1 UniqueICVValue = nullptr; 21415dfd7cc4Ssstefan1 21425dfd7cc4Ssstefan1 if (UniqueICVValue == ReplVal) 21435dfd7cc4Ssstefan1 continue; 21445dfd7cc4Ssstefan1 21455dfd7cc4Ssstefan1 ReplVal = UniqueICVValue; 21465dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 21475dfd7cc4Ssstefan1 } 21485dfd7cc4Ssstefan1 21495dfd7cc4Ssstefan1 return Changed; 21505dfd7cc4Ssstefan1 } 21515dfd7cc4Ssstefan1 }; 21525dfd7cc4Ssstefan1 21535dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker { 21545dfd7cc4Ssstefan1 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) 21555dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 21565dfd7cc4Ssstefan1 21575dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 21585dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 21595dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 21605dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 21615dfd7cc4Ssstefan1 21625dfd7cc4Ssstefan1 // We only initialize this AA for getters, so we need to know which ICV it 21635dfd7cc4Ssstefan1 // gets. 
21645dfd7cc4Ssstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 21655dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 21665dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 21675dfd7cc4Ssstefan1 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; 21685dfd7cc4Ssstefan1 if (Getter.Declaration == getAssociatedFunction()) { 21695dfd7cc4Ssstefan1 AssociatedICV = ICVInfo.Kind; 21705dfd7cc4Ssstefan1 return; 21715dfd7cc4Ssstefan1 } 21725dfd7cc4Ssstefan1 } 21735dfd7cc4Ssstefan1 21745dfd7cc4Ssstefan1 /// Unknown ICV. 21755dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 21765dfd7cc4Ssstefan1 } 21775dfd7cc4Ssstefan1 21785dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 21795dfd7cc4Ssstefan1 if (!ReplVal.hasValue() || !ReplVal.getValue()) 21805dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 21815dfd7cc4Ssstefan1 21825dfd7cc4Ssstefan1 A.changeValueAfterManifest(*getCtxI(), **ReplVal); 21835dfd7cc4Ssstefan1 A.deleteAfterManifest(*getCtxI()); 21845dfd7cc4Ssstefan1 21855dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 21865dfd7cc4Ssstefan1 } 21875dfd7cc4Ssstefan1 21885dfd7cc4Ssstefan1 // FIXME: come up with better string. 21895dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerCallSite"; } 21905dfd7cc4Ssstefan1 21915dfd7cc4Ssstefan1 // FIXME: come up with some stats. 21925dfd7cc4Ssstefan1 void trackStatistics() const override {} 21935dfd7cc4Ssstefan1 21945dfd7cc4Ssstefan1 InternalControlVar AssociatedICV; 21955dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 21965dfd7cc4Ssstefan1 21975dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 21985dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 21995b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 22005dfd7cc4Ssstefan1 22015dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 
22025dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 22035dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 22045dfd7cc4Ssstefan1 22055dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 22065dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); 22075dfd7cc4Ssstefan1 22085dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 22095dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 22105dfd7cc4Ssstefan1 22115dfd7cc4Ssstefan1 ReplVal = NewReplVal; 22125dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 22135dfd7cc4Ssstefan1 } 22145dfd7cc4Ssstefan1 22155dfd7cc4Ssstefan1 // Return the value with which associated value can be replaced for specific 22165dfd7cc4Ssstefan1 // \p ICV. 22175dfd7cc4Ssstefan1 Optional<Value *> 22185dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 22195dfd7cc4Ssstefan1 return ReplVal; 22205dfd7cc4Ssstefan1 } 22215dfd7cc4Ssstefan1 }; 22225dfd7cc4Ssstefan1 22235dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker { 22245dfd7cc4Ssstefan1 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) 22255dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 22265dfd7cc4Ssstefan1 22275dfd7cc4Ssstefan1 // FIXME: come up with better string. 22285dfd7cc4Ssstefan1 const std::string getAsStr() const override { 22295dfd7cc4Ssstefan1 return "ICVTrackerCallSiteReturned"; 22305dfd7cc4Ssstefan1 } 22315dfd7cc4Ssstefan1 22325dfd7cc4Ssstefan1 // FIXME: come up with some stats. 22335dfd7cc4Ssstefan1 void trackStatistics() const override {} 22345dfd7cc4Ssstefan1 22355dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 22365dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 22375dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 22385dfd7cc4Ssstefan1 } 22395dfd7cc4Ssstefan1 22405dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 
22415dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 22425dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 22435dfd7cc4Ssstefan1 ICVReplacementValuesMap; 22445dfd7cc4Ssstefan1 22455dfd7cc4Ssstefan1 /// Return the value with which associated value can be replaced for specific 22465dfd7cc4Ssstefan1 /// \p ICV. 22475dfd7cc4Ssstefan1 Optional<Value *> 22485dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 22495dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 22505dfd7cc4Ssstefan1 } 22515dfd7cc4Ssstefan1 22525dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 22535dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 22545dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 22555b70c12fSJohannes Doerfert *this, IRPosition::returned(*getAssociatedFunction()), 22565b70c12fSJohannes Doerfert DepClassTy::REQUIRED); 22575dfd7cc4Ssstefan1 22585dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 
22595dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 22605dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 22615dfd7cc4Ssstefan1 22625dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 22635dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 22645dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 22655dfd7cc4Ssstefan1 ICVTrackingAA.getUniqueReplacementValue(ICV); 22665dfd7cc4Ssstefan1 22675dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 22685dfd7cc4Ssstefan1 continue; 22695dfd7cc4Ssstefan1 22705dfd7cc4Ssstefan1 ReplVal = NewReplVal; 22715dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 22725dfd7cc4Ssstefan1 } 22735dfd7cc4Ssstefan1 return Changed; 22745dfd7cc4Ssstefan1 } 22759548b74aSJohannes Doerfert }; 227618283125SJoseph Huber 227718283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain { 227818283125SJoseph Huber AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) 227918283125SJoseph Huber : AAExecutionDomain(IRP, A) {} 228018283125SJoseph Huber 228118283125SJoseph Huber const std::string getAsStr() const override { 228218283125SJoseph Huber return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + 228318283125SJoseph Huber "/" + std::to_string(NumBBs) + " BBs thread 0 only."; 228418283125SJoseph Huber } 228518283125SJoseph Huber 228618283125SJoseph Huber /// See AbstractAttribute::trackStatistics(). 
228718283125SJoseph Huber void trackStatistics() const override {} 228818283125SJoseph Huber 228918283125SJoseph Huber void initialize(Attributor &A) override { 229018283125SJoseph Huber Function *F = getAnchorScope(); 229118283125SJoseph Huber for (const auto &BB : *F) 229218283125SJoseph Huber SingleThreadedBBs.insert(&BB); 229318283125SJoseph Huber NumBBs = SingleThreadedBBs.size(); 229418283125SJoseph Huber } 229518283125SJoseph Huber 229618283125SJoseph Huber ChangeStatus manifest(Attributor &A) override { 229718283125SJoseph Huber LLVM_DEBUG({ 229818283125SJoseph Huber for (const BasicBlock *BB : SingleThreadedBBs) 229918283125SJoseph Huber dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " 230018283125SJoseph Huber << BB->getName() << " is executed by a single thread.\n"; 230118283125SJoseph Huber }); 230218283125SJoseph Huber return ChangeStatus::UNCHANGED; 230318283125SJoseph Huber } 230418283125SJoseph Huber 230518283125SJoseph Huber ChangeStatus updateImpl(Attributor &A) override; 230618283125SJoseph Huber 230718283125SJoseph Huber /// Check if an instruction is executed by a single thread. 23089a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const Instruction &I) const override { 23099a23e673SJohannes Doerfert return isExecutedByInitialThreadOnly(*I.getParent()); 231018283125SJoseph Huber } 231118283125SJoseph Huber 23129a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { 231318283125SJoseph Huber return SingleThreadedBBs.contains(&BB); 231418283125SJoseph Huber } 231518283125SJoseph Huber 231618283125SJoseph Huber /// Set of basic blocks that are executed by a single thread. 231718283125SJoseph Huber DenseSet<const BasicBlock *> SingleThreadedBBs; 231818283125SJoseph Huber 231918283125SJoseph Huber /// Total number of basic blocks in this function. 
232018283125SJoseph Huber long unsigned NumBBs; 232118283125SJoseph Huber }; 232218283125SJoseph Huber 232318283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { 232418283125SJoseph Huber Function *F = getAnchorScope(); 232518283125SJoseph Huber ReversePostOrderTraversal<Function *> RPOT(F); 232618283125SJoseph Huber auto NumSingleThreadedBBs = SingleThreadedBBs.size(); 232718283125SJoseph Huber 232818283125SJoseph Huber bool AllCallSitesKnown; 232918283125SJoseph Huber auto PredForCallSite = [&](AbstractCallSite ACS) { 233018283125SJoseph Huber const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( 233118283125SJoseph Huber *this, IRPosition::function(*ACS.getInstruction()->getFunction()), 233218283125SJoseph Huber DepClassTy::REQUIRED); 23339a23e673SJohannes Doerfert return ExecutionDomainAA.isExecutedByInitialThreadOnly( 23349a23e673SJohannes Doerfert *ACS.getInstruction()); 233518283125SJoseph Huber }; 233618283125SJoseph Huber 233718283125SJoseph Huber if (!A.checkForAllCallSites(PredForCallSite, *this, 233818283125SJoseph Huber /* RequiresAllCallSites */ true, 233918283125SJoseph Huber AllCallSitesKnown)) 234018283125SJoseph Huber SingleThreadedBBs.erase(&F->getEntryBlock()); 234118283125SJoseph Huber 234218283125SJoseph Huber // Check if the edge into the successor block compares a thread-id function to 234318283125SJoseph Huber // a constant zero. 234418283125SJoseph Huber // TODO: Use AAValueSimplify to simplify and propogate constants. 234518283125SJoseph Huber // TODO: Check more than a single use for thread ID's. 
23466fc51c9fSJoseph Huber auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { 234718283125SJoseph Huber if (!Edge || !Edge->isConditional()) 234818283125SJoseph Huber return false; 234918283125SJoseph Huber if (Edge->getSuccessor(0) != SuccessorBB) 235018283125SJoseph Huber return false; 235118283125SJoseph Huber 235218283125SJoseph Huber auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); 235318283125SJoseph Huber if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) 235418283125SJoseph Huber return false; 235518283125SJoseph Huber 23566fc51c9fSJoseph Huber // Temporarily match the pattern generated by clang for teams regions. 23576fc51c9fSJoseph Huber // TODO: Remove this once the new runtime is in place. 23586fc51c9fSJoseph Huber ConstantInt *One, *NegOne; 23596fc51c9fSJoseph Huber CmpInst::Predicate Pred; 23606fc51c9fSJoseph Huber auto &&m_ThreadID = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_tid_x>(); 23616fc51c9fSJoseph Huber auto &&m_WarpSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_warpsize>(); 23626fc51c9fSJoseph Huber auto &&m_BlockSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_ntid_x>(); 23636fc51c9fSJoseph Huber if (match(Cmp, m_Cmp(Pred, m_ThreadID, 23646fc51c9fSJoseph Huber m_And(m_Sub(m_BlockSize, m_ConstantInt(One)), 23656fc51c9fSJoseph Huber m_Xor(m_Sub(m_WarpSize, m_ConstantInt(One)), 23666fc51c9fSJoseph Huber m_ConstantInt(NegOne)))))) 23676fc51c9fSJoseph Huber if (One->isOne() && NegOne->isMinusOne() && 23686fc51c9fSJoseph Huber Pred == CmpInst::Predicate::ICMP_EQ) 23696fc51c9fSJoseph Huber return true; 23706fc51c9fSJoseph Huber 237118283125SJoseph Huber ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 237218283125SJoseph Huber if (!C || !C->isZero()) 237318283125SJoseph Huber return false; 237418283125SJoseph Huber 237568abc3d2SJoseph Huber if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0))) 237668abc3d2SJoseph Huber if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x) 
237718283125SJoseph Huber return true; 237868abc3d2SJoseph Huber if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0))) 237968abc3d2SJoseph Huber if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x) 238068abc3d2SJoseph Huber return true; 238118283125SJoseph Huber 238218283125SJoseph Huber return false; 238318283125SJoseph Huber }; 238418283125SJoseph Huber 238518283125SJoseph Huber // Merge all the predecessor states into the current basic block. A basic 238618283125SJoseph Huber // block is executed by a single thread if all of its predecessors are. 238718283125SJoseph Huber auto MergePredecessorStates = [&](BasicBlock *BB) { 238818283125SJoseph Huber if (pred_begin(BB) == pred_end(BB)) 238918283125SJoseph Huber return SingleThreadedBBs.contains(BB); 239018283125SJoseph Huber 23916fc51c9fSJoseph Huber bool IsInitialThread = true; 239218283125SJoseph Huber for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB); 239318283125SJoseph Huber PredBB != PredEndBB; ++PredBB) { 23946fc51c9fSJoseph Huber if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()), 239518283125SJoseph Huber BB)) 23966fc51c9fSJoseph Huber IsInitialThread &= SingleThreadedBBs.contains(*PredBB); 239718283125SJoseph Huber } 239818283125SJoseph Huber 23996fc51c9fSJoseph Huber return IsInitialThread; 240018283125SJoseph Huber }; 240118283125SJoseph Huber 240218283125SJoseph Huber for (auto *BB : RPOT) { 240318283125SJoseph Huber if (!MergePredecessorStates(BB)) 240418283125SJoseph Huber SingleThreadedBBs.erase(BB); 240518283125SJoseph Huber } 240618283125SJoseph Huber 240718283125SJoseph Huber return (NumSingleThreadedBBs == SingleThreadedBBs.size()) 240818283125SJoseph Huber ? ChangeStatus::UNCHANGED 240918283125SJoseph Huber : ChangeStatus::CHANGED; 241018283125SJoseph Huber } 241118283125SJoseph Huber 24126fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a 24136fc51c9fSJoseph Huber /// static buffer of shared memory. 
24146fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> { 24156fc51c9fSJoseph Huber using Base = StateWrapper<BooleanState, AbstractAttribute>; 24166fc51c9fSJoseph Huber AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 24176fc51c9fSJoseph Huber 24186fc51c9fSJoseph Huber /// Create an abstract attribute view for the position \p IRP. 24196fc51c9fSJoseph Huber static AAHeapToShared &createForPosition(const IRPosition &IRP, 24206fc51c9fSJoseph Huber Attributor &A); 24216fc51c9fSJoseph Huber 24226fc51c9fSJoseph Huber /// See AbstractAttribute::getName(). 24236fc51c9fSJoseph Huber const std::string getName() const override { return "AAHeapToShared"; } 24246fc51c9fSJoseph Huber 24256fc51c9fSJoseph Huber /// See AbstractAttribute::getIdAddr(). 24266fc51c9fSJoseph Huber const char *getIdAddr() const override { return &ID; } 24276fc51c9fSJoseph Huber 24286fc51c9fSJoseph Huber /// This function should return true if the type of the \p AA is 24296fc51c9fSJoseph Huber /// AAHeapToShared. 
24306fc51c9fSJoseph Huber static bool classof(const AbstractAttribute *AA) { 24316fc51c9fSJoseph Huber return (AA->getIdAddr() == &ID); 24326fc51c9fSJoseph Huber } 24336fc51c9fSJoseph Huber 24346fc51c9fSJoseph Huber /// Unique ID (due to the unique address) 24356fc51c9fSJoseph Huber static const char ID; 24366fc51c9fSJoseph Huber }; 24376fc51c9fSJoseph Huber 24386fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared { 24396fc51c9fSJoseph Huber AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) 24406fc51c9fSJoseph Huber : AAHeapToShared(IRP, A) {} 24416fc51c9fSJoseph Huber 24426fc51c9fSJoseph Huber const std::string getAsStr() const override { 24436fc51c9fSJoseph Huber return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + 24446fc51c9fSJoseph Huber " malloc calls eligible."; 24456fc51c9fSJoseph Huber } 24466fc51c9fSJoseph Huber 24476fc51c9fSJoseph Huber /// See AbstractAttribute::trackStatistics(). 24486fc51c9fSJoseph Huber void trackStatistics() const override {} 24496fc51c9fSJoseph Huber 24506fc51c9fSJoseph Huber void initialize(Attributor &A) override { 24516fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 24526fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 24536fc51c9fSJoseph Huber 24546fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users()) 24556fc51c9fSJoseph Huber if (CallBase *CB = dyn_cast<CallBase>(U)) 24566fc51c9fSJoseph Huber MallocCalls.insert(CB); 24576fc51c9fSJoseph Huber } 24586fc51c9fSJoseph Huber 24596fc51c9fSJoseph Huber ChangeStatus manifest(Attributor &A) override { 24606fc51c9fSJoseph Huber if (MallocCalls.empty()) 24616fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED; 24626fc51c9fSJoseph Huber 24636fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 24646fc51c9fSJoseph Huber auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 24656fc51c9fSJoseph Huber 
24666fc51c9fSJoseph Huber Function *F = getAnchorScope(); 24676fc51c9fSJoseph Huber auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, 24686fc51c9fSJoseph Huber DepClassTy::OPTIONAL); 24696fc51c9fSJoseph Huber 24706fc51c9fSJoseph Huber ChangeStatus Changed = ChangeStatus::UNCHANGED; 24716fc51c9fSJoseph Huber for (CallBase *CB : MallocCalls) { 24726fc51c9fSJoseph Huber // Skip replacing this if HeapToStack has already claimed it. 24736fc51c9fSJoseph Huber if (HS && HS->isKnownHeapToStack(*CB)) 24746fc51c9fSJoseph Huber continue; 24756fc51c9fSJoseph Huber 24766fc51c9fSJoseph Huber // Find the unique free call to remove it. 24776fc51c9fSJoseph Huber SmallVector<CallBase *, 4> FreeCalls; 24786fc51c9fSJoseph Huber for (auto *U : CB->users()) { 24796fc51c9fSJoseph Huber CallBase *C = dyn_cast<CallBase>(U); 24806fc51c9fSJoseph Huber if (C && C->getCalledFunction() == FreeCall.Declaration) 24816fc51c9fSJoseph Huber FreeCalls.push_back(C); 24826fc51c9fSJoseph Huber } 24836fc51c9fSJoseph Huber if (FreeCalls.size() != 1) 24846fc51c9fSJoseph Huber continue; 24856fc51c9fSJoseph Huber 24866fc51c9fSJoseph Huber ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0)); 24876fc51c9fSJoseph Huber 24886fc51c9fSJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in " 24896fc51c9fSJoseph Huber << CB->getCaller()->getName() << " with " 24906fc51c9fSJoseph Huber << AllocSize->getZExtValue() 24916fc51c9fSJoseph Huber << " bytes of shared memory\n"); 24926fc51c9fSJoseph Huber 24936fc51c9fSJoseph Huber // Create a new shared memory buffer of the same size as the allocation 24946fc51c9fSJoseph Huber // and replace all the uses of the original allocation with it. 
24956fc51c9fSJoseph Huber Module *M = CB->getModule(); 24966fc51c9fSJoseph Huber Type *Int8Ty = Type::getInt8Ty(M->getContext()); 24976fc51c9fSJoseph Huber Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); 24986fc51c9fSJoseph Huber auto *SharedMem = new GlobalVariable( 24996fc51c9fSJoseph Huber *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, 25006fc51c9fSJoseph Huber UndefValue::get(Int8ArrTy), CB->getName(), nullptr, 25016fc51c9fSJoseph Huber GlobalValue::NotThreadLocal, 25026fc51c9fSJoseph Huber static_cast<unsigned>(AddressSpace::Shared)); 25036fc51c9fSJoseph Huber auto *NewBuffer = 25046fc51c9fSJoseph Huber ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); 25056fc51c9fSJoseph Huber 25066fc51c9fSJoseph Huber SharedMem->setAlignment(MaybeAlign(32)); 25076fc51c9fSJoseph Huber 25086fc51c9fSJoseph Huber A.changeValueAfterManifest(*CB, *NewBuffer); 25096fc51c9fSJoseph Huber A.deleteAfterManifest(*CB); 25106fc51c9fSJoseph Huber A.deleteAfterManifest(*FreeCalls.front()); 25116fc51c9fSJoseph Huber 25126fc51c9fSJoseph Huber NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); 25136fc51c9fSJoseph Huber Changed = ChangeStatus::CHANGED; 25146fc51c9fSJoseph Huber } 25156fc51c9fSJoseph Huber 25166fc51c9fSJoseph Huber return Changed; 25176fc51c9fSJoseph Huber } 25186fc51c9fSJoseph Huber 25196fc51c9fSJoseph Huber ChangeStatus updateImpl(Attributor &A) override { 25206fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 25216fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 25226fc51c9fSJoseph Huber Function *F = getAnchorScope(); 25236fc51c9fSJoseph Huber 25246fc51c9fSJoseph Huber auto NumMallocCalls = MallocCalls.size(); 25256fc51c9fSJoseph Huber 25266fc51c9fSJoseph Huber // Only consider malloc calls executed by a single thread with a constant. 
25276fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users()) { 25286fc51c9fSJoseph Huber const auto &ED = A.getAAFor<AAExecutionDomain>( 25296fc51c9fSJoseph Huber *this, IRPosition::function(*F), DepClassTy::REQUIRED); 25306fc51c9fSJoseph Huber if (CallBase *CB = dyn_cast<CallBase>(U)) 25316fc51c9fSJoseph Huber if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) || 25326fc51c9fSJoseph Huber !ED.isExecutedByInitialThreadOnly(*CB)) 25336fc51c9fSJoseph Huber MallocCalls.erase(CB); 25346fc51c9fSJoseph Huber } 25356fc51c9fSJoseph Huber 25366fc51c9fSJoseph Huber if (NumMallocCalls != MallocCalls.size()) 25376fc51c9fSJoseph Huber return ChangeStatus::CHANGED; 25386fc51c9fSJoseph Huber 25396fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED; 25406fc51c9fSJoseph Huber } 25416fc51c9fSJoseph Huber 25426fc51c9fSJoseph Huber /// Collection of all malloc calls in a function. 25436fc51c9fSJoseph Huber SmallPtrSet<CallBase *, 4> MallocCalls; 25446fc51c9fSJoseph Huber }; 25456fc51c9fSJoseph Huber 25469548b74aSJohannes Doerfert } // namespace 25479548b74aSJohannes Doerfert 2548b8235d2bSsstefan1 const char AAICVTracker::ID = 0; 254918283125SJoseph Huber const char AAExecutionDomain::ID = 0; 25506fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0; 2551b8235d2bSsstefan1 2552b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 2553b8235d2bSsstefan1 Attributor &A) { 2554b8235d2bSsstefan1 AAICVTracker *AA = nullptr; 2555b8235d2bSsstefan1 switch (IRP.getPositionKind()) { 2556b8235d2bSsstefan1 case IRPosition::IRP_INVALID: 2557b8235d2bSsstefan1 case IRPosition::IRP_FLOAT: 2558b8235d2bSsstefan1 case IRPosition::IRP_ARGUMENT: 2559b8235d2bSsstefan1 case IRPosition::IRP_CALL_SITE_ARGUMENT: 25601de70a72SJohannes Doerfert llvm_unreachable("ICVTracker can only be created for function position!"); 25615dfd7cc4Ssstefan1 case IRPosition::IRP_RETURNED: 25625dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); 25635dfd7cc4Ssstefan1 
break; 25645dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE_RETURNED: 25655dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); 25665dfd7cc4Ssstefan1 break; 25675dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE: 25685dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); 25695dfd7cc4Ssstefan1 break; 2570b8235d2bSsstefan1 case IRPosition::IRP_FUNCTION: 2571b8235d2bSsstefan1 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 2572b8235d2bSsstefan1 break; 2573b8235d2bSsstefan1 } 2574b8235d2bSsstefan1 2575b8235d2bSsstefan1 return *AA; 2576b8235d2bSsstefan1 } 2577b8235d2bSsstefan1 257818283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, 257918283125SJoseph Huber Attributor &A) { 258018283125SJoseph Huber AAExecutionDomainFunction *AA = nullptr; 258118283125SJoseph Huber switch (IRP.getPositionKind()) { 258218283125SJoseph Huber case IRPosition::IRP_INVALID: 258318283125SJoseph Huber case IRPosition::IRP_FLOAT: 258418283125SJoseph Huber case IRPosition::IRP_ARGUMENT: 258518283125SJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT: 258618283125SJoseph Huber case IRPosition::IRP_RETURNED: 258718283125SJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED: 258818283125SJoseph Huber case IRPosition::IRP_CALL_SITE: 258918283125SJoseph Huber llvm_unreachable( 259018283125SJoseph Huber "AAExecutionDomain can only be created for function position!"); 259118283125SJoseph Huber case IRPosition::IRP_FUNCTION: 259218283125SJoseph Huber AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); 259318283125SJoseph Huber break; 259418283125SJoseph Huber } 259518283125SJoseph Huber 259618283125SJoseph Huber return *AA; 259718283125SJoseph Huber } 259818283125SJoseph Huber 25996fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, 26006fc51c9fSJoseph Huber Attributor &A) { 26016fc51c9fSJoseph Huber AAHeapToSharedFunction *AA = nullptr; 26026fc51c9fSJoseph Huber 
switch (IRP.getPositionKind()) { 26036fc51c9fSJoseph Huber case IRPosition::IRP_INVALID: 26046fc51c9fSJoseph Huber case IRPosition::IRP_FLOAT: 26056fc51c9fSJoseph Huber case IRPosition::IRP_ARGUMENT: 26066fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT: 26076fc51c9fSJoseph Huber case IRPosition::IRP_RETURNED: 26086fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED: 26096fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE: 26106fc51c9fSJoseph Huber llvm_unreachable( 26116fc51c9fSJoseph Huber "AAHeapToShared can only be created for function position!"); 26126fc51c9fSJoseph Huber case IRPosition::IRP_FUNCTION: 26136fc51c9fSJoseph Huber AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); 26146fc51c9fSJoseph Huber break; 26156fc51c9fSJoseph Huber } 26166fc51c9fSJoseph Huber 26176fc51c9fSJoseph Huber return *AA; 26186fc51c9fSJoseph Huber } 26196fc51c9fSJoseph Huber 2620b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { 2621b2ad63d3SJoseph Huber if (!containsOpenMP(M, OMPInModule)) 2622b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2623b2ad63d3SJoseph Huber 2624b2ad63d3SJoseph Huber if (DisableOpenMPOptimizations) 2625b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2626b2ad63d3SJoseph Huber 262703d7e61cSJoseph Huber // Create internal copies of each function if this is a kernel Module. 262803d7e61cSJoseph Huber DenseSet<const Function *> InternalizedFuncs; 262903d7e61cSJoseph Huber if (!OMPInModule.getKernels().empty()) 263003d7e61cSJoseph Huber for (Function &F : M) 263103d7e61cSJoseph Huber if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F)) 263203d7e61cSJoseph Huber if (Attributor::internalizeFunction(F, /* Force */ true)) 263303d7e61cSJoseph Huber InternalizedFuncs.insert(&F); 263403d7e61cSJoseph Huber 263503d7e61cSJoseph Huber // Look at every function definition in the Module that wasn't internalized. 
2636b2ad63d3SJoseph Huber SmallVector<Function *, 16> SCC; 263703d7e61cSJoseph Huber for (Function &F : M) 263803d7e61cSJoseph Huber if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) 263903d7e61cSJoseph Huber SCC.push_back(&F); 2640b2ad63d3SJoseph Huber 2641b2ad63d3SJoseph Huber if (SCC.empty()) 2642b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2643b2ad63d3SJoseph Huber 2644b2ad63d3SJoseph Huber FunctionAnalysisManager &FAM = 2645b2ad63d3SJoseph Huber AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2646b2ad63d3SJoseph Huber 2647b2ad63d3SJoseph Huber AnalysisGetter AG(FAM); 2648b2ad63d3SJoseph Huber 2649b2ad63d3SJoseph Huber auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 2650b2ad63d3SJoseph Huber return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 2651b2ad63d3SJoseph Huber }; 2652b2ad63d3SJoseph Huber 2653b2ad63d3SJoseph Huber BumpPtrAllocator Allocator; 2654b2ad63d3SJoseph Huber CallGraphUpdater CGUpdater; 2655b2ad63d3SJoseph Huber 2656b2ad63d3SJoseph Huber SetVector<Function *> Functions(SCC.begin(), SCC.end()); 2657b2ad63d3SJoseph Huber OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, 2658b2ad63d3SJoseph Huber OMPInModule.getKernels()); 2659b2ad63d3SJoseph Huber 266003d7e61cSJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false); 2661b2ad63d3SJoseph Huber 2662b2ad63d3SJoseph Huber OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2663b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(true); 2664b2ad63d3SJoseph Huber if (Changed) 2665b2ad63d3SJoseph Huber return PreservedAnalyses::none(); 2666b2ad63d3SJoseph Huber 2667b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2668b2ad63d3SJoseph Huber } 2669b2ad63d3SJoseph Huber 2670b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, 26719548b74aSJohannes Doerfert CGSCCAnalysisManager &AM, 2672b2ad63d3SJoseph Huber LazyCallGraph &CG, 2673b2ad63d3SJoseph Huber 
CGSCCUpdateResult &UR) { 26749548b74aSJohannes Doerfert if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 26759548b74aSJohannes Doerfert return PreservedAnalyses::all(); 26769548b74aSJohannes Doerfert 26779548b74aSJohannes Doerfert if (DisableOpenMPOptimizations) 26789548b74aSJohannes Doerfert return PreservedAnalyses::all(); 26799548b74aSJohannes Doerfert 2680ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 2681351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 2682351d234dSRoman Lebedev bool SCCIsInteresting = !OMPInModule.getKernels().empty(); 2683351d234dSRoman Lebedev for (LazyCallGraph::Node &N : C) { 2684351d234dSRoman Lebedev Function *Fn = &N.getFunction(); 2685351d234dSRoman Lebedev SCC.push_back(Fn); 26869548b74aSJohannes Doerfert 2687351d234dSRoman Lebedev // Do we already know that the SCC contains kernels, 2688351d234dSRoman Lebedev // or that OpenMP functions are called from this SCC? 2689351d234dSRoman Lebedev if (SCCIsInteresting) 2690351d234dSRoman Lebedev continue; 2691351d234dSRoman Lebedev // If not, let's check that. 
2692351d234dSRoman Lebedev SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); 2693351d234dSRoman Lebedev } 2694351d234dSRoman Lebedev 2695351d234dSRoman Lebedev if (!SCCIsInteresting || SCC.empty()) 26969548b74aSJohannes Doerfert return PreservedAnalyses::all(); 26979548b74aSJohannes Doerfert 26984d4ea9acSHuber, Joseph FunctionAnalysisManager &FAM = 26994d4ea9acSHuber, Joseph AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 27007cfd267cSsstefan1 27017cfd267cSsstefan1 AnalysisGetter AG(FAM); 27027cfd267cSsstefan1 27037cfd267cSsstefan1 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 27044d4ea9acSHuber, Joseph return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 27054d4ea9acSHuber, Joseph }; 27064d4ea9acSHuber, Joseph 2707b2ad63d3SJoseph Huber BumpPtrAllocator Allocator; 27089548b74aSJohannes Doerfert CallGraphUpdater CGUpdater; 27099548b74aSJohannes Doerfert CGUpdater.initialize(CG, C, AM, UR); 27107cfd267cSsstefan1 27117cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 27127cfd267cSsstefan1 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 2713624d34afSJohannes Doerfert /*CGSCC*/ Functions, OMPInModule.getKernels()); 27147cfd267cSsstefan1 27158b57ed09SJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); 2716b8235d2bSsstefan1 2717b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2718b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(false); 2719694ded37SGiorgis Georgakoudis if (Changed) 2720694ded37SGiorgis Georgakoudis return PreservedAnalyses::none(); 2721694ded37SGiorgis Georgakoudis 27229548b74aSJohannes Doerfert return PreservedAnalyses::all(); 27239548b74aSJohannes Doerfert } 27248b57ed09SJoseph Huber 27259548b74aSJohannes Doerfert namespace { 27269548b74aSJohannes Doerfert 2727b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { 27289548b74aSJohannes Doerfert 
CallGraphUpdater CGUpdater; 27299548b74aSJohannes Doerfert OpenMPInModule OMPInModule; 27309548b74aSJohannes Doerfert static char ID; 27319548b74aSJohannes Doerfert 2732b2ad63d3SJoseph Huber OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { 2733b2ad63d3SJoseph Huber initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); 27349548b74aSJohannes Doerfert } 27359548b74aSJohannes Doerfert 27369548b74aSJohannes Doerfert void getAnalysisUsage(AnalysisUsage &AU) const override { 27379548b74aSJohannes Doerfert CallGraphSCCPass::getAnalysisUsage(AU); 27389548b74aSJohannes Doerfert } 27399548b74aSJohannes Doerfert 27409548b74aSJohannes Doerfert bool doInitialization(CallGraph &CG) override { 27419548b74aSJohannes Doerfert // Disable the pass if there is no OpenMP (runtime call) in the module. 27429548b74aSJohannes Doerfert containsOpenMP(CG.getModule(), OMPInModule); 27439548b74aSJohannes Doerfert return false; 27449548b74aSJohannes Doerfert } 27459548b74aSJohannes Doerfert 27469548b74aSJohannes Doerfert bool runOnSCC(CallGraphSCC &CGSCC) override { 27479548b74aSJohannes Doerfert if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 27489548b74aSJohannes Doerfert return false; 27499548b74aSJohannes Doerfert if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 27509548b74aSJohannes Doerfert return false; 27519548b74aSJohannes Doerfert 2752ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 2753351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 
2754351d234dSRoman Lebedev bool SCCIsInteresting = !OMPInModule.getKernels().empty(); 2755351d234dSRoman Lebedev for (CallGraphNode *CGN : CGSCC) { 2756351d234dSRoman Lebedev Function *Fn = CGN->getFunction(); 2757351d234dSRoman Lebedev if (!Fn || Fn->isDeclaration()) 2758351d234dSRoman Lebedev continue; 2759ee17263aSJohannes Doerfert SCC.push_back(Fn); 27609548b74aSJohannes Doerfert 2761351d234dSRoman Lebedev // Do we already know that the SCC contains kernels, 2762351d234dSRoman Lebedev // or that OpenMP functions are called from this SCC? 2763351d234dSRoman Lebedev if (SCCIsInteresting) 2764351d234dSRoman Lebedev continue; 2765351d234dSRoman Lebedev // If not, let's check that. 2766351d234dSRoman Lebedev SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); 2767351d234dSRoman Lebedev } 2768351d234dSRoman Lebedev 2769351d234dSRoman Lebedev if (!SCCIsInteresting || SCC.empty()) 27709548b74aSJohannes Doerfert return false; 27719548b74aSJohannes Doerfert 27729548b74aSJohannes Doerfert CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 27739548b74aSJohannes Doerfert CGUpdater.initialize(CG, CGSCC); 27749548b74aSJohannes Doerfert 27754d4ea9acSHuber, Joseph // Maintain a map of functions to avoid rebuilding the ORE 27764d4ea9acSHuber, Joseph DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; 27774d4ea9acSHuber, Joseph auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { 27784d4ea9acSHuber, Joseph std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; 27794d4ea9acSHuber, Joseph if (!ORE) 27804d4ea9acSHuber, Joseph ORE = std::make_unique<OptimizationRemarkEmitter>(F); 27814d4ea9acSHuber, Joseph return *ORE; 27824d4ea9acSHuber, Joseph }; 27834d4ea9acSHuber, Joseph 27847cfd267cSsstefan1 AnalysisGetter AG; 27857cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 27867cfd267cSsstefan1 BumpPtrAllocator Allocator; 2787e8039ad4SJohannes Doerfert OMPInformationCache InfoCache( 
2788e8039ad4SJohannes Doerfert *(Functions.back()->getParent()), AG, Allocator, 2789624d34afSJohannes Doerfert /*CGSCC*/ Functions, OMPInModule.getKernels()); 27907cfd267cSsstefan1 27918b57ed09SJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, false); 2792b8235d2bSsstefan1 2793b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2794b2ad63d3SJoseph Huber return OMPOpt.run(false); 27959548b74aSJohannes Doerfert } 27969548b74aSJohannes Doerfert 27979548b74aSJohannes Doerfert bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 27989548b74aSJohannes Doerfert }; 27999548b74aSJohannes Doerfert 28009548b74aSJohannes Doerfert } // end anonymous namespace 28019548b74aSJohannes Doerfert 2802e8039ad4SJohannes Doerfert void OpenMPInModule::identifyKernels(Module &M) { 2803e8039ad4SJohannes Doerfert 2804e8039ad4SJohannes Doerfert NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); 2805e8039ad4SJohannes Doerfert if (!MD) 2806e8039ad4SJohannes Doerfert return; 2807e8039ad4SJohannes Doerfert 2808e8039ad4SJohannes Doerfert for (auto *Op : MD->operands()) { 2809e8039ad4SJohannes Doerfert if (Op->getNumOperands() < 2) 2810e8039ad4SJohannes Doerfert continue; 2811e8039ad4SJohannes Doerfert MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); 2812e8039ad4SJohannes Doerfert if (!KindID || KindID->getString() != "kernel") 2813e8039ad4SJohannes Doerfert continue; 2814e8039ad4SJohannes Doerfert 2815e8039ad4SJohannes Doerfert Function *KernelFn = 2816e8039ad4SJohannes Doerfert mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); 2817e8039ad4SJohannes Doerfert if (!KernelFn) 2818e8039ad4SJohannes Doerfert continue; 2819e8039ad4SJohannes Doerfert 2820e8039ad4SJohannes Doerfert ++NumOpenMPTargetRegionKernels; 2821e8039ad4SJohannes Doerfert 2822e8039ad4SJohannes Doerfert Kernels.insert(KernelFn); 2823e8039ad4SJohannes Doerfert } 2824e8039ad4SJohannes Doerfert } 2825e8039ad4SJohannes Doerfert 
28269548b74aSJohannes Doerfert bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 28279548b74aSJohannes Doerfert if (OMPInModule.isKnown()) 28289548b74aSJohannes Doerfert return OMPInModule; 2829dce6bc18SJohannes Doerfert 2830351d234dSRoman Lebedev auto RecordFunctionsContainingUsesOf = [&](Function *F) { 2831351d234dSRoman Lebedev for (User *U : F->users()) 2832351d234dSRoman Lebedev if (auto *I = dyn_cast<Instruction>(U)) 2833351d234dSRoman Lebedev OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction()); 2834351d234dSRoman Lebedev }; 2835351d234dSRoman Lebedev 2836dce6bc18SJohannes Doerfert // MSVC doesn't like long if-else chains for some reason and instead just 2837dce6bc18SJohannes Doerfert // issues an error. Work around it.. 2838dce6bc18SJohannes Doerfert do { 28399548b74aSJohannes Doerfert #define OMP_RTL(_Enum, _Name, ...) \ 2840351d234dSRoman Lebedev if (Function *F = M.getFunction(_Name)) { \ 2841351d234dSRoman Lebedev RecordFunctionsContainingUsesOf(F); \ 2842dce6bc18SJohannes Doerfert OMPInModule = true; \ 2843dce6bc18SJohannes Doerfert } 28449548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPKinds.def" 2845dce6bc18SJohannes Doerfert } while (false); 2846e8039ad4SJohannes Doerfert 2847e8039ad4SJohannes Doerfert // Identify kernels once. TODO: We should split the OMPInformationCache into a 2848e8039ad4SJohannes Doerfert // module and an SCC part. The kernel information, among other things, could 2849e8039ad4SJohannes Doerfert // go into the module part. 
2850e8039ad4SJohannes Doerfert if (OMPInModule.isKnown() && OMPInModule) { 2851e8039ad4SJohannes Doerfert OMPInModule.identifyKernels(M); 2852e8039ad4SJohannes Doerfert return true; 2853e8039ad4SJohannes Doerfert } 2854e8039ad4SJohannes Doerfert 28559548b74aSJohannes Doerfert return OMPInModule = false; 28569548b74aSJohannes Doerfert } 28579548b74aSJohannes Doerfert 2858b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0; 28599548b74aSJohannes Doerfert 2860b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", 28619548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 28629548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 2863b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", 28649548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 28659548b74aSJohannes Doerfert 2866b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() { 2867b2ad63d3SJoseph Huber return new OpenMPOptCGSCCLegacyPass(); 2868b2ad63d3SJoseph Huber } 2869