19548b74aSJohannes Doerfert //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 29548b74aSJohannes Doerfert // 39548b74aSJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49548b74aSJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information. 59548b74aSJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69548b74aSJohannes Doerfert // 79548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 89548b74aSJohannes Doerfert // 99548b74aSJohannes Doerfert // OpenMP specific optimizations: 109548b74aSJohannes Doerfert // 119548b74aSJohannes Doerfert // - Deduplication of runtime calls, e.g., omp_get_thread_num. 12ca1560daSJoseph Huber // - Replacing globalized device memory with stack memory. 13ca1560daSJoseph Huber // - Replacing globalized device memory with shared memory. 149548b74aSJohannes Doerfert // 159548b74aSJohannes Doerfert //===----------------------------------------------------------------------===// 169548b74aSJohannes Doerfert 179548b74aSJohannes Doerfert #include "llvm/Transforms/IPO/OpenMPOpt.h" 189548b74aSJohannes Doerfert 199548b74aSJohannes Doerfert #include "llvm/ADT/EnumeratedArray.h" 2018283125SJoseph Huber #include "llvm/ADT/PostOrderIterator.h" 219548b74aSJohannes Doerfert #include "llvm/ADT/Statistic.h" 229548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraph.h" 239548b74aSJohannes Doerfert #include "llvm/Analysis/CallGraphSCCPass.h" 244d4ea9acSHuber, Joseph #include "llvm/Analysis/OptimizationRemarkEmitter.h" 253a6bfcf2SGiorgis Georgakoudis #include "llvm/Analysis/ValueTracking.h" 269548b74aSJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPConstants.h" 27e28936f6SJohannes Doerfert #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 2868abc3d2SJoseph Huber #include "llvm/IR/IntrinsicInst.h" 299548b74aSJohannes Doerfert #include "llvm/InitializePasses.h" 309548b74aSJohannes Doerfert #include "llvm/Support/CommandLine.h" 319548b74aSJohannes Doerfert #include "llvm/Transforms/IPO.h" 327cfd267cSsstefan1 #include "llvm/Transforms/IPO/Attributor.h" 333a6bfcf2SGiorgis Georgakoudis #include "llvm/Transforms/Utils/BasicBlockUtils.h" 349548b74aSJohannes Doerfert #include "llvm/Transforms/Utils/CallGraphUpdater.h" 3597517055SGiorgis Georgakoudis #include "llvm/Transforms/Utils/CodeExtractor.h" 369548b74aSJohannes Doerfert 379548b74aSJohannes Doerfert using namespace llvm; 389548b74aSJohannes Doerfert using namespace omp; 399548b74aSJohannes Doerfert 409548b74aSJohannes Doerfert #define DEBUG_TYPE "openmp-opt" 419548b74aSJohannes Doerfert 429548b74aSJohannes Doerfert static cl::opt<bool> DisableOpenMPOptimizations( 439548b74aSJohannes Doerfert "openmp-opt-disable", cl::ZeroOrMore, 449548b74aSJohannes Doerfert cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 459548b74aSJohannes Doerfert cl::init(false)); 469548b74aSJohannes Doerfert 473a6bfcf2SGiorgis Georgakoudis static cl::opt<bool> EnableParallelRegionMerging( 483a6bfcf2SGiorgis Georgakoudis "openmp-opt-enable-merging", cl::ZeroOrMore, 493a6bfcf2SGiorgis Georgakoudis cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, 503a6bfcf2SGiorgis Georgakoudis cl::init(false)); 513a6bfcf2SGiorgis Georgakoudis 520f426935Ssstefan1 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), 530f426935Ssstefan1 cl::Hidden); 54e8039ad4SJohannes Doerfert static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", 55e8039ad4SJohannes Doerfert cl::init(false), cl::Hidden); 560f426935Ssstefan1 57496f8e5bSHamilton Tobon Mosquera static cl::opt<bool> HideMemoryTransferLatency( 58496f8e5bSHamilton Tobon Mosquera "openmp-hide-memory-transfer-latency", 59496f8e5bSHamilton Tobon Mosquera cl::desc("[WIP] Tries to hide the latency of host to device memory" 60496f8e5bSHamilton Tobon Mosquera " transfers"), 61496f8e5bSHamilton Tobon Mosquera cl::Hidden, cl::init(false)); 62496f8e5bSHamilton Tobon Mosquera 639548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 649548b74aSJohannes Doerfert "Number of OpenMP runtime calls deduplicated"); 6555eb714aSRoman Lebedev STATISTIC(NumOpenMPParallelRegionsDeleted, 6655eb714aSRoman Lebedev "Number of OpenMP parallel regions deleted"); 679548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 689548b74aSJohannes Doerfert "Number of OpenMP runtime functions identified"); 699548b74aSJohannes Doerfert STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 709548b74aSJohannes Doerfert "Number of OpenMP runtime function uses identified"); 71e8039ad4SJohannes Doerfert STATISTIC(NumOpenMPTargetRegionKernels, 72e8039ad4SJohannes Doerfert "Number of OpenMP target region entry points (=kernels) identified"); 735b0581aeSJohannes Doerfert STATISTIC( 745b0581aeSJohannes Doerfert NumOpenMPParallelRegionsReplacedInGPUStateMachine, 755b0581aeSJohannes Doerfert "Number of OpenMP parallel regions replaced with ID in GPU state machines"); 763a6bfcf2SGiorgis Georgakoudis STATISTIC(NumOpenMPParallelRegionsMerged, 773a6bfcf2SGiorgis Georgakoudis "Number of OpenMP parallel regions merged"); 786fc51c9fSJoseph Huber STATISTIC(NumBytesMovedToSharedMemory, 796fc51c9fSJoseph Huber "Amount of memory pushed to shared memory"); 809548b74aSJohannes Doerfert 81263c4a3cSrathod-sahaab #if !defined(NDEBUG) 829548b74aSJohannes Doerfert static constexpr auto TAG = "[" DEBUG_TYPE "]"; 83a50c0b0dSMikael Holmen #endif 849548b74aSJohannes Doerfert 859548b74aSJohannes Doerfert namespace { 869548b74aSJohannes Doerfert 876fc51c9fSJoseph Huber enum class AddressSpace : unsigned { 886fc51c9fSJoseph Huber Generic = 0, 896fc51c9fSJoseph Huber Global = 1, 906fc51c9fSJoseph Huber Shared = 3, 916fc51c9fSJoseph Huber Constant = 4, 926fc51c9fSJoseph Huber Local = 5, 936fc51c9fSJoseph Huber }; 946fc51c9fSJoseph Huber 956fc51c9fSJoseph Huber struct AAHeapToShared; 966fc51c9fSJoseph Huber 97b8235d2bSsstefan1 struct AAICVTracker; 98b8235d2bSsstefan1 997cfd267cSsstefan1 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for 1007cfd267cSsstefan1 /// Attributor runs. 1017cfd267cSsstefan1 struct OMPInformationCache : public InformationCache { 1027cfd267cSsstefan1 OMPInformationCache(Module &M, AnalysisGetter &AG, 103624d34afSJohannes Doerfert BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, 104e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels) 105624d34afSJohannes Doerfert : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), 106624d34afSJohannes Doerfert Kernels(Kernels) { 107624d34afSJohannes Doerfert 10861238d26Ssstefan1 OMPBuilder.initialize(); 1099548b74aSJohannes Doerfert initializeRuntimeFunctions(); 1100f426935Ssstefan1 initializeInternalControlVars(); 1119548b74aSJohannes Doerfert } 1129548b74aSJohannes Doerfert 1130f426935Ssstefan1 /// Generic information that describes an internal control variable. 1140f426935Ssstefan1 struct InternalControlVarInfo { 1150f426935Ssstefan1 /// The kind, as described by InternalControlVar enum. 1160f426935Ssstefan1 InternalControlVar Kind; 1170f426935Ssstefan1 1180f426935Ssstefan1 /// The name of the ICV. 1190f426935Ssstefan1 StringRef Name; 1200f426935Ssstefan1 1210f426935Ssstefan1 /// Environment variable associated with this ICV. 1220f426935Ssstefan1 StringRef EnvVarName; 1230f426935Ssstefan1 1240f426935Ssstefan1 /// Initial value kind. 1250f426935Ssstefan1 ICVInitValue InitKind; 1260f426935Ssstefan1 1270f426935Ssstefan1 /// Initial value. 1280f426935Ssstefan1 ConstantInt *InitValue; 1290f426935Ssstefan1 1300f426935Ssstefan1 /// Setter RTL function associated with this ICV. 1310f426935Ssstefan1 RuntimeFunction Setter; 1320f426935Ssstefan1 1330f426935Ssstefan1 /// Getter RTL function associated with this ICV. 1340f426935Ssstefan1 RuntimeFunction Getter; 1350f426935Ssstefan1 1360f426935Ssstefan1 /// RTL Function corresponding to the override clause of this ICV 1370f426935Ssstefan1 RuntimeFunction Clause; 1380f426935Ssstefan1 }; 1390f426935Ssstefan1 1409548b74aSJohannes Doerfert /// Generic information that describes a runtime function 1419548b74aSJohannes Doerfert struct RuntimeFunctionInfo { 1428855fec3SJohannes Doerfert 1439548b74aSJohannes Doerfert /// The kind, as described by the RuntimeFunction enum. 1449548b74aSJohannes Doerfert RuntimeFunction Kind; 1459548b74aSJohannes Doerfert 1469548b74aSJohannes Doerfert /// The name of the function. 1479548b74aSJohannes Doerfert StringRef Name; 1489548b74aSJohannes Doerfert 1499548b74aSJohannes Doerfert /// Flag to indicate a variadic function. 1509548b74aSJohannes Doerfert bool IsVarArg; 1519548b74aSJohannes Doerfert 1529548b74aSJohannes Doerfert /// The return type of the function. 1539548b74aSJohannes Doerfert Type *ReturnType; 1549548b74aSJohannes Doerfert 1559548b74aSJohannes Doerfert /// The argument types of the function. 1569548b74aSJohannes Doerfert SmallVector<Type *, 8> ArgumentTypes; 1579548b74aSJohannes Doerfert 1589548b74aSJohannes Doerfert /// The declaration if available. 159f09f4b26SJohannes Doerfert Function *Declaration = nullptr; 1609548b74aSJohannes Doerfert 1619548b74aSJohannes Doerfert /// Uses of this runtime function per function containing the use. 1628855fec3SJohannes Doerfert using UseVector = SmallVector<Use *, 16>; 1638855fec3SJohannes Doerfert 164b8235d2bSsstefan1 /// Clear UsesMap for runtime function. 165b8235d2bSsstefan1 void clearUsesMap() { UsesMap.clear(); } 166b8235d2bSsstefan1 16754bd3751SJohannes Doerfert /// Boolean conversion that is true if the runtime function was found. 16854bd3751SJohannes Doerfert operator bool() const { return Declaration; } 16954bd3751SJohannes Doerfert 1708855fec3SJohannes Doerfert /// Return the vector of uses in function \p F. 1718855fec3SJohannes Doerfert UseVector &getOrCreateUseVector(Function *F) { 172b8235d2bSsstefan1 std::shared_ptr<UseVector> &UV = UsesMap[F]; 1738855fec3SJohannes Doerfert if (!UV) 174b8235d2bSsstefan1 UV = std::make_shared<UseVector>(); 1758855fec3SJohannes Doerfert return *UV; 1768855fec3SJohannes Doerfert } 1778855fec3SJohannes Doerfert 1788855fec3SJohannes Doerfert /// Return the vector of uses in function \p F or `nullptr` if there are 1798855fec3SJohannes Doerfert /// none. 1808855fec3SJohannes Doerfert const UseVector *getUseVector(Function &F) const { 18195e57072SDavid Blaikie auto I = UsesMap.find(&F); 18295e57072SDavid Blaikie if (I != UsesMap.end()) 18395e57072SDavid Blaikie return I->second.get(); 18495e57072SDavid Blaikie return nullptr; 1858855fec3SJohannes Doerfert } 1868855fec3SJohannes Doerfert 1878855fec3SJohannes Doerfert /// Return how many functions contain uses of this runtime function. 1888855fec3SJohannes Doerfert size_t getNumFunctionsWithUses() const { return UsesMap.size(); } 1899548b74aSJohannes Doerfert 1909548b74aSJohannes Doerfert /// Return the number of arguments (or the minimal number for variadic 1919548b74aSJohannes Doerfert /// functions). 1929548b74aSJohannes Doerfert size_t getNumArgs() const { return ArgumentTypes.size(); } 1939548b74aSJohannes Doerfert 1949548b74aSJohannes Doerfert /// Run the callback \p CB on each use and forget the use if the result is 1959548b74aSJohannes Doerfert /// true. The callback will be fed the function in which the use was 1969548b74aSJohannes Doerfert /// encountered as second argument. 197624d34afSJohannes Doerfert void foreachUse(SmallVectorImpl<Function *> &SCC, 198624d34afSJohannes Doerfert function_ref<bool(Use &, Function &)> CB) { 199624d34afSJohannes Doerfert for (Function *F : SCC) 200624d34afSJohannes Doerfert foreachUse(CB, F); 201e099c7b6Ssstefan1 } 202e099c7b6Ssstefan1 203e099c7b6Ssstefan1 /// Run the callback \p CB on each use within the function \p F and forget 204e099c7b6Ssstefan1 /// the use if the result is true. 205624d34afSJohannes Doerfert void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { 2068855fec3SJohannes Doerfert SmallVector<unsigned, 8> ToBeDeleted; 2079548b74aSJohannes Doerfert ToBeDeleted.clear(); 208e099c7b6Ssstefan1 2098855fec3SJohannes Doerfert unsigned Idx = 0; 210624d34afSJohannes Doerfert UseVector &UV = getOrCreateUseVector(F); 211e099c7b6Ssstefan1 2128855fec3SJohannes Doerfert for (Use *U : UV) { 213e099c7b6Ssstefan1 if (CB(*U, *F)) 2148855fec3SJohannes Doerfert ToBeDeleted.push_back(Idx); 2158855fec3SJohannes Doerfert ++Idx; 2168855fec3SJohannes Doerfert } 2178855fec3SJohannes Doerfert 2188855fec3SJohannes Doerfert // Remove the to-be-deleted indices in reverse order as prior 219b726c557SJohannes Doerfert // modifications will not modify the smaller indices. 2208855fec3SJohannes Doerfert while (!ToBeDeleted.empty()) { 2218855fec3SJohannes Doerfert unsigned Idx = ToBeDeleted.pop_back_val(); 2228855fec3SJohannes Doerfert UV[Idx] = UV.back(); 2238855fec3SJohannes Doerfert UV.pop_back(); 2249548b74aSJohannes Doerfert } 2259548b74aSJohannes Doerfert } 2268855fec3SJohannes Doerfert 2278855fec3SJohannes Doerfert private: 2288855fec3SJohannes Doerfert /// Map from functions to all uses of this runtime function contained in 2298855fec3SJohannes Doerfert /// them. 230b8235d2bSsstefan1 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; 2319548b74aSJohannes Doerfert }; 2329548b74aSJohannes Doerfert 2337cfd267cSsstefan1 /// An OpenMP-IR-Builder instance 2347cfd267cSsstefan1 OpenMPIRBuilder OMPBuilder; 2357cfd267cSsstefan1 2367cfd267cSsstefan1 /// Map from runtime function kind to the runtime function description. 2377cfd267cSsstefan1 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 2387cfd267cSsstefan1 RuntimeFunction::OMPRTL___last> 2397cfd267cSsstefan1 RFIs; 2407cfd267cSsstefan1 2410f426935Ssstefan1 /// Map from ICV kind to the ICV description. 2420f426935Ssstefan1 EnumeratedArray<InternalControlVarInfo, InternalControlVar, 2430f426935Ssstefan1 InternalControlVar::ICV___last> 2440f426935Ssstefan1 ICVs; 2450f426935Ssstefan1 2460f426935Ssstefan1 /// Helper to initialize all internal control variable information for those 2470f426935Ssstefan1 /// defined in OMPKinds.def. 2480f426935Ssstefan1 void initializeInternalControlVars() { 2490f426935Ssstefan1 #define ICV_RT_SET(_Name, RTL) \ 2500f426935Ssstefan1 { \ 2510f426935Ssstefan1 auto &ICV = ICVs[_Name]; \ 2520f426935Ssstefan1 ICV.Setter = RTL; \ 2530f426935Ssstefan1 } 2540f426935Ssstefan1 #define ICV_RT_GET(Name, RTL) \ 2550f426935Ssstefan1 { \ 2560f426935Ssstefan1 auto &ICV = ICVs[Name]; \ 2570f426935Ssstefan1 ICV.Getter = RTL; \ 2580f426935Ssstefan1 } 2590f426935Ssstefan1 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ 2600f426935Ssstefan1 { \ 2610f426935Ssstefan1 auto &ICV = ICVs[Enum]; \ 2620f426935Ssstefan1 ICV.Name = _Name; \ 2630f426935Ssstefan1 ICV.Kind = Enum; \ 2640f426935Ssstefan1 ICV.InitKind = Init; \ 2650f426935Ssstefan1 ICV.EnvVarName = _EnvVarName; \ 2660f426935Ssstefan1 switch (ICV.InitKind) { \ 267951e43f3Ssstefan1 case ICV_IMPLEMENTATION_DEFINED: \ 2680f426935Ssstefan1 ICV.InitValue = nullptr; \ 2690f426935Ssstefan1 break; \ 270951e43f3Ssstefan1 case ICV_ZERO: \ 2716aab27baSsstefan1 ICV.InitValue = ConstantInt::get( \ 2726aab27baSsstefan1 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ 2730f426935Ssstefan1 break; \ 274951e43f3Ssstefan1 case ICV_FALSE: \ 2756aab27baSsstefan1 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ 2760f426935Ssstefan1 break; \ 277951e43f3Ssstefan1 case ICV_LAST: \ 2780f426935Ssstefan1 break; \ 2790f426935Ssstefan1 } \ 2800f426935Ssstefan1 } 2810f426935Ssstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2820f426935Ssstefan1 } 2830f426935Ssstefan1 2847cfd267cSsstefan1 /// Returns true if the function declaration \p F matches the runtime 2857cfd267cSsstefan1 /// function types, that is, return type \p RTFRetType, and argument types 2867cfd267cSsstefan1 /// \p RTFArgTypes. 2877cfd267cSsstefan1 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 2887cfd267cSsstefan1 SmallVector<Type *, 8> &RTFArgTypes) { 2897cfd267cSsstefan1 // TODO: We should output information to the user (under debug output 2907cfd267cSsstefan1 // and via remarks). 2917cfd267cSsstefan1 2927cfd267cSsstefan1 if (!F) 2937cfd267cSsstefan1 return false; 2947cfd267cSsstefan1 if (F->getReturnType() != RTFRetType) 2957cfd267cSsstefan1 return false; 2967cfd267cSsstefan1 if (F->arg_size() != RTFArgTypes.size()) 2977cfd267cSsstefan1 return false; 2987cfd267cSsstefan1 2997cfd267cSsstefan1 auto RTFTyIt = RTFArgTypes.begin(); 3007cfd267cSsstefan1 for (Argument &Arg : F->args()) { 3017cfd267cSsstefan1 if (Arg.getType() != *RTFTyIt) 3027cfd267cSsstefan1 return false; 3037cfd267cSsstefan1 3047cfd267cSsstefan1 ++RTFTyIt; 3057cfd267cSsstefan1 } 3067cfd267cSsstefan1 3077cfd267cSsstefan1 return true; 3087cfd267cSsstefan1 } 3097cfd267cSsstefan1 310b726c557SJohannes Doerfert // Helper to collect all uses of the declaration in the UsesMap. 311b8235d2bSsstefan1 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { 3127cfd267cSsstefan1 unsigned NumUses = 0; 3137cfd267cSsstefan1 if (!RFI.Declaration) 3147cfd267cSsstefan1 return NumUses; 3157cfd267cSsstefan1 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 3167cfd267cSsstefan1 317b8235d2bSsstefan1 if (CollectStats) { 3187cfd267cSsstefan1 NumOpenMPRuntimeFunctionsIdentified += 1; 3197cfd267cSsstefan1 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 320b8235d2bSsstefan1 } 3217cfd267cSsstefan1 3227cfd267cSsstefan1 // TODO: We directly convert uses into proper calls and unknown uses. 3237cfd267cSsstefan1 for (Use &U : RFI.Declaration->uses()) { 3247cfd267cSsstefan1 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 3257cfd267cSsstefan1 if (ModuleSlice.count(UserI->getFunction())) { 3267cfd267cSsstefan1 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); 3277cfd267cSsstefan1 ++NumUses; 3287cfd267cSsstefan1 } 3297cfd267cSsstefan1 } else { 3307cfd267cSsstefan1 RFI.getOrCreateUseVector(nullptr).push_back(&U); 3317cfd267cSsstefan1 ++NumUses; 3327cfd267cSsstefan1 } 3337cfd267cSsstefan1 } 3347cfd267cSsstefan1 return NumUses; 335b8235d2bSsstefan1 } 3367cfd267cSsstefan1 33797517055SGiorgis Georgakoudis // Helper function to recollect uses of a runtime function. 33897517055SGiorgis Georgakoudis void recollectUsesForFunction(RuntimeFunction RTF) { 33997517055SGiorgis Georgakoudis auto &RFI = RFIs[RTF]; 340b8235d2bSsstefan1 RFI.clearUsesMap(); 341b8235d2bSsstefan1 collectUses(RFI, /*CollectStats*/ false); 342b8235d2bSsstefan1 } 34397517055SGiorgis Georgakoudis 34497517055SGiorgis Georgakoudis // Helper function to recollect uses of all runtime functions. 34597517055SGiorgis Georgakoudis void recollectUses() { 34697517055SGiorgis Georgakoudis for (int Idx = 0; Idx < RFIs.size(); ++Idx) 34797517055SGiorgis Georgakoudis recollectUsesForFunction(static_cast<RuntimeFunction>(Idx)); 348b8235d2bSsstefan1 } 349b8235d2bSsstefan1 350b8235d2bSsstefan1 /// Helper to initialize all runtime function information for those defined 351b8235d2bSsstefan1 /// in OpenMPKinds.def. 352b8235d2bSsstefan1 void initializeRuntimeFunctions() { 3537cfd267cSsstefan1 Module &M = *((*ModuleSlice.begin())->getParent()); 3547cfd267cSsstefan1 3556aab27baSsstefan1 // Helper macros for handling __VA_ARGS__ in OMP_RTL 3566aab27baSsstefan1 #define OMP_TYPE(VarName, ...) \ 3576aab27baSsstefan1 Type *VarName = OMPBuilder.VarName; \ 3586aab27baSsstefan1 (void)VarName; 3596aab27baSsstefan1 3606aab27baSsstefan1 #define OMP_ARRAY_TYPE(VarName, ...) \ 3616aab27baSsstefan1 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ 3626aab27baSsstefan1 (void)VarName##Ty; \ 3636aab27baSsstefan1 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ 3646aab27baSsstefan1 (void)VarName##PtrTy; 3656aab27baSsstefan1 3666aab27baSsstefan1 #define OMP_FUNCTION_TYPE(VarName, ...) \ 3676aab27baSsstefan1 FunctionType *VarName = OMPBuilder.VarName; \ 3686aab27baSsstefan1 (void)VarName; \ 3696aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3706aab27baSsstefan1 (void)VarName##Ptr; 3716aab27baSsstefan1 3726aab27baSsstefan1 #define OMP_STRUCT_TYPE(VarName, ...) \ 3736aab27baSsstefan1 StructType *VarName = OMPBuilder.VarName; \ 3746aab27baSsstefan1 (void)VarName; \ 3756aab27baSsstefan1 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 3766aab27baSsstefan1 (void)VarName##Ptr; 3776aab27baSsstefan1 3787cfd267cSsstefan1 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ 3797cfd267cSsstefan1 { \ 3807cfd267cSsstefan1 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 3817cfd267cSsstefan1 Function *F = M.getFunction(_Name); \ 3826aab27baSsstefan1 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ 3837cfd267cSsstefan1 auto &RFI = RFIs[_Enum]; \ 3847cfd267cSsstefan1 RFI.Kind = _Enum; \ 3857cfd267cSsstefan1 RFI.Name = _Name; \ 3867cfd267cSsstefan1 RFI.IsVarArg = _IsVarArg; \ 3876aab27baSsstefan1 RFI.ReturnType = OMPBuilder._ReturnType; \ 3887cfd267cSsstefan1 RFI.ArgumentTypes = std::move(ArgsTypes); \ 3897cfd267cSsstefan1 RFI.Declaration = F; \ 390b8235d2bSsstefan1 unsigned NumUses = collectUses(RFI); \ 3917cfd267cSsstefan1 (void)NumUses; \ 3927cfd267cSsstefan1 LLVM_DEBUG({ \ 3937cfd267cSsstefan1 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 3947cfd267cSsstefan1 << " found\n"; \ 3957cfd267cSsstefan1 if (RFI.Declaration) \ 3967cfd267cSsstefan1 dbgs() << TAG << "-> got " << NumUses << " uses in " \ 3977cfd267cSsstefan1 << RFI.getNumFunctionsWithUses() \ 3987cfd267cSsstefan1 << " different functions.\n"; \ 3997cfd267cSsstefan1 }); \ 4007cfd267cSsstefan1 } \ 4017cfd267cSsstefan1 } 4027cfd267cSsstefan1 #include "llvm/Frontend/OpenMP/OMPKinds.def" 4037cfd267cSsstefan1 4047cfd267cSsstefan1 // TODO: We should attach the attributes defined in OMPKinds.def. 4057cfd267cSsstefan1 } 406e8039ad4SJohannes Doerfert 407e8039ad4SJohannes Doerfert /// Collection of known kernels (\see Kernel) in the module. 408e8039ad4SJohannes Doerfert SmallPtrSetImpl<Kernel> &Kernels; 4097cfd267cSsstefan1 }; 4107cfd267cSsstefan1 4118931add6SHamilton Tobon Mosquera /// Used to map the values physically (in the IR) stored in an offload 4128931add6SHamilton Tobon Mosquera /// array, to a vector in memory. 4138931add6SHamilton Tobon Mosquera struct OffloadArray { 4148931add6SHamilton Tobon Mosquera /// Physical array (in the IR). 4158931add6SHamilton Tobon Mosquera AllocaInst *Array = nullptr; 4168931add6SHamilton Tobon Mosquera /// Mapped values. 4178931add6SHamilton Tobon Mosquera SmallVector<Value *, 8> StoredValues; 4188931add6SHamilton Tobon Mosquera /// Last stores made in the offload array. 4198931add6SHamilton Tobon Mosquera SmallVector<StoreInst *, 8> LastAccesses; 4208931add6SHamilton Tobon Mosquera 4218931add6SHamilton Tobon Mosquera OffloadArray() = default; 4228931add6SHamilton Tobon Mosquera 4238931add6SHamilton Tobon Mosquera /// Initializes the OffloadArray with the values stored in \p Array before 4248931add6SHamilton Tobon Mosquera /// instruction \p Before is reached. Returns false if the initialization 4258931add6SHamilton Tobon Mosquera /// fails. 4268931add6SHamilton Tobon Mosquera /// This MUST be used immediately after the construction of the object. 4278931add6SHamilton Tobon Mosquera bool initialize(AllocaInst &Array, Instruction &Before) { 4288931add6SHamilton Tobon Mosquera if (!Array.getAllocatedType()->isArrayTy()) 4298931add6SHamilton Tobon Mosquera return false; 4308931add6SHamilton Tobon Mosquera 4318931add6SHamilton Tobon Mosquera if (!getValues(Array, Before)) 4328931add6SHamilton Tobon Mosquera return false; 4338931add6SHamilton Tobon Mosquera 4348931add6SHamilton Tobon Mosquera this->Array = &Array; 4358931add6SHamilton Tobon Mosquera return true; 4368931add6SHamilton Tobon Mosquera } 4378931add6SHamilton Tobon Mosquera 438da8bec47SJoseph Huber static const unsigned DeviceIDArgNum = 1; 439da8bec47SJoseph Huber static const unsigned BasePtrsArgNum = 3; 440da8bec47SJoseph Huber static const unsigned PtrsArgNum = 4; 441da8bec47SJoseph Huber static const unsigned SizesArgNum = 5; 4421d3d9b9cSHamilton Tobon Mosquera 4438931add6SHamilton Tobon Mosquera private: 4448931add6SHamilton Tobon Mosquera /// Traverses the BasicBlock where \p Array is, collecting the stores made to 4458931add6SHamilton Tobon Mosquera /// \p Array, leaving StoredValues with the values stored before the 4468931add6SHamilton Tobon Mosquera /// instruction \p Before is reached. 4478931add6SHamilton Tobon Mosquera bool getValues(AllocaInst &Array, Instruction &Before) { 4488931add6SHamilton Tobon Mosquera // Initialize container. 449d08d490aSJohannes Doerfert const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); 4508931add6SHamilton Tobon Mosquera StoredValues.assign(NumValues, nullptr); 4518931add6SHamilton Tobon Mosquera LastAccesses.assign(NumValues, nullptr); 4528931add6SHamilton Tobon Mosquera 4538931add6SHamilton Tobon Mosquera // TODO: This assumes the instruction \p Before is in the same 4548931add6SHamilton Tobon Mosquera // BasicBlock as Array. Make it general, for any control flow graph. 4558931add6SHamilton Tobon Mosquera BasicBlock *BB = Array.getParent(); 4568931add6SHamilton Tobon Mosquera if (BB != Before.getParent()) 4578931add6SHamilton Tobon Mosquera return false; 4588931add6SHamilton Tobon Mosquera 4598931add6SHamilton Tobon Mosquera const DataLayout &DL = Array.getModule()->getDataLayout(); 4608931add6SHamilton Tobon Mosquera const unsigned int PointerSize = DL.getPointerSize(); 4618931add6SHamilton Tobon Mosquera 4628931add6SHamilton Tobon Mosquera for (Instruction &I : *BB) { 4638931add6SHamilton Tobon Mosquera if (&I == &Before) 4648931add6SHamilton Tobon Mosquera break; 4658931add6SHamilton Tobon Mosquera 4668931add6SHamilton Tobon Mosquera if (!isa<StoreInst>(&I)) 4678931add6SHamilton Tobon Mosquera continue; 4688931add6SHamilton Tobon Mosquera 4698931add6SHamilton Tobon Mosquera auto *S = cast<StoreInst>(&I); 4708931add6SHamilton Tobon Mosquera int64_t Offset = -1; 471d08d490aSJohannes Doerfert auto *Dst = 472d08d490aSJohannes Doerfert GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); 4738931add6SHamilton Tobon Mosquera if (Dst == &Array) { 4748931add6SHamilton Tobon Mosquera int64_t Idx = Offset / PointerSize; 4758931add6SHamilton Tobon Mosquera StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); 4768931add6SHamilton Tobon Mosquera LastAccesses[Idx] = S; 4778931add6SHamilton Tobon Mosquera } 4788931add6SHamilton Tobon Mosquera } 4798931add6SHamilton Tobon Mosquera 4808931add6SHamilton Tobon Mosquera return isFilled(); 4818931add6SHamilton Tobon Mosquera } 4828931add6SHamilton Tobon Mosquera 4838931add6SHamilton Tobon Mosquera /// Returns true if all values in StoredValues and 4848931add6SHamilton Tobon Mosquera /// LastAccesses are not nullptrs. 4858931add6SHamilton Tobon Mosquera bool isFilled() { 4868931add6SHamilton Tobon Mosquera const unsigned NumValues = StoredValues.size(); 4878931add6SHamilton Tobon Mosquera for (unsigned I = 0; I < NumValues; ++I) { 4888931add6SHamilton Tobon Mosquera if (!StoredValues[I] || !LastAccesses[I]) 4898931add6SHamilton Tobon Mosquera return false; 4908931add6SHamilton Tobon Mosquera } 4918931add6SHamilton Tobon Mosquera 4928931add6SHamilton Tobon Mosquera return true; 4938931add6SHamilton Tobon Mosquera } 4948931add6SHamilton Tobon Mosquera }; 4958931add6SHamilton Tobon Mosquera 4967cfd267cSsstefan1 struct OpenMPOpt { 4977cfd267cSsstefan1 4987cfd267cSsstefan1 using OptimizationRemarkGetter = 4997cfd267cSsstefan1 function_ref<OptimizationRemarkEmitter &(Function *)>; 5007cfd267cSsstefan1 5017cfd267cSsstefan1 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, 5027cfd267cSsstefan1 OptimizationRemarkGetter OREGetter, 503b8235d2bSsstefan1 OMPInformationCache &OMPInfoCache, Attributor &A) 50477b79d79SMehdi Amini : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), 505b8235d2bSsstefan1 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} 5067cfd267cSsstefan1 507a2281419SJoseph Huber /// Check if any remarks are enabled for openmp-opt 508a2281419SJoseph Huber bool remarksEnabled() { 509a2281419SJoseph Huber auto &Ctx = M.getContext(); 510a2281419SJoseph Huber return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE); 511a2281419SJoseph Huber } 512a2281419SJoseph Huber 5139548b74aSJohannes Doerfert /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 514b2ad63d3SJoseph Huber bool run(bool IsModulePass) { 51554bd3751SJohannes Doerfert if (SCC.empty()) 51654bd3751SJohannes Doerfert return false; 51754bd3751SJohannes Doerfert 5189548b74aSJohannes Doerfert bool Changed = false; 5199548b74aSJohannes Doerfert 5209548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 52177b79d79SMehdi Amini << " functions in a slice with " 52277b79d79SMehdi Amini << OMPInfoCache.ModuleSlice.size() << " functions\n"); 5239548b74aSJohannes Doerfert 524b2ad63d3SJoseph Huber if (IsModulePass) { 525d3e74913SNico Weber Changed |= runAttributor(); 52618283125SJoseph Huber 5276fc51c9fSJoseph Huber // Recollect uses, in case Attributor deleted any. 5286fc51c9fSJoseph Huber OMPInfoCache.recollectUses(); 5296fc51c9fSJoseph Huber 530b2ad63d3SJoseph Huber if (remarksEnabled()) 531b2ad63d3SJoseph Huber analysisGlobalization(); 532b2ad63d3SJoseph Huber } else { 533e8039ad4SJohannes Doerfert if (PrintICVValues) 534e8039ad4SJohannes Doerfert printICVs(); 535e8039ad4SJohannes Doerfert if (PrintOpenMPKernels) 536e8039ad4SJohannes Doerfert printKernels(); 537e8039ad4SJohannes Doerfert 538d3e74913SNico Weber Changed |= rewriteDeviceCodeStateMachine(); 539d3e74913SNico Weber 540d3e74913SNico Weber Changed |= runAttributor(); 541e8039ad4SJohannes Doerfert 542e8039ad4SJohannes Doerfert // Recollect uses, in case Attributor deleted any. 543e8039ad4SJohannes Doerfert OMPInfoCache.recollectUses(); 544e8039ad4SJohannes Doerfert 545e8039ad4SJohannes Doerfert Changed |= deleteParallelRegions(); 546496f8e5bSHamilton Tobon Mosquera if (HideMemoryTransferLatency) 547496f8e5bSHamilton Tobon Mosquera Changed |= hideMemTransfersLatency(); 5483a6bfcf2SGiorgis Georgakoudis Changed |= deduplicateRuntimeCalls(); 5493a6bfcf2SGiorgis Georgakoudis if (EnableParallelRegionMerging) { 5503a6bfcf2SGiorgis Georgakoudis if (mergeParallelRegions()) { 5513a6bfcf2SGiorgis Georgakoudis deduplicateRuntimeCalls(); 5523a6bfcf2SGiorgis Georgakoudis Changed = true; 5533a6bfcf2SGiorgis Georgakoudis } 5543a6bfcf2SGiorgis Georgakoudis } 555b2ad63d3SJoseph Huber } 556e8039ad4SJohannes Doerfert 557e8039ad4SJohannes Doerfert return Changed; 558e8039ad4SJohannes Doerfert } 559e8039ad4SJohannes Doerfert 5600f426935Ssstefan1 /// Print initial ICV values for testing. 5610f426935Ssstefan1 /// FIXME: This should be done from the Attributor once it is added. 562e8039ad4SJohannes Doerfert void printICVs() const { 563cb9cfa0dSsstefan1 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, 564cb9cfa0dSsstefan1 ICV_proc_bind}; 5650f426935Ssstefan1 5660f426935Ssstefan1 for (Function *F : OMPInfoCache.ModuleSlice) { 5670f426935Ssstefan1 for (auto ICV : ICVs) { 5680f426935Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 5692db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 5702db182ffSJoseph Huber return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) 5710f426935Ssstefan1 << " Value: " 5720f426935Ssstefan1 << (ICVInfo.InitValue 57361cdaf66SSimon Pilgrim ? toString(ICVInfo.InitValue->getValue(), 10, true) 5740f426935Ssstefan1 : "IMPLEMENTATION_DEFINED"); 5750f426935Ssstefan1 }; 5760f426935Ssstefan1 5772db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); 5780f426935Ssstefan1 } 5790f426935Ssstefan1 } 5800f426935Ssstefan1 } 5810f426935Ssstefan1 582e8039ad4SJohannes Doerfert /// Print OpenMP GPU kernels for testing. 583e8039ad4SJohannes Doerfert void printKernels() const { 584e8039ad4SJohannes Doerfert for (Function *F : SCC) { 585e8039ad4SJohannes Doerfert if (!OMPInfoCache.Kernels.count(F)) 586e8039ad4SJohannes Doerfert continue; 587b8235d2bSsstefan1 5882db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 5892db182ffSJoseph Huber return ORA << "OpenMP GPU kernel " 590e8039ad4SJohannes Doerfert << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; 591e8039ad4SJohannes Doerfert }; 592b8235d2bSsstefan1 5932db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); 594e8039ad4SJohannes Doerfert } 5959548b74aSJohannes Doerfert } 5969548b74aSJohannes Doerfert 5977cfd267cSsstefan1 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 5987cfd267cSsstefan1 /// given it has to be the callee or a nullptr is returned. 5997cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 6007cfd267cSsstefan1 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 6017cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 6027cfd267cSsstefan1 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 6037cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 6047cfd267cSsstefan1 return CI; 6057cfd267cSsstefan1 return nullptr; 6067cfd267cSsstefan1 } 6077cfd267cSsstefan1 6087cfd267cSsstefan1 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 6097cfd267cSsstefan1 /// the callee or a nullptr is returned. 6107cfd267cSsstefan1 static CallInst *getCallIfRegularCall( 6117cfd267cSsstefan1 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 6127cfd267cSsstefan1 CallInst *CI = dyn_cast<CallInst>(&V); 6137cfd267cSsstefan1 if (CI && !CI->hasOperandBundles() && 6147cfd267cSsstefan1 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 6157cfd267cSsstefan1 return CI; 6167cfd267cSsstefan1 return nullptr; 6177cfd267cSsstefan1 } 6187cfd267cSsstefan1 6199548b74aSJohannes Doerfert private: 6203a6bfcf2SGiorgis Georgakoudis /// Merge parallel regions when it is safe. 6213a6bfcf2SGiorgis Georgakoudis bool mergeParallelRegions() { 6223a6bfcf2SGiorgis Georgakoudis const unsigned CallbackCalleeOperand = 2; 6233a6bfcf2SGiorgis Georgakoudis const unsigned CallbackFirstArgOperand = 3; 6243a6bfcf2SGiorgis Georgakoudis using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6253a6bfcf2SGiorgis Georgakoudis 6263a6bfcf2SGiorgis Georgakoudis // Check if there are any __kmpc_fork_call calls to merge. 6273a6bfcf2SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &RFI = 6283a6bfcf2SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 6293a6bfcf2SGiorgis Georgakoudis 6303a6bfcf2SGiorgis Georgakoudis if (!RFI.Declaration) 6313a6bfcf2SGiorgis Georgakoudis return false; 6323a6bfcf2SGiorgis Georgakoudis 63397517055SGiorgis Georgakoudis // Unmergable calls that prevent merging a parallel region. 63497517055SGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { 63597517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], 63697517055SGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], 63797517055SGiorgis Georgakoudis }; 6383a6bfcf2SGiorgis Georgakoudis 6393a6bfcf2SGiorgis Georgakoudis bool Changed = false; 6403a6bfcf2SGiorgis Georgakoudis LoopInfo *LI = nullptr; 6413a6bfcf2SGiorgis Georgakoudis DominatorTree *DT = nullptr; 6423a6bfcf2SGiorgis Georgakoudis 6433a6bfcf2SGiorgis Georgakoudis SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; 6443a6bfcf2SGiorgis Georgakoudis 6453a6bfcf2SGiorgis Georgakoudis BasicBlock *StartBB = nullptr, *EndBB = nullptr; 6463a6bfcf2SGiorgis Georgakoudis auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 6473a6bfcf2SGiorgis Georgakoudis BasicBlock &ContinuationIP) { 6483a6bfcf2SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock(); 6493a6bfcf2SGiorgis Georgakoudis BasicBlock *CGEndBB = 6503a6bfcf2SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 6513a6bfcf2SGiorgis Georgakoudis assert(StartBB != nullptr && "StartBB should not be null"); 6523a6bfcf2SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, StartBB); 6533a6bfcf2SGiorgis Georgakoudis assert(EndBB != nullptr && "EndBB should not be null"); 6543a6bfcf2SGiorgis Georgakoudis EndBB->getTerminator()->setSuccessor(0, CGEndBB); 6553a6bfcf2SGiorgis Georgakoudis }; 6563a6bfcf2SGiorgis Georgakoudis 657240dd924SAlex Zinenko auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 658240dd924SAlex Zinenko Value &Inner, Value *&ReplacementValue) -> InsertPointTy { 659240dd924SAlex Zinenko ReplacementValue = &Inner; 6603a6bfcf2SGiorgis Georgakoudis return CodeGenIP; 6613a6bfcf2SGiorgis Georgakoudis }; 6623a6bfcf2SGiorgis Georgakoudis 6633a6bfcf2SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 6643a6bfcf2SGiorgis Georgakoudis 66597517055SGiorgis Georgakoudis /// Create a sequential execution region within a merged parallel region, 66697517055SGiorgis Georgakoudis /// encapsulated in a master construct with a barrier for synchronization. 66797517055SGiorgis Georgakoudis auto CreateSequentialRegion = [&](Function *OuterFn, 66897517055SGiorgis Georgakoudis BasicBlock *OuterPredBB, 66997517055SGiorgis Georgakoudis Instruction *SeqStartI, 67097517055SGiorgis Georgakoudis Instruction *SeqEndI) { 67197517055SGiorgis Georgakoudis // Isolate the instructions of the sequential region to a separate 67297517055SGiorgis Georgakoudis // block. 67397517055SGiorgis Georgakoudis BasicBlock *ParentBB = SeqStartI->getParent(); 67497517055SGiorgis Georgakoudis BasicBlock *SeqEndBB = 67597517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); 67697517055SGiorgis Georgakoudis BasicBlock *SeqAfterBB = 67797517055SGiorgis Georgakoudis SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); 67897517055SGiorgis Georgakoudis BasicBlock *SeqStartBB = 67997517055SGiorgis Georgakoudis SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); 68097517055SGiorgis Georgakoudis 68197517055SGiorgis Georgakoudis assert(ParentBB->getUniqueSuccessor() == SeqStartBB && 68297517055SGiorgis Georgakoudis "Expected a different CFG"); 68397517055SGiorgis Georgakoudis const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 68497517055SGiorgis Georgakoudis ParentBB->getTerminator()->eraseFromParent(); 68597517055SGiorgis Georgakoudis 68697517055SGiorgis Georgakoudis auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 68797517055SGiorgis Georgakoudis BasicBlock &ContinuationIP) { 68897517055SGiorgis Georgakoudis BasicBlock *CGStartBB = CodeGenIP.getBlock(); 68997517055SGiorgis Georgakoudis BasicBlock *CGEndBB = 69097517055SGiorgis Georgakoudis SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 69197517055SGiorgis Georgakoudis assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); 69297517055SGiorgis Georgakoudis CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); 69397517055SGiorgis Georgakoudis assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); 69497517055SGiorgis Georgakoudis SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); 69597517055SGiorgis Georgakoudis }; 69697517055SGiorgis Georgakoudis auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 69797517055SGiorgis Georgakoudis 69897517055SGiorgis Georgakoudis // Find outputs from the sequential region to outside users and 69997517055SGiorgis Georgakoudis // broadcast their values to them. 70097517055SGiorgis Georgakoudis for (Instruction &I : *SeqStartBB) { 70197517055SGiorgis Georgakoudis SmallPtrSet<Instruction *, 4> OutsideUsers; 70297517055SGiorgis Georgakoudis for (User *Usr : I.users()) { 70397517055SGiorgis Georgakoudis Instruction &UsrI = *cast<Instruction>(Usr); 70497517055SGiorgis Georgakoudis // Ignore outputs to LT intrinsics, code extraction for the merged 70597517055SGiorgis Georgakoudis // parallel region will fix them. 70697517055SGiorgis Georgakoudis if (UsrI.isLifetimeStartOrEnd()) 70797517055SGiorgis Georgakoudis continue; 70897517055SGiorgis Georgakoudis 70997517055SGiorgis Georgakoudis if (UsrI.getParent() != SeqStartBB) 71097517055SGiorgis Georgakoudis OutsideUsers.insert(&UsrI); 71197517055SGiorgis Georgakoudis } 71297517055SGiorgis Georgakoudis 71397517055SGiorgis Georgakoudis if (OutsideUsers.empty()) 71497517055SGiorgis Georgakoudis continue; 71597517055SGiorgis Georgakoudis 71697517055SGiorgis Georgakoudis // Emit an alloca in the outer region to store the broadcasted 71797517055SGiorgis Georgakoudis // value. 71897517055SGiorgis Georgakoudis const DataLayout &DL = M.getDataLayout(); 71997517055SGiorgis Georgakoudis AllocaInst *AllocaI = new AllocaInst( 72097517055SGiorgis Georgakoudis I.getType(), DL.getAllocaAddrSpace(), nullptr, 72197517055SGiorgis Georgakoudis I.getName() + ".seq.output.alloc", &OuterFn->front().front()); 72297517055SGiorgis Georgakoudis 72397517055SGiorgis Georgakoudis // Emit a store instruction in the sequential BB to update the 72497517055SGiorgis Georgakoudis // value. 72597517055SGiorgis Georgakoudis new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); 72697517055SGiorgis Georgakoudis 72797517055SGiorgis Georgakoudis // Emit a load instruction and replace the use of the output value 72897517055SGiorgis Georgakoudis // with it. 72997517055SGiorgis Georgakoudis for (Instruction *UsrI : OutsideUsers) { 7305b70c12fSJohannes Doerfert LoadInst *LoadI = new LoadInst( 7315b70c12fSJohannes Doerfert I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); 73297517055SGiorgis Georgakoudis UsrI->replaceUsesOfWith(&I, LoadI); 73397517055SGiorgis Georgakoudis } 73497517055SGiorgis Georgakoudis } 73597517055SGiorgis Georgakoudis 73697517055SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc( 73797517055SGiorgis Georgakoudis InsertPointTy(ParentBB, ParentBB->end()), DL); 73897517055SGiorgis Georgakoudis InsertPointTy SeqAfterIP = 73997517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); 74097517055SGiorgis Georgakoudis 74197517055SGiorgis Georgakoudis OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); 74297517055SGiorgis Georgakoudis 74397517055SGiorgis Georgakoudis BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); 74497517055SGiorgis Georgakoudis 74597517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn 74697517055SGiorgis Georgakoudis << "\n"); 74797517055SGiorgis Georgakoudis }; 74897517055SGiorgis Georgakoudis 7493a6bfcf2SGiorgis Georgakoudis // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all 7503a6bfcf2SGiorgis Georgakoudis // contained in BB and only separated by instructions that can be 7513a6bfcf2SGiorgis Georgakoudis // redundantly executed in parallel. The block BB is split before the first 7523a6bfcf2SGiorgis Georgakoudis // call (in MergableCIs) and after the last so the entire region we merge 7533a6bfcf2SGiorgis Georgakoudis // into a single parallel region is contained in a single basic block 7543a6bfcf2SGiorgis Georgakoudis // without any other instructions. We use the OpenMPIRBuilder to outline 7553a6bfcf2SGiorgis Georgakoudis // that block and call the resulting function via __kmpc_fork_call. 7563a6bfcf2SGiorgis Georgakoudis auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) { 7573a6bfcf2SGiorgis Georgakoudis // TODO: Change the interface to allow single CIs expanded, e.g, to 7583a6bfcf2SGiorgis Georgakoudis // include an outer loop. 7593a6bfcf2SGiorgis Georgakoudis assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); 7603a6bfcf2SGiorgis Georgakoudis 7613a6bfcf2SGiorgis Georgakoudis auto Remark = [&](OptimizationRemark OR) { 7623a6bfcf2SGiorgis Georgakoudis OR << "Parallel region at " 7633a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMergeFront", 7643a6bfcf2SGiorgis Georgakoudis MergableCIs.front()->getDebugLoc()) 7653a6bfcf2SGiorgis Georgakoudis << " merged with parallel regions at "; 76623b0ab2aSKazu Hirata for (auto *CI : llvm::drop_begin(MergableCIs)) { 7673a6bfcf2SGiorgis Georgakoudis OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); 7683a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back()) 7693a6bfcf2SGiorgis Georgakoudis OR << ", "; 7703a6bfcf2SGiorgis Georgakoudis } 7713a6bfcf2SGiorgis Georgakoudis return OR; 7723a6bfcf2SGiorgis Georgakoudis }; 7733a6bfcf2SGiorgis Georgakoudis 7743a6bfcf2SGiorgis Georgakoudis emitRemark<OptimizationRemark>(MergableCIs.front(), 7753a6bfcf2SGiorgis Georgakoudis "OpenMPParallelRegionMerging", Remark); 7763a6bfcf2SGiorgis Georgakoudis 7773a6bfcf2SGiorgis Georgakoudis Function *OriginalFn = BB->getParent(); 7783a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() 7793a6bfcf2SGiorgis Georgakoudis << " parallel regions in " << OriginalFn->getName() 7803a6bfcf2SGiorgis Georgakoudis << "\n"); 7813a6bfcf2SGiorgis Georgakoudis 7823a6bfcf2SGiorgis Georgakoudis // Isolate the calls to merge in a separate block. 7833a6bfcf2SGiorgis Georgakoudis EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); 7843a6bfcf2SGiorgis Georgakoudis BasicBlock *AfterBB = 7853a6bfcf2SGiorgis Georgakoudis SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); 7863a6bfcf2SGiorgis Georgakoudis StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, 7873a6bfcf2SGiorgis Georgakoudis "omp.par.merged"); 7883a6bfcf2SGiorgis Georgakoudis 7893a6bfcf2SGiorgis Georgakoudis assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG"); 7903a6bfcf2SGiorgis Georgakoudis const DebugLoc DL = BB->getTerminator()->getDebugLoc(); 7913a6bfcf2SGiorgis Georgakoudis BB->getTerminator()->eraseFromParent(); 7923a6bfcf2SGiorgis Georgakoudis 79397517055SGiorgis Georgakoudis // Create sequential regions for sequential instructions that are 79497517055SGiorgis Georgakoudis // in-between mergable parallel regions. 79597517055SGiorgis Georgakoudis for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; 79697517055SGiorgis Georgakoudis It != End; ++It) { 79797517055SGiorgis Georgakoudis Instruction *ForkCI = *It; 79897517055SGiorgis Georgakoudis Instruction *NextForkCI = *(It + 1); 79997517055SGiorgis Georgakoudis 80097517055SGiorgis Georgakoudis // Continue if there are not in-between instructions. 80197517055SGiorgis Georgakoudis if (ForkCI->getNextNode() == NextForkCI) 80297517055SGiorgis Georgakoudis continue; 80397517055SGiorgis Georgakoudis 80497517055SGiorgis Georgakoudis CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), 80597517055SGiorgis Georgakoudis NextForkCI->getPrevNode()); 80697517055SGiorgis Georgakoudis } 80797517055SGiorgis Georgakoudis 8083a6bfcf2SGiorgis Georgakoudis OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), 8093a6bfcf2SGiorgis Georgakoudis DL); 8103a6bfcf2SGiorgis Georgakoudis IRBuilder<>::InsertPoint AllocaIP( 8113a6bfcf2SGiorgis Georgakoudis &OriginalFn->getEntryBlock(), 8123a6bfcf2SGiorgis Georgakoudis OriginalFn->getEntryBlock().getFirstInsertionPt()); 8133a6bfcf2SGiorgis Georgakoudis // Create the merged parallel region with default proc binding, to 8143a6bfcf2SGiorgis Georgakoudis // avoid overriding binding settings, and without explicit cancellation. 815e5dba2d7SMichael Kruse InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( 8163a6bfcf2SGiorgis Georgakoudis Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, 8173a6bfcf2SGiorgis Georgakoudis OMP_PROC_BIND_default, /* IsCancellable */ false); 8183a6bfcf2SGiorgis Georgakoudis BranchInst::Create(AfterBB, AfterIP.getBlock()); 8193a6bfcf2SGiorgis Georgakoudis 8203a6bfcf2SGiorgis Georgakoudis // Perform the actual outlining. 821b1191206SMichael Kruse OMPInfoCache.OMPBuilder.finalize(OriginalFn, 822b1191206SMichael Kruse /* AllowExtractorSinking */ true); 8233a6bfcf2SGiorgis Georgakoudis 8243a6bfcf2SGiorgis Georgakoudis Function *OutlinedFn = MergableCIs.front()->getCaller(); 8253a6bfcf2SGiorgis Georgakoudis 8263a6bfcf2SGiorgis Georgakoudis // Replace the __kmpc_fork_call calls with direct calls to the outlined 8273a6bfcf2SGiorgis Georgakoudis // callbacks. 8283a6bfcf2SGiorgis Georgakoudis SmallVector<Value *, 8> Args; 8293a6bfcf2SGiorgis Georgakoudis for (auto *CI : MergableCIs) { 8303a6bfcf2SGiorgis Georgakoudis Value *Callee = 8313a6bfcf2SGiorgis Georgakoudis CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); 8323a6bfcf2SGiorgis Georgakoudis FunctionType *FT = 8333a6bfcf2SGiorgis Georgakoudis cast<FunctionType>(Callee->getType()->getPointerElementType()); 8343a6bfcf2SGiorgis Georgakoudis Args.clear(); 8353a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(0)); 8363a6bfcf2SGiorgis Georgakoudis Args.push_back(OutlinedFn->getArg(1)); 8373a6bfcf2SGiorgis Georgakoudis for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); 8383a6bfcf2SGiorgis Georgakoudis U < E; ++U) 8393a6bfcf2SGiorgis Georgakoudis Args.push_back(CI->getArgOperand(U)); 8403a6bfcf2SGiorgis Georgakoudis 8413a6bfcf2SGiorgis Georgakoudis CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); 8423a6bfcf2SGiorgis Georgakoudis if (CI->getDebugLoc()) 8433a6bfcf2SGiorgis Georgakoudis NewCI->setDebugLoc(CI->getDebugLoc()); 8443a6bfcf2SGiorgis Georgakoudis 8453a6bfcf2SGiorgis Georgakoudis // Forward parameter attributes from the callback to the callee. 8463a6bfcf2SGiorgis Georgakoudis for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); 8473a6bfcf2SGiorgis Georgakoudis U < E; ++U) 8483a6bfcf2SGiorgis Georgakoudis for (const Attribute &A : CI->getAttributes().getParamAttributes(U)) 8493a6bfcf2SGiorgis Georgakoudis NewCI->addParamAttr( 8503a6bfcf2SGiorgis Georgakoudis U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); 8513a6bfcf2SGiorgis Georgakoudis 8523a6bfcf2SGiorgis Georgakoudis // Emit an explicit barrier to replace the implicit fork-join barrier. 8533a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.back()) { 8543a6bfcf2SGiorgis Georgakoudis // TODO: Remove barrier if the merged parallel region includes the 8553a6bfcf2SGiorgis Georgakoudis // 'nowait' clause. 856e5dba2d7SMichael Kruse OMPInfoCache.OMPBuilder.createBarrier( 8573a6bfcf2SGiorgis Georgakoudis InsertPointTy(NewCI->getParent(), 8583a6bfcf2SGiorgis Georgakoudis NewCI->getNextNode()->getIterator()), 8593a6bfcf2SGiorgis Georgakoudis OMPD_parallel); 8603a6bfcf2SGiorgis Georgakoudis } 8613a6bfcf2SGiorgis Georgakoudis 8623a6bfcf2SGiorgis Georgakoudis auto Remark = [&](OptimizationRemark OR) { 8633a6bfcf2SGiorgis Georgakoudis return OR << "Parallel region at " 8643a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()) 8653a6bfcf2SGiorgis Georgakoudis << " merged with " 8663a6bfcf2SGiorgis Georgakoudis << ore::NV("OpenMPParallelMergeFront", 8673a6bfcf2SGiorgis Georgakoudis MergableCIs.front()->getDebugLoc()); 8683a6bfcf2SGiorgis Georgakoudis }; 8693a6bfcf2SGiorgis Georgakoudis if (CI != MergableCIs.front()) 8703a6bfcf2SGiorgis Georgakoudis emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionMerging", 8713a6bfcf2SGiorgis Georgakoudis Remark); 8723a6bfcf2SGiorgis Georgakoudis 8733a6bfcf2SGiorgis Georgakoudis CI->eraseFromParent(); 8743a6bfcf2SGiorgis Georgakoudis } 8753a6bfcf2SGiorgis Georgakoudis 8763a6bfcf2SGiorgis Georgakoudis assert(OutlinedFn != OriginalFn && "Outlining failed"); 8777fea561eSArthur Eubanks CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); 8783a6bfcf2SGiorgis Georgakoudis CGUpdater.reanalyzeFunction(*OriginalFn); 8793a6bfcf2SGiorgis Georgakoudis 8803a6bfcf2SGiorgis Georgakoudis NumOpenMPParallelRegionsMerged += MergableCIs.size(); 8813a6bfcf2SGiorgis Georgakoudis 8823a6bfcf2SGiorgis Georgakoudis return true; 8833a6bfcf2SGiorgis Georgakoudis }; 8843a6bfcf2SGiorgis Georgakoudis 8853a6bfcf2SGiorgis Georgakoudis // Helper function that identifes sequences of 8863a6bfcf2SGiorgis Georgakoudis // __kmpc_fork_call uses in a basic block. 8873a6bfcf2SGiorgis Georgakoudis auto DetectPRsCB = [&](Use &U, Function &F) { 8883a6bfcf2SGiorgis Georgakoudis CallInst *CI = getCallIfRegularCall(U, &RFI); 8893a6bfcf2SGiorgis Georgakoudis BB2PRMap[CI->getParent()].insert(CI); 8903a6bfcf2SGiorgis Georgakoudis 8913a6bfcf2SGiorgis Georgakoudis return false; 8923a6bfcf2SGiorgis Georgakoudis }; 8933a6bfcf2SGiorgis Georgakoudis 8943a6bfcf2SGiorgis Georgakoudis BB2PRMap.clear(); 8953a6bfcf2SGiorgis Georgakoudis RFI.foreachUse(SCC, DetectPRsCB); 8963a6bfcf2SGiorgis Georgakoudis SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; 8973a6bfcf2SGiorgis Georgakoudis // Find mergable parallel regions within a basic block that are 8983a6bfcf2SGiorgis Georgakoudis // safe to merge, that is any in-between instructions can safely 8993a6bfcf2SGiorgis Georgakoudis // execute in parallel after merging. 9003a6bfcf2SGiorgis Georgakoudis // TODO: support merging across basic-blocks. 9013a6bfcf2SGiorgis Georgakoudis for (auto &It : BB2PRMap) { 9023a6bfcf2SGiorgis Georgakoudis auto &CIs = It.getSecond(); 9033a6bfcf2SGiorgis Georgakoudis if (CIs.size() < 2) 9043a6bfcf2SGiorgis Georgakoudis continue; 9053a6bfcf2SGiorgis Georgakoudis 9063a6bfcf2SGiorgis Georgakoudis BasicBlock *BB = It.getFirst(); 9073a6bfcf2SGiorgis Georgakoudis SmallVector<CallInst *, 4> MergableCIs; 9083a6bfcf2SGiorgis Georgakoudis 90997517055SGiorgis Georgakoudis /// Returns true if the instruction is mergable, false otherwise. 91097517055SGiorgis Georgakoudis /// A terminator instruction is unmergable by definition since merging 91197517055SGiorgis Georgakoudis /// works within a BB. Instructions before the mergable region are 91297517055SGiorgis Georgakoudis /// mergable if they are not calls to OpenMP runtime functions that may 91397517055SGiorgis Georgakoudis /// set different execution parameters for subsequent parallel regions. 91497517055SGiorgis Georgakoudis /// Instructions in-between parallel regions are mergable if they are not 91597517055SGiorgis Georgakoudis /// calls to any non-intrinsic function since that may call a non-mergable 91697517055SGiorgis Georgakoudis /// OpenMP runtime function. 91797517055SGiorgis Georgakoudis auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { 91897517055SGiorgis Georgakoudis // We do not merge across BBs, hence return false (unmergable) if the 91997517055SGiorgis Georgakoudis // instruction is a terminator. 92097517055SGiorgis Georgakoudis if (I.isTerminator()) 92197517055SGiorgis Georgakoudis return false; 92297517055SGiorgis Georgakoudis 92397517055SGiorgis Georgakoudis if (!isa<CallInst>(&I)) 92497517055SGiorgis Georgakoudis return true; 92597517055SGiorgis Georgakoudis 92697517055SGiorgis Georgakoudis CallInst *CI = cast<CallInst>(&I); 92797517055SGiorgis Georgakoudis if (IsBeforeMergableRegion) { 92897517055SGiorgis Georgakoudis Function *CalledFunction = CI->getCalledFunction(); 92997517055SGiorgis Georgakoudis if (!CalledFunction) 93097517055SGiorgis Georgakoudis return false; 93197517055SGiorgis Georgakoudis // Return false (unmergable) if the call before the parallel 93297517055SGiorgis Georgakoudis // region calls an explicit affinity (proc_bind) or number of 93397517055SGiorgis Georgakoudis // threads (num_threads) compiler-generated function. Those settings 93497517055SGiorgis Georgakoudis // may be incompatible with following parallel regions. 93597517055SGiorgis Georgakoudis // TODO: ICV tracking to detect compatibility. 93697517055SGiorgis Georgakoudis for (const auto &RFI : UnmergableCallsInfo) { 93797517055SGiorgis Georgakoudis if (CalledFunction == RFI.Declaration) 93897517055SGiorgis Georgakoudis return false; 93997517055SGiorgis Georgakoudis } 94097517055SGiorgis Georgakoudis } else { 94197517055SGiorgis Georgakoudis // Return false (unmergable) if there is a call instruction 94297517055SGiorgis Georgakoudis // in-between parallel regions when it is not an intrinsic. It 94397517055SGiorgis Georgakoudis // may call an unmergable OpenMP runtime function in its callpath. 94497517055SGiorgis Georgakoudis // TODO: Keep track of possible OpenMP calls in the callpath. 94597517055SGiorgis Georgakoudis if (!isa<IntrinsicInst>(CI)) 94697517055SGiorgis Georgakoudis return false; 94797517055SGiorgis Georgakoudis } 94897517055SGiorgis Georgakoudis 94997517055SGiorgis Georgakoudis return true; 95097517055SGiorgis Georgakoudis }; 9513a6bfcf2SGiorgis Georgakoudis // Find maximal number of parallel region CIs that are safe to merge. 95297517055SGiorgis Georgakoudis for (auto It = BB->begin(), End = BB->end(); It != End;) { 95397517055SGiorgis Georgakoudis Instruction &I = *It; 95497517055SGiorgis Georgakoudis ++It; 95597517055SGiorgis Georgakoudis 9563a6bfcf2SGiorgis Georgakoudis if (CIs.count(&I)) { 9573a6bfcf2SGiorgis Georgakoudis MergableCIs.push_back(cast<CallInst>(&I)); 9583a6bfcf2SGiorgis Georgakoudis continue; 9593a6bfcf2SGiorgis Georgakoudis } 9603a6bfcf2SGiorgis Georgakoudis 96197517055SGiorgis Georgakoudis // Continue expanding if the instruction is mergable. 96297517055SGiorgis Georgakoudis if (IsMergable(I, MergableCIs.empty())) 9633a6bfcf2SGiorgis Georgakoudis continue; 9643a6bfcf2SGiorgis Georgakoudis 96597517055SGiorgis Georgakoudis // Forward the instruction iterator to skip the next parallel region 96697517055SGiorgis Georgakoudis // since there is an unmergable instruction which can affect it. 96797517055SGiorgis Georgakoudis for (; It != End; ++It) { 96897517055SGiorgis Georgakoudis Instruction &SkipI = *It; 96997517055SGiorgis Georgakoudis if (CIs.count(&SkipI)) { 97097517055SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI 97197517055SGiorgis Georgakoudis << " due to " << I << "\n"); 97297517055SGiorgis Georgakoudis ++It; 97397517055SGiorgis Georgakoudis break; 97497517055SGiorgis Georgakoudis } 97597517055SGiorgis Georgakoudis } 97697517055SGiorgis Georgakoudis 97797517055SGiorgis Georgakoudis // Store mergable regions found. 9783a6bfcf2SGiorgis Georgakoudis if (MergableCIs.size() > 1) { 9793a6bfcf2SGiorgis Georgakoudis MergableCIsVector.push_back(MergableCIs); 9803a6bfcf2SGiorgis Georgakoudis LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() 9813a6bfcf2SGiorgis Georgakoudis << " parallel regions in block " << BB->getName() 9823a6bfcf2SGiorgis Georgakoudis << " of function " << BB->getParent()->getName() 9833a6bfcf2SGiorgis Georgakoudis << "\n";); 9843a6bfcf2SGiorgis Georgakoudis } 9853a6bfcf2SGiorgis Georgakoudis 9863a6bfcf2SGiorgis Georgakoudis MergableCIs.clear(); 9873a6bfcf2SGiorgis Georgakoudis } 9883a6bfcf2SGiorgis Georgakoudis 9893a6bfcf2SGiorgis Georgakoudis if (!MergableCIsVector.empty()) { 9903a6bfcf2SGiorgis Georgakoudis Changed = true; 9913a6bfcf2SGiorgis Georgakoudis 9923a6bfcf2SGiorgis Georgakoudis for (auto &MergableCIs : MergableCIsVector) 9933a6bfcf2SGiorgis Georgakoudis Merge(MergableCIs, BB); 994b2ad63d3SJoseph Huber MergableCIsVector.clear(); 9953a6bfcf2SGiorgis Georgakoudis } 9963a6bfcf2SGiorgis Georgakoudis } 9973a6bfcf2SGiorgis Georgakoudis 9983a6bfcf2SGiorgis Georgakoudis if (Changed) { 99997517055SGiorgis Georgakoudis /// Re-collect use for fork calls, emitted barrier calls, and 100097517055SGiorgis Georgakoudis /// any emitted master/end_master calls. 100197517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); 100297517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); 100397517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); 100497517055SGiorgis Georgakoudis OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); 10053a6bfcf2SGiorgis Georgakoudis } 10063a6bfcf2SGiorgis Georgakoudis 10073a6bfcf2SGiorgis Georgakoudis return Changed; 10083a6bfcf2SGiorgis Georgakoudis } 10093a6bfcf2SGiorgis Georgakoudis 10109d38f98dSJohannes Doerfert /// Try to delete parallel regions if possible. 1011e565db49SJohannes Doerfert bool deleteParallelRegions() { 1012e565db49SJohannes Doerfert const unsigned CallbackCalleeOperand = 2; 1013e565db49SJohannes Doerfert 10147cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI = 10157cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 10167cfd267cSsstefan1 1017e565db49SJohannes Doerfert if (!RFI.Declaration) 1018e565db49SJohannes Doerfert return false; 1019e565db49SJohannes Doerfert 1020e565db49SJohannes Doerfert bool Changed = false; 1021e565db49SJohannes Doerfert auto DeleteCallCB = [&](Use &U, Function &) { 1022e565db49SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U); 1023e565db49SJohannes Doerfert if (!CI) 1024e565db49SJohannes Doerfert return false; 1025e565db49SJohannes Doerfert auto *Fn = dyn_cast<Function>( 1026e565db49SJohannes Doerfert CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 1027e565db49SJohannes Doerfert if (!Fn) 1028e565db49SJohannes Doerfert return false; 1029e565db49SJohannes Doerfert if (!Fn->onlyReadsMemory()) 1030e565db49SJohannes Doerfert return false; 1031e565db49SJohannes Doerfert if (!Fn->hasFnAttribute(Attribute::WillReturn)) 1032e565db49SJohannes Doerfert return false; 1033e565db49SJohannes Doerfert 1034e565db49SJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 1035e565db49SJohannes Doerfert << CI->getCaller()->getName() << "\n"); 10364d4ea9acSHuber, Joseph 10374d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 10384d4ea9acSHuber, Joseph return OR << "Parallel region in " 10394d4ea9acSHuber, Joseph << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) 10404d4ea9acSHuber, Joseph << " deleted"; 10414d4ea9acSHuber, Joseph }; 10424d4ea9acSHuber, Joseph emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", 10434d4ea9acSHuber, Joseph Remark); 10444d4ea9acSHuber, Joseph 1045e565db49SJohannes Doerfert CGUpdater.removeCallSite(*CI); 1046e565db49SJohannes Doerfert CI->eraseFromParent(); 1047e565db49SJohannes Doerfert Changed = true; 104855eb714aSRoman Lebedev ++NumOpenMPParallelRegionsDeleted; 1049e565db49SJohannes Doerfert return true; 1050e565db49SJohannes Doerfert }; 1051e565db49SJohannes Doerfert 1052624d34afSJohannes Doerfert RFI.foreachUse(SCC, DeleteCallCB); 1053e565db49SJohannes Doerfert 1054e565db49SJohannes Doerfert return Changed; 1055e565db49SJohannes Doerfert } 1056e565db49SJohannes Doerfert 1057b726c557SJohannes Doerfert /// Try to eliminate runtime calls by reusing existing ones. 10589548b74aSJohannes Doerfert bool deduplicateRuntimeCalls() { 10599548b74aSJohannes Doerfert bool Changed = false; 10609548b74aSJohannes Doerfert 1061e28936f6SJohannes Doerfert RuntimeFunction DeduplicableRuntimeCallIDs[] = { 1062e28936f6SJohannes Doerfert OMPRTL_omp_get_num_threads, 1063e28936f6SJohannes Doerfert OMPRTL_omp_in_parallel, 1064e28936f6SJohannes Doerfert OMPRTL_omp_get_cancellation, 1065e28936f6SJohannes Doerfert OMPRTL_omp_get_thread_limit, 1066e28936f6SJohannes Doerfert OMPRTL_omp_get_supported_active_levels, 1067e28936f6SJohannes Doerfert OMPRTL_omp_get_level, 1068e28936f6SJohannes Doerfert OMPRTL_omp_get_ancestor_thread_num, 1069e28936f6SJohannes Doerfert OMPRTL_omp_get_team_size, 1070e28936f6SJohannes Doerfert OMPRTL_omp_get_active_level, 1071e28936f6SJohannes Doerfert OMPRTL_omp_in_final, 1072e28936f6SJohannes Doerfert OMPRTL_omp_get_proc_bind, 1073e28936f6SJohannes Doerfert OMPRTL_omp_get_num_places, 1074e28936f6SJohannes Doerfert OMPRTL_omp_get_num_procs, 1075e28936f6SJohannes Doerfert OMPRTL_omp_get_place_num, 1076e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_num_places, 1077e28936f6SJohannes Doerfert OMPRTL_omp_get_partition_place_nums}; 1078e28936f6SJohannes Doerfert 1079bc93c2d7SMarek Kurdej // Global-tid is handled separately. 10809548b74aSJohannes Doerfert SmallSetVector<Value *, 16> GTIdArgs; 10819548b74aSJohannes Doerfert collectGlobalThreadIdArguments(GTIdArgs); 10829548b74aSJohannes Doerfert LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 10839548b74aSJohannes Doerfert << " global thread ID arguments\n"); 10849548b74aSJohannes Doerfert 10859548b74aSJohannes Doerfert for (Function *F : SCC) { 1086e28936f6SJohannes Doerfert for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 10874e29d256Sserge-sans-paille Changed |= deduplicateRuntimeCalls( 10884e29d256Sserge-sans-paille *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 1089e28936f6SJohannes Doerfert 1090e28936f6SJohannes Doerfert // __kmpc_global_thread_num is special as we can replace it with an 1091e28936f6SJohannes Doerfert // argument in enough cases to make it worth trying. 10929548b74aSJohannes Doerfert Value *GTIdArg = nullptr; 10939548b74aSJohannes Doerfert for (Argument &Arg : F->args()) 10949548b74aSJohannes Doerfert if (GTIdArgs.count(&Arg)) { 10959548b74aSJohannes Doerfert GTIdArg = &Arg; 10969548b74aSJohannes Doerfert break; 10979548b74aSJohannes Doerfert } 10989548b74aSJohannes Doerfert Changed |= deduplicateRuntimeCalls( 10997cfd267cSsstefan1 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 11009548b74aSJohannes Doerfert } 11019548b74aSJohannes Doerfert 11029548b74aSJohannes Doerfert return Changed; 11039548b74aSJohannes Doerfert } 11049548b74aSJohannes Doerfert 1105496f8e5bSHamilton Tobon Mosquera /// Tries to hide the latency of runtime calls that involve host to 1106496f8e5bSHamilton Tobon Mosquera /// device memory transfers by splitting them into their "issue" and "wait" 1107496f8e5bSHamilton Tobon Mosquera /// versions. The "issue" is moved upwards as much as possible. The "wait" is 1108496f8e5bSHamilton Tobon Mosquera /// moved downards as much as possible. The "issue" issues the memory transfer 1109496f8e5bSHamilton Tobon Mosquera /// asynchronously, returning a handle. The "wait" waits in the returned 1110496f8e5bSHamilton Tobon Mosquera /// handle for the memory transfer to finish. 1111496f8e5bSHamilton Tobon Mosquera bool hideMemTransfersLatency() { 1112496f8e5bSHamilton Tobon Mosquera auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; 1113496f8e5bSHamilton Tobon Mosquera bool Changed = false; 1114496f8e5bSHamilton Tobon Mosquera auto SplitMemTransfers = [&](Use &U, Function &Decl) { 1115496f8e5bSHamilton Tobon Mosquera auto *RTCall = getCallIfRegularCall(U, &RFI); 1116496f8e5bSHamilton Tobon Mosquera if (!RTCall) 1117496f8e5bSHamilton Tobon Mosquera return false; 1118496f8e5bSHamilton Tobon Mosquera 11198931add6SHamilton Tobon Mosquera OffloadArray OffloadArrays[3]; 11208931add6SHamilton Tobon Mosquera if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) 11218931add6SHamilton Tobon Mosquera return false; 11228931add6SHamilton Tobon Mosquera 11238931add6SHamilton Tobon Mosquera LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays)); 11248931add6SHamilton Tobon Mosquera 1125bd2fa181SHamilton Tobon Mosquera // TODO: Check if can be moved upwards. 1126bd2fa181SHamilton Tobon Mosquera bool WasSplit = false; 1127bd2fa181SHamilton Tobon Mosquera Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); 1128bd2fa181SHamilton Tobon Mosquera if (WaitMovementPoint) 1129bd2fa181SHamilton Tobon Mosquera WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); 1130bd2fa181SHamilton Tobon Mosquera 1131496f8e5bSHamilton Tobon Mosquera Changed |= WasSplit; 1132496f8e5bSHamilton Tobon Mosquera return WasSplit; 1133496f8e5bSHamilton Tobon Mosquera }; 1134496f8e5bSHamilton Tobon Mosquera RFI.foreachUse(SCC, SplitMemTransfers); 1135496f8e5bSHamilton Tobon Mosquera 1136496f8e5bSHamilton Tobon Mosquera return Changed; 1137496f8e5bSHamilton Tobon Mosquera } 1138496f8e5bSHamilton Tobon Mosquera 1139a2281419SJoseph Huber void analysisGlobalization() { 11406fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 114182453e75SJoseph Huber 114282453e75SJoseph Huber auto CheckGlobalization = [&](Use &U, Function &Decl) { 1143a2281419SJoseph Huber if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { 114444feacc7SJoseph Huber auto Remark = [&](OptimizationRemarkMissed ORM) { 114544feacc7SJoseph Huber return ORM 1146a2281419SJoseph Huber << "Found thread data sharing on the GPU. " 1147a2281419SJoseph Huber << "Expect degraded performance due to data globalization."; 1148a2281419SJoseph Huber }; 114944feacc7SJoseph Huber emitRemark<OptimizationRemarkMissed>(CI, "OpenMPGlobalization", Remark); 1150a2281419SJoseph Huber } 1151a2281419SJoseph Huber 1152a2281419SJoseph Huber return false; 1153a2281419SJoseph Huber }; 1154a2281419SJoseph Huber 115582453e75SJoseph Huber RFI.foreachUse(SCC, CheckGlobalization); 115682453e75SJoseph Huber } 1157a2281419SJoseph Huber 11588931add6SHamilton Tobon Mosquera /// Maps the values stored in the offload arrays passed as arguments to 11598931add6SHamilton Tobon Mosquera /// \p RuntimeCall into the offload arrays in \p OAs. 11608931add6SHamilton Tobon Mosquera bool getValuesInOffloadArrays(CallInst &RuntimeCall, 11618931add6SHamilton Tobon Mosquera MutableArrayRef<OffloadArray> OAs) { 11628931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "Need space for three offload arrays!"); 11638931add6SHamilton Tobon Mosquera 11648931add6SHamilton Tobon Mosquera // A runtime call that involves memory offloading looks something like: 11658931add6SHamilton Tobon Mosquera // call void @__tgt_target_data_begin_mapper(arg0, arg1, 11668931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, 11678931add6SHamilton Tobon Mosquera // ...) 11688931add6SHamilton Tobon Mosquera // So, the idea is to access the allocas that allocate space for these 11698931add6SHamilton Tobon Mosquera // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. 11708931add6SHamilton Tobon Mosquera // Therefore: 11718931add6SHamilton Tobon Mosquera // i8** %offload_baseptrs. 11721d3d9b9cSHamilton Tobon Mosquera Value *BasePtrsArg = 11731d3d9b9cSHamilton Tobon Mosquera RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); 11748931add6SHamilton Tobon Mosquera // i8** %offload_ptrs. 11751d3d9b9cSHamilton Tobon Mosquera Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); 11768931add6SHamilton Tobon Mosquera // i8** %offload_sizes. 11771d3d9b9cSHamilton Tobon Mosquera Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); 11788931add6SHamilton Tobon Mosquera 11798931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs. 11808931add6SHamilton Tobon Mosquera auto *V = getUnderlyingObject(BasePtrsArg); 11818931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 11828931add6SHamilton Tobon Mosquera return false; 11838931add6SHamilton Tobon Mosquera auto *BasePtrsArray = cast<AllocaInst>(V); 11848931add6SHamilton Tobon Mosquera if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) 11858931add6SHamilton Tobon Mosquera return false; 11868931add6SHamilton Tobon Mosquera 11878931add6SHamilton Tobon Mosquera // Get values stored in **offload_baseptrs. 11888931add6SHamilton Tobon Mosquera V = getUnderlyingObject(PtrsArg); 11898931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 11908931add6SHamilton Tobon Mosquera return false; 11918931add6SHamilton Tobon Mosquera auto *PtrsArray = cast<AllocaInst>(V); 11928931add6SHamilton Tobon Mosquera if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) 11938931add6SHamilton Tobon Mosquera return false; 11948931add6SHamilton Tobon Mosquera 11958931add6SHamilton Tobon Mosquera // Get values stored in **offload_sizes. 11968931add6SHamilton Tobon Mosquera V = getUnderlyingObject(SizesArg); 11978931add6SHamilton Tobon Mosquera // If it's a [constant] global array don't analyze it. 11988931add6SHamilton Tobon Mosquera if (isa<GlobalValue>(V)) 11998931add6SHamilton Tobon Mosquera return isa<Constant>(V); 12008931add6SHamilton Tobon Mosquera if (!isa<AllocaInst>(V)) 12018931add6SHamilton Tobon Mosquera return false; 12028931add6SHamilton Tobon Mosquera 12038931add6SHamilton Tobon Mosquera auto *SizesArray = cast<AllocaInst>(V); 12048931add6SHamilton Tobon Mosquera if (!OAs[2].initialize(*SizesArray, RuntimeCall)) 12058931add6SHamilton Tobon Mosquera return false; 12068931add6SHamilton Tobon Mosquera 12078931add6SHamilton Tobon Mosquera return true; 12088931add6SHamilton Tobon Mosquera } 12098931add6SHamilton Tobon Mosquera 12108931add6SHamilton Tobon Mosquera /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. 12118931add6SHamilton Tobon Mosquera /// For now this is a way to test that the function getValuesInOffloadArrays 12128931add6SHamilton Tobon Mosquera /// is working properly. 12138931add6SHamilton Tobon Mosquera /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. 12148931add6SHamilton Tobon Mosquera void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { 12158931add6SHamilton Tobon Mosquera assert(OAs.size() == 3 && "There are three offload arrays to debug!"); 12168931add6SHamilton Tobon Mosquera 12178931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n"); 12188931add6SHamilton Tobon Mosquera std::string ValuesStr; 12198931add6SHamilton Tobon Mosquera raw_string_ostream Printer(ValuesStr); 12208931add6SHamilton Tobon Mosquera std::string Separator = " --- "; 12218931add6SHamilton Tobon Mosquera 12228931add6SHamilton Tobon Mosquera for (auto *BP : OAs[0].StoredValues) { 12238931add6SHamilton Tobon Mosquera BP->print(Printer); 12248931add6SHamilton Tobon Mosquera Printer << Separator; 12258931add6SHamilton Tobon Mosquera } 12268931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"); 12278931add6SHamilton Tobon Mosquera ValuesStr.clear(); 12288931add6SHamilton Tobon Mosquera 12298931add6SHamilton Tobon Mosquera for (auto *P : OAs[1].StoredValues) { 12308931add6SHamilton Tobon Mosquera P->print(Printer); 12318931add6SHamilton Tobon Mosquera Printer << Separator; 12328931add6SHamilton Tobon Mosquera } 12338931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"); 12348931add6SHamilton Tobon Mosquera ValuesStr.clear(); 12358931add6SHamilton Tobon Mosquera 12368931add6SHamilton Tobon Mosquera for (auto *S : OAs[2].StoredValues) { 12378931add6SHamilton Tobon Mosquera S->print(Printer); 12388931add6SHamilton Tobon Mosquera Printer << Separator; 12398931add6SHamilton Tobon Mosquera } 12408931add6SHamilton Tobon Mosquera LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"); 12418931add6SHamilton Tobon Mosquera } 12428931add6SHamilton Tobon Mosquera 1243bd2fa181SHamilton Tobon Mosquera /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be 1244bd2fa181SHamilton Tobon Mosquera /// moved. Returns nullptr if the movement is not possible, or not worth it. 1245bd2fa181SHamilton Tobon Mosquera Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { 1246bd2fa181SHamilton Tobon Mosquera // FIXME: This traverses only the BasicBlock where RuntimeCall is. 1247bd2fa181SHamilton Tobon Mosquera // Make it traverse the CFG. 1248bd2fa181SHamilton Tobon Mosquera 1249bd2fa181SHamilton Tobon Mosquera Instruction *CurrentI = &RuntimeCall; 1250bd2fa181SHamilton Tobon Mosquera bool IsWorthIt = false; 1251bd2fa181SHamilton Tobon Mosquera while ((CurrentI = CurrentI->getNextNode())) { 1252bd2fa181SHamilton Tobon Mosquera 1253bd2fa181SHamilton Tobon Mosquera // TODO: Once we detect the regions to be offloaded we should use the 1254bd2fa181SHamilton Tobon Mosquera // alias analysis manager to check if CurrentI may modify one of 1255bd2fa181SHamilton Tobon Mosquera // the offloaded regions. 1256bd2fa181SHamilton Tobon Mosquera if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { 1257bd2fa181SHamilton Tobon Mosquera if (IsWorthIt) 1258bd2fa181SHamilton Tobon Mosquera return CurrentI; 1259bd2fa181SHamilton Tobon Mosquera 1260bd2fa181SHamilton Tobon Mosquera return nullptr; 1261bd2fa181SHamilton Tobon Mosquera } 1262bd2fa181SHamilton Tobon Mosquera 1263bd2fa181SHamilton Tobon Mosquera // FIXME: For now if we move it over anything without side effect 1264bd2fa181SHamilton Tobon Mosquera // is worth it. 1265bd2fa181SHamilton Tobon Mosquera IsWorthIt = true; 1266bd2fa181SHamilton Tobon Mosquera } 1267bd2fa181SHamilton Tobon Mosquera 1268bd2fa181SHamilton Tobon Mosquera // Return end of BasicBlock. 1269bd2fa181SHamilton Tobon Mosquera return RuntimeCall.getParent()->getTerminator(); 1270bd2fa181SHamilton Tobon Mosquera } 1271bd2fa181SHamilton Tobon Mosquera 1272496f8e5bSHamilton Tobon Mosquera /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. 1273bd2fa181SHamilton Tobon Mosquera bool splitTargetDataBeginRTC(CallInst &RuntimeCall, 1274bd2fa181SHamilton Tobon Mosquera Instruction &WaitMovementPoint) { 1275bd31abc1SHamilton Tobon Mosquera // Create stack allocated handle (__tgt_async_info) at the beginning of the 1276bd31abc1SHamilton Tobon Mosquera // function. Used for storing information of the async transfer, allowing to 1277bd31abc1SHamilton Tobon Mosquera // wait on it later. 1278496f8e5bSHamilton Tobon Mosquera auto &IRBuilder = OMPInfoCache.OMPBuilder; 1279bd31abc1SHamilton Tobon Mosquera auto *F = RuntimeCall.getCaller(); 1280bd31abc1SHamilton Tobon Mosquera Instruction *FirstInst = &(F->getEntryBlock().front()); 1281bd31abc1SHamilton Tobon Mosquera AllocaInst *Handle = new AllocaInst( 1282bd31abc1SHamilton Tobon Mosquera IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); 1283bd31abc1SHamilton Tobon Mosquera 1284496f8e5bSHamilton Tobon Mosquera // Add "issue" runtime call declaration: 1285496f8e5bSHamilton Tobon Mosquera // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, 1286496f8e5bSHamilton Tobon Mosquera // i8**, i8**, i64*, i64*) 1287496f8e5bSHamilton Tobon Mosquera FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( 1288496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_issue); 1289496f8e5bSHamilton Tobon Mosquera 1290496f8e5bSHamilton Tobon Mosquera // Change RuntimeCall call site for its asynchronous version. 129197e55cfeSJoseph Huber SmallVector<Value *, 16> Args; 1292bd2fa181SHamilton Tobon Mosquera for (auto &Arg : RuntimeCall.args()) 1293496f8e5bSHamilton Tobon Mosquera Args.push_back(Arg.get()); 1294bd31abc1SHamilton Tobon Mosquera Args.push_back(Handle); 1295496f8e5bSHamilton Tobon Mosquera 1296496f8e5bSHamilton Tobon Mosquera CallInst *IssueCallsite = 1297bd31abc1SHamilton Tobon Mosquera CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); 1298bd2fa181SHamilton Tobon Mosquera RuntimeCall.eraseFromParent(); 1299496f8e5bSHamilton Tobon Mosquera 1300496f8e5bSHamilton Tobon Mosquera // Add "wait" runtime call declaration: 1301496f8e5bSHamilton Tobon Mosquera // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) 1302496f8e5bSHamilton Tobon Mosquera FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( 1303496f8e5bSHamilton Tobon Mosquera M, OMPRTL___tgt_target_data_begin_mapper_wait); 1304496f8e5bSHamilton Tobon Mosquera 1305496f8e5bSHamilton Tobon Mosquera Value *WaitParams[2] = { 1306da8bec47SJoseph Huber IssueCallsite->getArgOperand( 1307da8bec47SJoseph Huber OffloadArray::DeviceIDArgNum), // device_id. 1308bd31abc1SHamilton Tobon Mosquera Handle // handle to wait on. 1309496f8e5bSHamilton Tobon Mosquera }; 1310bd2fa181SHamilton Tobon Mosquera CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); 1311496f8e5bSHamilton Tobon Mosquera 1312496f8e5bSHamilton Tobon Mosquera return true; 1313496f8e5bSHamilton Tobon Mosquera } 1314496f8e5bSHamilton Tobon Mosquera 1315dc3b5b00SJohannes Doerfert static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, 1316dc3b5b00SJohannes Doerfert bool GlobalOnly, bool &SingleChoice) { 1317dc3b5b00SJohannes Doerfert if (CurrentIdent == NextIdent) 1318dc3b5b00SJohannes Doerfert return CurrentIdent; 1319dc3b5b00SJohannes Doerfert 1320396b7253SJohannes Doerfert // TODO: Figure out how to actually combine multiple debug locations. For 1321dc3b5b00SJohannes Doerfert // now we just keep an existing one if there is a single choice. 1322dc3b5b00SJohannes Doerfert if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { 1323dc3b5b00SJohannes Doerfert SingleChoice = !CurrentIdent; 1324dc3b5b00SJohannes Doerfert return NextIdent; 1325dc3b5b00SJohannes Doerfert } 1326396b7253SJohannes Doerfert return nullptr; 1327396b7253SJohannes Doerfert } 1328396b7253SJohannes Doerfert 1329396b7253SJohannes Doerfert /// Return an `struct ident_t*` value that represents the ones used in the 1330396b7253SJohannes Doerfert /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 1331396b7253SJohannes Doerfert /// return a local `struct ident_t*`. For now, if we cannot find a suitable 1332396b7253SJohannes Doerfert /// return value we create one from scratch. We also do not yet combine 1333396b7253SJohannes Doerfert /// information, e.g., the source locations, see combinedIdentStruct. 13347cfd267cSsstefan1 Value * 13357cfd267cSsstefan1 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, 13367cfd267cSsstefan1 Function &F, bool GlobalOnly) { 1337dc3b5b00SJohannes Doerfert bool SingleChoice = true; 1338396b7253SJohannes Doerfert Value *Ident = nullptr; 1339396b7253SJohannes Doerfert auto CombineIdentStruct = [&](Use &U, Function &Caller) { 1340396b7253SJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 1341396b7253SJohannes Doerfert if (!CI || &F != &Caller) 1342396b7253SJohannes Doerfert return false; 1343396b7253SJohannes Doerfert Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 1344dc3b5b00SJohannes Doerfert /* GlobalOnly */ true, SingleChoice); 1345396b7253SJohannes Doerfert return false; 1346396b7253SJohannes Doerfert }; 1347624d34afSJohannes Doerfert RFI.foreachUse(SCC, CombineIdentStruct); 1348396b7253SJohannes Doerfert 1349dc3b5b00SJohannes Doerfert if (!Ident || !SingleChoice) { 1350396b7253SJohannes Doerfert // The IRBuilder uses the insertion block to get to the module, this is 1351396b7253SJohannes Doerfert // unfortunate but we work around it for now. 13527cfd267cSsstefan1 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) 13537cfd267cSsstefan1 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 1354396b7253SJohannes Doerfert &F.getEntryBlock(), F.getEntryBlock().begin())); 1355396b7253SJohannes Doerfert // Create a fallback location if non was found. 1356396b7253SJohannes Doerfert // TODO: Use the debug locations of the calls instead. 13577cfd267cSsstefan1 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); 13587cfd267cSsstefan1 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); 1359396b7253SJohannes Doerfert } 1360396b7253SJohannes Doerfert return Ident; 1361396b7253SJohannes Doerfert } 1362396b7253SJohannes Doerfert 1363b726c557SJohannes Doerfert /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 13649548b74aSJohannes Doerfert /// \p ReplVal if given. 13657cfd267cSsstefan1 bool deduplicateRuntimeCalls(Function &F, 13667cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &RFI, 13679548b74aSJohannes Doerfert Value *ReplVal = nullptr) { 13688855fec3SJohannes Doerfert auto *UV = RFI.getUseVector(F); 13698855fec3SJohannes Doerfert if (!UV || UV->size() + (ReplVal != nullptr) < 2) 1370b1fbf438SRoman Lebedev return false; 1371b1fbf438SRoman Lebedev 13727cfd267cSsstefan1 LLVM_DEBUG( 13737cfd267cSsstefan1 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 13747cfd267cSsstefan1 << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 13757cfd267cSsstefan1 1376ab3da5ddSMichael Liao assert((!ReplVal || (isa<Argument>(ReplVal) && 1377ab3da5ddSMichael Liao cast<Argument>(ReplVal)->getParent() == &F)) && 13789548b74aSJohannes Doerfert "Unexpected replacement value!"); 1379396b7253SJohannes Doerfert 1380396b7253SJohannes Doerfert // TODO: Use dominance to find a good position instead. 13816aab27baSsstefan1 auto CanBeMoved = [this](CallBase &CB) { 1382396b7253SJohannes Doerfert unsigned NumArgs = CB.getNumArgOperands(); 1383396b7253SJohannes Doerfert if (NumArgs == 0) 1384396b7253SJohannes Doerfert return true; 13856aab27baSsstefan1 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 1386396b7253SJohannes Doerfert return false; 1387396b7253SJohannes Doerfert for (unsigned u = 1; u < NumArgs; ++u) 1388396b7253SJohannes Doerfert if (isa<Instruction>(CB.getArgOperand(u))) 1389396b7253SJohannes Doerfert return false; 1390396b7253SJohannes Doerfert return true; 1391396b7253SJohannes Doerfert }; 1392396b7253SJohannes Doerfert 13939548b74aSJohannes Doerfert if (!ReplVal) { 13948855fec3SJohannes Doerfert for (Use *U : *UV) 13959548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 1396396b7253SJohannes Doerfert if (!CanBeMoved(*CI)) 1397396b7253SJohannes Doerfert continue; 13984d4ea9acSHuber, Joseph 13994d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 14004d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 14012db182ffSJoseph Huber << ore::NV("OpenMPOptRuntime", RFI.Name) 14022db182ffSJoseph Huber << " moved to beginning of OpenMP region"; 14034d4ea9acSHuber, Joseph }; 14042db182ffSJoseph Huber emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeCodeMotion", Remark); 14054d4ea9acSHuber, Joseph 14069548b74aSJohannes Doerfert CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 14079548b74aSJohannes Doerfert ReplVal = CI; 14089548b74aSJohannes Doerfert break; 14099548b74aSJohannes Doerfert } 14109548b74aSJohannes Doerfert if (!ReplVal) 14119548b74aSJohannes Doerfert return false; 14129548b74aSJohannes Doerfert } 14139548b74aSJohannes Doerfert 1414396b7253SJohannes Doerfert // If we use a call as a replacement value we need to make sure the ident is 1415396b7253SJohannes Doerfert // valid at the new location. For now we just pick a global one, either 1416396b7253SJohannes Doerfert // existing and used by one of the calls, or created from scratch. 1417396b7253SJohannes Doerfert if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 1418396b7253SJohannes Doerfert if (CI->getNumArgOperands() > 0 && 14196aab27baSsstefan1 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 1420396b7253SJohannes Doerfert Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 1421396b7253SJohannes Doerfert /* GlobalOnly */ true); 1422396b7253SJohannes Doerfert CI->setArgOperand(0, Ident); 1423396b7253SJohannes Doerfert } 1424396b7253SJohannes Doerfert } 1425396b7253SJohannes Doerfert 14269548b74aSJohannes Doerfert bool Changed = false; 14279548b74aSJohannes Doerfert auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 14289548b74aSJohannes Doerfert CallInst *CI = getCallIfRegularCall(U, &RFI); 14299548b74aSJohannes Doerfert if (!CI || CI == ReplVal || &F != &Caller) 14309548b74aSJohannes Doerfert return false; 14319548b74aSJohannes Doerfert assert(CI->getCaller() == &F && "Unexpected call!"); 14324d4ea9acSHuber, Joseph 14334d4ea9acSHuber, Joseph auto Remark = [&](OptimizationRemark OR) { 14344d4ea9acSHuber, Joseph return OR << "OpenMP runtime call " 14354d4ea9acSHuber, Joseph << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; 14364d4ea9acSHuber, Joseph }; 14372db182ffSJoseph Huber emitRemark<OptimizationRemark>(&F, "OpenMPRuntimeDeduplicated", Remark); 14384d4ea9acSHuber, Joseph 14399548b74aSJohannes Doerfert CGUpdater.removeCallSite(*CI); 14409548b74aSJohannes Doerfert CI->replaceAllUsesWith(ReplVal); 14419548b74aSJohannes Doerfert CI->eraseFromParent(); 14429548b74aSJohannes Doerfert ++NumOpenMPRuntimeCallsDeduplicated; 14439548b74aSJohannes Doerfert Changed = true; 14449548b74aSJohannes Doerfert return true; 14459548b74aSJohannes Doerfert }; 1446624d34afSJohannes Doerfert RFI.foreachUse(SCC, ReplaceAndDeleteCB); 14479548b74aSJohannes Doerfert 14489548b74aSJohannes Doerfert return Changed; 14499548b74aSJohannes Doerfert } 14509548b74aSJohannes Doerfert 14519548b74aSJohannes Doerfert /// Collect arguments that represent the global thread id in \p GTIdArgs. 14529548b74aSJohannes Doerfert void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 14539548b74aSJohannes Doerfert // TODO: Below we basically perform a fixpoint iteration with a pessimistic 14549548b74aSJohannes Doerfert // initialization. We could define an AbstractAttribute instead and 14559548b74aSJohannes Doerfert // run the Attributor here once it can be run as an SCC pass. 14569548b74aSJohannes Doerfert 14579548b74aSJohannes Doerfert // Helper to check the argument \p ArgNo at all call sites of \p F for 14589548b74aSJohannes Doerfert // a GTId. 14599548b74aSJohannes Doerfert auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 14609548b74aSJohannes Doerfert if (!F.hasLocalLinkage()) 14619548b74aSJohannes Doerfert return false; 14629548b74aSJohannes Doerfert for (Use &U : F.uses()) { 14639548b74aSJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U)) { 14649548b74aSJohannes Doerfert Value *ArgOp = CI->getArgOperand(ArgNo); 14659548b74aSJohannes Doerfert if (CI == &RefCI || GTIdArgs.count(ArgOp) || 14667cfd267cSsstefan1 getCallIfRegularCall( 14677cfd267cSsstefan1 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 14689548b74aSJohannes Doerfert continue; 14699548b74aSJohannes Doerfert } 14709548b74aSJohannes Doerfert return false; 14719548b74aSJohannes Doerfert } 14729548b74aSJohannes Doerfert return true; 14739548b74aSJohannes Doerfert }; 14749548b74aSJohannes Doerfert 14759548b74aSJohannes Doerfert // Helper to identify uses of a GTId as GTId arguments. 14769548b74aSJohannes Doerfert auto AddUserArgs = [&](Value >Id) { 14779548b74aSJohannes Doerfert for (Use &U : GTId.uses()) 14789548b74aSJohannes Doerfert if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 14799548b74aSJohannes Doerfert if (CI->isArgOperand(&U)) 14809548b74aSJohannes Doerfert if (Function *Callee = CI->getCalledFunction()) 14819548b74aSJohannes Doerfert if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 14829548b74aSJohannes Doerfert GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 14839548b74aSJohannes Doerfert }; 14849548b74aSJohannes Doerfert 14859548b74aSJohannes Doerfert // The argument users of __kmpc_global_thread_num calls are GTIds. 14867cfd267cSsstefan1 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 14877cfd267cSsstefan1 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 14887cfd267cSsstefan1 1489624d34afSJohannes Doerfert GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 14908855fec3SJohannes Doerfert if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 14919548b74aSJohannes Doerfert AddUserArgs(*CI); 14928855fec3SJohannes Doerfert return false; 14938855fec3SJohannes Doerfert }); 14949548b74aSJohannes Doerfert 14959548b74aSJohannes Doerfert // Transitively search for more arguments by looking at the users of the 14969548b74aSJohannes Doerfert // ones we know already. During the search the GTIdArgs vector is extended 14979548b74aSJohannes Doerfert // so we cannot cache the size nor can we use a range based for. 14989548b74aSJohannes Doerfert for (unsigned u = 0; u < GTIdArgs.size(); ++u) 14999548b74aSJohannes Doerfert AddUserArgs(*GTIdArgs[u]); 15009548b74aSJohannes Doerfert } 15019548b74aSJohannes Doerfert 15025b0581aeSJohannes Doerfert /// Kernel (=GPU) optimizations and utility functions 15035b0581aeSJohannes Doerfert /// 15045b0581aeSJohannes Doerfert ///{{ 15055b0581aeSJohannes Doerfert 15065b0581aeSJohannes Doerfert /// Check if \p F is a kernel, hence entry point for target offloading. 15075b0581aeSJohannes Doerfert bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } 15085b0581aeSJohannes Doerfert 15095b0581aeSJohannes Doerfert /// Cache to remember the unique kernel for a function. 15105b0581aeSJohannes Doerfert DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; 15115b0581aeSJohannes Doerfert 15125b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p F, if any. 15135b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Function &F); 15145b0581aeSJohannes Doerfert 15155b0581aeSJohannes Doerfert /// Find the unique kernel that will execute \p I, if any. 15165b0581aeSJohannes Doerfert Kernel getUniqueKernelFor(Instruction &I) { 15175b0581aeSJohannes Doerfert return getUniqueKernelFor(*I.getFunction()); 15185b0581aeSJohannes Doerfert } 15195b0581aeSJohannes Doerfert 15205b0581aeSJohannes Doerfert /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in 15215b0581aeSJohannes Doerfert /// the cases we can avoid taking the address of a function. 15225b0581aeSJohannes Doerfert bool rewriteDeviceCodeStateMachine(); 15235b0581aeSJohannes Doerfert 15245b0581aeSJohannes Doerfert /// 15255b0581aeSJohannes Doerfert ///}} 15265b0581aeSJohannes Doerfert 15274d4ea9acSHuber, Joseph /// Emit a remark generically 15284d4ea9acSHuber, Joseph /// 15294d4ea9acSHuber, Joseph /// This template function can be used to generically emit a remark. The 15304d4ea9acSHuber, Joseph /// RemarkKind should be one of the following: 15314d4ea9acSHuber, Joseph /// - OptimizationRemark to indicate a successful optimization attempt 15324d4ea9acSHuber, Joseph /// - OptimizationRemarkMissed to report a failed optimization attempt 15334d4ea9acSHuber, Joseph /// - OptimizationRemarkAnalysis to provide additional information about an 15344d4ea9acSHuber, Joseph /// optimization attempt 15354d4ea9acSHuber, Joseph /// 15364d4ea9acSHuber, Joseph /// The remark is built using a callback function provided by the caller that 15374d4ea9acSHuber, Joseph /// takes a RemarkKind as input and returns a RemarkKind. 15382db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack> 15392db182ffSJoseph Huber void emitRemark(Instruction *I, StringRef RemarkName, 1540e8039ad4SJohannes Doerfert RemarkCallBack &&RemarkCB) const { 15412db182ffSJoseph Huber Function *F = I->getParent()->getParent(); 15424d4ea9acSHuber, Joseph auto &ORE = OREGetter(F); 15434d4ea9acSHuber, Joseph 15442db182ffSJoseph Huber ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); 15454d4ea9acSHuber, Joseph } 15464d4ea9acSHuber, Joseph 15472db182ffSJoseph Huber /// Emit a remark on a function. 15482db182ffSJoseph Huber template <typename RemarkKind, typename RemarkCallBack> 15492db182ffSJoseph Huber void emitRemark(Function *F, StringRef RemarkName, 15502db182ffSJoseph Huber RemarkCallBack &&RemarkCB) const { 15510f426935Ssstefan1 auto &ORE = OREGetter(F); 15520f426935Ssstefan1 15532db182ffSJoseph Huber ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); 15540f426935Ssstefan1 } 15550f426935Ssstefan1 1556b726c557SJohannes Doerfert /// The underlying module. 15579548b74aSJohannes Doerfert Module &M; 15589548b74aSJohannes Doerfert 15599548b74aSJohannes Doerfert /// The SCC we are operating on. 1560ee17263aSJohannes Doerfert SmallVectorImpl<Function *> &SCC; 15619548b74aSJohannes Doerfert 15629548b74aSJohannes Doerfert /// Callback to update the call graph, the first argument is a removed call, 15639548b74aSJohannes Doerfert /// the second an optional replacement call. 15649548b74aSJohannes Doerfert CallGraphUpdater &CGUpdater; 15659548b74aSJohannes Doerfert 15664d4ea9acSHuber, Joseph /// Callback to get an OptimizationRemarkEmitter from a Function * 15674d4ea9acSHuber, Joseph OptimizationRemarkGetter OREGetter; 15684d4ea9acSHuber, Joseph 15697cfd267cSsstefan1 /// OpenMP-specific information cache. Also Used for Attributor runs. 15707cfd267cSsstefan1 OMPInformationCache &OMPInfoCache; 1571b8235d2bSsstefan1 1572b8235d2bSsstefan1 /// Attributor instance. 1573b8235d2bSsstefan1 Attributor &A; 1574b8235d2bSsstefan1 1575b8235d2bSsstefan1 /// Helper function to run Attributor on SCC. 1576d3e74913SNico Weber bool runAttributor() { 1577b8235d2bSsstefan1 if (SCC.empty()) 1578b8235d2bSsstefan1 return false; 1579b8235d2bSsstefan1 1580d3e74913SNico Weber registerAAs(); 1581b8235d2bSsstefan1 1582b8235d2bSsstefan1 ChangeStatus Changed = A.run(); 1583b8235d2bSsstefan1 1584b8235d2bSsstefan1 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 1585b8235d2bSsstefan1 << " functions, result: " << Changed << ".\n"); 1586b8235d2bSsstefan1 1587b8235d2bSsstefan1 return Changed == ChangeStatus::CHANGED; 1588b8235d2bSsstefan1 } 1589b8235d2bSsstefan1 1590b8235d2bSsstefan1 /// Populate the Attributor with abstract attribute opportunities in the 1591b8235d2bSsstefan1 /// function. 1592d3e74913SNico Weber void registerAAs() { 1593d3e74913SNico Weber if (SCC.empty()) 1594d3e74913SNico Weber return; 1595d3e74913SNico Weber 1596d3e74913SNico Weber // Create CallSite AA for all Getters. 1597d3e74913SNico Weber for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { 1598d3e74913SNico Weber auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; 1599d3e74913SNico Weber 1600d3e74913SNico Weber auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 1601d3e74913SNico Weber 1602d3e74913SNico Weber auto CreateAA = [&](Use &U, Function &Caller) { 1603d3e74913SNico Weber CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 1604d3e74913SNico Weber if (!CI) 1605d3e74913SNico Weber return false; 1606d3e74913SNico Weber 1607d3e74913SNico Weber auto &CB = cast<CallBase>(*CI); 1608d3e74913SNico Weber 1609d3e74913SNico Weber IRPosition CBPos = IRPosition::callsite_function(CB); 1610d3e74913SNico Weber A.getOrCreateAAFor<AAICVTracker>(CBPos); 1611d3e74913SNico Weber return false; 1612d3e74913SNico Weber }; 1613d3e74913SNico Weber 1614d3e74913SNico Weber GetterRFI.foreachUse(SCC, CreateAA); 1615d3e74913SNico Weber } 1616d3e74913SNico Weber auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 1617d3e74913SNico Weber auto CreateAA = [&](Use &U, Function &F) { 1618d3e74913SNico Weber A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); 1619d3e74913SNico Weber return false; 1620d3e74913SNico Weber }; 1621d3e74913SNico Weber GlobalizationRFI.foreachUse(SCC, CreateAA); 1622d3e74913SNico Weber 1623d3e74913SNico Weber // Create an ExecutionDomain AA for every function and a HeapToStack AA for 1624d3e74913SNico Weber // every function if there is a device kernel. 1625d3e74913SNico Weber for (auto *F : SCC) { 1626d3e74913SNico Weber if (!F->isDeclaration()) 1627d3e74913SNico Weber A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F)); 1628d3e74913SNico Weber if (isOpenMPDevice(M)) 1629d3e74913SNico Weber A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F)); 1630d3e74913SNico Weber } 1631d3e74913SNico Weber } 1632b8235d2bSsstefan1 }; 1633b8235d2bSsstefan1 16345b0581aeSJohannes Doerfert Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 16355b0581aeSJohannes Doerfert if (!OMPInfoCache.ModuleSlice.count(&F)) 16365b0581aeSJohannes Doerfert return nullptr; 16375b0581aeSJohannes Doerfert 16385b0581aeSJohannes Doerfert // Use a scope to keep the lifetime of the CachedKernel short. 16395b0581aeSJohannes Doerfert { 16405b0581aeSJohannes Doerfert Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 16415b0581aeSJohannes Doerfert if (CachedKernel) 16425b0581aeSJohannes Doerfert return *CachedKernel; 16435b0581aeSJohannes Doerfert 16445b0581aeSJohannes Doerfert // TODO: We should use an AA to create an (optimistic and callback 16455b0581aeSJohannes Doerfert // call-aware) call graph. For now we stick to simple patterns that 16465b0581aeSJohannes Doerfert // are less powerful, basically the worst fixpoint. 16475b0581aeSJohannes Doerfert if (isKernel(F)) { 16485b0581aeSJohannes Doerfert CachedKernel = Kernel(&F); 16495b0581aeSJohannes Doerfert return *CachedKernel; 16505b0581aeSJohannes Doerfert } 16515b0581aeSJohannes Doerfert 16525b0581aeSJohannes Doerfert CachedKernel = nullptr; 1653994bb6ebSJohannes Doerfert if (!F.hasLocalLinkage()) { 1654994bb6ebSJohannes Doerfert 1655994bb6ebSJohannes Doerfert // See https://openmp.llvm.org/remarks/OptimizationRemarks.html 16562db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 16572db182ffSJoseph Huber return ORA 16582db182ffSJoseph Huber << "[OMP100] Potentially unknown OpenMP target region caller"; 1659994bb6ebSJohannes Doerfert }; 16602db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); 1661994bb6ebSJohannes Doerfert 16625b0581aeSJohannes Doerfert return nullptr; 16635b0581aeSJohannes Doerfert } 1664994bb6ebSJohannes Doerfert } 16655b0581aeSJohannes Doerfert 16665b0581aeSJohannes Doerfert auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 16675b0581aeSJohannes Doerfert if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 16685b0581aeSJohannes Doerfert // Allow use in equality comparisons. 16695b0581aeSJohannes Doerfert if (Cmp->isEquality()) 16705b0581aeSJohannes Doerfert return getUniqueKernelFor(*Cmp); 16715b0581aeSJohannes Doerfert return nullptr; 16725b0581aeSJohannes Doerfert } 16735b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 16745b0581aeSJohannes Doerfert // Allow direct calls. 16755b0581aeSJohannes Doerfert if (CB->isCallee(&U)) 16765b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 1677a2dbfb6bSGiorgis Georgakoudis 1678a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 1679a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 1680a2dbfb6bSGiorgis Georgakoudis // Allow the use in __kmpc_parallel_51 calls. 1681a2dbfb6bSGiorgis Georgakoudis if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) 16825b0581aeSJohannes Doerfert return getUniqueKernelFor(*CB); 16835b0581aeSJohannes Doerfert return nullptr; 16845b0581aeSJohannes Doerfert } 16855b0581aeSJohannes Doerfert // Disallow every other use. 16865b0581aeSJohannes Doerfert return nullptr; 16875b0581aeSJohannes Doerfert }; 16885b0581aeSJohannes Doerfert 16895b0581aeSJohannes Doerfert // TODO: In the future we want to track more than just a unique kernel. 16905b0581aeSJohannes Doerfert SmallPtrSet<Kernel, 2> PotentialKernels; 16918d8ce85bSsstefan1 OMPInformationCache::foreachUse(F, [&](const Use &U) { 16925b0581aeSJohannes Doerfert PotentialKernels.insert(GetUniqueKernelForUse(U)); 16935b0581aeSJohannes Doerfert }); 16945b0581aeSJohannes Doerfert 16955b0581aeSJohannes Doerfert Kernel K = nullptr; 16965b0581aeSJohannes Doerfert if (PotentialKernels.size() == 1) 16975b0581aeSJohannes Doerfert K = *PotentialKernels.begin(); 16985b0581aeSJohannes Doerfert 16995b0581aeSJohannes Doerfert // Cache the result. 17005b0581aeSJohannes Doerfert UniqueKernelMap[&F] = K; 17015b0581aeSJohannes Doerfert 17025b0581aeSJohannes Doerfert return K; 17035b0581aeSJohannes Doerfert } 17045b0581aeSJohannes Doerfert 17055b0581aeSJohannes Doerfert bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 1706a2dbfb6bSGiorgis Georgakoudis OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 1707a2dbfb6bSGiorgis Georgakoudis OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 17085b0581aeSJohannes Doerfert 17095b0581aeSJohannes Doerfert bool Changed = false; 1710a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelRFI) 17115b0581aeSJohannes Doerfert return Changed; 17125b0581aeSJohannes Doerfert 17135b0581aeSJohannes Doerfert for (Function *F : SCC) { 17145b0581aeSJohannes Doerfert 1715a2dbfb6bSGiorgis Georgakoudis // Check if the function is a use in a __kmpc_parallel_51 call at 17165b0581aeSJohannes Doerfert // all. 17175b0581aeSJohannes Doerfert bool UnknownUse = false; 1718a2dbfb6bSGiorgis Georgakoudis bool KernelParallelUse = false; 17195b0581aeSJohannes Doerfert unsigned NumDirectCalls = 0; 17205b0581aeSJohannes Doerfert 17215b0581aeSJohannes Doerfert SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 17228d8ce85bSsstefan1 OMPInformationCache::foreachUse(*F, [&](Use &U) { 17235b0581aeSJohannes Doerfert if (auto *CB = dyn_cast<CallBase>(U.getUser())) 17245b0581aeSJohannes Doerfert if (CB->isCallee(&U)) { 17255b0581aeSJohannes Doerfert ++NumDirectCalls; 17265b0581aeSJohannes Doerfert return; 17275b0581aeSJohannes Doerfert } 17285b0581aeSJohannes Doerfert 172981db6144SMichael Liao if (isa<ICmpInst>(U.getUser())) { 17305b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 17315b0581aeSJohannes Doerfert return; 17325b0581aeSJohannes Doerfert } 1733a2dbfb6bSGiorgis Georgakoudis 1734a2dbfb6bSGiorgis Georgakoudis // Find wrapper functions that represent parallel kernels. 1735a2dbfb6bSGiorgis Georgakoudis CallInst *CI = 1736a2dbfb6bSGiorgis Georgakoudis OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); 1737a2dbfb6bSGiorgis Georgakoudis const unsigned int WrapperFunctionArgNo = 6; 1738a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse && CI && 1739a2dbfb6bSGiorgis Georgakoudis CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { 1740a2dbfb6bSGiorgis Georgakoudis KernelParallelUse = true; 17415b0581aeSJohannes Doerfert ToBeReplacedStateMachineUses.push_back(&U); 17425b0581aeSJohannes Doerfert return; 17435b0581aeSJohannes Doerfert } 17445b0581aeSJohannes Doerfert UnknownUse = true; 17455b0581aeSJohannes Doerfert }); 17465b0581aeSJohannes Doerfert 1747a2dbfb6bSGiorgis Georgakoudis // Do not emit a remark if we haven't seen a __kmpc_parallel_51 1748fec1f210SJohannes Doerfert // use. 1749a2dbfb6bSGiorgis Georgakoudis if (!KernelParallelUse) 17505b0581aeSJohannes Doerfert continue; 17515b0581aeSJohannes Doerfert 1752fec1f210SJohannes Doerfert { 17532db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17542db182ffSJoseph Huber return ORA << "Found a parallel region that is called in a target " 1755fec1f210SJohannes Doerfert "region but not part of a combined target construct nor " 1756a2dbfb6bSGiorgis Georgakoudis "nested inside a target construct without intermediate " 1757fec1f210SJohannes Doerfert "code. This can lead to excessive register usage for " 1758fec1f210SJohannes Doerfert "unrelated target regions in the same translation unit " 1759fec1f210SJohannes Doerfert "due to spurious call edges assumed by ptxas."; 1760fec1f210SJohannes Doerfert }; 17612db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", 17622db182ffSJoseph Huber Remark); 1763fec1f210SJohannes Doerfert } 1764fec1f210SJohannes Doerfert 1765fec1f210SJohannes Doerfert // If this ever hits, we should investigate. 1766fec1f210SJohannes Doerfert // TODO: Checking the number of uses is not a necessary restriction and 1767fec1f210SJohannes Doerfert // should be lifted. 1768fec1f210SJohannes Doerfert if (UnknownUse || NumDirectCalls != 1 || 1769d3e74913SNico Weber ToBeReplacedStateMachineUses.size() != 2) { 1770fec1f210SJohannes Doerfert { 17712db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17722db182ffSJoseph Huber return ORA << "Parallel region is used in " 1773fec1f210SJohannes Doerfert << (UnknownUse ? "unknown" : "unexpected") 1774fec1f210SJohannes Doerfert << " ways; will not attempt to rewrite the state machine."; 1775fec1f210SJohannes Doerfert }; 17762db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>( 17772db182ffSJoseph Huber F, "OpenMPParallelRegionInNonSPMD", Remark); 1778fec1f210SJohannes Doerfert } 17795b0581aeSJohannes Doerfert continue; 1780fec1f210SJohannes Doerfert } 17815b0581aeSJohannes Doerfert 1782a2dbfb6bSGiorgis Georgakoudis // Even if we have __kmpc_parallel_51 calls, we (for now) give 17835b0581aeSJohannes Doerfert // up if the function is not called from a unique kernel. 17845b0581aeSJohannes Doerfert Kernel K = getUniqueKernelFor(*F); 1785fec1f210SJohannes Doerfert if (!K) { 1786fec1f210SJohannes Doerfert { 17872db182ffSJoseph Huber auto Remark = [&](OptimizationRemarkAnalysis ORA) { 17882db182ffSJoseph Huber return ORA << "Parallel region is not known to be called from a " 1789fec1f210SJohannes Doerfert "unique single target region, maybe the surrounding " 1790fec1f210SJohannes Doerfert "function has external linkage?; will not attempt to " 1791fec1f210SJohannes Doerfert "rewrite the state machine use."; 1792fec1f210SJohannes Doerfert }; 17932db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>( 17942db182ffSJoseph Huber F, "OpenMPParallelRegionInMultipleKernesl", Remark); 1795fec1f210SJohannes Doerfert } 17965b0581aeSJohannes Doerfert continue; 1797fec1f210SJohannes Doerfert } 17985b0581aeSJohannes Doerfert 17995b0581aeSJohannes Doerfert // We now know F is a parallel body function called only from the kernel K. 18005b0581aeSJohannes Doerfert // We also identified the state machine uses in which we replace the 18015b0581aeSJohannes Doerfert // function pointer by a new global symbol for identification purposes. This 18025b0581aeSJohannes Doerfert // ensures only direct calls to the function are left. 18035b0581aeSJohannes Doerfert 1804fec1f210SJohannes Doerfert { 18052db182ffSJoseph Huber auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) { 18062db182ffSJoseph Huber return ORA << "Specialize parallel region that is only reached from a " 1807fec1f210SJohannes Doerfert "single target region to avoid spurious call edges and " 1808fec1f210SJohannes Doerfert "excessive register usage in other target regions. " 1809fec1f210SJohannes Doerfert "(parallel region ID: " 1810fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1811fec1f210SJohannes Doerfert << ", kernel ID: " 1812fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1813fec1f210SJohannes Doerfert }; 18142db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPParallelRegionInNonSPMD", 1815fec1f210SJohannes Doerfert RemarkParalleRegion); 18162db182ffSJoseph Huber auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) { 18172db182ffSJoseph Huber return ORA << "Target region containing the parallel region that is " 1818fec1f210SJohannes Doerfert "specialized. (parallel region ID: " 1819fec1f210SJohannes Doerfert << ore::NV("OpenMPParallelRegion", F->getName()) 1820fec1f210SJohannes Doerfert << ", kernel ID: " 1821fec1f210SJohannes Doerfert << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1822fec1f210SJohannes Doerfert }; 18232db182ffSJoseph Huber emitRemark<OptimizationRemarkAnalysis>(K, "OpenMPParallelRegionInNonSPMD", 18242db182ffSJoseph Huber RemarkKernel); 1825fec1f210SJohannes Doerfert } 1826fec1f210SJohannes Doerfert 18275b0581aeSJohannes Doerfert Module &M = *F->getParent(); 18285b0581aeSJohannes Doerfert Type *Int8Ty = Type::getInt8Ty(M.getContext()); 18295b0581aeSJohannes Doerfert 18305b0581aeSJohannes Doerfert auto *ID = new GlobalVariable( 18315b0581aeSJohannes Doerfert M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, 18325b0581aeSJohannes Doerfert UndefValue::get(Int8Ty), F->getName() + ".ID"); 18335b0581aeSJohannes Doerfert 18345b0581aeSJohannes Doerfert for (Use *U : ToBeReplacedStateMachineUses) 18355b0581aeSJohannes Doerfert U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); 18365b0581aeSJohannes Doerfert 18375b0581aeSJohannes Doerfert ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; 18385b0581aeSJohannes Doerfert 18395b0581aeSJohannes Doerfert Changed = true; 18405b0581aeSJohannes Doerfert } 18415b0581aeSJohannes Doerfert 18425b0581aeSJohannes Doerfert return Changed; 18435b0581aeSJohannes Doerfert } 18445b0581aeSJohannes Doerfert 1845b8235d2bSsstefan1 /// Abstract Attribute for tracking ICV values. 1846b8235d2bSsstefan1 struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { 1847b8235d2bSsstefan1 using Base = StateWrapper<BooleanState, AbstractAttribute>; 1848b8235d2bSsstefan1 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 1849b8235d2bSsstefan1 18505dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 18515dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 18525dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 18535dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 18545dfd7cc4Ssstefan1 } 18555dfd7cc4Ssstefan1 1856b8235d2bSsstefan1 /// Returns true if value is assumed to be tracked. 1857b8235d2bSsstefan1 bool isAssumedTracked() const { return getAssumed(); } 1858b8235d2bSsstefan1 1859b8235d2bSsstefan1 /// Returns true if value is known to be tracked. 1860b8235d2bSsstefan1 bool isKnownTracked() const { return getAssumed(); } 1861b8235d2bSsstefan1 1862b8235d2bSsstefan1 /// Create an abstract attribute biew for the position \p IRP. 1863b8235d2bSsstefan1 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); 1864b8235d2bSsstefan1 1865b8235d2bSsstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 18665dfd7cc4Ssstefan1 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV, 18675dfd7cc4Ssstefan1 const Instruction *I, 18685dfd7cc4Ssstefan1 Attributor &A) const { 18695dfd7cc4Ssstefan1 return None; 18705dfd7cc4Ssstefan1 } 18715dfd7cc4Ssstefan1 18725dfd7cc4Ssstefan1 /// Return an assumed unique ICV value if a single candidate is found. If 18735dfd7cc4Ssstefan1 /// there cannot be one, return a nullptr. If it is not clear yet, return the 18745dfd7cc4Ssstefan1 /// Optional::NoneType. 18755dfd7cc4Ssstefan1 virtual Optional<Value *> 18765dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const = 0; 18775dfd7cc4Ssstefan1 18785dfd7cc4Ssstefan1 // Currently only nthreads is being tracked. 18795dfd7cc4Ssstefan1 // this array will only grow with time. 18805dfd7cc4Ssstefan1 InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 1881b8235d2bSsstefan1 1882b8235d2bSsstefan1 /// See AbstractAttribute::getName() 1883b8235d2bSsstefan1 const std::string getName() const override { return "AAICVTracker"; } 1884b8235d2bSsstefan1 1885233af895SLuofan Chen /// See AbstractAttribute::getIdAddr() 1886233af895SLuofan Chen const char *getIdAddr() const override { return &ID; } 1887233af895SLuofan Chen 1888233af895SLuofan Chen /// This function should return true if the type of the \p AA is AAICVTracker 1889233af895SLuofan Chen static bool classof(const AbstractAttribute *AA) { 1890233af895SLuofan Chen return (AA->getIdAddr() == &ID); 1891233af895SLuofan Chen } 1892233af895SLuofan Chen 1893b8235d2bSsstefan1 static const char ID; 1894b8235d2bSsstefan1 }; 1895b8235d2bSsstefan1 1896b8235d2bSsstefan1 struct AAICVTrackerFunction : public AAICVTracker { 1897b8235d2bSsstefan1 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 1898b8235d2bSsstefan1 : AAICVTracker(IRP, A) {} 1899b8235d2bSsstefan1 1900b8235d2bSsstefan1 // FIXME: come up with better string. 19015dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerFunction"; } 1902b8235d2bSsstefan1 1903b8235d2bSsstefan1 // FIXME: come up with some stats. 1904b8235d2bSsstefan1 void trackStatistics() const override {} 1905b8235d2bSsstefan1 19065dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 1907b8235d2bSsstefan1 ChangeStatus manifest(Attributor &A) override { 19085dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 1909b8235d2bSsstefan1 } 1910b8235d2bSsstefan1 1911b8235d2bSsstefan1 // Map of ICV to their values at specific program point. 19125dfd7cc4Ssstefan1 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar, 1913b8235d2bSsstefan1 InternalControlVar::ICV___last> 19145dfd7cc4Ssstefan1 ICVReplacementValuesMap; 1915b8235d2bSsstefan1 1916b8235d2bSsstefan1 ChangeStatus updateImpl(Attributor &A) override { 1917b8235d2bSsstefan1 ChangeStatus HasChanged = ChangeStatus::UNCHANGED; 1918b8235d2bSsstefan1 1919b8235d2bSsstefan1 Function *F = getAnchorScope(); 1920b8235d2bSsstefan1 1921b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1922b8235d2bSsstefan1 1923b8235d2bSsstefan1 for (InternalControlVar ICV : TrackableICVs) { 1924b8235d2bSsstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 1925b8235d2bSsstefan1 19265dfd7cc4Ssstefan1 auto &ValuesMap = ICVReplacementValuesMap[ICV]; 1927b8235d2bSsstefan1 auto TrackValues = [&](Use &U, Function &) { 1928b8235d2bSsstefan1 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); 1929b8235d2bSsstefan1 if (!CI) 1930b8235d2bSsstefan1 return false; 1931b8235d2bSsstefan1 1932b8235d2bSsstefan1 // FIXME: handle setters with more that 1 arguments. 1933b8235d2bSsstefan1 /// Track new value. 19345dfd7cc4Ssstefan1 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) 1935b8235d2bSsstefan1 HasChanged = ChangeStatus::CHANGED; 1936b8235d2bSsstefan1 1937b8235d2bSsstefan1 return false; 1938b8235d2bSsstefan1 }; 1939b8235d2bSsstefan1 19405dfd7cc4Ssstefan1 auto CallCheck = [&](Instruction &I) { 19415dfd7cc4Ssstefan1 Optional<Value *> ReplVal = getValueForCall(A, &I, ICV); 19425dfd7cc4Ssstefan1 if (ReplVal.hasValue() && 19435dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) 19445dfd7cc4Ssstefan1 HasChanged = ChangeStatus::CHANGED; 19455dfd7cc4Ssstefan1 19465dfd7cc4Ssstefan1 return true; 19475dfd7cc4Ssstefan1 }; 19485dfd7cc4Ssstefan1 19495dfd7cc4Ssstefan1 // Track all changes of an ICV. 1950b8235d2bSsstefan1 SetterRFI.foreachUse(TrackValues, F); 19515dfd7cc4Ssstefan1 19525dfd7cc4Ssstefan1 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, 19535dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true); 19545dfd7cc4Ssstefan1 19555dfd7cc4Ssstefan1 /// TODO: Figure out a way to avoid adding entry in 19565dfd7cc4Ssstefan1 /// ICVReplacementValuesMap 19575dfd7cc4Ssstefan1 Instruction *Entry = &F->getEntryBlock().front(); 19585dfd7cc4Ssstefan1 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) 19595dfd7cc4Ssstefan1 ValuesMap.insert(std::make_pair(Entry, nullptr)); 1960b8235d2bSsstefan1 } 1961b8235d2bSsstefan1 1962b8235d2bSsstefan1 return HasChanged; 1963b8235d2bSsstefan1 } 1964b8235d2bSsstefan1 19655dfd7cc4Ssstefan1 /// Hepler to check if \p I is a call and get the value for it if it is 19665dfd7cc4Ssstefan1 /// unique. 19675dfd7cc4Ssstefan1 Optional<Value *> getValueForCall(Attributor &A, const Instruction *I, 19685dfd7cc4Ssstefan1 InternalControlVar &ICV) const { 1969b8235d2bSsstefan1 19705dfd7cc4Ssstefan1 const auto *CB = dyn_cast<CallBase>(I); 1971dcaec812SJohannes Doerfert if (!CB || CB->hasFnAttr("no_openmp") || 1972dcaec812SJohannes Doerfert CB->hasFnAttr("no_openmp_routines")) 19735dfd7cc4Ssstefan1 return None; 19745dfd7cc4Ssstefan1 1975b8235d2bSsstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1976b8235d2bSsstefan1 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 19775dfd7cc4Ssstefan1 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 19785dfd7cc4Ssstefan1 Function *CalledFunction = CB->getCalledFunction(); 1979b8235d2bSsstefan1 19804eef14f9SWei Wang // Indirect call, assume ICV changes. 19814eef14f9SWei Wang if (CalledFunction == nullptr) 19824eef14f9SWei Wang return nullptr; 19835dfd7cc4Ssstefan1 if (CalledFunction == GetterRFI.Declaration) 19845dfd7cc4Ssstefan1 return None; 19855dfd7cc4Ssstefan1 if (CalledFunction == SetterRFI.Declaration) { 19865dfd7cc4Ssstefan1 if (ICVReplacementValuesMap[ICV].count(I)) 19875dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV].lookup(I); 19885dfd7cc4Ssstefan1 19895dfd7cc4Ssstefan1 return nullptr; 19905dfd7cc4Ssstefan1 } 19915dfd7cc4Ssstefan1 19925dfd7cc4Ssstefan1 // Since we don't know, assume it changes the ICV. 19935dfd7cc4Ssstefan1 if (CalledFunction->isDeclaration()) 19945dfd7cc4Ssstefan1 return nullptr; 19955dfd7cc4Ssstefan1 19965b70c12fSJohannes Doerfert const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 19975b70c12fSJohannes Doerfert *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); 19985dfd7cc4Ssstefan1 19995dfd7cc4Ssstefan1 if (ICVTrackingAA.isAssumedTracked()) 20005dfd7cc4Ssstefan1 return ICVTrackingAA.getUniqueReplacementValue(ICV); 20015dfd7cc4Ssstefan1 20025dfd7cc4Ssstefan1 // If we don't know, assume it changes. 20035dfd7cc4Ssstefan1 return nullptr; 20045dfd7cc4Ssstefan1 } 20055dfd7cc4Ssstefan1 20065dfd7cc4Ssstefan1 // We don't check unique value for a function, so return None. 20075dfd7cc4Ssstefan1 Optional<Value *> 20085dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 20095dfd7cc4Ssstefan1 return None; 20105dfd7cc4Ssstefan1 } 20115dfd7cc4Ssstefan1 20125dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 20135dfd7cc4Ssstefan1 Optional<Value *> getReplacementValue(InternalControlVar ICV, 20145dfd7cc4Ssstefan1 const Instruction *I, 20155dfd7cc4Ssstefan1 Attributor &A) const override { 20165dfd7cc4Ssstefan1 const auto &ValuesMap = ICVReplacementValuesMap[ICV]; 20175dfd7cc4Ssstefan1 if (ValuesMap.count(I)) 20185dfd7cc4Ssstefan1 return ValuesMap.lookup(I); 20195dfd7cc4Ssstefan1 20205dfd7cc4Ssstefan1 SmallVector<const Instruction *, 16> Worklist; 20215dfd7cc4Ssstefan1 SmallPtrSet<const Instruction *, 16> Visited; 20225dfd7cc4Ssstefan1 Worklist.push_back(I); 20235dfd7cc4Ssstefan1 20245dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 20255dfd7cc4Ssstefan1 20265dfd7cc4Ssstefan1 while (!Worklist.empty()) { 20275dfd7cc4Ssstefan1 const Instruction *CurrInst = Worklist.pop_back_val(); 20285dfd7cc4Ssstefan1 if (!Visited.insert(CurrInst).second) 2029b8235d2bSsstefan1 continue; 2030b8235d2bSsstefan1 20315dfd7cc4Ssstefan1 const BasicBlock *CurrBB = CurrInst->getParent(); 20325dfd7cc4Ssstefan1 20335dfd7cc4Ssstefan1 // Go up and look for all potential setters/calls that might change the 20345dfd7cc4Ssstefan1 // ICV. 20355dfd7cc4Ssstefan1 while ((CurrInst = CurrInst->getPrevNode())) { 20365dfd7cc4Ssstefan1 if (ValuesMap.count(CurrInst)) { 20375dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); 20385dfd7cc4Ssstefan1 // Unknown value, track new. 20395dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 20405dfd7cc4Ssstefan1 ReplVal = NewReplVal; 20415dfd7cc4Ssstefan1 break; 20425dfd7cc4Ssstefan1 } 20435dfd7cc4Ssstefan1 20445dfd7cc4Ssstefan1 // If we found a new value, we can't know the icv value anymore. 20455dfd7cc4Ssstefan1 if (NewReplVal.hasValue()) 20465dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 2047b8235d2bSsstefan1 return nullptr; 2048b8235d2bSsstefan1 20495dfd7cc4Ssstefan1 break; 2050b8235d2bSsstefan1 } 2051b8235d2bSsstefan1 20525dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV); 20535dfd7cc4Ssstefan1 if (!NewReplVal.hasValue()) 20545dfd7cc4Ssstefan1 continue; 20555dfd7cc4Ssstefan1 20565dfd7cc4Ssstefan1 // Unknown value, track new. 20575dfd7cc4Ssstefan1 if (!ReplVal.hasValue()) { 20585dfd7cc4Ssstefan1 ReplVal = NewReplVal; 20595dfd7cc4Ssstefan1 break; 2060b8235d2bSsstefan1 } 2061b8235d2bSsstefan1 20625dfd7cc4Ssstefan1 // if (NewReplVal.hasValue()) 20635dfd7cc4Ssstefan1 // We found a new value, we can't know the icv value anymore. 20645dfd7cc4Ssstefan1 if (ReplVal != NewReplVal) 2065b8235d2bSsstefan1 return nullptr; 2066b8235d2bSsstefan1 } 20675dfd7cc4Ssstefan1 20685dfd7cc4Ssstefan1 // If we are in the same BB and we have a value, we are done. 20695dfd7cc4Ssstefan1 if (CurrBB == I->getParent() && ReplVal.hasValue()) 20705dfd7cc4Ssstefan1 return ReplVal; 20715dfd7cc4Ssstefan1 20725dfd7cc4Ssstefan1 // Go through all predecessors and add terminators for analysis. 20735dfd7cc4Ssstefan1 for (const BasicBlock *Pred : predecessors(CurrBB)) 20745dfd7cc4Ssstefan1 if (const Instruction *Terminator = Pred->getTerminator()) 20755dfd7cc4Ssstefan1 Worklist.push_back(Terminator); 20765dfd7cc4Ssstefan1 } 20775dfd7cc4Ssstefan1 20785dfd7cc4Ssstefan1 return ReplVal; 20795dfd7cc4Ssstefan1 } 20805dfd7cc4Ssstefan1 }; 20815dfd7cc4Ssstefan1 20825dfd7cc4Ssstefan1 struct AAICVTrackerFunctionReturned : AAICVTracker { 20835dfd7cc4Ssstefan1 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) 20845dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 20855dfd7cc4Ssstefan1 20865dfd7cc4Ssstefan1 // FIXME: come up with better string. 20875dfd7cc4Ssstefan1 const std::string getAsStr() const override { 20885dfd7cc4Ssstefan1 return "ICVTrackerFunctionReturned"; 20895dfd7cc4Ssstefan1 } 20905dfd7cc4Ssstefan1 20915dfd7cc4Ssstefan1 // FIXME: come up with some stats. 20925dfd7cc4Ssstefan1 void trackStatistics() const override {} 20935dfd7cc4Ssstefan1 20945dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 20955dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 20965dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 20975dfd7cc4Ssstefan1 } 20985dfd7cc4Ssstefan1 20995dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 21005dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 21015dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 21025dfd7cc4Ssstefan1 ICVReplacementValuesMap; 21035dfd7cc4Ssstefan1 21045dfd7cc4Ssstefan1 /// Return the value with which \p I can be replaced for specific \p ICV. 21055dfd7cc4Ssstefan1 Optional<Value *> 21065dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 21075dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 21085dfd7cc4Ssstefan1 } 21095dfd7cc4Ssstefan1 21105dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 21115dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 21125dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 21135b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 21145dfd7cc4Ssstefan1 21155dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 21165dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 21175dfd7cc4Ssstefan1 21185dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 21195dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 21205dfd7cc4Ssstefan1 Optional<Value *> UniqueICVValue; 21215dfd7cc4Ssstefan1 21225dfd7cc4Ssstefan1 auto CheckReturnInst = [&](Instruction &I) { 21235dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 21245dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(ICV, &I, A); 21255dfd7cc4Ssstefan1 21265dfd7cc4Ssstefan1 // If we found a second ICV value there is no unique returned value. 21275dfd7cc4Ssstefan1 if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) 21285dfd7cc4Ssstefan1 return false; 21295dfd7cc4Ssstefan1 21305dfd7cc4Ssstefan1 UniqueICVValue = NewReplVal; 21315dfd7cc4Ssstefan1 21325dfd7cc4Ssstefan1 return true; 21335dfd7cc4Ssstefan1 }; 21345dfd7cc4Ssstefan1 21355dfd7cc4Ssstefan1 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, 21365dfd7cc4Ssstefan1 /* CheckBBLivenessOnly */ true)) 21375dfd7cc4Ssstefan1 UniqueICVValue = nullptr; 21385dfd7cc4Ssstefan1 21395dfd7cc4Ssstefan1 if (UniqueICVValue == ReplVal) 21405dfd7cc4Ssstefan1 continue; 21415dfd7cc4Ssstefan1 21425dfd7cc4Ssstefan1 ReplVal = UniqueICVValue; 21435dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 21445dfd7cc4Ssstefan1 } 21455dfd7cc4Ssstefan1 21465dfd7cc4Ssstefan1 return Changed; 21475dfd7cc4Ssstefan1 } 21485dfd7cc4Ssstefan1 }; 21495dfd7cc4Ssstefan1 21505dfd7cc4Ssstefan1 struct AAICVTrackerCallSite : AAICVTracker { 21515dfd7cc4Ssstefan1 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) 21525dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 21535dfd7cc4Ssstefan1 21545dfd7cc4Ssstefan1 void initialize(Attributor &A) override { 21555dfd7cc4Ssstefan1 Function *F = getAnchorScope(); 21565dfd7cc4Ssstefan1 if (!F || !A.isFunctionIPOAmendable(*F)) 21575dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 21585dfd7cc4Ssstefan1 21595dfd7cc4Ssstefan1 // We only initialize this AA for getters, so we need to know which ICV it 21605dfd7cc4Ssstefan1 // gets. 21615dfd7cc4Ssstefan1 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 21625dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 21635dfd7cc4Ssstefan1 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 21645dfd7cc4Ssstefan1 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; 21655dfd7cc4Ssstefan1 if (Getter.Declaration == getAssociatedFunction()) { 21665dfd7cc4Ssstefan1 AssociatedICV = ICVInfo.Kind; 21675dfd7cc4Ssstefan1 return; 21685dfd7cc4Ssstefan1 } 21695dfd7cc4Ssstefan1 } 21705dfd7cc4Ssstefan1 21715dfd7cc4Ssstefan1 /// Unknown ICV. 21725dfd7cc4Ssstefan1 indicatePessimisticFixpoint(); 21735dfd7cc4Ssstefan1 } 21745dfd7cc4Ssstefan1 21755dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 21765dfd7cc4Ssstefan1 if (!ReplVal.hasValue() || !ReplVal.getValue()) 21775dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 21785dfd7cc4Ssstefan1 21795dfd7cc4Ssstefan1 A.changeValueAfterManifest(*getCtxI(), **ReplVal); 21805dfd7cc4Ssstefan1 A.deleteAfterManifest(*getCtxI()); 21815dfd7cc4Ssstefan1 21825dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 21835dfd7cc4Ssstefan1 } 21845dfd7cc4Ssstefan1 21855dfd7cc4Ssstefan1 // FIXME: come up with better string. 21865dfd7cc4Ssstefan1 const std::string getAsStr() const override { return "ICVTrackerCallSite"; } 21875dfd7cc4Ssstefan1 21885dfd7cc4Ssstefan1 // FIXME: come up with some stats. 21895dfd7cc4Ssstefan1 void trackStatistics() const override {} 21905dfd7cc4Ssstefan1 21915dfd7cc4Ssstefan1 InternalControlVar AssociatedICV; 21925dfd7cc4Ssstefan1 Optional<Value *> ReplVal; 21935dfd7cc4Ssstefan1 21945dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 21955dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 21965b70c12fSJohannes Doerfert *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 21975dfd7cc4Ssstefan1 21985dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 21995dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 22005dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 22015dfd7cc4Ssstefan1 22025dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 22035dfd7cc4Ssstefan1 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); 22045dfd7cc4Ssstefan1 22055dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 22065dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 22075dfd7cc4Ssstefan1 22085dfd7cc4Ssstefan1 ReplVal = NewReplVal; 22095dfd7cc4Ssstefan1 return ChangeStatus::CHANGED; 22105dfd7cc4Ssstefan1 } 22115dfd7cc4Ssstefan1 22125dfd7cc4Ssstefan1 // Return the value with which associated value can be replaced for specific 22135dfd7cc4Ssstefan1 // \p ICV. 22145dfd7cc4Ssstefan1 Optional<Value *> 22155dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 22165dfd7cc4Ssstefan1 return ReplVal; 22175dfd7cc4Ssstefan1 } 22185dfd7cc4Ssstefan1 }; 22195dfd7cc4Ssstefan1 22205dfd7cc4Ssstefan1 struct AAICVTrackerCallSiteReturned : AAICVTracker { 22215dfd7cc4Ssstefan1 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) 22225dfd7cc4Ssstefan1 : AAICVTracker(IRP, A) {} 22235dfd7cc4Ssstefan1 22245dfd7cc4Ssstefan1 // FIXME: come up with better string. 22255dfd7cc4Ssstefan1 const std::string getAsStr() const override { 22265dfd7cc4Ssstefan1 return "ICVTrackerCallSiteReturned"; 22275dfd7cc4Ssstefan1 } 22285dfd7cc4Ssstefan1 22295dfd7cc4Ssstefan1 // FIXME: come up with some stats. 22305dfd7cc4Ssstefan1 void trackStatistics() const override {} 22315dfd7cc4Ssstefan1 22325dfd7cc4Ssstefan1 /// We don't manifest anything for this AA. 22335dfd7cc4Ssstefan1 ChangeStatus manifest(Attributor &A) override { 22345dfd7cc4Ssstefan1 return ChangeStatus::UNCHANGED; 22355dfd7cc4Ssstefan1 } 22365dfd7cc4Ssstefan1 22375dfd7cc4Ssstefan1 // Map of ICV to their values at specific program point. 22385dfd7cc4Ssstefan1 EnumeratedArray<Optional<Value *>, InternalControlVar, 22395dfd7cc4Ssstefan1 InternalControlVar::ICV___last> 22405dfd7cc4Ssstefan1 ICVReplacementValuesMap; 22415dfd7cc4Ssstefan1 22425dfd7cc4Ssstefan1 /// Return the value with which associated value can be replaced for specific 22435dfd7cc4Ssstefan1 /// \p ICV. 22445dfd7cc4Ssstefan1 Optional<Value *> 22455dfd7cc4Ssstefan1 getUniqueReplacementValue(InternalControlVar ICV) const override { 22465dfd7cc4Ssstefan1 return ICVReplacementValuesMap[ICV]; 22475dfd7cc4Ssstefan1 } 22485dfd7cc4Ssstefan1 22495dfd7cc4Ssstefan1 ChangeStatus updateImpl(Attributor &A) override { 22505dfd7cc4Ssstefan1 ChangeStatus Changed = ChangeStatus::UNCHANGED; 22515dfd7cc4Ssstefan1 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( 22525b70c12fSJohannes Doerfert *this, IRPosition::returned(*getAssociatedFunction()), 22535b70c12fSJohannes Doerfert DepClassTy::REQUIRED); 22545dfd7cc4Ssstefan1 22555dfd7cc4Ssstefan1 // We don't have any information, so we assume it changes the ICV. 22565dfd7cc4Ssstefan1 if (!ICVTrackingAA.isAssumedTracked()) 22575dfd7cc4Ssstefan1 return indicatePessimisticFixpoint(); 22585dfd7cc4Ssstefan1 22595dfd7cc4Ssstefan1 for (InternalControlVar ICV : TrackableICVs) { 22605dfd7cc4Ssstefan1 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 22615dfd7cc4Ssstefan1 Optional<Value *> NewReplVal = 22625dfd7cc4Ssstefan1 ICVTrackingAA.getUniqueReplacementValue(ICV); 22635dfd7cc4Ssstefan1 22645dfd7cc4Ssstefan1 if (ReplVal == NewReplVal) 22655dfd7cc4Ssstefan1 continue; 22665dfd7cc4Ssstefan1 22675dfd7cc4Ssstefan1 ReplVal = NewReplVal; 22685dfd7cc4Ssstefan1 Changed = ChangeStatus::CHANGED; 22695dfd7cc4Ssstefan1 } 22705dfd7cc4Ssstefan1 return Changed; 22715dfd7cc4Ssstefan1 } 22729548b74aSJohannes Doerfert }; 227318283125SJoseph Huber 227418283125SJoseph Huber struct AAExecutionDomainFunction : public AAExecutionDomain { 227518283125SJoseph Huber AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) 227618283125SJoseph Huber : AAExecutionDomain(IRP, A) {} 227718283125SJoseph Huber 227818283125SJoseph Huber const std::string getAsStr() const override { 227918283125SJoseph Huber return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + 228018283125SJoseph Huber "/" + std::to_string(NumBBs) + " BBs thread 0 only."; 228118283125SJoseph Huber } 228218283125SJoseph Huber 228318283125SJoseph Huber /// See AbstractAttribute::trackStatistics(). 228418283125SJoseph Huber void trackStatistics() const override {} 228518283125SJoseph Huber 228618283125SJoseph Huber void initialize(Attributor &A) override { 228718283125SJoseph Huber Function *F = getAnchorScope(); 228818283125SJoseph Huber for (const auto &BB : *F) 228918283125SJoseph Huber SingleThreadedBBs.insert(&BB); 229018283125SJoseph Huber NumBBs = SingleThreadedBBs.size(); 229118283125SJoseph Huber } 229218283125SJoseph Huber 229318283125SJoseph Huber ChangeStatus manifest(Attributor &A) override { 229418283125SJoseph Huber LLVM_DEBUG({ 229518283125SJoseph Huber for (const BasicBlock *BB : SingleThreadedBBs) 229618283125SJoseph Huber dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " 229718283125SJoseph Huber << BB->getName() << " is executed by a single thread.\n"; 229818283125SJoseph Huber }); 229918283125SJoseph Huber return ChangeStatus::UNCHANGED; 230018283125SJoseph Huber } 230118283125SJoseph Huber 230218283125SJoseph Huber ChangeStatus updateImpl(Attributor &A) override; 230318283125SJoseph Huber 230418283125SJoseph Huber /// Check if an instruction is executed by a single thread. 23059a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const Instruction &I) const override { 23069a23e673SJohannes Doerfert return isExecutedByInitialThreadOnly(*I.getParent()); 230718283125SJoseph Huber } 230818283125SJoseph Huber 23099a23e673SJohannes Doerfert bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { 23101cfdcae6SJoseph Huber return isValidState() && SingleThreadedBBs.contains(&BB); 231118283125SJoseph Huber } 231218283125SJoseph Huber 231318283125SJoseph Huber /// Set of basic blocks that are executed by a single thread. 231418283125SJoseph Huber DenseSet<const BasicBlock *> SingleThreadedBBs; 231518283125SJoseph Huber 231618283125SJoseph Huber /// Total number of basic blocks in this function. 231718283125SJoseph Huber long unsigned NumBBs; 231818283125SJoseph Huber }; 231918283125SJoseph Huber 232018283125SJoseph Huber ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { 232118283125SJoseph Huber Function *F = getAnchorScope(); 232218283125SJoseph Huber ReversePostOrderTraversal<Function *> RPOT(F); 232318283125SJoseph Huber auto NumSingleThreadedBBs = SingleThreadedBBs.size(); 232418283125SJoseph Huber 232518283125SJoseph Huber bool AllCallSitesKnown; 232618283125SJoseph Huber auto PredForCallSite = [&](AbstractCallSite ACS) { 232718283125SJoseph Huber const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( 232818283125SJoseph Huber *this, IRPosition::function(*ACS.getInstruction()->getFunction()), 232918283125SJoseph Huber DepClassTy::REQUIRED); 23301cfdcae6SJoseph Huber return ACS.isDirectCall() && 23311cfdcae6SJoseph Huber ExecutionDomainAA.isExecutedByInitialThreadOnly( 23329a23e673SJohannes Doerfert *ACS.getInstruction()); 233318283125SJoseph Huber }; 233418283125SJoseph Huber 233518283125SJoseph Huber if (!A.checkForAllCallSites(PredForCallSite, *this, 233618283125SJoseph Huber /* RequiresAllCallSites */ true, 233718283125SJoseph Huber AllCallSitesKnown)) 233818283125SJoseph Huber SingleThreadedBBs.erase(&F->getEntryBlock()); 233918283125SJoseph Huber 2340*e2cfbfccSJohannes Doerfert auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 2341*e2cfbfccSJohannes Doerfert auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; 2342*e2cfbfccSJohannes Doerfert 2343*e2cfbfccSJohannes Doerfert // Check if the edge into the successor block compares the __kmpc_target_init 2344*e2cfbfccSJohannes Doerfert // result with -1. If we are in non-SPMD-mode that signals only the main 2345*e2cfbfccSJohannes Doerfert // thread will execute the edge. 23466fc51c9fSJoseph Huber auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { 234718283125SJoseph Huber if (!Edge || !Edge->isConditional()) 234818283125SJoseph Huber return false; 234918283125SJoseph Huber if (Edge->getSuccessor(0) != SuccessorBB) 235018283125SJoseph Huber return false; 235118283125SJoseph Huber 235218283125SJoseph Huber auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); 235318283125SJoseph Huber if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) 235418283125SJoseph Huber return false; 235518283125SJoseph Huber 235618283125SJoseph Huber ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 2357*e2cfbfccSJohannes Doerfert if (!C) 235818283125SJoseph Huber return false; 235918283125SJoseph Huber 2360*e2cfbfccSJohannes Doerfert // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) 2361*e2cfbfccSJohannes Doerfert if (C->isAllOnesValue()) { 2362*e2cfbfccSJohannes Doerfert auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0)); 2363*e2cfbfccSJohannes Doerfert if (!CB || CB->getCalledFunction() != RFI.Declaration) 2364*e2cfbfccSJohannes Doerfert return false; 2365*e2cfbfccSJohannes Doerfert const int InitIsSPMDArgNo = 1; 2366*e2cfbfccSJohannes Doerfert auto *IsSPMDModeCI = 2367*e2cfbfccSJohannes Doerfert dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo)); 2368*e2cfbfccSJohannes Doerfert return IsSPMDModeCI && IsSPMDModeCI->isZero(); 2369*e2cfbfccSJohannes Doerfert } 237018283125SJoseph Huber 237118283125SJoseph Huber return false; 237218283125SJoseph Huber }; 237318283125SJoseph Huber 237418283125SJoseph Huber // Merge all the predecessor states into the current basic block. A basic 237518283125SJoseph Huber // block is executed by a single thread if all of its predecessors are. 237618283125SJoseph Huber auto MergePredecessorStates = [&](BasicBlock *BB) { 237718283125SJoseph Huber if (pred_begin(BB) == pred_end(BB)) 237818283125SJoseph Huber return SingleThreadedBBs.contains(BB); 237918283125SJoseph Huber 23806fc51c9fSJoseph Huber bool IsInitialThread = true; 238118283125SJoseph Huber for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB); 238218283125SJoseph Huber PredBB != PredEndBB; ++PredBB) { 23836fc51c9fSJoseph Huber if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()), 238418283125SJoseph Huber BB)) 23856fc51c9fSJoseph Huber IsInitialThread &= SingleThreadedBBs.contains(*PredBB); 238618283125SJoseph Huber } 238718283125SJoseph Huber 23886fc51c9fSJoseph Huber return IsInitialThread; 238918283125SJoseph Huber }; 239018283125SJoseph Huber 239118283125SJoseph Huber for (auto *BB : RPOT) { 239218283125SJoseph Huber if (!MergePredecessorStates(BB)) 239318283125SJoseph Huber SingleThreadedBBs.erase(BB); 239418283125SJoseph Huber } 239518283125SJoseph Huber 239618283125SJoseph Huber return (NumSingleThreadedBBs == SingleThreadedBBs.size()) 239718283125SJoseph Huber ? ChangeStatus::UNCHANGED 239818283125SJoseph Huber : ChangeStatus::CHANGED; 239918283125SJoseph Huber } 240018283125SJoseph Huber 24016fc51c9fSJoseph Huber /// Try to replace memory allocation calls called by a single thread with a 24026fc51c9fSJoseph Huber /// static buffer of shared memory. 24036fc51c9fSJoseph Huber struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> { 24046fc51c9fSJoseph Huber using Base = StateWrapper<BooleanState, AbstractAttribute>; 24056fc51c9fSJoseph Huber AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 24066fc51c9fSJoseph Huber 24076fc51c9fSJoseph Huber /// Create an abstract attribute view for the position \p IRP. 24086fc51c9fSJoseph Huber static AAHeapToShared &createForPosition(const IRPosition &IRP, 24096fc51c9fSJoseph Huber Attributor &A); 24106fc51c9fSJoseph Huber 24116fc51c9fSJoseph Huber /// See AbstractAttribute::getName(). 24126fc51c9fSJoseph Huber const std::string getName() const override { return "AAHeapToShared"; } 24136fc51c9fSJoseph Huber 24146fc51c9fSJoseph Huber /// See AbstractAttribute::getIdAddr(). 24156fc51c9fSJoseph Huber const char *getIdAddr() const override { return &ID; } 24166fc51c9fSJoseph Huber 24176fc51c9fSJoseph Huber /// This function should return true if the type of the \p AA is 24186fc51c9fSJoseph Huber /// AAHeapToShared. 24196fc51c9fSJoseph Huber static bool classof(const AbstractAttribute *AA) { 24206fc51c9fSJoseph Huber return (AA->getIdAddr() == &ID); 24216fc51c9fSJoseph Huber } 24226fc51c9fSJoseph Huber 24236fc51c9fSJoseph Huber /// Unique ID (due to the unique address) 24246fc51c9fSJoseph Huber static const char ID; 24256fc51c9fSJoseph Huber }; 24266fc51c9fSJoseph Huber 24276fc51c9fSJoseph Huber struct AAHeapToSharedFunction : public AAHeapToShared { 24286fc51c9fSJoseph Huber AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) 24296fc51c9fSJoseph Huber : AAHeapToShared(IRP, A) {} 24306fc51c9fSJoseph Huber 24316fc51c9fSJoseph Huber const std::string getAsStr() const override { 24326fc51c9fSJoseph Huber return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + 24336fc51c9fSJoseph Huber " malloc calls eligible."; 24346fc51c9fSJoseph Huber } 24356fc51c9fSJoseph Huber 24366fc51c9fSJoseph Huber /// See AbstractAttribute::trackStatistics(). 24376fc51c9fSJoseph Huber void trackStatistics() const override {} 24386fc51c9fSJoseph Huber 24396fc51c9fSJoseph Huber void initialize(Attributor &A) override { 24406fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 24416fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 24426fc51c9fSJoseph Huber 24436fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users()) 24446fc51c9fSJoseph Huber if (CallBase *CB = dyn_cast<CallBase>(U)) 24456fc51c9fSJoseph Huber MallocCalls.insert(CB); 24466fc51c9fSJoseph Huber } 24476fc51c9fSJoseph Huber 24486fc51c9fSJoseph Huber ChangeStatus manifest(Attributor &A) override { 24496fc51c9fSJoseph Huber if (MallocCalls.empty()) 24506fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED; 24516fc51c9fSJoseph Huber 24526fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 24536fc51c9fSJoseph Huber auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 24546fc51c9fSJoseph Huber 24556fc51c9fSJoseph Huber Function *F = getAnchorScope(); 24566fc51c9fSJoseph Huber auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, 24576fc51c9fSJoseph Huber DepClassTy::OPTIONAL); 24586fc51c9fSJoseph Huber 24596fc51c9fSJoseph Huber ChangeStatus Changed = ChangeStatus::UNCHANGED; 24606fc51c9fSJoseph Huber for (CallBase *CB : MallocCalls) { 24616fc51c9fSJoseph Huber // Skip replacing this if HeapToStack has already claimed it. 2462c1c1fe93SJohannes Doerfert if (HS && HS->isAssumedHeapToStack(*CB)) 24636fc51c9fSJoseph Huber continue; 24646fc51c9fSJoseph Huber 24656fc51c9fSJoseph Huber // Find the unique free call to remove it. 24666fc51c9fSJoseph Huber SmallVector<CallBase *, 4> FreeCalls; 24676fc51c9fSJoseph Huber for (auto *U : CB->users()) { 24686fc51c9fSJoseph Huber CallBase *C = dyn_cast<CallBase>(U); 24696fc51c9fSJoseph Huber if (C && C->getCalledFunction() == FreeCall.Declaration) 24706fc51c9fSJoseph Huber FreeCalls.push_back(C); 24716fc51c9fSJoseph Huber } 24726fc51c9fSJoseph Huber if (FreeCalls.size() != 1) 24736fc51c9fSJoseph Huber continue; 24746fc51c9fSJoseph Huber 24756fc51c9fSJoseph Huber ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0)); 24766fc51c9fSJoseph Huber 24776fc51c9fSJoseph Huber LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in " 24786fc51c9fSJoseph Huber << CB->getCaller()->getName() << " with " 24796fc51c9fSJoseph Huber << AllocSize->getZExtValue() 24806fc51c9fSJoseph Huber << " bytes of shared memory\n"); 24816fc51c9fSJoseph Huber 24826fc51c9fSJoseph Huber // Create a new shared memory buffer of the same size as the allocation 24836fc51c9fSJoseph Huber // and replace all the uses of the original allocation with it. 24846fc51c9fSJoseph Huber Module *M = CB->getModule(); 24856fc51c9fSJoseph Huber Type *Int8Ty = Type::getInt8Ty(M->getContext()); 24866fc51c9fSJoseph Huber Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); 24876fc51c9fSJoseph Huber auto *SharedMem = new GlobalVariable( 24886fc51c9fSJoseph Huber *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, 24896fc51c9fSJoseph Huber UndefValue::get(Int8ArrTy), CB->getName(), nullptr, 24906fc51c9fSJoseph Huber GlobalValue::NotThreadLocal, 24916fc51c9fSJoseph Huber static_cast<unsigned>(AddressSpace::Shared)); 24926fc51c9fSJoseph Huber auto *NewBuffer = 24936fc51c9fSJoseph Huber ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); 24946fc51c9fSJoseph Huber 249530e36c9bSJoseph Huber auto Remark = [&](OptimizationRemark OR) { 249630e36c9bSJoseph Huber return OR << "Replaced globalized variable with " 249730e36c9bSJoseph Huber << ore::NV("SharedMemory", AllocSize->getZExtValue()) 249830e36c9bSJoseph Huber << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") 249930e36c9bSJoseph Huber << "of shared memory"; 250030e36c9bSJoseph Huber }; 250130e36c9bSJoseph Huber A.emitRemark<OptimizationRemark>(CB, "OpenMPReplaceGlobalization", 250230e36c9bSJoseph Huber Remark); 250330e36c9bSJoseph Huber 25046fc51c9fSJoseph Huber SharedMem->setAlignment(MaybeAlign(32)); 25056fc51c9fSJoseph Huber 25066fc51c9fSJoseph Huber A.changeValueAfterManifest(*CB, *NewBuffer); 25076fc51c9fSJoseph Huber A.deleteAfterManifest(*CB); 25086fc51c9fSJoseph Huber A.deleteAfterManifest(*FreeCalls.front()); 25096fc51c9fSJoseph Huber 25106fc51c9fSJoseph Huber NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); 25116fc51c9fSJoseph Huber Changed = ChangeStatus::CHANGED; 25126fc51c9fSJoseph Huber } 25136fc51c9fSJoseph Huber 25146fc51c9fSJoseph Huber return Changed; 25156fc51c9fSJoseph Huber } 25166fc51c9fSJoseph Huber 25176fc51c9fSJoseph Huber ChangeStatus updateImpl(Attributor &A) override { 25186fc51c9fSJoseph Huber auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 25196fc51c9fSJoseph Huber auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 25206fc51c9fSJoseph Huber Function *F = getAnchorScope(); 25216fc51c9fSJoseph Huber 25226fc51c9fSJoseph Huber auto NumMallocCalls = MallocCalls.size(); 25236fc51c9fSJoseph Huber 25246fc51c9fSJoseph Huber // Only consider malloc calls executed by a single thread with a constant. 25256fc51c9fSJoseph Huber for (User *U : RFI.Declaration->users()) { 25266fc51c9fSJoseph Huber const auto &ED = A.getAAFor<AAExecutionDomain>( 25276fc51c9fSJoseph Huber *this, IRPosition::function(*F), DepClassTy::REQUIRED); 25286fc51c9fSJoseph Huber if (CallBase *CB = dyn_cast<CallBase>(U)) 25296fc51c9fSJoseph Huber if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) || 25306fc51c9fSJoseph Huber !ED.isExecutedByInitialThreadOnly(*CB)) 25316fc51c9fSJoseph Huber MallocCalls.erase(CB); 25326fc51c9fSJoseph Huber } 25336fc51c9fSJoseph Huber 25346fc51c9fSJoseph Huber if (NumMallocCalls != MallocCalls.size()) 25356fc51c9fSJoseph Huber return ChangeStatus::CHANGED; 25366fc51c9fSJoseph Huber 25376fc51c9fSJoseph Huber return ChangeStatus::UNCHANGED; 25386fc51c9fSJoseph Huber } 25396fc51c9fSJoseph Huber 25406fc51c9fSJoseph Huber /// Collection of all malloc calls in a function. 25416fc51c9fSJoseph Huber SmallPtrSet<CallBase *, 4> MallocCalls; 25426fc51c9fSJoseph Huber }; 25436fc51c9fSJoseph Huber 25449548b74aSJohannes Doerfert } // namespace 25459548b74aSJohannes Doerfert 2546b8235d2bSsstefan1 const char AAICVTracker::ID = 0; 254718283125SJoseph Huber const char AAExecutionDomain::ID = 0; 25486fc51c9fSJoseph Huber const char AAHeapToShared::ID = 0; 2549b8235d2bSsstefan1 2550b8235d2bSsstefan1 AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 2551b8235d2bSsstefan1 Attributor &A) { 2552b8235d2bSsstefan1 AAICVTracker *AA = nullptr; 2553b8235d2bSsstefan1 switch (IRP.getPositionKind()) { 2554b8235d2bSsstefan1 case IRPosition::IRP_INVALID: 2555b8235d2bSsstefan1 case IRPosition::IRP_FLOAT: 2556b8235d2bSsstefan1 case IRPosition::IRP_ARGUMENT: 2557b8235d2bSsstefan1 case IRPosition::IRP_CALL_SITE_ARGUMENT: 25581de70a72SJohannes Doerfert llvm_unreachable("ICVTracker can only be created for function position!"); 25595dfd7cc4Ssstefan1 case IRPosition::IRP_RETURNED: 25605dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); 25615dfd7cc4Ssstefan1 break; 25625dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE_RETURNED: 25635dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); 25645dfd7cc4Ssstefan1 break; 25655dfd7cc4Ssstefan1 case IRPosition::IRP_CALL_SITE: 25665dfd7cc4Ssstefan1 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); 25675dfd7cc4Ssstefan1 break; 2568b8235d2bSsstefan1 case IRPosition::IRP_FUNCTION: 2569b8235d2bSsstefan1 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 2570b8235d2bSsstefan1 break; 2571b8235d2bSsstefan1 } 2572b8235d2bSsstefan1 2573b8235d2bSsstefan1 return *AA; 2574b8235d2bSsstefan1 } 2575b8235d2bSsstefan1 257618283125SJoseph Huber AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, 257718283125SJoseph Huber Attributor &A) { 257818283125SJoseph Huber AAExecutionDomainFunction *AA = nullptr; 257918283125SJoseph Huber switch (IRP.getPositionKind()) { 258018283125SJoseph Huber case IRPosition::IRP_INVALID: 258118283125SJoseph Huber case IRPosition::IRP_FLOAT: 258218283125SJoseph Huber case IRPosition::IRP_ARGUMENT: 258318283125SJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT: 258418283125SJoseph Huber case IRPosition::IRP_RETURNED: 258518283125SJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED: 258618283125SJoseph Huber case IRPosition::IRP_CALL_SITE: 258718283125SJoseph Huber llvm_unreachable( 258818283125SJoseph Huber "AAExecutionDomain can only be created for function position!"); 258918283125SJoseph Huber case IRPosition::IRP_FUNCTION: 259018283125SJoseph Huber AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); 259118283125SJoseph Huber break; 259218283125SJoseph Huber } 259318283125SJoseph Huber 259418283125SJoseph Huber return *AA; 259518283125SJoseph Huber } 259618283125SJoseph Huber 25976fc51c9fSJoseph Huber AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, 25986fc51c9fSJoseph Huber Attributor &A) { 25996fc51c9fSJoseph Huber AAHeapToSharedFunction *AA = nullptr; 26006fc51c9fSJoseph Huber switch (IRP.getPositionKind()) { 26016fc51c9fSJoseph Huber case IRPosition::IRP_INVALID: 26026fc51c9fSJoseph Huber case IRPosition::IRP_FLOAT: 26036fc51c9fSJoseph Huber case IRPosition::IRP_ARGUMENT: 26046fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_ARGUMENT: 26056fc51c9fSJoseph Huber case IRPosition::IRP_RETURNED: 26066fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE_RETURNED: 26076fc51c9fSJoseph Huber case IRPosition::IRP_CALL_SITE: 26086fc51c9fSJoseph Huber llvm_unreachable( 26096fc51c9fSJoseph Huber "AAHeapToShared can only be created for function position!"); 26106fc51c9fSJoseph Huber case IRPosition::IRP_FUNCTION: 26116fc51c9fSJoseph Huber AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); 26126fc51c9fSJoseph Huber break; 26136fc51c9fSJoseph Huber } 26146fc51c9fSJoseph Huber 26156fc51c9fSJoseph Huber return *AA; 26166fc51c9fSJoseph Huber } 26176fc51c9fSJoseph Huber 2618b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { 26195ccb7424SJoseph Huber if (!containsOpenMP(M)) 2620b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2621b2ad63d3SJoseph Huber if (DisableOpenMPOptimizations) 2622b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2623b2ad63d3SJoseph Huber 26240edb8777SJoseph Huber FunctionAnalysisManager &FAM = 26250edb8777SJoseph Huber AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 26265ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M); 26275ccb7424SJoseph Huber 262857ad2e10SJoseph Huber auto IsCalled = [&](Function &F) { 262957ad2e10SJoseph Huber if (Kernels.contains(&F)) 263057ad2e10SJoseph Huber return true; 263157ad2e10SJoseph Huber for (const User *U : F.users()) 263257ad2e10SJoseph Huber if (!isa<BlockAddress>(U)) 263357ad2e10SJoseph Huber return true; 263457ad2e10SJoseph Huber return false; 263557ad2e10SJoseph Huber }; 263657ad2e10SJoseph Huber 26370edb8777SJoseph Huber auto EmitRemark = [&](Function &F) { 26380edb8777SJoseph Huber auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 26390edb8777SJoseph Huber ORE.emit([&]() { 2640ecabc668SJoseph Huber OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "InternalizationFailure", &F); 2641ecabc668SJoseph Huber return ORA << "Could not internalize function. " 26420edb8777SJoseph Huber << "Some optimizations may not be possible."; 26430edb8777SJoseph Huber }); 26440edb8777SJoseph Huber }; 26450edb8777SJoseph Huber 264657ad2e10SJoseph Huber // Create internal copies of each function if this is a kernel Module. This 264757ad2e10SJoseph Huber // allows iterprocedural passes to see every call edge. 264803d7e61cSJoseph Huber DenseSet<const Function *> InternalizedFuncs; 26495ccb7424SJoseph Huber if (isOpenMPDevice(M)) 265003d7e61cSJoseph Huber for (Function &F : M) 26510edb8777SJoseph Huber if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) { 26520edb8777SJoseph Huber if (Attributor::internalizeFunction(F, /* Force */ true)) { 265303d7e61cSJoseph Huber InternalizedFuncs.insert(&F); 2654ecabc668SJoseph Huber } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { 26550edb8777SJoseph Huber EmitRemark(F); 26560edb8777SJoseph Huber } 26570edb8777SJoseph Huber } 265803d7e61cSJoseph Huber 265957ad2e10SJoseph Huber // Look at every function in the Module unless it was internalized. 2660b2ad63d3SJoseph Huber SmallVector<Function *, 16> SCC; 266103d7e61cSJoseph Huber for (Function &F : M) 266203d7e61cSJoseph Huber if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) 266303d7e61cSJoseph Huber SCC.push_back(&F); 2664b2ad63d3SJoseph Huber 2665b2ad63d3SJoseph Huber if (SCC.empty()) 2666b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2667b2ad63d3SJoseph Huber 2668b2ad63d3SJoseph Huber AnalysisGetter AG(FAM); 2669b2ad63d3SJoseph Huber 2670b2ad63d3SJoseph Huber auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 2671b2ad63d3SJoseph Huber return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 2672b2ad63d3SJoseph Huber }; 2673b2ad63d3SJoseph Huber 2674b2ad63d3SJoseph Huber BumpPtrAllocator Allocator; 2675b2ad63d3SJoseph Huber CallGraphUpdater CGUpdater; 2676b2ad63d3SJoseph Huber 2677b2ad63d3SJoseph Huber SetVector<Function *> Functions(SCC.begin(), SCC.end()); 26785ccb7424SJoseph Huber OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); 2679b2ad63d3SJoseph Huber 268013b2fba2SJoseph Huber unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; 26814a6bd8e3SJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, 268213b2fba2SJoseph Huber MaxFixpointIterations, OREGetter, DEBUG_TYPE); 2683b2ad63d3SJoseph Huber 2684b2ad63d3SJoseph Huber OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2685b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(true); 2686b2ad63d3SJoseph Huber if (Changed) 2687b2ad63d3SJoseph Huber return PreservedAnalyses::none(); 2688b2ad63d3SJoseph Huber 2689b2ad63d3SJoseph Huber return PreservedAnalyses::all(); 2690b2ad63d3SJoseph Huber } 2691b2ad63d3SJoseph Huber 2692b2ad63d3SJoseph Huber PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, 26939548b74aSJohannes Doerfert CGSCCAnalysisManager &AM, 2694b2ad63d3SJoseph Huber LazyCallGraph &CG, 2695b2ad63d3SJoseph Huber CGSCCUpdateResult &UR) { 26965ccb7424SJoseph Huber if (!containsOpenMP(*C.begin()->getFunction().getParent())) 26979548b74aSJohannes Doerfert return PreservedAnalyses::all(); 26989548b74aSJohannes Doerfert if (DisableOpenMPOptimizations) 26999548b74aSJohannes Doerfert return PreservedAnalyses::all(); 27009548b74aSJohannes Doerfert 2701ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 2702351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 2703351d234dSRoman Lebedev for (LazyCallGraph::Node &N : C) { 2704351d234dSRoman Lebedev Function *Fn = &N.getFunction(); 2705351d234dSRoman Lebedev SCC.push_back(Fn); 2706351d234dSRoman Lebedev } 2707351d234dSRoman Lebedev 27085ccb7424SJoseph Huber if (SCC.empty()) 27099548b74aSJohannes Doerfert return PreservedAnalyses::all(); 27109548b74aSJohannes Doerfert 27115ccb7424SJoseph Huber Module &M = *C.begin()->getFunction().getParent(); 27125ccb7424SJoseph Huber 27135ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M); 27145ccb7424SJoseph Huber 27154d4ea9acSHuber, Joseph FunctionAnalysisManager &FAM = 27164d4ea9acSHuber, Joseph AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 27177cfd267cSsstefan1 27187cfd267cSsstefan1 AnalysisGetter AG(FAM); 27197cfd267cSsstefan1 27207cfd267cSsstefan1 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 27214d4ea9acSHuber, Joseph return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 27224d4ea9acSHuber, Joseph }; 27234d4ea9acSHuber, Joseph 2724b2ad63d3SJoseph Huber BumpPtrAllocator Allocator; 27259548b74aSJohannes Doerfert CallGraphUpdater CGUpdater; 27269548b74aSJohannes Doerfert CGUpdater.initialize(CG, C, AM, UR); 27277cfd267cSsstefan1 27287cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 27297cfd267cSsstefan1 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 27305ccb7424SJoseph Huber /*CGSCC*/ Functions, Kernels); 27317cfd267cSsstefan1 273213b2fba2SJoseph Huber unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; 27334a6bd8e3SJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, 273413b2fba2SJoseph Huber MaxFixpointIterations, OREGetter, DEBUG_TYPE); 2735b8235d2bSsstefan1 2736b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2737b2ad63d3SJoseph Huber bool Changed = OMPOpt.run(false); 2738694ded37SGiorgis Georgakoudis if (Changed) 2739694ded37SGiorgis Georgakoudis return PreservedAnalyses::none(); 2740694ded37SGiorgis Georgakoudis 27419548b74aSJohannes Doerfert return PreservedAnalyses::all(); 27429548b74aSJohannes Doerfert } 27438b57ed09SJoseph Huber 27449548b74aSJohannes Doerfert namespace { 27459548b74aSJohannes Doerfert 2746b2ad63d3SJoseph Huber struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { 27479548b74aSJohannes Doerfert CallGraphUpdater CGUpdater; 27489548b74aSJohannes Doerfert static char ID; 27499548b74aSJohannes Doerfert 2750b2ad63d3SJoseph Huber OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { 2751b2ad63d3SJoseph Huber initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); 27529548b74aSJohannes Doerfert } 27539548b74aSJohannes Doerfert 27549548b74aSJohannes Doerfert void getAnalysisUsage(AnalysisUsage &AU) const override { 27559548b74aSJohannes Doerfert CallGraphSCCPass::getAnalysisUsage(AU); 27569548b74aSJohannes Doerfert } 27579548b74aSJohannes Doerfert 27589548b74aSJohannes Doerfert bool runOnSCC(CallGraphSCC &CGSCC) override { 27595ccb7424SJoseph Huber if (!containsOpenMP(CGSCC.getCallGraph().getModule())) 27609548b74aSJohannes Doerfert return false; 27619548b74aSJohannes Doerfert if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 27629548b74aSJohannes Doerfert return false; 27639548b74aSJohannes Doerfert 2764ee17263aSJohannes Doerfert SmallVector<Function *, 16> SCC; 2765351d234dSRoman Lebedev // If there are kernels in the module, we have to run on all SCC's. 2766351d234dSRoman Lebedev for (CallGraphNode *CGN : CGSCC) { 2767351d234dSRoman Lebedev Function *Fn = CGN->getFunction(); 2768351d234dSRoman Lebedev if (!Fn || Fn->isDeclaration()) 2769351d234dSRoman Lebedev continue; 2770ee17263aSJohannes Doerfert SCC.push_back(Fn); 2771351d234dSRoman Lebedev } 2772351d234dSRoman Lebedev 27735ccb7424SJoseph Huber if (SCC.empty()) 27749548b74aSJohannes Doerfert return false; 27759548b74aSJohannes Doerfert 27765ccb7424SJoseph Huber Module &M = CGSCC.getCallGraph().getModule(); 27775ccb7424SJoseph Huber KernelSet Kernels = getDeviceKernels(M); 27785ccb7424SJoseph Huber 27799548b74aSJohannes Doerfert CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 27809548b74aSJohannes Doerfert CGUpdater.initialize(CG, CGSCC); 27819548b74aSJohannes Doerfert 27824d4ea9acSHuber, Joseph // Maintain a map of functions to avoid rebuilding the ORE 27834d4ea9acSHuber, Joseph DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; 27844d4ea9acSHuber, Joseph auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { 27854d4ea9acSHuber, Joseph std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; 27864d4ea9acSHuber, Joseph if (!ORE) 27874d4ea9acSHuber, Joseph ORE = std::make_unique<OptimizationRemarkEmitter>(F); 27884d4ea9acSHuber, Joseph return *ORE; 27894d4ea9acSHuber, Joseph }; 27904d4ea9acSHuber, Joseph 27917cfd267cSsstefan1 AnalysisGetter AG; 27927cfd267cSsstefan1 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 27937cfd267cSsstefan1 BumpPtrAllocator Allocator; 27945ccb7424SJoseph Huber OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, 27955ccb7424SJoseph Huber Allocator, 27965ccb7424SJoseph Huber /*CGSCC*/ Functions, Kernels); 27977cfd267cSsstefan1 279813b2fba2SJoseph Huber unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; 279930e36c9bSJoseph Huber Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, 280013b2fba2SJoseph Huber MaxFixpointIterations, OREGetter, DEBUG_TYPE); 2801b8235d2bSsstefan1 2802b8235d2bSsstefan1 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 2803b2ad63d3SJoseph Huber return OMPOpt.run(false); 28049548b74aSJohannes Doerfert } 28059548b74aSJohannes Doerfert 28069548b74aSJohannes Doerfert bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 28079548b74aSJohannes Doerfert }; 28089548b74aSJohannes Doerfert 28099548b74aSJohannes Doerfert } // end anonymous namespace 28109548b74aSJohannes Doerfert 28115ccb7424SJoseph Huber KernelSet llvm::omp::getDeviceKernels(Module &M) { 28125ccb7424SJoseph Huber // TODO: Create a more cross-platform way of determining device kernels. 2813e8039ad4SJohannes Doerfert NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); 28145ccb7424SJoseph Huber KernelSet Kernels; 28155ccb7424SJoseph Huber 2816e8039ad4SJohannes Doerfert if (!MD) 28175ccb7424SJoseph Huber return Kernels; 2818e8039ad4SJohannes Doerfert 2819e8039ad4SJohannes Doerfert for (auto *Op : MD->operands()) { 2820e8039ad4SJohannes Doerfert if (Op->getNumOperands() < 2) 2821e8039ad4SJohannes Doerfert continue; 2822e8039ad4SJohannes Doerfert MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); 2823e8039ad4SJohannes Doerfert if (!KindID || KindID->getString() != "kernel") 2824e8039ad4SJohannes Doerfert continue; 2825e8039ad4SJohannes Doerfert 2826e8039ad4SJohannes Doerfert Function *KernelFn = 2827e8039ad4SJohannes Doerfert mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); 2828e8039ad4SJohannes Doerfert if (!KernelFn) 2829e8039ad4SJohannes Doerfert continue; 2830e8039ad4SJohannes Doerfert 2831e8039ad4SJohannes Doerfert ++NumOpenMPTargetRegionKernels; 2832e8039ad4SJohannes Doerfert 2833e8039ad4SJohannes Doerfert Kernels.insert(KernelFn); 2834e8039ad4SJohannes Doerfert } 28355ccb7424SJoseph Huber 28365ccb7424SJoseph Huber return Kernels; 2837e8039ad4SJohannes Doerfert } 2838e8039ad4SJohannes Doerfert 28395ccb7424SJoseph Huber bool llvm::omp::containsOpenMP(Module &M) { 28405ccb7424SJoseph Huber Metadata *MD = M.getModuleFlag("openmp"); 28415ccb7424SJoseph Huber if (!MD) 28425ccb7424SJoseph Huber return false; 2843dce6bc18SJohannes Doerfert 2844e8039ad4SJohannes Doerfert return true; 2845e8039ad4SJohannes Doerfert } 2846e8039ad4SJohannes Doerfert 28475ccb7424SJoseph Huber bool llvm::omp::isOpenMPDevice(Module &M) { 28485ccb7424SJoseph Huber Metadata *MD = M.getModuleFlag("openmp-device"); 28495ccb7424SJoseph Huber if (!MD) 28505ccb7424SJoseph Huber return false; 28515ccb7424SJoseph Huber 28525ccb7424SJoseph Huber return true; 28539548b74aSJohannes Doerfert } 28549548b74aSJohannes Doerfert 2855b2ad63d3SJoseph Huber char OpenMPOptCGSCCLegacyPass::ID = 0; 28569548b74aSJohannes Doerfert 2857b2ad63d3SJoseph Huber INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", 28589548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 28599548b74aSJohannes Doerfert INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 2860b2ad63d3SJoseph Huber INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", 28619548b74aSJohannes Doerfert "OpenMP specific optimizations", false, false) 28629548b74aSJohannes Doerfert 2863b2ad63d3SJoseph Huber Pass *llvm::createOpenMPOptCGSCCLegacyPass() { 2864b2ad63d3SJoseph Huber return new OpenMPOptCGSCCLegacyPass(); 2865b2ad63d3SJoseph Huber } 2866