//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/CallSite.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

using namespace llvm;
using namespace omp;
using namespace types;

#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");

// Prefix used for all debug output of this pass.
static constexpr auto TAG = "[" DEBUG_TYPE "]";

namespace {
struct OpenMPOpt {

  /// Create an optimizer instance for the functions in \p SCC. \p ModuleSlice
  /// is the set of functions we are allowed to look at (a superset of \p SCC),
  /// and \p CGUpdater keeps the call graph consistent when calls are removed.
  /// Note: \p SCC must be non-empty; both pass entry points check this before
  /// constructing an OpenMPOpt instance.
  OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
            SmallPtrSetImpl<Function *> &ModuleSlice,
            CallGraphUpdater &CGUpdater)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
        OMPBuilder(M), CGUpdater(CGUpdater) {
    initializeTypes(M);
    initializeRuntimeFunctions();
    OMPBuilder.initialize();
  }

  /// Generic information that describes a runtime function
  struct RuntimeFunctionInfo {
    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration;

    /// Uses of this runtime function per function containing the use.
    /// Non-instruction users (e.g., constant expressions) are keyed under
    /// nullptr (see initializeRuntimeFunctions).
    DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
      // Collect the uses to erase first; erasing while iterating the set
      // would invalidate the iterator.
      SmallVector<Use *, 8> ToBeDeleted;
      for (auto &It : UsesMap) {
        ToBeDeleted.clear();
        for (Use *U : It.second)
          if (CB(*U, *It.first))
            ToBeDeleted.push_back(U);
        for (Use *U : ToBeDeleted)
          It.second.erase(U);
      }
    }
  };

  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
106 bool run() { 107 bool Changed = false; 108 109 LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 110 << " functions in a slice with " << ModuleSlice.size() 111 << " functions\n"); 112 113 Changed |= deduplicateRuntimeCalls(); 114 Changed |= deleteParallelRegions(); 115 116 return Changed; 117 } 118 119 private: 120 /// Try to delete parallel regions if possible 121 bool deleteParallelRegions() { 122 const unsigned CallbackCalleeOperand = 2; 123 124 RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call]; 125 if (!RFI.Declaration) 126 return false; 127 128 bool Changed = false; 129 auto DeleteCallCB = [&](Use &U, Function &) { 130 CallInst *CI = getCallIfRegularCall(U); 131 if (!CI) 132 return false; 133 auto *Fn = dyn_cast<Function>( 134 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 135 if (!Fn) 136 return false; 137 if (!Fn->onlyReadsMemory()) 138 return false; 139 if (!Fn->hasFnAttribute(Attribute::WillReturn)) 140 return false; 141 142 LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 143 << CI->getCaller()->getName() << "\n"); 144 CGUpdater.removeCallSite(*CI); 145 CI->eraseFromParent(); 146 Changed = true; 147 return true; 148 }; 149 150 RFI.foreachUse(DeleteCallCB); 151 152 return Changed; 153 } 154 155 /// Try to eliminiate runtime calls by reusing existing ones. 
156 bool deduplicateRuntimeCalls() { 157 bool Changed = false; 158 159 RuntimeFunction DeduplicableRuntimeCallIDs[] = { 160 OMPRTL_omp_get_num_threads, 161 OMPRTL_omp_in_parallel, 162 OMPRTL_omp_get_cancellation, 163 OMPRTL_omp_get_thread_limit, 164 OMPRTL_omp_get_supported_active_levels, 165 OMPRTL_omp_get_level, 166 OMPRTL_omp_get_ancestor_thread_num, 167 OMPRTL_omp_get_team_size, 168 OMPRTL_omp_get_active_level, 169 OMPRTL_omp_in_final, 170 OMPRTL_omp_get_proc_bind, 171 OMPRTL_omp_get_num_places, 172 OMPRTL_omp_get_num_procs, 173 OMPRTL_omp_get_place_num, 174 OMPRTL_omp_get_partition_num_places, 175 OMPRTL_omp_get_partition_place_nums}; 176 177 // Global-tid is handled separatly. 178 SmallSetVector<Value *, 16> GTIdArgs; 179 collectGlobalThreadIdArguments(GTIdArgs); 180 LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 181 << " global thread ID arguments\n"); 182 183 for (Function *F : SCC) { 184 for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 185 deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]); 186 187 // __kmpc_global_thread_num is special as we can replace it with an 188 // argument in enough cases to make it worth trying. 189 Value *GTIdArg = nullptr; 190 for (Argument &Arg : F->args()) 191 if (GTIdArgs.count(&Arg)) { 192 GTIdArg = &Arg; 193 break; 194 } 195 Changed |= deduplicateRuntimeCalls( 196 *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 197 } 198 199 return Changed; 200 } 201 202 /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or 203 /// \p ReplVal if given. 204 bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, 205 Value *ReplVal = nullptr) { 206 auto &Uses = RFI.UsesMap[&F]; 207 if (Uses.size() + (ReplVal != nullptr) < 2) 208 return false; 209 210 LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of " 211 << RFI.Name 212 << (ReplVal ? 
" with an existing value\n" : "\n") 213 << "\n"); 214 assert((!ReplVal || (isa<Argument>(ReplVal) && 215 cast<Argument>(ReplVal)->getParent() == &F)) && 216 "Unexpected replacement value!"); 217 if (!ReplVal) { 218 for (Use *U : Uses) 219 if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 220 CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 221 ReplVal = CI; 222 break; 223 } 224 if (!ReplVal) 225 return false; 226 } 227 228 bool Changed = false; 229 auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 230 CallInst *CI = getCallIfRegularCall(U, &RFI); 231 if (!CI || CI == ReplVal || &F != &Caller) 232 return false; 233 assert(CI->getCaller() == &F && "Unexpected call!"); 234 CGUpdater.removeCallSite(*CI); 235 CI->replaceAllUsesWith(ReplVal); 236 CI->eraseFromParent(); 237 ++NumOpenMPRuntimeCallsDeduplicated; 238 Changed = true; 239 return true; 240 }; 241 RFI.foreachUse(ReplaceAndDeleteCB); 242 243 return Changed; 244 } 245 246 /// Collect arguments that represent the global thread id in \p GTIdArgs. 247 void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 248 // TODO: Below we basically perform a fixpoint iteration with a pessimistic 249 // initialization. We could define an AbstractAttribute instead and 250 // run the Attributor here once it can be run as an SCC pass. 251 252 // Helper to check the argument \p ArgNo at all call sites of \p F for 253 // a GTId. 254 auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 255 if (!F.hasLocalLinkage()) 256 return false; 257 for (Use &U : F.uses()) { 258 if (CallInst *CI = getCallIfRegularCall(U)) { 259 Value *ArgOp = CI->getArgOperand(ArgNo); 260 if (CI == &RefCI || GTIdArgs.count(ArgOp) || 261 getCallIfRegularCall(*ArgOp, 262 &RFIs[OMPRTL___kmpc_global_thread_num])) 263 continue; 264 } 265 return false; 266 } 267 return true; 268 }; 269 270 // Helper to identify uses of a GTId as GTId arguments. 
271 auto AddUserArgs = [&](Value >Id) { 272 for (Use &U : GTId.uses()) 273 if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 274 if (CI->isArgOperand(&U)) 275 if (Function *Callee = CI->getCalledFunction()) 276 if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 277 GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 278 }; 279 280 // The argument users of __kmpc_global_thread_num calls are GTIds. 281 RuntimeFunctionInfo &GlobThreadNumRFI = 282 RFIs[OMPRTL___kmpc_global_thread_num]; 283 for (auto &It : GlobThreadNumRFI.UsesMap) 284 for (Use *U : It.second) 285 if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI)) 286 AddUserArgs(*CI); 287 288 // Transitively search for more arguments by looking at the users of the 289 // ones we know already. During the search the GTIdArgs vector is extended 290 // so we cannot cache the size nor can we use a range based for. 291 for (unsigned u = 0; u < GTIdArgs.size(); ++u) 292 AddUserArgs(*GTIdArgs[u]); 293 } 294 295 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 296 /// given it has to be the callee or a nullptr is returned. 297 CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) { 298 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 299 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 300 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 301 return CI; 302 return nullptr; 303 } 304 305 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 306 /// the callee or a nullptr is returned. 307 CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) { 308 CallInst *CI = dyn_cast<CallInst>(&V); 309 if (CI && !CI->hasOperandBundles() && 310 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 311 return CI; 312 return nullptr; 313 } 314 315 /// Helper to initialize all runtime function information for those defined in 316 /// OpenMPKinds.def. 
  void initializeRuntimeFunctions() {
    // Helper to collect all uses of the declaration in the UsesMap.
    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
      unsigned NumUses = 0;
      if (!RFI.Declaration)
        return NumUses;
      OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();

      // TODO: We directly convert uses into proper calls and unknown uses.
      for (Use &U : RFI.Declaration->uses()) {
        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
          // Only track uses in functions we are allowed to look at.
          if (ModuleSlice.count(UserI->getFunction())) {
            RFI.UsesMap[UserI->getFunction()].insert(&U);
            ++NumUses;
          }
        } else {
          // Non-instruction users (e.g., constant expressions) are recorded
          // under the nullptr key.
          RFI.UsesMap[nullptr].insert(&U);
          ++NumUses;
        }
      }
      return NumUses;
    };

// Instantiate RuntimeFunctionInfo entries for every runtime function listed
// in OMPKinds.def; declarations are looked up by name in the module.
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    auto &RFI = RFIs[_Enum];                                                   \
    RFI.Kind = _Enum;                                                          \
    RFI.Name = _Name;                                                          \
    RFI.IsVarArg = _IsVarArg;                                                  \
    RFI.ReturnType = _ReturnType;                                              \
    RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__});                 \
    RFI.Declaration = M.getFunction(_Name);                                    \
    unsigned NumUses = CollectUses(RFI);                                       \
    (void)NumUses;                                                             \
    LLVM_DEBUG({                                                               \
      dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")             \
             << " found\n";                                                    \
      if (RFI.Declaration)                                                     \
        dbgs() << TAG << "-> got " << NumUses << " uses in "                   \
               << RFI.UsesMap.size() << " different functions.\n";             \
    });                                                                        \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // TODO: We should validate the declaration against the types we expect.
  }

  /// The underlying module.
  Module &M;

  /// The SCC we are operating on.
  SmallPtrSetImpl<Function *> &SCC;

  /// The slice of the module we are allowed to look at.
375 SmallPtrSetImpl<Function *> &ModuleSlice; 376 377 /// An OpenMP-IR-Builder instance 378 OpenMPIRBuilder OMPBuilder; 379 380 /// Callback to update the call graph, the first argument is a removed call, 381 /// the second an optional replacement call. 382 CallGraphUpdater &CGUpdater; 383 384 /// Map from runtime function kind to the runtime function description. 385 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 386 RuntimeFunction::OMPRTL___last> 387 RFIs; 388 }; 389 } // namespace 390 391 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, 392 CGSCCAnalysisManager &AM, 393 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 394 if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 395 return PreservedAnalyses::all(); 396 397 if (DisableOpenMPOptimizations) 398 return PreservedAnalyses::all(); 399 400 SmallPtrSet<Function *, 16> SCC; 401 for (LazyCallGraph::Node &N : C) 402 SCC.insert(&N.getFunction()); 403 404 if (SCC.empty()) 405 return PreservedAnalyses::all(); 406 407 CallGraphUpdater CGUpdater; 408 CGUpdater.initialize(CG, C, AM, UR); 409 // TODO: Compute the module slice we are allowed to look at. 410 OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); 411 bool Changed = OMPOpt.run(); 412 (void)Changed; 413 return PreservedAnalyses::all(); 414 } 415 416 namespace { 417 418 struct OpenMPOptLegacyPass : public CallGraphSCCPass { 419 CallGraphUpdater CGUpdater; 420 OpenMPInModule OMPInModule; 421 static char ID; 422 423 OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { 424 initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); 425 } 426 427 void getAnalysisUsage(AnalysisUsage &AU) const override { 428 CallGraphSCCPass::getAnalysisUsage(AU); 429 } 430 431 bool doInitialization(CallGraph &CG) override { 432 // Disable the pass if there is no OpenMP (runtime call) in the module. 
433 containsOpenMP(CG.getModule(), OMPInModule); 434 return false; 435 } 436 437 bool runOnSCC(CallGraphSCC &CGSCC) override { 438 if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 439 return false; 440 if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 441 return false; 442 443 SmallPtrSet<Function *, 16> SCC; 444 for (CallGraphNode *CGN : CGSCC) 445 if (Function *Fn = CGN->getFunction()) 446 if (!Fn->isDeclaration()) 447 SCC.insert(Fn); 448 449 if (SCC.empty()) 450 return false; 451 452 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 453 CGUpdater.initialize(CG, CGSCC); 454 455 // TODO: Compute the module slice we are allowed to look at. 456 OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); 457 return OMPOpt.run(); 458 } 459 460 bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 461 }; 462 463 } // end anonymous namespace 464 465 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 466 if (OMPInModule.isKnown()) 467 return OMPInModule; 468 469 #define OMP_RTL(_Enum, _Name, ...) \ 470 if (M.getFunction(_Name)) \ 471 return OMPInModule = true; 472 #include "llvm/Frontend/OpenMP/OMPKinds.def" 473 return OMPInModule = false; 474 } 475 476 char OpenMPOptLegacyPass::ID = 0; 477 478 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", 479 "OpenMP specific optimizations", false, false) 480 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 481 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", 482 "OpenMP specific optimizations", false, false) 483 484 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } 485