1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // OpenMP specific optimizations: 10 // 11 // - Deduplication of runtime calls, e.g., omp_get_thread_num. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/OpenMPOpt.h" 16 17 #include "llvm/ADT/EnumeratedArray.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/CallGraph.h" 20 #include "llvm/Analysis/CallGraphSCCPass.h" 21 #include "llvm/Frontend/OpenMP/OMPConstants.h" 22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 23 #include "llvm/IR/CallSite.h" 24 #include "llvm/InitializePasses.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Transforms/IPO.h" 27 #include "llvm/Transforms/Utils/CallGraphUpdater.h" 28 29 using namespace llvm; 30 using namespace omp; 31 using namespace types; 32 33 #define DEBUG_TYPE "openmp-opt" 34 35 static cl::opt<bool> DisableOpenMPOptimizations( 36 "openmp-opt-disable", cl::ZeroOrMore, 37 cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 38 cl::init(false)); 39 40 STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 41 "Number of OpenMP runtime calls deduplicated"); 42 STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 43 "Number of OpenMP runtime functions identified"); 44 STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 45 "Number of OpenMP runtime function uses identified"); 46 47 #if !defined(NDEBUG) 48 static constexpr auto TAG = "[" DEBUG_TYPE "]"; 49 #endif 50 51 namespace { 52 struct OpenMPOpt { 53 54 OpenMPOpt(SmallPtrSetImpl<Function *> &SCC, 55 SmallPtrSetImpl<Function *> &ModuleSlice, 56 CallGraphUpdater &CGUpdater) 57 : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice), 58 OMPBuilder(M), CGUpdater(CGUpdater) { 59 initializeTypes(M); 60 initializeRuntimeFunctions(); 61 OMPBuilder.initialize(); 62 } 63 64 /// Generic information that describes a runtime function 65 struct RuntimeFunctionInfo { 66 /// The kind, as described by the RuntimeFunction enum. 67 RuntimeFunction Kind; 68 69 /// The name of the function. 70 StringRef Name; 71 72 /// Flag to indicate a variadic function. 73 bool IsVarArg; 74 75 /// The return type of the function. 76 Type *ReturnType; 77 78 /// The argument types of the function. 79 SmallVector<Type *, 8> ArgumentTypes; 80 81 /// The declaration if available. 82 Function *Declaration = nullptr; 83 84 /// Uses of this runtime function per function containing the use. 85 DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap; 86 87 /// Return the number of arguments (or the minimal number for variadic 88 /// functions). 89 size_t getNumArgs() const { return ArgumentTypes.size(); } 90 91 /// Run the callback \p CB on each use and forget the use if the result is 92 /// true. The callback will be fed the function in which the use was 93 /// encountered as second argument. 94 void foreachUse(function_ref<bool(Use &, Function &)> CB) { 95 SmallVector<Use *, 8> ToBeDeleted; 96 for (auto &It : UsesMap) { 97 ToBeDeleted.clear(); 98 for (Use *U : It.second) 99 if (CB(*U, *It.first)) 100 ToBeDeleted.push_back(U); 101 for (Use *U : ToBeDeleted) 102 It.second.erase(U); 103 } 104 } 105 }; 106 107 /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 108 bool run() { 109 bool Changed = false; 110 111 LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 112 << " functions in a slice with " << ModuleSlice.size() 113 << " functions\n"); 114 115 Changed |= deduplicateRuntimeCalls(); 116 Changed |= deleteParallelRegions(); 117 118 return Changed; 119 } 120 121 private: 122 /// Try to delete parallel regions if possible. 123 bool deleteParallelRegions() { 124 const unsigned CallbackCalleeOperand = 2; 125 126 RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call]; 127 if (!RFI.Declaration) 128 return false; 129 130 bool Changed = false; 131 auto DeleteCallCB = [&](Use &U, Function &) { 132 CallInst *CI = getCallIfRegularCall(U); 133 if (!CI) 134 return false; 135 auto *Fn = dyn_cast<Function>( 136 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 137 if (!Fn) 138 return false; 139 if (!Fn->onlyReadsMemory()) 140 return false; 141 if (!Fn->hasFnAttribute(Attribute::WillReturn)) 142 return false; 143 144 LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 145 << CI->getCaller()->getName() << "\n"); 146 CGUpdater.removeCallSite(*CI); 147 CI->eraseFromParent(); 148 Changed = true; 149 return true; 150 }; 151 152 RFI.foreachUse(DeleteCallCB); 153 154 return Changed; 155 } 156 157 /// Try to eliminiate runtime calls by reusing existing ones. 158 bool deduplicateRuntimeCalls() { 159 bool Changed = false; 160 161 RuntimeFunction DeduplicableRuntimeCallIDs[] = { 162 OMPRTL_omp_get_num_threads, 163 OMPRTL_omp_in_parallel, 164 OMPRTL_omp_get_cancellation, 165 OMPRTL_omp_get_thread_limit, 166 OMPRTL_omp_get_supported_active_levels, 167 OMPRTL_omp_get_level, 168 OMPRTL_omp_get_ancestor_thread_num, 169 OMPRTL_omp_get_team_size, 170 OMPRTL_omp_get_active_level, 171 OMPRTL_omp_in_final, 172 OMPRTL_omp_get_proc_bind, 173 OMPRTL_omp_get_num_places, 174 OMPRTL_omp_get_num_procs, 175 OMPRTL_omp_get_place_num, 176 OMPRTL_omp_get_partition_num_places, 177 OMPRTL_omp_get_partition_place_nums}; 178 179 // Global-tid is handled separatly. 180 SmallSetVector<Value *, 16> GTIdArgs; 181 collectGlobalThreadIdArguments(GTIdArgs); 182 LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 183 << " global thread ID arguments\n"); 184 185 for (Function *F : SCC) { 186 for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 187 deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]); 188 189 // __kmpc_global_thread_num is special as we can replace it with an 190 // argument in enough cases to make it worth trying. 191 Value *GTIdArg = nullptr; 192 for (Argument &Arg : F->args()) 193 if (GTIdArgs.count(&Arg)) { 194 GTIdArg = &Arg; 195 break; 196 } 197 Changed |= deduplicateRuntimeCalls( 198 *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 199 } 200 201 return Changed; 202 } 203 204 static Value *combinedIdentStruct(Value *Ident0, Value *Ident1, 205 bool GlobalOnly) { 206 // TODO: Figure out how to actually combine multiple debug locations. For 207 // now we just keep the first we find. 208 if (Ident0) 209 return Ident0; 210 if (!GlobalOnly || isa<GlobalValue>(Ident1)) 211 return Ident1; 212 return nullptr; 213 } 214 215 /// Return an `struct ident_t*` value that represents the ones used in the 216 /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 217 /// return a local `struct ident_t*`. For now, if we cannot find a suitable 218 /// return value we create one from scratch. We also do not yet combine 219 /// information, e.g., the source locations, see combinedIdentStruct. 220 Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F, 221 bool GlobalOnly) { 222 Value *Ident = nullptr; 223 auto CombineIdentStruct = [&](Use &U, Function &Caller) { 224 CallInst *CI = getCallIfRegularCall(U, &RFI); 225 if (!CI || &F != &Caller) 226 return false; 227 Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 228 /* GlobalOnly */ true); 229 return false; 230 }; 231 RFI.foreachUse(CombineIdentStruct); 232 233 if (!Ident) { 234 // The IRBuilder uses the insertion block to get to the module, this is 235 // unfortunate but we work around it for now. 236 if (!OMPBuilder.getInsertionPoint().getBlock()) 237 OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 238 &F.getEntryBlock(), F.getEntryBlock().begin())); 239 // Create a fallback location if non was found. 240 // TODO: Use the debug locations of the calls instead. 241 Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr(); 242 Ident = OMPBuilder.getOrCreateIdent(Loc); 243 } 244 return Ident; 245 } 246 247 /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or 248 /// \p ReplVal if given. 249 bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, 250 Value *ReplVal = nullptr) { 251 auto UsesIt = RFI.UsesMap.find(&F); 252 if (UsesIt == RFI.UsesMap.end()) 253 return false; 254 255 auto &Uses = UsesIt->getSecond(); 256 if (Uses.size() + (ReplVal != nullptr) < 2) 257 return false; 258 259 LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of " 260 << RFI.Name 261 << (ReplVal ? " with an existing value\n" : "\n") 262 << "\n"); 263 assert((!ReplVal || (isa<Argument>(ReplVal) && 264 cast<Argument>(ReplVal)->getParent() == &F)) && 265 "Unexpected replacement value!"); 266 267 // TODO: Use dominance to find a good position instead. 268 auto CanBeMoved = [](CallBase &CB) { 269 unsigned NumArgs = CB.getNumArgOperands(); 270 if (NumArgs == 0) 271 return true; 272 if (CB.getArgOperand(0)->getType() != IdentPtr) 273 return false; 274 for (unsigned u = 1; u < NumArgs; ++u) 275 if (isa<Instruction>(CB.getArgOperand(u))) 276 return false; 277 return true; 278 }; 279 280 if (!ReplVal) { 281 for (Use *U : Uses) 282 if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 283 if (!CanBeMoved(*CI)) 284 continue; 285 CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 286 ReplVal = CI; 287 break; 288 } 289 if (!ReplVal) 290 return false; 291 } 292 293 // If we use a call as a replacement value we need to make sure the ident is 294 // valid at the new location. For now we just pick a global one, either 295 // existing and used by one of the calls, or created from scratch. 296 if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 297 if (CI->getNumArgOperands() > 0 && 298 CI->getArgOperand(0)->getType() == IdentPtr) { 299 Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 300 /* GlobalOnly */ true); 301 CI->setArgOperand(0, Ident); 302 } 303 } 304 305 bool Changed = false; 306 auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 307 CallInst *CI = getCallIfRegularCall(U, &RFI); 308 if (!CI || CI == ReplVal || &F != &Caller) 309 return false; 310 assert(CI->getCaller() == &F && "Unexpected call!"); 311 CGUpdater.removeCallSite(*CI); 312 CI->replaceAllUsesWith(ReplVal); 313 CI->eraseFromParent(); 314 ++NumOpenMPRuntimeCallsDeduplicated; 315 Changed = true; 316 return true; 317 }; 318 RFI.foreachUse(ReplaceAndDeleteCB); 319 320 return Changed; 321 } 322 323 /// Collect arguments that represent the global thread id in \p GTIdArgs. 324 void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 325 // TODO: Below we basically perform a fixpoint iteration with a pessimistic 326 // initialization. We could define an AbstractAttribute instead and 327 // run the Attributor here once it can be run as an SCC pass. 328 329 // Helper to check the argument \p ArgNo at all call sites of \p F for 330 // a GTId. 331 auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 332 if (!F.hasLocalLinkage()) 333 return false; 334 for (Use &U : F.uses()) { 335 if (CallInst *CI = getCallIfRegularCall(U)) { 336 Value *ArgOp = CI->getArgOperand(ArgNo); 337 if (CI == &RefCI || GTIdArgs.count(ArgOp) || 338 getCallIfRegularCall(*ArgOp, 339 &RFIs[OMPRTL___kmpc_global_thread_num])) 340 continue; 341 } 342 return false; 343 } 344 return true; 345 }; 346 347 // Helper to identify uses of a GTId as GTId arguments. 348 auto AddUserArgs = [&](Value >Id) { 349 for (Use &U : GTId.uses()) 350 if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 351 if (CI->isArgOperand(&U)) 352 if (Function *Callee = CI->getCalledFunction()) 353 if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 354 GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 355 }; 356 357 // The argument users of __kmpc_global_thread_num calls are GTIds. 358 RuntimeFunctionInfo &GlobThreadNumRFI = 359 RFIs[OMPRTL___kmpc_global_thread_num]; 360 for (auto &It : GlobThreadNumRFI.UsesMap) 361 for (Use *U : It.second) 362 if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI)) 363 AddUserArgs(*CI); 364 365 // Transitively search for more arguments by looking at the users of the 366 // ones we know already. During the search the GTIdArgs vector is extended 367 // so we cannot cache the size nor can we use a range based for. 368 for (unsigned u = 0; u < GTIdArgs.size(); ++u) 369 AddUserArgs(*GTIdArgs[u]); 370 } 371 372 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 373 /// given it has to be the callee or a nullptr is returned. 374 CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) { 375 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 376 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 377 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 378 return CI; 379 return nullptr; 380 } 381 382 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 383 /// the callee or a nullptr is returned. 384 CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) { 385 CallInst *CI = dyn_cast<CallInst>(&V); 386 if (CI && !CI->hasOperandBundles() && 387 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 388 return CI; 389 return nullptr; 390 } 391 392 /// Returns true if the function declaration \p F matches the runtime 393 /// function types, that is, return type \p RTFRetType, and argument types 394 /// \p RTFArgTypes. 395 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 396 SmallVector<Type *, 8> &RTFArgTypes) { 397 // TODO: We should output information to the user (under debug output 398 // and via remarks). 399 400 if (!F) 401 return false; 402 if (F->getReturnType() != RTFRetType) 403 return false; 404 if (F->arg_size() != RTFArgTypes.size()) 405 return false; 406 407 auto RTFTyIt = RTFArgTypes.begin(); 408 for (Argument &Arg : F->args()) { 409 if (Arg.getType() != *RTFTyIt) 410 return false; 411 412 ++RTFTyIt; 413 } 414 415 return true; 416 } 417 418 /// Helper to initialize all runtime function information for those defined in 419 /// OpenMPKinds.def. 420 void initializeRuntimeFunctions() { 421 // Helper to collect all uses of the decleration in the UsesMap. 422 auto CollectUses = [&](RuntimeFunctionInfo &RFI) { 423 unsigned NumUses = 0; 424 if (!RFI.Declaration) 425 return NumUses; 426 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 427 428 NumOpenMPRuntimeFunctionsIdentified += 1; 429 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 430 431 // TODO: We directly convert uses into proper calls and unknown uses. 432 for (Use &U : RFI.Declaration->uses()) { 433 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 434 if (ModuleSlice.count(UserI->getFunction())) { 435 RFI.UsesMap[UserI->getFunction()].insert(&U); 436 ++NumUses; 437 } 438 } else { 439 RFI.UsesMap[nullptr].insert(&U); 440 ++NumUses; 441 } 442 } 443 return NumUses; 444 }; 445 446 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ 447 { \ 448 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 449 Function *F = M.getFunction(_Name); \ 450 if (declMatchesRTFTypes(F, _ReturnType , ArgsTypes)) { \ 451 auto &RFI = RFIs[_Enum]; \ 452 RFI.Kind = _Enum; \ 453 RFI.Name = _Name; \ 454 RFI.IsVarArg = _IsVarArg; \ 455 RFI.ReturnType = _ReturnType; \ 456 RFI.ArgumentTypes = std::move(ArgsTypes); \ 457 RFI.Declaration = F; \ 458 unsigned NumUses = CollectUses(RFI); \ 459 (void)NumUses; \ 460 LLVM_DEBUG({ \ 461 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 462 << " found\n"; \ 463 if (RFI.Declaration) \ 464 dbgs() << TAG << "-> got " << NumUses << " uses in " \ 465 << RFI.UsesMap.size() << " different functions.\n"; \ 466 }); \ 467 } \ 468 } 469 #include "llvm/Frontend/OpenMP/OMPKinds.def" 470 471 // TODO: We should attach the attributes defined in OMPKinds.def. 472 } 473 474 /// The underyling module. 475 Module &M; 476 477 /// The SCC we are operating on. 478 SmallPtrSetImpl<Function *> &SCC; 479 480 /// The slice of the module we are allowed to look at. 481 SmallPtrSetImpl<Function *> &ModuleSlice; 482 483 /// An OpenMP-IR-Builder instance 484 OpenMPIRBuilder OMPBuilder; 485 486 /// Callback to update the call graph, the first argument is a removed call, 487 /// the second an optional replacement call. 488 CallGraphUpdater &CGUpdater; 489 490 /// Map from runtime function kind to the runtime function description. 491 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 492 RuntimeFunction::OMPRTL___last> 493 RFIs; 494 }; 495 } // namespace 496 497 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, 498 CGSCCAnalysisManager &AM, 499 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 500 if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 501 return PreservedAnalyses::all(); 502 503 if (DisableOpenMPOptimizations) 504 return PreservedAnalyses::all(); 505 506 SmallPtrSet<Function *, 16> SCC; 507 for (LazyCallGraph::Node &N : C) 508 SCC.insert(&N.getFunction()); 509 510 if (SCC.empty()) 511 return PreservedAnalyses::all(); 512 513 CallGraphUpdater CGUpdater; 514 CGUpdater.initialize(CG, C, AM, UR); 515 // TODO: Compute the module slice we are allowed to look at. 516 OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); 517 bool Changed = OMPOpt.run(); 518 (void)Changed; 519 return PreservedAnalyses::all(); 520 } 521 522 namespace { 523 524 struct OpenMPOptLegacyPass : public CallGraphSCCPass { 525 CallGraphUpdater CGUpdater; 526 OpenMPInModule OMPInModule; 527 static char ID; 528 529 OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { 530 initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); 531 } 532 533 void getAnalysisUsage(AnalysisUsage &AU) const override { 534 CallGraphSCCPass::getAnalysisUsage(AU); 535 } 536 537 bool doInitialization(CallGraph &CG) override { 538 // Disable the pass if there is no OpenMP (runtime call) in the module. 539 containsOpenMP(CG.getModule(), OMPInModule); 540 return false; 541 } 542 543 bool runOnSCC(CallGraphSCC &CGSCC) override { 544 if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 545 return false; 546 if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 547 return false; 548 549 SmallPtrSet<Function *, 16> SCC; 550 for (CallGraphNode *CGN : CGSCC) 551 if (Function *Fn = CGN->getFunction()) 552 if (!Fn->isDeclaration()) 553 SCC.insert(Fn); 554 555 if (SCC.empty()) 556 return false; 557 558 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 559 CGUpdater.initialize(CG, CGSCC); 560 561 // TODO: Compute the module slice we are allowed to look at. 562 OpenMPOpt OMPOpt(SCC, SCC, CGUpdater); 563 return OMPOpt.run(); 564 } 565 566 bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 567 }; 568 569 } // end anonymous namespace 570 571 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 572 if (OMPInModule.isKnown()) 573 return OMPInModule; 574 575 #define OMP_RTL(_Enum, _Name, ...) \ 576 if (M.getFunction(_Name)) \ 577 return OMPInModule = true; 578 #include "llvm/Frontend/OpenMP/OMPKinds.def" 579 return OMPInModule = false; 580 } 581 582 char OpenMPOptLegacyPass::ID = 0; 583 584 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", 585 "OpenMP specific optimizations", false, false) 586 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 587 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", 588 "OpenMP specific optimizations", false, false) 589 590 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } 591