1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // OpenMP specific optimizations: 10 // 11 // - Deduplication of runtime calls, e.g., omp_get_thread_num. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/OpenMPOpt.h" 16 17 #include "llvm/ADT/EnumeratedArray.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/CallGraph.h" 20 #include "llvm/Analysis/CallGraphSCCPass.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Frontend/OpenMP/OMPConstants.h" 23 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 24 #include "llvm/InitializePasses.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Transforms/IPO.h" 27 #include "llvm/Transforms/IPO/Attributor.h" 28 #include "llvm/Transforms/Utils/CallGraphUpdater.h" 29 30 using namespace llvm; 31 using namespace omp; 32 33 #define DEBUG_TYPE "openmp-opt" 34 35 static cl::opt<bool> DisableOpenMPOptimizations( 36 "openmp-opt-disable", cl::ZeroOrMore, 37 cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 38 cl::init(false)); 39 40 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), 41 cl::Hidden); 42 43 STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 44 "Number of OpenMP runtime calls deduplicated"); 45 STATISTIC(NumOpenMPParallelRegionsDeleted, 46 "Number of OpenMP parallel regions deleted"); 47 STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 48 "Number of OpenMP runtime functions identified"); 49 STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 50 "Number of OpenMP runtime function uses identified"); 51 52 #if !defined(NDEBUG) 53 static constexpr auto TAG = "[" DEBUG_TYPE "]"; 54 #endif 55 56 namespace { 57 58 /// OpenMP specific information. For now, stores RFIs and ICVs also needed for 59 /// Attributor runs. 60 struct OMPInformationCache : public InformationCache { 61 OMPInformationCache(Module &M, AnalysisGetter &AG, 62 BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, 63 SmallPtrSetImpl<Function *> &ModuleSlice) 64 : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), 65 OMPBuilder(M) { 66 OMPBuilder.initialize(); 67 initializeRuntimeFunctions(); 68 initializeInternalControlVars(); 69 } 70 71 /// Generic information that describes an internal control variable. 72 struct InternalControlVarInfo { 73 /// The kind, as described by InternalControlVar enum. 74 InternalControlVar Kind; 75 76 /// The name of the ICV. 77 StringRef Name; 78 79 /// Environment variable associated with this ICV. 80 StringRef EnvVarName; 81 82 /// Initial value kind. 83 ICVInitValue InitKind; 84 85 /// Initial value. 86 ConstantInt *InitValue; 87 88 /// Setter RTL function associated with this ICV. 89 RuntimeFunction Setter; 90 91 /// Getter RTL function associated with this ICV. 92 RuntimeFunction Getter; 93 94 /// RTL Function corresponding to the override clause of this ICV 95 RuntimeFunction Clause; 96 }; 97 98 /// Generic information that describes a runtime function 99 struct RuntimeFunctionInfo { 100 101 /// The kind, as described by the RuntimeFunction enum. 102 RuntimeFunction Kind; 103 104 /// The name of the function. 105 StringRef Name; 106 107 /// Flag to indicate a variadic function. 108 bool IsVarArg; 109 110 /// The return type of the function. 111 Type *ReturnType; 112 113 /// The argument types of the function. 114 SmallVector<Type *, 8> ArgumentTypes; 115 116 /// The declaration if available. 117 Function *Declaration = nullptr; 118 119 /// Uses of this runtime function per function containing the use. 120 using UseVector = SmallVector<Use *, 16>; 121 122 /// Return the vector of uses in function \p F. 123 UseVector &getOrCreateUseVector(Function *F) { 124 std::unique_ptr<UseVector> &UV = UsesMap[F]; 125 if (!UV) 126 UV = std::make_unique<UseVector>(); 127 return *UV; 128 } 129 130 /// Return the vector of uses in function \p F or `nullptr` if there are 131 /// none. 132 const UseVector *getUseVector(Function &F) const { 133 auto I = UsesMap.find(&F); 134 if (I != UsesMap.end()) 135 return I->second.get(); 136 return nullptr; 137 } 138 139 /// Return how many functions contain uses of this runtime function. 140 size_t getNumFunctionsWithUses() const { return UsesMap.size(); } 141 142 /// Return the number of arguments (or the minimal number for variadic 143 /// functions). 144 size_t getNumArgs() const { return ArgumentTypes.size(); } 145 146 /// Run the callback \p CB on each use and forget the use if the result is 147 /// true. The callback will be fed the function in which the use was 148 /// encountered as second argument. 149 void foreachUse(function_ref<bool(Use &, Function &)> CB) { 150 for (auto &It : UsesMap) 151 foreachUse(CB, It.first, It.second.get()); 152 } 153 154 /// Run the callback \p CB on each use within the function \p F and forget 155 /// the use if the result is true. 156 void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F, 157 UseVector *Uses = nullptr) { 158 SmallVector<unsigned, 8> ToBeDeleted; 159 ToBeDeleted.clear(); 160 161 unsigned Idx = 0; 162 UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); 163 164 for (Use *U : UV) { 165 if (CB(*U, *F)) 166 ToBeDeleted.push_back(Idx); 167 ++Idx; 168 } 169 170 // Remove the to-be-deleted indices in reverse order as prior 171 // modifcations will not modify the smaller indices. 172 while (!ToBeDeleted.empty()) { 173 unsigned Idx = ToBeDeleted.pop_back_val(); 174 UV[Idx] = UV.back(); 175 UV.pop_back(); 176 } 177 } 178 179 private: 180 /// Map from functions to all uses of this runtime function contained in 181 /// them. 182 DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap; 183 }; 184 185 /// The slice of the module we are allowed to look at. 186 SmallPtrSetImpl<Function *> &ModuleSlice; 187 188 /// An OpenMP-IR-Builder instance 189 OpenMPIRBuilder OMPBuilder; 190 191 /// Map from runtime function kind to the runtime function description. 192 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 193 RuntimeFunction::OMPRTL___last> 194 RFIs; 195 196 /// Map from ICV kind to the ICV description. 197 EnumeratedArray<InternalControlVarInfo, InternalControlVar, 198 InternalControlVar::ICV___last> 199 ICVs; 200 201 /// Helper to initialize all internal control variable information for those 202 /// defined in OMPKinds.def. 203 void initializeInternalControlVars() { 204 #define ICV_RT_SET(_Name, RTL) \ 205 { \ 206 auto &ICV = ICVs[_Name]; \ 207 ICV.Setter = RTL; \ 208 } 209 #define ICV_RT_GET(Name, RTL) \ 210 { \ 211 auto &ICV = ICVs[Name]; \ 212 ICV.Getter = RTL; \ 213 } 214 #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ 215 { \ 216 auto &ICV = ICVs[Enum]; \ 217 ICV.Name = _Name; \ 218 ICV.Kind = Enum; \ 219 ICV.InitKind = Init; \ 220 ICV.EnvVarName = _EnvVarName; \ 221 switch (ICV.InitKind) { \ 222 case ICV_IMPLEMENTATION_DEFINED: \ 223 ICV.InitValue = nullptr; \ 224 break; \ 225 case ICV_ZERO: \ 226 ICV.InitValue = ConstantInt::get( \ 227 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ 228 break; \ 229 case ICV_FALSE: \ 230 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ 231 break; \ 232 case ICV_LAST: \ 233 break; \ 234 } \ 235 } 236 #include "llvm/Frontend/OpenMP/OMPKinds.def" 237 } 238 239 /// Returns true if the function declaration \p F matches the runtime 240 /// function types, that is, return type \p RTFRetType, and argument types 241 /// \p RTFArgTypes. 242 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 243 SmallVector<Type *, 8> &RTFArgTypes) { 244 // TODO: We should output information to the user (under debug output 245 // and via remarks). 246 247 if (!F) 248 return false; 249 if (F->getReturnType() != RTFRetType) 250 return false; 251 if (F->arg_size() != RTFArgTypes.size()) 252 return false; 253 254 auto RTFTyIt = RTFArgTypes.begin(); 255 for (Argument &Arg : F->args()) { 256 if (Arg.getType() != *RTFTyIt) 257 return false; 258 259 ++RTFTyIt; 260 } 261 262 return true; 263 } 264 265 /// Helper to initialize all runtime function information for those defined 266 /// in OpenMPKinds.def. 267 void initializeRuntimeFunctions() { 268 // Helper to collect all uses of the decleration in the UsesMap. 269 auto CollectUses = [&](RuntimeFunctionInfo &RFI) { 270 unsigned NumUses = 0; 271 if (!RFI.Declaration) 272 return NumUses; 273 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 274 275 NumOpenMPRuntimeFunctionsIdentified += 1; 276 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 277 278 // TODO: We directly convert uses into proper calls and unknown uses. 279 for (Use &U : RFI.Declaration->uses()) { 280 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 281 if (ModuleSlice.count(UserI->getFunction())) { 282 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); 283 ++NumUses; 284 } 285 } else { 286 RFI.getOrCreateUseVector(nullptr).push_back(&U); 287 ++NumUses; 288 } 289 } 290 return NumUses; 291 }; 292 293 Module &M = *((*ModuleSlice.begin())->getParent()); 294 295 // Helper macros for handling __VA_ARGS__ in OMP_RTL 296 #define OMP_TYPE(VarName, ...) \ 297 Type *VarName = OMPBuilder.VarName; \ 298 (void)VarName; 299 300 #define OMP_ARRAY_TYPE(VarName, ...) \ 301 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ 302 (void)VarName##Ty; \ 303 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ 304 (void)VarName##PtrTy; 305 306 #define OMP_FUNCTION_TYPE(VarName, ...) \ 307 FunctionType *VarName = OMPBuilder.VarName; \ 308 (void)VarName; \ 309 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 310 (void)VarName##Ptr; 311 312 #define OMP_STRUCT_TYPE(VarName, ...) \ 313 StructType *VarName = OMPBuilder.VarName; \ 314 (void)VarName; \ 315 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 316 (void)VarName##Ptr; 317 318 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ 319 { \ 320 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 321 Function *F = M.getFunction(_Name); \ 322 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ 323 auto &RFI = RFIs[_Enum]; \ 324 RFI.Kind = _Enum; \ 325 RFI.Name = _Name; \ 326 RFI.IsVarArg = _IsVarArg; \ 327 RFI.ReturnType = OMPBuilder._ReturnType; \ 328 RFI.ArgumentTypes = std::move(ArgsTypes); \ 329 RFI.Declaration = F; \ 330 unsigned NumUses = CollectUses(RFI); \ 331 (void)NumUses; \ 332 LLVM_DEBUG({ \ 333 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 334 << " found\n"; \ 335 if (RFI.Declaration) \ 336 dbgs() << TAG << "-> got " << NumUses << " uses in " \ 337 << RFI.getNumFunctionsWithUses() \ 338 << " different functions.\n"; \ 339 }); \ 340 } \ 341 } 342 #include "llvm/Frontend/OpenMP/OMPKinds.def" 343 344 // TODO: We should attach the attributes defined in OMPKinds.def. 345 } 346 }; 347 348 struct OpenMPOpt { 349 350 using OptimizationRemarkGetter = 351 function_ref<OptimizationRemarkEmitter &(Function *)>; 352 353 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, 354 OptimizationRemarkGetter OREGetter, 355 OMPInformationCache &OMPInfoCache) 356 : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), 357 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache) {} 358 359 /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 360 bool run() { 361 bool Changed = false; 362 363 LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 364 << " functions in a slice with " 365 << OMPInfoCache.ModuleSlice.size() << " functions\n"); 366 367 /// Print initial ICV values for testing. 368 /// FIXME: This should be done from the Attributor once it is added. 369 if (PrintICVValues) { 370 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; 371 372 for (Function *F : OMPInfoCache.ModuleSlice) { 373 for (auto ICV : ICVs) { 374 auto ICVInfo = OMPInfoCache.ICVs[ICV]; 375 auto Remark = [&](OptimizationRemark OR) { 376 return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) 377 << " Value: " 378 << (ICVInfo.InitValue 379 ? ICVInfo.InitValue->getValue().toString(10, true) 380 : "IMPLEMENTATION_DEFINED"); 381 }; 382 383 emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); 384 } 385 } 386 } 387 388 Changed |= deduplicateRuntimeCalls(); 389 Changed |= deleteParallelRegions(); 390 391 return Changed; 392 } 393 394 /// Return the call if \p U is a callee use in a regular call. If \p RFI is 395 /// given it has to be the callee or a nullptr is returned. 396 static CallInst *getCallIfRegularCall( 397 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 398 CallInst *CI = dyn_cast<CallInst>(U.getUser()); 399 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 400 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 401 return CI; 402 return nullptr; 403 } 404 405 /// Return the call if \p V is a regular call. If \p RFI is given it has to be 406 /// the callee or a nullptr is returned. 407 static CallInst *getCallIfRegularCall( 408 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 409 CallInst *CI = dyn_cast<CallInst>(&V); 410 if (CI && !CI->hasOperandBundles() && 411 (!RFI || CI->getCalledFunction() == RFI->Declaration)) 412 return CI; 413 return nullptr; 414 } 415 416 private: 417 /// Try to delete parallel regions if possible. 418 bool deleteParallelRegions() { 419 const unsigned CallbackCalleeOperand = 2; 420 421 OMPInformationCache::RuntimeFunctionInfo &RFI = 422 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 423 424 if (!RFI.Declaration) 425 return false; 426 427 bool Changed = false; 428 auto DeleteCallCB = [&](Use &U, Function &) { 429 CallInst *CI = getCallIfRegularCall(U); 430 if (!CI) 431 return false; 432 auto *Fn = dyn_cast<Function>( 433 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 434 if (!Fn) 435 return false; 436 if (!Fn->onlyReadsMemory()) 437 return false; 438 if (!Fn->hasFnAttribute(Attribute::WillReturn)) 439 return false; 440 441 LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 442 << CI->getCaller()->getName() << "\n"); 443 444 auto Remark = [&](OptimizationRemark OR) { 445 return OR << "Parallel region in " 446 << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) 447 << " deleted"; 448 }; 449 emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", 450 Remark); 451 452 CGUpdater.removeCallSite(*CI); 453 CI->eraseFromParent(); 454 Changed = true; 455 ++NumOpenMPParallelRegionsDeleted; 456 return true; 457 }; 458 459 RFI.foreachUse(DeleteCallCB); 460 461 return Changed; 462 } 463 464 /// Try to eliminiate runtime calls by reusing existing ones. 465 bool deduplicateRuntimeCalls() { 466 bool Changed = false; 467 468 RuntimeFunction DeduplicableRuntimeCallIDs[] = { 469 OMPRTL_omp_get_num_threads, 470 OMPRTL_omp_in_parallel, 471 OMPRTL_omp_get_cancellation, 472 OMPRTL_omp_get_thread_limit, 473 OMPRTL_omp_get_supported_active_levels, 474 OMPRTL_omp_get_level, 475 OMPRTL_omp_get_ancestor_thread_num, 476 OMPRTL_omp_get_team_size, 477 OMPRTL_omp_get_active_level, 478 OMPRTL_omp_in_final, 479 OMPRTL_omp_get_proc_bind, 480 OMPRTL_omp_get_num_places, 481 OMPRTL_omp_get_num_procs, 482 OMPRTL_omp_get_place_num, 483 OMPRTL_omp_get_partition_num_places, 484 OMPRTL_omp_get_partition_place_nums}; 485 486 // Global-tid is handled separately. 487 SmallSetVector<Value *, 16> GTIdArgs; 488 collectGlobalThreadIdArguments(GTIdArgs); 489 LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 490 << " global thread ID arguments\n"); 491 492 for (Function *F : SCC) { 493 for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 494 deduplicateRuntimeCalls(*F, 495 OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 496 497 // __kmpc_global_thread_num is special as we can replace it with an 498 // argument in enough cases to make it worth trying. 499 Value *GTIdArg = nullptr; 500 for (Argument &Arg : F->args()) 501 if (GTIdArgs.count(&Arg)) { 502 GTIdArg = &Arg; 503 break; 504 } 505 Changed |= deduplicateRuntimeCalls( 506 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 507 } 508 509 return Changed; 510 } 511 512 static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, 513 bool GlobalOnly, bool &SingleChoice) { 514 if (CurrentIdent == NextIdent) 515 return CurrentIdent; 516 517 // TODO: Figure out how to actually combine multiple debug locations. For 518 // now we just keep an existing one if there is a single choice. 519 if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { 520 SingleChoice = !CurrentIdent; 521 return NextIdent; 522 } 523 return nullptr; 524 } 525 526 /// Return an `struct ident_t*` value that represents the ones used in the 527 /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 528 /// return a local `struct ident_t*`. For now, if we cannot find a suitable 529 /// return value we create one from scratch. We also do not yet combine 530 /// information, e.g., the source locations, see combinedIdentStruct. 531 Value * 532 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, 533 Function &F, bool GlobalOnly) { 534 bool SingleChoice = true; 535 Value *Ident = nullptr; 536 auto CombineIdentStruct = [&](Use &U, Function &Caller) { 537 CallInst *CI = getCallIfRegularCall(U, &RFI); 538 if (!CI || &F != &Caller) 539 return false; 540 Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 541 /* GlobalOnly */ true, SingleChoice); 542 return false; 543 }; 544 RFI.foreachUse(CombineIdentStruct); 545 546 if (!Ident || !SingleChoice) { 547 // The IRBuilder uses the insertion block to get to the module, this is 548 // unfortunate but we work around it for now. 549 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) 550 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 551 &F.getEntryBlock(), F.getEntryBlock().begin())); 552 // Create a fallback location if non was found. 553 // TODO: Use the debug locations of the calls instead. 554 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); 555 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); 556 } 557 return Ident; 558 } 559 560 /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or 561 /// \p ReplVal if given. 562 bool deduplicateRuntimeCalls(Function &F, 563 OMPInformationCache::RuntimeFunctionInfo &RFI, 564 Value *ReplVal = nullptr) { 565 auto *UV = RFI.getUseVector(F); 566 if (!UV || UV->size() + (ReplVal != nullptr) < 2) 567 return false; 568 569 LLVM_DEBUG( 570 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 571 << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 572 573 assert((!ReplVal || (isa<Argument>(ReplVal) && 574 cast<Argument>(ReplVal)->getParent() == &F)) && 575 "Unexpected replacement value!"); 576 577 // TODO: Use dominance to find a good position instead. 578 auto CanBeMoved = [this](CallBase &CB) { 579 unsigned NumArgs = CB.getNumArgOperands(); 580 if (NumArgs == 0) 581 return true; 582 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 583 return false; 584 for (unsigned u = 1; u < NumArgs; ++u) 585 if (isa<Instruction>(CB.getArgOperand(u))) 586 return false; 587 return true; 588 }; 589 590 if (!ReplVal) { 591 for (Use *U : *UV) 592 if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 593 if (!CanBeMoved(*CI)) 594 continue; 595 596 auto Remark = [&](OptimizationRemark OR) { 597 auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); 598 return OR << "OpenMP runtime call " 599 << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " 600 << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); 601 }; 602 emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark); 603 604 CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 605 ReplVal = CI; 606 break; 607 } 608 if (!ReplVal) 609 return false; 610 } 611 612 // If we use a call as a replacement value we need to make sure the ident is 613 // valid at the new location. For now we just pick a global one, either 614 // existing and used by one of the calls, or created from scratch. 615 if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 616 if (CI->getNumArgOperands() > 0 && 617 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 618 Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 619 /* GlobalOnly */ true); 620 CI->setArgOperand(0, Ident); 621 } 622 } 623 624 bool Changed = false; 625 auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 626 CallInst *CI = getCallIfRegularCall(U, &RFI); 627 if (!CI || CI == ReplVal || &F != &Caller) 628 return false; 629 assert(CI->getCaller() == &F && "Unexpected call!"); 630 631 auto Remark = [&](OptimizationRemark OR) { 632 return OR << "OpenMP runtime call " 633 << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; 634 }; 635 emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark); 636 637 CGUpdater.removeCallSite(*CI); 638 CI->replaceAllUsesWith(ReplVal); 639 CI->eraseFromParent(); 640 ++NumOpenMPRuntimeCallsDeduplicated; 641 Changed = true; 642 return true; 643 }; 644 RFI.foreachUse(ReplaceAndDeleteCB); 645 646 return Changed; 647 } 648 649 /// Collect arguments that represent the global thread id in \p GTIdArgs. 650 void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 651 // TODO: Below we basically perform a fixpoint iteration with a pessimistic 652 // initialization. We could define an AbstractAttribute instead and 653 // run the Attributor here once it can be run as an SCC pass. 654 655 // Helper to check the argument \p ArgNo at all call sites of \p F for 656 // a GTId. 657 auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 658 if (!F.hasLocalLinkage()) 659 return false; 660 for (Use &U : F.uses()) { 661 if (CallInst *CI = getCallIfRegularCall(U)) { 662 Value *ArgOp = CI->getArgOperand(ArgNo); 663 if (CI == &RefCI || GTIdArgs.count(ArgOp) || 664 getCallIfRegularCall( 665 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 666 continue; 667 } 668 return false; 669 } 670 return true; 671 }; 672 673 // Helper to identify uses of a GTId as GTId arguments. 674 auto AddUserArgs = [&](Value >Id) { 675 for (Use &U : GTId.uses()) 676 if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 677 if (CI->isArgOperand(&U)) 678 if (Function *Callee = CI->getCalledFunction()) 679 if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 680 GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 681 }; 682 683 // The argument users of __kmpc_global_thread_num calls are GTIds. 684 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 685 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 686 687 GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { 688 if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 689 AddUserArgs(*CI); 690 return false; 691 }); 692 693 // Transitively search for more arguments by looking at the users of the 694 // ones we know already. During the search the GTIdArgs vector is extended 695 // so we cannot cache the size nor can we use a range based for. 696 for (unsigned u = 0; u < GTIdArgs.size(); ++u) 697 AddUserArgs(*GTIdArgs[u]); 698 } 699 700 /// Emit a remark generically 701 /// 702 /// This template function can be used to generically emit a remark. The 703 /// RemarkKind should be one of the following: 704 /// - OptimizationRemark to indicate a successful optimization attempt 705 /// - OptimizationRemarkMissed to report a failed optimization attempt 706 /// - OptimizationRemarkAnalysis to provide additional information about an 707 /// optimization attempt 708 /// 709 /// The remark is built using a callback function provided by the caller that 710 /// takes a RemarkKind as input and returns a RemarkKind. 711 template <typename RemarkKind, 712 typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>> 713 void emitRemark(Instruction *Inst, StringRef RemarkName, 714 RemarkCallBack &&RemarkCB) { 715 Function *F = Inst->getParent()->getParent(); 716 auto &ORE = OREGetter(F); 717 718 ORE.emit( 719 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); 720 } 721 722 /// Emit a remark on a function. Since only OptimizationRemark is supporting 723 /// this, it can't be made generic. 724 void emitRemarkOnFunction( 725 Function *F, StringRef RemarkName, 726 function_ref<OptimizationRemark(OptimizationRemark &&)> &&RemarkCB) { 727 auto &ORE = OREGetter(F); 728 729 ORE.emit([&]() { 730 return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); 731 }); 732 } 733 734 /// The underyling module. 735 Module &M; 736 737 /// The SCC we are operating on. 738 SmallVectorImpl<Function *> &SCC; 739 740 /// Callback to update the call graph, the first argument is a removed call, 741 /// the second an optional replacement call. 742 CallGraphUpdater &CGUpdater; 743 744 /// Callback to get an OptimizationRemarkEmitter from a Function * 745 OptimizationRemarkGetter OREGetter; 746 747 /// OpenMP-specific information cache. Also Used for Attributor runs. 748 OMPInformationCache &OMPInfoCache; 749 }; 750 } // namespace 751 752 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, 753 CGSCCAnalysisManager &AM, 754 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 755 if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 756 return PreservedAnalyses::all(); 757 758 if (DisableOpenMPOptimizations) 759 return PreservedAnalyses::all(); 760 761 SmallPtrSet<Function *, 16> ModuleSlice; 762 SmallVector<Function *, 16> SCC; 763 for (LazyCallGraph::Node &N : C) { 764 SCC.push_back(&N.getFunction()); 765 ModuleSlice.insert(SCC.back()); 766 } 767 768 if (SCC.empty()) 769 return PreservedAnalyses::all(); 770 771 FunctionAnalysisManager &FAM = 772 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 773 774 AnalysisGetter AG(FAM); 775 776 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 777 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 778 }; 779 780 CallGraphUpdater CGUpdater; 781 CGUpdater.initialize(CG, C, AM, UR); 782 783 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 784 BumpPtrAllocator Allocator; 785 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 786 /*CGSCC*/ &Functions, ModuleSlice); 787 788 // TODO: Compute the module slice we are allowed to look at. 789 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache); 790 bool Changed = OMPOpt.run(); 791 (void)Changed; 792 return PreservedAnalyses::all(); 793 } 794 795 namespace { 796 797 struct OpenMPOptLegacyPass : public CallGraphSCCPass { 798 CallGraphUpdater CGUpdater; 799 OpenMPInModule OMPInModule; 800 static char ID; 801 802 OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { 803 initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); 804 } 805 806 void getAnalysisUsage(AnalysisUsage &AU) const override { 807 CallGraphSCCPass::getAnalysisUsage(AU); 808 } 809 810 bool doInitialization(CallGraph &CG) override { 811 // Disable the pass if there is no OpenMP (runtime call) in the module. 812 containsOpenMP(CG.getModule(), OMPInModule); 813 return false; 814 } 815 816 bool runOnSCC(CallGraphSCC &CGSCC) override { 817 if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 818 return false; 819 if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 820 return false; 821 822 SmallPtrSet<Function *, 16> ModuleSlice; 823 SmallVector<Function *, 16> SCC; 824 for (CallGraphNode *CGN : CGSCC) 825 if (Function *Fn = CGN->getFunction()) 826 if (!Fn->isDeclaration()) { 827 SCC.push_back(Fn); 828 ModuleSlice.insert(Fn); 829 } 830 831 if (SCC.empty()) 832 return false; 833 834 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 835 CGUpdater.initialize(CG, CGSCC); 836 837 // Maintain a map of functions to avoid rebuilding the ORE 838 DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; 839 auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { 840 std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; 841 if (!ORE) 842 ORE = std::make_unique<OptimizationRemarkEmitter>(F); 843 return *ORE; 844 }; 845 846 AnalysisGetter AG; 847 SetVector<Function *> Functions(SCC.begin(), SCC.end()); 848 BumpPtrAllocator Allocator; 849 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, 850 Allocator, 851 /*CGSCC*/ &Functions, ModuleSlice); 852 853 // TODO: Compute the module slice we are allowed to look at. 854 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache); 855 return OMPOpt.run(); 856 } 857 858 bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 859 }; 860 861 } // end anonymous namespace 862 863 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 864 if (OMPInModule.isKnown()) 865 return OMPInModule; 866 867 #define OMP_RTL(_Enum, _Name, ...) \ 868 if (M.getFunction(_Name)) \ 869 return OMPInModule = true; 870 #include "llvm/Frontend/OpenMP/OMPKinds.def" 871 return OMPInModule = false; 872 } 873 874 char OpenMPOptLegacyPass::ID = 0; 875 876 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", 877 "OpenMP specific optimizations", false, false) 878 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 879 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", 880 "OpenMP specific optimizations", false, false) 881 882 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } 883