//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

using namespace llvm;
using namespace omp;

#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
          "Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(
    NumOpenMPParallelRegionsReplacedInGPUStateMachine,
    "Number of OpenMP parallel regions replaced with ID in GPU state machines");

#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif

/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
/// true, constant expression users are not given to \p CB but their uses are
/// traversed transitively.
template <typename CBTy>
static void foreachUse(Function &F, CBTy CB,
                       bool LookThroughConstantExprUses = true) {
  SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));

  for (unsigned idx = 0; idx < Worklist.size(); ++idx) {
    Use &U = *Worklist[idx];

    // Allow use in constant bitcasts and simply look through them.
    if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
      for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
        Worklist.push_back(&CEU);
      continue;
    }

    CB(U);
  }
}

namespace {

struct AAICVTracker;

/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
/// Attributor runs.
struct OMPInformationCache : public InformationCache {
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
                      SmallPtrSetImpl<Kernel> &Kernels)
      : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
        Kernels(Kernels) {
    initializeModuleSlice(CGSCC);

    OMPBuilder.initialize();
    initializeRuntimeFunctions();
    initializeInternalControlVars();
  }

  /// Generic information that describes an internal control variable.
  struct InternalControlVarInfo {
    /// The kind, as described by the InternalControlVar enum.
    InternalControlVar Kind;

    /// The name of the ICV.
    StringRef Name;

    /// Environment variable associated with this ICV.
    StringRef EnvVarName;

    /// Initial value kind.
    ICVInitValue InitKind;

    /// Initial value.
    ConstantInt *InitValue;

    /// Setter RTL function associated with this ICV.
    RuntimeFunction Setter;

    /// Getter RTL function associated with this ICV.
    RuntimeFunction Getter;

    /// RTL function corresponding to the override clause of this ICV.
    RuntimeFunction Clause;
  };

  /// Generic information that describes a runtime function.
  struct RuntimeFunctionInfo {

    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Uses of this runtime function per function containing the use.
    using UseVector = SmallVector<Use *, 16>;

    /// Clear the UsesMap for this runtime function.
    void clearUsesMap() { UsesMap.clear(); }

    /// Boolean conversion that is true if the runtime function was found.
    operator bool() const { return Declaration; }

    /// Return the vector of uses in function \p F.
    UseVector &getOrCreateUseVector(Function *F) {
      std::shared_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_shared<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(SmallVectorImpl<Function *> &SCC,
                    function_ref<bool(Use &, Function &)> CB) {
      for (Function *F : SCC)
        foreachUse(CB, F);
    }
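
    // A sketch of the usage pattern (hypothetical callback body): erase every
    // call the callback recognizes and let the use list forget it by
    // returning true, mirroring how the optimizations below drive this API.
    //
    //   RFI.foreachUse(SCC, [&](Use &U, Function &F) {
    //     CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
    //     if (!CI)
    //       return false;
    //     CI->eraseFromParent();
    //     return true; // The use is dropped from the UseVector.
    //   });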
    /// Run the callback \p CB on each use within the function \p F and forget
    /// the use if the result is true.
    void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
      SmallVector<unsigned, 8> ToBeDeleted;

      unsigned Idx = 0;
      UseVector &UV = getOrCreateUseVector(F);

      for (Use *U : UV) {
        if (CB(*U, *F))
          ToBeDeleted.push_back(Idx);
        ++Idx;
      }

      // Remove the to-be-deleted indices in reverse order as prior
      // modifications will not modify the smaller indices.
      while (!ToBeDeleted.empty()) {
        unsigned Idx = ToBeDeleted.pop_back_val();
        UV[Idx] = UV.back();
        UV.pop_back();
      }
    }

  private:
    /// Map from functions to all uses of this runtime function contained in
    /// them.
    DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
  };

  /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains
  /// (a subset of) all functions that we can look at during this SCC
  /// traversal. This includes functions (transitively) called from the SCC
  /// and the (transitive) callers of SCC functions. We also can look at a
  /// function if there is a "reference edge", i.e., if the function somehow
  /// uses (!=calls) a function in the SCC or a caller of a function in the
  /// SCC.
  void initializeModuleSlice(SetVector<Function *> &SCC) {
    ModuleSlice.insert(SCC.begin(), SCC.end());

    SmallPtrSet<Function *, 16> Seen;
    SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
    while (!Worklist.empty()) {
      Function *F = Worklist.pop_back_val();
      ModuleSlice.insert(F);

      for (Instruction &I : instructions(*F))
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (Function *Callee = CB->getCalledFunction())
            if (Seen.insert(Callee).second)
              Worklist.push_back(Callee);
    }

    Seen.clear();
    Worklist.append(SCC.begin(), SCC.end());
    while (!Worklist.empty()) {
      Function *F = Worklist.pop_back_val();
      ModuleSlice.insert(F);

      // Traverse all transitive uses.
      foreachUse(*F, [&](Use &U) {
        if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
          if (Seen.insert(UsrI->getFunction()).second)
            Worklist.push_back(UsrI->getFunction());
      });
    }
  }

  /// The slice of the module we are allowed to look at.
  SmallPtrSet<Function *, 8> ModuleSlice;

  /// An OpenMP-IR-Builder instance.
  OpenMPIRBuilder OMPBuilder;

  /// Map from runtime function kind to the runtime function description.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;

  /// Map from ICV kind to the ICV description.
  EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVs;
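
  // For intuition, this is roughly what one OMPKinds.def entry populates
  // (illustrative; the exact .def contents are an assumption here): the
  // "nthreads" ICV carries its environment variable name "OMP_NUM_THREADS",
  // an implementation-defined initial value, and its RTL setter/getter pair
  // omp_set_num_threads / omp_get_max_threads.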
  /// Helper to initialize all internal control variable information for those
  /// defined in OMPKinds.def.
  void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL)                                                 \
  {                                                                            \
    auto &ICV = ICVs[_Name];                                                   \
    ICV.Setter = RTL;                                                          \
  }
#define ICV_RT_GET(Name, RTL)                                                  \
  {                                                                            \
    auto &ICV = ICVs[Name];                                                    \
    ICV.Getter = RTL;                                                          \
  }
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
  {                                                                            \
    auto &ICV = ICVs[Enum];                                                    \
    ICV.Name = _Name;                                                          \
    ICV.Kind = Enum;                                                           \
    ICV.InitKind = Init;                                                       \
    ICV.EnvVarName = _EnvVarName;                                              \
    switch (ICV.InitKind) {                                                    \
    case ICV_IMPLEMENTATION_DEFINED:                                           \
      ICV.InitValue = nullptr;                                                 \
      break;                                                                   \
    case ICV_ZERO:                                                             \
      ICV.InitValue = ConstantInt::get(                                        \
          Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
      break;                                                                   \
    case ICV_FALSE:                                                            \
      ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
      break;                                                                   \
    case ICV_LAST:                                                             \
      break;                                                                   \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType, and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    // TODO: We should output information to the user (under debug output
    //       and via remarks).

    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;

      ++RTFTyIt;
    }

    return true;
  }

  // Helper to collect all uses of the declaration in the UsesMap.
  unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
    unsigned NumUses = 0;
    if (!RFI.Declaration)
      return NumUses;
    OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

    if (CollectStats) {
      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
    }

    // TODO: We directly convert uses into proper calls and unknown uses.
    for (Use &U : RFI.Declaration->uses()) {
      if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
        if (ModuleSlice.count(UserI->getFunction())) {
          RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
          ++NumUses;
        }
      } else {
        RFI.getOrCreateUseVector(nullptr).push_back(&U);
        ++NumUses;
      }
    }
    return NumUses;
  }

  // Helper function to recollect uses of all runtime functions.
  void recollectUses() {
    for (int Idx = 0; Idx < RFIs.size(); ++Idx) {
      auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)];
      RFI.clearUsesMap();
      collectUses(RFI, /*CollectStats*/ false);
    }
  }

  /// Helper to initialize all runtime function information for those defined
  /// in OpenMPKinds.def.
  void initializeRuntimeFunctions() {
    Module &M = *((*ModuleSlice.begin())->getParent());

    // Helper macros for handling __VA_ARGS__ in OMP_RTL.
#define OMP_TYPE(VarName, ...)                                                 \
  Type *VarName = OMPBuilder.VarName;                                          \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...)                                           \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
  (void)VarName##Ty;                                                           \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
  FunctionType *VarName = OMPBuilder.VarName;                                  \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...)                                          \
  StructType *VarName = OMPBuilder.VarName;                                    \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = OMPBuilder._ReturnType;                                 \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = collectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // TODO: We should attach the attributes defined in OMPKinds.def.
  }

  /// Collection of known kernels (\see Kernel) in the module.
  SmallPtrSetImpl<Kernel> &Kernels;
};

struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}

  /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
  bool run() {
    if (SCC.empty())
      return false;

    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions in a slice with "
                      << OMPInfoCache.ModuleSlice.size() << " functions\n");

    if (PrintICVValues)
      printICVs();
    if (PrintOpenMPKernels)
      printKernels();

    Changed |= rewriteDeviceCodeStateMachine();

    Changed |= runAttributor();

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    Changed |= deduplicateRuntimeCalls();
    Changed |= deleteParallelRegions();
    if (HideMemoryTransferLatency)
      Changed |= hideMemTransfersLatency();

    return Changed;
  }

  /// Print initial ICV values for testing.
  /// FIXME: This should be done from the Attributor once it is added.
  void printICVs() const {
    InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};

    for (Function *F : OMPInfoCache.ModuleSlice) {
      for (auto ICV : ICVs) {
        auto ICVInfo = OMPInfoCache.ICVs[ICV];
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                    << " Value: "
                    << (ICVInfo.InitValue
                            ? ICVInfo.InitValue->getValue().toString(10, true)
                            : "IMPLEMENTATION_DEFINED");
        };

        emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
      }
    }
  }

  /// Print OpenMP GPU kernels for testing.
  void printKernels() const {
    for (Function *F : SCC) {
      if (!OMPInfoCache.Kernels.count(F))
        continue;

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP GPU kernel "
                  << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
      };

      emitRemarkOnFunction(F, "OpenMPGPU", Remark);
    }
  }

  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given, it has to be the callee or nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(U.getUser());
    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

  /// Return the call if \p V is a regular call. If \p RFI is given, it has to
  /// be the callee or nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && !CI->hasOperandBundles() &&
        (!RFI || CI->getCalledFunction() == RFI->Declaration))
      return CI;
    return nullptr;
  }

private:
  /// Try to delete parallel regions if possible.
  bool deleteParallelRegions() {
    const unsigned CallbackCalleeOperand = 2;

    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    bool Changed = false;
    auto DeleteCallCB = [&](Use &U, Function &) {
      CallInst *CI = getCallIfRegularCall(U);
      if (!CI)
        return false;
      auto *Fn = dyn_cast<Function>(
          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
      if (!Fn)
        return false;
      if (!Fn->onlyReadsMemory())
        return false;
      if (!Fn->hasFnAttribute(Attribute::WillReturn))
        return false;

      LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
                        << CI->getCaller()->getName() << "\n");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Parallel region in "
                  << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
                  << " deleted";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
                                     Remark);

      CGUpdater.removeCallSite(*CI);
      CI->eraseFromParent();
      Changed = true;
      ++NumOpenMPParallelRegionsDeleted;
      return true;
    };

    RFI.foreachUse(SCC, DeleteCallCB);

    return Changed;
  }

  /// Try to eliminate runtime calls by reusing existing ones.
  bool deduplicateRuntimeCalls() {
    bool Changed = false;

    RuntimeFunction DeduplicableRuntimeCallIDs[] = {
        OMPRTL_omp_get_num_threads,
        OMPRTL_omp_in_parallel,
        OMPRTL_omp_get_cancellation,
        OMPRTL_omp_get_thread_limit,
        OMPRTL_omp_get_supported_active_levels,
        OMPRTL_omp_get_level,
        OMPRTL_omp_get_ancestor_thread_num,
        OMPRTL_omp_get_team_size,
        OMPRTL_omp_get_active_level,
        OMPRTL_omp_in_final,
        OMPRTL_omp_get_proc_bind,
        OMPRTL_omp_get_num_places,
        OMPRTL_omp_get_num_procs,
        OMPRTL_omp_get_place_num,
        OMPRTL_omp_get_partition_num_places,
        OMPRTL_omp_get_partition_place_nums};

    // Global-tid is handled separately.
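    // As a sketch of the intended effect (hypothetical IR), two calls
    //   %a = call i32 @omp_get_level()
    //   %b = call i32 @omp_get_level()
    // in one function collapse so that users of %b reuse %a, while
    // @__kmpc_global_thread_num calls can additionally be replaced by an
    // argument already known to hold the global thread ID.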
    SmallSetVector<Value *, 16> GTIdArgs;
    collectGlobalThreadIdArguments(GTIdArgs);
    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                      << " global thread ID arguments\n");

    for (Function *F : SCC) {
      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
        deduplicateRuntimeCalls(*F,
                                OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);

      // __kmpc_global_thread_num is special as we can replace it with an
      // argument in enough cases to make it worth trying.
      Value *GTIdArg = nullptr;
      for (Argument &Arg : F->args())
        if (GTIdArgs.count(&Arg)) {
          GTIdArg = &Arg;
          break;
        }
      Changed |= deduplicateRuntimeCalls(
          *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
    }

    return Changed;
  }

  /// Tries to hide the latency of runtime calls that involve host to
  /// device memory transfers by splitting them into their "issue" and "wait"
  /// versions. The "issue" is moved upwards as much as possible. The "wait" is
  /// moved downwards as much as possible. The "issue" issues the memory
  /// transfer asynchronously, returning a handle. The "wait" waits on the
  /// returned handle for the memory transfer to finish.
  bool hideMemTransfersLatency() {
    auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
    bool Changed = false;
    auto SplitMemTransfers = [&](Use &U, Function &Decl) {
      auto *RTCall = getCallIfRegularCall(U, &RFI);
      if (!RTCall)
        return false;

      // TODO: Check if the call can be moved upwards as well.
      bool WasSplit = false;
      Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
      if (WaitMovementPoint)
        WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);

      Changed |= WasSplit;
      return WasSplit;
    };
    RFI.foreachUse(SCC, SplitMemTransfers);

    return Changed;
  }

  /// Returns the instruction where the "wait" counterpart of \p RuntimeCall
  /// can be moved. Returns nullptr if the movement is not possible, or not
  /// worth it.
  Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
    // FIXME: This traverses only the BasicBlock where RuntimeCall is.
    //        Make it traverse the CFG.

    Instruction *CurrentI = &RuntimeCall;
    bool IsWorthIt = false;
    while ((CurrentI = CurrentI->getNextNode())) {

      // TODO: Once we detect the regions to be offloaded we should use the
      //       alias analysis manager to check if CurrentI may modify one of
      //       the offloaded regions.
      if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
        if (IsWorthIt)
          return CurrentI;

        return nullptr;
      }

      // FIXME: For now, moving the call over anything without side effects is
      //        considered worth it.
      IsWorthIt = true;
    }

    // Return the end of the BasicBlock.
    return RuntimeCall.getParent()->getTerminator();
  }

  /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
  bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
                               Instruction &WaitMovementPoint) {
    auto &IRBuilder = OMPInfoCache.OMPBuilder;
    // Add "issue" runtime call declaration:
    // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
    //   i8**, i8**, i64*, i64*)
    FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___tgt_target_data_begin_mapper_issue);
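
    // Sketch of the rewrite this function performs (hypothetical IR):
    //   call void @__tgt_target_data_begin_mapper(...)
    // becomes
    //   %handle = call %struct.__tgt_async_info
    //       @__tgt_target_data_begin_mapper_issue(...)
    //   ... code without side effects ...
    //   call void @__tgt_target_data_begin_mapper_wait(i64 %device_id,
    //                                                  %handle)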
    // Change the RuntimeCall call site for its asynchronous version.
    SmallVector<Value *, 8> Args;
    for (auto &Arg : RuntimeCall.args())
      Args.push_back(Arg.get());

    CallInst *IssueCallsite =
        CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall);
    RuntimeCall.eraseFromParent();

    // Add "wait" runtime call declaration:
    // declare void @__tgt_target_data_begin_wait(i64,
    //   %struct.__tgt_async_info)
    FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
        M, OMPRTL___tgt_target_data_begin_mapper_wait);

    // Add call site to WaitDecl.
    Value *WaitParams[2] = {
        IssueCallsite->getArgOperand(0), // device_id.
        IssueCallsite                    // returned handle.
    };
    CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);

    return true;
  }

  static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                    bool GlobalOnly, bool &SingleChoice) {
    if (CurrentIdent == NextIdent)
      return CurrentIdent;

    // TODO: Figure out how to actually combine multiple debug locations. For
    //       now we just keep an existing one if there is a single choice.
    if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
      SingleChoice = !CurrentIdent;
      return NextIdent;
    }
    return nullptr;
  }

  /// Return a `struct ident_t*` value that represents the ones used in the
  /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
  /// return a local `struct ident_t*`. For now, if we cannot find a suitable
  /// return value we create one from scratch. We also do not yet combine
  /// information, e.g., the source locations, see combinedIdentStruct.
  Value *
  getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
                                 Function &F, bool GlobalOnly) {
    bool SingleChoice = true;
    Value *Ident = nullptr;
    auto CombineIdentStruct = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || &F != &Caller)
        return false;
      Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
                                  /* GlobalOnly */ true, SingleChoice);
      return false;
    };
    RFI.foreachUse(SCC, CombineIdentStruct);

    if (!Ident || !SingleChoice) {
      // The IRBuilder uses the insertion block to get to the module, this is
      // unfortunate but we work around it for now.
      if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
        OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
            &F.getEntryBlock(), F.getEntryBlock().begin()));
      // Create a fallback location if none was found.
      // TODO: Use the debug locations of the calls instead.
      Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
      Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
    }
    return Ident;
  }

  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given.
  bool deduplicateRuntimeCalls(Function &F,
                               OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Value *ReplVal = nullptr) {
    auto *UV = RFI.getUseVector(F);
    if (!UV || UV->size() + (ReplVal != nullptr) < 2)
      return false;

    LLVM_DEBUG(
        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
               << (ReplVal ? " with an existing value\n" : "\n") << "\n");

    assert((!ReplVal || (isa<Argument>(ReplVal) &&
                         cast<Argument>(ReplVal)->getParent() == &F)) &&
           "Unexpected replacement value!");

    // TODO: Use dominance to find a good position instead.
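    // The lambda below is deliberately conservative: a call is only hoisted
    // to the entry block if, apart from a leading `ident_t *` operand
    // (typically a global source location), none of its operands are
    // instructions, since those might not dominate the entry block.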
    auto CanBeMoved = [this](CallBase &CB) {
      unsigned NumArgs = CB.getNumArgOperands();
      if (NumArgs == 0)
        return true;
      if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
        return false;
      for (unsigned u = 1; u < NumArgs; ++u)
        if (isa<Instruction>(CB.getArgOperand(u)))
          return false;
      return true;
    };

    if (!ReplVal) {
      for (Use *U : *UV)
        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
          if (!CanBeMoved(*CI))
            continue;

          auto Remark = [&](OptimizationRemark OR) {
            auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
            return OR << "OpenMP runtime call "
                      << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
                      << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
          };
          emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);

          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
          ReplVal = CI;
          break;
        }
      if (!ReplVal)
        return false;
    }

    // If we use a call as a replacement value we need to make sure the ident
    // is valid at the new location. For now we just pick a global one, either
    // existing and used by one of the calls, or created from scratch.
    if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
      if (CI->getNumArgOperands() > 0 &&
          CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
        Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                      /* GlobalOnly */ true);
        CI->setArgOperand(0, Ident);
      }
    }

    bool Changed = false;
    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || CI == ReplVal || &F != &Caller)
        return false;
      assert(CI->getCaller() == &F && "Unexpected call!");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP runtime call "
                  << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
      };
      emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);

      CGUpdater.removeCallSite(*CI);
      CI->replaceAllUsesWith(ReplVal);
      CI->eraseFromParent();
      ++NumOpenMPRuntimeCallsDeduplicated;
      Changed = true;
      return true;
    };
    RFI.foreachUse(SCC, ReplaceAndDeleteCB);

    return Changed;
  }

  /// Collect arguments that represent the global thread id in \p GTIdArgs.
  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
    //       initialization. We could define an AbstractAttribute instead and
    //       run the Attributor here once it can be run as an SCC pass.

    // Helper to check the argument \p ArgNo at all call sites of \p F for
    // a GTId.
    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
      if (!F.hasLocalLinkage())
        return false;
      for (Use &U : F.uses()) {
        if (CallInst *CI = getCallIfRegularCall(U)) {
          Value *ArgOp = CI->getArgOperand(ArgNo);
          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
              getCallIfRegularCall(
                  *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
            continue;
        }
        return false;
      }
      return true;
    };
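
    // For intuition (hypothetical source): given
    //   %tid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @0)
    //   call void @helper(i32 %tid)
    // the first parameter of the internal function @helper is a GTId argument
    // if every call site of @helper passes a GTId, which the fixpoint loop
    // below establishes.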
    // Helper to identify uses of a GTId as GTId arguments.
    auto AddUserArgs = [&](Value &GTId) {
      for (Use &U : GTId.uses())
        if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
          if (CI->isArgOperand(&U))
            if (Function *Callee = CI->getCalledFunction())
              if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
                GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
    };

    // The argument users of __kmpc_global_thread_num calls are GTIds.
    OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];

    GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
      if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
        AddUserArgs(*CI);
      return false;
    });

    // Transitively search for more arguments by looking at the users of the
    // ones we know already. During the search the GTIdArgs vector is extended
    // so we cannot cache the size nor can we use a range-based for loop.
    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
      AddUserArgs(*GTIdArgs[u]);
  }

  /// Kernel (=GPU) optimizations and utility functions
  ///
  ///{{

  /// Check if \p F is a kernel, hence an entry point for target offloading.
  bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }

  /// Cache to remember the unique kernel for a function.
  DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;

  /// Find the unique kernel that will execute \p F, if any.
  Kernel getUniqueKernelFor(Function &F);

  /// Find the unique kernel that will execute \p I, if any.
  Kernel getUniqueKernelFor(Instruction &I) {
    return getUniqueKernelFor(*I.getFunction());
  }

  /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
  /// the cases where we can avoid taking the address of a function.
  bool rewriteDeviceCodeStateMachine();

  ///
  ///}}

  /// Emit a remark generically.
  ///
  /// This template function can be used to generically emit a remark. The
  /// RemarkKind should be one of the following:
  ///   - OptimizationRemark to indicate a successful optimization attempt
  ///   - OptimizationRemarkMissed to report a failed optimization attempt
  ///   - OptimizationRemarkAnalysis to provide additional information about an
  ///     optimization attempt
  ///
  /// The remark is built using a callback function provided by the caller that
  /// takes a RemarkKind as input and returns a RemarkKind.
  template <typename RemarkKind,
            typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
  void emitRemark(Instruction *Inst, StringRef RemarkName,
                  RemarkCallBack &&RemarkCB) const {
    Function *F = Inst->getParent()->getParent();
    auto &ORE = OREGetter(F);

    ORE.emit(
        [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
  }

  /// Emit a remark on a function. Since only OptimizationRemark supports this,
  /// it can't be made generic.
  void
  emitRemarkOnFunction(Function *F, StringRef RemarkName,
                       function_ref<OptimizationRemark(OptimizationRemark &&)>
                           &&RemarkCB) const {
    auto &ORE = OREGetter(F);

    ORE.emit([&]() {
      return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
    });
  }

  /// The underlying module.
  Module &M;

  /// The SCC we are operating on.
  SmallVectorImpl<Function *> &SCC;

  /// Callback to update the call graph, the first argument is a removed call,
  /// the second an optional replacement call.
  CallGraphUpdater &CGUpdater;

  /// Callback to get an OptimizationRemarkEmitter from a Function *.
  OptimizationRemarkGetter OREGetter;

  /// OpenMP-specific information cache. Also used for Attributor runs.
  OMPInformationCache &OMPInfoCache;

  /// Attributor instance.
  Attributor &A;

  /// Helper function to run the Attributor on the SCC.
  bool runAttributor() {
    if (SCC.empty())
      return false;

    registerAAs();

    ChangeStatus Changed = A.run();

    LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
                      << " functions, result: " << Changed << ".\n");

    return Changed == ChangeStatus::CHANGED;
  }

  /// Populate the Attributor with abstract attribute opportunities in the
  /// function.
  void registerAAs() {
    if (SCC.empty())
      return;

    // Create CallSite AAs for all getters.
    for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
      auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];

      auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];

      auto CreateAA = [&](Use &U, Function &Caller) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
        if (!CI)
          return false;

        auto &CB = cast<CallBase>(*CI);

        IRPosition CBPos = IRPosition::callsite_function(CB);
        A.getOrCreateAAFor<AAICVTracker>(CBPos);
        return false;
      };

      GetterRFI.foreachUse(SCC, CreateAA);
    }
  }
};

Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
  if (!OMPInfoCache.ModuleSlice.count(&F))
    return nullptr;

  // Use a scope to keep the lifetime of the CachedKernel short.
  {
    Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
    if (CachedKernel)
      return *CachedKernel;

    // TODO: We should use an AA to create an (optimistic and callback
    //       call-aware) call graph. For now we stick to simple patterns that
    //       are less powerful, basically the worst fixpoint.
    if (isKernel(F)) {
      CachedKernel = Kernel(&F);
      return *CachedKernel;
    }

    CachedKernel = nullptr;
    if (!F.hasLocalLinkage())
      return nullptr;
  }

  auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
    if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
      // Allow use in equality comparisons.
      if (Cmp->isEquality())
        return getUniqueKernelFor(*Cmp);
      return nullptr;
    }
    if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
      // Allow direct calls.
      if (CB->isCallee(&U))
        return getUniqueKernelFor(*CB);
      // Allow the use in __kmpc_kernel_prepare_parallel calls.
      if (Function *Callee = CB->getCalledFunction())
        if (Callee->getName() == "__kmpc_kernel_prepare_parallel")
          return getUniqueKernelFor(*CB);
      return nullptr;
    }
    // Disallow every other use.
    return nullptr;
  };

  // TODO: In the future we want to track more than just a unique kernel.
  SmallPtrSet<Kernel, 2> PotentialKernels;
  foreachUse(F, [&](const Use &U) {
    PotentialKernels.insert(GetUniqueKernelForUse(U));
  });

  Kernel K = nullptr;
  if (PotentialKernels.size() == 1)
    K = *PotentialKernels.begin();

  // Cache the result.
  UniqueKernelMap[&F] = K;

  return K;
}

bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
  OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel];

  bool Changed = false;
  if (!KernelPrepareParallelRFI)
    return Changed;

  for (Function *F : SCC) {

    // Check if the function is used in a __kmpc_kernel_prepare_parallel call
    // at all.
    bool UnknownUse = false;
    bool KernelPrepareUse = false;
    unsigned NumDirectCalls = 0;

    SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
    foreachUse(*F, [&](Use &U) {
      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
        if (CB->isCallee(&U)) {
          ++NumDirectCalls;
          return;
        }

      if (isa<ICmpInst>(U.getUser())) {
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }
      if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall(
                                   *U.getUser(), &KernelPrepareParallelRFI)) {
        KernelPrepareUse = true;
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }
      UnknownUse = true;
    });

    // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
    // use.
    if (!KernelPrepareUse)
      continue;

    {
      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Found a parallel region that is called in a target "
                     "region but not part of a combined target construct nor "
                     "nested inside a target construct without intermediate "
                     "code. This can lead to excessive register usage for "
                     "unrelated target regions in the same translation unit "
                     "due to spurious call edges assumed by ptxas.";
      };
      emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
    }

    // If this ever hits, we should investigate.
    // TODO: Checking the number of uses is not a necessary restriction and
    //       should be lifted.
    if (UnknownUse || NumDirectCalls != 1 ||
        ToBeReplacedStateMachineUses.size() != 2) {
      {
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "Parallel region is used in "
                    << (UnknownUse ? "unknown" : "unexpected")
                    << " ways; will not attempt to rewrite the state machine.";
        };
        emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
      }
      continue;
    }

    // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
    // up if the function is not called from a unique kernel.
    Kernel K = getUniqueKernelFor(*F);
    if (!K) {
      {
        auto Remark = [&](OptimizationRemark OR) {
          return OR << "Parallel region is not known to be called from a "
                       "unique single target region, maybe the surrounding "
                       "function has external linkage?; will not attempt to "
                       "rewrite the state machine use.";
        };
        emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernels",
                             Remark);
      }
      continue;
    }

    // We now know F is a parallel body function called only from the kernel K.
    // We also identified the state machine uses in which we replace the
    // function pointer by a new global symbol for identification purposes.
    // This ensures only direct calls to the function are left.
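    // Conceptually (hypothetical IR), a state machine comparison such as
    //   %cmp = icmp eq i8* %work_fn, bitcast (void ()* @par_body to i8*)
    // becomes
    //   %cmp = icmp eq i8* %work_fn, @par_body.ID
    // so the parallel body is no longer address-taken there.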

    {
      auto RemarkParallelRegion = [&](OptimizationRemark OR) {
        return OR << "Specialize parallel region that is only reached from a "
                     "single target region to avoid spurious call edges and "
                     "excessive register usage in other target regions. "
                     "(parallel region ID: "
                  << ore::NV("OpenMPParallelRegion", F->getName())
                  << ", kernel ID: "
                  << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
      };
      emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
                           RemarkParallelRegion);
      auto RemarkKernel = [&](OptimizationRemark OR) {
        return OR << "Target region containing the parallel region that is "
                     "specialized. (parallel region ID: "
                  << ore::NV("OpenMPParallelRegion", F->getName())
                  << ", kernel ID: "
                  << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
      };
      emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
    }

    Module &M = *F->getParent();
    Type *Int8Ty = Type::getInt8Ty(M.getContext());

    auto *ID = new GlobalVariable(
        M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
        UndefValue::get(Int8Ty), F->getName() + ".ID");

    for (Use *U : ToBeReplacedStateMachineUses)
      U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));

    ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;

    Changed = true;
  }

  return Changed;
}

/// Abstract Attribute for tracking ICV values.
struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    if (!F || !A.isFunctionIPOAmendable(*F))
      indicatePessimisticFixpoint();
  }

  /// Returns true if the value is assumed to be tracked.
  bool isAssumedTracked() const { return getAssumed(); }

  /// Returns true if the value is known to be tracked.
  bool isKnownTracked() const { return getAssumed(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);

  /// Return the value with which \p I can be replaced for specific \p ICV.
  virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                                const Instruction *I,
                                                Attributor &A) const {
    return None;
  }

  /// Return an assumed unique ICV value if a single candidate is found. If
  /// there cannot be one, return a nullptr. If it is not clear yet, return
  /// the Optional::NoneType.
  virtual Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const = 0;

  // Currently only nthreads is being tracked.
  // This array will only grow with time.
  InternalControlVar TrackableICVs[1] = {ICV_nthreads};

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAICVTracker"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAICVTracker.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

struct AAICVTrackerFunction : public AAICVTracker {
  AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override { return "ICVTrackerFunction"; }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICVs to their values at specific program points.
  EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;

    Function *F = getAnchorScope();

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    for (InternalControlVar ICV : TrackableICVs) {
      auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];

      auto &ValuesMap = ICVReplacementValuesMap[ICV];
      auto TrackValues = [&](Use &U, Function &) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
        if (!CI)
          return false;

        // FIXME: handle setters with more than one argument.
        /// Track new value.
        if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
          HasChanged = ChangeStatus::CHANGED;

        return false;
      };

      auto CallCheck = [&](Instruction &I) {
        Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
        if (ReplVal.hasValue() &&
            ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
          HasChanged = ChangeStatus::CHANGED;

        return true;
      };

      // Track all changes of an ICV.
      SetterRFI.foreachUse(TrackValues, F);

      A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
                                /* CheckBBLivenessOnly */ true);

      /// TODO: Figure out a way to avoid adding an entry in
      ///       ICVReplacementValuesMap.
      Instruction *Entry = &F->getEntryBlock().front();
      if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
        ValuesMap.insert(std::make_pair(Entry, nullptr));
    }

    return HasChanged;
  }
  /// Helper to check if \p I is a call and get the value for it if it is
  /// unique.
  Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
                                    InternalControlVar &ICV) const {

    const auto *CB = dyn_cast<CallBase>(I);
    if (!CB)
      return None;

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
    auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
    Function *CalledFunction = CB->getCalledFunction();

    // Indirect call, assume the ICV changes.
    if (!CalledFunction)
      return nullptr;

    if (CalledFunction == GetterRFI.Declaration)
      return None;
    if (CalledFunction == SetterRFI.Declaration) {
      if (ICVReplacementValuesMap[ICV].count(I))
        return ICVReplacementValuesMap[ICV].lookup(I);

      return nullptr;
    }

    // Since we don't know, assume it changes the ICV.
    if (CalledFunction->isDeclaration())
      return nullptr;

    const auto &ICVTrackingAA =
        A.getAAFor<AAICVTracker>(*this, IRPosition::callsite_returned(*CB));

    if (ICVTrackingAA.isAssumedTracked())
      return ICVTrackingAA.getUniqueReplacementValue(ICV);

    // If we don't know, assume it changes.
    return nullptr;
  }

  // We don't check for a unique value of a function, so return None.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return None;
  }

  /// Return the value with which \p I can be replaced for specific \p ICV.
  Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                        const Instruction *I,
                                        Attributor &A) const override {
    const auto &ValuesMap = ICVReplacementValuesMap[ICV];
    if (ValuesMap.count(I))
      return ValuesMap.lookup(I);

    SmallVector<const Instruction *, 16> Worklist;
    SmallPtrSet<const Instruction *, 16> Visited;
    Worklist.push_back(I);

    Optional<Value *> ReplVal;

    while (!Worklist.empty()) {
      const Instruction *CurrInst = Worklist.pop_back_val();
      if (!Visited.insert(CurrInst).second)
        continue;

      const BasicBlock *CurrBB = CurrInst->getParent();

      // Go up and look for all potential setters/calls that might change the
      // ICV.
      while ((CurrInst = CurrInst->getPrevNode())) {
        if (ValuesMap.count(CurrInst)) {
          Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
          // Unknown value, track new.
          if (!ReplVal.hasValue()) {
            ReplVal = NewReplVal;
            break;
          }

          // If we found a new value, we can't know the ICV value anymore.
          if (NewReplVal.hasValue())
            if (ReplVal != NewReplVal)
              return nullptr;

          break;
        }

        Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
        if (!NewReplVal.hasValue())
          continue;

        // Unknown value, track new.
        if (!ReplVal.hasValue()) {
          ReplVal = NewReplVal;
          break;
        }

        // We found a new value, we can't know the ICV value anymore.
        if (ReplVal != NewReplVal)
          return nullptr;
      }

      // If we are in the same BB and we have a value, we are done.
      if (CurrBB == I->getParent() && ReplVal.hasValue())
        return ReplVal;
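
      // Illustration of the backward walk (hypothetical IR): starting at a
      // getter and walking up, a setter such as
      //   call void @omp_set_num_threads(i32 4)
      // makes i32 4 the candidate replacement; conflicting candidates found
      // on different paths collapse to nullptr, i.e., an unknown value.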
      // Go through all predecessors and add terminators for analysis.
      for (const BasicBlock *Pred : predecessors(CurrBB))
        if (const Instruction *Terminator = Pred->getTerminator())
          Worklist.push_back(Terminator);
    }

    return ReplVal;
  }
};

struct AAICVTrackerFunctionReturned : AAICVTracker {
  AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override {
    return "ICVTrackerFunctionReturned";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICVs to their values at specific program points.
  EnumeratedArray<Optional<Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Return the value with which \p I can be replaced for specific \p ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ICVReplacementValuesMap[ICV];
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()));

    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    for (InternalControlVar ICV : TrackableICVs) {
      Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
      Optional<Value *> UniqueICVValue;

      auto CheckReturnInst = [&](Instruction &I) {
        Optional<Value *> NewReplVal =
            ICVTrackingAA.getReplacementValue(ICV, &I, A);

        // If we found a second ICV value there is no unique returned value.
        if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
          return false;

        UniqueICVValue = NewReplVal;

        return true;
      };

      if (!A.checkForAllInstructions(CheckReturnInst, *this,
                                     {Instruction::Ret},
                                     /* CheckBBLivenessOnly */ true))
        UniqueICVValue = nullptr;

      if (UniqueICVValue == ReplVal)
        continue;

      ReplVal = UniqueICVValue;
      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }
};

struct AAICVTrackerCallSite : AAICVTracker {
  AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    if (!F || !A.isFunctionIPOAmendable(*F))
      indicatePessimisticFixpoint();

    // We only initialize this AA for getters, so we need to know which ICV it
    // gets.
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    for (InternalControlVar ICV : TrackableICVs) {
      auto ICVInfo = OMPInfoCache.ICVs[ICV];
      auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
      if (Getter.Declaration == getAssociatedFunction()) {
        AssociatedICV = ICVInfo.Kind;
        return;
      }
    }
    /// Unknown ICV.
    indicatePessimisticFixpoint();
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!ReplVal.hasValue() || !ReplVal.getValue())
      return ChangeStatus::UNCHANGED;

    A.changeValueAfterManifest(*getCtxI(), **ReplVal);
    A.deleteAfterManifest(*getCtxI());

    return ChangeStatus::CHANGED;
  }

  // FIXME: come up with a better string.
  const std::string getAsStr() const override { return "ICVTrackerCallSite"; }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  InternalControlVar AssociatedICV;
  Optional<Value *> ReplVal;

  ChangeStatus updateImpl(Attributor &A) override {
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()));

    // We don't have any information, so we assume it changes the ICV.
    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    Optional<Value *> NewReplVal =
        ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);

    if (ReplVal == NewReplVal)
      return ChangeStatus::UNCHANGED;

    ReplVal = NewReplVal;
    return ChangeStatus::CHANGED;
  }

  // Return the value with which the associated value can be replaced for the
  // specific \p ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ReplVal;
  }
};

struct AAICVTrackerCallSiteReturned : AAICVTracker {
  AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with a better string.
  const std::string getAsStr() const override {
    return "ICVTrackerCallSiteReturned";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICVs to their values at specific program points.
  EnumeratedArray<Optional<Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Return the value with which the associated value can be replaced for the
  /// specific \p ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ICVReplacementValuesMap[ICV];
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::returned(*getAssociatedFunction()));

    // We don't have any information, so we assume it changes the ICV.
    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    for (InternalControlVar ICV : TrackableICVs) {
      Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
      Optional<Value *> NewReplVal =
          ICVTrackingAA.getUniqueReplacementValue(ICV);

      if (ReplVal == NewReplVal)
        continue;

      ReplVal = NewReplVal;
      Changed = ChangeStatus::CHANGED;
    }
    return Changed;
  }
};
} // namespace

const char AAICVTracker::ID = 0;

AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
                                              Attributor &A) {
  AAICVTracker *AA = nullptr;
  switch (IRP.getPositionKind()) {
  case IRPosition::IRP_INVALID:
  case IRPosition::IRP_FLOAT:
  case IRPosition::IRP_ARGUMENT:
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    llvm_unreachable("ICVTracker: invalid IRPosition!");
  case IRPosition::IRP_FUNCTION:
    AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
    break;
  case IRPosition::IRP_RETURNED:
    AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
    break;
  case IRPosition::IRP_CALL_SITE_RETURNED:
    AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
    break;
  case IRPosition::IRP_CALL_SITE:
    AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
    break;
  }

  return *AA;
}

PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
                                     CGSCCAnalysisManager &AM,
                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
  if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
    return PreservedAnalyses::all();

  if (DisableOpenMPOptimizations)
    return PreservedAnalyses::all();

  SmallVector<Function *, 16> SCC;
  // If there are kernels in the module, we have to run on all SCCs.
  bool SCCIsInteresting = !OMPInModule.getKernels().empty();
  for (LazyCallGraph::Node &N : C) {
    Function *Fn = &N.getFunction();
    SCC.push_back(Fn);

    // Do we already know that the SCC contains kernels,
    // or that OpenMP functions are called from this SCC?
    if (SCCIsInteresting)
      continue;
    // If not, let's check that.
    SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
  }

  if (!SCCIsInteresting || SCC.empty())
    return PreservedAnalyses::all();

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  AnalysisGetter AG(FAM);

  auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };

  CallGraphUpdater CGUpdater;
  CGUpdater.initialize(CG, C, AM, UR);

  SetVector<Function *> Functions(SCC.begin(), SCC.end());
  BumpPtrAllocator Allocator;
  OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                /*CGSCC*/ Functions, OMPInModule.getKernels());

  SetVector<Function *> ModuleSlice(InfoCache.ModuleSlice.begin(),
                                    InfoCache.ModuleSlice.end());
  Attributor A(ModuleSlice, InfoCache, CGUpdater);

  // TODO: Compute the module slice we are allowed to look at.
  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
  bool Changed = OMPOpt.run();
  if (Changed)
    return PreservedAnalyses::none();

  return PreservedAnalyses::all();
}

namespace {

struct OpenMPOptLegacyPass : public CallGraphSCCPass {
  CallGraphUpdater CGUpdater;
  OpenMPInModule OMPInModule;
  static char ID;

  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool doInitialization(CallGraph &CG) override {
    // Disable the pass if there is no OpenMP (runtime call) in the module.
    containsOpenMP(CG.getModule(), OMPInModule);
    return false;
  }

  bool runOnSCC(CallGraphSCC &CGSCC) override {
    if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
      return false;
    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
      return false;

    SmallVector<Function *, 16> SCC;
    // If there are kernels in the module, we have to run on all SCCs.
    bool SCCIsInteresting = !OMPInModule.getKernels().empty();
    for (CallGraphNode *CGN : CGSCC) {
      Function *Fn = CGN->getFunction();
      if (!Fn || Fn->isDeclaration())
        continue;
      SCC.push_back(Fn);

      // Do we already know that the SCC contains kernels,
      // or that OpenMP functions are called from this SCC?
      if (SCCIsInteresting)
        continue;
      // If not, let's check that.
      SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
    }

    if (!SCCIsInteresting || SCC.empty())
      return false;

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CGUpdater.initialize(CG, CGSCC);

    // Maintain a map of functions to avoid rebuilding the ORE.
    DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
    auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
      std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
      if (!ORE)
        ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    AnalysisGetter AG;
    SetVector<Function *> Functions(SCC.begin(), SCC.end());
    BumpPtrAllocator Allocator;
    OMPInformationCache InfoCache(
        *(Functions.back()->getParent()), AG, Allocator,
        /*CGSCC*/ Functions, OMPInModule.getKernels());

    SetVector<Function *> ModuleSlice(InfoCache.ModuleSlice.begin(),
                                      InfoCache.ModuleSlice.end());
    Attributor A(ModuleSlice, InfoCache, CGUpdater);

    // TODO: Compute the module slice we are allowed to look at.
    OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
    return OMPOpt.run();
  }

  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};

} // end anonymous namespace

void OpenMPInModule::identifyKernels(Module &M) {

  NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
  if (!MD)
    return;

  for (auto *Op : MD->operands()) {
    if (Op->getNumOperands() < 2)
      continue;
    MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
    if (!KindID || KindID->getString() != "kernel")
      continue;

    Function *KernelFn =
        mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
    if (!KernelFn)
      continue;

    ++NumOpenMPTargetRegionKernels;

    Kernels.insert(KernelFn);
  }
}

bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
  if (OMPInModule.isKnown())
    return OMPInModule;

  auto RecordFunctionsContainingUsesOf = [&](Function *F) {
    for (User *U : F->users())
      if (auto *I = dyn_cast<Instruction>(U))
        OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
  };

  // MSVC doesn't like long if-else chains for some reason and instead just
  // issues an error. Work around it.
  do {
#define OMP_RTL(_Enum, _Name, ...)                                             \
  if (Function *F = M.getFunction(_Name)) {                                    \
    RecordFunctionsContainingUsesOf(F);                                        \
    OMPInModule = true;                                                        \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  } while (false);

  // Identify kernels once. TODO: We should split the OMPInformationCache into
  // a module and an SCC part. The kernel information, among other things,
  // could go into the module part.
  if (OMPInModule.isKnown() && OMPInModule) {
    OMPInModule.identifyKernels(M);
    return true;
  }

  return OMPInModule = false;
}

char OpenMPOptLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
                      "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
                    "OpenMP specific optimizations", false, false)

Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }