1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // OpenMP specific optimizations:
10 //
11 // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/OpenMPOpt.h"
16 
17 #include "llvm/ADT/EnumeratedArray.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CallGraph.h"
20 #include "llvm/Analysis/CallGraphSCCPass.h"
21 #include "llvm/Frontend/OpenMP/OMPConstants.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Transforms/IPO.h"
27 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
28 
29 using namespace llvm;
30 using namespace omp;
31 using namespace types;
32 
33 #define DEBUG_TYPE "openmp-opt"
34 
35 static cl::opt<bool> DisableOpenMPOptimizations(
36     "openmp-opt-disable", cl::ZeroOrMore,
37     cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
38     cl::init(false));
39 
40 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
41           "Number of OpenMP runtime calls deduplicated");
42 STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
43           "Number of OpenMP runtime functions identified");
44 STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
45           "Number of OpenMP runtime function uses identified");
46 
47 #if !defined(NDEBUG)
48 static constexpr auto TAG = "[" DEBUG_TYPE "]";
49 #endif
50 
51 namespace {
52 struct OpenMPOpt {
53 
54   OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
55             SmallPtrSetImpl<Function *> &ModuleSlice,
56             CallGraphUpdater &CGUpdater)
57       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
58         OMPBuilder(M), CGUpdater(CGUpdater) {
59     initializeTypes(M);
60     initializeRuntimeFunctions();
61     OMPBuilder.initialize();
62   }
63 
64   /// Generic information that describes a runtime function
65   struct RuntimeFunctionInfo {
66     /// The kind, as described by the RuntimeFunction enum.
67     RuntimeFunction Kind;
68 
69     /// The name of the function.
70     StringRef Name;
71 
72     /// Flag to indicate a variadic function.
73     bool IsVarArg;
74 
75     /// The return type of the function.
76     Type *ReturnType;
77 
78     /// The argument types of the function.
79     SmallVector<Type *, 8> ArgumentTypes;
80 
81     /// The declaration if available.
82     Function *Declaration = nullptr;
83 
84     /// Uses of this runtime function per function containing the use.
85     DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
86 
87     /// Return the number of arguments (or the minimal number for variadic
88     /// functions).
89     size_t getNumArgs() const { return ArgumentTypes.size(); }
90 
91     /// Run the callback \p CB on each use and forget the use if the result is
92     /// true. The callback will be fed the function in which the use was
93     /// encountered as second argument.
94     void foreachUse(function_ref<bool(Use &, Function &)> CB) {
95       SmallVector<Use *, 8> ToBeDeleted;
96       for (auto &It : UsesMap) {
97         ToBeDeleted.clear();
98         for (Use *U : It.second)
99           if (CB(*U, *It.first))
100             ToBeDeleted.push_back(U);
101         for (Use *U : ToBeDeleted)
102           It.second.erase(U);
103       }
104     }
105   };
106 
107   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
108   bool run() {
109     bool Changed = false;
110 
111     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
112                       << " functions in a slice with " << ModuleSlice.size()
113                       << " functions\n");
114 
115     Changed |= deduplicateRuntimeCalls();
116     Changed |= deleteParallelRegions();
117 
118     return Changed;
119   }
120 
121 private:
122   /// Try to delete parallel regions if possible.
123   bool deleteParallelRegions() {
124     const unsigned CallbackCalleeOperand = 2;
125 
126     RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call];
127     if (!RFI.Declaration)
128       return false;
129 
130     bool Changed = false;
131     auto DeleteCallCB = [&](Use &U, Function &) {
132       CallInst *CI = getCallIfRegularCall(U);
133       if (!CI)
134         return false;
135       auto *Fn = dyn_cast<Function>(
136           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
137       if (!Fn)
138         return false;
139       if (!Fn->onlyReadsMemory())
140         return false;
141       if (!Fn->hasFnAttribute(Attribute::WillReturn))
142         return false;
143 
144       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
145                         << CI->getCaller()->getName() << "\n");
146       CGUpdater.removeCallSite(*CI);
147       CI->eraseFromParent();
148       Changed = true;
149       return true;
150     };
151 
152     RFI.foreachUse(DeleteCallCB);
153 
154     return Changed;
155   }
156 
157   /// Try to eliminiate runtime calls by reusing existing ones.
158   bool deduplicateRuntimeCalls() {
159     bool Changed = false;
160 
161     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
162         OMPRTL_omp_get_num_threads,
163         OMPRTL_omp_in_parallel,
164         OMPRTL_omp_get_cancellation,
165         OMPRTL_omp_get_thread_limit,
166         OMPRTL_omp_get_supported_active_levels,
167         OMPRTL_omp_get_level,
168         OMPRTL_omp_get_ancestor_thread_num,
169         OMPRTL_omp_get_team_size,
170         OMPRTL_omp_get_active_level,
171         OMPRTL_omp_in_final,
172         OMPRTL_omp_get_proc_bind,
173         OMPRTL_omp_get_num_places,
174         OMPRTL_omp_get_num_procs,
175         OMPRTL_omp_get_place_num,
176         OMPRTL_omp_get_partition_num_places,
177         OMPRTL_omp_get_partition_place_nums};
178 
179     // Global-tid is handled separatly.
180     SmallSetVector<Value *, 16> GTIdArgs;
181     collectGlobalThreadIdArguments(GTIdArgs);
182     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
183                       << " global thread ID arguments\n");
184 
185     for (Function *F : SCC) {
186       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
187         deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
188 
189       // __kmpc_global_thread_num is special as we can replace it with an
190       // argument in enough cases to make it worth trying.
191       Value *GTIdArg = nullptr;
192       for (Argument &Arg : F->args())
193         if (GTIdArgs.count(&Arg)) {
194           GTIdArg = &Arg;
195           break;
196         }
197       Changed |= deduplicateRuntimeCalls(
198           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
199     }
200 
201     return Changed;
202   }
203 
204   static Value *combinedIdentStruct(Value *Ident0, Value *Ident1,
205                                     bool GlobalOnly) {
206     // TODO: Figure out how to actually combine multiple debug locations. For
207     //       now we just keep the first we find.
208     if (Ident0)
209       return Ident0;
210     if (!GlobalOnly || isa<GlobalValue>(Ident1))
211       return Ident1;
212     return nullptr;
213   }
214 
215   /// Return an `struct ident_t*` value that represents the ones used in the
216   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
217   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
218   /// return value we create one from scratch. We also do not yet combine
219   /// information, e.g., the source locations, see combinedIdentStruct.
220   Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F,
221                                         bool GlobalOnly) {
222     Value *Ident = nullptr;
223     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
224       CallInst *CI = getCallIfRegularCall(U, &RFI);
225       if (!CI || &F != &Caller)
226         return false;
227       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
228                                   /* GlobalOnly */ true);
229       return false;
230     };
231     RFI.foreachUse(CombineIdentStruct);
232 
233     if (!Ident) {
234       // The IRBuilder uses the insertion block to get to the module, this is
235       // unfortunate but we work around it for now.
236       if (!OMPBuilder.getInsertionPoint().getBlock())
237         OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
238             &F.getEntryBlock(), F.getEntryBlock().begin()));
239       // Create a fallback location if non was found.
240       // TODO: Use the debug locations of the calls instead.
241       Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr();
242       Ident = OMPBuilder.getOrCreateIdent(Loc);
243     }
244     return Ident;
245   }
246 
247   /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or
248   /// \p ReplVal if given.
249   bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
250                                Value *ReplVal = nullptr) {
251     auto UsesIt = RFI.UsesMap.find(&F);
252     if (UsesIt == RFI.UsesMap.end())
253       return false;
254 
255     auto &Uses = UsesIt->getSecond();
256     if (Uses.size() + (ReplVal != nullptr) < 2)
257       return false;
258 
259     LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
260                       << RFI.Name
261                       << (ReplVal ? " with an existing value\n" : "\n")
262                       << "\n");
263     assert((!ReplVal || (isa<Argument>(ReplVal) &&
264                          cast<Argument>(ReplVal)->getParent() == &F)) &&
265            "Unexpected replacement value!");
266 
267     // TODO: Use dominance to find a good position instead.
268     auto CanBeMoved = [](CallBase &CB) {
269       unsigned NumArgs = CB.getNumArgOperands();
270       if (NumArgs == 0)
271         return true;
272       if (CB.getArgOperand(0)->getType() != IdentPtr)
273         return false;
274       for (unsigned u = 1; u < NumArgs; ++u)
275         if (isa<Instruction>(CB.getArgOperand(u)))
276           return false;
277       return true;
278     };
279 
280     if (!ReplVal) {
281       for (Use *U : Uses)
282         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
283           if (!CanBeMoved(*CI))
284             continue;
285           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
286           ReplVal = CI;
287           break;
288         }
289       if (!ReplVal)
290         return false;
291     }
292 
293     // If we use a call as a replacement value we need to make sure the ident is
294     // valid at the new location. For now we just pick a global one, either
295     // existing and used by one of the calls, or created from scratch.
296     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
297       if (CI->getNumArgOperands() > 0 &&
298           CI->getArgOperand(0)->getType() == IdentPtr) {
299         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
300                                                       /* GlobalOnly */ true);
301         CI->setArgOperand(0, Ident);
302       }
303     }
304 
305     bool Changed = false;
306     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
307       CallInst *CI = getCallIfRegularCall(U, &RFI);
308       if (!CI || CI == ReplVal || &F != &Caller)
309         return false;
310       assert(CI->getCaller() == &F && "Unexpected call!");
311       CGUpdater.removeCallSite(*CI);
312       CI->replaceAllUsesWith(ReplVal);
313       CI->eraseFromParent();
314       ++NumOpenMPRuntimeCallsDeduplicated;
315       Changed = true;
316       return true;
317     };
318     RFI.foreachUse(ReplaceAndDeleteCB);
319 
320     return Changed;
321   }
322 
323   /// Collect arguments that represent the global thread id in \p GTIdArgs.
324   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
325     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
326     //       initialization. We could define an AbstractAttribute instead and
327     //       run the Attributor here once it can be run as an SCC pass.
328 
329     // Helper to check the argument \p ArgNo at all call sites of \p F for
330     // a GTId.
331     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
332       if (!F.hasLocalLinkage())
333         return false;
334       for (Use &U : F.uses()) {
335         if (CallInst *CI = getCallIfRegularCall(U)) {
336           Value *ArgOp = CI->getArgOperand(ArgNo);
337           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
338               getCallIfRegularCall(*ArgOp,
339                                    &RFIs[OMPRTL___kmpc_global_thread_num]))
340             continue;
341         }
342         return false;
343       }
344       return true;
345     };
346 
347     // Helper to identify uses of a GTId as GTId arguments.
348     auto AddUserArgs = [&](Value &GTId) {
349       for (Use &U : GTId.uses())
350         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
351           if (CI->isArgOperand(&U))
352             if (Function *Callee = CI->getCalledFunction())
353               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
354                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
355     };
356 
357     // The argument users of __kmpc_global_thread_num calls are GTIds.
358     RuntimeFunctionInfo &GlobThreadNumRFI =
359         RFIs[OMPRTL___kmpc_global_thread_num];
360     for (auto &It : GlobThreadNumRFI.UsesMap)
361       for (Use *U : It.second)
362         if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
363           AddUserArgs(*CI);
364 
365     // Transitively search for more arguments by looking at the users of the
366     // ones we know already. During the search the GTIdArgs vector is extended
367     // so we cannot cache the size nor can we use a range based for.
368     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
369       AddUserArgs(*GTIdArgs[u]);
370   }
371 
372   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
373   /// given it has to be the callee or a nullptr is returned.
374   CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
375     CallInst *CI = dyn_cast<CallInst>(U.getUser());
376     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
377         (!RFI || CI->getCalledFunction() == RFI->Declaration))
378       return CI;
379     return nullptr;
380   }
381 
382   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
383   /// the callee or a nullptr is returned.
384   CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
385     CallInst *CI = dyn_cast<CallInst>(&V);
386     if (CI && !CI->hasOperandBundles() &&
387         (!RFI || CI->getCalledFunction() == RFI->Declaration))
388       return CI;
389     return nullptr;
390   }
391 
392   /// Returns true if the function declaration \p F matches the runtime
393   /// function types, that is, return type \p RTFRetType, and argument types
394   /// \p RTFArgTypes.
395   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
396                                   SmallVector<Type *, 8> &RTFArgTypes) {
397     // TODO: We should output information to the user (under debug output
398     //       and via remarks).
399 
400     if (!F)
401       return false;
402     if (F->getReturnType() != RTFRetType)
403       return false;
404     if (F->arg_size() != RTFArgTypes.size())
405       return false;
406 
407     auto RTFTyIt = RTFArgTypes.begin();
408     for (Argument &Arg : F->args()) {
409       if (Arg.getType() != *RTFTyIt)
410         return false;
411 
412       ++RTFTyIt;
413     }
414 
415     return true;
416   }
417 
418   /// Helper to initialize all runtime function information for those defined in
419   /// OpenMPKinds.def.
420   void initializeRuntimeFunctions() {
421     // Helper to collect all uses of the decleration in the UsesMap.
422     auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
423       unsigned NumUses = 0;
424       if (!RFI.Declaration)
425         return NumUses;
426       OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
427 
428       NumOpenMPRuntimeFunctionsIdentified += 1;
429       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
430 
431       // TODO: We directly convert uses into proper calls and unknown uses.
432       for (Use &U : RFI.Declaration->uses()) {
433         if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
434           if (ModuleSlice.count(UserI->getFunction())) {
435             RFI.UsesMap[UserI->getFunction()].insert(&U);
436             ++NumUses;
437           }
438         } else {
439           RFI.UsesMap[nullptr].insert(&U);
440           ++NumUses;
441         }
442       }
443       return NumUses;
444     };
445 
446 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
447   {                                                                            \
448     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
449     Function *F = M.getFunction(_Name);                                        \
450     if (declMatchesRTFTypes(F, _ReturnType , ArgsTypes)) {                     \
451       auto &RFI = RFIs[_Enum];                                                 \
452       RFI.Kind = _Enum;                                                        \
453       RFI.Name = _Name;                                                        \
454       RFI.IsVarArg = _IsVarArg;                                                \
455       RFI.ReturnType = _ReturnType;                                            \
456       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
457       RFI.Declaration = F;                                                     \
458       unsigned NumUses = CollectUses(RFI);                                     \
459       (void)NumUses;                                                           \
460       LLVM_DEBUG({                                                             \
461         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
462               << " found\n";                                                   \
463         if (RFI.Declaration)                                                   \
464           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
465                 << RFI.UsesMap.size() << " different functions.\n";            \
466       });                                                                      \
467     }                                                                          \
468   }
469 #include "llvm/Frontend/OpenMP/OMPKinds.def"
470 
471     // TODO: We should attach the attributes defined in OMPKinds.def.
472   }
473 
  /// The underlying module.
475   Module &M;
476 
477   /// The SCC we are operating on.
478   SmallPtrSetImpl<Function *> &SCC;
479 
480   /// The slice of the module we are allowed to look at.
481   SmallPtrSetImpl<Function *> &ModuleSlice;
482 
483   /// An OpenMP-IR-Builder instance
484   OpenMPIRBuilder OMPBuilder;
485 
  /// Updater that is notified about removed call sites so the call graph stays
  /// in sync with the IR.
488   CallGraphUpdater &CGUpdater;
489 
490   /// Map from runtime function kind to the runtime function description.
491   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
492                   RuntimeFunction::OMPRTL___last>
493       RFIs;
494 };
495 } // namespace
496 
497 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
498                                      CGSCCAnalysisManager &AM,
499                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
500   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
501     return PreservedAnalyses::all();
502 
503   if (DisableOpenMPOptimizations)
504     return PreservedAnalyses::all();
505 
506   SmallPtrSet<Function *, 16> SCC;
507   for (LazyCallGraph::Node &N : C)
508     SCC.insert(&N.getFunction());
509 
510   if (SCC.empty())
511     return PreservedAnalyses::all();
512 
513   CallGraphUpdater CGUpdater;
514   CGUpdater.initialize(CG, C, AM, UR);
515   // TODO: Compute the module slice we are allowed to look at.
516   OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
517   bool Changed = OMPOpt.run();
518   (void)Changed;
519   return PreservedAnalyses::all();
520 }
521 
522 namespace {
523 
524 struct OpenMPOptLegacyPass : public CallGraphSCCPass {
525   CallGraphUpdater CGUpdater;
526   OpenMPInModule OMPInModule;
527   static char ID;
528 
529   OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
530     initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
531   }
532 
533   void getAnalysisUsage(AnalysisUsage &AU) const override {
534     CallGraphSCCPass::getAnalysisUsage(AU);
535   }
536 
537   bool doInitialization(CallGraph &CG) override {
538     // Disable the pass if there is no OpenMP (runtime call) in the module.
539     containsOpenMP(CG.getModule(), OMPInModule);
540     return false;
541   }
542 
543   bool runOnSCC(CallGraphSCC &CGSCC) override {
544     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
545       return false;
546     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
547       return false;
548 
549     SmallPtrSet<Function *, 16> SCC;
550     for (CallGraphNode *CGN : CGSCC)
551       if (Function *Fn = CGN->getFunction())
552         if (!Fn->isDeclaration())
553           SCC.insert(Fn);
554 
555     if (SCC.empty())
556       return false;
557 
558     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
559     CGUpdater.initialize(CG, CGSCC);
560 
561     // TODO: Compute the module slice we are allowed to look at.
562     OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
563     return OMPOpt.run();
564   }
565 
566   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
567 };
568 
569 } // end anonymous namespace
570 
571 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
572   if (OMPInModule.isKnown())
573     return OMPInModule;
574 
575 #define OMP_RTL(_Enum, _Name, ...)                                             \
576   if (M.getFunction(_Name))                                                    \
577     return OMPInModule = true;
578 #include "llvm/Frontend/OpenMP/OMPKinds.def"
579   return OMPInModule = false;
580 }
581 
582 char OpenMPOptLegacyPass::ID = 0;
583 
584 INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
585                       "OpenMP specific optimizations", false, false)
586 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
587 INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
588                     "OpenMP specific optimizations", false, false)
589 
590 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
591