1 //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // OpenMP specific optimizations:
10 //
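// - Deduplication of runtime calls, e.g., omp_get_num_threads.
// - Deletion of parallel regions whose outlined function only reads memory
//   and is marked as always returning.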
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/OpenMPOpt.h"
16 
17 #include "llvm/ADT/EnumeratedArray.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CallGraph.h"
20 #include "llvm/Analysis/CallGraphSCCPass.h"
21 #include "llvm/Frontend/OpenMP/OMPConstants.h"
22 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Transforms/IPO.h"
27 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
28 
29 using namespace llvm;
30 using namespace omp;
31 using namespace types;
32 
33 #define DEBUG_TYPE "openmp-opt"
34 
// Hidden command line flag ("openmp-opt-disable") that turns the OpenMP
// specific optimizations off. It defaults to false, i.e., the optimizations
// are enabled unless the flag is given.
static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

// Counter bumped once for every runtime call that was replaced by an
// existing value and erased.
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
// Counter bumped once for every runtime function declaration found in the
// module while the runtime function table is set up.
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
// Counter increased by the number of uses of every runtime function
// declaration that was found.
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");

// Prefix put in front of the debug messages emitted by this file; it expands
// to the DEBUG_TYPE wrapped in square brackets.
static constexpr auto TAG = "[" DEBUG_TYPE "]";
48 
49 namespace {
50 struct OpenMPOpt {
51 
52   OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
53             SmallPtrSetImpl<Function *> &ModuleSlice,
54             CallGraphUpdater &CGUpdater)
55       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
56         OMPBuilder(M), CGUpdater(CGUpdater) {
57     initializeTypes(M);
58     initializeRuntimeFunctions();
59     OMPBuilder.initialize();
60   }
61 
62   /// Generic information that describes a runtime function
63   struct RuntimeFunctionInfo {
64     /// The kind, as described by the RuntimeFunction enum.
65     RuntimeFunction Kind;
66 
67     /// The name of the function.
68     StringRef Name;
69 
70     /// Flag to indicate a variadic function.
71     bool IsVarArg;
72 
73     /// The return type of the function.
74     Type *ReturnType;
75 
76     /// The argument types of the function.
77     SmallVector<Type *, 8> ArgumentTypes;
78 
79     /// The declaration if available.
80     Function *Declaration;
81 
82     /// Uses of this runtime function per function containing the use.
83     DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
84 
85     /// Return the number of arguments (or the minimal number for variadic
86     /// functions).
87     size_t getNumArgs() const { return ArgumentTypes.size(); }
88 
89     /// Run the callback \p CB on each use and forget the use if the result is
90     /// true. The callback will be fed the function in which the use was
91     /// encountered as second argument.
92     void foreachUse(function_ref<bool(Use &, Function &)> CB) {
93       SmallVector<Use *, 8> ToBeDeleted;
94       for (auto &It : UsesMap) {
95         ToBeDeleted.clear();
96         for (Use *U : It.second)
97           if (CB(*U, *It.first))
98             ToBeDeleted.push_back(U);
99         for (Use *U : ToBeDeleted)
100           It.second.erase(U);
101       }
102     }
103   };
104 
105   /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
106   bool run() {
107     bool Changed = false;
108 
109     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
110                       << " functions in a slice with " << ModuleSlice.size()
111                       << " functions\n");
112 
113     Changed |= deduplicateRuntimeCalls();
114     Changed |= deleteParallelRegions();
115 
116     return Changed;
117   }
118 
119 private:
120   /// Try to delete parallel regions if possible
121   bool deleteParallelRegions() {
122     const unsigned CallbackCalleeOperand = 2;
123 
124     RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call];
125     if (!RFI.Declaration)
126       return false;
127 
128     bool Changed = false;
129     auto DeleteCallCB = [&](Use &U, Function &) {
130       CallInst *CI = getCallIfRegularCall(U);
131       if (!CI)
132         return false;
133       auto *Fn = dyn_cast<Function>(
134           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
135       if (!Fn)
136         return false;
137       if (!Fn->onlyReadsMemory())
138         return false;
139       if (!Fn->hasFnAttribute(Attribute::WillReturn))
140         return false;
141 
142       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
143                         << CI->getCaller()->getName() << "\n");
144       CGUpdater.removeCallSite(*CI);
145       CI->eraseFromParent();
146       Changed = true;
147       return true;
148     };
149 
150     RFI.foreachUse(DeleteCallCB);
151 
152     return Changed;
153   }
154 
155   /// Try to eliminiate runtime calls by reusing existing ones.
156   bool deduplicateRuntimeCalls() {
157     bool Changed = false;
158 
159     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
160         OMPRTL_omp_get_num_threads,
161         OMPRTL_omp_in_parallel,
162         OMPRTL_omp_get_cancellation,
163         OMPRTL_omp_get_thread_limit,
164         OMPRTL_omp_get_supported_active_levels,
165         OMPRTL_omp_get_level,
166         OMPRTL_omp_get_ancestor_thread_num,
167         OMPRTL_omp_get_team_size,
168         OMPRTL_omp_get_active_level,
169         OMPRTL_omp_in_final,
170         OMPRTL_omp_get_proc_bind,
171         OMPRTL_omp_get_num_places,
172         OMPRTL_omp_get_num_procs,
173         OMPRTL_omp_get_place_num,
174         OMPRTL_omp_get_partition_num_places,
175         OMPRTL_omp_get_partition_place_nums};
176 
177     // Global-tid is handled separatly.
178     SmallSetVector<Value *, 16> GTIdArgs;
179     collectGlobalThreadIdArguments(GTIdArgs);
180     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
181                       << " global thread ID arguments\n");
182 
183     for (Function *F : SCC) {
184       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
185         deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
186 
187       // __kmpc_global_thread_num is special as we can replace it with an
188       // argument in enough cases to make it worth trying.
189       Value *GTIdArg = nullptr;
190       for (Argument &Arg : F->args())
191         if (GTIdArgs.count(&Arg)) {
192           GTIdArg = &Arg;
193           break;
194         }
195       Changed |= deduplicateRuntimeCalls(
196           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
197     }
198 
199     return Changed;
200   }
201 
202   /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or
203   /// \p ReplVal if given.
204   bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
205                                Value *ReplVal = nullptr) {
206     auto &Uses = RFI.UsesMap[&F];
207     if (Uses.size() + (ReplVal != nullptr) < 2)
208       return false;
209 
210     LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
211                       << RFI.Name
212                       << (ReplVal ? " with an existing value\n" : "\n")
213                       << "\n");
214     assert((!ReplVal || (isa<Argument>(ReplVal) &&
215                          cast<Argument>(ReplVal)->getParent() == &F)) &&
216            "Unexpected replacement value!");
217     if (!ReplVal) {
218       for (Use *U : Uses)
219         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
220           CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
221           ReplVal = CI;
222           break;
223         }
224       if (!ReplVal)
225         return false;
226     }
227 
228     bool Changed = false;
229     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
230       CallInst *CI = getCallIfRegularCall(U, &RFI);
231       if (!CI || CI == ReplVal || &F != &Caller)
232         return false;
233       assert(CI->getCaller() == &F && "Unexpected call!");
234       CGUpdater.removeCallSite(*CI);
235       CI->replaceAllUsesWith(ReplVal);
236       CI->eraseFromParent();
237       ++NumOpenMPRuntimeCallsDeduplicated;
238       Changed = true;
239       return true;
240     };
241     RFI.foreachUse(ReplaceAndDeleteCB);
242 
243     return Changed;
244   }
245 
246   /// Collect arguments that represent the global thread id in \p GTIdArgs.
247   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
248     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
249     //       initialization. We could define an AbstractAttribute instead and
250     //       run the Attributor here once it can be run as an SCC pass.
251 
252     // Helper to check the argument \p ArgNo at all call sites of \p F for
253     // a GTId.
254     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
255       if (!F.hasLocalLinkage())
256         return false;
257       for (Use &U : F.uses()) {
258         if (CallInst *CI = getCallIfRegularCall(U)) {
259           Value *ArgOp = CI->getArgOperand(ArgNo);
260           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
261               getCallIfRegularCall(*ArgOp,
262                                    &RFIs[OMPRTL___kmpc_global_thread_num]))
263             continue;
264         }
265         return false;
266       }
267       return true;
268     };
269 
270     // Helper to identify uses of a GTId as GTId arguments.
271     auto AddUserArgs = [&](Value &GTId) {
272       for (Use &U : GTId.uses())
273         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
274           if (CI->isArgOperand(&U))
275             if (Function *Callee = CI->getCalledFunction())
276               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
277                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
278     };
279 
280     // The argument users of __kmpc_global_thread_num calls are GTIds.
281     RuntimeFunctionInfo &GlobThreadNumRFI =
282         RFIs[OMPRTL___kmpc_global_thread_num];
283     for (auto &It : GlobThreadNumRFI.UsesMap)
284       for (Use *U : It.second)
285         if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
286           AddUserArgs(*CI);
287 
288     // Transitively search for more arguments by looking at the users of the
289     // ones we know already. During the search the GTIdArgs vector is extended
290     // so we cannot cache the size nor can we use a range based for.
291     for (unsigned u = 0; u < GTIdArgs.size(); ++u)
292       AddUserArgs(*GTIdArgs[u]);
293   }
294 
295   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
296   /// given it has to be the callee or a nullptr is returned.
297   CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
298     CallInst *CI = dyn_cast<CallInst>(U.getUser());
299     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
300         (!RFI || CI->getCalledFunction() == RFI->Declaration))
301       return CI;
302     return nullptr;
303   }
304 
305   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
306   /// the callee or a nullptr is returned.
307   CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
308     CallInst *CI = dyn_cast<CallInst>(&V);
309     if (CI && !CI->hasOperandBundles() &&
310         (!RFI || CI->getCalledFunction() == RFI->Declaration))
311       return CI;
312     return nullptr;
313   }
314 
315   /// Helper to initialize all runtime function information for those defined in
316   /// OpenMPKinds.def.
317   void initializeRuntimeFunctions() {
318     // Helper to collect all uses of the decleration in the UsesMap.
319     auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
320       unsigned NumUses = 0;
321       if (!RFI.Declaration)
322         return NumUses;
323       OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
324 
325       NumOpenMPRuntimeFunctionsIdentified += 1;
326       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
327 
328       // TODO: We directly convert uses into proper calls and unknown uses.
329       for (Use &U : RFI.Declaration->uses()) {
330         if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
331           if (ModuleSlice.count(UserI->getFunction())) {
332             RFI.UsesMap[UserI->getFunction()].insert(&U);
333             ++NumUses;
334           }
335         } else {
336           RFI.UsesMap[nullptr].insert(&U);
337           ++NumUses;
338         }
339       }
340       return NumUses;
341     };
342 
343 #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
344   {                                                                            \
345     auto &RFI = RFIs[_Enum];                                                   \
346     RFI.Kind = _Enum;                                                          \
347     RFI.Name = _Name;                                                          \
348     RFI.IsVarArg = _IsVarArg;                                                  \
349     RFI.ReturnType = _ReturnType;                                              \
350     RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__});                 \
351     RFI.Declaration = M.getFunction(_Name);                                    \
352     unsigned NumUses = CollectUses(RFI);                                       \
353     (void)NumUses;                                                             \
354     LLVM_DEBUG({                                                               \
355       dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")             \
356              << " found\n";                                                    \
357       if (RFI.Declaration)                                                     \
358         dbgs() << TAG << "-> got " << NumUses << " uses in "                   \
359                << RFI.UsesMap.size() << " different functions.\n";             \
360     });                                                                        \
361   }
362 #include "llvm/Frontend/OpenMP/OMPKinds.def"
363 
364     // TODO: We should validate the declaration agains the types we expect.
365     // TODO: We should attach the attributes defined in OMPKinds.def.
366   }
367 
368   /// The underyling module.
369   Module &M;
370 
371   /// The SCC we are operating on.
372   SmallPtrSetImpl<Function *> &SCC;
373 
374   /// The slice of the module we are allowed to look at.
375   SmallPtrSetImpl<Function *> &ModuleSlice;
376 
377   /// An OpenMP-IR-Builder instance
378   OpenMPIRBuilder OMPBuilder;
379 
380   /// Callback to update the call graph, the first argument is a removed call,
381   /// the second an optional replacement call.
382   CallGraphUpdater &CGUpdater;
383 
384   /// Map from runtime function kind to the runtime function description.
385   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
386                   RuntimeFunction::OMPRTL___last>
387       RFIs;
388 };
389 } // namespace
390 
391 PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
392                                      CGSCCAnalysisManager &AM,
393                                      LazyCallGraph &CG, CGSCCUpdateResult &UR) {
394   if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
395     return PreservedAnalyses::all();
396 
397   if (DisableOpenMPOptimizations)
398     return PreservedAnalyses::all();
399 
400   SmallPtrSet<Function *, 16> SCC;
401   for (LazyCallGraph::Node &N : C)
402     SCC.insert(&N.getFunction());
403 
404   if (SCC.empty())
405     return PreservedAnalyses::all();
406 
407   CallGraphUpdater CGUpdater;
408   CGUpdater.initialize(CG, C, AM, UR);
409   // TODO: Compute the module slice we are allowed to look at.
410   OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
411   bool Changed = OMPOpt.run();
412   (void)Changed;
413   return PreservedAnalyses::all();
414 }
415 
416 namespace {
417 
418 struct OpenMPOptLegacyPass : public CallGraphSCCPass {
419   CallGraphUpdater CGUpdater;
420   OpenMPInModule OMPInModule;
421   static char ID;
422 
423   OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
424     initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
425   }
426 
427   void getAnalysisUsage(AnalysisUsage &AU) const override {
428     CallGraphSCCPass::getAnalysisUsage(AU);
429   }
430 
431   bool doInitialization(CallGraph &CG) override {
432     // Disable the pass if there is no OpenMP (runtime call) in the module.
433     containsOpenMP(CG.getModule(), OMPInModule);
434     return false;
435   }
436 
437   bool runOnSCC(CallGraphSCC &CGSCC) override {
438     if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
439       return false;
440     if (DisableOpenMPOptimizations || skipSCC(CGSCC))
441       return false;
442 
443     SmallPtrSet<Function *, 16> SCC;
444     for (CallGraphNode *CGN : CGSCC)
445       if (Function *Fn = CGN->getFunction())
446         if (!Fn->isDeclaration())
447           SCC.insert(Fn);
448 
449     if (SCC.empty())
450       return false;
451 
452     CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
453     CGUpdater.initialize(CG, CGSCC);
454 
455     // TODO: Compute the module slice we are allowed to look at.
456     OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
457     return OMPOpt.run();
458   }
459 
460   bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
461 };
462 
463 } // end anonymous namespace
464 
465 bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
466   if (OMPInModule.isKnown())
467     return OMPInModule;
468 
469 #define OMP_RTL(_Enum, _Name, ...)                                             \
470   if (M.getFunction(_Name))                                                    \
471     return OMPInModule = true;
472 #include "llvm/Frontend/OpenMP/OMPKinds.def"
473   return OMPInModule = false;
474 }
475 
// Definition of the static pass identifier declared in OpenMPOptLegacyPass.
char OpenMPOptLegacyPass::ID = 0;

// Register the legacy pass under the name "openmpopt" and declare its
// dependency on the CallGraphWrapperPass.
INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
                      "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
                    "OpenMP specific optimizations", false, false)
483 
484 Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
485