1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attrbutes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is clonned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/Analysis/CallGraph.h"
34 #include "llvm/CodeGen/TargetPassConfig.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/Cloning.h"
39 #include <queue>
40 
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42 
43 using namespace llvm;
44 
45 namespace llvm {
46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47 }
48 
49 namespace {
50 
51 // Target features to propagate.
52 static constexpr const FeatureBitset TargetFeatures = {
53   AMDGPU::FeatureWavefrontSize16,
54   AMDGPU::FeatureWavefrontSize32,
55   AMDGPU::FeatureWavefrontSize64
56 };
57 
58 // Attributes to propagate.
59 // TODO: Support conservative min/max merging instead of cloning.
60 static constexpr const char* AttributeNames[] = {
61   "amdgpu-waves-per-eu",
62   "amdgpu-flat-work-group-size"
63 };
64 
65 static constexpr unsigned NumAttr =
66   sizeof(AttributeNames) / sizeof(AttributeNames[0]);
67 
68 class AMDGPUPropagateAttributes {
69 
70   class FnProperties {
71   private:
72     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
73 
74   public:
75     explicit FnProperties(const TargetMachine &TM, const Function &F) {
76       Features = TM.getSubtargetImpl(F)->getFeatureBits();
77 
78       for (unsigned I = 0; I < NumAttr; ++I)
79         if (F.hasFnAttribute(AttributeNames[I]))
80           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
81     }
82 
83     bool operator == (const FnProperties &Other) const {
84       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
85         return false;
86       for (unsigned I = 0; I < NumAttr; ++I)
87         if (Attributes[I] != Other.Attributes[I])
88           return false;
89       return true;
90     }
91 
92     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
93       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
94       for (unsigned I = 0; I < NumAttr; ++I)
95         New.Attributes[I] = CallerProps.Attributes[I];
96       return New;
97     }
98 
99     FeatureBitset Features;
100     Optional<Attribute> Attributes[NumAttr];
101   };
102 
103   class Clone {
104   public:
105     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
106       Properties(Props), OrigF(OrigF), NewF(NewF) {}
107 
108     FnProperties Properties;
109     Function *OrigF;
110     Function *NewF;
111   };
112 
113   const TargetMachine *TM;
114 
115   // Clone functions as needed or just set attributes.
116   bool AllowClone;
117 
118   CallGraph *ModuleCG = nullptr;
119 
120   // Option propagation roots.
121   SmallSet<Function *, 32> Roots;
122 
123   // Clones of functions with their attributes.
124   SmallVector<Clone, 32> Clones;
125 
126   // To memoize address taken functions.
127   SmallSet<Function *, 32> AddressTakenFunctions;
128 
129   // Find a clone with required features.
130   Function *findFunction(const FnProperties &PropsNeeded,
131                          Function *OrigF);
132 
133   // Clone function \p F and set \p NewProps on the clone.
134   // Cole takes the name of original function.
135   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
136 
137   // Set new function's features in place.
138   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
139 
140   // Set new function's attributes in place.
141   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
142 
143   std::string getFeatureString(const FeatureBitset &Features) const;
144 
145   // Propagate attributes from Roots.
146   bool process();
147 
148 public:
149   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone)
150       : TM(TM), AllowClone(AllowClone) {}
151 
152   // Use F as a root and propagate its attributes.
153   bool process(Function &F);
154 
155   // Propagate attributes starting from kernel functions.
156   bool process(Module &M, CallGraph *CG);
157 
158   // Remove attributes from F.
159   // This is used in presence of address taken functions.
160   bool removeAttributes(Function *F);
161 
162   // Handle call graph rooted at address taken functions.
163   // This function will erase all attributes present
164   // on all functions called from address taken functions transitively.
165   bool handleAddressTakenFunctions(CallGraph *CG);
166 };
167 
168 // Allows to propagate attributes early, but no clonning is allowed as it must
169 // be a function pass to run before any optimizations.
170 // TODO: We shall only need a one instance of module pass, but that needs to be
171 // in the linker pipeline which is currently not possible.
172 class AMDGPUPropagateAttributesEarly : public FunctionPass {
173   const TargetMachine *TM;
174 
175 public:
176   static char ID; // Pass identification
177 
178   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
179     FunctionPass(ID), TM(TM) {
180     initializeAMDGPUPropagateAttributesEarlyPass(
181       *PassRegistry::getPassRegistry());
182   }
183 
184   bool runOnFunction(Function &F) override;
185 };
186 
187 // Allows to propagate attributes with clonning but does that late in the
188 // pipeline.
189 class AMDGPUPropagateAttributesLate : public ModulePass {
190   const TargetMachine *TM;
191 
192 public:
193   static char ID; // Pass identification
194 
195   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
196     ModulePass(ID), TM(TM) {
197     initializeAMDGPUPropagateAttributesLatePass(
198       *PassRegistry::getPassRegistry());
199   }
200 
201   void getAnalysisUsage(AnalysisUsage &AU) const override;
202   bool runOnModule(Module &M) override;
203 };
204 
205 }  // end anonymous namespace.
206 
207 char AMDGPUPropagateAttributesEarly::ID = 0;
208 char AMDGPUPropagateAttributesLate::ID = 0;
209 
210 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
211                 "amdgpu-propagate-attributes-early",
212                 "Early propagate attributes from kernels to functions",
213                 false, false)
214 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
215                 "amdgpu-propagate-attributes-late",
216                 "Late propagate attributes from kernels to functions",
217                 false, false)
218 
219 bool AMDGPUPropagateAttributes::removeAttributes(Function *F) {
220   bool Changed = false;
221   if (!F)
222     return Changed;
223   LLVM_DEBUG(dbgs() << "Removing attributes from " << F->getName() << '\n');
224   for (unsigned I = 0; I < NumAttr; ++I) {
225     if (F->hasFnAttribute(AttributeNames[I])) {
226       F->removeFnAttr(AttributeNames[I]);
227       Changed = true;
228     }
229   }
230   return Changed;
231 }
232 
233 bool AMDGPUPropagateAttributes::handleAddressTakenFunctions(CallGraph *CG) {
234   assert(ModuleCG && "Call graph not present");
235 
236   bool Changed = false;
237   SmallSet<CallGraphNode *, 32> Visited;
238 
239   for (Function *F : AddressTakenFunctions) {
240     CallGraphNode *CGN = (*CG)[F];
241     if (!Visited.count(CGN)) {
242       Changed |= removeAttributes(F);
243       Visited.insert(CGN);
244     }
245 
246     std::queue<CallGraphNode *> SubGraph;
247     SubGraph.push(CGN);
248     while (!SubGraph.empty()) {
249       CallGraphNode *CGN = SubGraph.front();
250       SubGraph.pop();
251       if (!Visited.count(CGN)) {
252         Changed |= removeAttributes(CGN->getFunction());
253         Visited.insert(CGN);
254       }
255       for (auto N : *CGN)
256         SubGraph.push(N.second);
257     }
258   }
259   return Changed;
260 }
261 
262 Function *
263 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
264                                         Function *OrigF) {
265   // TODO: search for clone's clones.
266   for (Clone &C : Clones)
267     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
268       return C.NewF;
269 
270   return nullptr;
271 }
272 
273 bool AMDGPUPropagateAttributes::process(Module &M, CallGraph *CG) {
274   for (auto &F : M.functions())
275     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
276       Roots.insert(&F);
277 
278   ModuleCG = CG;
279   return process();
280 }
281 
282 bool AMDGPUPropagateAttributes::process(Function &F) {
283   Roots.insert(&F);
284   return process();
285 }
286 
287 bool AMDGPUPropagateAttributes::process() {
288   bool Changed = false;
289   SmallSet<Function *, 32> NewRoots;
290   SmallSet<Function *, 32> Replaced;
291 
292   if (Roots.empty())
293     return false;
294   Module &M = *(*Roots.begin())->getParent();
295 
296   do {
297     Roots.insert(NewRoots.begin(), NewRoots.end());
298     NewRoots.clear();
299 
300     for (auto &F : M.functions()) {
301       if (F.isDeclaration())
302         continue;
303 
304       if (F.hasAddressTaken(nullptr, true, true, true))
305         AddressTakenFunctions.insert(&F);
306 
307       const FnProperties CalleeProps(*TM, F);
308       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
309       SmallSet<CallBase *, 32> Visited;
310 
311       for (User *U : F.users()) {
312         Instruction *I = dyn_cast<Instruction>(U);
313         if (!I)
314           continue;
315         CallBase *CI = dyn_cast<CallBase>(I);
316         // Only propagate attributes if F is the called function. Specifically,
317         // do not propagate attributes if F is passed as an argument.
318         // FIXME: handle bitcasted callee, e.g.
319         // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
320         if (!CI || CI->getCalledOperand() != &F)
321           continue;
322         Function *Caller = CI->getCaller();
323         if (!Caller || !Visited.insert(CI).second)
324           continue;
325         if (!Roots.count(Caller) && !NewRoots.count(Caller))
326           continue;
327 
328         const FnProperties CallerProps(*TM, *Caller);
329 
330         if (CalleeProps == CallerProps) {
331           if (!Roots.count(&F))
332             NewRoots.insert(&F);
333           continue;
334         }
335 
336         Function *NewF = findFunction(CallerProps, &F);
337         if (!NewF) {
338           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
339           if (!AllowClone) {
340             // This may set different features on different iteartions if
341             // there is a contradiction in callers' attributes. In this case
342             // we rely on a second pass running on Module, which is allowed
343             // to clone.
344             setFeatures(F, NewProps.Features);
345             setAttributes(F, NewProps.Attributes);
346             NewRoots.insert(&F);
347             Changed = true;
348             break;
349           }
350 
351           NewF = cloneWithProperties(F, NewProps);
352           Clones.push_back(Clone(CallerProps, &F, NewF));
353           NewRoots.insert(NewF);
354         }
355 
356         ToReplace.push_back(std::make_pair(CI, NewF));
357         Replaced.insert(&F);
358 
359         Changed = true;
360       }
361 
362       while (!ToReplace.empty()) {
363         auto R = ToReplace.pop_back_val();
364         R.first->setCalledFunction(R.second);
365       }
366     }
367   } while (!NewRoots.empty());
368 
369   for (Function *F : Replaced) {
370     if (F->use_empty())
371       F->eraseFromParent();
372   }
373 
374   Roots.clear();
375   Clones.clear();
376 
377   // Keep the post processing related to indirect
378   // calls separate to handle them gracefully.
379   // The core traversal need not be affected by this.
380   if (AllowClone)
381     Changed |= handleAddressTakenFunctions(ModuleCG);
382 
383   return Changed;
384 }
385 
386 Function *
387 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
388                                                const FnProperties &NewProps) {
389   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
390 
391   ValueToValueMapTy dummy;
392   Function *NewF = CloneFunction(&F, dummy);
393   setFeatures(*NewF, NewProps.Features);
394   setAttributes(*NewF, NewProps.Attributes);
395   NewF->setVisibility(GlobalValue::DefaultVisibility);
396   NewF->setLinkage(GlobalValue::InternalLinkage);
397 
398   // Swap names. If that is the only clone it will retain the name of now
399   // dead value. Preserve original name for externally visible functions.
400   if (F.hasName() && F.hasLocalLinkage()) {
401     std::string NewName = std::string(NewF->getName());
402     NewF->takeName(&F);
403     F.setName(NewName);
404   }
405 
406   return NewF;
407 }
408 
409 void AMDGPUPropagateAttributes::setFeatures(Function &F,
410                                             const FeatureBitset &NewFeatures) {
411   std::string NewFeatureStr = getFeatureString(NewFeatures);
412 
413   LLVM_DEBUG(dbgs() << "Set features "
414                     << getFeatureString(NewFeatures & TargetFeatures)
415                     << " on " << F.getName() << '\n');
416 
417   F.removeFnAttr("target-features");
418   F.addFnAttr("target-features", NewFeatureStr);
419 }
420 
421 void AMDGPUPropagateAttributes::setAttributes(Function &F,
422     const ArrayRef<Optional<Attribute>> NewAttrs) {
423   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
424   for (unsigned I = 0; I < NumAttr; ++I) {
425     F.removeFnAttr(AttributeNames[I]);
426     if (NewAttrs[I]) {
427       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
428       F.addFnAttr(*NewAttrs[I]);
429     }
430   }
431 }
432 
433 std::string
434 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
435 {
436   std::string Ret;
437   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
438     if (Features[KV.Value])
439       Ret += (StringRef("+") + KV.Key + ",").str();
440     else if (TargetFeatures[KV.Value])
441       Ret += (StringRef("-") + KV.Key + ",").str();
442   }
443   Ret.pop_back(); // Remove last comma.
444   return Ret;
445 }
446 
447 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
448   if (!TM) {
449     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
450     if (!TPC)
451       return false;
452 
453     TM = &TPC->getTM<TargetMachine>();
454   }
455 
456   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
457     return false;
458 
459   return AMDGPUPropagateAttributes(TM, false).process(F);
460 }
461 
462 void AMDGPUPropagateAttributesLate::getAnalysisUsage(AnalysisUsage &AU) const {
463   AU.addRequired<CallGraphWrapperPass>();
464 }
465 
466 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
467   if (!TM) {
468     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
469     if (!TPC)
470       return false;
471 
472     TM = &TPC->getTM<TargetMachine>();
473   }
474   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
475   return AMDGPUPropagateAttributes(TM, true).process(M, &CG);
476 }
477 
478 FunctionPass
479 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
480   return new AMDGPUPropagateAttributesEarly(TM);
481 }
482 
483 ModulePass
484 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
485   return new AMDGPUPropagateAttributesLate(TM);
486 }
487 
488 PreservedAnalyses
489 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
490                                         FunctionAnalysisManager &AM) {
491   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
492     return PreservedAnalyses::all();
493 
494   return AMDGPUPropagateAttributes(&TM, false).process(F)
495              ? PreservedAnalyses::none()
496              : PreservedAnalyses::all();
497 }
498 
499 PreservedAnalyses
500 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &MAM) {
501   AMDGPUPropagateAttributes APA(&TM, true);
502   CallGraph &CG = MAM.getResult<CallGraphAnalysis>(M);
503   const bool Changed = APA.process(M, &CG);
504   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
505 }
506