1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/CodeGen/TargetPassConfig.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include "llvm/Transforms/Utils/Cloning.h"
40 #include <string>
41 
42 #define DEBUG_TYPE "amdgpu-propagate-attributes"
43 
44 using namespace llvm;
45 
46 namespace llvm {
47 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
48 }
49 
50 namespace {
51 
52 // Target features to propagate.
53 static constexpr const FeatureBitset TargetFeatures = {
54   AMDGPU::FeatureWavefrontSize16,
55   AMDGPU::FeatureWavefrontSize32,
56   AMDGPU::FeatureWavefrontSize64
57 };
58 
59 // Attributes to propagate.
60 // TODO: Support conservative min/max merging instead of cloning.
61 static constexpr const char* AttributeNames[] = {
62   "amdgpu-waves-per-eu",
63   "amdgpu-flat-work-group-size"
64 };
65 
66 static constexpr unsigned NumAttr =
67   sizeof(AttributeNames) / sizeof(AttributeNames[0]);
68 
69 class AMDGPUPropagateAttributes {
70 
71   class FnProperties {
72   private:
73     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
74 
75   public:
76     explicit FnProperties(const TargetMachine &TM, const Function &F) {
77       Features = TM.getSubtargetImpl(F)->getFeatureBits();
78 
79       for (unsigned I = 0; I < NumAttr; ++I)
80         if (F.hasFnAttribute(AttributeNames[I]))
81           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
82     }
83 
84     bool operator == (const FnProperties &Other) const {
85       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
86         return false;
87       for (unsigned I = 0; I < NumAttr; ++I)
88         if (Attributes[I] != Other.Attributes[I])
89           return false;
90       return true;
91     }
92 
93     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
94       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
95       for (unsigned I = 0; I < NumAttr; ++I)
96         New.Attributes[I] = CallerProps.Attributes[I];
97       return New;
98     }
99 
100     FeatureBitset Features;
101     Optional<Attribute> Attributes[NumAttr];
102   };
103 
104   class Clone {
105   public:
106     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
107       Properties(Props), OrigF(OrigF), NewF(NewF) {}
108 
109     FnProperties Properties;
110     Function *OrigF;
111     Function *NewF;
112   };
113 
114   const TargetMachine *TM;
115 
116   // Clone functions as needed or just set attributes.
117   bool AllowClone;
118 
119   // Option propagation roots.
120   SmallSet<Function *, 32> Roots;
121 
122   // Clones of functions with their attributes.
123   SmallVector<Clone, 32> Clones;
124 
125   // Find a clone with required features.
126   Function *findFunction(const FnProperties &PropsNeeded,
127                          Function *OrigF);
128 
129   // Clone function \p F and set \p NewProps on the clone.
130   // Cole takes the name of original function.
131   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
132 
133   // Set new function's features in place.
134   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
135 
136   // Set new function's attributes in place.
137   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
138 
139   std::string getFeatureString(const FeatureBitset &Features) const;
140 
141   // Propagate attributes from Roots.
142   bool process();
143 
144 public:
145   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
146     TM(TM), AllowClone(AllowClone) {}
147 
148   // Use F as a root and propagate its attributes.
149   bool process(Function &F);
150 
151   // Propagate attributes starting from kernel functions.
152   bool process(Module &M);
153 };
154 
155 // Allows to propagate attributes early, but no clonning is allowed as it must
156 // be a function pass to run before any optimizations.
157 // TODO: We shall only need a one instance of module pass, but that needs to be
158 // in the linker pipeline which is currently not possible.
159 class AMDGPUPropagateAttributesEarly : public FunctionPass {
160   const TargetMachine *TM;
161 
162 public:
163   static char ID; // Pass identification
164 
165   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
166     FunctionPass(ID), TM(TM) {
167     initializeAMDGPUPropagateAttributesEarlyPass(
168       *PassRegistry::getPassRegistry());
169   }
170 
171   bool runOnFunction(Function &F) override;
172 };
173 
174 // Allows to propagate attributes with clonning but does that late in the
175 // pipeline.
176 class AMDGPUPropagateAttributesLate : public ModulePass {
177   const TargetMachine *TM;
178 
179 public:
180   static char ID; // Pass identification
181 
182   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
183     ModulePass(ID), TM(TM) {
184     initializeAMDGPUPropagateAttributesLatePass(
185       *PassRegistry::getPassRegistry());
186   }
187 
188   bool runOnModule(Module &M) override;
189 };
190 
191 }  // end anonymous namespace.
192 
193 char AMDGPUPropagateAttributesEarly::ID = 0;
194 char AMDGPUPropagateAttributesLate::ID = 0;
195 
196 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
197                 "amdgpu-propagate-attributes-early",
198                 "Early propagate attributes from kernels to functions",
199                 false, false)
200 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
201                 "amdgpu-propagate-attributes-late",
202                 "Late propagate attributes from kernels to functions",
203                 false, false)
204 
205 Function *
206 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
207                                         Function *OrigF) {
208   // TODO: search for clone's clones.
209   for (Clone &C : Clones)
210     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
211       return C.NewF;
212 
213   return nullptr;
214 }
215 
216 bool AMDGPUPropagateAttributes::process(Module &M) {
217   for (auto &F : M.functions())
218     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
219       Roots.insert(&F);
220 
221   return process();
222 }
223 
224 bool AMDGPUPropagateAttributes::process(Function &F) {
225   Roots.insert(&F);
226   return process();
227 }
228 
229 bool AMDGPUPropagateAttributes::process() {
230   bool Changed = false;
231   SmallSet<Function *, 32> NewRoots;
232   SmallSet<Function *, 32> Replaced;
233 
234   if (Roots.empty())
235     return false;
236   Module &M = *(*Roots.begin())->getParent();
237 
238   do {
239     Roots.insert(NewRoots.begin(), NewRoots.end());
240     NewRoots.clear();
241 
242     for (auto &F : M.functions()) {
243       if (F.isDeclaration())
244         continue;
245 
246       const FnProperties CalleeProps(*TM, F);
247       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
248       SmallSet<CallBase *, 32> Visited;
249 
250       for (User *U : F.users()) {
251         Instruction *I = dyn_cast<Instruction>(U);
252         if (!I)
253           continue;
254         CallBase *CI = dyn_cast<CallBase>(I);
255         if (!CI)
256           continue;
257         Function *Caller = CI->getCaller();
258         if (!Caller || !Visited.insert(CI).second)
259           continue;
260         if (!Roots.count(Caller) && !NewRoots.count(Caller))
261           continue;
262 
263         const FnProperties CallerProps(*TM, *Caller);
264 
265         if (CalleeProps == CallerProps) {
266           if (!Roots.count(&F))
267             NewRoots.insert(&F);
268           continue;
269         }
270 
271         Function *NewF = findFunction(CallerProps, &F);
272         if (!NewF) {
273           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
274           if (!AllowClone) {
275             // This may set different features on different iteartions if
276             // there is a contradiction in callers' attributes. In this case
277             // we rely on a second pass running on Module, which is allowed
278             // to clone.
279             setFeatures(F, NewProps.Features);
280             setAttributes(F, NewProps.Attributes);
281             NewRoots.insert(&F);
282             Changed = true;
283             break;
284           }
285 
286           NewF = cloneWithProperties(F, NewProps);
287           Clones.push_back(Clone(CallerProps, &F, NewF));
288           NewRoots.insert(NewF);
289         }
290 
291         ToReplace.push_back(std::make_pair(CI, NewF));
292         Replaced.insert(&F);
293 
294         Changed = true;
295       }
296 
297       while (!ToReplace.empty()) {
298         auto R = ToReplace.pop_back_val();
299         R.first->setCalledFunction(R.second);
300       }
301     }
302   } while (!NewRoots.empty());
303 
304   for (Function *F : Replaced) {
305     if (F->use_empty())
306       F->eraseFromParent();
307   }
308 
309   Roots.clear();
310   Clones.clear();
311 
312   return Changed;
313 }
314 
315 Function *
316 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
317                                                const FnProperties &NewProps) {
318   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
319 
320   ValueToValueMapTy dummy;
321   Function *NewF = CloneFunction(&F, dummy);
322   setFeatures(*NewF, NewProps.Features);
323   setAttributes(*NewF, NewProps.Attributes);
324   NewF->setVisibility(GlobalValue::DefaultVisibility);
325   NewF->setLinkage(GlobalValue::InternalLinkage);
326 
327   // Swap names. If that is the only clone it will retain the name of now
328   // dead value. Preserve original name for externally visible functions.
329   if (F.hasName() && F.hasLocalLinkage()) {
330     std::string NewName = std::string(NewF->getName());
331     NewF->takeName(&F);
332     F.setName(NewName);
333   }
334 
335   return NewF;
336 }
337 
338 void AMDGPUPropagateAttributes::setFeatures(Function &F,
339                                             const FeatureBitset &NewFeatures) {
340   std::string NewFeatureStr = getFeatureString(NewFeatures);
341 
342   LLVM_DEBUG(dbgs() << "Set features "
343                     << getFeatureString(NewFeatures & TargetFeatures)
344                     << " on " << F.getName() << '\n');
345 
346   F.removeFnAttr("target-features");
347   F.addFnAttr("target-features", NewFeatureStr);
348 }
349 
350 void AMDGPUPropagateAttributes::setAttributes(Function &F,
351     const ArrayRef<Optional<Attribute>> NewAttrs) {
352   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
353   for (unsigned I = 0; I < NumAttr; ++I) {
354     F.removeFnAttr(AttributeNames[I]);
355     if (NewAttrs[I]) {
356       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
357       F.addFnAttr(*NewAttrs[I]);
358     }
359   }
360 }
361 
362 std::string
363 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
364 {
365   std::string Ret;
366   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
367     if (Features[KV.Value])
368       Ret += (StringRef("+") + KV.Key + ",").str();
369     else if (TargetFeatures[KV.Value])
370       Ret += (StringRef("-") + KV.Key + ",").str();
371   }
372   Ret.pop_back(); // Remove last comma.
373   return Ret;
374 }
375 
376 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
377   if (!TM) {
378     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
379     if (!TPC)
380       return false;
381 
382     TM = &TPC->getTM<TargetMachine>();
383   }
384 
385   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
386     return false;
387 
388   return AMDGPUPropagateAttributes(TM, false).process(F);
389 }
390 
391 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
392   if (!TM) {
393     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
394     if (!TPC)
395       return false;
396 
397     TM = &TPC->getTM<TargetMachine>();
398   }
399 
400   return AMDGPUPropagateAttributes(TM, true).process(M);
401 }
402 
403 FunctionPass
404 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
405   return new AMDGPUPropagateAttributesEarly(TM);
406 }
407 
408 ModulePass
409 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
410   return new AMDGPUPropagateAttributesLate(TM);
411 }
412