1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/CodeGen/TargetPassConfig.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include "llvm/Transforms/Utils/Cloning.h"
35 #define DEBUG_TYPE "amdgpu-propagate-attributes"
36 
37 using namespace llvm;
38 
namespace llvm {
// Subtarget feature table, declared here and defined in another translation
// unit (presumably the TableGen-generated subtarget info -- confirm).
// getFeatureString() walks it and reads each entry's Key and Value.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
42 
43 namespace {
44 
45 // Target features to propagate.
46 static constexpr const FeatureBitset TargetFeatures = {
47   AMDGPU::FeatureWavefrontSize16,
48   AMDGPU::FeatureWavefrontSize32,
49   AMDGPU::FeatureWavefrontSize64
50 };
51 
// String function attributes that are copied from a caller to its callees.
// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char *AttributeNames[] = {
    "amdgpu-waves-per-eu",
    "amdgpu-flat-work-group-size",
};

// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
    sizeof(AttributeNames) / sizeof(*AttributeNames);
61 
62 class AMDGPUPropagateAttributes {
63 
64   class FnProperties {
65   private:
66     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
67 
68   public:
69     explicit FnProperties(const TargetMachine &TM, const Function &F) {
70       Features = TM.getSubtargetImpl(F)->getFeatureBits();
71 
72       for (unsigned I = 0; I < NumAttr; ++I)
73         if (F.hasFnAttribute(AttributeNames[I]))
74           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
75     }
76 
77     bool operator == (const FnProperties &Other) const {
78       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
79         return false;
80       for (unsigned I = 0; I < NumAttr; ++I)
81         if (Attributes[I] != Other.Attributes[I])
82           return false;
83       return true;
84     }
85 
86     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
87       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
88       for (unsigned I = 0; I < NumAttr; ++I)
89         New.Attributes[I] = CallerProps.Attributes[I];
90       return New;
91     }
92 
93     FeatureBitset Features;
94     Optional<Attribute> Attributes[NumAttr];
95   };
96 
97   class Clone {
98   public:
99     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
100       Properties(Props), OrigF(OrigF), NewF(NewF) {}
101 
102     FnProperties Properties;
103     Function *OrigF;
104     Function *NewF;
105   };
106 
107   const TargetMachine *TM;
108 
109   // Clone functions as needed or just set attributes.
110   bool AllowClone;
111 
112   // Option propagation roots.
113   SmallSet<Function *, 32> Roots;
114 
115   // Clones of functions with their attributes.
116   SmallVector<Clone, 32> Clones;
117 
118   // Find a clone with required features.
119   Function *findFunction(const FnProperties &PropsNeeded,
120                          Function *OrigF);
121 
122   // Clone function \p F and set \p NewProps on the clone.
123   // Cole takes the name of original function.
124   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
125 
126   // Set new function's features in place.
127   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
128 
129   // Set new function's attributes in place.
130   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
131 
132   std::string getFeatureString(const FeatureBitset &Features) const;
133 
134   // Propagate attributes from Roots.
135   bool process();
136 
137 public:
138   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
139     TM(TM), AllowClone(AllowClone) {}
140 
141   // Use F as a root and propagate its attributes.
142   bool process(Function &F);
143 
144   // Propagate attributes starting from kernel functions.
145   bool process(Module &M);
146 };
147 
148 // Allows to propagate attributes early, but no clonning is allowed as it must
149 // be a function pass to run before any optimizations.
150 // TODO: We shall only need a one instance of module pass, but that needs to be
151 // in the linker pipeline which is currently not possible.
152 class AMDGPUPropagateAttributesEarly : public FunctionPass {
153   const TargetMachine *TM;
154 
155 public:
156   static char ID; // Pass identification
157 
158   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
159     FunctionPass(ID), TM(TM) {
160     initializeAMDGPUPropagateAttributesEarlyPass(
161       *PassRegistry::getPassRegistry());
162   }
163 
164   bool runOnFunction(Function &F) override;
165 };
166 
167 // Allows to propagate attributes with clonning but does that late in the
168 // pipeline.
169 class AMDGPUPropagateAttributesLate : public ModulePass {
170   const TargetMachine *TM;
171 
172 public:
173   static char ID; // Pass identification
174 
175   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
176     ModulePass(ID), TM(TM) {
177     initializeAMDGPUPropagateAttributesLatePass(
178       *PassRegistry::getPassRegistry());
179   }
180 
181   bool runOnModule(Module &M) override;
182 };
183 
184 }  // end anonymous namespace.
185 
186 char AMDGPUPropagateAttributesEarly::ID = 0;
187 char AMDGPUPropagateAttributesLate::ID = 0;
188 
189 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
190                 "amdgpu-propagate-attributes-early",
191                 "Early propagate attributes from kernels to functions",
192                 false, false)
193 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
194                 "amdgpu-propagate-attributes-late",
195                 "Late propagate attributes from kernels to functions",
196                 false, false)
197 
198 Function *
199 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
200                                         Function *OrigF) {
201   // TODO: search for clone's clones.
202   for (Clone &C : Clones)
203     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
204       return C.NewF;
205 
206   return nullptr;
207 }
208 
209 bool AMDGPUPropagateAttributes::process(Module &M) {
210   for (auto &F : M.functions())
211     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
212       Roots.insert(&F);
213 
214   return process();
215 }
216 
217 bool AMDGPUPropagateAttributes::process(Function &F) {
218   Roots.insert(&F);
219   return process();
220 }
221 
222 bool AMDGPUPropagateAttributes::process() {
223   bool Changed = false;
224   SmallSet<Function *, 32> NewRoots;
225   SmallSet<Function *, 32> Replaced;
226 
227   if (Roots.empty())
228     return false;
229   Module &M = *(*Roots.begin())->getParent();
230 
231   do {
232     Roots.insert(NewRoots.begin(), NewRoots.end());
233     NewRoots.clear();
234 
235     for (auto &F : M.functions()) {
236       if (F.isDeclaration())
237         continue;
238 
239       const FnProperties CalleeProps(*TM, F);
240       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
241       SmallSet<CallBase *, 32> Visited;
242 
243       for (User *U : F.users()) {
244         Instruction *I = dyn_cast<Instruction>(U);
245         if (!I)
246           continue;
247         CallBase *CI = dyn_cast<CallBase>(I);
248         if (!CI)
249           continue;
250         Function *Caller = CI->getCaller();
251         if (!Caller || !Visited.insert(CI).second)
252           continue;
253         if (!Roots.count(Caller) && !NewRoots.count(Caller))
254           continue;
255 
256         const FnProperties CallerProps(*TM, *Caller);
257 
258         if (CalleeProps == CallerProps) {
259           if (!Roots.count(&F))
260             NewRoots.insert(&F);
261           continue;
262         }
263 
264         Function *NewF = findFunction(CallerProps, &F);
265         if (!NewF) {
266           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
267           if (!AllowClone) {
268             // This may set different features on different iteartions if
269             // there is a contradiction in callers' attributes. In this case
270             // we rely on a second pass running on Module, which is allowed
271             // to clone.
272             setFeatures(F, NewProps.Features);
273             setAttributes(F, NewProps.Attributes);
274             NewRoots.insert(&F);
275             Changed = true;
276             break;
277           }
278 
279           NewF = cloneWithProperties(F, NewProps);
280           Clones.push_back(Clone(CallerProps, &F, NewF));
281           NewRoots.insert(NewF);
282         }
283 
284         ToReplace.push_back(std::make_pair(CI, NewF));
285         Replaced.insert(&F);
286 
287         Changed = true;
288       }
289 
290       while (!ToReplace.empty()) {
291         auto R = ToReplace.pop_back_val();
292         R.first->setCalledFunction(R.second);
293       }
294     }
295   } while (!NewRoots.empty());
296 
297   for (Function *F : Replaced) {
298     if (F->use_empty())
299       F->eraseFromParent();
300   }
301 
302   Roots.clear();
303   Clones.clear();
304 
305   return Changed;
306 }
307 
308 Function *
309 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
310                                                const FnProperties &NewProps) {
311   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
312 
313   ValueToValueMapTy dummy;
314   Function *NewF = CloneFunction(&F, dummy);
315   setFeatures(*NewF, NewProps.Features);
316   setAttributes(*NewF, NewProps.Attributes);
317   NewF->setVisibility(GlobalValue::DefaultVisibility);
318   NewF->setLinkage(GlobalValue::InternalLinkage);
319 
320   // Swap names. If that is the only clone it will retain the name of now
321   // dead value. Preserve original name for externally visible functions.
322   if (F.hasName() && F.hasLocalLinkage()) {
323     std::string NewName = std::string(NewF->getName());
324     NewF->takeName(&F);
325     F.setName(NewName);
326   }
327 
328   return NewF;
329 }
330 
331 void AMDGPUPropagateAttributes::setFeatures(Function &F,
332                                             const FeatureBitset &NewFeatures) {
333   std::string NewFeatureStr = getFeatureString(NewFeatures);
334 
335   LLVM_DEBUG(dbgs() << "Set features "
336                     << getFeatureString(NewFeatures & TargetFeatures)
337                     << " on " << F.getName() << '\n');
338 
339   F.removeFnAttr("target-features");
340   F.addFnAttr("target-features", NewFeatureStr);
341 }
342 
343 void AMDGPUPropagateAttributes::setAttributes(Function &F,
344     const ArrayRef<Optional<Attribute>> NewAttrs) {
345   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
346   for (unsigned I = 0; I < NumAttr; ++I) {
347     F.removeFnAttr(AttributeNames[I]);
348     if (NewAttrs[I]) {
349       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
350       F.addFnAttr(*NewAttrs[I]);
351     }
352   }
353 }
354 
355 std::string
356 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
357 {
358   std::string Ret;
359   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
360     if (Features[KV.Value])
361       Ret += (StringRef("+") + KV.Key + ",").str();
362     else if (TargetFeatures[KV.Value])
363       Ret += (StringRef("-") + KV.Key + ",").str();
364   }
365   Ret.pop_back(); // Remove last comma.
366   return Ret;
367 }
368 
369 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
370   if (!TM) {
371     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
372     if (!TPC)
373       return false;
374 
375     TM = &TPC->getTM<TargetMachine>();
376   }
377 
378   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
379     return false;
380 
381   return AMDGPUPropagateAttributes(TM, false).process(F);
382 }
383 
384 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
385   if (!TM) {
386     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
387     if (!TPC)
388       return false;
389 
390     TM = &TPC->getTM<TargetMachine>();
391   }
392 
393   return AMDGPUPropagateAttributes(TM, true).process(M);
394 }
395 
396 FunctionPass
397 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
398   return new AMDGPUPropagateAttributesEarly(TM);
399 }
400 
401 ModulePass
402 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
403   return new AMDGPUPropagateAttributesLate(TM);
404 }
405 
406 PreservedAnalyses
407 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
408                                         FunctionAnalysisManager &AM) {
409   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
410     return PreservedAnalyses::all();
411 
412   return AMDGPUPropagateAttributes(&TM, false).process(F)
413              ? PreservedAnalyses::none()
414              : PreservedAnalyses::all();
415 }
416 
417 PreservedAnalyses
418 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
419   return AMDGPUPropagateAttributes(&TM, true).process(M)
420              ? PreservedAnalyses::none()
421              : PreservedAnalyses::all();
422 }
423