1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be compiled correctly if the proper attributes are propagated
/// from the caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned, all call sites will
/// be updated to use the correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/CodeGen/TargetPassConfig.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/PassManager.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Transforms/Utils/Cloning.h"
41 #include <string>
42 
43 #define DEBUG_TYPE "amdgpu-propagate-attributes"
44 
45 using namespace llvm;
46 
namespace llvm {
// Table of AMDGPU subtarget feature descriptors. It is only declared here;
// the definition lives elsewhere. getFeatureString() walks this table and
// uses each entry's Value (feature bit index) and Key (feature name).
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
50 
51 namespace {
52 
// Target features to propagate. These wavefront-size bits are the only
// feature bits compared between a caller and a callee
// (FnProperties::operator==), and the bits a callee drops before taking over
// its caller's features (FnProperties::adjustToCaller).
static constexpr const FeatureBitset TargetFeatures = {
  AMDGPU::FeatureWavefrontSize16,
  AMDGPU::FeatureWavefrontSize32,
  AMDGPU::FeatureWavefrontSize64
};

// Attributes to propagate. Each name is a string function attribute that is
// copied from the caller to the callee (or removed from the callee when the
// caller does not have it).
// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char* AttributeNames[] = {
  "amdgpu-waves-per-eu",
  "amdgpu-flat-work-group-size"
};

// Number of entries in AttributeNames; also the size of the per-function
// attribute array kept in FnProperties.
static constexpr unsigned NumAttr =
  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
69 
70 class AMDGPUPropagateAttributes {
71 
72   class FnProperties {
73   private:
74     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
75 
76   public:
77     explicit FnProperties(const TargetMachine &TM, const Function &F) {
78       Features = TM.getSubtargetImpl(F)->getFeatureBits();
79 
80       for (unsigned I = 0; I < NumAttr; ++I)
81         if (F.hasFnAttribute(AttributeNames[I]))
82           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
83     }
84 
85     bool operator == (const FnProperties &Other) const {
86       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
87         return false;
88       for (unsigned I = 0; I < NumAttr; ++I)
89         if (Attributes[I] != Other.Attributes[I])
90           return false;
91       return true;
92     }
93 
94     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
95       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
96       for (unsigned I = 0; I < NumAttr; ++I)
97         New.Attributes[I] = CallerProps.Attributes[I];
98       return New;
99     }
100 
101     FeatureBitset Features;
102     Optional<Attribute> Attributes[NumAttr];
103   };
104 
105   class Clone {
106   public:
107     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
108       Properties(Props), OrigF(OrigF), NewF(NewF) {}
109 
110     FnProperties Properties;
111     Function *OrigF;
112     Function *NewF;
113   };
114 
115   const TargetMachine *TM;
116 
117   // Clone functions as needed or just set attributes.
118   bool AllowClone;
119 
120   // Option propagation roots.
121   SmallSet<Function *, 32> Roots;
122 
123   // Clones of functions with their attributes.
124   SmallVector<Clone, 32> Clones;
125 
126   // Find a clone with required features.
127   Function *findFunction(const FnProperties &PropsNeeded,
128                          Function *OrigF);
129 
130   // Clone function \p F and set \p NewProps on the clone.
131   // Cole takes the name of original function.
132   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
133 
134   // Set new function's features in place.
135   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
136 
137   // Set new function's attributes in place.
138   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
139 
140   std::string getFeatureString(const FeatureBitset &Features) const;
141 
142   // Propagate attributes from Roots.
143   bool process();
144 
145 public:
146   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
147     TM(TM), AllowClone(AllowClone) {}
148 
149   // Use F as a root and propagate its attributes.
150   bool process(Function &F);
151 
152   // Propagate attributes starting from kernel functions.
153   bool process(Module &M);
154 };
155 
156 // Allows to propagate attributes early, but no clonning is allowed as it must
157 // be a function pass to run before any optimizations.
158 // TODO: We shall only need a one instance of module pass, but that needs to be
159 // in the linker pipeline which is currently not possible.
160 class AMDGPUPropagateAttributesEarly : public FunctionPass {
161   const TargetMachine *TM;
162 
163 public:
164   static char ID; // Pass identification
165 
166   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
167     FunctionPass(ID), TM(TM) {
168     initializeAMDGPUPropagateAttributesEarlyPass(
169       *PassRegistry::getPassRegistry());
170   }
171 
172   bool runOnFunction(Function &F) override;
173 };
174 
175 // Allows to propagate attributes with clonning but does that late in the
176 // pipeline.
177 class AMDGPUPropagateAttributesLate : public ModulePass {
178   const TargetMachine *TM;
179 
180 public:
181   static char ID; // Pass identification
182 
183   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
184     ModulePass(ID), TM(TM) {
185     initializeAMDGPUPropagateAttributesLatePass(
186       *PassRegistry::getPassRegistry());
187   }
188 
189   bool runOnModule(Module &M) override;
190 };
191 
192 }  // end anonymous namespace.
193 
// Definitions of the static pass identification members declared in the two
// legacy pass classes; each class hands its ID to its base-class constructor.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register the legacy passes. Each invocation supplies the pass class, its
// argument string and a human-readable description.
// NOTE(review): the two trailing `false` values are presumably the
// CFG-only / is-analysis flags of INITIALIZE_PASS -- confirm against
// PassSupport.h.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
205 
206 Function *
207 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
208                                         Function *OrigF) {
209   // TODO: search for clone's clones.
210   for (Clone &C : Clones)
211     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
212       return C.NewF;
213 
214   return nullptr;
215 }
216 
217 bool AMDGPUPropagateAttributes::process(Module &M) {
218   for (auto &F : M.functions())
219     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
220       Roots.insert(&F);
221 
222   return process();
223 }
224 
225 bool AMDGPUPropagateAttributes::process(Function &F) {
226   Roots.insert(&F);
227   return process();
228 }
229 
230 bool AMDGPUPropagateAttributes::process() {
231   bool Changed = false;
232   SmallSet<Function *, 32> NewRoots;
233   SmallSet<Function *, 32> Replaced;
234 
235   if (Roots.empty())
236     return false;
237   Module &M = *(*Roots.begin())->getParent();
238 
239   do {
240     Roots.insert(NewRoots.begin(), NewRoots.end());
241     NewRoots.clear();
242 
243     for (auto &F : M.functions()) {
244       if (F.isDeclaration())
245         continue;
246 
247       const FnProperties CalleeProps(*TM, F);
248       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
249       SmallSet<CallBase *, 32> Visited;
250 
251       for (User *U : F.users()) {
252         Instruction *I = dyn_cast<Instruction>(U);
253         if (!I)
254           continue;
255         CallBase *CI = dyn_cast<CallBase>(I);
256         if (!CI)
257           continue;
258         Function *Caller = CI->getCaller();
259         if (!Caller || !Visited.insert(CI).second)
260           continue;
261         if (!Roots.count(Caller) && !NewRoots.count(Caller))
262           continue;
263 
264         const FnProperties CallerProps(*TM, *Caller);
265 
266         if (CalleeProps == CallerProps) {
267           if (!Roots.count(&F))
268             NewRoots.insert(&F);
269           continue;
270         }
271 
272         Function *NewF = findFunction(CallerProps, &F);
273         if (!NewF) {
274           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
275           if (!AllowClone) {
276             // This may set different features on different iteartions if
277             // there is a contradiction in callers' attributes. In this case
278             // we rely on a second pass running on Module, which is allowed
279             // to clone.
280             setFeatures(F, NewProps.Features);
281             setAttributes(F, NewProps.Attributes);
282             NewRoots.insert(&F);
283             Changed = true;
284             break;
285           }
286 
287           NewF = cloneWithProperties(F, NewProps);
288           Clones.push_back(Clone(CallerProps, &F, NewF));
289           NewRoots.insert(NewF);
290         }
291 
292         ToReplace.push_back(std::make_pair(CI, NewF));
293         Replaced.insert(&F);
294 
295         Changed = true;
296       }
297 
298       while (!ToReplace.empty()) {
299         auto R = ToReplace.pop_back_val();
300         R.first->setCalledFunction(R.second);
301       }
302     }
303   } while (!NewRoots.empty());
304 
305   for (Function *F : Replaced) {
306     if (F->use_empty())
307       F->eraseFromParent();
308   }
309 
310   Roots.clear();
311   Clones.clear();
312 
313   return Changed;
314 }
315 
316 Function *
317 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
318                                                const FnProperties &NewProps) {
319   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
320 
321   ValueToValueMapTy dummy;
322   Function *NewF = CloneFunction(&F, dummy);
323   setFeatures(*NewF, NewProps.Features);
324   setAttributes(*NewF, NewProps.Attributes);
325   NewF->setVisibility(GlobalValue::DefaultVisibility);
326   NewF->setLinkage(GlobalValue::InternalLinkage);
327 
328   // Swap names. If that is the only clone it will retain the name of now
329   // dead value. Preserve original name for externally visible functions.
330   if (F.hasName() && F.hasLocalLinkage()) {
331     std::string NewName = std::string(NewF->getName());
332     NewF->takeName(&F);
333     F.setName(NewName);
334   }
335 
336   return NewF;
337 }
338 
339 void AMDGPUPropagateAttributes::setFeatures(Function &F,
340                                             const FeatureBitset &NewFeatures) {
341   std::string NewFeatureStr = getFeatureString(NewFeatures);
342 
343   LLVM_DEBUG(dbgs() << "Set features "
344                     << getFeatureString(NewFeatures & TargetFeatures)
345                     << " on " << F.getName() << '\n');
346 
347   F.removeFnAttr("target-features");
348   F.addFnAttr("target-features", NewFeatureStr);
349 }
350 
351 void AMDGPUPropagateAttributes::setAttributes(Function &F,
352     const ArrayRef<Optional<Attribute>> NewAttrs) {
353   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
354   for (unsigned I = 0; I < NumAttr; ++I) {
355     F.removeFnAttr(AttributeNames[I]);
356     if (NewAttrs[I]) {
357       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
358       F.addFnAttr(*NewAttrs[I]);
359     }
360   }
361 }
362 
363 std::string
364 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
365 {
366   std::string Ret;
367   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
368     if (Features[KV.Value])
369       Ret += (StringRef("+") + KV.Key + ",").str();
370     else if (TargetFeatures[KV.Value])
371       Ret += (StringRef("-") + KV.Key + ",").str();
372   }
373   Ret.pop_back(); // Remove last comma.
374   return Ret;
375 }
376 
377 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
378   if (!TM) {
379     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
380     if (!TPC)
381       return false;
382 
383     TM = &TPC->getTM<TargetMachine>();
384   }
385 
386   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
387     return false;
388 
389   return AMDGPUPropagateAttributes(TM, false).process(F);
390 }
391 
392 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
393   if (!TM) {
394     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
395     if (!TPC)
396       return false;
397 
398     TM = &TPC->getTM<TargetMachine>();
399   }
400 
401   return AMDGPUPropagateAttributes(TM, true).process(M);
402 }
403 
404 FunctionPass
405 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
406   return new AMDGPUPropagateAttributesEarly(TM);
407 }
408 
409 ModulePass
410 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
411   return new AMDGPUPropagateAttributesLate(TM);
412 }
413 
414 PreservedAnalyses
415 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
416                                         FunctionAnalysisManager &AM) {
417   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
418     return PreservedAnalyses::all();
419 
420   return AMDGPUPropagateAttributes(&TM, false).process(F)
421              ? PreservedAnalyses::none()
422              : PreservedAnalyses::all();
423 }
424 
425 PreservedAnalyses
426 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
427   return AMDGPUPropagateAttributes(&TM, true).process(M)
428              ? PreservedAnalyses::none()
429              : PreservedAnalyses::all();
430 }
431