1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most library functions are not compiled for a specific ABI,
/// yet they will be compiled correctly if the proper attributes are propagated
/// from the caller.
///
/// The pass analyzes the call graph and propagates ABI target features
/// through it.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/InstrTypes.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38 
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40 
41 using namespace llvm;
42 
43 namespace llvm {
44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45 }
46 
47 namespace {
48 
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51   AMDGPU::FeatureWavefrontSize16,
52   AMDGPU::FeatureWavefrontSize32,
53   AMDGPU::FeatureWavefrontSize64
54 };
55 
56 // Attributes to propagate.
57 // TODO: Support conservative min/max merging instead of cloning.
58 static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
59 
60 static constexpr unsigned NumAttr =
61   sizeof(AttributeNames) / sizeof(AttributeNames[0]);
62 
63 class AMDGPUPropagateAttributes {
64 
65   class FnProperties {
66   private:
67     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
68 
69   public:
70     explicit FnProperties(const TargetMachine &TM, const Function &F) {
71       Features = TM.getSubtargetImpl(F)->getFeatureBits();
72 
73       for (unsigned I = 0; I < NumAttr; ++I)
74         if (F.hasFnAttribute(AttributeNames[I]))
75           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
76     }
77 
78     bool operator == (const FnProperties &Other) const {
79       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
80         return false;
81       for (unsigned I = 0; I < NumAttr; ++I)
82         if (Attributes[I] != Other.Attributes[I])
83           return false;
84       return true;
85     }
86 
87     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
88       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
89       for (unsigned I = 0; I < NumAttr; ++I)
90         New.Attributes[I] = CallerProps.Attributes[I];
91       return New;
92     }
93 
94     FeatureBitset Features;
95     Optional<Attribute> Attributes[NumAttr];
96   };
97 
98   class Clone {
99   public:
100     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
101       Properties(Props), OrigF(OrigF), NewF(NewF) {}
102 
103     FnProperties Properties;
104     Function *OrigF;
105     Function *NewF;
106   };
107 
108   const TargetMachine *TM;
109 
110   // Clone functions as needed or just set attributes.
111   bool AllowClone;
112 
113   // Option propagation roots.
114   SmallSet<Function *, 32> Roots;
115 
116   // Clones of functions with their attributes.
117   SmallVector<Clone, 32> Clones;
118 
119   // Find a clone with required features.
120   Function *findFunction(const FnProperties &PropsNeeded,
121                          Function *OrigF);
122 
123   // Clone function \p F and set \p NewProps on the clone.
124   // Cole takes the name of original function.
125   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
126 
127   // Set new function's features in place.
128   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
129 
130   // Set new function's attributes in place.
131   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
132 
133   std::string getFeatureString(const FeatureBitset &Features) const;
134 
135   // Propagate attributes from Roots.
136   bool process();
137 
138 public:
139   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
140     TM(TM), AllowClone(AllowClone) {}
141 
142   // Use F as a root and propagate its attributes.
143   bool process(Function &F);
144 
145   // Propagate attributes starting from kernel functions.
146   bool process(Module &M);
147 };
148 
149 // Allows to propagate attributes early, but no cloning is allowed as it must
150 // be a function pass to run before any optimizations.
151 // TODO: We shall only need a one instance of module pass, but that needs to be
152 // in the linker pipeline which is currently not possible.
153 class AMDGPUPropagateAttributesEarly : public FunctionPass {
154   const TargetMachine *TM;
155 
156 public:
157   static char ID; // Pass identification
158 
159   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
160     FunctionPass(ID), TM(TM) {
161     initializeAMDGPUPropagateAttributesEarlyPass(
162       *PassRegistry::getPassRegistry());
163   }
164 
165   bool runOnFunction(Function &F) override;
166 };
167 
168 // Allows to propagate attributes with cloning but does that late in the
169 // pipeline.
170 class AMDGPUPropagateAttributesLate : public ModulePass {
171   const TargetMachine *TM;
172 
173 public:
174   static char ID; // Pass identification
175 
176   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
177     ModulePass(ID), TM(TM) {
178     initializeAMDGPUPropagateAttributesLatePass(
179       *PassRegistry::getPassRegistry());
180   }
181 
182   bool runOnModule(Module &M) override;
183 };
184 
185 }  // end anonymous namespace.
186 
187 char AMDGPUPropagateAttributesEarly::ID = 0;
188 char AMDGPUPropagateAttributesLate::ID = 0;
189 
190 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
191                 "amdgpu-propagate-attributes-early",
192                 "Early propagate attributes from kernels to functions",
193                 false, false)
194 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
195                 "amdgpu-propagate-attributes-late",
196                 "Late propagate attributes from kernels to functions",
197                 false, false)
198 
199 Function *
200 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
201                                         Function *OrigF) {
202   // TODO: search for clone's clones.
203   for (Clone &C : Clones)
204     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
205       return C.NewF;
206 
207   return nullptr;
208 }
209 
210 bool AMDGPUPropagateAttributes::process(Module &M) {
211   for (auto &F : M.functions())
212     if (AMDGPU::isKernel(F.getCallingConv()))
213       Roots.insert(&F);
214 
215   return Roots.empty() ? false : process();
216 }
217 
218 bool AMDGPUPropagateAttributes::process(Function &F) {
219   Roots.insert(&F);
220   return process();
221 }
222 
223 bool AMDGPUPropagateAttributes::process() {
224   bool Changed = false;
225   SmallSet<Function *, 32> NewRoots;
226   SmallSet<Function *, 32> Replaced;
227 
228   assert(!Roots.empty());
229   Module &M = *(*Roots.begin())->getParent();
230 
231   do {
232     Roots.insert(NewRoots.begin(), NewRoots.end());
233     NewRoots.clear();
234 
235     for (auto &F : M.functions()) {
236       if (F.isDeclaration())
237         continue;
238 
239       const FnProperties CalleeProps(*TM, F);
240       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
241       SmallSet<CallBase *, 32> Visited;
242 
243       for (User *U : F.users()) {
244         Instruction *I = dyn_cast<Instruction>(U);
245         if (!I)
246           continue;
247         CallBase *CI = dyn_cast<CallBase>(I);
248         // Only propagate attributes if F is the called function. Specifically,
249         // do not propagate attributes if F is passed as an argument.
250         // FIXME: handle bitcasted callee, e.g.
251         // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
252         if (!CI || CI->getCalledOperand() != &F)
253           continue;
254         Function *Caller = CI->getCaller();
255         if (!Caller || !Visited.insert(CI).second)
256           continue;
257         if (!Roots.count(Caller) && !NewRoots.count(Caller))
258           continue;
259 
260         const FnProperties CallerProps(*TM, *Caller);
261 
262         if (CalleeProps == CallerProps) {
263           if (!Roots.count(&F))
264             NewRoots.insert(&F);
265           continue;
266         }
267 
268         Function *NewF = findFunction(CallerProps, &F);
269         if (!NewF) {
270           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271           if (!AllowClone) {
272             // This may set different features on different iterations if
273             // there is a contradiction in callers' attributes. In this case
274             // we rely on a second pass running on Module, which is allowed
275             // to clone.
276             setFeatures(F, NewProps.Features);
277             setAttributes(F, NewProps.Attributes);
278             NewRoots.insert(&F);
279             Changed = true;
280             break;
281           }
282 
283           NewF = cloneWithProperties(F, NewProps);
284           Clones.push_back(Clone(CallerProps, &F, NewF));
285           NewRoots.insert(NewF);
286         }
287 
288         ToReplace.push_back(std::make_pair(CI, NewF));
289         Replaced.insert(&F);
290 
291         Changed = true;
292       }
293 
294       while (!ToReplace.empty()) {
295         auto R = ToReplace.pop_back_val();
296         R.first->setCalledFunction(R.second);
297       }
298     }
299   } while (!NewRoots.empty());
300 
301   for (Function *F : Replaced) {
302     if (F->use_empty())
303       F->eraseFromParent();
304   }
305 
306   Roots.clear();
307   Clones.clear();
308 
309   return Changed;
310 }
311 
312 Function *
313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314                                                const FnProperties &NewProps) {
315   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316 
317   ValueToValueMapTy dummy;
318   Function *NewF = CloneFunction(&F, dummy);
319   setFeatures(*NewF, NewProps.Features);
320   setAttributes(*NewF, NewProps.Attributes);
321   NewF->setVisibility(GlobalValue::DefaultVisibility);
322   NewF->setLinkage(GlobalValue::InternalLinkage);
323 
324   // Swap names. If that is the only clone it will retain the name of now
325   // dead value. Preserve original name for externally visible functions.
326   if (F.hasName() && F.hasLocalLinkage()) {
327     std::string NewName = std::string(NewF->getName());
328     NewF->takeName(&F);
329     F.setName(NewName);
330   }
331 
332   return NewF;
333 }
334 
335 void AMDGPUPropagateAttributes::setFeatures(Function &F,
336                                             const FeatureBitset &NewFeatures) {
337   std::string NewFeatureStr = getFeatureString(NewFeatures);
338 
339   LLVM_DEBUG(dbgs() << "Set features "
340                     << getFeatureString(NewFeatures & TargetFeatures)
341                     << " on " << F.getName() << '\n');
342 
343   F.removeFnAttr("target-features");
344   F.addFnAttr("target-features", NewFeatureStr);
345 }
346 
347 void AMDGPUPropagateAttributes::setAttributes(Function &F,
348     const ArrayRef<Optional<Attribute>> NewAttrs) {
349   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350   for (unsigned I = 0; I < NumAttr; ++I) {
351     F.removeFnAttr(AttributeNames[I]);
352     if (NewAttrs[I]) {
353       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354       F.addFnAttr(*NewAttrs[I]);
355     }
356   }
357 }
358 
359 std::string
360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361 {
362   std::string Ret;
363   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364     if (Features[KV.Value])
365       Ret += (StringRef("+") + KV.Key + ",").str();
366     else if (TargetFeatures[KV.Value])
367       Ret += (StringRef("-") + KV.Key + ",").str();
368   }
369   Ret.pop_back(); // Remove last comma.
370   return Ret;
371 }
372 
373 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
374   if (!TM) {
375     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
376     if (!TPC)
377       return false;
378 
379     TM = &TPC->getTM<TargetMachine>();
380   }
381 
382   if (!AMDGPU::isKernel(F.getCallingConv()))
383     return false;
384 
385   return AMDGPUPropagateAttributes(TM, false).process(F);
386 }
387 
388 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
389   if (!TM) {
390     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
391     if (!TPC)
392       return false;
393 
394     TM = &TPC->getTM<TargetMachine>();
395   }
396 
397   return AMDGPUPropagateAttributes(TM, true).process(M);
398 }
399 
400 FunctionPass
401 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
402   return new AMDGPUPropagateAttributesEarly(TM);
403 }
404 
405 ModulePass
406 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
407   return new AMDGPUPropagateAttributesLate(TM);
408 }
409 
410 PreservedAnalyses
411 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
412                                         FunctionAnalysisManager &AM) {
413   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
414     return PreservedAnalyses::all();
415 
416   return AMDGPUPropagateAttributes(&TM, false).process(F)
417              ? PreservedAnalyses::none()
418              : PreservedAnalyses::all();
419 }
420 
421 PreservedAnalyses
422 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
423   return AMDGPUPropagateAttributes(&TM, true).process(M)
424              ? PreservedAnalyses::none()
425              : PreservedAnalyses::all();
426 }
427