1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with a different ABI. If a function is cloned, all call sites will
/// be updated to use the correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/Cloning.h"
39 #include <string>
40 
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42 
43 using namespace llvm;
44 
namespace llvm {
// Table of AMDGPU subtarget feature entries. Only declared here; the
// definition lives outside this file (presumably in the generated subtarget
// description - confirm). getFeatureString() walks this table and reads each
// entry's Key (feature name) and Value (feature bit index).
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
48 
49 namespace {
50 
51 class AMDGPUPropagateAttributes {
52   const FeatureBitset TargetFeatures = {
53     AMDGPU::FeatureWavefrontSize16,
54     AMDGPU::FeatureWavefrontSize32,
55     AMDGPU::FeatureWavefrontSize64
56   };
57 
58   class Clone{
59   public:
60     Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61       FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62 
63     FeatureBitset FeatureMask;
64     Function *OrigF;
65     Function *NewF;
66   };
67 
68   const TargetMachine *TM;
69 
70   // Clone functions as needed or just set attributes.
71   bool AllowClone;
72 
73   // Option propagation roots.
74   SmallSet<Function *, 32> Roots;
75 
76   // Clones of functions with their attributes.
77   SmallVector<Clone, 32> Clones;
78 
79   // Find a clone with required features.
80   Function *findFunction(const FeatureBitset &FeaturesNeeded,
81                          Function *OrigF);
82 
83   // Clone function F and set NewFeatures on the clone.
84   // Cole takes the name of original function.
85   Function *cloneWithFeatures(Function &F,
86                               const FeatureBitset &NewFeatures);
87 
88   // Set new function's features in place.
89   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90 
91   std::string getFeatureString(const FeatureBitset &Features) const;
92 
93   // Propagate attributes from Roots.
94   bool process();
95 
96 public:
97   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98     TM(TM), AllowClone(AllowClone) {}
99 
100   // Use F as a root and propagate its attributes.
101   bool process(Function &F);
102 
103   // Propagate attributes starting from kernel functions.
104   bool process(Module &M);
105 };
106 
107 // Allows to propagate attributes early, but no clonning is allowed as it must
108 // be a function pass to run before any optimizations.
109 // TODO: We shall only need a one instance of module pass, but that needs to be
110 // in the linker pipeline which is currently not possible.
111 class AMDGPUPropagateAttributesEarly : public FunctionPass {
112   const TargetMachine *TM;
113 
114 public:
115   static char ID; // Pass identification
116 
117   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118     FunctionPass(ID), TM(TM) {
119     initializeAMDGPUPropagateAttributesEarlyPass(
120       *PassRegistry::getPassRegistry());
121   }
122 
123   bool runOnFunction(Function &F) override;
124 };
125 
126 // Allows to propagate attributes with clonning but does that late in the
127 // pipeline.
128 class AMDGPUPropagateAttributesLate : public ModulePass {
129   const TargetMachine *TM;
130 
131 public:
132   static char ID; // Pass identification
133 
134   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135     ModulePass(ID), TM(TM) {
136     initializeAMDGPUPropagateAttributesLatePass(
137       *PassRegistry::getPassRegistry());
138   }
139 
140   bool runOnModule(Module &M) override;
141 };
142 
143 }  // end anonymous namespace.
144 
// Storage for the static pass IDs declared in the two pass classes; each is
// handed to the FunctionPass/ModulePass base constructor.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register both passes under their command-line names. These macros provide
// the initialize...Pass() functions called from the pass constructors.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
156 
157 Function *
158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159                                         Function *OrigF) {
160   // TODO: search for clone's clones.
161   for (Clone &C : Clones)
162     if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
163       return C.NewF;
164 
165   return nullptr;
166 }
167 
168 bool AMDGPUPropagateAttributes::process(Module &M) {
169   for (auto &F : M.functions())
170     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171       Roots.insert(&F);
172 
173   return process();
174 }
175 
176 bool AMDGPUPropagateAttributes::process(Function &F) {
177   Roots.insert(&F);
178   return process();
179 }
180 
181 bool AMDGPUPropagateAttributes::process() {
182   bool Changed = false;
183   SmallSet<Function *, 32> NewRoots;
184   SmallSet<Function *, 32> Replaced;
185 
186   if (Roots.empty())
187     return false;
188   Module &M = *(*Roots.begin())->getParent();
189 
190   do {
191     Roots.insert(NewRoots.begin(), NewRoots.end());
192     NewRoots.clear();
193 
194     for (auto &F : M.functions()) {
195       if (F.isDeclaration())
196         continue;
197 
198       const FeatureBitset &CalleeBits =
199         TM->getSubtargetImpl(F)->getFeatureBits();
200       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
201       SmallSet<CallBase *, 32> Visited;
202 
203       for (User *U : F.users()) {
204         Instruction *I = dyn_cast<Instruction>(U);
205         if (!I)
206           continue;
207         CallBase *CI = dyn_cast<CallBase>(I);
208         if (!CI)
209           continue;
210         Function *Caller = CI->getCaller();
211         if (!Caller || !Visited.insert(CI).second)
212           continue;
213         if (!Roots.count(Caller) && !NewRoots.count(Caller))
214           continue;
215 
216         const FeatureBitset &CallerBits =
217           TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
218 
219         if (CallerBits == (CalleeBits  & TargetFeatures)) {
220           if (!Roots.count(&F))
221             NewRoots.insert(&F);
222           continue;
223         }
224 
225         Function *NewF = findFunction(CallerBits, &F);
226         if (!NewF) {
227           FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
228                                     CallerBits);
229           if (!AllowClone) {
230             // This may set different features on different iteartions if
231             // there is a contradiction in callers' attributes. In this case
232             // we rely on a second pass running on Module, which is allowed
233             // to clone.
234             setFeatures(F, NewFeatures);
235             NewRoots.insert(&F);
236             Changed = true;
237             break;
238           }
239 
240           NewF = cloneWithFeatures(F, NewFeatures);
241           Clones.push_back(Clone(CallerBits, &F, NewF));
242           NewRoots.insert(NewF);
243         }
244 
245         ToReplace.push_back(std::make_pair(CI, NewF));
246         Replaced.insert(&F);
247 
248         Changed = true;
249       }
250 
251       while (!ToReplace.empty()) {
252         auto R = ToReplace.pop_back_val();
253         R.first->setCalledFunction(R.second);
254       }
255     }
256   } while (!NewRoots.empty());
257 
258   for (Function *F : Replaced) {
259     if (F->use_empty())
260       F->eraseFromParent();
261   }
262 
263   Roots.clear();
264   Clones.clear();
265 
266   return Changed;
267 }
268 
269 Function *
270 AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
271                                              const FeatureBitset &NewFeatures) {
272   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
273 
274   ValueToValueMapTy dummy;
275   Function *NewF = CloneFunction(&F, dummy);
276   setFeatures(*NewF, NewFeatures);
277   NewF->setVisibility(GlobalValue::DefaultVisibility);
278   NewF->setLinkage(GlobalValue::InternalLinkage);
279 
280   // Swap names. If that is the only clone it will retain the name of now
281   // dead value. Preserve original name for externally visible functions.
282   if (F.hasName() && F.hasLocalLinkage()) {
283     std::string NewName = std::string(NewF->getName());
284     NewF->takeName(&F);
285     F.setName(NewName);
286   }
287 
288   return NewF;
289 }
290 
291 void AMDGPUPropagateAttributes::setFeatures(Function &F,
292                                             const FeatureBitset &NewFeatures) {
293   std::string NewFeatureStr = getFeatureString(NewFeatures);
294 
295   LLVM_DEBUG(dbgs() << "Set features "
296                     << getFeatureString(NewFeatures & TargetFeatures)
297                     << " on " << F.getName() << '\n');
298 
299   F.removeFnAttr("target-features");
300   F.addFnAttr("target-features", NewFeatureStr);
301 }
302 
303 std::string
304 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
305 {
306   std::string Ret;
307   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
308     if (Features[KV.Value])
309       Ret += (StringRef("+") + KV.Key + ",").str();
310     else if (TargetFeatures[KV.Value])
311       Ret += (StringRef("-") + KV.Key + ",").str();
312   }
313   Ret.pop_back(); // Remove last comma.
314   return Ret;
315 }
316 
317 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
318   if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
319     return false;
320 
321   return AMDGPUPropagateAttributes(TM, false).process(F);
322 }
323 
324 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
325   if (!TM)
326     return false;
327 
328   return AMDGPUPropagateAttributes(TM, true).process(M);
329 }
330 
331 FunctionPass
332 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
333   return new AMDGPUPropagateAttributesEarly(TM);
334 }
335 
336 ModulePass
337 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
338   return new AMDGPUPropagateAttributesLate(TM);
339 }
340