//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "AMDGPUSubtarget.h" 31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 32 #include "Utils/AMDGPUBaseInfo.h" 33 #include "llvm/ADT/SmallSet.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/IR/Function.h" 36 #include "llvm/IR/Module.h" 37 #include "llvm/Target/TargetMachine.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include <string> 40 41 #define DEBUG_TYPE "amdgpu-propagate-attributes" 42 43 using namespace llvm; 44 45 namespace llvm { 46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 47 } 48 49 namespace { 50 51 class AMDGPUPropagateAttributes { 52 const FeatureBitset TargetFeatures = { 53 AMDGPU::FeatureWavefrontSize16, 54 AMDGPU::FeatureWavefrontSize32, 55 AMDGPU::FeatureWavefrontSize64 56 }; 57 58 class Clone{ 59 public: 60 Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : 61 FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} 62 63 FeatureBitset FeatureMask; 64 Function *OrigF; 65 Function *NewF; 66 }; 67 68 const TargetMachine *TM; 69 70 // Clone functions as needed or just set attributes. 71 bool AllowClone; 72 73 // Option propagation roots. 74 SmallSet<Function *, 32> Roots; 75 76 // Clones of functions with their attributes. 77 SmallVector<Clone, 32> Clones; 78 79 // Find a clone with required features. 80 Function *findFunction(const FeatureBitset &FeaturesNeeded, 81 Function *OrigF); 82 83 // Clone function F and set NewFeatures on the clone. 84 // Cole takes the name of original function. 85 Function *cloneWithFeatures(Function &F, 86 const FeatureBitset &NewFeatures); 87 88 // Set new function's features in place. 89 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 90 91 std::string getFeatureString(const FeatureBitset &Features) const; 92 93 // Propagate attributes from Roots. 
94 bool process(); 95 96 public: 97 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 98 TM(TM), AllowClone(AllowClone) {} 99 100 // Use F as a root and propagate its attributes. 101 bool process(Function &F); 102 103 // Propagate attributes starting from kernel functions. 104 bool process(Module &M); 105 }; 106 107 // Allows to propagate attributes early, but no clonning is allowed as it must 108 // be a function pass to run before any optimizations. 109 // TODO: We shall only need a one instance of module pass, but that needs to be 110 // in the linker pipeline which is currently not possible. 111 class AMDGPUPropagateAttributesEarly : public FunctionPass { 112 const TargetMachine *TM; 113 114 public: 115 static char ID; // Pass identification 116 117 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 118 FunctionPass(ID), TM(TM) { 119 initializeAMDGPUPropagateAttributesEarlyPass( 120 *PassRegistry::getPassRegistry()); 121 } 122 123 bool runOnFunction(Function &F) override; 124 }; 125 126 // Allows to propagate attributes with clonning but does that late in the 127 // pipeline. 128 class AMDGPUPropagateAttributesLate : public ModulePass { 129 const TargetMachine *TM; 130 131 public: 132 static char ID; // Pass identification 133 134 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 135 ModulePass(ID), TM(TM) { 136 initializeAMDGPUPropagateAttributesLatePass( 137 *PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnModule(Module &M) override; 141 }; 142 143 } // end anonymous namespace. 
144 145 char AMDGPUPropagateAttributesEarly::ID = 0; 146 char AMDGPUPropagateAttributesLate::ID = 0; 147 148 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 149 "amdgpu-propagate-attributes-early", 150 "Early propagate attributes from kernels to functions", 151 false, false) 152 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 153 "amdgpu-propagate-attributes-late", 154 "Late propagate attributes from kernels to functions", 155 false, false) 156 157 Function * 158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, 159 Function *OrigF) { 160 // TODO: search for clone's clones. 161 for (Clone &C : Clones) 162 if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) 163 return C.NewF; 164 165 return nullptr; 166 } 167 168 bool AMDGPUPropagateAttributes::process(Module &M) { 169 for (auto &F : M.functions()) 170 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 171 Roots.insert(&F); 172 173 return process(); 174 } 175 176 bool AMDGPUPropagateAttributes::process(Function &F) { 177 Roots.insert(&F); 178 return process(); 179 } 180 181 bool AMDGPUPropagateAttributes::process() { 182 bool Changed = false; 183 SmallSet<Function *, 32> NewRoots; 184 SmallSet<Function *, 32> Replaced; 185 186 if (Roots.empty()) 187 return false; 188 Module &M = *(*Roots.begin())->getParent(); 189 190 do { 191 Roots.insert(NewRoots.begin(), NewRoots.end()); 192 NewRoots.clear(); 193 194 for (auto &F : M.functions()) { 195 if (F.isDeclaration()) 196 continue; 197 198 const FeatureBitset &CalleeBits = 199 TM->getSubtargetImpl(F)->getFeatureBits(); 200 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 201 SmallSet<CallBase *, 32> Visited; 202 203 for (User *U : F.users()) { 204 Instruction *I = dyn_cast<Instruction>(U); 205 if (!I) 206 continue; 207 CallBase *CI = dyn_cast<CallBase>(I); 208 if (!CI) 209 continue; 210 Function *Caller = CI->getCaller(); 211 if (!Caller || !Visited.insert(CI).second) 212 continue; 213 if (!Roots.count(Caller) && 
!NewRoots.count(Caller)) 214 continue; 215 216 const FeatureBitset &CallerBits = 217 TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; 218 219 if (CallerBits == (CalleeBits & TargetFeatures)) { 220 if (!Roots.count(&F)) 221 NewRoots.insert(&F); 222 continue; 223 } 224 225 Function *NewF = findFunction(CallerBits, &F); 226 if (!NewF) { 227 FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | 228 CallerBits); 229 if (!AllowClone) { 230 // This may set different features on different iteartions if 231 // there is a contradiction in callers' attributes. In this case 232 // we rely on a second pass running on Module, which is allowed 233 // to clone. 234 setFeatures(F, NewFeatures); 235 NewRoots.insert(&F); 236 Changed = true; 237 break; 238 } 239 240 NewF = cloneWithFeatures(F, NewFeatures); 241 Clones.push_back(Clone(CallerBits, &F, NewF)); 242 NewRoots.insert(NewF); 243 } 244 245 ToReplace.push_back(std::make_pair(CI, NewF)); 246 Replaced.insert(&F); 247 248 Changed = true; 249 } 250 251 while (!ToReplace.empty()) { 252 auto R = ToReplace.pop_back_val(); 253 R.first->setCalledFunction(R.second); 254 } 255 } 256 } while (!NewRoots.empty()); 257 258 for (Function *F : Replaced) { 259 if (F->use_empty()) 260 F->eraseFromParent(); 261 } 262 263 Roots.clear(); 264 Clones.clear(); 265 266 return Changed; 267 } 268 269 Function * 270 AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, 271 const FeatureBitset &NewFeatures) { 272 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 273 274 ValueToValueMapTy dummy; 275 Function *NewF = CloneFunction(&F, dummy); 276 setFeatures(*NewF, NewFeatures); 277 NewF->setVisibility(GlobalValue::DefaultVisibility); 278 NewF->setLinkage(GlobalValue::InternalLinkage); 279 280 // Swap names. If that is the only clone it will retain the name of now 281 // dead value. Preserve original name for externally visible functions. 
282 if (F.hasName() && F.hasLocalLinkage()) { 283 std::string NewName = std::string(NewF->getName()); 284 NewF->takeName(&F); 285 F.setName(NewName); 286 } 287 288 return NewF; 289 } 290 291 void AMDGPUPropagateAttributes::setFeatures(Function &F, 292 const FeatureBitset &NewFeatures) { 293 std::string NewFeatureStr = getFeatureString(NewFeatures); 294 295 LLVM_DEBUG(dbgs() << "Set features " 296 << getFeatureString(NewFeatures & TargetFeatures) 297 << " on " << F.getName() << '\n'); 298 299 F.removeFnAttr("target-features"); 300 F.addFnAttr("target-features", NewFeatureStr); 301 } 302 303 std::string 304 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 305 { 306 std::string Ret; 307 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 308 if (Features[KV.Value]) 309 Ret += (StringRef("+") + KV.Key + ",").str(); 310 else if (TargetFeatures[KV.Value]) 311 Ret += (StringRef("-") + KV.Key + ",").str(); 312 } 313 Ret.pop_back(); // Remove last comma. 314 return Ret; 315 } 316 317 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 318 if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 319 return false; 320 321 return AMDGPUPropagateAttributes(TM, false).process(F); 322 } 323 324 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 325 if (!TM) 326 return false; 327 328 return AMDGPUPropagateAttributes(TM, true).process(M); 329 } 330 331 FunctionPass 332 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 333 return new AMDGPUPropagateAttributesEarly(TM); 334 } 335 336 ModulePass 337 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 338 return new AMDGPUPropagateAttributesLate(TM); 339 } 340