//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated from
/// the caller.
///
/// The pass analyzes the call graph and propagates ABI target features through
/// the call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <string>

#define DEBUG_TYPE "amdgpu-propagate-attributes"

using namespace llvm;

// Key/value table of the AMDGPU subtarget features. It is only declared here
// (the definition lives outside this file); this file walks it to turn a
// FeatureBitset back into a "target-features" string.
namespace llvm {
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}

namespace {

// Target features to propagate. Only these bits are compared when deciding
// whether a caller and a callee already agree.
static constexpr const FeatureBitset TargetFeatures = {
  AMDGPU::FeatureWavefrontSize16,
  AMDGPU::FeatureWavefrontSize32,
  AMDGPU::FeatureWavefrontSize64
};

// Attributes to propagate.
// TODO: Support conservative min/max merging instead of cloning.
61 static constexpr const char* AttributeNames[] = { 62 "amdgpu-waves-per-eu", 63 "amdgpu-flat-work-group-size" 64 }; 65 66 static constexpr unsigned NumAttr = 67 sizeof(AttributeNames) / sizeof(AttributeNames[0]); 68 69 class AMDGPUPropagateAttributes { 70 71 class FnProperties { 72 private: 73 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 74 75 public: 76 explicit FnProperties(const TargetMachine &TM, const Function &F) { 77 Features = TM.getSubtargetImpl(F)->getFeatureBits(); 78 79 for (unsigned I = 0; I < NumAttr; ++I) 80 if (F.hasFnAttribute(AttributeNames[I])) 81 Attributes[I] = F.getFnAttribute(AttributeNames[I]); 82 } 83 84 bool operator == (const FnProperties &Other) const { 85 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 86 return false; 87 for (unsigned I = 0; I < NumAttr; ++I) 88 if (Attributes[I] != Other.Attributes[I]) 89 return false; 90 return true; 91 } 92 93 FnProperties adjustToCaller(const FnProperties &CallerProps) const { 94 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 95 for (unsigned I = 0; I < NumAttr; ++I) 96 New.Attributes[I] = CallerProps.Attributes[I]; 97 return New; 98 } 99 100 FeatureBitset Features; 101 Optional<Attribute> Attributes[NumAttr]; 102 }; 103 104 class Clone { 105 public: 106 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 107 Properties(Props), OrigF(OrigF), NewF(NewF) {} 108 109 FnProperties Properties; 110 Function *OrigF; 111 Function *NewF; 112 }; 113 114 const TargetMachine *TM; 115 116 // Clone functions as needed or just set attributes. 117 bool AllowClone; 118 119 // Option propagation roots. 120 SmallSet<Function *, 32> Roots; 121 122 // Clones of functions with their attributes. 123 SmallVector<Clone, 32> Clones; 124 125 // Find a clone with required features. 126 Function *findFunction(const FnProperties &PropsNeeded, 127 Function *OrigF); 128 129 // Clone function \p F and set \p NewProps on the clone. 
130 // Cole takes the name of original function. 131 Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 132 133 // Set new function's features in place. 134 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 135 136 // Set new function's attributes in place. 137 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 138 139 std::string getFeatureString(const FeatureBitset &Features) const; 140 141 // Propagate attributes from Roots. 142 bool process(); 143 144 public: 145 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 146 TM(TM), AllowClone(AllowClone) {} 147 148 // Use F as a root and propagate its attributes. 149 bool process(Function &F); 150 151 // Propagate attributes starting from kernel functions. 152 bool process(Module &M); 153 }; 154 155 // Allows to propagate attributes early, but no clonning is allowed as it must 156 // be a function pass to run before any optimizations. 157 // TODO: We shall only need a one instance of module pass, but that needs to be 158 // in the linker pipeline which is currently not possible. 159 class AMDGPUPropagateAttributesEarly : public FunctionPass { 160 const TargetMachine *TM; 161 162 public: 163 static char ID; // Pass identification 164 165 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 166 FunctionPass(ID), TM(TM) { 167 initializeAMDGPUPropagateAttributesEarlyPass( 168 *PassRegistry::getPassRegistry()); 169 } 170 171 bool runOnFunction(Function &F) override; 172 }; 173 174 // Allows to propagate attributes with clonning but does that late in the 175 // pipeline. 
176 class AMDGPUPropagateAttributesLate : public ModulePass { 177 const TargetMachine *TM; 178 179 public: 180 static char ID; // Pass identification 181 182 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 183 ModulePass(ID), TM(TM) { 184 initializeAMDGPUPropagateAttributesLatePass( 185 *PassRegistry::getPassRegistry()); 186 } 187 188 bool runOnModule(Module &M) override; 189 }; 190 191 } // end anonymous namespace. 192 193 char AMDGPUPropagateAttributesEarly::ID = 0; 194 char AMDGPUPropagateAttributesLate::ID = 0; 195 196 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 197 "amdgpu-propagate-attributes-early", 198 "Early propagate attributes from kernels to functions", 199 false, false) 200 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 201 "amdgpu-propagate-attributes-late", 202 "Late propagate attributes from kernels to functions", 203 false, false) 204 205 Function * 206 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 207 Function *OrigF) { 208 // TODO: search for clone's clones. 
209 for (Clone &C : Clones) 210 if (C.OrigF == OrigF && PropsNeeded == C.Properties) 211 return C.NewF; 212 213 return nullptr; 214 } 215 216 bool AMDGPUPropagateAttributes::process(Module &M) { 217 for (auto &F : M.functions()) 218 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 219 Roots.insert(&F); 220 221 return process(); 222 } 223 224 bool AMDGPUPropagateAttributes::process(Function &F) { 225 Roots.insert(&F); 226 return process(); 227 } 228 229 bool AMDGPUPropagateAttributes::process() { 230 bool Changed = false; 231 SmallSet<Function *, 32> NewRoots; 232 SmallSet<Function *, 32> Replaced; 233 234 if (Roots.empty()) 235 return false; 236 Module &M = *(*Roots.begin())->getParent(); 237 238 do { 239 Roots.insert(NewRoots.begin(), NewRoots.end()); 240 NewRoots.clear(); 241 242 for (auto &F : M.functions()) { 243 if (F.isDeclaration()) 244 continue; 245 246 const FnProperties CalleeProps(*TM, F); 247 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 248 SmallSet<CallBase *, 32> Visited; 249 250 for (User *U : F.users()) { 251 Instruction *I = dyn_cast<Instruction>(U); 252 if (!I) 253 continue; 254 CallBase *CI = dyn_cast<CallBase>(I); 255 if (!CI) 256 continue; 257 Function *Caller = CI->getCaller(); 258 if (!Caller || !Visited.insert(CI).second) 259 continue; 260 if (!Roots.count(Caller) && !NewRoots.count(Caller)) 261 continue; 262 263 const FnProperties CallerProps(*TM, *Caller); 264 265 if (CalleeProps == CallerProps) { 266 if (!Roots.count(&F)) 267 NewRoots.insert(&F); 268 continue; 269 } 270 271 Function *NewF = findFunction(CallerProps, &F); 272 if (!NewF) { 273 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 274 if (!AllowClone) { 275 // This may set different features on different iteartions if 276 // there is a contradiction in callers' attributes. In this case 277 // we rely on a second pass running on Module, which is allowed 278 // to clone. 
279 setFeatures(F, NewProps.Features); 280 setAttributes(F, NewProps.Attributes); 281 NewRoots.insert(&F); 282 Changed = true; 283 break; 284 } 285 286 NewF = cloneWithProperties(F, NewProps); 287 Clones.push_back(Clone(CallerProps, &F, NewF)); 288 NewRoots.insert(NewF); 289 } 290 291 ToReplace.push_back(std::make_pair(CI, NewF)); 292 Replaced.insert(&F); 293 294 Changed = true; 295 } 296 297 while (!ToReplace.empty()) { 298 auto R = ToReplace.pop_back_val(); 299 R.first->setCalledFunction(R.second); 300 } 301 } 302 } while (!NewRoots.empty()); 303 304 for (Function *F : Replaced) { 305 if (F->use_empty()) 306 F->eraseFromParent(); 307 } 308 309 Roots.clear(); 310 Clones.clear(); 311 312 return Changed; 313 } 314 315 Function * 316 AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 317 const FnProperties &NewProps) { 318 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 319 320 ValueToValueMapTy dummy; 321 Function *NewF = CloneFunction(&F, dummy); 322 setFeatures(*NewF, NewProps.Features); 323 setAttributes(*NewF, NewProps.Attributes); 324 NewF->setVisibility(GlobalValue::DefaultVisibility); 325 NewF->setLinkage(GlobalValue::InternalLinkage); 326 327 // Swap names. If that is the only clone it will retain the name of now 328 // dead value. Preserve original name for externally visible functions. 
329 if (F.hasName() && F.hasLocalLinkage()) { 330 std::string NewName = std::string(NewF->getName()); 331 NewF->takeName(&F); 332 F.setName(NewName); 333 } 334 335 return NewF; 336 } 337 338 void AMDGPUPropagateAttributes::setFeatures(Function &F, 339 const FeatureBitset &NewFeatures) { 340 std::string NewFeatureStr = getFeatureString(NewFeatures); 341 342 LLVM_DEBUG(dbgs() << "Set features " 343 << getFeatureString(NewFeatures & TargetFeatures) 344 << " on " << F.getName() << '\n'); 345 346 F.removeFnAttr("target-features"); 347 F.addFnAttr("target-features", NewFeatureStr); 348 } 349 350 void AMDGPUPropagateAttributes::setAttributes(Function &F, 351 const ArrayRef<Optional<Attribute>> NewAttrs) { 352 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 353 for (unsigned I = 0; I < NumAttr; ++I) { 354 F.removeFnAttr(AttributeNames[I]); 355 if (NewAttrs[I]) { 356 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 357 F.addFnAttr(*NewAttrs[I]); 358 } 359 } 360 } 361 362 std::string 363 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 364 { 365 std::string Ret; 366 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 367 if (Features[KV.Value]) 368 Ret += (StringRef("+") + KV.Key + ",").str(); 369 else if (TargetFeatures[KV.Value]) 370 Ret += (StringRef("-") + KV.Key + ",").str(); 371 } 372 Ret.pop_back(); // Remove last comma. 
373 return Ret; 374 } 375 376 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 377 if (!TM) { 378 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 379 if (!TPC) 380 return false; 381 382 TM = &TPC->getTM<TargetMachine>(); 383 } 384 385 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 386 return false; 387 388 return AMDGPUPropagateAttributes(TM, false).process(F); 389 } 390 391 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 392 if (!TM) { 393 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 394 if (!TPC) 395 return false; 396 397 TM = &TPC->getTM<TargetMachine>(); 398 } 399 400 return AMDGPUPropagateAttributes(TM, true).process(M); 401 } 402 403 FunctionPass 404 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 405 return new AMDGPUPropagateAttributesEarly(TM); 406 } 407 408 ModulePass 409 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 410 return new AMDGPUPropagateAttributesLate(TM); 411 } 412