//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "AMDGPUSubtarget.h" 31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 32 #include "Utils/AMDGPUBaseInfo.h" 33 #include "llvm/ADT/SmallSet.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/CodeGen/TargetPassConfig.h" 36 #include "llvm/IR/Function.h" 37 #include "llvm/IR/Module.h" 38 #include "llvm/IR/PassManager.h" 39 #include "llvm/Target/TargetMachine.h" 40 #include "llvm/Transforms/Utils/Cloning.h" 41 #include <string> 42 43 #define DEBUG_TYPE "amdgpu-propagate-attributes" 44 45 using namespace llvm; 46 47 namespace llvm { 48 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 49 } 50 51 namespace { 52 53 // Target features to propagate. 54 static constexpr const FeatureBitset TargetFeatures = { 55 AMDGPU::FeatureWavefrontSize16, 56 AMDGPU::FeatureWavefrontSize32, 57 AMDGPU::FeatureWavefrontSize64 58 }; 59 60 // Attributes to propagate. 61 // TODO: Support conservative min/max merging instead of cloning. 
62 static constexpr const char* AttributeNames[] = { 63 "amdgpu-waves-per-eu", 64 "amdgpu-flat-work-group-size" 65 }; 66 67 static constexpr unsigned NumAttr = 68 sizeof(AttributeNames) / sizeof(AttributeNames[0]); 69 70 class AMDGPUPropagateAttributes { 71 72 class FnProperties { 73 private: 74 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 75 76 public: 77 explicit FnProperties(const TargetMachine &TM, const Function &F) { 78 Features = TM.getSubtargetImpl(F)->getFeatureBits(); 79 80 for (unsigned I = 0; I < NumAttr; ++I) 81 if (F.hasFnAttribute(AttributeNames[I])) 82 Attributes[I] = F.getFnAttribute(AttributeNames[I]); 83 } 84 85 bool operator == (const FnProperties &Other) const { 86 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 87 return false; 88 for (unsigned I = 0; I < NumAttr; ++I) 89 if (Attributes[I] != Other.Attributes[I]) 90 return false; 91 return true; 92 } 93 94 FnProperties adjustToCaller(const FnProperties &CallerProps) const { 95 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 96 for (unsigned I = 0; I < NumAttr; ++I) 97 New.Attributes[I] = CallerProps.Attributes[I]; 98 return New; 99 } 100 101 FeatureBitset Features; 102 Optional<Attribute> Attributes[NumAttr]; 103 }; 104 105 class Clone { 106 public: 107 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 108 Properties(Props), OrigF(OrigF), NewF(NewF) {} 109 110 FnProperties Properties; 111 Function *OrigF; 112 Function *NewF; 113 }; 114 115 const TargetMachine *TM; 116 117 // Clone functions as needed or just set attributes. 118 bool AllowClone; 119 120 // Option propagation roots. 121 SmallSet<Function *, 32> Roots; 122 123 // Clones of functions with their attributes. 124 SmallVector<Clone, 32> Clones; 125 126 // Find a clone with required features. 127 Function *findFunction(const FnProperties &PropsNeeded, 128 Function *OrigF); 129 130 // Clone function \p F and set \p NewProps on the clone. 
131 // Cole takes the name of original function. 132 Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 133 134 // Set new function's features in place. 135 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 136 137 // Set new function's attributes in place. 138 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 139 140 std::string getFeatureString(const FeatureBitset &Features) const; 141 142 // Propagate attributes from Roots. 143 bool process(); 144 145 public: 146 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 147 TM(TM), AllowClone(AllowClone) {} 148 149 // Use F as a root and propagate its attributes. 150 bool process(Function &F); 151 152 // Propagate attributes starting from kernel functions. 153 bool process(Module &M); 154 }; 155 156 // Allows to propagate attributes early, but no clonning is allowed as it must 157 // be a function pass to run before any optimizations. 158 // TODO: We shall only need a one instance of module pass, but that needs to be 159 // in the linker pipeline which is currently not possible. 160 class AMDGPUPropagateAttributesEarly : public FunctionPass { 161 const TargetMachine *TM; 162 163 public: 164 static char ID; // Pass identification 165 166 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 167 FunctionPass(ID), TM(TM) { 168 initializeAMDGPUPropagateAttributesEarlyPass( 169 *PassRegistry::getPassRegistry()); 170 } 171 172 bool runOnFunction(Function &F) override; 173 }; 174 175 // Allows to propagate attributes with clonning but does that late in the 176 // pipeline. 
177 class AMDGPUPropagateAttributesLate : public ModulePass { 178 const TargetMachine *TM; 179 180 public: 181 static char ID; // Pass identification 182 183 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 184 ModulePass(ID), TM(TM) { 185 initializeAMDGPUPropagateAttributesLatePass( 186 *PassRegistry::getPassRegistry()); 187 } 188 189 bool runOnModule(Module &M) override; 190 }; 191 192 } // end anonymous namespace. 193 194 char AMDGPUPropagateAttributesEarly::ID = 0; 195 char AMDGPUPropagateAttributesLate::ID = 0; 196 197 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 198 "amdgpu-propagate-attributes-early", 199 "Early propagate attributes from kernels to functions", 200 false, false) 201 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 202 "amdgpu-propagate-attributes-late", 203 "Late propagate attributes from kernels to functions", 204 false, false) 205 206 Function * 207 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 208 Function *OrigF) { 209 // TODO: search for clone's clones. 
210 for (Clone &C : Clones) 211 if (C.OrigF == OrigF && PropsNeeded == C.Properties) 212 return C.NewF; 213 214 return nullptr; 215 } 216 217 bool AMDGPUPropagateAttributes::process(Module &M) { 218 for (auto &F : M.functions()) 219 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 220 Roots.insert(&F); 221 222 return process(); 223 } 224 225 bool AMDGPUPropagateAttributes::process(Function &F) { 226 Roots.insert(&F); 227 return process(); 228 } 229 230 bool AMDGPUPropagateAttributes::process() { 231 bool Changed = false; 232 SmallSet<Function *, 32> NewRoots; 233 SmallSet<Function *, 32> Replaced; 234 235 if (Roots.empty()) 236 return false; 237 Module &M = *(*Roots.begin())->getParent(); 238 239 do { 240 Roots.insert(NewRoots.begin(), NewRoots.end()); 241 NewRoots.clear(); 242 243 for (auto &F : M.functions()) { 244 if (F.isDeclaration()) 245 continue; 246 247 const FnProperties CalleeProps(*TM, F); 248 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 249 SmallSet<CallBase *, 32> Visited; 250 251 for (User *U : F.users()) { 252 Instruction *I = dyn_cast<Instruction>(U); 253 if (!I) 254 continue; 255 CallBase *CI = dyn_cast<CallBase>(I); 256 if (!CI) 257 continue; 258 Function *Caller = CI->getCaller(); 259 if (!Caller || !Visited.insert(CI).second) 260 continue; 261 if (!Roots.count(Caller) && !NewRoots.count(Caller)) 262 continue; 263 264 const FnProperties CallerProps(*TM, *Caller); 265 266 if (CalleeProps == CallerProps) { 267 if (!Roots.count(&F)) 268 NewRoots.insert(&F); 269 continue; 270 } 271 272 Function *NewF = findFunction(CallerProps, &F); 273 if (!NewF) { 274 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 275 if (!AllowClone) { 276 // This may set different features on different iteartions if 277 // there is a contradiction in callers' attributes. In this case 278 // we rely on a second pass running on Module, which is allowed 279 // to clone. 
280 setFeatures(F, NewProps.Features); 281 setAttributes(F, NewProps.Attributes); 282 NewRoots.insert(&F); 283 Changed = true; 284 break; 285 } 286 287 NewF = cloneWithProperties(F, NewProps); 288 Clones.push_back(Clone(CallerProps, &F, NewF)); 289 NewRoots.insert(NewF); 290 } 291 292 ToReplace.push_back(std::make_pair(CI, NewF)); 293 Replaced.insert(&F); 294 295 Changed = true; 296 } 297 298 while (!ToReplace.empty()) { 299 auto R = ToReplace.pop_back_val(); 300 R.first->setCalledFunction(R.second); 301 } 302 } 303 } while (!NewRoots.empty()); 304 305 for (Function *F : Replaced) { 306 if (F->use_empty()) 307 F->eraseFromParent(); 308 } 309 310 Roots.clear(); 311 Clones.clear(); 312 313 return Changed; 314 } 315 316 Function * 317 AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 318 const FnProperties &NewProps) { 319 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 320 321 ValueToValueMapTy dummy; 322 Function *NewF = CloneFunction(&F, dummy); 323 setFeatures(*NewF, NewProps.Features); 324 setAttributes(*NewF, NewProps.Attributes); 325 NewF->setVisibility(GlobalValue::DefaultVisibility); 326 NewF->setLinkage(GlobalValue::InternalLinkage); 327 328 // Swap names. If that is the only clone it will retain the name of now 329 // dead value. Preserve original name for externally visible functions. 
330 if (F.hasName() && F.hasLocalLinkage()) { 331 std::string NewName = std::string(NewF->getName()); 332 NewF->takeName(&F); 333 F.setName(NewName); 334 } 335 336 return NewF; 337 } 338 339 void AMDGPUPropagateAttributes::setFeatures(Function &F, 340 const FeatureBitset &NewFeatures) { 341 std::string NewFeatureStr = getFeatureString(NewFeatures); 342 343 LLVM_DEBUG(dbgs() << "Set features " 344 << getFeatureString(NewFeatures & TargetFeatures) 345 << " on " << F.getName() << '\n'); 346 347 F.removeFnAttr("target-features"); 348 F.addFnAttr("target-features", NewFeatureStr); 349 } 350 351 void AMDGPUPropagateAttributes::setAttributes(Function &F, 352 const ArrayRef<Optional<Attribute>> NewAttrs) { 353 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 354 for (unsigned I = 0; I < NumAttr; ++I) { 355 F.removeFnAttr(AttributeNames[I]); 356 if (NewAttrs[I]) { 357 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 358 F.addFnAttr(*NewAttrs[I]); 359 } 360 } 361 } 362 363 std::string 364 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 365 { 366 std::string Ret; 367 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 368 if (Features[KV.Value]) 369 Ret += (StringRef("+") + KV.Key + ",").str(); 370 else if (TargetFeatures[KV.Value]) 371 Ret += (StringRef("-") + KV.Key + ",").str(); 372 } 373 Ret.pop_back(); // Remove last comma. 
374 return Ret; 375 } 376 377 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 378 if (!TM) { 379 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 380 if (!TPC) 381 return false; 382 383 TM = &TPC->getTM<TargetMachine>(); 384 } 385 386 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 387 return false; 388 389 return AMDGPUPropagateAttributes(TM, false).process(F); 390 } 391 392 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 393 if (!TM) { 394 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 395 if (!TPC) 396 return false; 397 398 TM = &TPC->getTM<TargetMachine>(); 399 } 400 401 return AMDGPUPropagateAttributes(TM, true).process(M); 402 } 403 404 FunctionPass 405 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 406 return new AMDGPUPropagateAttributesEarly(TM); 407 } 408 409 ModulePass 410 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 411 return new AMDGPUPropagateAttributesLate(TM); 412 } 413 414 PreservedAnalyses 415 AMDGPUPropagateAttributesEarlyPass::run(Function &F, 416 FunctionAnalysisManager &AM) { 417 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 418 return PreservedAnalyses::all(); 419 420 return AMDGPUPropagateAttributes(&TM, false).process(F) 421 ? PreservedAnalyses::none() 422 : PreservedAnalyses::all(); 423 } 424 425 PreservedAnalyses 426 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { 427 return AMDGPUPropagateAttributes(&TM, true).process(M) 428 ? PreservedAnalyses::none() 429 : PreservedAnalyses::all(); 430 } 431