//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
///
/// The pass analyzes the call graph and propagates ABI target features through
/// the call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#define DEBUG_TYPE "amdgpu-propagate-attributes"

using namespace llvm;

namespace llvm {
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}

namespace {

// Target features to propagate.
46 static constexpr const FeatureBitset TargetFeatures = { 47 AMDGPU::FeatureWavefrontSize16, 48 AMDGPU::FeatureWavefrontSize32, 49 AMDGPU::FeatureWavefrontSize64 50 }; 51 52 // Attributes to propagate. 53 // TODO: Support conservative min/max merging instead of cloning. 54 static constexpr const char* AttributeNames[] = { 55 "amdgpu-waves-per-eu", 56 "amdgpu-flat-work-group-size" 57 }; 58 59 static constexpr unsigned NumAttr = 60 sizeof(AttributeNames) / sizeof(AttributeNames[0]); 61 62 class AMDGPUPropagateAttributes { 63 64 class FnProperties { 65 private: 66 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 67 68 public: 69 explicit FnProperties(const TargetMachine &TM, const Function &F) { 70 Features = TM.getSubtargetImpl(F)->getFeatureBits(); 71 72 for (unsigned I = 0; I < NumAttr; ++I) 73 if (F.hasFnAttribute(AttributeNames[I])) 74 Attributes[I] = F.getFnAttribute(AttributeNames[I]); 75 } 76 77 bool operator == (const FnProperties &Other) const { 78 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 79 return false; 80 for (unsigned I = 0; I < NumAttr; ++I) 81 if (Attributes[I] != Other.Attributes[I]) 82 return false; 83 return true; 84 } 85 86 FnProperties adjustToCaller(const FnProperties &CallerProps) const { 87 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 88 for (unsigned I = 0; I < NumAttr; ++I) 89 New.Attributes[I] = CallerProps.Attributes[I]; 90 return New; 91 } 92 93 FeatureBitset Features; 94 Optional<Attribute> Attributes[NumAttr]; 95 }; 96 97 class Clone { 98 public: 99 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 100 Properties(Props), OrigF(OrigF), NewF(NewF) {} 101 102 FnProperties Properties; 103 Function *OrigF; 104 Function *NewF; 105 }; 106 107 const TargetMachine *TM; 108 109 // Clone functions as needed or just set attributes. 110 bool AllowClone; 111 112 // Option propagation roots. 
113 SmallSet<Function *, 32> Roots; 114 115 // Clones of functions with their attributes. 116 SmallVector<Clone, 32> Clones; 117 118 // Find a clone with required features. 119 Function *findFunction(const FnProperties &PropsNeeded, 120 Function *OrigF); 121 122 // Clone function \p F and set \p NewProps on the clone. 123 // Cole takes the name of original function. 124 Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 125 126 // Set new function's features in place. 127 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 128 129 // Set new function's attributes in place. 130 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 131 132 std::string getFeatureString(const FeatureBitset &Features) const; 133 134 // Propagate attributes from Roots. 135 bool process(); 136 137 public: 138 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 139 TM(TM), AllowClone(AllowClone) {} 140 141 // Use F as a root and propagate its attributes. 142 bool process(Function &F); 143 144 // Propagate attributes starting from kernel functions. 145 bool process(Module &M); 146 }; 147 148 // Allows to propagate attributes early, but no clonning is allowed as it must 149 // be a function pass to run before any optimizations. 150 // TODO: We shall only need a one instance of module pass, but that needs to be 151 // in the linker pipeline which is currently not possible. 152 class AMDGPUPropagateAttributesEarly : public FunctionPass { 153 const TargetMachine *TM; 154 155 public: 156 static char ID; // Pass identification 157 158 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 159 FunctionPass(ID), TM(TM) { 160 initializeAMDGPUPropagateAttributesEarlyPass( 161 *PassRegistry::getPassRegistry()); 162 } 163 164 bool runOnFunction(Function &F) override; 165 }; 166 167 // Allows to propagate attributes with clonning but does that late in the 168 // pipeline. 
169 class AMDGPUPropagateAttributesLate : public ModulePass { 170 const TargetMachine *TM; 171 172 public: 173 static char ID; // Pass identification 174 175 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 176 ModulePass(ID), TM(TM) { 177 initializeAMDGPUPropagateAttributesLatePass( 178 *PassRegistry::getPassRegistry()); 179 } 180 181 bool runOnModule(Module &M) override; 182 }; 183 184 } // end anonymous namespace. 185 186 char AMDGPUPropagateAttributesEarly::ID = 0; 187 char AMDGPUPropagateAttributesLate::ID = 0; 188 189 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 190 "amdgpu-propagate-attributes-early", 191 "Early propagate attributes from kernels to functions", 192 false, false) 193 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 194 "amdgpu-propagate-attributes-late", 195 "Late propagate attributes from kernels to functions", 196 false, false) 197 198 Function * 199 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 200 Function *OrigF) { 201 // TODO: search for clone's clones. 
202 for (Clone &C : Clones) 203 if (C.OrigF == OrigF && PropsNeeded == C.Properties) 204 return C.NewF; 205 206 return nullptr; 207 } 208 209 bool AMDGPUPropagateAttributes::process(Module &M) { 210 for (auto &F : M.functions()) 211 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 212 Roots.insert(&F); 213 214 return process(); 215 } 216 217 bool AMDGPUPropagateAttributes::process(Function &F) { 218 Roots.insert(&F); 219 return process(); 220 } 221 222 bool AMDGPUPropagateAttributes::process() { 223 bool Changed = false; 224 SmallSet<Function *, 32> NewRoots; 225 SmallSet<Function *, 32> Replaced; 226 227 if (Roots.empty()) 228 return false; 229 Module &M = *(*Roots.begin())->getParent(); 230 231 do { 232 Roots.insert(NewRoots.begin(), NewRoots.end()); 233 NewRoots.clear(); 234 235 for (auto &F : M.functions()) { 236 if (F.isDeclaration()) 237 continue; 238 239 const FnProperties CalleeProps(*TM, F); 240 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 241 SmallSet<CallBase *, 32> Visited; 242 243 for (User *U : F.users()) { 244 Instruction *I = dyn_cast<Instruction>(U); 245 if (!I) 246 continue; 247 CallBase *CI = dyn_cast<CallBase>(I); 248 if (!CI) 249 continue; 250 Function *Caller = CI->getCaller(); 251 if (!Caller || !Visited.insert(CI).second) 252 continue; 253 if (!Roots.count(Caller) && !NewRoots.count(Caller)) 254 continue; 255 256 const FnProperties CallerProps(*TM, *Caller); 257 258 if (CalleeProps == CallerProps) { 259 if (!Roots.count(&F)) 260 NewRoots.insert(&F); 261 continue; 262 } 263 264 Function *NewF = findFunction(CallerProps, &F); 265 if (!NewF) { 266 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 267 if (!AllowClone) { 268 // This may set different features on different iteartions if 269 // there is a contradiction in callers' attributes. In this case 270 // we rely on a second pass running on Module, which is allowed 271 // to clone. 
272 setFeatures(F, NewProps.Features); 273 setAttributes(F, NewProps.Attributes); 274 NewRoots.insert(&F); 275 Changed = true; 276 break; 277 } 278 279 NewF = cloneWithProperties(F, NewProps); 280 Clones.push_back(Clone(CallerProps, &F, NewF)); 281 NewRoots.insert(NewF); 282 } 283 284 ToReplace.push_back(std::make_pair(CI, NewF)); 285 Replaced.insert(&F); 286 287 Changed = true; 288 } 289 290 while (!ToReplace.empty()) { 291 auto R = ToReplace.pop_back_val(); 292 R.first->setCalledFunction(R.second); 293 } 294 } 295 } while (!NewRoots.empty()); 296 297 for (Function *F : Replaced) { 298 if (F->use_empty()) 299 F->eraseFromParent(); 300 } 301 302 Roots.clear(); 303 Clones.clear(); 304 305 return Changed; 306 } 307 308 Function * 309 AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 310 const FnProperties &NewProps) { 311 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 312 313 ValueToValueMapTy dummy; 314 Function *NewF = CloneFunction(&F, dummy); 315 setFeatures(*NewF, NewProps.Features); 316 setAttributes(*NewF, NewProps.Attributes); 317 NewF->setVisibility(GlobalValue::DefaultVisibility); 318 NewF->setLinkage(GlobalValue::InternalLinkage); 319 320 // Swap names. If that is the only clone it will retain the name of now 321 // dead value. Preserve original name for externally visible functions. 
322 if (F.hasName() && F.hasLocalLinkage()) { 323 std::string NewName = std::string(NewF->getName()); 324 NewF->takeName(&F); 325 F.setName(NewName); 326 } 327 328 return NewF; 329 } 330 331 void AMDGPUPropagateAttributes::setFeatures(Function &F, 332 const FeatureBitset &NewFeatures) { 333 std::string NewFeatureStr = getFeatureString(NewFeatures); 334 335 LLVM_DEBUG(dbgs() << "Set features " 336 << getFeatureString(NewFeatures & TargetFeatures) 337 << " on " << F.getName() << '\n'); 338 339 F.removeFnAttr("target-features"); 340 F.addFnAttr("target-features", NewFeatureStr); 341 } 342 343 void AMDGPUPropagateAttributes::setAttributes(Function &F, 344 const ArrayRef<Optional<Attribute>> NewAttrs) { 345 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 346 for (unsigned I = 0; I < NumAttr; ++I) { 347 F.removeFnAttr(AttributeNames[I]); 348 if (NewAttrs[I]) { 349 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 350 F.addFnAttr(*NewAttrs[I]); 351 } 352 } 353 } 354 355 std::string 356 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 357 { 358 std::string Ret; 359 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 360 if (Features[KV.Value]) 361 Ret += (StringRef("+") + KV.Key + ",").str(); 362 else if (TargetFeatures[KV.Value]) 363 Ret += (StringRef("-") + KV.Key + ",").str(); 364 } 365 Ret.pop_back(); // Remove last comma. 
366 return Ret; 367 } 368 369 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 370 if (!TM) { 371 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 372 if (!TPC) 373 return false; 374 375 TM = &TPC->getTM<TargetMachine>(); 376 } 377 378 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 379 return false; 380 381 return AMDGPUPropagateAttributes(TM, false).process(F); 382 } 383 384 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 385 if (!TM) { 386 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 387 if (!TPC) 388 return false; 389 390 TM = &TPC->getTM<TargetMachine>(); 391 } 392 393 return AMDGPUPropagateAttributes(TM, true).process(M); 394 } 395 396 FunctionPass 397 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 398 return new AMDGPUPropagateAttributesEarly(TM); 399 } 400 401 ModulePass 402 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 403 return new AMDGPUPropagateAttributesLate(TM); 404 } 405 406 PreservedAnalyses 407 AMDGPUPropagateAttributesEarlyPass::run(Function &F, 408 FunctionAnalysisManager &AM) { 409 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 410 return PreservedAnalyses::all(); 411 412 return AMDGPUPropagateAttributes(&TM, false).process(F) 413 ? PreservedAnalyses::none() 414 : PreservedAnalyses::all(); 415 } 416 417 PreservedAnalyses 418 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { 419 return AMDGPUPropagateAttributes(&TM, true).process(M) 420 ? PreservedAnalyses::none() 421 : PreservedAnalyses::all(); 422 } 423