1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// \brief This pass propagates attributes from kernels to the non-entry 11 /// functions. Most of the library functions were not compiled for specific ABI, 12 /// yet will be correctly compiled if proper attrbutes are propagated from the 13 /// caller. 14 /// 15 /// The pass analyzes call graph and propagates ABI target features through the 16 /// call graph. 17 /// 18 /// It can run in two modes: as a function or module pass. A function pass 19 /// simply propagates attributes. A module pass clones functions if there are 20 /// callers with different ABI. If a function is clonned all call sites will 21 /// be updated to use a correct clone. 22 /// 23 /// A function pass is limited in functionality but can run early in the 24 /// pipeline. A module pass is more powerful but has to run late, so misses 25 /// library folding opportunities. 26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 31 #include "Utils/AMDGPUBaseInfo.h" 32 #include "llvm/ADT/SmallSet.h" 33 #include "llvm/CodeGen/TargetPassConfig.h" 34 #include "llvm/CodeGen/TargetSubtargetInfo.h" 35 #include "llvm/IR/InstrTypes.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/Transforms/Utils/Cloning.h" 38 39 #define DEBUG_TYPE "amdgpu-propagate-attributes" 40 41 using namespace llvm; 42 43 namespace llvm { 44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 45 } 46 47 namespace { 48 49 // Target features to propagate. 50 static constexpr const FeatureBitset TargetFeatures = { 51 AMDGPU::FeatureWavefrontSize16, 52 AMDGPU::FeatureWavefrontSize32, 53 AMDGPU::FeatureWavefrontSize64 54 }; 55 56 // Attributes to propagate. 57 // TODO: Support conservative min/max merging instead of cloning. 58 static constexpr const char* AttributeNames[] = { 59 "amdgpu-waves-per-eu", 60 "amdgpu-flat-work-group-size" 61 }; 62 63 static constexpr unsigned NumAttr = 64 sizeof(AttributeNames) / sizeof(AttributeNames[0]); 65 66 class AMDGPUPropagateAttributes { 67 68 class FnProperties { 69 private: 70 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 71 72 public: 73 explicit FnProperties(const TargetMachine &TM, const Function &F) { 74 Features = TM.getSubtargetImpl(F)->getFeatureBits(); 75 76 for (unsigned I = 0; I < NumAttr; ++I) 77 if (F.hasFnAttribute(AttributeNames[I])) 78 Attributes[I] = F.getFnAttribute(AttributeNames[I]); 79 } 80 81 bool operator == (const FnProperties &Other) const { 82 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 83 return false; 84 for (unsigned I = 0; I < NumAttr; ++I) 85 if (Attributes[I] != Other.Attributes[I]) 86 return false; 87 return true; 88 } 89 90 FnProperties adjustToCaller(const FnProperties &CallerProps) const { 91 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 92 for (unsigned I = 0; I < NumAttr; ++I) 93 New.Attributes[I] = CallerProps.Attributes[I]; 94 return New; 95 } 96 97 FeatureBitset Features; 98 Optional<Attribute> Attributes[NumAttr]; 99 }; 100 101 class Clone { 102 public: 103 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 104 Properties(Props), OrigF(OrigF), NewF(NewF) {} 105 106 FnProperties Properties; 107 Function *OrigF; 108 Function *NewF; 109 }; 110 111 const TargetMachine *TM; 112 113 // Clone functions as needed or just set attributes. 114 bool AllowClone; 115 116 // Option propagation roots. 117 SmallSet<Function *, 32> Roots; 118 119 // Clones of functions with their attributes. 120 SmallVector<Clone, 32> Clones; 121 122 // Find a clone with required features. 123 Function *findFunction(const FnProperties &PropsNeeded, 124 Function *OrigF); 125 126 // Clone function \p F and set \p NewProps on the clone. 127 // Cole takes the name of original function. 128 Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 129 130 // Set new function's features in place. 131 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 132 133 // Set new function's attributes in place. 134 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 135 136 std::string getFeatureString(const FeatureBitset &Features) const; 137 138 // Propagate attributes from Roots. 139 bool process(); 140 141 public: 142 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 143 TM(TM), AllowClone(AllowClone) {} 144 145 // Use F as a root and propagate its attributes. 146 bool process(Function &F); 147 148 // Propagate attributes starting from kernel functions. 149 bool process(Module &M); 150 }; 151 152 // Allows to propagate attributes early, but no clonning is allowed as it must 153 // be a function pass to run before any optimizations. 154 // TODO: We shall only need a one instance of module pass, but that needs to be 155 // in the linker pipeline which is currently not possible. 156 class AMDGPUPropagateAttributesEarly : public FunctionPass { 157 const TargetMachine *TM; 158 159 public: 160 static char ID; // Pass identification 161 162 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 163 FunctionPass(ID), TM(TM) { 164 initializeAMDGPUPropagateAttributesEarlyPass( 165 *PassRegistry::getPassRegistry()); 166 } 167 168 bool runOnFunction(Function &F) override; 169 }; 170 171 // Allows to propagate attributes with clonning but does that late in the 172 // pipeline. 173 class AMDGPUPropagateAttributesLate : public ModulePass { 174 const TargetMachine *TM; 175 176 public: 177 static char ID; // Pass identification 178 179 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 180 ModulePass(ID), TM(TM) { 181 initializeAMDGPUPropagateAttributesLatePass( 182 *PassRegistry::getPassRegistry()); 183 } 184 185 bool runOnModule(Module &M) override; 186 }; 187 188 } // end anonymous namespace. 189 190 char AMDGPUPropagateAttributesEarly::ID = 0; 191 char AMDGPUPropagateAttributesLate::ID = 0; 192 193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 194 "amdgpu-propagate-attributes-early", 195 "Early propagate attributes from kernels to functions", 196 false, false) 197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 198 "amdgpu-propagate-attributes-late", 199 "Late propagate attributes from kernels to functions", 200 false, false) 201 202 Function * 203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 204 Function *OrigF) { 205 // TODO: search for clone's clones. 206 for (Clone &C : Clones) 207 if (C.OrigF == OrigF && PropsNeeded == C.Properties) 208 return C.NewF; 209 210 return nullptr; 211 } 212 213 bool AMDGPUPropagateAttributes::process(Module &M) { 214 for (auto &F : M.functions()) 215 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 216 Roots.insert(&F); 217 218 return process(); 219 } 220 221 bool AMDGPUPropagateAttributes::process(Function &F) { 222 Roots.insert(&F); 223 return process(); 224 } 225 226 bool AMDGPUPropagateAttributes::process() { 227 bool Changed = false; 228 SmallSet<Function *, 32> NewRoots; 229 SmallSet<Function *, 32> Replaced; 230 231 if (Roots.empty()) 232 return false; 233 Module &M = *(*Roots.begin())->getParent(); 234 235 do { 236 Roots.insert(NewRoots.begin(), NewRoots.end()); 237 NewRoots.clear(); 238 239 for (auto &F : M.functions()) { 240 if (F.isDeclaration()) 241 continue; 242 243 // Skip propagating attributes and features to 244 // address taken functions. 245 if (F.hasAddressTaken()) { 246 if (!Roots.count(&F)) 247 NewRoots.insert(&F); 248 continue; 249 } 250 251 const FnProperties CalleeProps(*TM, F); 252 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 253 SmallSet<CallBase *, 32> Visited; 254 255 for (User *U : F.users()) { 256 Instruction *I = dyn_cast<Instruction>(U); 257 if (!I) 258 continue; 259 CallBase *CI = dyn_cast<CallBase>(I); 260 if (!CI) 261 continue; 262 Function *Caller = CI->getCaller(); 263 if (!Caller || !Visited.insert(CI).second) 264 continue; 265 if (!Roots.count(Caller) && !NewRoots.count(Caller)) 266 continue; 267 268 const FnProperties CallerProps(*TM, *Caller); 269 270 // Convergence is allowed if the caller has its 271 // address taken because all callee's (attributes + features) 272 // may not agree as the callee may be the target of 273 // more than one function (called directly or indirectly). 274 if (Caller->hasAddressTaken() || CalleeProps == CallerProps) { 275 if (!Roots.count(&F)) 276 NewRoots.insert(&F); 277 continue; 278 } 279 280 Function *NewF = findFunction(CallerProps, &F); 281 if (!NewF) { 282 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 283 if (!AllowClone) { 284 // This may set different features on different iteartions if 285 // there is a contradiction in callers' attributes. In this case 286 // we rely on a second pass running on Module, which is allowed 287 // to clone. 288 setFeatures(F, NewProps.Features); 289 setAttributes(F, NewProps.Attributes); 290 NewRoots.insert(&F); 291 Changed = true; 292 break; 293 } 294 295 NewF = cloneWithProperties(F, NewProps); 296 Clones.push_back(Clone(CallerProps, &F, NewF)); 297 NewRoots.insert(NewF); 298 } 299 300 ToReplace.push_back(std::make_pair(CI, NewF)); 301 Replaced.insert(&F); 302 303 Changed = true; 304 } 305 306 while (!ToReplace.empty()) { 307 auto R = ToReplace.pop_back_val(); 308 R.first->setCalledFunction(R.second); 309 } 310 } 311 } while (!NewRoots.empty()); 312 313 for (Function *F : Replaced) { 314 if (F->use_empty()) 315 F->eraseFromParent(); 316 } 317 318 Roots.clear(); 319 Clones.clear(); 320 321 return Changed; 322 } 323 324 Function * 325 AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 326 const FnProperties &NewProps) { 327 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 328 329 ValueToValueMapTy dummy; 330 Function *NewF = CloneFunction(&F, dummy); 331 setFeatures(*NewF, NewProps.Features); 332 setAttributes(*NewF, NewProps.Attributes); 333 NewF->setVisibility(GlobalValue::DefaultVisibility); 334 NewF->setLinkage(GlobalValue::InternalLinkage); 335 336 // Swap names. If that is the only clone it will retain the name of now 337 // dead value. Preserve original name for externally visible functions. 338 if (F.hasName() && F.hasLocalLinkage()) { 339 std::string NewName = std::string(NewF->getName()); 340 NewF->takeName(&F); 341 F.setName(NewName); 342 } 343 344 return NewF; 345 } 346 347 void AMDGPUPropagateAttributes::setFeatures(Function &F, 348 const FeatureBitset &NewFeatures) { 349 std::string NewFeatureStr = getFeatureString(NewFeatures); 350 351 LLVM_DEBUG(dbgs() << "Set features " 352 << getFeatureString(NewFeatures & TargetFeatures) 353 << " on " << F.getName() << '\n'); 354 355 F.removeFnAttr("target-features"); 356 F.addFnAttr("target-features", NewFeatureStr); 357 } 358 359 void AMDGPUPropagateAttributes::setAttributes(Function &F, 360 const ArrayRef<Optional<Attribute>> NewAttrs) { 361 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 362 for (unsigned I = 0; I < NumAttr; ++I) { 363 F.removeFnAttr(AttributeNames[I]); 364 if (NewAttrs[I]) { 365 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 366 F.addFnAttr(*NewAttrs[I]); 367 } 368 } 369 } 370 371 std::string 372 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 373 { 374 std::string Ret; 375 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 376 if (Features[KV.Value]) 377 Ret += (StringRef("+") + KV.Key + ",").str(); 378 else if (TargetFeatures[KV.Value]) 379 Ret += (StringRef("-") + KV.Key + ",").str(); 380 } 381 Ret.pop_back(); // Remove last comma. 382 return Ret; 383 } 384 385 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 386 if (!TM) { 387 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 388 if (!TPC) 389 return false; 390 391 TM = &TPC->getTM<TargetMachine>(); 392 } 393 394 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 395 return false; 396 397 return AMDGPUPropagateAttributes(TM, false).process(F); 398 } 399 400 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 401 if (!TM) { 402 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 403 if (!TPC) 404 return false; 405 406 TM = &TPC->getTM<TargetMachine>(); 407 } 408 409 return AMDGPUPropagateAttributes(TM, true).process(M); 410 } 411 412 FunctionPass 413 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 414 return new AMDGPUPropagateAttributesEarly(TM); 415 } 416 417 ModulePass 418 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 419 return new AMDGPUPropagateAttributesLate(TM); 420 } 421 422 PreservedAnalyses 423 AMDGPUPropagateAttributesEarlyPass::run(Function &F, 424 FunctionAnalysisManager &AM) { 425 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 426 return PreservedAnalyses::all(); 427 428 return AMDGPUPropagateAttributes(&TM, false).process(F) 429 ? PreservedAnalyses::none() 430 : PreservedAnalyses::all(); 431 } 432 433 PreservedAnalyses 434 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { 435 return AMDGPUPropagateAttributes(&TM, true).process(M) 436 ? PreservedAnalyses::none() 437 : PreservedAnalyses::all(); 438 } 439