1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// \brief This pass propagates attributes from kernels to the non-entry 11 /// functions. Most of the library functions were not compiled for specific ABI, 12 /// yet will be correctly compiled if proper attributes are propagated from the 13 /// caller. 14 /// 15 /// The pass analyzes call graph and propagates ABI target features through the 16 /// call graph. 17 /// 18 /// It can run in two modes: as a function or module pass. A function pass 19 /// simply propagates attributes. A module pass clones functions if there are 20 /// callers with different ABI. If a function is cloned all call sites will 21 /// be updated to use a correct clone. 22 /// 23 /// A function pass is limited in functionality but can run early in the 24 /// pipeline. A module pass is more powerful but has to run late, so misses 25 /// library folding opportunities. 26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 31 #include "Utils/AMDGPUBaseInfo.h" 32 #include "llvm/ADT/SmallSet.h" 33 #include "llvm/CodeGen/TargetPassConfig.h" 34 #include "llvm/CodeGen/TargetSubtargetInfo.h" 35 #include "llvm/IR/InstrTypes.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/Transforms/Utils/Cloning.h" 38 39 #define DEBUG_TYPE "amdgpu-propagate-attributes" 40 41 using namespace llvm; 42 43 namespace llvm { 44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 45 } 46 47 namespace { 48 49 // Target features to propagate. 50 static constexpr const FeatureBitset TargetFeatures = { 51 AMDGPU::FeatureWavefrontSize16, 52 AMDGPU::FeatureWavefrontSize32, 53 AMDGPU::FeatureWavefrontSize64 54 }; 55 56 // Attributes to propagate. 57 // TODO: Support conservative min/max merging instead of cloning. 58 static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"}; 59 60 static constexpr unsigned NumAttr = 61 sizeof(AttributeNames) / sizeof(AttributeNames[0]); 62 63 class AMDGPUPropagateAttributes { 64 65 class FnProperties { 66 private: 67 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 68 69 public: 70 explicit FnProperties(const TargetMachine &TM, const Function &F) { 71 Features = TM.getSubtargetImpl(F)->getFeatureBits(); 72 73 for (unsigned I = 0; I < NumAttr; ++I) 74 if (F.hasFnAttribute(AttributeNames[I])) 75 Attributes[I] = F.getFnAttribute(AttributeNames[I]); 76 } 77 78 bool operator == (const FnProperties &Other) const { 79 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 80 return false; 81 for (unsigned I = 0; I < NumAttr; ++I) 82 if (Attributes[I] != Other.Attributes[I]) 83 return false; 84 return true; 85 } 86 87 FnProperties adjustToCaller(const FnProperties &CallerProps) const { 88 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 89 for (unsigned I = 0; I < NumAttr; ++I) 90 New.Attributes[I] = CallerProps.Attributes[I]; 91 return New; 92 } 93 94 FeatureBitset Features; 95 Optional<Attribute> Attributes[NumAttr]; 96 }; 97 98 class Clone { 99 public: 100 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 101 Properties(Props), OrigF(OrigF), NewF(NewF) {} 102 103 FnProperties Properties; 104 Function *OrigF; 105 Function *NewF; 106 }; 107 108 const TargetMachine *TM; 109 110 // Clone functions as needed or just set attributes. 111 bool AllowClone; 112 113 // Option propagation roots. 114 SmallSet<Function *, 32> Roots; 115 116 // Clones of functions with their attributes. 117 SmallVector<Clone, 32> Clones; 118 119 // Find a clone with required features. 120 Function *findFunction(const FnProperties &PropsNeeded, 121 Function *OrigF); 122 123 // Clone function \p F and set \p NewProps on the clone. 124 // Cole takes the name of original function. 125 Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 126 127 // Set new function's features in place. 128 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 129 130 // Set new function's attributes in place. 131 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 132 133 std::string getFeatureString(const FeatureBitset &Features) const; 134 135 // Propagate attributes from Roots. 136 bool process(); 137 138 public: 139 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 140 TM(TM), AllowClone(AllowClone) {} 141 142 // Use F as a root and propagate its attributes. 143 bool process(Function &F); 144 145 // Propagate attributes starting from kernel functions. 146 bool process(Module &M); 147 }; 148 149 // Allows to propagate attributes early, but no cloning is allowed as it must 150 // be a function pass to run before any optimizations. 151 // TODO: We shall only need a one instance of module pass, but that needs to be 152 // in the linker pipeline which is currently not possible. 153 class AMDGPUPropagateAttributesEarly : public FunctionPass { 154 const TargetMachine *TM; 155 156 public: 157 static char ID; // Pass identification 158 159 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 160 FunctionPass(ID), TM(TM) { 161 initializeAMDGPUPropagateAttributesEarlyPass( 162 *PassRegistry::getPassRegistry()); 163 } 164 165 bool runOnFunction(Function &F) override; 166 }; 167 168 // Allows to propagate attributes with cloning but does that late in the 169 // pipeline. 170 class AMDGPUPropagateAttributesLate : public ModulePass { 171 const TargetMachine *TM; 172 173 public: 174 static char ID; // Pass identification 175 176 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 177 ModulePass(ID), TM(TM) { 178 initializeAMDGPUPropagateAttributesLatePass( 179 *PassRegistry::getPassRegistry()); 180 } 181 182 bool runOnModule(Module &M) override; 183 }; 184 185 } // end anonymous namespace. 186 187 char AMDGPUPropagateAttributesEarly::ID = 0; 188 char AMDGPUPropagateAttributesLate::ID = 0; 189 190 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 191 "amdgpu-propagate-attributes-early", 192 "Early propagate attributes from kernels to functions", 193 false, false) 194 INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 195 "amdgpu-propagate-attributes-late", 196 "Late propagate attributes from kernels to functions", 197 false, false) 198 199 Function * 200 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 201 Function *OrigF) { 202 // TODO: search for clone's clones. 203 for (Clone &C : Clones) 204 if (C.OrigF == OrigF && PropsNeeded == C.Properties) 205 return C.NewF; 206 207 return nullptr; 208 } 209 210 bool AMDGPUPropagateAttributes::process(Module &M) { 211 for (auto &F : M.functions()) 212 if (AMDGPU::isKernel(F.getCallingConv())) 213 Roots.insert(&F); 214 215 return Roots.empty() ? false : process(); 216 } 217 218 bool AMDGPUPropagateAttributes::process(Function &F) { 219 Roots.insert(&F); 220 return process(); 221 } 222 223 bool AMDGPUPropagateAttributes::process() { 224 bool Changed = false; 225 SmallSet<Function *, 32> NewRoots; 226 SmallSet<Function *, 32> Replaced; 227 228 assert(!Roots.empty()); 229 Module &M = *(*Roots.begin())->getParent(); 230 231 do { 232 Roots.insert(NewRoots.begin(), NewRoots.end()); 233 NewRoots.clear(); 234 235 for (auto &F : M.functions()) { 236 if (F.isDeclaration()) 237 continue; 238 239 const FnProperties CalleeProps(*TM, F); 240 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 241 SmallSet<CallBase *, 32> Visited; 242 243 for (User *U : F.users()) { 244 Instruction *I = dyn_cast<Instruction>(U); 245 if (!I) 246 continue; 247 CallBase *CI = dyn_cast<CallBase>(I); 248 // Only propagate attributes if F is the called function. Specifically, 249 // do not propagate attributes if F is passed as an argument. 250 // FIXME: handle bitcasted callee, e.g. 251 // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)() 252 if (!CI || CI->getCalledOperand() != &F) 253 continue; 254 Function *Caller = CI->getCaller(); 255 if (!Caller || !Visited.insert(CI).second) 256 continue; 257 if (!Roots.count(Caller) && !NewRoots.count(Caller)) 258 continue; 259 260 const FnProperties CallerProps(*TM, *Caller); 261 262 if (CalleeProps == CallerProps) { 263 if (!Roots.count(&F)) 264 NewRoots.insert(&F); 265 continue; 266 } 267 268 Function *NewF = findFunction(CallerProps, &F); 269 if (!NewF) { 270 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 271 if (!AllowClone) { 272 // This may set different features on different iterations if 273 // there is a contradiction in callers' attributes. In this case 274 // we rely on a second pass running on Module, which is allowed 275 // to clone. 276 setFeatures(F, NewProps.Features); 277 setAttributes(F, NewProps.Attributes); 278 NewRoots.insert(&F); 279 Changed = true; 280 break; 281 } 282 283 NewF = cloneWithProperties(F, NewProps); 284 Clones.push_back(Clone(CallerProps, &F, NewF)); 285 NewRoots.insert(NewF); 286 } 287 288 ToReplace.push_back(std::make_pair(CI, NewF)); 289 Replaced.insert(&F); 290 291 Changed = true; 292 } 293 294 while (!ToReplace.empty()) { 295 auto R = ToReplace.pop_back_val(); 296 R.first->setCalledFunction(R.second); 297 } 298 } 299 } while (!NewRoots.empty()); 300 301 for (Function *F : Replaced) { 302 if (F->use_empty()) 303 F->eraseFromParent(); 304 } 305 306 Roots.clear(); 307 Clones.clear(); 308 309 return Changed; 310 } 311 312 Function * 313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 314 const FnProperties &NewProps) { 315 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 316 317 ValueToValueMapTy dummy; 318 Function *NewF = CloneFunction(&F, dummy); 319 setFeatures(*NewF, NewProps.Features); 320 setAttributes(*NewF, NewProps.Attributes); 321 NewF->setVisibility(GlobalValue::DefaultVisibility); 322 NewF->setLinkage(GlobalValue::InternalLinkage); 323 324 // Swap names. If that is the only clone it will retain the name of now 325 // dead value. Preserve original name for externally visible functions. 326 if (F.hasName() && F.hasLocalLinkage()) { 327 std::string NewName = std::string(NewF->getName()); 328 NewF->takeName(&F); 329 F.setName(NewName); 330 } 331 332 return NewF; 333 } 334 335 void AMDGPUPropagateAttributes::setFeatures(Function &F, 336 const FeatureBitset &NewFeatures) { 337 std::string NewFeatureStr = getFeatureString(NewFeatures); 338 339 LLVM_DEBUG(dbgs() << "Set features " 340 << getFeatureString(NewFeatures & TargetFeatures) 341 << " on " << F.getName() << '\n'); 342 343 F.removeFnAttr("target-features"); 344 F.addFnAttr("target-features", NewFeatureStr); 345 } 346 347 void AMDGPUPropagateAttributes::setAttributes(Function &F, 348 const ArrayRef<Optional<Attribute>> NewAttrs) { 349 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 350 for (unsigned I = 0; I < NumAttr; ++I) { 351 F.removeFnAttr(AttributeNames[I]); 352 if (NewAttrs[I]) { 353 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 354 F.addFnAttr(*NewAttrs[I]); 355 } 356 } 357 } 358 359 std::string 360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 361 { 362 std::string Ret; 363 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 364 if (Features[KV.Value]) 365 Ret += (StringRef("+") + KV.Key + ",").str(); 366 else if (TargetFeatures[KV.Value]) 367 Ret += (StringRef("-") + KV.Key + ",").str(); 368 } 369 Ret.pop_back(); // Remove last comma. 370 return Ret; 371 } 372 373 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 374 if (!TM) { 375 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 376 if (!TPC) 377 return false; 378 379 TM = &TPC->getTM<TargetMachine>(); 380 } 381 382 if (!AMDGPU::isKernel(F.getCallingConv())) 383 return false; 384 385 return AMDGPUPropagateAttributes(TM, false).process(F); 386 } 387 388 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 389 if (!TM) { 390 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 391 if (!TPC) 392 return false; 393 394 TM = &TPC->getTM<TargetMachine>(); 395 } 396 397 return AMDGPUPropagateAttributes(TM, true).process(M); 398 } 399 400 FunctionPass 401 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 402 return new AMDGPUPropagateAttributesEarly(TM); 403 } 404 405 ModulePass 406 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 407 return new AMDGPUPropagateAttributesLate(TM); 408 } 409 410 PreservedAnalyses 411 AMDGPUPropagateAttributesEarlyPass::run(Function &F, 412 FunctionAnalysisManager &AM) { 413 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) 414 return PreservedAnalyses::all(); 415 416 return AMDGPUPropagateAttributes(&TM, false).process(F) 417 ? PreservedAnalyses::none() 418 : PreservedAnalyses::all(); 419 } 420 421 PreservedAnalyses 422 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { 423 return AMDGPUPropagateAttributes(&TM, true).process(M) 424 ? PreservedAnalyses::none() 425 : PreservedAnalyses::all(); 426 } 427