1 //===- AMDGPULDSUtils.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // AMDGPU LDS related helper utility functions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPULDSUtils.h" 14 #include "Utils/AMDGPUBaseInfo.h" 15 #include "llvm/ADT/DepthFirstIterator.h" 16 #include "llvm/ADT/SetVector.h" 17 #include "llvm/Analysis/CallGraph.h" 18 #include "llvm/IR/Constants.h" 19 #include "llvm/IR/ReplaceConstant.h" 20 21 using namespace llvm; 22 23 namespace llvm { 24 25 namespace AMDGPU { 26 27 // An helper class for collecting all reachable callees for each kernel defined 28 // within the module. 29 class CollectReachableCallees { 30 Module &M; 31 CallGraph CG; 32 SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions; 33 34 // Collect all address taken functions within the module. 35 void collectAddressTakenFunctions() { 36 auto *ECNode = CG.getExternalCallingNode(); 37 38 for (auto GI = ECNode->begin(), GE = ECNode->end(); GI != GE; ++GI) { 39 auto *CGN = GI->second; 40 auto *F = CGN->getFunction(); 41 if (!F || F->isDeclaration() || AMDGPU::isKernelCC(F)) 42 continue; 43 AddressTakenFunctions.insert(CGN); 44 } 45 } 46 47 // For given kernel, collect all its reachable non-kernel functions. 48 SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) { 49 SmallPtrSet<Function *, 8> ReachableCallees; 50 51 // Call graph node which represents this kernel. 52 auto *KCGN = CG[K]; 53 54 // Go through all call graph nodes reachable from the node representing this 55 // kernel, visit all their call sites, if the call site is direct, add 56 // corresponding callee to reachable callee set, if it is indirect, resolve 57 // the indirect call site to potential reachable callees, add them to 58 // reachable callee set, and repeat the process for the newly added 59 // potential callee nodes. 60 // 61 // FIXME: Need to handle bit-casted function pointers. 62 // 63 SmallVector<CallGraphNode *, 8> CGNStack(df_begin(KCGN), df_end(KCGN)); 64 SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes; 65 while (!CGNStack.empty()) { 66 auto *CGN = CGNStack.pop_back_val(); 67 68 if (!VisitedCGNodes.insert(CGN).second) 69 continue; 70 71 for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) { 72 auto *RCB = cast<CallBase>(GI->first.getValue()); 73 auto *RCGN = GI->second; 74 75 if (auto *DCallee = RCGN->getFunction()) { 76 ReachableCallees.insert(DCallee); 77 } else if (RCB->isIndirectCall()) { 78 auto *RCBFTy = RCB->getFunctionType(); 79 for (auto *ACGN : AddressTakenFunctions) { 80 auto *ACallee = ACGN->getFunction(); 81 if (ACallee->getFunctionType() == RCBFTy) { 82 ReachableCallees.insert(ACallee); 83 CGNStack.append(df_begin(ACGN), df_end(ACGN)); 84 } 85 } 86 } 87 } 88 } 89 90 return ReachableCallees; 91 } 92 93 public: 94 explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) { 95 // Collect address taken functions. 96 collectAddressTakenFunctions(); 97 } 98 99 void collectReachableCallees( 100 DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) { 101 // Collect reachable callee set for each kernel defined in the module. 102 for (Function &F : M.functions()) { 103 if (!AMDGPU::isKernelCC(&F)) 104 continue; 105 Function *K = &F; 106 KernelToCallees[K] = collectReachableCallees(K); 107 } 108 } 109 }; 110 111 void collectReachableCallees( 112 Module &M, 113 DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) { 114 CollectReachableCallees CRC{M}; 115 CRC.collectReachableCallees(KernelToCallees); 116 } 117 118 SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) { 119 SmallPtrSet<Function *, 8> LDSAccessors; 120 SmallVector<User *, 8> UserStack(GV->users()); 121 SmallPtrSet<User *, 8> VisitedUsers; 122 123 while (!UserStack.empty()) { 124 auto *U = UserStack.pop_back_val(); 125 126 // `U` is already visited? continue to next one. 127 if (!VisitedUsers.insert(U).second) 128 continue; 129 130 // `U` is a global variable which is initialized with LDS. Ignore LDS. 131 if (isa<GlobalValue>(U)) 132 return SmallPtrSet<Function *, 8>(); 133 134 // Recursively explore constant users. 135 if (isa<Constant>(U)) { 136 append_range(UserStack, U->users()); 137 continue; 138 } 139 140 // `U` should be an instruction, if it belongs to a non-kernel function F, 141 // then collect F. 142 Function *F = cast<Instruction>(U)->getFunction(); 143 if (!AMDGPU::isKernelCC(F)) 144 LDSAccessors.insert(F); 145 } 146 147 return LDSAccessors; 148 } 149 150 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> 151 getFunctionToInstsMap(User *U, bool CollectKernelInsts) { 152 DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts; 153 SmallVector<User *, 8> UserStack; 154 SmallPtrSet<User *, 8> VisitedUsers; 155 156 UserStack.push_back(U); 157 158 while (!UserStack.empty()) { 159 auto *UU = UserStack.pop_back_val(); 160 161 if (!VisitedUsers.insert(UU).second) 162 continue; 163 164 if (isa<GlobalValue>(UU)) 165 continue; 166 167 if (isa<Constant>(UU)) { 168 append_range(UserStack, UU->users()); 169 continue; 170 } 171 172 auto *I = cast<Instruction>(UU); 173 Function *F = I->getFunction(); 174 if (CollectKernelInsts) { 175 if (!AMDGPU::isKernelCC(F)) { 176 continue; 177 } 178 } else { 179 if (AMDGPU::isKernelCC(F)) { 180 continue; 181 } 182 } 183 184 FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>())); 185 FunctionToInsts[F].insert(I); 186 } 187 188 return FunctionToInsts; 189 } 190 191 bool isKernelCC(const Function *Func) { 192 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 193 } 194 195 Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { 196 return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), 197 GV->getValueType()); 198 } 199 200 static void collectFunctionUses(User *U, const Function *F, 201 SetVector<Instruction *> &InstUsers) { 202 SmallVector<User *> Stack{U}; 203 204 while (!Stack.empty()) { 205 U = Stack.pop_back_val(); 206 207 if (auto *I = dyn_cast<Instruction>(U)) { 208 if (I->getFunction() == F) 209 InstUsers.insert(I); 210 continue; 211 } 212 213 if (!isa<ConstantExpr>(U)) 214 continue; 215 216 append_range(Stack, U->users()); 217 } 218 } 219 220 void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) { 221 SetVector<Instruction *> InstUsers; 222 223 collectFunctionUses(C, F, InstUsers); 224 for (Instruction *I : InstUsers) { 225 convertConstantExprsToInstructions(I, C); 226 } 227 } 228 229 bool hasUserInstruction(const GlobalValue *GV) { 230 SmallPtrSet<const User *, 8> Visited; 231 SmallVector<const User *, 16> Stack(GV->users()); 232 233 while (!Stack.empty()) { 234 const User *U = Stack.pop_back_val(); 235 236 if (!Visited.insert(U).second) 237 continue; 238 239 if (isa<Instruction>(U)) 240 return true; 241 242 append_range(Stack, U->users()); 243 } 244 245 return false; 246 } 247 248 bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) { 249 // We are not interested in kernel LDS lowering for module LDS itself. 250 if (F && GV.getName() == "llvm.amdgcn.module.lds") 251 return false; 252 253 bool Ret = false; 254 SmallPtrSet<const User *, 8> Visited; 255 SmallVector<const User *, 16> Stack(GV.users()); 256 SmallPtrSet<const GlobalValue *, 8> GlobalUsers; 257 258 assert(!F || isKernelCC(F)); 259 260 while (!Stack.empty()) { 261 const User *V = Stack.pop_back_val(); 262 Visited.insert(V); 263 264 if (auto *G = dyn_cast<GlobalValue>(V)) { 265 StringRef GName = G->getName(); 266 if (F && GName != "llvm.used" && GName != "llvm.compiler.used") { 267 // For kernel LDS lowering, if G is not a compiler.used list, then we 268 // cannot lower the lds GV since we cannot replace the use of GV within 269 // G. 270 return false; 271 } 272 GlobalUsers.insert(G); 273 continue; 274 } 275 276 if (auto *I = dyn_cast<Instruction>(V)) { 277 const Function *UF = I->getFunction(); 278 if (UF == F) { 279 // Used from this kernel, we want to put it into the structure. 280 Ret = true; 281 } else if (!F) { 282 // For module LDS lowering, lowering is required if the user instruction 283 // is from non-kernel function. 284 Ret |= !isKernelCC(UF); 285 } 286 continue; 287 } 288 289 // User V should be a constant, recursively visit users of V. 290 assert(isa<Constant>(V) && "Expected a constant."); 291 append_range(Stack, V->users()); 292 } 293 294 if (!F && !Ret) { 295 // For module LDS lowering, we have not yet decided if we should lower GV or 296 // not. Explore all global users of GV, and check if atleast one of these 297 // global users appear as an use within an instruction (possibly nested use 298 // via constant expression), if so, then conservately lower LDS. 299 for (auto *G : GlobalUsers) 300 Ret |= hasUserInstruction(G); 301 } 302 303 return Ret; 304 } 305 306 std::vector<GlobalVariable *> findVariablesToLower(Module &M, 307 const Function *F) { 308 std::vector<llvm::GlobalVariable *> LocalVars; 309 for (auto &GV : M.globals()) { 310 if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { 311 continue; 312 } 313 if (!GV.hasInitializer()) { 314 // addrspace(3) without initializer implies cuda/hip extern __shared__ 315 // the semantics for such a variable appears to be that all extern 316 // __shared__ variables alias one another, in which case this transform 317 // is not required 318 continue; 319 } 320 if (!isa<UndefValue>(GV.getInitializer())) { 321 // Initializers are unimplemented for local address space. 322 // Leave such variables in place for consistent error reporting. 323 continue; 324 } 325 if (GV.isConstant()) { 326 // A constant undef variable can't be written to, and any load is 327 // undef, so it should be eliminated by the optimizer. It could be 328 // dropped by the back end if not. This pass skips over it. 329 continue; 330 } 331 if (!shouldLowerLDSToStruct(GV, F)) { 332 continue; 333 } 334 LocalVars.push_back(&GV); 335 } 336 return LocalVars; 337 } 338 339 SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) { 340 SmallPtrSet<GlobalValue *, 32> UsedList; 341 342 SmallVector<GlobalValue *, 32> TmpVec; 343 collectUsedGlobalVariables(M, TmpVec, true); 344 UsedList.insert(TmpVec.begin(), TmpVec.end()); 345 346 TmpVec.clear(); 347 collectUsedGlobalVariables(M, TmpVec, false); 348 UsedList.insert(TmpVec.begin(), TmpVec.end()); 349 350 return UsedList; 351 } 352 353 } // end namespace AMDGPU 354 355 } // end namespace llvm 356