//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

static void collectFunctionUses(User *U, const Function *F,
                                SetVector<Instruction *> &InstUsers) {
  SmallVector<User *> Stack{U};

  while (!Stack.empty()) {
    U = Stack.pop_back_val();

    if (auto *I = dyn_cast<Instruction>(U)) {
      if (I->getFunction() == F)
        InstUsers.insert(I);
      continue;
    }

    if (!isa<ConstantExpr>(U))
      continue;

    append_range(Stack, U->users());
  }
}

void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
  SetVector<Instruction *> InstUsers;

  collectFunctionUses(C, F, InstUsers);
  for (Instruction *I : InstUsers) {
    convertConstantExprsToInstructions(I, C);
  }
}
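
// For illustration only (hypothetical IR, not from this repository): given a
// constant-expression use of an LDS variable such as
//
//   store i32 0, i32 addrspace(3)* getelementptr inbounds
//       ([4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1)
//
// replaceConstantUsesInFunction rewrites the ConstantExpr into plain
// instructions within the requested function only, roughly:
//
//   %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds,
//          i32 0, i32 1
//   store i32 0, i32 addrspace(3)* %gep
//
// so that later per-function rewrites of @lds uses only have to visit
// instructions, not constant expressions.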

static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                   const Function *F) {
  // We are not interested in kernel LDS lowering for module LDS itself.
  if (F && GV.getName() == "llvm.amdgcn.module.lds")
    return false;

  bool Ret = false;
  SmallPtrSet<const User *, 8> Visited;
  SmallVector<const User *, 16> Stack(GV.users());

  assert(!F || isKernelCC(F));

  while (!Stack.empty()) {
    const User *V = Stack.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isa<GlobalValue>(V)) {
      // This use of the LDS variable is the initializer of a global variable.
      // This is ill-formed: the address of an LDS variable is kernel dependent
      // and unknown until runtime, so it can't be written to a global
      // variable.
      continue;
    }

    if (auto *I = dyn_cast<Instruction>(V)) {
      const Function *UF = I->getFunction();
      if (UF == F) {
        // Used from this kernel, so we want to put it into the structure.
        Ret = true;
      } else if (!F) {
        // For module LDS lowering, lowering is required if the user
        // instruction is from a non-kernel function.
        Ret |= !isKernelCC(UF);
      }
      continue;
    }

    // User V should be a constant; recursively visit the users of V.
    assert(isa<Constant>(V) && "Expected a constant.");
    append_range(Stack, V->users());
  }

  return Ret;
}
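
// Hedged illustration (hypothetical IR): when F == nullptr (module LDS
// lowering), a variable such as
//
//   @shared.counter = internal addrspace(3) global i32 undef, align 4
//
// that is only accessed from a non-kernel function, e.g.
//
//   define internal void @helper() {
//     %v = load i32, i32 addrspace(3)* @shared.counter
//     ret void
//   }
//
// makes shouldLowerLDSToStruct return true: non-kernel functions cannot be
// given their own LDS allocation, so such variables must be moved into the
// module LDS structure that kernels allocate on their behalf.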

std::vector<GlobalVariable *> findVariablesToLower(Module &M,
                                                   const Function *F) {
  std::vector<GlobalVariable *> LocalVars;
  for (auto &GV : M.globals()) {
    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
      continue;
    }
    if (!GV.hasInitializer()) {
      // An addrspace(3) global without an initializer implies CUDA/HIP extern
      // __shared__. The semantics of such a variable appear to be that all
      // extern __shared__ variables alias one another, in which case this
      // transform is not required.
      continue;
    }
    if (!isa<UndefValue>(GV.getInitializer())) {
      // Initializers are unimplemented for the LDS address space.
      // Leave such variables in place for consistent error reporting.
      continue;
    }
    if (GV.isConstant()) {
      // A constant undef variable can't be written to, and any load of it is
      // undef, so it should be eliminated by the optimizer. If it is not, the
      // back end may drop it. This pass skips over it.
      continue;
    }
    if (!shouldLowerLDSToStruct(GV, F)) {
      continue;
    }
    LocalVars.push_back(&GV);
  }
  return LocalVars;
}
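
// A hedged sketch (hypothetical IR) of what findVariablesToLower does and
// does not select:
//
//   ; selected: static LDS with an undef initializer
//   @kern.lds = internal addrspace(3) global [64 x i32] undef, align 4
//
//   ; skipped: dynamic "extern __shared__" style LDS has no initializer
//   @dyn.lds = external addrspace(3) global [0 x i32], align 4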

bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_wave_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load. Any atomic is a
  // universal MemoryDef from MemorySSA's point of view too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}
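
// A hedged, concrete example: MemorySSA conservatively treats a workgroup
// barrier such as
//
//   call void @llvm.amdgcn.s.barrier()
//
// or a fence as a clobbering MemoryDef, even though neither writes memory.
// Filtering them out here keeps the walk in isClobberedInFunction (below)
// from giving up on loads that are not actually clobbered.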

bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // the live-on-entry definition (nothing to do, the load is not clobbered),
  // a MemoryDef, or a MemoryPhi if several MemoryDefs can define this memory
  // state. In the MemoryPhi case add all incoming definitions to the work
  // list and keep walking up, checking every definition of this memory
  // location until the root. If all defs are exhausted and we reach the
  // entry state, the load has no clobber. Along the way, ignore barriers and
  // fences, which MemorySSA considers clobbers even though they do not
  // actually write anything to memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}
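
// Hedged usage sketch (hypothetical caller and helper, not part of this
// file): a pass that has MemorySSA and alias analysis available could use
// this to check that a load cannot observe a real write on any path from the
// function entry, e.g.
//
//   if (!AMDGPU::isClobberedInFunction(&LI, &MSSA, &AA))
//     markLoadAsNotClobbered(LI); // hypothetical follow-up transform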

} // end namespace AMDGPU

} // end namespace llvm