1290e5722SStanislav Mekhanoshin //===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
2290e5722SStanislav Mekhanoshin //
3290e5722SStanislav Mekhanoshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4290e5722SStanislav Mekhanoshin // See https://llvm.org/LICENSE.txt for license information.
5290e5722SStanislav Mekhanoshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6290e5722SStanislav Mekhanoshin //
7290e5722SStanislav Mekhanoshin //===----------------------------------------------------------------------===//
8290e5722SStanislav Mekhanoshin
9290e5722SStanislav Mekhanoshin #include "AMDGPUMemoryUtils.h"
10290e5722SStanislav Mekhanoshin #include "AMDGPU.h"
11c7eb8463SStanislav Mekhanoshin #include "AMDGPUBaseInfo.h"
12c7eb8463SStanislav Mekhanoshin #include "llvm/ADT/SetVector.h"
13290e5722SStanislav Mekhanoshin #include "llvm/ADT/SmallSet.h"
14290e5722SStanislav Mekhanoshin #include "llvm/Analysis/AliasAnalysis.h"
15290e5722SStanislav Mekhanoshin #include "llvm/Analysis/MemorySSA.h"
16c7eb8463SStanislav Mekhanoshin #include "llvm/IR/DataLayout.h"
17290e5722SStanislav Mekhanoshin #include "llvm/IR/Instructions.h"
18290e5722SStanislav Mekhanoshin #include "llvm/IR/IntrinsicInst.h"
19c7eb8463SStanislav Mekhanoshin #include "llvm/IR/IntrinsicsAMDGPU.h"
20c7eb8463SStanislav Mekhanoshin #include "llvm/IR/ReplaceConstant.h"
21290e5722SStanislav Mekhanoshin
22290e5722SStanislav Mekhanoshin #define DEBUG_TYPE "amdgpu-memory-utils"
23290e5722SStanislav Mekhanoshin
24290e5722SStanislav Mekhanoshin using namespace llvm;
25290e5722SStanislav Mekhanoshin
26290e5722SStanislav Mekhanoshin namespace llvm {
27290e5722SStanislav Mekhanoshin
28290e5722SStanislav Mekhanoshin namespace AMDGPU {
29290e5722SStanislav Mekhanoshin
getAlign(DataLayout const & DL,const GlobalVariable * GV)30c7eb8463SStanislav Mekhanoshin Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
31c7eb8463SStanislav Mekhanoshin return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
32c7eb8463SStanislav Mekhanoshin GV->getValueType());
33c7eb8463SStanislav Mekhanoshin }
34c7eb8463SStanislav Mekhanoshin
collectFunctionUses(User * U,const Function * F,SetVector<Instruction * > & InstUsers)35c7eb8463SStanislav Mekhanoshin static void collectFunctionUses(User *U, const Function *F,
36c7eb8463SStanislav Mekhanoshin SetVector<Instruction *> &InstUsers) {
37c7eb8463SStanislav Mekhanoshin SmallVector<User *> Stack{U};
38c7eb8463SStanislav Mekhanoshin
39c7eb8463SStanislav Mekhanoshin while (!Stack.empty()) {
40c7eb8463SStanislav Mekhanoshin U = Stack.pop_back_val();
41c7eb8463SStanislav Mekhanoshin
42c7eb8463SStanislav Mekhanoshin if (auto *I = dyn_cast<Instruction>(U)) {
43c7eb8463SStanislav Mekhanoshin if (I->getFunction() == F)
44c7eb8463SStanislav Mekhanoshin InstUsers.insert(I);
45c7eb8463SStanislav Mekhanoshin continue;
46c7eb8463SStanislav Mekhanoshin }
47c7eb8463SStanislav Mekhanoshin
48c7eb8463SStanislav Mekhanoshin if (!isa<ConstantExpr>(U))
49c7eb8463SStanislav Mekhanoshin continue;
50c7eb8463SStanislav Mekhanoshin
51c7eb8463SStanislav Mekhanoshin append_range(Stack, U->users());
52c7eb8463SStanislav Mekhanoshin }
53c7eb8463SStanislav Mekhanoshin }
54c7eb8463SStanislav Mekhanoshin
replaceConstantUsesInFunction(ConstantExpr * C,const Function * F)55c7eb8463SStanislav Mekhanoshin void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
56c7eb8463SStanislav Mekhanoshin SetVector<Instruction *> InstUsers;
57c7eb8463SStanislav Mekhanoshin
58c7eb8463SStanislav Mekhanoshin collectFunctionUses(C, F, InstUsers);
59c7eb8463SStanislav Mekhanoshin for (Instruction *I : InstUsers) {
60c7eb8463SStanislav Mekhanoshin convertConstantExprsToInstructions(I, C);
61c7eb8463SStanislav Mekhanoshin }
62c7eb8463SStanislav Mekhanoshin }
63c7eb8463SStanislav Mekhanoshin
shouldLowerLDSToStruct(const GlobalVariable & GV,const Function * F)64c7eb8463SStanislav Mekhanoshin static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
65c7eb8463SStanislav Mekhanoshin const Function *F) {
66c7eb8463SStanislav Mekhanoshin // We are not interested in kernel LDS lowering for module LDS itself.
67c7eb8463SStanislav Mekhanoshin if (F && GV.getName() == "llvm.amdgcn.module.lds")
68c7eb8463SStanislav Mekhanoshin return false;
69c7eb8463SStanislav Mekhanoshin
70c7eb8463SStanislav Mekhanoshin bool Ret = false;
71c7eb8463SStanislav Mekhanoshin SmallPtrSet<const User *, 8> Visited;
72c7eb8463SStanislav Mekhanoshin SmallVector<const User *, 16> Stack(GV.users());
73c7eb8463SStanislav Mekhanoshin
74c7eb8463SStanislav Mekhanoshin assert(!F || isKernelCC(F));
75c7eb8463SStanislav Mekhanoshin
76c7eb8463SStanislav Mekhanoshin while (!Stack.empty()) {
77c7eb8463SStanislav Mekhanoshin const User *V = Stack.pop_back_val();
78c7eb8463SStanislav Mekhanoshin Visited.insert(V);
79c7eb8463SStanislav Mekhanoshin
80c7eb8463SStanislav Mekhanoshin if (isa<GlobalValue>(V)) {
81c7eb8463SStanislav Mekhanoshin // This use of the LDS variable is the initializer of a global variable.
82c7eb8463SStanislav Mekhanoshin // This is ill formed. The address of an LDS variable is kernel dependent
83c7eb8463SStanislav Mekhanoshin // and unknown until runtime. It can't be written to a global variable.
84c7eb8463SStanislav Mekhanoshin continue;
85c7eb8463SStanislav Mekhanoshin }
86c7eb8463SStanislav Mekhanoshin
87c7eb8463SStanislav Mekhanoshin if (auto *I = dyn_cast<Instruction>(V)) {
88c7eb8463SStanislav Mekhanoshin const Function *UF = I->getFunction();
89c7eb8463SStanislav Mekhanoshin if (UF == F) {
90c7eb8463SStanislav Mekhanoshin // Used from this kernel, we want to put it into the structure.
91c7eb8463SStanislav Mekhanoshin Ret = true;
92c7eb8463SStanislav Mekhanoshin } else if (!F) {
93c7eb8463SStanislav Mekhanoshin // For module LDS lowering, lowering is required if the user instruction
94c7eb8463SStanislav Mekhanoshin // is from non-kernel function.
95c7eb8463SStanislav Mekhanoshin Ret |= !isKernelCC(UF);
96c7eb8463SStanislav Mekhanoshin }
97c7eb8463SStanislav Mekhanoshin continue;
98c7eb8463SStanislav Mekhanoshin }
99c7eb8463SStanislav Mekhanoshin
100c7eb8463SStanislav Mekhanoshin // User V should be a constant, recursively visit users of V.
101c7eb8463SStanislav Mekhanoshin assert(isa<Constant>(V) && "Expected a constant.");
102c7eb8463SStanislav Mekhanoshin append_range(Stack, V->users());
103c7eb8463SStanislav Mekhanoshin }
104c7eb8463SStanislav Mekhanoshin
105c7eb8463SStanislav Mekhanoshin return Ret;
106c7eb8463SStanislav Mekhanoshin }
107c7eb8463SStanislav Mekhanoshin
findVariablesToLower(Module & M,const Function * F)108c7eb8463SStanislav Mekhanoshin std::vector<GlobalVariable *> findVariablesToLower(Module &M,
109c7eb8463SStanislav Mekhanoshin const Function *F) {
110c7eb8463SStanislav Mekhanoshin std::vector<llvm::GlobalVariable *> LocalVars;
111c7eb8463SStanislav Mekhanoshin for (auto &GV : M.globals()) {
112c7eb8463SStanislav Mekhanoshin if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
113c7eb8463SStanislav Mekhanoshin continue;
114c7eb8463SStanislav Mekhanoshin }
115c7eb8463SStanislav Mekhanoshin if (!GV.hasInitializer()) {
116c7eb8463SStanislav Mekhanoshin // addrspace(3) without initializer implies cuda/hip extern __shared__
117c7eb8463SStanislav Mekhanoshin // the semantics for such a variable appears to be that all extern
118c7eb8463SStanislav Mekhanoshin // __shared__ variables alias one another, in which case this transform
119c7eb8463SStanislav Mekhanoshin // is not required
120c7eb8463SStanislav Mekhanoshin continue;
121c7eb8463SStanislav Mekhanoshin }
122c7eb8463SStanislav Mekhanoshin if (!isa<UndefValue>(GV.getInitializer())) {
123c7eb8463SStanislav Mekhanoshin // Initializers are unimplemented for LDS address space.
124c7eb8463SStanislav Mekhanoshin // Leave such variables in place for consistent error reporting.
125c7eb8463SStanislav Mekhanoshin continue;
126c7eb8463SStanislav Mekhanoshin }
127c7eb8463SStanislav Mekhanoshin if (GV.isConstant()) {
128c7eb8463SStanislav Mekhanoshin // A constant undef variable can't be written to, and any load is
129c7eb8463SStanislav Mekhanoshin // undef, so it should be eliminated by the optimizer. It could be
130c7eb8463SStanislav Mekhanoshin // dropped by the back end if not. This pass skips over it.
131c7eb8463SStanislav Mekhanoshin continue;
132c7eb8463SStanislav Mekhanoshin }
133c7eb8463SStanislav Mekhanoshin if (!shouldLowerLDSToStruct(GV, F)) {
134c7eb8463SStanislav Mekhanoshin continue;
135c7eb8463SStanislav Mekhanoshin }
136c7eb8463SStanislav Mekhanoshin LocalVars.push_back(&GV);
137c7eb8463SStanislav Mekhanoshin }
138c7eb8463SStanislav Mekhanoshin return LocalVars;
139c7eb8463SStanislav Mekhanoshin }
140c7eb8463SStanislav Mekhanoshin
isReallyAClobber(const Value * Ptr,MemoryDef * Def,AAResults * AA)141290e5722SStanislav Mekhanoshin bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
142290e5722SStanislav Mekhanoshin Instruction *DefInst = Def->getMemoryInst();
143290e5722SStanislav Mekhanoshin
144290e5722SStanislav Mekhanoshin if (isa<FenceInst>(DefInst))
145290e5722SStanislav Mekhanoshin return false;
146290e5722SStanislav Mekhanoshin
147290e5722SStanislav Mekhanoshin if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
148290e5722SStanislav Mekhanoshin switch (II->getIntrinsicID()) {
149290e5722SStanislav Mekhanoshin case Intrinsic::amdgcn_s_barrier:
150290e5722SStanislav Mekhanoshin case Intrinsic::amdgcn_wave_barrier:
151*2db70021SAustin Kerbow case Intrinsic::amdgcn_sched_barrier:
152290e5722SStanislav Mekhanoshin return false;
153290e5722SStanislav Mekhanoshin default:
154290e5722SStanislav Mekhanoshin break;
155290e5722SStanislav Mekhanoshin }
156290e5722SStanislav Mekhanoshin }
157290e5722SStanislav Mekhanoshin
158290e5722SStanislav Mekhanoshin // Ignore atomics not aliasing with the original load, any atomic is a
159290e5722SStanislav Mekhanoshin // universal MemoryDef from MSSA's point of view too, just like a fence.
160290e5722SStanislav Mekhanoshin const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
161290e5722SStanislav Mekhanoshin return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
162290e5722SStanislav Mekhanoshin };
163290e5722SStanislav Mekhanoshin
164290e5722SStanislav Mekhanoshin if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
165290e5722SStanislav Mekhanoshin checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
166290e5722SStanislav Mekhanoshin return false;
167290e5722SStanislav Mekhanoshin
168290e5722SStanislav Mekhanoshin return true;
169290e5722SStanislav Mekhanoshin }
170290e5722SStanislav Mekhanoshin
isClobberedInFunction(const LoadInst * Load,MemorySSA * MSSA,AAResults * AA)171290e5722SStanislav Mekhanoshin bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
172290e5722SStanislav Mekhanoshin AAResults *AA) {
173290e5722SStanislav Mekhanoshin MemorySSAWalker *Walker = MSSA->getWalker();
174290e5722SStanislav Mekhanoshin SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
175290e5722SStanislav Mekhanoshin SmallSet<MemoryAccess *, 8> Visited;
176290e5722SStanislav Mekhanoshin MemoryLocation Loc(MemoryLocation::get(Load));
177290e5722SStanislav Mekhanoshin
178290e5722SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
179290e5722SStanislav Mekhanoshin
180290e5722SStanislav Mekhanoshin // Start with a nearest dominating clobbering access, it will be either
181290e5722SStanislav Mekhanoshin // live on entry (nothing to do, load is not clobbered), MemoryDef, or
182290e5722SStanislav Mekhanoshin // MemoryPhi if several MemoryDefs can define this memory state. In that
183290e5722SStanislav Mekhanoshin // case add all Defs to WorkList and continue going up and checking all
184290e5722SStanislav Mekhanoshin // the definitions of this memory location until the root. When all the
185290e5722SStanislav Mekhanoshin // defs are exhausted and came to the entry state we have no clobber.
186290e5722SStanislav Mekhanoshin // Along the scan ignore barriers and fences which are considered clobbers
187290e5722SStanislav Mekhanoshin // by the MemorySSA, but not really writing anything into the memory.
188290e5722SStanislav Mekhanoshin while (!WorkList.empty()) {
189290e5722SStanislav Mekhanoshin MemoryAccess *MA = WorkList.pop_back_val();
190290e5722SStanislav Mekhanoshin if (!Visited.insert(MA).second)
191290e5722SStanislav Mekhanoshin continue;
192290e5722SStanislav Mekhanoshin
193290e5722SStanislav Mekhanoshin if (MSSA->isLiveOnEntryDef(MA))
194290e5722SStanislav Mekhanoshin continue;
195290e5722SStanislav Mekhanoshin
196290e5722SStanislav Mekhanoshin if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
197290e5722SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
198290e5722SStanislav Mekhanoshin
199290e5722SStanislav Mekhanoshin if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
200290e5722SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
201290e5722SStanislav Mekhanoshin return true;
202290e5722SStanislav Mekhanoshin }
203290e5722SStanislav Mekhanoshin
204290e5722SStanislav Mekhanoshin WorkList.push_back(
205290e5722SStanislav Mekhanoshin Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
206290e5722SStanislav Mekhanoshin continue;
207290e5722SStanislav Mekhanoshin }
208290e5722SStanislav Mekhanoshin
209290e5722SStanislav Mekhanoshin const MemoryPhi *Phi = cast<MemoryPhi>(MA);
210290e5722SStanislav Mekhanoshin for (auto &Use : Phi->incoming_values())
211290e5722SStanislav Mekhanoshin WorkList.push_back(cast<MemoryAccess>(&Use));
212290e5722SStanislav Mekhanoshin }
213290e5722SStanislav Mekhanoshin
214290e5722SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " -> no clobber\n");
215290e5722SStanislav Mekhanoshin return false;
216290e5722SStanislav Mekhanoshin }
217290e5722SStanislav Mekhanoshin
218290e5722SStanislav Mekhanoshin } // end namespace AMDGPU
219290e5722SStanislav Mekhanoshin
220290e5722SStanislav Mekhanoshin } // end namespace llvm
221