10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
13*fe013be4SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
15*fe013be4SDimitry Andric #include "llvm/IR/ConstantRange.h"
16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
17*fe013be4SDimitry Andric #include "llvm/IR/Metadata.h"
18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
22bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
23bdd1243dSDimitry Andric                                              const AMDGPUSubtarget &ST)
24bdd1243dSDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
25e8d8bef9SDimitry Andric       IsModuleEntryFunction(
26bdd1243dSDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
27bdd1243dSDimitry Andric       NoSignedZerosFPMath(false) {
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
300b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
33fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
36fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
370b57cec5SDimitry Andric 
3881ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
3981ad6265SDimitry Andric   // global sizes?
4081ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
4181ad6265SDimitry Andric   if (!S.empty())
4281ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
4381ad6265SDimitry Andric 
4481ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
4581ad6265SDimitry Andric   StaticGDSSize = GDSSize;
4681ad6265SDimitry Andric 
47*fe013be4SDimitry Andric   // Second value, if present, is the maximum value that can be assigned.
48*fe013be4SDimitry Andric   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
49*fe013be4SDimitry Andric   // during codegen.
50*fe013be4SDimitry Andric   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
51*fe013be4SDimitry Andric       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
52*fe013be4SDimitry Andric 
53*fe013be4SDimitry Andric   // The two separate variables are only profitable when the LDS module lowering
54*fe013be4SDimitry Andric   // pass is disabled. If graphics does not use dynamic LDS, this is never
55*fe013be4SDimitry Andric   // profitable. Leaving cleanup for a later change.
56*fe013be4SDimitry Andric   LDSSize = LDSSizeRange.first;
57*fe013be4SDimitry Andric   StaticLDSSize = LDSSize;
58*fe013be4SDimitry Andric 
590b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
600b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
610b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
62bdd1243dSDimitry Andric 
63bdd1243dSDimitry Andric   // FIXME: Shouldn't be target specific
64bdd1243dSDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
65bdd1243dSDimitry Andric   NoSignedZerosFPMath =
66bdd1243dSDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
70bdd1243dSDimitry Andric                                                   const GlobalVariable &GV,
71bdd1243dSDimitry Andric                                                   Align Trailing) {
72bdd1243dSDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
730b57cec5SDimitry Andric   if (!Entry.second)
740b57cec5SDimitry Andric     return Entry.first->second;
750b57cec5SDimitry Andric 
765ffd83dbSDimitry Andric   Align Alignment =
775ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
780b57cec5SDimitry Andric 
7981ad6265SDimitry Andric   unsigned Offset;
8081ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
81*fe013be4SDimitry Andric 
82*fe013be4SDimitry Andric     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
83*fe013be4SDimitry Andric     if (MaybeAbs) {
84*fe013be4SDimitry Andric       // Absolute address LDS variables that exist prior to the LDS lowering
85*fe013be4SDimitry Andric       // pass raise a fatal error in that pass. These failure modes are only
86*fe013be4SDimitry Andric       // reachable if that lowering pass is disabled or broken. If/when adding
87*fe013be4SDimitry Andric       // support for absolute addresses on user specified variables, the
88*fe013be4SDimitry Andric       // alignment check moves to the lowering pass and the frame calculation
89*fe013be4SDimitry Andric       // needs to take the user variables into consideration.
90*fe013be4SDimitry Andric 
91*fe013be4SDimitry Andric       uint32_t ObjectStart = *MaybeAbs;
92*fe013be4SDimitry Andric 
93*fe013be4SDimitry Andric       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
94*fe013be4SDimitry Andric         report_fatal_error("Absolute address LDS variable inconsistent with "
95*fe013be4SDimitry Andric                            "variable alignment");
96*fe013be4SDimitry Andric       }
97*fe013be4SDimitry Andric 
98*fe013be4SDimitry Andric       if (isModuleEntryFunction()) {
99*fe013be4SDimitry Andric         // If this is a module entry function, we can also sanity check against
100*fe013be4SDimitry Andric         // the static frame. Strictly it would be better to check against the
101*fe013be4SDimitry Andric         // attribute, i.e. that the variable is within the always-allocated
102*fe013be4SDimitry Andric         // section, and not within some other non-absolute-address object
103*fe013be4SDimitry Andric         // allocated here, but the extra error detection is minimal and we would
104*fe013be4SDimitry Andric         // have to pass the Function around or cache the attribute value.
105*fe013be4SDimitry Andric         uint32_t ObjectEnd =
106*fe013be4SDimitry Andric             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
107*fe013be4SDimitry Andric         if (ObjectEnd > StaticLDSSize) {
108*fe013be4SDimitry Andric           report_fatal_error(
109*fe013be4SDimitry Andric               "Absolute address LDS variable outside of static frame");
110*fe013be4SDimitry Andric         }
111*fe013be4SDimitry Andric       }
112*fe013be4SDimitry Andric 
113*fe013be4SDimitry Andric       Entry.first->second = ObjectStart;
114*fe013be4SDimitry Andric       return ObjectStart;
115*fe013be4SDimitry Andric     }
116*fe013be4SDimitry Andric 
1170b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
1180b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
1190b57cec5SDimitry Andric     /// during lowering.
12081ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
1210b57cec5SDimitry Andric 
122e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
123e8d8bef9SDimitry Andric 
124bdd1243dSDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
125bdd1243dSDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
12681ad6265SDimitry Andric   } else {
12781ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
12881ad6265SDimitry Andric            "expected region address space");
1290b57cec5SDimitry Andric 
13081ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
13181ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
13281ad6265SDimitry Andric 
13381ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
13481ad6265SDimitry Andric     GDSSize = StaticGDSSize;
13581ad6265SDimitry Andric   }
13681ad6265SDimitry Andric 
13781ad6265SDimitry Andric   Entry.first->second = Offset;
1380b57cec5SDimitry Andric   return Offset;
1390b57cec5SDimitry Andric }
140e8d8bef9SDimitry Andric 
141*fe013be4SDimitry Andric static const GlobalVariable *
142*fe013be4SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
143bdd1243dSDimitry Andric   const Module *M = F.getParent();
144*fe013be4SDimitry Andric   std::string KernelDynLDSName = "llvm.amdgcn.";
145*fe013be4SDimitry Andric   KernelDynLDSName += F.getName();
146*fe013be4SDimitry Andric   KernelDynLDSName += ".dynlds";
147*fe013be4SDimitry Andric   return M->getNamedGlobal(KernelDynLDSName);
148fe6060f1SDimitry Andric }
149fe6060f1SDimitry Andric 
150bdd1243dSDimitry Andric std::optional<uint32_t>
151fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
152*fe013be4SDimitry Andric   // TODO: Would be more consistent with the abs symbols to use a range
153*fe013be4SDimitry Andric   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
154fcaf7f86SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
155*fe013be4SDimitry Andric     if (ConstantInt *KnownSize =
156*fe013be4SDimitry Andric             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
157*fe013be4SDimitry Andric       uint64_t ZExt = KnownSize->getZExtValue();
158*fe013be4SDimitry Andric       if (ZExt <= UINT32_MAX) {
159*fe013be4SDimitry Andric         return ZExt;
160fcaf7f86SDimitry Andric       }
161fcaf7f86SDimitry Andric     }
162fcaf7f86SDimitry Andric   }
163fcaf7f86SDimitry Andric   return {};
164fcaf7f86SDimitry Andric }
165fcaf7f86SDimitry Andric 
166*fe013be4SDimitry Andric std::optional<uint32_t>
167*fe013be4SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
168*fe013be4SDimitry Andric   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
169*fe013be4SDimitry Andric     return {};
170*fe013be4SDimitry Andric 
171*fe013be4SDimitry Andric   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
172*fe013be4SDimitry Andric   if (!AbsSymRange)
173*fe013be4SDimitry Andric     return {};
174*fe013be4SDimitry Andric 
175*fe013be4SDimitry Andric   if (const APInt *V = AbsSymRange->getSingleElement()) {
176*fe013be4SDimitry Andric     std::optional<uint64_t> ZExt = V->tryZExtValue();
177*fe013be4SDimitry Andric     if (ZExt && (*ZExt <= UINT32_MAX)) {
178*fe013be4SDimitry Andric       return *ZExt;
179*fe013be4SDimitry Andric     }
180*fe013be4SDimitry Andric   }
181*fe013be4SDimitry Andric 
182*fe013be4SDimitry Andric   return {};
183*fe013be4SDimitry Andric }
184*fe013be4SDimitry Andric 
185*fe013be4SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
186e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
187*fe013be4SDimitry Andric   const Module *M = F.getParent();
188*fe013be4SDimitry Andric   const DataLayout &DL = M->getDataLayout();
189e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric   Align Alignment =
192e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
193e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
194e8d8bef9SDimitry Andric     return;
195e8d8bef9SDimitry Andric 
196e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
197e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
198*fe013be4SDimitry Andric 
199*fe013be4SDimitry Andric   // If there is a dynamic LDS variable associated with this function F, every
200*fe013be4SDimitry Andric   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
201*fe013be4SDimitry Andric   // map to the same address. This holds because no LDS is allocated after the
202*fe013be4SDimitry Andric   // lowering pass if there are dynamic LDS variables present.
203*fe013be4SDimitry Andric   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
204*fe013be4SDimitry Andric   if (Dyn) {
205*fe013be4SDimitry Andric     unsigned Offset = LDSSize; // return this?
206*fe013be4SDimitry Andric     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
207*fe013be4SDimitry Andric     if (!Expect || (Offset != *Expect)) {
208*fe013be4SDimitry Andric       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
209*fe013be4SDimitry Andric     }
210*fe013be4SDimitry Andric   }
211e8d8bef9SDimitry Andric }
212