10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 1081ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 13*fe013be4SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 15*fe013be4SDimitry Andric #include "llvm/IR/ConstantRange.h" 16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h" 17*fe013be4SDimitry Andric #include "llvm/IR/Metadata.h" 18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 22bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 23bdd1243dSDimitry Andric const AMDGPUSubtarget &ST) 24bdd1243dSDimitry Andric : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 25e8d8bef9SDimitry Andric IsModuleEntryFunction( 26bdd1243dSDimitry Andric AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 27bdd1243dSDimitry Andric NoSignedZerosFPMath(false) { 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 300b57cec5SDimitry Andric // except reserved size is not correctly aligned. 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 33fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 36fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 370b57cec5SDimitry Andric 3881ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 3981ad6265SDimitry Andric // global sizes? 4081ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 4181ad6265SDimitry Andric if (!S.empty()) 4281ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 4381ad6265SDimitry Andric 4481ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 4581ad6265SDimitry Andric StaticGDSSize = GDSSize; 4681ad6265SDimitry Andric 47*fe013be4SDimitry Andric // Second value, if present, is the maximum value that can be assigned. 48*fe013be4SDimitry Andric // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics 49*fe013be4SDimitry Andric // during codegen. 50*fe013be4SDimitry Andric std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( 51*fe013be4SDimitry Andric F, "amdgpu-lds-size", {0, UINT32_MAX}, true); 52*fe013be4SDimitry Andric 53*fe013be4SDimitry Andric // The two separate variables are only profitable when the LDS module lowering 54*fe013be4SDimitry Andric // pass is disabled. If graphics does not use dynamic LDS, this is never 55*fe013be4SDimitry Andric // profitable. Leaving cleanup for a later change. 56*fe013be4SDimitry Andric LDSSize = LDSSizeRange.first; 57*fe013be4SDimitry Andric StaticLDSSize = LDSSize; 58*fe013be4SDimitry Andric 590b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 600b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 610b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 62bdd1243dSDimitry Andric 63bdd1243dSDimitry Andric // FIXME: Shouldn't be target specific 64bdd1243dSDimitry Andric Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 65bdd1243dSDimitry Andric NoSignedZerosFPMath = 66bdd1243dSDimitry Andric NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 70bdd1243dSDimitry Andric const GlobalVariable &GV, 71bdd1243dSDimitry Andric Align Trailing) { 72bdd1243dSDimitry Andric auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 730b57cec5SDimitry Andric if (!Entry.second) 740b57cec5SDimitry Andric return Entry.first->second; 750b57cec5SDimitry Andric 765ffd83dbSDimitry Andric Align Alignment = 775ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 780b57cec5SDimitry Andric 7981ad6265SDimitry Andric unsigned Offset; 8081ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 81*fe013be4SDimitry Andric 82*fe013be4SDimitry Andric std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); 83*fe013be4SDimitry Andric if (MaybeAbs) { 84*fe013be4SDimitry Andric // Absolute address LDS variables that exist prior to the LDS lowering 85*fe013be4SDimitry Andric // pass raise a fatal error in that pass. These failure modes are only 86*fe013be4SDimitry Andric // reachable if that lowering pass is disabled or broken. If/when adding 87*fe013be4SDimitry Andric // support for absolute addresses on user specified variables, the 88*fe013be4SDimitry Andric // alignment check moves to the lowering pass and the frame calculation 89*fe013be4SDimitry Andric // needs to take the user variables into consideration. 90*fe013be4SDimitry Andric 91*fe013be4SDimitry Andric uint32_t ObjectStart = *MaybeAbs; 92*fe013be4SDimitry Andric 93*fe013be4SDimitry Andric if (ObjectStart != alignTo(ObjectStart, Alignment)) { 94*fe013be4SDimitry Andric report_fatal_error("Absolute address LDS variable inconsistent with " 95*fe013be4SDimitry Andric "variable alignment"); 96*fe013be4SDimitry Andric } 97*fe013be4SDimitry Andric 98*fe013be4SDimitry Andric if (isModuleEntryFunction()) { 99*fe013be4SDimitry Andric // If this is a module entry function, we can also sanity check against 100*fe013be4SDimitry Andric // the static frame. Strictly it would be better to check against the 101*fe013be4SDimitry Andric // attribute, i.e. that the variable is within the always-allocated 102*fe013be4SDimitry Andric // section, and not within some other non-absolute-address object 103*fe013be4SDimitry Andric // allocated here, but the extra error detection is minimal and we would 104*fe013be4SDimitry Andric // have to pass the Function around or cache the attribute value. 105*fe013be4SDimitry Andric uint32_t ObjectEnd = 106*fe013be4SDimitry Andric ObjectStart + DL.getTypeAllocSize(GV.getValueType()); 107*fe013be4SDimitry Andric if (ObjectEnd > StaticLDSSize) { 108*fe013be4SDimitry Andric report_fatal_error( 109*fe013be4SDimitry Andric "Absolute address LDS variable outside of static frame"); 110*fe013be4SDimitry Andric } 111*fe013be4SDimitry Andric } 112*fe013be4SDimitry Andric 113*fe013be4SDimitry Andric Entry.first->second = ObjectStart; 114*fe013be4SDimitry Andric return ObjectStart; 115*fe013be4SDimitry Andric } 116*fe013be4SDimitry Andric 1170b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 1180b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 1190b57cec5SDimitry Andric /// during lowering. 12081ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 1210b57cec5SDimitry Andric 122e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 123e8d8bef9SDimitry Andric 124bdd1243dSDimitry Andric // Align LDS size to trailing, e.g. for aligning dynamic shared memory 125bdd1243dSDimitry Andric LDSSize = alignTo(StaticLDSSize, Trailing); 12681ad6265SDimitry Andric } else { 12781ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 12881ad6265SDimitry Andric "expected region address space"); 1290b57cec5SDimitry Andric 13081ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 13181ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 13281ad6265SDimitry Andric 13381ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 13481ad6265SDimitry Andric GDSSize = StaticGDSSize; 13581ad6265SDimitry Andric } 13681ad6265SDimitry Andric 13781ad6265SDimitry Andric Entry.first->second = Offset; 1380b57cec5SDimitry Andric return Offset; 1390b57cec5SDimitry Andric } 140e8d8bef9SDimitry Andric 141*fe013be4SDimitry Andric static const GlobalVariable * 142*fe013be4SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) { 143bdd1243dSDimitry Andric const Module *M = F.getParent(); 144*fe013be4SDimitry Andric std::string KernelDynLDSName = "llvm.amdgcn."; 145*fe013be4SDimitry Andric KernelDynLDSName += F.getName(); 146*fe013be4SDimitry Andric KernelDynLDSName += ".dynlds"; 147*fe013be4SDimitry Andric return M->getNamedGlobal(KernelDynLDSName); 148fe6060f1SDimitry Andric } 149fe6060f1SDimitry Andric 150bdd1243dSDimitry Andric std::optional<uint32_t> 151fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 152*fe013be4SDimitry Andric // TODO: Would be more consistent with the abs symbols to use a range 153*fe013be4SDimitry Andric MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 154fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) { 155*fe013be4SDimitry Andric if (ConstantInt *KnownSize = 156*fe013be4SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(0))) { 157*fe013be4SDimitry Andric uint64_t ZExt = KnownSize->getZExtValue(); 158*fe013be4SDimitry Andric if (ZExt <= UINT32_MAX) { 159*fe013be4SDimitry Andric return ZExt; 160fcaf7f86SDimitry Andric } 161fcaf7f86SDimitry Andric } 162fcaf7f86SDimitry Andric } 163fcaf7f86SDimitry Andric return {}; 164fcaf7f86SDimitry Andric } 165fcaf7f86SDimitry Andric 166*fe013be4SDimitry Andric std::optional<uint32_t> 167*fe013be4SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { 168*fe013be4SDimitry Andric if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 169*fe013be4SDimitry Andric return {}; 170*fe013be4SDimitry Andric 171*fe013be4SDimitry Andric std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); 172*fe013be4SDimitry Andric if (!AbsSymRange) 173*fe013be4SDimitry Andric return {}; 174*fe013be4SDimitry Andric 175*fe013be4SDimitry Andric if (const APInt *V = AbsSymRange->getSingleElement()) { 176*fe013be4SDimitry Andric std::optional<uint64_t> ZExt = V->tryZExtValue(); 177*fe013be4SDimitry Andric if (ZExt && (*ZExt <= UINT32_MAX)) { 178*fe013be4SDimitry Andric return *ZExt; 179*fe013be4SDimitry Andric } 180*fe013be4SDimitry Andric } 181*fe013be4SDimitry Andric 182*fe013be4SDimitry Andric return {}; 183*fe013be4SDimitry Andric } 184*fe013be4SDimitry Andric 185*fe013be4SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, 186e8d8bef9SDimitry Andric const GlobalVariable &GV) { 187*fe013be4SDimitry Andric const Module *M = F.getParent(); 188*fe013be4SDimitry Andric const DataLayout &DL = M->getDataLayout(); 189e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric Align Alignment = 192e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 193e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 194e8d8bef9SDimitry Andric return; 195e8d8bef9SDimitry Andric 196e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 197e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 198*fe013be4SDimitry Andric 199*fe013be4SDimitry Andric // If there is a dynamic LDS variable associated with this function F, every 200*fe013be4SDimitry Andric // further dynamic LDS instance (allocated by calling setDynLDSAlign) must 201*fe013be4SDimitry Andric // map to the same address. This holds because no LDS is allocated after the 202*fe013be4SDimitry Andric // lowering pass if there are dynamic LDS variables present. 203*fe013be4SDimitry Andric const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 204*fe013be4SDimitry Andric if (Dyn) { 205*fe013be4SDimitry Andric unsigned Offset = LDSSize; // return this? 206*fe013be4SDimitry Andric std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 207*fe013be4SDimitry Andric if (!Expect || (Offset != *Expect)) { 208*fe013be4SDimitry Andric report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 209*fe013be4SDimitry Andric } 210*fe013be4SDimitry Andric } 211e8d8bef9SDimitry Andric } 212