152ef4019SMatt Arsenault //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 252ef4019SMatt Arsenault // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 652ef4019SMatt Arsenault // 752ef4019SMatt Arsenault //===----------------------------------------------------------------------===// 852ef4019SMatt Arsenault 945bb48eaSTom Stellard #include "AMDGPUMachineFunction.h" 1070306542Sserge-sans-paille #include "AMDGPU.h" 111c538423SStanislav Mekhanoshin #include "AMDGPUPerfHintAnalysis.h" 126a87e9b0Sdfukalov #include "AMDGPUSubtarget.h" 131c538423SStanislav Mekhanoshin #include "llvm/CodeGen/MachineModuleInfo.h" 146a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h" 15e935f05aSMatt Arsenault 1645bb48eaSTom Stellard using namespace llvm; 1745bb48eaSTom Stellard 185733167fSSebastian Neubauer AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) 19f3a344d2SKazu Hirata : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC( 20f3a344d2SKazu Hirata MF.getFunction().getCallingConv())), 215733167fSSebastian Neubauer IsModuleEntryFunction( 225733167fSSebastian Neubauer AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())), 2361813b80SMatt Arsenault NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { 244bec7d42SMatt Arsenault const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 254bec7d42SMatt Arsenault 2652ef4019SMatt Arsenault // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 2752ef4019SMatt Arsenault // except reserved size is not correctly aligned. 284bec7d42SMatt Arsenault const Function &F = MF.getFunction(); 291c538423SStanislav Mekhanoshin 30e7e23e3eSMatt Arsenault Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 31d6de1e1aSSerge Guelton MemoryBound = MemBoundAttr.getValueAsBool(); 32e7e23e3eSMatt Arsenault 33e7e23e3eSMatt Arsenault Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 34d6de1e1aSSerge Guelton WaveLimiter = WaveLimitAttr.getValueAsBool(); 354bec7d42SMatt Arsenault 36b5ec1312SMatt Arsenault // FIXME: How is this attribute supposed to interact with statically known 37b5ec1312SMatt Arsenault // global sizes? 38b5ec1312SMatt Arsenault StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 39b5ec1312SMatt Arsenault if (!S.empty()) 40b5ec1312SMatt Arsenault S.consumeInteger(0, GDSSize); 41b5ec1312SMatt Arsenault 42b5ec1312SMatt Arsenault // Assume the attribute allocates before any known GDS globals. 43b5ec1312SMatt Arsenault StaticGDSSize = GDSSize; 44b5ec1312SMatt Arsenault 454bec7d42SMatt Arsenault CallingConv::ID CC = F.getCallingConv(); 464bec7d42SMatt Arsenault if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 474bec7d42SMatt Arsenault ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 48beb24f5bSNikolay Haustov } 49beb24f5bSNikolay Haustov 5052ef4019SMatt Arsenault unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 51a2caa3b6SEli Friedman const GlobalVariable &GV) { 5252ef4019SMatt Arsenault auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0)); 5352ef4019SMatt Arsenault if (!Entry.second) 5452ef4019SMatt Arsenault return Entry.first->second; 5552ef4019SMatt Arsenault 5652911428SGuillaume Chatelet Align Alignment = 5752911428SGuillaume Chatelet DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 5852ef4019SMatt Arsenault 59b5ec1312SMatt Arsenault unsigned Offset; 60b5ec1312SMatt Arsenault if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 6152ef4019SMatt Arsenault /// TODO: We should sort these to minimize wasted space due to alignment 6252ef4019SMatt Arsenault /// padding. Currently the padding is decided by the first encountered use 6352ef4019SMatt Arsenault /// during lowering. 64b5ec1312SMatt Arsenault Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 6552ef4019SMatt Arsenault 665257a60eSMichael Liao StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 675257a60eSMichael Liao 685257a60eSMichael Liao // Update the LDS size considering the padding to align the dynamic shared 695257a60eSMichael Liao // memory. 705257a60eSMichael Liao LDSSize = alignTo(StaticLDSSize, DynLDSAlign); 71b5ec1312SMatt Arsenault } else { 721900b6c7SMatt Arsenault assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 731900b6c7SMatt Arsenault "expected region address space"); 741900b6c7SMatt Arsenault 75b5ec1312SMatt Arsenault Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 76b5ec1312SMatt Arsenault StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 7752ef4019SMatt Arsenault 78b5ec1312SMatt Arsenault // FIXME: Apply alignment of dynamic GDS 79b5ec1312SMatt Arsenault GDSSize = StaticGDSSize; 80b5ec1312SMatt Arsenault } 81b5ec1312SMatt Arsenault 82b5ec1312SMatt Arsenault Entry.first->second = Offset; 8352ef4019SMatt Arsenault return Offset; 84beb24f5bSNikolay Haustov } 855257a60eSMichael Liao 86*bc78c099SJon Chesterfield // This kernel calls no functions that require the module lds struct 87*bc78c099SJon Chesterfield static bool canElideModuleLDS(const Function &F) { 88*bc78c099SJon Chesterfield return F.hasFnAttribute("amdgpu-elide-module-lds"); 89*bc78c099SJon Chesterfield } 90*bc78c099SJon Chesterfield 91*bc78c099SJon Chesterfield void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { 92*bc78c099SJon Chesterfield const Module *M = F.getParent(); 9313e49dceSJon Chesterfield if (isModuleEntryFunction()) { 94748db5bfSStanislav Mekhanoshin const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); 95*bc78c099SJon Chesterfield if (GV && !canElideModuleLDS(F)) { 9613e49dceSJon Chesterfield unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); 9713e49dceSJon Chesterfield (void)Offset; 9813e49dceSJon Chesterfield assert(Offset == 0 && 9913e49dceSJon Chesterfield "Module LDS expected to be allocated before other LDS"); 10013e49dceSJon Chesterfield } 10113e49dceSJon Chesterfield } 10213e49dceSJon Chesterfield } 10313e49dceSJon Chesterfield 1045257a60eSMichael Liao void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, 1055257a60eSMichael Liao const GlobalVariable &GV) { 1065257a60eSMichael Liao assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 1075257a60eSMichael Liao 1085257a60eSMichael Liao Align Alignment = 1095257a60eSMichael Liao DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 1105257a60eSMichael Liao if (Alignment <= DynLDSAlign) 1115257a60eSMichael Liao return; 1125257a60eSMichael Liao 1135257a60eSMichael Liao LDSSize = alignTo(StaticLDSSize, Alignment); 1145257a60eSMichael Liao DynLDSAlign = Alignment; 1155257a60eSMichael Liao } 116