152ef4019SMatt Arsenault //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
252ef4019SMatt Arsenault //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
652ef4019SMatt Arsenault //
752ef4019SMatt Arsenault //===----------------------------------------------------------------------===//
852ef4019SMatt Arsenault 
945bb48eaSTom Stellard #include "AMDGPUMachineFunction.h"
1070306542Sserge-sans-paille #include "AMDGPU.h"
111c538423SStanislav Mekhanoshin #include "AMDGPUPerfHintAnalysis.h"
126a87e9b0Sdfukalov #include "AMDGPUSubtarget.h"
131c538423SStanislav Mekhanoshin #include "llvm/CodeGen/MachineModuleInfo.h"
14*3a205977SJon Chesterfield #include "llvm/IR/Constants.h"
156a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h"
16e935f05aSMatt Arsenault 
1745bb48eaSTom Stellard using namespace llvm;
1845bb48eaSTom Stellard 
AMDGPUMachineFunction(const MachineFunction & MF)195733167fSSebastian Neubauer AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
20f3a344d2SKazu Hirata     : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC(
21f3a344d2SKazu Hirata                                   MF.getFunction().getCallingConv())),
225733167fSSebastian Neubauer       IsModuleEntryFunction(
235733167fSSebastian Neubauer           AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
2461813b80SMatt Arsenault       NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
254bec7d42SMatt Arsenault   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
264bec7d42SMatt Arsenault 
2752ef4019SMatt Arsenault   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
2852ef4019SMatt Arsenault   // except reserved size is not correctly aligned.
294bec7d42SMatt Arsenault   const Function &F = MF.getFunction();
301c538423SStanislav Mekhanoshin 
31e7e23e3eSMatt Arsenault   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
32d6de1e1aSSerge Guelton   MemoryBound = MemBoundAttr.getValueAsBool();
33e7e23e3eSMatt Arsenault 
34e7e23e3eSMatt Arsenault   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
35d6de1e1aSSerge Guelton   WaveLimiter = WaveLimitAttr.getValueAsBool();
364bec7d42SMatt Arsenault 
37b5ec1312SMatt Arsenault   // FIXME: How is this attribute supposed to interact with statically known
38b5ec1312SMatt Arsenault   // global sizes?
39b5ec1312SMatt Arsenault   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
40b5ec1312SMatt Arsenault   if (!S.empty())
41b5ec1312SMatt Arsenault     S.consumeInteger(0, GDSSize);
42b5ec1312SMatt Arsenault 
43b5ec1312SMatt Arsenault   // Assume the attribute allocates before any known GDS globals.
44b5ec1312SMatt Arsenault   StaticGDSSize = GDSSize;
45b5ec1312SMatt Arsenault 
464bec7d42SMatt Arsenault   CallingConv::ID CC = F.getCallingConv();
474bec7d42SMatt Arsenault   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
484bec7d42SMatt Arsenault     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
49beb24f5bSNikolay Haustov }
50beb24f5bSNikolay Haustov 
allocateLDSGlobal(const DataLayout & DL,const GlobalVariable & GV)5152ef4019SMatt Arsenault unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
52a2caa3b6SEli Friedman                                                   const GlobalVariable &GV) {
5352ef4019SMatt Arsenault   auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
5452ef4019SMatt Arsenault   if (!Entry.second)
5552ef4019SMatt Arsenault     return Entry.first->second;
5652ef4019SMatt Arsenault 
5752911428SGuillaume Chatelet   Align Alignment =
5852911428SGuillaume Chatelet       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
5952ef4019SMatt Arsenault 
60b5ec1312SMatt Arsenault   unsigned Offset;
61b5ec1312SMatt Arsenault   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
6252ef4019SMatt Arsenault     /// TODO: We should sort these to minimize wasted space due to alignment
6352ef4019SMatt Arsenault     /// padding. Currently the padding is decided by the first encountered use
6452ef4019SMatt Arsenault     /// during lowering.
65b5ec1312SMatt Arsenault     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
6652ef4019SMatt Arsenault 
675257a60eSMichael Liao     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
685257a60eSMichael Liao 
695257a60eSMichael Liao     // Update the LDS size considering the padding to align the dynamic shared
705257a60eSMichael Liao     // memory.
715257a60eSMichael Liao     LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
72b5ec1312SMatt Arsenault   } else {
731900b6c7SMatt Arsenault     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
741900b6c7SMatt Arsenault            "expected region address space");
751900b6c7SMatt Arsenault 
76b5ec1312SMatt Arsenault     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
77b5ec1312SMatt Arsenault     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
7852ef4019SMatt Arsenault 
79b5ec1312SMatt Arsenault     // FIXME: Apply alignment of dynamic GDS
80b5ec1312SMatt Arsenault     GDSSize = StaticGDSSize;
81b5ec1312SMatt Arsenault   }
82b5ec1312SMatt Arsenault 
83b5ec1312SMatt Arsenault   Entry.first->second = Offset;
8452ef4019SMatt Arsenault   return Offset;
85beb24f5bSNikolay Haustov }
865257a60eSMichael Liao 
87bc78c099SJon Chesterfield // This kernel calls no functions that require the module lds struct
canElideModuleLDS(const Function & F)88bc78c099SJon Chesterfield static bool canElideModuleLDS(const Function &F) {
89bc78c099SJon Chesterfield   return F.hasFnAttribute("amdgpu-elide-module-lds");
90bc78c099SJon Chesterfield }
91bc78c099SJon Chesterfield 
allocateModuleLDSGlobal(const Function & F)92bc78c099SJon Chesterfield void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
93bc78c099SJon Chesterfield   const Module *M = F.getParent();
9413e49dceSJon Chesterfield   if (isModuleEntryFunction()) {
95748db5bfSStanislav Mekhanoshin     const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
96bc78c099SJon Chesterfield     if (GV && !canElideModuleLDS(F)) {
9713e49dceSJon Chesterfield       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
9813e49dceSJon Chesterfield       (void)Offset;
9913e49dceSJon Chesterfield       assert(Offset == 0 &&
10013e49dceSJon Chesterfield              "Module LDS expected to be allocated before other LDS");
10113e49dceSJon Chesterfield     }
10213e49dceSJon Chesterfield   }
10313e49dceSJon Chesterfield }
10413e49dceSJon Chesterfield 
105*3a205977SJon Chesterfield Optional<uint32_t>
getLDSKernelIdMetadata(const Function & F)106*3a205977SJon Chesterfield AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
107*3a205977SJon Chesterfield   auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
108*3a205977SJon Chesterfield   if (MD && MD->getNumOperands() == 1) {
109*3a205977SJon Chesterfield     ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
110*3a205977SJon Chesterfield     if (KnownSize) {
111*3a205977SJon Chesterfield       uint64_t V = KnownSize->getZExtValue();
112*3a205977SJon Chesterfield       if (V <= UINT32_MAX) {
113*3a205977SJon Chesterfield         return V;
114*3a205977SJon Chesterfield       }
115*3a205977SJon Chesterfield     }
116*3a205977SJon Chesterfield   }
117*3a205977SJon Chesterfield   return {};
118*3a205977SJon Chesterfield }
119*3a205977SJon Chesterfield 
setDynLDSAlign(const DataLayout & DL,const GlobalVariable & GV)1205257a60eSMichael Liao void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
1215257a60eSMichael Liao                                            const GlobalVariable &GV) {
1225257a60eSMichael Liao   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
1235257a60eSMichael Liao 
1245257a60eSMichael Liao   Align Alignment =
1255257a60eSMichael Liao       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
1265257a60eSMichael Liao   if (Alignment <= DynLDSAlign)
1275257a60eSMichael Liao     return;
1285257a60eSMichael Liao 
1295257a60eSMichael Liao   LDSSize = alignTo(StaticLDSSize, Alignment);
1305257a60eSMichael Liao   DynLDSAlign = Alignment;
1315257a60eSMichael Liao }
132