1d68904f9SJames Henderson //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
245bb48eaSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
645bb48eaSTom Stellard //
745bb48eaSTom Stellard //===----------------------------------------------------------------------===//
845bb48eaSTom Stellard //
945bb48eaSTom Stellard /// \file
1045bb48eaSTom Stellard ///
1145bb48eaSTom Stellard /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
1245bb48eaSTom Stellard /// code.  When passed an MCAsmStreamer it prints assembly and when passed
1345bb48eaSTom Stellard /// an MCObjectStreamer it outputs binary code.
1445bb48eaSTom Stellard //
1545bb48eaSTom Stellard //===----------------------------------------------------------------------===//
1645bb48eaSTom Stellard //
1745bb48eaSTom Stellard 
1845bb48eaSTom Stellard #include "AMDGPUAsmPrinter.h"
1945bb48eaSTom Stellard #include "AMDGPU.h"
206a87e9b0Sdfukalov #include "AMDGPUHSAMetadataStreamer.h"
212b08f6afSSebastian Neubauer #include "AMDGPUResourceUsageAnalysis.h"
226a87e9b0Sdfukalov #include "AMDKernelCodeT.h"
23560d7e04Sdfukalov #include "GCNSubtarget.h"
24c0bd7bd4SRichard Trieu #include "MCTargetDesc/AMDGPUInstPrinter.h"
256bda14b3SChandler Carruth #include "MCTargetDesc/AMDGPUTargetStreamer.h"
26c5015010STom Stellard #include "R600AsmPrinter.h"
276bda14b3SChandler Carruth #include "SIMachineFunctionInfo.h"
288ce2ee9dSRichard Trieu #include "TargetInfo/AMDGPUTargetInfo.h"
296bda14b3SChandler Carruth #include "Utils/AMDGPUBaseInfo.h"
3067357739SVang Thao #include "llvm/Analysis/OptimizationRemarkEmitter.h"
31ef736a1cSserge-sans-paille #include "llvm/BinaryFormat/ELF.h"
32989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFrameInfo.h"
3367357739SVang Thao #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
34ff98241fSMatt Arsenault #include "llvm/IR/DiagnosticInfo.h"
3533cb8f5bSTim Renouf #include "llvm/MC/MCAssembler.h"
3645bb48eaSTom Stellard #include "llvm/MC/MCContext.h"
3745bb48eaSTom Stellard #include "llvm/MC/MCSectionELF.h"
3845bb48eaSTom Stellard #include "llvm/MC/MCStreamer.h"
3989b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
406a87e9b0Sdfukalov #include "llvm/Support/AMDHSAKernelDescriptor.h"
41ef736a1cSserge-sans-paille #include "llvm/Support/TargetParser.h"
426054e650SDavid Blaikie #include "llvm/Target/TargetLoweringObjectFile.h"
436a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h"
4445bb48eaSTom Stellard 
4545bb48eaSTom Stellard using namespace llvm;
46c3beb6a0SKonstantin Zhuravlyov using namespace llvm::AMDGPU;
4745bb48eaSTom Stellard 
481024b73eSMatt Arsenault // This should get the default rounding mode from the kernel. We just set the
491024b73eSMatt Arsenault // default here, but this could change if the OpenCL rounding mode pragmas are
501024b73eSMatt Arsenault // used.
5145bb48eaSTom Stellard //
5245bb48eaSTom Stellard // The denormal mode here should match what is reported by the OpenCL runtime
5345bb48eaSTom Stellard // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
5545bb48eaSTom Stellard //
5645bb48eaSTom Stellard // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
5745bb48eaSTom Stellard // precision, and leaves single precision to flush all and does not report
5845bb48eaSTom Stellard // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
5945bb48eaSTom Stellard // CL_FP_DENORM for both.
6045bb48eaSTom Stellard //
6145bb48eaSTom Stellard // FIXME: It seems some instructions do not support single precision denormals
6245bb48eaSTom Stellard // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
6345bb48eaSTom Stellard // and sin_f32, cos_f32 on most parts).
6445bb48eaSTom Stellard 
6545bb48eaSTom Stellard // We want to use these instructions, and using fp32 denormals also causes
6645bb48eaSTom Stellard // instructions to run at the double precision rate for the device so it's
6745bb48eaSTom Stellard // probably best to just report no single precision denormals.
getFPMode(AMDGPU::SIModeRegisterDefaults Mode)68db0ed3e4SMatt Arsenault static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
6945bb48eaSTom Stellard   return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
7045bb48eaSTom Stellard          FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
711024b73eSMatt Arsenault          FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
721024b73eSMatt Arsenault          FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
7345bb48eaSTom Stellard }
7445bb48eaSTom Stellard 
7545bb48eaSTom Stellard static AsmPrinter *
createAMDGPUAsmPrinterPass(TargetMachine & tm,std::unique_ptr<MCStreamer> && Streamer)7645bb48eaSTom Stellard createAMDGPUAsmPrinterPass(TargetMachine &tm,
7745bb48eaSTom Stellard                            std::unique_ptr<MCStreamer> &&Streamer) {
7845bb48eaSTom Stellard   return new AMDGPUAsmPrinter(tm, std::move(Streamer));
7945bb48eaSTom Stellard }
8045bb48eaSTom Stellard 
LLVMInitializeAMDGPUAsmPrinter()810dbcb363STom Stellard extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
82f42454b9SMehdi Amini   TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
83c5015010STom Stellard                                      llvm::createR600AsmPrinterPass);
84f42454b9SMehdi Amini   TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
85f42454b9SMehdi Amini                                      createAMDGPUAsmPrinterPass);
8645bb48eaSTom Stellard }
8745bb48eaSTom Stellard 
AMDGPUAsmPrinter(TargetMachine & TM,std::unique_ptr<MCStreamer> Streamer)8845bb48eaSTom Stellard AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
8945bb48eaSTom Stellard                                    std::unique_ptr<MCStreamer> Streamer)
901a14bfa0SYaxun Liu     : AsmPrinter(TM, std::move(Streamer)) {
913fdf3b15SKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
923fdf3b15SKonstantin Zhuravlyov     if (isHsaAbiVersion2(getGlobalSTI())) {
936a87e9b0Sdfukalov       HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
94f4ace637SKonstantin Zhuravlyov     } else if (isHsaAbiVersion3(getGlobalSTI())) {
956a87e9b0Sdfukalov       HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
961194b9cdSChangpeng Fang     } else if (isHsaAbiVersion5(getGlobalSTI())) {
971194b9cdSChangpeng Fang       HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
98f4ace637SKonstantin Zhuravlyov     } else {
99f4ace637SKonstantin Zhuravlyov       HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
1003fdf3b15SKonstantin Zhuravlyov     }
1013fdf3b15SKonstantin Zhuravlyov   }
1021a14bfa0SYaxun Liu }
10345bb48eaSTom Stellard 
getPassName() const104117296c0SMehdi Amini StringRef AMDGPUAsmPrinter::getPassName() const {
105f9245b75SMatt Arsenault   return "AMDGPU Assembly Printer";
106f9245b75SMatt Arsenault }
107f9245b75SMatt Arsenault 
getGlobalSTI() const1084cd9509eSMatt Arsenault const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
1097498cd61SKonstantin Zhuravlyov   return TM.getMCSubtargetInfo();
1107498cd61SKonstantin Zhuravlyov }
1117498cd61SKonstantin Zhuravlyov 
getTargetStreamer() const1128c18f5b3SKonstantin Zhuravlyov AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
1138c18f5b3SKonstantin Zhuravlyov   if (!OutStreamer)
1148c18f5b3SKonstantin Zhuravlyov     return nullptr;
1158c18f5b3SKonstantin Zhuravlyov   return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
1167498cd61SKonstantin Zhuravlyov }
1177498cd61SKonstantin Zhuravlyov 
emitStartOfAsmFile(Module & M)1180dce409cSFangrui Song void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
1194a025622SSebastian Neubauer   IsTargetStreamerInitialized = false;
1204a025622SSebastian Neubauer }
1214a025622SSebastian Neubauer 
initTargetStreamer(Module & M)1224a025622SSebastian Neubauer void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
1234a025622SSebastian Neubauer   IsTargetStreamerInitialized = true;
1244a025622SSebastian Neubauer 
125f4ace637SKonstantin Zhuravlyov   // TODO: Which one is called first, emitStartOfAsmFile or
126f4ace637SKonstantin Zhuravlyov   // emitFunctionBodyStart?
127f4ace637SKonstantin Zhuravlyov   if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
128f4ace637SKonstantin Zhuravlyov     initializeTargetID(M);
12900f2cb11SKonstantin Zhuravlyov 
130eda425edSKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
131eda425edSKonstantin Zhuravlyov       TM.getTargetTriple().getOS() != Triple::AMDPAL)
132eda425edSKonstantin Zhuravlyov     return;
133eda425edSKonstantin Zhuravlyov 
1341194b9cdSChangpeng Fang   if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
135f4ace637SKonstantin Zhuravlyov     getTargetStreamer()->EmitDirectiveAMDGCNTarget();
136f4ace637SKonstantin Zhuravlyov 
137eda425edSKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
138f4ace637SKonstantin Zhuravlyov     HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
139eda425edSKonstantin Zhuravlyov 
140eda425edSKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
141d737b551STim Renouf     getTargetStreamer()->getPALMetadata()->readFromIR(M);
142eda425edSKonstantin Zhuravlyov 
1431194b9cdSChangpeng Fang   if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
144f5b36e56SScott Linder     return;
145f5b36e56SScott Linder 
146f4ace637SKonstantin Zhuravlyov   // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
147eda425edSKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
1488c18f5b3SKonstantin Zhuravlyov     getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
149eda425edSKonstantin Zhuravlyov 
150f4ace637SKonstantin Zhuravlyov   // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
1514cd9509eSMatt Arsenault   IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
152f4ace637SKonstantin Zhuravlyov   getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
15371e43ee4SKonstantin Zhuravlyov       Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
1547498cd61SKonstantin Zhuravlyov }
1557498cd61SKonstantin Zhuravlyov 
emitEndOfAsmFile(Module & M)1560dce409cSFangrui Song void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
1574a025622SSebastian Neubauer   // Init target streamer if it has not yet happened
1584a025622SSebastian Neubauer   if (!IsTargetStreamerInitialized)
1594a025622SSebastian Neubauer     initTargetStreamer(M);
1604a025622SSebastian Neubauer 
1618c18f5b3SKonstantin Zhuravlyov   // Following code requires TargetStreamer to be present.
1628c18f5b3SKonstantin Zhuravlyov   if (!getTargetStreamer())
1638c18f5b3SKonstantin Zhuravlyov     return;
1648c18f5b3SKonstantin Zhuravlyov 
1653fdf3b15SKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
166f4ace637SKonstantin Zhuravlyov       isHsaAbiVersion2(getGlobalSTI()))
167f4ace637SKonstantin Zhuravlyov     getTargetStreamer()->EmitISAVersion();
1689c05b2bcSKonstantin Zhuravlyov 
1699c05b2bcSKonstantin Zhuravlyov   // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
170f4ace637SKonstantin Zhuravlyov   // Emit HSA Metadata (NT_AMD_HSA_METADATA).
1719c05b2bcSKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
172f5b36e56SScott Linder     HSAMetadataStream->end();
173f5b36e56SScott Linder     bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
174f5b36e56SScott Linder     (void)Success;
175f5b36e56SScott Linder     assert(Success && "Malformed HSA Metadata");
1769c05b2bcSKonstantin Zhuravlyov   }
177f5b36e56SScott Linder }
178f4218372STom Stellard 
isBlockOnlyReachableByFallthrough(const MachineBasicBlock * MBB) const1796bc43d86SMatt Arsenault bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
1806bc43d86SMatt Arsenault   const MachineBasicBlock *MBB) const {
1816bc43d86SMatt Arsenault   if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
1826bc43d86SMatt Arsenault     return false;
1836bc43d86SMatt Arsenault 
1846bc43d86SMatt Arsenault   if (MBB->empty())
1856bc43d86SMatt Arsenault     return true;
1866bc43d86SMatt Arsenault 
1876bc43d86SMatt Arsenault   // If this is a block implementing a long branch, an expression relative to
1886bc43d86SMatt Arsenault   // the start of the block is needed.  to the start of the block.
1896bc43d86SMatt Arsenault   // XXX - Is there a smarter way to check this?
1906bc43d86SMatt Arsenault   return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
1916bc43d86SMatt Arsenault }
1926bc43d86SMatt Arsenault 
emitFunctionBodyStart()1930dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionBodyStart() {
19400f2cb11SKonstantin Zhuravlyov   const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
195f4ace637SKonstantin Zhuravlyov   const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
196f4ace637SKonstantin Zhuravlyov   const Function &F = MF->getFunction();
197f4ace637SKonstantin Zhuravlyov 
198f4ace637SKonstantin Zhuravlyov   // TODO: Which one is called first, emitStartOfAsmFile or
199f4ace637SKonstantin Zhuravlyov   // emitFunctionBodyStart?
200f4ace637SKonstantin Zhuravlyov   if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
201f4ace637SKonstantin Zhuravlyov     initializeTargetID(*F.getParent());
202f4ace637SKonstantin Zhuravlyov 
203f4ace637SKonstantin Zhuravlyov   const auto &FunctionTargetID = STM.getTargetID();
204f4ace637SKonstantin Zhuravlyov   // Make sure function's xnack settings are compatible with module's
205f4ace637SKonstantin Zhuravlyov   // xnack settings.
206f4ace637SKonstantin Zhuravlyov   if (FunctionTargetID.isXnackSupported() &&
207f4ace637SKonstantin Zhuravlyov       FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
208f4ace637SKonstantin Zhuravlyov       FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
209f4ace637SKonstantin Zhuravlyov     OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
210f4ace637SKonstantin Zhuravlyov                            "' function does not match module xnack setting");
211f4ace637SKonstantin Zhuravlyov     return;
212f4ace637SKonstantin Zhuravlyov   }
213f4ace637SKonstantin Zhuravlyov   // Make sure function's sramecc settings are compatible with module's
214f4ace637SKonstantin Zhuravlyov   // sramecc settings.
215f4ace637SKonstantin Zhuravlyov   if (FunctionTargetID.isSramEccSupported() &&
216f4ace637SKonstantin Zhuravlyov       FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
217f4ace637SKonstantin Zhuravlyov       FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
218f4ace637SKonstantin Zhuravlyov     OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
219f4ace637SKonstantin Zhuravlyov                            "' function does not match module sramecc setting");
220f4ace637SKonstantin Zhuravlyov     return;
221f4ace637SKonstantin Zhuravlyov   }
222f4ace637SKonstantin Zhuravlyov 
22300f2cb11SKonstantin Zhuravlyov   if (!MFI.isEntryFunction())
22400f2cb11SKonstantin Zhuravlyov     return;
225021a218dSMatt Arsenault 
2263fdf3b15SKonstantin Zhuravlyov   if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
2274bec7d42SMatt Arsenault       (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
2284bec7d42SMatt Arsenault        F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
229ca0e7f64SKonstantin Zhuravlyov     amd_kernel_code_t KernelCode;
230b03dd8daSMatt Arsenault     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
2318c18f5b3SKonstantin Zhuravlyov     getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
232f151a45cSTom Stellard   }
2337498cd61SKonstantin Zhuravlyov 
234f5b36e56SScott Linder   if (STM.isAmdHsaOS())
235f5b36e56SScott Linder     HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
236f151a45cSTom Stellard }
237f151a45cSTom Stellard 
emitFunctionBodyEnd()2380dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
23900f2cb11SKonstantin Zhuravlyov   const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
24000f2cb11SKonstantin Zhuravlyov   if (!MFI.isEntryFunction())
24100f2cb11SKonstantin Zhuravlyov     return;
2424cd9509eSMatt Arsenault 
2433fdf3b15SKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
2443fdf3b15SKonstantin Zhuravlyov       isHsaAbiVersion2(getGlobalSTI()))
24500f2cb11SKonstantin Zhuravlyov     return;
24600f2cb11SKonstantin Zhuravlyov 
247ce25bc3eSKonstantin Zhuravlyov   auto &Streamer = getTargetStreamer()->getStreamer();
248ce25bc3eSKonstantin Zhuravlyov   auto &Context = Streamer.getContext();
249ce25bc3eSKonstantin Zhuravlyov   auto &ObjectFileInfo = *Context.getObjectFileInfo();
250ce25bc3eSKonstantin Zhuravlyov   auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
251ce25bc3eSKonstantin Zhuravlyov 
25215d82c62SFangrui Song   Streamer.pushSection();
253adf4142fSFangrui Song   Streamer.switchSection(&ReadOnlySection);
254ce25bc3eSKonstantin Zhuravlyov 
255ce25bc3eSKonstantin Zhuravlyov   // CP microcode requires the kernel descriptor to be allocated on 64 byte
256ce25bc3eSKonstantin Zhuravlyov   // alignment.
2576d2d589bSFangrui Song   Streamer.emitValueToAlignment(64, 0, 1, 0);
258ce25bc3eSKonstantin Zhuravlyov   if (ReadOnlySection.getAlignment() < 64)
25918f805a7SGuillaume Chatelet     ReadOnlySection.setAlignment(Align(64));
260ce25bc3eSKonstantin Zhuravlyov 
261f4ace637SKonstantin Zhuravlyov   const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
2624cd9509eSMatt Arsenault 
26300f2cb11SKonstantin Zhuravlyov   SmallString<128> KernelName;
26400f2cb11SKonstantin Zhuravlyov   getNameWithPrefix(KernelName, &MF->getFunction());
26500f2cb11SKonstantin Zhuravlyov   getTargetStreamer()->EmitAmdhsaKernelDescriptor(
266f4ace637SKonstantin Zhuravlyov       STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
2671e8c2c70SScott Linder       CurrentProgramInfo.NumVGPRsForWavesPerEU,
2681e8c2c70SScott Linder       CurrentProgramInfo.NumSGPRsForWavesPerEU -
269f4ace637SKonstantin Zhuravlyov           IsaInfo::getNumExtraSGPRs(&STM,
2701e8c2c70SScott Linder                                     CurrentProgramInfo.VCCUsed,
2711e8c2c70SScott Linder                                     CurrentProgramInfo.FlatUsed),
272f4ace637SKonstantin Zhuravlyov       CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
273ce25bc3eSKonstantin Zhuravlyov 
27415d82c62SFangrui Song   Streamer.popSection();
27500f2cb11SKonstantin Zhuravlyov }
27600f2cb11SKonstantin Zhuravlyov 
emitFunctionEntryLabel()2770dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
2783fdf3b15SKonstantin Zhuravlyov   if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
2791194b9cdSChangpeng Fang       isHsaAbiVersion3AndAbove(getGlobalSTI())) {
2800dce409cSFangrui Song     AsmPrinter::emitFunctionEntryLabel();
28100f2cb11SKonstantin Zhuravlyov     return;
28200f2cb11SKonstantin Zhuravlyov   }
28300f2cb11SKonstantin Zhuravlyov 
2841e1b05dbSTom Stellard   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2855bfbae5cSTom Stellard   const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
286aa067cb9SKonstantin Zhuravlyov   if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
2871b9748c6STom Stellard     SmallString<128> SymbolName;
288f1caa283SMatthias Braun     getNameWithPrefix(SymbolName, &MF->getFunction()),
2898c18f5b3SKonstantin Zhuravlyov     getTargetStreamer()->EmitAMDGPUSymbolType(
2907498cd61SKonstantin Zhuravlyov         SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
2911e1b05dbSTom Stellard   }
29233cb8f5bSTim Renouf   if (DumpCodeInstEmitter) {
293cead41d4STim Renouf     // Disassemble function name label to text.
294f1caa283SMatthias Braun     DisasmLines.push_back(MF->getName().str() + ":");
295cead41d4STim Renouf     DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
296cead41d4STim Renouf     HexLines.push_back("");
297cead41d4STim Renouf   }
2981e1b05dbSTom Stellard 
2990dce409cSFangrui Song   AsmPrinter::emitFunctionEntryLabel();
3001e1b05dbSTom Stellard }
3011e1b05dbSTom Stellard 
emitBasicBlockStart(const MachineBasicBlock & MBB)3020dce409cSFangrui Song void AMDGPUAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
30333cb8f5bSTim Renouf   if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
304cead41d4STim Renouf     // Write a line for the basic block label if it is not only fallthrough.
305cead41d4STim Renouf     DisasmLines.push_back(
306cead41d4STim Renouf         (Twine("BB") + Twine(getFunctionNumber())
307cead41d4STim Renouf          + "_" + Twine(MBB.getNumber()) + ":").str());
308cead41d4STim Renouf     DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
309cead41d4STim Renouf     HexLines.push_back("");
310cead41d4STim Renouf   }
3110dce409cSFangrui Song   AsmPrinter::emitBasicBlockStart(MBB);
312cead41d4STim Renouf }
313cead41d4STim Renouf 
emitGlobalVariable(const GlobalVariable * GV)3141d49eb00SFangrui Song void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
3152710171aSNicolai Haehnle   if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
3162710171aSNicolai Haehnle     if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
3172710171aSNicolai Haehnle       OutContext.reportError({},
3182710171aSNicolai Haehnle                              Twine(GV->getName()) +
3192710171aSNicolai Haehnle                                  ": unsupported initializer for address space");
32000f2f91aSTom Stellard       return;
3212710171aSNicolai Haehnle     }
3222710171aSNicolai Haehnle 
3232710171aSNicolai Haehnle     // LDS variables aren't emitted in HSA or PAL yet.
3242710171aSNicolai Haehnle     const Triple::OSType OS = TM.getTargetTriple().getOS();
3252710171aSNicolai Haehnle     if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
3262710171aSNicolai Haehnle       return;
3272710171aSNicolai Haehnle 
3282710171aSNicolai Haehnle     MCSymbol *GVSym = getSymbol(GV);
3292710171aSNicolai Haehnle 
3302710171aSNicolai Haehnle     GVSym->redefineIfPossible();
3312710171aSNicolai Haehnle     if (GVSym->isDefined() || GVSym->isVariable())
3322710171aSNicolai Haehnle       report_fatal_error("symbol '" + Twine(GVSym->getName()) +
3332710171aSNicolai Haehnle                          "' is already defined");
3342710171aSNicolai Haehnle 
3352710171aSNicolai Haehnle     const DataLayout &DL = GV->getParent()->getDataLayout();
3362710171aSNicolai Haehnle     uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
337129b531cSKazu Hirata     Align Alignment = GV->getAlign().value_or(Align(4));
3382710171aSNicolai Haehnle 
3390bc77a0fSFangrui Song     emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
3400bc77a0fSFangrui Song     emitLinkage(GV, GVSym);
341f0a665afSMichael Liao     if (auto TS = getTargetStreamer())
34252911428SGuillaume Chatelet       TS->emitAMDGPULDS(GVSym, Size, Alignment);
3432710171aSNicolai Haehnle     return;
3442710171aSNicolai Haehnle   }
34500f2f91aSTom Stellard 
3461d49eb00SFangrui Song   AsmPrinter::emitGlobalVariable(GV);
347e3b5aeafSTom Stellard }
348e3b5aeafSTom Stellard 
doFinalization(Module & M)349b03dd8daSMatt Arsenault bool AMDGPUAsmPrinter::doFinalization(Module &M) {
350ae4fcb97SNicolai Haehnle   // Pad with s_code_end to help tools and guard against instruction prefetch
351ae4fcb97SNicolai Haehnle   // causing stale data in caches. Arguably this should be done by the linker,
352ae4fcb97SNicolai Haehnle   // which is why this isn't done for Mesa.
353ae4fcb97SNicolai Haehnle   const MCSubtargetInfo &STI = *getGlobalSTI();
354a8d9d507SStanislav Mekhanoshin   if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
355ae4fcb97SNicolai Haehnle       (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
356ae4fcb97SNicolai Haehnle        STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
357adf4142fSFangrui Song     OutStreamer->switchSection(getObjFileLowering().getTextSection());
358a8d9d507SStanislav Mekhanoshin     getTargetStreamer()->EmitCodeEnd(STI);
35941bbe101SStanislav Mekhanoshin   }
36041bbe101SStanislav Mekhanoshin 
361b03dd8daSMatt Arsenault   return AsmPrinter::doFinalization(M);
362b03dd8daSMatt Arsenault }
363b03dd8daSMatt Arsenault 
364b03dd8daSMatt Arsenault // Print comments that apply to both callable functions and entry points.
emitCommonFunctionComments(uint32_t NumVGPR,Optional<uint32_t> NumAGPR,uint32_t TotalNumVGPR,uint32_t NumSGPR,uint64_t ScratchSize,uint64_t CodeSize,const AMDGPUMachineFunction * MFI)365b03dd8daSMatt Arsenault void AMDGPUAsmPrinter::emitCommonFunctionComments(
366b03dd8daSMatt Arsenault   uint32_t NumVGPR,
367075bc48aSStanislav Mekhanoshin   Optional<uint32_t> NumAGPR,
368075bc48aSStanislav Mekhanoshin   uint32_t TotalNumVGPR,
369b03dd8daSMatt Arsenault   uint32_t NumSGPR,
3709ba465a9SMatt Arsenault   uint64_t ScratchSize,
3711c538423SStanislav Mekhanoshin   uint64_t CodeSize,
3721c538423SStanislav Mekhanoshin   const AMDGPUMachineFunction *MFI) {
373b03dd8daSMatt Arsenault   OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
374b03dd8daSMatt Arsenault   OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
375b03dd8daSMatt Arsenault   OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
376075bc48aSStanislav Mekhanoshin   if (NumAGPR) {
377075bc48aSStanislav Mekhanoshin     OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
378075bc48aSStanislav Mekhanoshin     OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
379075bc48aSStanislav Mekhanoshin                                 false);
380075bc48aSStanislav Mekhanoshin   }
381b03dd8daSMatt Arsenault   OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
3821c538423SStanislav Mekhanoshin   OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
3831c538423SStanislav Mekhanoshin                               false);
384b03dd8daSMatt Arsenault }
385b03dd8daSMatt Arsenault 
getAmdhsaKernelCodeProperties(const MachineFunction & MF) const38600f2cb11SKonstantin Zhuravlyov uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
38700f2cb11SKonstantin Zhuravlyov     const MachineFunction &MF) const {
38800f2cb11SKonstantin Zhuravlyov   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
38900f2cb11SKonstantin Zhuravlyov   uint16_t KernelCodeProperties = 0;
39000f2cb11SKonstantin Zhuravlyov 
39100f2cb11SKonstantin Zhuravlyov   if (MFI.hasPrivateSegmentBuffer()) {
39200f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
39300f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
39400f2cb11SKonstantin Zhuravlyov   }
39500f2cb11SKonstantin Zhuravlyov   if (MFI.hasDispatchPtr()) {
39600f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
39700f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
39800f2cb11SKonstantin Zhuravlyov   }
3990f20a35bSChangpeng Fang   if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
40000f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
40100f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
40200f2cb11SKonstantin Zhuravlyov   }
40300f2cb11SKonstantin Zhuravlyov   if (MFI.hasKernargSegmentPtr()) {
40400f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
40500f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
40600f2cb11SKonstantin Zhuravlyov   }
40700f2cb11SKonstantin Zhuravlyov   if (MFI.hasDispatchID()) {
40800f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
40900f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
41000f2cb11SKonstantin Zhuravlyov   }
41100f2cb11SKonstantin Zhuravlyov   if (MFI.hasFlatScratchInit()) {
41200f2cb11SKonstantin Zhuravlyov     KernelCodeProperties |=
41300f2cb11SKonstantin Zhuravlyov         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
41400f2cb11SKonstantin Zhuravlyov   }
4155d00c306SStanislav Mekhanoshin   if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
4165d00c306SStanislav Mekhanoshin     KernelCodeProperties |=
4175d00c306SStanislav Mekhanoshin         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
4185d00c306SStanislav Mekhanoshin   }
41900f2cb11SKonstantin Zhuravlyov 
420*d96361d7SAbinav Puthan Purayil   if (CurrentProgramInfo.DynamicCallStack) {
421*d96361d7SAbinav Puthan Purayil     KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
422*d96361d7SAbinav Puthan Purayil   }
423*d96361d7SAbinav Puthan Purayil 
42400f2cb11SKonstantin Zhuravlyov   return KernelCodeProperties;
42500f2cb11SKonstantin Zhuravlyov }
42600f2cb11SKonstantin Zhuravlyov 
getAmdhsaKernelDescriptor(const MachineFunction & MF,const SIProgramInfo & PI) const42700f2cb11SKonstantin Zhuravlyov amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
42800f2cb11SKonstantin Zhuravlyov     const MachineFunction &MF,
42900f2cb11SKonstantin Zhuravlyov     const SIProgramInfo &PI) const {
430a8d9d507SStanislav Mekhanoshin   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
431f4ace637SKonstantin Zhuravlyov   const Function &F = MF.getFunction();
432f4ace637SKonstantin Zhuravlyov 
43300f2cb11SKonstantin Zhuravlyov   amdhsa::kernel_descriptor_t KernelDescriptor;
43400f2cb11SKonstantin Zhuravlyov   memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
43500f2cb11SKonstantin Zhuravlyov 
43600f2cb11SKonstantin Zhuravlyov   assert(isUInt<32>(PI.ScratchSize));
4371124bf4aSSebastian Neubauer   assert(isUInt<32>(PI.getComputePGMRSrc1()));
43800f2cb11SKonstantin Zhuravlyov   assert(isUInt<32>(PI.ComputePGMRSrc2));
43900f2cb11SKonstantin Zhuravlyov 
44000f2cb11SKonstantin Zhuravlyov   KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
44100f2cb11SKonstantin Zhuravlyov   KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
442f4ace637SKonstantin Zhuravlyov 
443f4ace637SKonstantin Zhuravlyov   Align MaxKernArgAlign;
444f4ace637SKonstantin Zhuravlyov   KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
445f4ace637SKonstantin Zhuravlyov 
4461124bf4aSSebastian Neubauer   KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
44700f2cb11SKonstantin Zhuravlyov   KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
44800f2cb11SKonstantin Zhuravlyov   KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
44900f2cb11SKonstantin Zhuravlyov 
450a8d9d507SStanislav Mekhanoshin   assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
451a8d9d507SStanislav Mekhanoshin   if (STM.hasGFX90AInsts())
452a8d9d507SStanislav Mekhanoshin     KernelDescriptor.compute_pgm_rsrc3 =
453a8d9d507SStanislav Mekhanoshin       CurrentProgramInfo.ComputePGMRSrc3GFX90A;
454a8d9d507SStanislav Mekhanoshin 
45500f2cb11SKonstantin Zhuravlyov   return KernelDescriptor;
45600f2cb11SKonstantin Zhuravlyov }
45700f2cb11SKonstantin Zhuravlyov 
runOnMachineFunction(MachineFunction & MF)45845bb48eaSTom Stellard bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
4594a025622SSebastian Neubauer   // Init target streamer lazily on the first function so that previous passes
4604a025622SSebastian Neubauer   // can set metadata.
4614a025622SSebastian Neubauer   if (!IsTargetStreamerInitialized)
4624a025622SSebastian Neubauer     initTargetStreamer(*MF.getFunction().getParent());
4634a025622SSebastian Neubauer 
4642b08f6afSSebastian Neubauer   ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
465b03dd8daSMatt Arsenault   CurrentProgramInfo = SIProgramInfo();
466b03dd8daSMatt Arsenault 
4676cb7b8a4SMatt Arsenault   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
46845bb48eaSTom Stellard 
46945bb48eaSTom Stellard   // The starting address of all shader programs must be 256 bytes aligned.
4706cb7b8a4SMatt Arsenault   // Regular functions just need the basic required instruction alignment.
47118f805a7SGuillaume Chatelet   MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
47245bb48eaSTom Stellard 
47345bb48eaSTom Stellard   SetupMachineFunction(MF);
47445bb48eaSTom Stellard 
4755bfbae5cSTom Stellard   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
47645bb48eaSTom Stellard   MCContext &Context = getObjFileLowering().getContext();
477807ecc3dSTim Renouf   // FIXME: This should be an explicit check for Mesa.
478807ecc3dSTim Renouf   if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
47945bb48eaSTom Stellard     MCSectionELF *ConfigSection =
48045bb48eaSTom Stellard         Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
481adf4142fSFangrui Song     OutStreamer->switchSection(ConfigSection);
48267a6d540SKonstantin Zhuravlyov   }
48345bb48eaSTom Stellard 
4845733167fSSebastian Neubauer   if (MFI->isModuleEntryFunction()) {
485b03dd8daSMatt Arsenault     getSIProgramInfo(CurrentProgramInfo, MF);
486b03dd8daSMatt Arsenault   }
487b03dd8daSMatt Arsenault 
488edd67564SSebastian Neubauer   if (STM.isAmdPalOS()) {
489edd67564SSebastian Neubauer     if (MFI->isEntryFunction())
490c3beb6a0SKonstantin Zhuravlyov       EmitPALMetadata(MF, CurrentProgramInfo);
4915733167fSSebastian Neubauer     else if (MFI->isModuleEntryFunction())
492edd67564SSebastian Neubauer       emitPALFunctionMetadata(MF);
493edd67564SSebastian Neubauer   } else if (!STM.isAmdHsaOS()) {
494b03dd8daSMatt Arsenault     EmitProgramInfoSI(MF, CurrentProgramInfo);
495f151a45cSTom Stellard   }
49645bb48eaSTom Stellard 
4978479240bSJay Foad   DumpCodeInstEmitter = nullptr;
4988479240bSJay Foad   if (STM.dumpCode()) {
4998479240bSJay Foad     // For -dumpcode, get the assembler out of the streamer, even if it does
5008479240bSJay Foad     // not really want to let us have it. This only works with -filetype=obj.
5018479240bSJay Foad     bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
5028479240bSJay Foad     OutStreamer->setUseAssemblerInfoForParsing(true);
5038479240bSJay Foad     MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
5048479240bSJay Foad     OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
5058479240bSJay Foad     if (Assembler)
5068479240bSJay Foad       DumpCodeInstEmitter = Assembler->getEmitterPtr();
5078479240bSJay Foad   }
5088479240bSJay Foad 
50945bb48eaSTom Stellard   DisasmLines.clear();
51045bb48eaSTom Stellard   HexLines.clear();
51145bb48eaSTom Stellard   DisasmLineMaxLen = 0;
51245bb48eaSTom Stellard 
5130dce409cSFangrui Song   emitFunctionBody();
51445bb48eaSTom Stellard 
51567357739SVang Thao   emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
51667357739SVang Thao                            STM.hasMAIInsts());
51767357739SVang Thao 
51845bb48eaSTom Stellard   if (isVerbose()) {
51945bb48eaSTom Stellard     MCSectionELF *CommentSection =
52045bb48eaSTom Stellard         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
521adf4142fSFangrui Song     OutStreamer->switchSection(CommentSection);
52245bb48eaSTom Stellard 
523b03dd8daSMatt Arsenault     if (!MFI->isEntryFunction()) {
524021a218dSMatt Arsenault       OutStreamer->emitRawComment(" Function info:", false);
5252b08f6afSSebastian Neubauer       const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
5262b08f6afSSebastian Neubauer           ResourceUsage->getResourceInfo(&MF.getFunction());
527b03dd8daSMatt Arsenault       emitCommonFunctionComments(
528b03dd8daSMatt Arsenault         Info.NumVGPR,
529075bc48aSStanislav Mekhanoshin         STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
530075bc48aSStanislav Mekhanoshin         Info.getTotalNumVGPRs(STM),
5315bfbae5cSTom Stellard         Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
532b03dd8daSMatt Arsenault         Info.PrivateSegmentSize,
5331c538423SStanislav Mekhanoshin         getFunctionCodeSize(MF), MFI);
534b03dd8daSMatt Arsenault       return false;
535021a218dSMatt Arsenault     }
536021a218dSMatt Arsenault 
537b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(" Kernel info:", false);
538075bc48aSStanislav Mekhanoshin     emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
539075bc48aSStanislav Mekhanoshin                                STM.hasMAIInsts()
540075bc48aSStanislav Mekhanoshin                                  ? CurrentProgramInfo.NumAccVGPR
541075bc48aSStanislav Mekhanoshin                                  : Optional<uint32_t>(),
542075bc48aSStanislav Mekhanoshin                                CurrentProgramInfo.NumVGPR,
543b03dd8daSMatt Arsenault                                CurrentProgramInfo.NumSGPR,
544b03dd8daSMatt Arsenault                                CurrentProgramInfo.ScratchSize,
5451c538423SStanislav Mekhanoshin                                getFunctionCodeSize(MF), MFI);
546b03dd8daSMatt Arsenault 
547b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
548b03dd8daSMatt Arsenault       " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
549b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
550b03dd8daSMatt Arsenault       " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
551b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
552b03dd8daSMatt Arsenault       " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
553fd8ab09cSMatt Arsenault       " bytes/workgroup (compile time only)", false);
554d41c0dbfSMatt Arsenault 
555b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
556b03dd8daSMatt Arsenault       " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
557b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
558b03dd8daSMatt Arsenault       " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
559021a218dSMatt Arsenault 
560b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
561b03dd8daSMatt Arsenault       " NumSGPRsForWavesPerEU: " +
562b03dd8daSMatt Arsenault       Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
563b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
564b03dd8daSMatt Arsenault       " NumVGPRsForWavesPerEU: " +
565b03dd8daSMatt Arsenault       Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
5661d65026cSKonstantin Zhuravlyov 
567a8d9d507SStanislav Mekhanoshin     if (STM.hasGFX90AInsts())
568a8d9d507SStanislav Mekhanoshin       OutStreamer->emitRawComment(
569a8d9d507SStanislav Mekhanoshin         " AccumOffset: " +
570a8d9d507SStanislav Mekhanoshin         Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
571a8d9d507SStanislav Mekhanoshin 
572b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
5732594fa85SStanislav Mekhanoshin       " Occupancy: " +
5742594fa85SStanislav Mekhanoshin       Twine(CurrentProgramInfo.Occupancy), false);
5752594fa85SStanislav Mekhanoshin 
5762594fa85SStanislav Mekhanoshin     OutStreamer->emitRawComment(
5771c538423SStanislav Mekhanoshin       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
5781c538423SStanislav Mekhanoshin 
579b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
580eb66bf08SStanislav Mekhanoshin       " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
581eb66bf08SStanislav Mekhanoshin       Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
582eb66bf08SStanislav Mekhanoshin     OutStreamer->emitRawComment(
583b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:USER_SGPR: " +
584b03dd8daSMatt Arsenault       Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
585b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
586b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
587b03dd8daSMatt Arsenault       Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
588b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
589b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
590b03dd8daSMatt Arsenault       Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
591b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
592b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
593b03dd8daSMatt Arsenault       Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
594b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
595b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
596b03dd8daSMatt Arsenault       Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
597b03dd8daSMatt Arsenault     OutStreamer->emitRawComment(
598b03dd8daSMatt Arsenault       " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
599b03dd8daSMatt Arsenault       Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
600d41c0dbfSMatt Arsenault       false);
601a8d9d507SStanislav Mekhanoshin 
602a8d9d507SStanislav Mekhanoshin     assert(STM.hasGFX90AInsts() ||
603a8d9d507SStanislav Mekhanoshin            CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
604a8d9d507SStanislav Mekhanoshin     if (STM.hasGFX90AInsts()) {
605a8d9d507SStanislav Mekhanoshin       OutStreamer->emitRawComment(
606a8d9d507SStanislav Mekhanoshin         " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
607a8d9d507SStanislav Mekhanoshin         Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
608a8d9d507SStanislav Mekhanoshin                                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
609a8d9d507SStanislav Mekhanoshin                                false);
610a8d9d507SStanislav Mekhanoshin       OutStreamer->emitRawComment(
611a8d9d507SStanislav Mekhanoshin         " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
612a8d9d507SStanislav Mekhanoshin         Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
613a8d9d507SStanislav Mekhanoshin                                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
614a8d9d507SStanislav Mekhanoshin                                false);
615a8d9d507SStanislav Mekhanoshin     }
61645bb48eaSTom Stellard   }
61745bb48eaSTom Stellard 
61833cb8f5bSTim Renouf   if (DumpCodeInstEmitter) {
61945bb48eaSTom Stellard 
620adf4142fSFangrui Song     OutStreamer->switchSection(
6214327a9b4SSebastian Neubauer         Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
62245bb48eaSTom Stellard 
62345bb48eaSTom Stellard     for (size_t i = 0; i < DisasmLines.size(); ++i) {
624cead41d4STim Renouf       std::string Comment = "\n";
625cead41d4STim Renouf       if (!HexLines[i].empty()) {
626cead41d4STim Renouf         Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
62745bb48eaSTom Stellard         Comment += " ; " + HexLines[i] + "\n";
628cead41d4STim Renouf       }
62945bb48eaSTom Stellard 
630a55daa14SFangrui Song       OutStreamer->emitBytes(StringRef(DisasmLines[i]));
631a55daa14SFangrui Song       OutStreamer->emitBytes(StringRef(Comment));
63245bb48eaSTom Stellard     }
63345bb48eaSTom Stellard   }
63445bb48eaSTom Stellard 
63545bb48eaSTom Stellard   return false;
63645bb48eaSTom Stellard }
63745bb48eaSTom Stellard 
638f4ace637SKonstantin Zhuravlyov // TODO: Fold this into emitFunctionBodyStart.
initializeTargetID(const Module & M)639f4ace637SKonstantin Zhuravlyov void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
640f4ace637SKonstantin Zhuravlyov   // In the beginning all features are either 'Any' or 'NotSupported',
641f4ace637SKonstantin Zhuravlyov   // depending on global target features. This will cover empty modules.
642f4ace637SKonstantin Zhuravlyov   getTargetStreamer()->initializeTargetID(
643f4ace637SKonstantin Zhuravlyov       *getGlobalSTI(), getGlobalSTI()->getFeatureString());
644f4ace637SKonstantin Zhuravlyov 
645f4ace637SKonstantin Zhuravlyov   // If module is empty, we are done.
646f4ace637SKonstantin Zhuravlyov   if (M.empty())
647f4ace637SKonstantin Zhuravlyov     return;
648f4ace637SKonstantin Zhuravlyov 
649f4ace637SKonstantin Zhuravlyov   // If module is not empty, need to find first 'Off' or 'On' feature
650f4ace637SKonstantin Zhuravlyov   // setting per feature from functions in module.
651f4ace637SKonstantin Zhuravlyov   for (auto &F : M) {
652f4ace637SKonstantin Zhuravlyov     auto &TSTargetID = getTargetStreamer()->getTargetID();
653f4ace637SKonstantin Zhuravlyov     if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
654f4ace637SKonstantin Zhuravlyov         (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
655f4ace637SKonstantin Zhuravlyov       break;
656f4ace637SKonstantin Zhuravlyov 
657f4ace637SKonstantin Zhuravlyov     const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
658f4ace637SKonstantin Zhuravlyov     const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
659f4ace637SKonstantin Zhuravlyov     if (TSTargetID->isXnackSupported())
660f4ace637SKonstantin Zhuravlyov       if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
661f4ace637SKonstantin Zhuravlyov         TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
662f4ace637SKonstantin Zhuravlyov     if (TSTargetID->isSramEccSupported())
663f4ace637SKonstantin Zhuravlyov       if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
664f4ace637SKonstantin Zhuravlyov         TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
665f4ace637SKonstantin Zhuravlyov   }
666f4ace637SKonstantin Zhuravlyov }
667f4ace637SKonstantin Zhuravlyov 
getFunctionCodeSize(const MachineFunction & MF) const668a3566f21SMatt Arsenault uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
6695bfbae5cSTom Stellard   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
67043e92fe3SMatt Arsenault   const SIInstrInfo *TII = STM.getInstrInfo();
67145bb48eaSTom Stellard 
672a3566f21SMatt Arsenault   uint64_t CodeSize = 0;
673a3566f21SMatt Arsenault 
67445bb48eaSTom Stellard   for (const MachineBasicBlock &MBB : MF) {
67545bb48eaSTom Stellard     for (const MachineInstr &MI : MBB) {
67645bb48eaSTom Stellard       // TODO: CodeSize should account for multiple functions.
677c5746865SMatt Arsenault 
678c5746865SMatt Arsenault       // TODO: Should we count size of debug info?
679801bf7ebSShiva Chen       if (MI.isDebugInstr())
680c5746865SMatt Arsenault         continue;
681c5746865SMatt Arsenault 
682a9720c67SMatt Arsenault       CodeSize += TII->getInstSizeInBytes(MI);
683a3566f21SMatt Arsenault     }
684a3566f21SMatt Arsenault   }
68545bb48eaSTom Stellard 
686a3566f21SMatt Arsenault   return CodeSize;
687a3566f21SMatt Arsenault }
68845bb48eaSTom Stellard 
getSIProgramInfo(SIProgramInfo & ProgInfo,const MachineFunction & MF)689b03dd8daSMatt Arsenault void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
690b03dd8daSMatt Arsenault                                         const MachineFunction &MF) {
6912b08f6afSSebastian Neubauer   const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
6922b08f6afSSebastian Neubauer       ResourceUsage->getResourceInfo(&MF.getFunction());
693075bc48aSStanislav Mekhanoshin   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
694b03dd8daSMatt Arsenault 
695075bc48aSStanislav Mekhanoshin   ProgInfo.NumArchVGPR = Info.NumVGPR;
696075bc48aSStanislav Mekhanoshin   ProgInfo.NumAccVGPR = Info.NumAGPR;
697075bc48aSStanislav Mekhanoshin   ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
698a8d9d507SStanislav Mekhanoshin   ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
699a8d9d507SStanislav Mekhanoshin   ProgInfo.TgSplit = STM.isTgSplitEnabled();
700b03dd8daSMatt Arsenault   ProgInfo.NumSGPR = Info.NumExplicitSGPR;
701b03dd8daSMatt Arsenault   ProgInfo.ScratchSize = Info.PrivateSegmentSize;
702b03dd8daSMatt Arsenault   ProgInfo.VCCUsed = Info.UsesVCC;
703b03dd8daSMatt Arsenault   ProgInfo.FlatUsed = Info.UsesFlatScratch;
704b03dd8daSMatt Arsenault   ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
705b03dd8daSMatt Arsenault 
7061ed4caffSMatt Arsenault   const uint64_t MaxScratchPerWorkitem =
707ff85d61aSJay Foad       STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
7081ed4caffSMatt Arsenault   if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
709f1caa283SMatthias Braun     DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
710e4223438SArthur Eubanks                                           ProgInfo.ScratchSize,
711e4223438SArthur Eubanks                                           MaxScratchPerWorkitem, DS_Error);
712f1caa283SMatthias Braun     MF.getFunction().getContext().diagnose(DiagStackSize);
7139ba465a9SMatt Arsenault   }
7149ba465a9SMatt Arsenault 
715b03dd8daSMatt Arsenault   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
716b03dd8daSMatt Arsenault 
7173c874ce4Smadhur13490   // The calculations related to SGPR/VGPR blocks are
7181e8c2c70SScott Linder   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
7191e8c2c70SScott Linder   // unified.
7201e8c2c70SScott Linder   unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
7214cd9509eSMatt Arsenault       &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
722f2f3d147SKonstantin Zhuravlyov 
72391f22fbfSMarek Olsak   // Check the addressable register limit before we add ExtraSGPRs.
72491f22fbfSMarek Olsak   if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
72591f22fbfSMarek Olsak       !STM.hasSGPRInitBug()) {
726e03b1d7bSKonstantin Zhuravlyov     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
727a3566f21SMatt Arsenault     if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
72891f22fbfSMarek Olsak       // This can happen due to a compiler bug or when using inline asm.
729f1caa283SMatthias Braun       LLVMContext &Ctx = MF.getFunction().getContext();
730e4223438SArthur Eubanks       DiagnosticInfoResourceLimit Diag(
731e4223438SArthur Eubanks           MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
732e4223438SArthur Eubanks           MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
73391f22fbfSMarek Olsak       Ctx.diagnose(Diag);
734a3566f21SMatt Arsenault       ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
73591f22fbfSMarek Olsak     }
73691f22fbfSMarek Olsak   }
73791f22fbfSMarek Olsak 
7381d65026cSKonstantin Zhuravlyov   // Account for extra SGPRs and VGPRs reserved for debugger use.
739a3566f21SMatt Arsenault   ProgInfo.NumSGPR += ExtraSGPRs;
74045bb48eaSTom Stellard 
74179f75468SMatt Arsenault   const Function &F = MF.getFunction();
74279f75468SMatt Arsenault 
743fd8d4af3STim Renouf   // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
744fd8d4af3STim Renouf   // dispatch registers are function args.
745fd8d4af3STim Renouf   unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
74679f75468SMatt Arsenault 
74779f75468SMatt Arsenault   if (isShader(F.getCallingConv())) {
74869f7d81dSDavid Stuttard     bool IsPixelShader =
74969f7d81dSDavid Stuttard         F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
75069f7d81dSDavid Stuttard 
75169f7d81dSDavid Stuttard     // Calculate the number of VGPR registers based on the SPI input registers
75269f7d81dSDavid Stuttard     uint32_t InputEna = 0;
75369f7d81dSDavid Stuttard     uint32_t InputAddr = 0;
75469f7d81dSDavid Stuttard     unsigned LastEna = 0;
75569f7d81dSDavid Stuttard 
75669f7d81dSDavid Stuttard     if (IsPixelShader) {
75769f7d81dSDavid Stuttard       // Note for IsPixelShader:
75869f7d81dSDavid Stuttard       // By this stage, all enabled inputs are tagged in InputAddr as well.
75969f7d81dSDavid Stuttard       // We will use InputAddr to determine whether the input counts against the
76069f7d81dSDavid Stuttard       // vgpr total and only use the InputEnable to determine the last input
76169f7d81dSDavid Stuttard       // that is relevant - if extra arguments are used, then we have to honour
76269f7d81dSDavid Stuttard       // the InputAddr for any intermediate non-enabled inputs.
76369f7d81dSDavid Stuttard       InputEna = MFI->getPSInputEnable();
76469f7d81dSDavid Stuttard       InputAddr = MFI->getPSInputAddr();
76569f7d81dSDavid Stuttard 
76669f7d81dSDavid Stuttard       // We only need to consider input args up to the last used arg.
76769f7d81dSDavid Stuttard       assert((InputEna || InputAddr) &&
76869f7d81dSDavid Stuttard              "PSInputAddr and PSInputEnable should "
76969f7d81dSDavid Stuttard              "never both be 0 for AMDGPU_PS shaders");
77069f7d81dSDavid Stuttard       // There are some rare circumstances where InputAddr is non-zero and
77169f7d81dSDavid Stuttard       // InputEna can be set to 0. In this case we default to setting LastEna
77269f7d81dSDavid Stuttard       // to 1.
77369f7d81dSDavid Stuttard       LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
77469f7d81dSDavid Stuttard     }
77569f7d81dSDavid Stuttard 
77679f75468SMatt Arsenault     // FIXME: We should be using the number of registers determined during
77779f75468SMatt Arsenault     // calling convention lowering to legalize the types.
77879f75468SMatt Arsenault     const DataLayout &DL = F.getParent()->getDataLayout();
77969f7d81dSDavid Stuttard     unsigned PSArgCount = 0;
78069f7d81dSDavid Stuttard     unsigned IntermediateVGPR = 0;
78179f75468SMatt Arsenault     for (auto &Arg : F.args()) {
78279f75468SMatt Arsenault       unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
78369f7d81dSDavid Stuttard       if (Arg.hasAttribute(Attribute::InReg)) {
784fd8d4af3STim Renouf         WaveDispatchNumSGPR += NumRegs;
78569f7d81dSDavid Stuttard       } else {
78669f7d81dSDavid Stuttard         // If this is a PS shader and we're processing the PS Input args (first
78769f7d81dSDavid Stuttard         // 16 VGPR), use the InputEna and InputAddr bits to define how many
78869f7d81dSDavid Stuttard         // VGPRs are actually used.
78969f7d81dSDavid Stuttard         // Any extra VGPR arguments are handled as normal arguments (and
79069f7d81dSDavid Stuttard         // contribute to the VGPR count whether they're used or not).
79169f7d81dSDavid Stuttard         if (IsPixelShader && PSArgCount < 16) {
79269f7d81dSDavid Stuttard           if ((1 << PSArgCount) & InputAddr) {
79369f7d81dSDavid Stuttard             if (PSArgCount < LastEna)
79469f7d81dSDavid Stuttard               WaveDispatchNumVGPR += NumRegs;
795fd8d4af3STim Renouf             else
79669f7d81dSDavid Stuttard               IntermediateVGPR += NumRegs;
79769f7d81dSDavid Stuttard           }
79869f7d81dSDavid Stuttard           PSArgCount++;
79969f7d81dSDavid Stuttard         } else {
80069f7d81dSDavid Stuttard           // If there are extra arguments we have to include the allocation for
80169f7d81dSDavid Stuttard           // the non-used (but enabled with InputAddr) input arguments
80269f7d81dSDavid Stuttard           if (IntermediateVGPR) {
80369f7d81dSDavid Stuttard             WaveDispatchNumVGPR += IntermediateVGPR;
80469f7d81dSDavid Stuttard             IntermediateVGPR = 0;
80569f7d81dSDavid Stuttard           }
806fd8d4af3STim Renouf           WaveDispatchNumVGPR += NumRegs;
807fd8d4af3STim Renouf         }
80869f7d81dSDavid Stuttard       }
80969f7d81dSDavid Stuttard     }
810fd8d4af3STim Renouf     ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
81169f7d81dSDavid Stuttard     ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
81269f7d81dSDavid Stuttard     ProgInfo.NumVGPR =
81369f7d81dSDavid Stuttard         Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
81479f75468SMatt Arsenault   }
815fd8d4af3STim Renouf 
8161d65026cSKonstantin Zhuravlyov   // Adjust number of registers used to meet default/requested minimum/maximum
8171d65026cSKonstantin Zhuravlyov   // number of waves per execution unit request.
8181d65026cSKonstantin Zhuravlyov   ProgInfo.NumSGPRsForWavesPerEU = std::max(
819a3566f21SMatt Arsenault     std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
8201d65026cSKonstantin Zhuravlyov   ProgInfo.NumVGPRsForWavesPerEU = std::max(
821a3566f21SMatt Arsenault     std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
8221d65026cSKonstantin Zhuravlyov 
82391f22fbfSMarek Olsak   if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
82491f22fbfSMarek Olsak       STM.hasSGPRInitBug()) {
8259f89ede1SKonstantin Zhuravlyov     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
8269f89ede1SKonstantin Zhuravlyov     if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
8279f89ede1SKonstantin Zhuravlyov       // This can happen due to a compiler bug or when using inline asm to use
8289f89ede1SKonstantin Zhuravlyov       // the registers which are usually reserved for vcc etc.
829f1caa283SMatthias Braun       LLVMContext &Ctx = MF.getFunction().getContext();
830e4223438SArthur Eubanks       DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
831e4223438SArthur Eubanks                                        ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
832e4223438SArthur Eubanks                                        DS_Error, DK_ResourceLimit);
833ff98241fSMatt Arsenault       Ctx.diagnose(Diag);
8349f89ede1SKonstantin Zhuravlyov       ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
8359f89ede1SKonstantin Zhuravlyov       ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
836417c93e3SMatt Arsenault     }
83791f22fbfSMarek Olsak   }
83845bb48eaSTom Stellard 
8394eae3019SMatt Arsenault   if (STM.hasSGPRInitBug()) {
8409f89ede1SKonstantin Zhuravlyov     ProgInfo.NumSGPR =
8419f89ede1SKonstantin Zhuravlyov         AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
8429f89ede1SKonstantin Zhuravlyov     ProgInfo.NumSGPRsForWavesPerEU =
8439f89ede1SKonstantin Zhuravlyov         AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
84445bb48eaSTom Stellard   }
84545bb48eaSTom Stellard 
846161e2b42SMatt Arsenault   if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
847f1caa283SMatthias Braun     LLVMContext &Ctx = MF.getFunction().getContext();
848f1caa283SMatthias Braun     DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
849e4223438SArthur Eubanks                                      MFI->getNumUserSGPRs(),
850e4223438SArthur Eubanks                                      STM.getMaxNumUserSGPRs(), DS_Error);
851ff98241fSMatt Arsenault     Ctx.diagnose(Diag);
85241003af2SMatt Arsenault   }
85341003af2SMatt Arsenault 
85452ef4019SMatt Arsenault   if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
855f1caa283SMatthias Braun     LLVMContext &Ctx = MF.getFunction().getContext();
856f1caa283SMatthias Braun     DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
857e4223438SArthur Eubanks                                      MFI->getLDSSize(),
858e4223438SArthur Eubanks                                      STM.getLocalMemorySize(), DS_Error);
859ff98241fSMatt Arsenault     Ctx.diagnose(Diag);
8601c4d0efeSMatt Arsenault   }
8611c4d0efeSMatt Arsenault 
8621e8c2c70SScott Linder   ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
863be3d7ba9SDavid Stuttard       &STM, ProgInfo.NumSGPRsForWavesPerEU);
8641e8c2c70SScott Linder   ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
865be3d7ba9SDavid Stuttard       &STM, ProgInfo.NumVGPRsForWavesPerEU);
866e03b1d7bSKonstantin Zhuravlyov 
867db0ed3e4SMatt Arsenault   const SIModeRegisterDefaults Mode = MFI->getMode();
868db0ed3e4SMatt Arsenault 
86945bb48eaSTom Stellard   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
87045bb48eaSTom Stellard   // register.
871db0ed3e4SMatt Arsenault   ProgInfo.FloatMode = getFPMode(Mode);
87245bb48eaSTom Stellard 
873055e4dceSMatt Arsenault   ProgInfo.IEEEMode = Mode.IEEE;
87445bb48eaSTom Stellard 
8757293f989SMatt Arsenault   // Make clamp modifier on NaN input returns 0.
876055e4dceSMatt Arsenault   ProgInfo.DX10Clamp = Mode.DX10Clamp;
87745bb48eaSTom Stellard 
87845bb48eaSTom Stellard   unsigned LDSAlignShift;
8795bfbae5cSTom Stellard   if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
88045bb48eaSTom Stellard     // LDS is allocated in 64 dword blocks.
88145bb48eaSTom Stellard     LDSAlignShift = 8;
88245bb48eaSTom Stellard   } else {
88345bb48eaSTom Stellard     // LDS is allocated in 128 dword blocks.
88445bb48eaSTom Stellard     LDSAlignShift = 9;
88545bb48eaSTom Stellard   }
88645bb48eaSTom Stellard 
88767357739SVang Thao   ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
88867357739SVang Thao   ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
88967357739SVang Thao 
890bc7902f1SMatt Arsenault   ProgInfo.LDSSize = MFI->getLDSSize();
89145bb48eaSTom Stellard   ProgInfo.LDSBlocks =
892ef0fe1eeSAaron Ballman       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
89345bb48eaSTom Stellard 
894ff85d61aSJay Foad   // Scratch is allocated in 64-dword or 256-dword blocks.
895ff85d61aSJay Foad   unsigned ScratchAlignShift =
896ff85d61aSJay Foad       STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
89745bb48eaSTom Stellard   // We need to program the hardware with the amount of scratch memory that
89845bb48eaSTom Stellard   // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
89945bb48eaSTom Stellard   // scratch memory used per thread.
900ff85d61aSJay Foad   ProgInfo.ScratchBlocks = divideCeil(
901ff85d61aSJay Foad       ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
90245bb48eaSTom Stellard 
90341bbe101SStanislav Mekhanoshin   if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
90441bbe101SStanislav Mekhanoshin     ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
90541bbe101SStanislav Mekhanoshin     ProgInfo.MemOrdered = 1;
90641bbe101SStanislav Mekhanoshin   }
90741bbe101SStanislav Mekhanoshin 
90826f8f3dbSMatt Arsenault   // 0 = X, 1 = XY, 2 = XYZ
90926f8f3dbSMatt Arsenault   unsigned TIDIGCompCnt = 0;
91026f8f3dbSMatt Arsenault   if (MFI->hasWorkItemIDZ())
91126f8f3dbSMatt Arsenault     TIDIGCompCnt = 2;
91226f8f3dbSMatt Arsenault   else if (MFI->hasWorkItemIDY())
91326f8f3dbSMatt Arsenault     TIDIGCompCnt = 1;
91426f8f3dbSMatt Arsenault 
9150bdaef38SMatt Arsenault   // The private segment wave byte offset is the last of the system SGPRs. We
9160bdaef38SMatt Arsenault   // initially assumed it was allocated, and may have used it. It shouldn't harm
9170bdaef38SMatt Arsenault   // anything to disable it if we know the stack isn't used here. We may still
9180bdaef38SMatt Arsenault   // have emitted code reading it to initialize scratch, but if that's unused
9190bdaef38SMatt Arsenault   // reading garbage should be OK.
9200bdaef38SMatt Arsenault   const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
92145bb48eaSTom Stellard   ProgInfo.ComputePGMRSrc2 =
9220bdaef38SMatt Arsenault       S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
92326f8f3dbSMatt Arsenault       S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
9242ca6b1f2SKonstantin Zhuravlyov       // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
9252ca6b1f2SKonstantin Zhuravlyov       S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) |
92626f8f3dbSMatt Arsenault       S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
92726f8f3dbSMatt Arsenault       S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
92826f8f3dbSMatt Arsenault       S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
92926f8f3dbSMatt Arsenault       S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
93026f8f3dbSMatt Arsenault       S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
93126f8f3dbSMatt Arsenault       S_00B84C_EXCP_EN_MSB(0) |
9326ccb076aSKonstantin Zhuravlyov       // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
9336ccb076aSKonstantin Zhuravlyov       S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
93426f8f3dbSMatt Arsenault       S_00B84C_EXCP_EN(0);
9352594fa85SStanislav Mekhanoshin 
936a8d9d507SStanislav Mekhanoshin   if (STM.hasGFX90AInsts()) {
937a8d9d507SStanislav Mekhanoshin     AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
938a8d9d507SStanislav Mekhanoshin                     amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
939a8d9d507SStanislav Mekhanoshin                     ProgInfo.AccumOffset);
940a8d9d507SStanislav Mekhanoshin     AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
941a8d9d507SStanislav Mekhanoshin                     amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
942a8d9d507SStanislav Mekhanoshin                     ProgInfo.TgSplit);
943a8d9d507SStanislav Mekhanoshin   }
944a8d9d507SStanislav Mekhanoshin 
9456f09bb7dSMatt Arsenault   ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
9462594fa85SStanislav Mekhanoshin                                             ProgInfo.NumSGPRsForWavesPerEU,
9472594fa85SStanislav Mekhanoshin                                             ProgInfo.NumVGPRsForWavesPerEU);
94845bb48eaSTom Stellard }
94945bb48eaSTom Stellard 
getRsrcReg(CallingConv::ID CallConv)950df3a20cdSNicolai Haehnle static unsigned getRsrcReg(CallingConv::ID CallConv) {
951df3a20cdSNicolai Haehnle   switch (CallConv) {
952cd1d5aafSJustin Bogner   default: LLVM_FALLTHROUGH;
953df3a20cdSNicolai Haehnle   case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
954ef1ae8ffSTim Renouf   case CallingConv::AMDGPU_LS: return R_00B528_SPI_SHADER_PGM_RSRC1_LS;
955a302a736SMarek Olsak   case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS;
956ef1ae8ffSTim Renouf   case CallingConv::AMDGPU_ES: return R_00B328_SPI_SHADER_PGM_RSRC1_ES;
957df3a20cdSNicolai Haehnle   case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
958df3a20cdSNicolai Haehnle   case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
959ef1ae8ffSTim Renouf   case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
96045bb48eaSTom Stellard   }
96145bb48eaSTom Stellard }
96245bb48eaSTom Stellard 
EmitProgramInfoSI(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo)96345bb48eaSTom Stellard void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
964b03dd8daSMatt Arsenault                                          const SIProgramInfo &CurrentProgramInfo) {
96545bb48eaSTom Stellard   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
966ff85d61aSJay Foad   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
967f1caa283SMatthias Braun   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
96845bb48eaSTom Stellard 
969f1caa283SMatthias Braun   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
970692e0c96SFangrui Song     OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
97145bb48eaSTom Stellard 
9721124bf4aSSebastian Neubauer     OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
97345bb48eaSTom Stellard 
974692e0c96SFangrui Song     OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
975692e0c96SFangrui Song     OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
97645bb48eaSTom Stellard 
977692e0c96SFangrui Song     OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
978ff85d61aSJay Foad     OutStreamer->emitInt32(
979ff85d61aSJay Foad         STM.getGeneration() >= AMDGPUSubtarget::GFX11
980ff85d61aSJay Foad             ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
981ff85d61aSJay Foad             : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
98245bb48eaSTom Stellard 
98345bb48eaSTom Stellard     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
98445bb48eaSTom Stellard     // 0" comment but I don't see a corresponding field in the register spec.
98545bb48eaSTom Stellard   } else {
986692e0c96SFangrui Song     OutStreamer->emitInt32(RsrcReg);
98777497103SFangrui Song     OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
988b03dd8daSMatt Arsenault                               S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
989692e0c96SFangrui Song     OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
990ff85d61aSJay Foad     OutStreamer->emitInt32(
991ff85d61aSJay Foad         STM.getGeneration() >= AMDGPUSubtarget::GFX11
992ff85d61aSJay Foad             ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
993ff85d61aSJay Foad             : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
994807ecc3dSTim Renouf   }
995807ecc3dSTim Renouf 
996f1caa283SMatthias Braun   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
997692e0c96SFangrui Song     OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);
998929a8ad2SJay Foad     unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
999929a8ad2SJay Foad                                 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1000929a8ad2SJay Foad                                 : CurrentProgramInfo.LDSBlocks;
1001929a8ad2SJay Foad     OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1002692e0c96SFangrui Song     OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);
1003692e0c96SFangrui Song     OutStreamer->emitInt32(MFI->getPSInputEnable());
1004692e0c96SFangrui Song     OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);
1005692e0c96SFangrui Song     OutStreamer->emitInt32(MFI->getPSInputAddr());
100645bb48eaSTom Stellard   }
10070532c190SMarek Olsak 
1008692e0c96SFangrui Song   OutStreamer->emitInt32(R_SPILLED_SGPRS);
1009692e0c96SFangrui Song   OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1010692e0c96SFangrui Song   OutStreamer->emitInt32(R_SPILLED_VGPRS);
1011692e0c96SFangrui Song   OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
101245bb48eaSTom Stellard }
101345bb48eaSTom Stellard 
101472800f04STim Renouf // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
101572800f04STim Renouf // is AMDPAL.  It stores each compute/SPI register setting and other PAL
1016d737b551STim Renouf // metadata items into the PALMD::Metadata, combining with any provided by the
1017d737b551STim Renouf // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1018d737b551STim Renouf // is then written as a single block in the .note section.
EmitPALMetadata(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo)1019c3beb6a0SKonstantin Zhuravlyov void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
102072800f04STim Renouf        const SIProgramInfo &CurrentProgramInfo) {
102172800f04STim Renouf   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1022d737b551STim Renouf   auto CC = MF.getFunction().getCallingConv();
1023d737b551STim Renouf   auto MD = getTargetStreamer()->getPALMetadata();
1024d737b551STim Renouf 
1025e7bd52f8STim Renouf   MD->setEntryPoint(CC, MF.getFunction().getName());
1026d737b551STim Renouf   MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
102774702444SJacob Lambert 
102874702444SJacob Lambert   // Only set AGPRs for supported devices
102974702444SJacob Lambert   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
103074702444SJacob Lambert   if (STM.hasMAIInsts()) {
103174702444SJacob Lambert     MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
103274702444SJacob Lambert   }
103374702444SJacob Lambert 
1034d737b551STim Renouf   MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
10351124bf4aSSebastian Neubauer   MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
10361124bf4aSSebastian Neubauer   if (AMDGPU::isCompute(CC)) {
1037d737b551STim Renouf     MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
103872800f04STim Renouf   } else {
103972800f04STim Renouf     if (CurrentProgramInfo.ScratchBlocks > 0)
1040d737b551STim Renouf       MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
104172800f04STim Renouf   }
1042d737b551STim Renouf   // ScratchSize is in bytes, 16 aligned.
1043d737b551STim Renouf   MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
1044f1caa283SMatthias Braun   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
1045929a8ad2SJay Foad     unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1046929a8ad2SJay Foad                                 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1047929a8ad2SJay Foad                                 : CurrentProgramInfo.LDSBlocks;
1048929a8ad2SJay Foad     MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1049d737b551STim Renouf     MD->setSpiPsInputEna(MFI->getPSInputEnable());
1050d737b551STim Renouf     MD->setSpiPsInputAddr(MFI->getPSInputAddr());
105172800f04STim Renouf   }
10525d00c306SStanislav Mekhanoshin 
10535d00c306SStanislav Mekhanoshin   if (STM.isWave32())
10545d00c306SStanislav Mekhanoshin     MD->setWave32(MF.getFunction().getCallingConv());
105572800f04STim Renouf }
105672800f04STim Renouf 
emitPALFunctionMetadata(const MachineFunction & MF)1057edd67564SSebastian Neubauer void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1058edd67564SSebastian Neubauer   auto *MD = getTargetStreamer()->getPALMetadata();
1059edd67564SSebastian Neubauer   const MachineFrameInfo &MFI = MF.getFrameInfo();
10605733167fSSebastian Neubauer   MD->setFunctionScratchSize(MF, MFI.getStackSize());
1061db646de3SSebastian Neubauer 
10625733167fSSebastian Neubauer   // Set compute registers
10635733167fSSebastian Neubauer   MD->setRsrc1(CallingConv::AMDGPU_CS,
10645733167fSSebastian Neubauer                CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
10655733167fSSebastian Neubauer   MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
1066db646de3SSebastian Neubauer 
1067db646de3SSebastian Neubauer   // Set optional info
1068db646de3SSebastian Neubauer   MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
1069db646de3SSebastian Neubauer   MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1070db646de3SSebastian Neubauer   MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1071edd67564SSebastian Neubauer }
1072edd67564SSebastian Neubauer 
107324ee0785SMatt Arsenault // This is supposed to be log2(Size)
getElementByteSizeValue(unsigned Size)107424ee0785SMatt Arsenault static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
107524ee0785SMatt Arsenault   switch (Size) {
107624ee0785SMatt Arsenault   case 4:
107724ee0785SMatt Arsenault     return AMD_ELEMENT_4_BYTES;
107824ee0785SMatt Arsenault   case 8:
107924ee0785SMatt Arsenault     return AMD_ELEMENT_8_BYTES;
108024ee0785SMatt Arsenault   case 16:
108124ee0785SMatt Arsenault     return AMD_ELEMENT_16_BYTES;
108224ee0785SMatt Arsenault   default:
108324ee0785SMatt Arsenault     llvm_unreachable("invalid private_element_size");
108424ee0785SMatt Arsenault   }
108524ee0785SMatt Arsenault }
108624ee0785SMatt Arsenault 
getAmdKernelCode(amd_kernel_code_t & Out,const SIProgramInfo & CurrentProgramInfo,const MachineFunction & MF) const1087ca0e7f64SKonstantin Zhuravlyov void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1088b03dd8daSMatt Arsenault                                         const SIProgramInfo &CurrentProgramInfo,
1089ca0e7f64SKonstantin Zhuravlyov                                         const MachineFunction &MF) const {
10904bec7d42SMatt Arsenault   const Function &F = MF.getFunction();
10914bec7d42SMatt Arsenault   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
10924bec7d42SMatt Arsenault          F.getCallingConv() == CallingConv::SPIR_KERNEL);
10934bec7d42SMatt Arsenault 
109445bb48eaSTom Stellard   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
10955bfbae5cSTom Stellard   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
109645bb48eaSTom Stellard 
10974cd9509eSMatt Arsenault   AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
109845bb48eaSTom Stellard 
1099ca0e7f64SKonstantin Zhuravlyov   Out.compute_pgm_resource_registers =
11001124bf4aSSebastian Neubauer       CurrentProgramInfo.getComputePGMRSrc1() |
1101b03dd8daSMatt Arsenault       (CurrentProgramInfo.ComputePGMRSrc2 << 32);
110241bbe101SStanislav Mekhanoshin   Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
110326f8f3dbSMatt Arsenault 
1104b03dd8daSMatt Arsenault   if (CurrentProgramInfo.DynamicCallStack)
1105b03dd8daSMatt Arsenault     Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
1106b03dd8daSMatt Arsenault 
1107ca0e7f64SKonstantin Zhuravlyov   AMD_HSA_BITS_SET(Out.code_properties,
110824ee0785SMatt Arsenault                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
1109cf6565f6SStanislav Mekhanoshin                    getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
111024ee0785SMatt Arsenault 
111126f8f3dbSMatt Arsenault   if (MFI->hasPrivateSegmentBuffer()) {
1112ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |=
111326f8f3dbSMatt Arsenault       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
111426f8f3dbSMatt Arsenault   }
111526f8f3dbSMatt Arsenault 
111626f8f3dbSMatt Arsenault   if (MFI->hasDispatchPtr())
1117ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
111826f8f3dbSMatt Arsenault 
11190f20a35bSChangpeng Fang   if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
1120ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
112126f8f3dbSMatt Arsenault 
112226f8f3dbSMatt Arsenault   if (MFI->hasKernargSegmentPtr())
1123ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
112426f8f3dbSMatt Arsenault 
112526f8f3dbSMatt Arsenault   if (MFI->hasDispatchID())
1126ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
112726f8f3dbSMatt Arsenault 
112826f8f3dbSMatt Arsenault   if (MFI->hasFlatScratchInit())
1129ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
113026f8f3dbSMatt Arsenault 
113148f29f21STom Stellard   if (MFI->hasDispatchPtr())
1132ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
113348f29f21STom Stellard 
11345b504976SNicolai Haehnle   if (STM.isXNACKEnabled())
1135ca0e7f64SKonstantin Zhuravlyov     Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
11365b504976SNicolai Haehnle 
1137b65fa483SGuillaume Chatelet   Align MaxKernArgAlign;
11384bec7d42SMatt Arsenault   Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1139b03dd8daSMatt Arsenault   Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1140b03dd8daSMatt Arsenault   Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1141b03dd8daSMatt Arsenault   Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1142b03dd8daSMatt Arsenault   Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
114345bb48eaSTom Stellard 
1144b65fa483SGuillaume Chatelet   // kernarg_segment_alignment is specified as log of the alignment.
1145b65fa483SGuillaume Chatelet   // The minimum alignment is 16.
114690ff1487SMatt Arsenault   // FIXME: The metadata treats the minimum as 4?
1147b65fa483SGuillaume Chatelet   Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
114845bb48eaSTom Stellard }
114945bb48eaSTom Stellard 
PrintAsmOperand(const MachineInstr * MI,unsigned OpNo,const char * ExtraCode,raw_ostream & O)115045bb48eaSTom Stellard bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
115145bb48eaSTom Stellard                                        const char *ExtraCode, raw_ostream &O) {
115236cd1859SMatt Arsenault   // First try the generic code, which knows about modifiers like 'c' and 'n'.
11535277b3ffSNick Desaulniers   if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
115436cd1859SMatt Arsenault     return false;
115536cd1859SMatt Arsenault 
115645bb48eaSTom Stellard   if (ExtraCode && ExtraCode[0]) {
115745bb48eaSTom Stellard     if (ExtraCode[1] != 0)
115845bb48eaSTom Stellard       return true; // Unknown modifier.
115945bb48eaSTom Stellard 
116045bb48eaSTom Stellard     switch (ExtraCode[0]) {
116145bb48eaSTom Stellard     case 'r':
116245bb48eaSTom Stellard       break;
116336cd1859SMatt Arsenault     default:
116436cd1859SMatt Arsenault       return true;
116545bb48eaSTom Stellard     }
116645bb48eaSTom Stellard   }
116745bb48eaSTom Stellard 
116836cd1859SMatt Arsenault   // TODO: Should be able to support other operand types like globals.
116936cd1859SMatt Arsenault   const MachineOperand &MO = MI->getOperand(OpNo);
117036cd1859SMatt Arsenault   if (MO.isReg()) {
117136cd1859SMatt Arsenault     AMDGPUInstPrinter::printRegOperand(MO.getReg(), O,
117236cd1859SMatt Arsenault                                        *MF->getSubtarget().getRegisterInfo());
117345bb48eaSTom Stellard     return false;
1174b087b91cSDmitry Preobrazhensky   } else if (MO.isImm()) {
1175b087b91cSDmitry Preobrazhensky     int64_t Val = MO.getImm();
1176b087b91cSDmitry Preobrazhensky     if (AMDGPU::isInlinableIntLiteral(Val)) {
1177b087b91cSDmitry Preobrazhensky       O << Val;
1178b087b91cSDmitry Preobrazhensky     } else if (isUInt<16>(Val)) {
11791c9d6810SDmitry Preobrazhensky       O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1180b087b91cSDmitry Preobrazhensky     } else if (isUInt<32>(Val)) {
11811c9d6810SDmitry Preobrazhensky       O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1182b087b91cSDmitry Preobrazhensky     } else {
1183b087b91cSDmitry Preobrazhensky       O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
118445bb48eaSTom Stellard     }
1185b087b91cSDmitry Preobrazhensky     return false;
1186b087b91cSDmitry Preobrazhensky   }
118736cd1859SMatt Arsenault   return true;
118836cd1859SMatt Arsenault }
11892b08f6afSSebastian Neubauer 
getAnalysisUsage(AnalysisUsage & AU) const11902b08f6afSSebastian Neubauer void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
11912b08f6afSSebastian Neubauer   AU.addRequired<AMDGPUResourceUsageAnalysis>();
11922b08f6afSSebastian Neubauer   AU.addPreserved<AMDGPUResourceUsageAnalysis>();
11932b08f6afSSebastian Neubauer   AsmPrinter::getAnalysisUsage(AU);
11942b08f6afSSebastian Neubauer }
119567357739SVang Thao 
emitResourceUsageRemarks(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo,bool isModuleEntryFunction,bool hasMAIInsts)119667357739SVang Thao void AMDGPUAsmPrinter::emitResourceUsageRemarks(
119767357739SVang Thao     const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
119867357739SVang Thao     bool isModuleEntryFunction, bool hasMAIInsts) {
119967357739SVang Thao   if (!ORE)
120067357739SVang Thao     return;
120167357739SVang Thao 
120267357739SVang Thao   const char *Name = "kernel-resource-usage";
120367357739SVang Thao   const char *Indent = "    ";
120467357739SVang Thao 
120567357739SVang Thao   // If the remark is not specifically enabled, do not output to yaml
120667357739SVang Thao   LLVMContext &Ctx = MF.getFunction().getContext();
120767357739SVang Thao   if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
120867357739SVang Thao     return;
120967357739SVang Thao 
121067357739SVang Thao   auto EmitResourceUsageRemark = [&](StringRef RemarkName,
121167357739SVang Thao                                      StringRef RemarkLabel, auto Argument) {
121267357739SVang Thao     // Add an indent for every line besides the line with the kernel name. This
121367357739SVang Thao     // makes it easier to tell which resource usage go with which kernel since
121467357739SVang Thao     // the kernel name will always be displayed first.
121567357739SVang Thao     std::string LabelStr = RemarkLabel.str() + ": ";
121667357739SVang Thao     if (!RemarkName.equals("FunctionName"))
121767357739SVang Thao       LabelStr = Indent + LabelStr;
121867357739SVang Thao 
121967357739SVang Thao     ORE->emit([&]() {
122067357739SVang Thao       return MachineOptimizationRemarkAnalysis(Name, RemarkName,
122167357739SVang Thao                                                MF.getFunction().getSubprogram(),
122267357739SVang Thao                                                &MF.front())
122367357739SVang Thao              << LabelStr << ore::NV(RemarkName, Argument);
122467357739SVang Thao     });
122567357739SVang Thao   };
122667357739SVang Thao 
122767357739SVang Thao   // FIXME: Formatting here is pretty nasty because clang does not accept
122867357739SVang Thao   // newlines from diagnostics. This forces us to emit multiple diagnostic
122967357739SVang Thao   // remarks to simulate newlines. If and when clang does accept newlines, this
123067357739SVang Thao   // formatting should be aggregated into one remark with newlines to avoid
123167357739SVang Thao   // printing multiple diagnostic location and diag opts.
123267357739SVang Thao   EmitResourceUsageRemark("FunctionName", "Function Name",
123367357739SVang Thao                           MF.getFunction().getName());
123467357739SVang Thao   EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
123567357739SVang Thao   EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
123667357739SVang Thao   if (hasMAIInsts)
123767357739SVang Thao     EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
123867357739SVang Thao   EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
123967357739SVang Thao                           CurrentProgramInfo.ScratchSize);
124067357739SVang Thao   EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
124167357739SVang Thao                           CurrentProgramInfo.Occupancy);
124267357739SVang Thao   EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
124367357739SVang Thao                           CurrentProgramInfo.SGPRSpill);
124467357739SVang Thao   EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
124567357739SVang Thao                           CurrentProgramInfo.VGPRSpill);
124667357739SVang Thao   if (isModuleEntryFunction)
124767357739SVang Thao     EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
124867357739SVang Thao                             CurrentProgramInfo.LDSSize);
124967357739SVang Thao }
1250