1d68904f9SJames Henderson //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
245bb48eaSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
645bb48eaSTom Stellard //
745bb48eaSTom Stellard //===----------------------------------------------------------------------===//
845bb48eaSTom Stellard //
945bb48eaSTom Stellard /// \file
1045bb48eaSTom Stellard ///
1145bb48eaSTom Stellard /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
1245bb48eaSTom Stellard /// code. When passed an MCAsmStreamer it prints assembly and when passed
1345bb48eaSTom Stellard /// an MCObjectStreamer it outputs binary code.
1445bb48eaSTom Stellard //
1545bb48eaSTom Stellard //===----------------------------------------------------------------------===//
1645bb48eaSTom Stellard //
1745bb48eaSTom Stellard
1845bb48eaSTom Stellard #include "AMDGPUAsmPrinter.h"
1945bb48eaSTom Stellard #include "AMDGPU.h"
206a87e9b0Sdfukalov #include "AMDGPUHSAMetadataStreamer.h"
212b08f6afSSebastian Neubauer #include "AMDGPUResourceUsageAnalysis.h"
226a87e9b0Sdfukalov #include "AMDKernelCodeT.h"
23560d7e04Sdfukalov #include "GCNSubtarget.h"
24c0bd7bd4SRichard Trieu #include "MCTargetDesc/AMDGPUInstPrinter.h"
256bda14b3SChandler Carruth #include "MCTargetDesc/AMDGPUTargetStreamer.h"
26c5015010STom Stellard #include "R600AsmPrinter.h"
276bda14b3SChandler Carruth #include "SIMachineFunctionInfo.h"
288ce2ee9dSRichard Trieu #include "TargetInfo/AMDGPUTargetInfo.h"
296bda14b3SChandler Carruth #include "Utils/AMDGPUBaseInfo.h"
3067357739SVang Thao #include "llvm/Analysis/OptimizationRemarkEmitter.h"
31ef736a1cSserge-sans-paille #include "llvm/BinaryFormat/ELF.h"
32989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFrameInfo.h"
3367357739SVang Thao #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
34ff98241fSMatt Arsenault #include "llvm/IR/DiagnosticInfo.h"
3533cb8f5bSTim Renouf #include "llvm/MC/MCAssembler.h"
3645bb48eaSTom Stellard #include "llvm/MC/MCContext.h"
3745bb48eaSTom Stellard #include "llvm/MC/MCSectionELF.h"
3845bb48eaSTom Stellard #include "llvm/MC/MCStreamer.h"
3989b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
406a87e9b0Sdfukalov #include "llvm/Support/AMDHSAKernelDescriptor.h"
41ef736a1cSserge-sans-paille #include "llvm/Support/TargetParser.h"
426054e650SDavid Blaikie #include "llvm/Target/TargetLoweringObjectFile.h"
436a87e9b0Sdfukalov #include "llvm/Target/TargetMachine.h"
4445bb48eaSTom Stellard
4545bb48eaSTom Stellard using namespace llvm;
46c3beb6a0SKonstantin Zhuravlyov using namespace llvm::AMDGPU;
4745bb48eaSTom Stellard
481024b73eSMatt Arsenault // This should get the default rounding mode from the kernel. We just set the
491024b73eSMatt Arsenault // default here, but this could change if the OpenCL rounding mode pragmas are
501024b73eSMatt Arsenault // used.
5145bb48eaSTom Stellard //
5245bb48eaSTom Stellard // The denormal mode here should match what is reported by the OpenCL runtime
5345bb48eaSTom Stellard // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
5545bb48eaSTom Stellard //
5645bb48eaSTom Stellard // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
5745bb48eaSTom Stellard // precision, and leaves single precision to flush all and does not report
5845bb48eaSTom Stellard // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
5945bb48eaSTom Stellard // CL_FP_DENORM for both.
6045bb48eaSTom Stellard //
6145bb48eaSTom Stellard // FIXME: It seems some instructions do not support single precision denormals
6245bb48eaSTom Stellard // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
6345bb48eaSTom Stellard // and sin_f32, cos_f32 on most parts).
6445bb48eaSTom Stellard
6545bb48eaSTom Stellard // We want to use these instructions, and using fp32 denormals also causes
6645bb48eaSTom Stellard // instructions to run at the double precision rate for the device so it's
6745bb48eaSTom Stellard // probably best to just report no single precision denormals.
getFPMode(AMDGPU::SIModeRegisterDefaults Mode)68db0ed3e4SMatt Arsenault static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
6945bb48eaSTom Stellard return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
7045bb48eaSTom Stellard FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
711024b73eSMatt Arsenault FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
721024b73eSMatt Arsenault FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
7345bb48eaSTom Stellard }
7445bb48eaSTom Stellard
7545bb48eaSTom Stellard static AsmPrinter *
createAMDGPUAsmPrinterPass(TargetMachine & tm,std::unique_ptr<MCStreamer> && Streamer)7645bb48eaSTom Stellard createAMDGPUAsmPrinterPass(TargetMachine &tm,
7745bb48eaSTom Stellard std::unique_ptr<MCStreamer> &&Streamer) {
7845bb48eaSTom Stellard return new AMDGPUAsmPrinter(tm, std::move(Streamer));
7945bb48eaSTom Stellard }
8045bb48eaSTom Stellard
LLVMInitializeAMDGPUAsmPrinter()810dbcb363STom Stellard extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
82f42454b9SMehdi Amini TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
83c5015010STom Stellard llvm::createR600AsmPrinterPass);
84f42454b9SMehdi Amini TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
85f42454b9SMehdi Amini createAMDGPUAsmPrinterPass);
8645bb48eaSTom Stellard }
8745bb48eaSTom Stellard
AMDGPUAsmPrinter(TargetMachine & TM,std::unique_ptr<MCStreamer> Streamer)8845bb48eaSTom Stellard AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
8945bb48eaSTom Stellard std::unique_ptr<MCStreamer> Streamer)
901a14bfa0SYaxun Liu : AsmPrinter(TM, std::move(Streamer)) {
913fdf3b15SKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
923fdf3b15SKonstantin Zhuravlyov if (isHsaAbiVersion2(getGlobalSTI())) {
936a87e9b0Sdfukalov HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
94f4ace637SKonstantin Zhuravlyov } else if (isHsaAbiVersion3(getGlobalSTI())) {
956a87e9b0Sdfukalov HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
961194b9cdSChangpeng Fang } else if (isHsaAbiVersion5(getGlobalSTI())) {
971194b9cdSChangpeng Fang HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
98f4ace637SKonstantin Zhuravlyov } else {
99f4ace637SKonstantin Zhuravlyov HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
1003fdf3b15SKonstantin Zhuravlyov }
1013fdf3b15SKonstantin Zhuravlyov }
1021a14bfa0SYaxun Liu }
10345bb48eaSTom Stellard
getPassName() const104117296c0SMehdi Amini StringRef AMDGPUAsmPrinter::getPassName() const {
105f9245b75SMatt Arsenault return "AMDGPU Assembly Printer";
106f9245b75SMatt Arsenault }
107f9245b75SMatt Arsenault
getGlobalSTI() const1084cd9509eSMatt Arsenault const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
1097498cd61SKonstantin Zhuravlyov return TM.getMCSubtargetInfo();
1107498cd61SKonstantin Zhuravlyov }
1117498cd61SKonstantin Zhuravlyov
getTargetStreamer() const1128c18f5b3SKonstantin Zhuravlyov AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
1138c18f5b3SKonstantin Zhuravlyov if (!OutStreamer)
1148c18f5b3SKonstantin Zhuravlyov return nullptr;
1158c18f5b3SKonstantin Zhuravlyov return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
1167498cd61SKonstantin Zhuravlyov }
1177498cd61SKonstantin Zhuravlyov
emitStartOfAsmFile(Module & M)1180dce409cSFangrui Song void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
1194a025622SSebastian Neubauer IsTargetStreamerInitialized = false;
1204a025622SSebastian Neubauer }
1214a025622SSebastian Neubauer
initTargetStreamer(Module & M)1224a025622SSebastian Neubauer void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
1234a025622SSebastian Neubauer IsTargetStreamerInitialized = true;
1244a025622SSebastian Neubauer
125f4ace637SKonstantin Zhuravlyov // TODO: Which one is called first, emitStartOfAsmFile or
126f4ace637SKonstantin Zhuravlyov // emitFunctionBodyStart?
127f4ace637SKonstantin Zhuravlyov if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
128f4ace637SKonstantin Zhuravlyov initializeTargetID(M);
12900f2cb11SKonstantin Zhuravlyov
130eda425edSKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
131eda425edSKonstantin Zhuravlyov TM.getTargetTriple().getOS() != Triple::AMDPAL)
132eda425edSKonstantin Zhuravlyov return;
133eda425edSKonstantin Zhuravlyov
1341194b9cdSChangpeng Fang if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
135f4ace637SKonstantin Zhuravlyov getTargetStreamer()->EmitDirectiveAMDGCNTarget();
136f4ace637SKonstantin Zhuravlyov
137eda425edSKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
138f4ace637SKonstantin Zhuravlyov HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
139eda425edSKonstantin Zhuravlyov
140eda425edSKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
141d737b551STim Renouf getTargetStreamer()->getPALMetadata()->readFromIR(M);
142eda425edSKonstantin Zhuravlyov
1431194b9cdSChangpeng Fang if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
144f5b36e56SScott Linder return;
145f5b36e56SScott Linder
146f4ace637SKonstantin Zhuravlyov // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
147eda425edSKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
1488c18f5b3SKonstantin Zhuravlyov getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
149eda425edSKonstantin Zhuravlyov
150f4ace637SKonstantin Zhuravlyov // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
1514cd9509eSMatt Arsenault IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
152f4ace637SKonstantin Zhuravlyov getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
15371e43ee4SKonstantin Zhuravlyov Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
1547498cd61SKonstantin Zhuravlyov }
1557498cd61SKonstantin Zhuravlyov
emitEndOfAsmFile(Module & M)1560dce409cSFangrui Song void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
1574a025622SSebastian Neubauer // Init target streamer if it has not yet happened
1584a025622SSebastian Neubauer if (!IsTargetStreamerInitialized)
1594a025622SSebastian Neubauer initTargetStreamer(M);
1604a025622SSebastian Neubauer
1618c18f5b3SKonstantin Zhuravlyov // Following code requires TargetStreamer to be present.
1628c18f5b3SKonstantin Zhuravlyov if (!getTargetStreamer())
1638c18f5b3SKonstantin Zhuravlyov return;
1648c18f5b3SKonstantin Zhuravlyov
1653fdf3b15SKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
166f4ace637SKonstantin Zhuravlyov isHsaAbiVersion2(getGlobalSTI()))
167f4ace637SKonstantin Zhuravlyov getTargetStreamer()->EmitISAVersion();
1689c05b2bcSKonstantin Zhuravlyov
1699c05b2bcSKonstantin Zhuravlyov // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
170f4ace637SKonstantin Zhuravlyov // Emit HSA Metadata (NT_AMD_HSA_METADATA).
1719c05b2bcSKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
172f5b36e56SScott Linder HSAMetadataStream->end();
173f5b36e56SScott Linder bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
174f5b36e56SScott Linder (void)Success;
175f5b36e56SScott Linder assert(Success && "Malformed HSA Metadata");
1769c05b2bcSKonstantin Zhuravlyov }
177f5b36e56SScott Linder }
178f4218372STom Stellard
isBlockOnlyReachableByFallthrough(const MachineBasicBlock * MBB) const1796bc43d86SMatt Arsenault bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
1806bc43d86SMatt Arsenault const MachineBasicBlock *MBB) const {
1816bc43d86SMatt Arsenault if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
1826bc43d86SMatt Arsenault return false;
1836bc43d86SMatt Arsenault
1846bc43d86SMatt Arsenault if (MBB->empty())
1856bc43d86SMatt Arsenault return true;
1866bc43d86SMatt Arsenault
1876bc43d86SMatt Arsenault // If this is a block implementing a long branch, an expression relative to
1886bc43d86SMatt Arsenault // the start of the block is needed. to the start of the block.
1896bc43d86SMatt Arsenault // XXX - Is there a smarter way to check this?
1906bc43d86SMatt Arsenault return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
1916bc43d86SMatt Arsenault }
1926bc43d86SMatt Arsenault
emitFunctionBodyStart()1930dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionBodyStart() {
19400f2cb11SKonstantin Zhuravlyov const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
195f4ace637SKonstantin Zhuravlyov const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
196f4ace637SKonstantin Zhuravlyov const Function &F = MF->getFunction();
197f4ace637SKonstantin Zhuravlyov
198f4ace637SKonstantin Zhuravlyov // TODO: Which one is called first, emitStartOfAsmFile or
199f4ace637SKonstantin Zhuravlyov // emitFunctionBodyStart?
200f4ace637SKonstantin Zhuravlyov if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
201f4ace637SKonstantin Zhuravlyov initializeTargetID(*F.getParent());
202f4ace637SKonstantin Zhuravlyov
203f4ace637SKonstantin Zhuravlyov const auto &FunctionTargetID = STM.getTargetID();
204f4ace637SKonstantin Zhuravlyov // Make sure function's xnack settings are compatible with module's
205f4ace637SKonstantin Zhuravlyov // xnack settings.
206f4ace637SKonstantin Zhuravlyov if (FunctionTargetID.isXnackSupported() &&
207f4ace637SKonstantin Zhuravlyov FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
208f4ace637SKonstantin Zhuravlyov FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
209f4ace637SKonstantin Zhuravlyov OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
210f4ace637SKonstantin Zhuravlyov "' function does not match module xnack setting");
211f4ace637SKonstantin Zhuravlyov return;
212f4ace637SKonstantin Zhuravlyov }
213f4ace637SKonstantin Zhuravlyov // Make sure function's sramecc settings are compatible with module's
214f4ace637SKonstantin Zhuravlyov // sramecc settings.
215f4ace637SKonstantin Zhuravlyov if (FunctionTargetID.isSramEccSupported() &&
216f4ace637SKonstantin Zhuravlyov FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
217f4ace637SKonstantin Zhuravlyov FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
218f4ace637SKonstantin Zhuravlyov OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
219f4ace637SKonstantin Zhuravlyov "' function does not match module sramecc setting");
220f4ace637SKonstantin Zhuravlyov return;
221f4ace637SKonstantin Zhuravlyov }
222f4ace637SKonstantin Zhuravlyov
22300f2cb11SKonstantin Zhuravlyov if (!MFI.isEntryFunction())
22400f2cb11SKonstantin Zhuravlyov return;
225021a218dSMatt Arsenault
2263fdf3b15SKonstantin Zhuravlyov if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
2274bec7d42SMatt Arsenault (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
2284bec7d42SMatt Arsenault F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
229ca0e7f64SKonstantin Zhuravlyov amd_kernel_code_t KernelCode;
230b03dd8daSMatt Arsenault getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
2318c18f5b3SKonstantin Zhuravlyov getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
232f151a45cSTom Stellard }
2337498cd61SKonstantin Zhuravlyov
234f5b36e56SScott Linder if (STM.isAmdHsaOS())
235f5b36e56SScott Linder HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
236f151a45cSTom Stellard }
237f151a45cSTom Stellard
emitFunctionBodyEnd()2380dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
23900f2cb11SKonstantin Zhuravlyov const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
24000f2cb11SKonstantin Zhuravlyov if (!MFI.isEntryFunction())
24100f2cb11SKonstantin Zhuravlyov return;
2424cd9509eSMatt Arsenault
2433fdf3b15SKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
2443fdf3b15SKonstantin Zhuravlyov isHsaAbiVersion2(getGlobalSTI()))
24500f2cb11SKonstantin Zhuravlyov return;
24600f2cb11SKonstantin Zhuravlyov
247ce25bc3eSKonstantin Zhuravlyov auto &Streamer = getTargetStreamer()->getStreamer();
248ce25bc3eSKonstantin Zhuravlyov auto &Context = Streamer.getContext();
249ce25bc3eSKonstantin Zhuravlyov auto &ObjectFileInfo = *Context.getObjectFileInfo();
250ce25bc3eSKonstantin Zhuravlyov auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
251ce25bc3eSKonstantin Zhuravlyov
25215d82c62SFangrui Song Streamer.pushSection();
253adf4142fSFangrui Song Streamer.switchSection(&ReadOnlySection);
254ce25bc3eSKonstantin Zhuravlyov
255ce25bc3eSKonstantin Zhuravlyov // CP microcode requires the kernel descriptor to be allocated on 64 byte
256ce25bc3eSKonstantin Zhuravlyov // alignment.
2576d2d589bSFangrui Song Streamer.emitValueToAlignment(64, 0, 1, 0);
258ce25bc3eSKonstantin Zhuravlyov if (ReadOnlySection.getAlignment() < 64)
25918f805a7SGuillaume Chatelet ReadOnlySection.setAlignment(Align(64));
260ce25bc3eSKonstantin Zhuravlyov
261f4ace637SKonstantin Zhuravlyov const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
2624cd9509eSMatt Arsenault
26300f2cb11SKonstantin Zhuravlyov SmallString<128> KernelName;
26400f2cb11SKonstantin Zhuravlyov getNameWithPrefix(KernelName, &MF->getFunction());
26500f2cb11SKonstantin Zhuravlyov getTargetStreamer()->EmitAmdhsaKernelDescriptor(
266f4ace637SKonstantin Zhuravlyov STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
2671e8c2c70SScott Linder CurrentProgramInfo.NumVGPRsForWavesPerEU,
2681e8c2c70SScott Linder CurrentProgramInfo.NumSGPRsForWavesPerEU -
269f4ace637SKonstantin Zhuravlyov IsaInfo::getNumExtraSGPRs(&STM,
2701e8c2c70SScott Linder CurrentProgramInfo.VCCUsed,
2711e8c2c70SScott Linder CurrentProgramInfo.FlatUsed),
272f4ace637SKonstantin Zhuravlyov CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
273ce25bc3eSKonstantin Zhuravlyov
27415d82c62SFangrui Song Streamer.popSection();
27500f2cb11SKonstantin Zhuravlyov }
27600f2cb11SKonstantin Zhuravlyov
emitFunctionEntryLabel()2770dce409cSFangrui Song void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
2783fdf3b15SKonstantin Zhuravlyov if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
2791194b9cdSChangpeng Fang isHsaAbiVersion3AndAbove(getGlobalSTI())) {
2800dce409cSFangrui Song AsmPrinter::emitFunctionEntryLabel();
28100f2cb11SKonstantin Zhuravlyov return;
28200f2cb11SKonstantin Zhuravlyov }
28300f2cb11SKonstantin Zhuravlyov
2841e1b05dbSTom Stellard const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2855bfbae5cSTom Stellard const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
286aa067cb9SKonstantin Zhuravlyov if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
2871b9748c6STom Stellard SmallString<128> SymbolName;
288f1caa283SMatthias Braun getNameWithPrefix(SymbolName, &MF->getFunction()),
2898c18f5b3SKonstantin Zhuravlyov getTargetStreamer()->EmitAMDGPUSymbolType(
2907498cd61SKonstantin Zhuravlyov SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
2911e1b05dbSTom Stellard }
29233cb8f5bSTim Renouf if (DumpCodeInstEmitter) {
293cead41d4STim Renouf // Disassemble function name label to text.
294f1caa283SMatthias Braun DisasmLines.push_back(MF->getName().str() + ":");
295cead41d4STim Renouf DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
296cead41d4STim Renouf HexLines.push_back("");
297cead41d4STim Renouf }
2981e1b05dbSTom Stellard
2990dce409cSFangrui Song AsmPrinter::emitFunctionEntryLabel();
3001e1b05dbSTom Stellard }
3011e1b05dbSTom Stellard
emitBasicBlockStart(const MachineBasicBlock & MBB)3020dce409cSFangrui Song void AMDGPUAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
30333cb8f5bSTim Renouf if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
304cead41d4STim Renouf // Write a line for the basic block label if it is not only fallthrough.
305cead41d4STim Renouf DisasmLines.push_back(
306cead41d4STim Renouf (Twine("BB") + Twine(getFunctionNumber())
307cead41d4STim Renouf + "_" + Twine(MBB.getNumber()) + ":").str());
308cead41d4STim Renouf DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
309cead41d4STim Renouf HexLines.push_back("");
310cead41d4STim Renouf }
3110dce409cSFangrui Song AsmPrinter::emitBasicBlockStart(MBB);
312cead41d4STim Renouf }
313cead41d4STim Renouf
emitGlobalVariable(const GlobalVariable * GV)3141d49eb00SFangrui Song void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
3152710171aSNicolai Haehnle if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
3162710171aSNicolai Haehnle if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
3172710171aSNicolai Haehnle OutContext.reportError({},
3182710171aSNicolai Haehnle Twine(GV->getName()) +
3192710171aSNicolai Haehnle ": unsupported initializer for address space");
32000f2f91aSTom Stellard return;
3212710171aSNicolai Haehnle }
3222710171aSNicolai Haehnle
3232710171aSNicolai Haehnle // LDS variables aren't emitted in HSA or PAL yet.
3242710171aSNicolai Haehnle const Triple::OSType OS = TM.getTargetTriple().getOS();
3252710171aSNicolai Haehnle if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
3262710171aSNicolai Haehnle return;
3272710171aSNicolai Haehnle
3282710171aSNicolai Haehnle MCSymbol *GVSym = getSymbol(GV);
3292710171aSNicolai Haehnle
3302710171aSNicolai Haehnle GVSym->redefineIfPossible();
3312710171aSNicolai Haehnle if (GVSym->isDefined() || GVSym->isVariable())
3322710171aSNicolai Haehnle report_fatal_error("symbol '" + Twine(GVSym->getName()) +
3332710171aSNicolai Haehnle "' is already defined");
3342710171aSNicolai Haehnle
3352710171aSNicolai Haehnle const DataLayout &DL = GV->getParent()->getDataLayout();
3362710171aSNicolai Haehnle uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
337129b531cSKazu Hirata Align Alignment = GV->getAlign().value_or(Align(4));
3382710171aSNicolai Haehnle
3390bc77a0fSFangrui Song emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
3400bc77a0fSFangrui Song emitLinkage(GV, GVSym);
341f0a665afSMichael Liao if (auto TS = getTargetStreamer())
34252911428SGuillaume Chatelet TS->emitAMDGPULDS(GVSym, Size, Alignment);
3432710171aSNicolai Haehnle return;
3442710171aSNicolai Haehnle }
34500f2f91aSTom Stellard
3461d49eb00SFangrui Song AsmPrinter::emitGlobalVariable(GV);
347e3b5aeafSTom Stellard }
348e3b5aeafSTom Stellard
doFinalization(Module & M)349b03dd8daSMatt Arsenault bool AMDGPUAsmPrinter::doFinalization(Module &M) {
350ae4fcb97SNicolai Haehnle // Pad with s_code_end to help tools and guard against instruction prefetch
351ae4fcb97SNicolai Haehnle // causing stale data in caches. Arguably this should be done by the linker,
352ae4fcb97SNicolai Haehnle // which is why this isn't done for Mesa.
353ae4fcb97SNicolai Haehnle const MCSubtargetInfo &STI = *getGlobalSTI();
354a8d9d507SStanislav Mekhanoshin if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
355ae4fcb97SNicolai Haehnle (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
356ae4fcb97SNicolai Haehnle STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
357adf4142fSFangrui Song OutStreamer->switchSection(getObjFileLowering().getTextSection());
358a8d9d507SStanislav Mekhanoshin getTargetStreamer()->EmitCodeEnd(STI);
35941bbe101SStanislav Mekhanoshin }
36041bbe101SStanislav Mekhanoshin
361b03dd8daSMatt Arsenault return AsmPrinter::doFinalization(M);
362b03dd8daSMatt Arsenault }
363b03dd8daSMatt Arsenault
364b03dd8daSMatt Arsenault // Print comments that apply to both callable functions and entry points.
emitCommonFunctionComments(uint32_t NumVGPR,Optional<uint32_t> NumAGPR,uint32_t TotalNumVGPR,uint32_t NumSGPR,uint64_t ScratchSize,uint64_t CodeSize,const AMDGPUMachineFunction * MFI)365b03dd8daSMatt Arsenault void AMDGPUAsmPrinter::emitCommonFunctionComments(
366b03dd8daSMatt Arsenault uint32_t NumVGPR,
367075bc48aSStanislav Mekhanoshin Optional<uint32_t> NumAGPR,
368075bc48aSStanislav Mekhanoshin uint32_t TotalNumVGPR,
369b03dd8daSMatt Arsenault uint32_t NumSGPR,
3709ba465a9SMatt Arsenault uint64_t ScratchSize,
3711c538423SStanislav Mekhanoshin uint64_t CodeSize,
3721c538423SStanislav Mekhanoshin const AMDGPUMachineFunction *MFI) {
373b03dd8daSMatt Arsenault OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
374b03dd8daSMatt Arsenault OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
375b03dd8daSMatt Arsenault OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
376075bc48aSStanislav Mekhanoshin if (NumAGPR) {
377075bc48aSStanislav Mekhanoshin OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
378075bc48aSStanislav Mekhanoshin OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
379075bc48aSStanislav Mekhanoshin false);
380075bc48aSStanislav Mekhanoshin }
381b03dd8daSMatt Arsenault OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
3821c538423SStanislav Mekhanoshin OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
3831c538423SStanislav Mekhanoshin false);
384b03dd8daSMatt Arsenault }
385b03dd8daSMatt Arsenault
getAmdhsaKernelCodeProperties(const MachineFunction & MF) const38600f2cb11SKonstantin Zhuravlyov uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
38700f2cb11SKonstantin Zhuravlyov const MachineFunction &MF) const {
38800f2cb11SKonstantin Zhuravlyov const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
38900f2cb11SKonstantin Zhuravlyov uint16_t KernelCodeProperties = 0;
39000f2cb11SKonstantin Zhuravlyov
39100f2cb11SKonstantin Zhuravlyov if (MFI.hasPrivateSegmentBuffer()) {
39200f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
39300f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
39400f2cb11SKonstantin Zhuravlyov }
39500f2cb11SKonstantin Zhuravlyov if (MFI.hasDispatchPtr()) {
39600f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
39700f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
39800f2cb11SKonstantin Zhuravlyov }
3990f20a35bSChangpeng Fang if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
40000f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
40100f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
40200f2cb11SKonstantin Zhuravlyov }
40300f2cb11SKonstantin Zhuravlyov if (MFI.hasKernargSegmentPtr()) {
40400f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
40500f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
40600f2cb11SKonstantin Zhuravlyov }
40700f2cb11SKonstantin Zhuravlyov if (MFI.hasDispatchID()) {
40800f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
40900f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
41000f2cb11SKonstantin Zhuravlyov }
41100f2cb11SKonstantin Zhuravlyov if (MFI.hasFlatScratchInit()) {
41200f2cb11SKonstantin Zhuravlyov KernelCodeProperties |=
41300f2cb11SKonstantin Zhuravlyov amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
41400f2cb11SKonstantin Zhuravlyov }
4155d00c306SStanislav Mekhanoshin if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
4165d00c306SStanislav Mekhanoshin KernelCodeProperties |=
4175d00c306SStanislav Mekhanoshin amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
4185d00c306SStanislav Mekhanoshin }
41900f2cb11SKonstantin Zhuravlyov
420*d96361d7SAbinav Puthan Purayil if (CurrentProgramInfo.DynamicCallStack) {
421*d96361d7SAbinav Puthan Purayil KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
422*d96361d7SAbinav Puthan Purayil }
423*d96361d7SAbinav Puthan Purayil
42400f2cb11SKonstantin Zhuravlyov return KernelCodeProperties;
42500f2cb11SKonstantin Zhuravlyov }
42600f2cb11SKonstantin Zhuravlyov
getAmdhsaKernelDescriptor(const MachineFunction & MF,const SIProgramInfo & PI) const42700f2cb11SKonstantin Zhuravlyov amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
42800f2cb11SKonstantin Zhuravlyov const MachineFunction &MF,
42900f2cb11SKonstantin Zhuravlyov const SIProgramInfo &PI) const {
430a8d9d507SStanislav Mekhanoshin const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
431f4ace637SKonstantin Zhuravlyov const Function &F = MF.getFunction();
432f4ace637SKonstantin Zhuravlyov
43300f2cb11SKonstantin Zhuravlyov amdhsa::kernel_descriptor_t KernelDescriptor;
43400f2cb11SKonstantin Zhuravlyov memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
43500f2cb11SKonstantin Zhuravlyov
43600f2cb11SKonstantin Zhuravlyov assert(isUInt<32>(PI.ScratchSize));
4371124bf4aSSebastian Neubauer assert(isUInt<32>(PI.getComputePGMRSrc1()));
43800f2cb11SKonstantin Zhuravlyov assert(isUInt<32>(PI.ComputePGMRSrc2));
43900f2cb11SKonstantin Zhuravlyov
44000f2cb11SKonstantin Zhuravlyov KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
44100f2cb11SKonstantin Zhuravlyov KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
442f4ace637SKonstantin Zhuravlyov
443f4ace637SKonstantin Zhuravlyov Align MaxKernArgAlign;
444f4ace637SKonstantin Zhuravlyov KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
445f4ace637SKonstantin Zhuravlyov
4461124bf4aSSebastian Neubauer KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
44700f2cb11SKonstantin Zhuravlyov KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
44800f2cb11SKonstantin Zhuravlyov KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
44900f2cb11SKonstantin Zhuravlyov
450a8d9d507SStanislav Mekhanoshin assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
451a8d9d507SStanislav Mekhanoshin if (STM.hasGFX90AInsts())
452a8d9d507SStanislav Mekhanoshin KernelDescriptor.compute_pgm_rsrc3 =
453a8d9d507SStanislav Mekhanoshin CurrentProgramInfo.ComputePGMRSrc3GFX90A;
454a8d9d507SStanislav Mekhanoshin
45500f2cb11SKonstantin Zhuravlyov return KernelDescriptor;
45600f2cb11SKonstantin Zhuravlyov }
45700f2cb11SKonstantin Zhuravlyov
runOnMachineFunction(MachineFunction & MF)45845bb48eaSTom Stellard bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
4594a025622SSebastian Neubauer // Init target streamer lazily on the first function so that previous passes
4604a025622SSebastian Neubauer // can set metadata.
4614a025622SSebastian Neubauer if (!IsTargetStreamerInitialized)
4624a025622SSebastian Neubauer initTargetStreamer(*MF.getFunction().getParent());
4634a025622SSebastian Neubauer
4642b08f6afSSebastian Neubauer ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
465b03dd8daSMatt Arsenault CurrentProgramInfo = SIProgramInfo();
466b03dd8daSMatt Arsenault
4676cb7b8a4SMatt Arsenault const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
46845bb48eaSTom Stellard
46945bb48eaSTom Stellard // The starting address of all shader programs must be 256 bytes aligned.
4706cb7b8a4SMatt Arsenault // Regular functions just need the basic required instruction alignment.
47118f805a7SGuillaume Chatelet MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
47245bb48eaSTom Stellard
47345bb48eaSTom Stellard SetupMachineFunction(MF);
47445bb48eaSTom Stellard
4755bfbae5cSTom Stellard const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
47645bb48eaSTom Stellard MCContext &Context = getObjFileLowering().getContext();
477807ecc3dSTim Renouf // FIXME: This should be an explicit check for Mesa.
478807ecc3dSTim Renouf if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
47945bb48eaSTom Stellard MCSectionELF *ConfigSection =
48045bb48eaSTom Stellard Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
481adf4142fSFangrui Song OutStreamer->switchSection(ConfigSection);
48267a6d540SKonstantin Zhuravlyov }
48345bb48eaSTom Stellard
4845733167fSSebastian Neubauer if (MFI->isModuleEntryFunction()) {
485b03dd8daSMatt Arsenault getSIProgramInfo(CurrentProgramInfo, MF);
486b03dd8daSMatt Arsenault }
487b03dd8daSMatt Arsenault
488edd67564SSebastian Neubauer if (STM.isAmdPalOS()) {
489edd67564SSebastian Neubauer if (MFI->isEntryFunction())
490c3beb6a0SKonstantin Zhuravlyov EmitPALMetadata(MF, CurrentProgramInfo);
4915733167fSSebastian Neubauer else if (MFI->isModuleEntryFunction())
492edd67564SSebastian Neubauer emitPALFunctionMetadata(MF);
493edd67564SSebastian Neubauer } else if (!STM.isAmdHsaOS()) {
494b03dd8daSMatt Arsenault EmitProgramInfoSI(MF, CurrentProgramInfo);
495f151a45cSTom Stellard }
49645bb48eaSTom Stellard
4978479240bSJay Foad DumpCodeInstEmitter = nullptr;
4988479240bSJay Foad if (STM.dumpCode()) {
4998479240bSJay Foad // For -dumpcode, get the assembler out of the streamer, even if it does
5008479240bSJay Foad // not really want to let us have it. This only works with -filetype=obj.
5018479240bSJay Foad bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
5028479240bSJay Foad OutStreamer->setUseAssemblerInfoForParsing(true);
5038479240bSJay Foad MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
5048479240bSJay Foad OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
5058479240bSJay Foad if (Assembler)
5068479240bSJay Foad DumpCodeInstEmitter = Assembler->getEmitterPtr();
5078479240bSJay Foad }
5088479240bSJay Foad
50945bb48eaSTom Stellard DisasmLines.clear();
51045bb48eaSTom Stellard HexLines.clear();
51145bb48eaSTom Stellard DisasmLineMaxLen = 0;
51245bb48eaSTom Stellard
5130dce409cSFangrui Song emitFunctionBody();
51445bb48eaSTom Stellard
51567357739SVang Thao emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
51667357739SVang Thao STM.hasMAIInsts());
51767357739SVang Thao
51845bb48eaSTom Stellard if (isVerbose()) {
51945bb48eaSTom Stellard MCSectionELF *CommentSection =
52045bb48eaSTom Stellard Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
521adf4142fSFangrui Song OutStreamer->switchSection(CommentSection);
52245bb48eaSTom Stellard
523b03dd8daSMatt Arsenault if (!MFI->isEntryFunction()) {
524021a218dSMatt Arsenault OutStreamer->emitRawComment(" Function info:", false);
5252b08f6afSSebastian Neubauer const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
5262b08f6afSSebastian Neubauer ResourceUsage->getResourceInfo(&MF.getFunction());
527b03dd8daSMatt Arsenault emitCommonFunctionComments(
528b03dd8daSMatt Arsenault Info.NumVGPR,
529075bc48aSStanislav Mekhanoshin STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
530075bc48aSStanislav Mekhanoshin Info.getTotalNumVGPRs(STM),
5315bfbae5cSTom Stellard Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
532b03dd8daSMatt Arsenault Info.PrivateSegmentSize,
5331c538423SStanislav Mekhanoshin getFunctionCodeSize(MF), MFI);
534b03dd8daSMatt Arsenault return false;
535021a218dSMatt Arsenault }
536021a218dSMatt Arsenault
537b03dd8daSMatt Arsenault OutStreamer->emitRawComment(" Kernel info:", false);
538075bc48aSStanislav Mekhanoshin emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
539075bc48aSStanislav Mekhanoshin STM.hasMAIInsts()
540075bc48aSStanislav Mekhanoshin ? CurrentProgramInfo.NumAccVGPR
541075bc48aSStanislav Mekhanoshin : Optional<uint32_t>(),
542075bc48aSStanislav Mekhanoshin CurrentProgramInfo.NumVGPR,
543b03dd8daSMatt Arsenault CurrentProgramInfo.NumSGPR,
544b03dd8daSMatt Arsenault CurrentProgramInfo.ScratchSize,
5451c538423SStanislav Mekhanoshin getFunctionCodeSize(MF), MFI);
546b03dd8daSMatt Arsenault
547b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
548b03dd8daSMatt Arsenault " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
549b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
550b03dd8daSMatt Arsenault " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
551b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
552b03dd8daSMatt Arsenault " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
553fd8ab09cSMatt Arsenault " bytes/workgroup (compile time only)", false);
554d41c0dbfSMatt Arsenault
555b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
556b03dd8daSMatt Arsenault " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
557b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
558b03dd8daSMatt Arsenault " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
559021a218dSMatt Arsenault
560b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
561b03dd8daSMatt Arsenault " NumSGPRsForWavesPerEU: " +
562b03dd8daSMatt Arsenault Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
563b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
564b03dd8daSMatt Arsenault " NumVGPRsForWavesPerEU: " +
565b03dd8daSMatt Arsenault Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
5661d65026cSKonstantin Zhuravlyov
567a8d9d507SStanislav Mekhanoshin if (STM.hasGFX90AInsts())
568a8d9d507SStanislav Mekhanoshin OutStreamer->emitRawComment(
569a8d9d507SStanislav Mekhanoshin " AccumOffset: " +
570a8d9d507SStanislav Mekhanoshin Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
571a8d9d507SStanislav Mekhanoshin
572b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
5732594fa85SStanislav Mekhanoshin " Occupancy: " +
5742594fa85SStanislav Mekhanoshin Twine(CurrentProgramInfo.Occupancy), false);
5752594fa85SStanislav Mekhanoshin
5762594fa85SStanislav Mekhanoshin OutStreamer->emitRawComment(
5771c538423SStanislav Mekhanoshin " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
5781c538423SStanislav Mekhanoshin
579b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
580eb66bf08SStanislav Mekhanoshin " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
581eb66bf08SStanislav Mekhanoshin Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
582eb66bf08SStanislav Mekhanoshin OutStreamer->emitRawComment(
583b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:USER_SGPR: " +
584b03dd8daSMatt Arsenault Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
585b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
586b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
587b03dd8daSMatt Arsenault Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
588b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
589b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
590b03dd8daSMatt Arsenault Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
591b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
592b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
593b03dd8daSMatt Arsenault Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
594b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
595b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
596b03dd8daSMatt Arsenault Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
597b03dd8daSMatt Arsenault OutStreamer->emitRawComment(
598b03dd8daSMatt Arsenault " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
599b03dd8daSMatt Arsenault Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
600d41c0dbfSMatt Arsenault false);
601a8d9d507SStanislav Mekhanoshin
602a8d9d507SStanislav Mekhanoshin assert(STM.hasGFX90AInsts() ||
603a8d9d507SStanislav Mekhanoshin CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
604a8d9d507SStanislav Mekhanoshin if (STM.hasGFX90AInsts()) {
605a8d9d507SStanislav Mekhanoshin OutStreamer->emitRawComment(
606a8d9d507SStanislav Mekhanoshin " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
607a8d9d507SStanislav Mekhanoshin Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
608a8d9d507SStanislav Mekhanoshin amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
609a8d9d507SStanislav Mekhanoshin false);
610a8d9d507SStanislav Mekhanoshin OutStreamer->emitRawComment(
611a8d9d507SStanislav Mekhanoshin " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
612a8d9d507SStanislav Mekhanoshin Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
613a8d9d507SStanislav Mekhanoshin amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
614a8d9d507SStanislav Mekhanoshin false);
615a8d9d507SStanislav Mekhanoshin }
61645bb48eaSTom Stellard }
61745bb48eaSTom Stellard
61833cb8f5bSTim Renouf if (DumpCodeInstEmitter) {
61945bb48eaSTom Stellard
620adf4142fSFangrui Song OutStreamer->switchSection(
6214327a9b4SSebastian Neubauer Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
62245bb48eaSTom Stellard
62345bb48eaSTom Stellard for (size_t i = 0; i < DisasmLines.size(); ++i) {
624cead41d4STim Renouf std::string Comment = "\n";
625cead41d4STim Renouf if (!HexLines[i].empty()) {
626cead41d4STim Renouf Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
62745bb48eaSTom Stellard Comment += " ; " + HexLines[i] + "\n";
628cead41d4STim Renouf }
62945bb48eaSTom Stellard
630a55daa14SFangrui Song OutStreamer->emitBytes(StringRef(DisasmLines[i]));
631a55daa14SFangrui Song OutStreamer->emitBytes(StringRef(Comment));
63245bb48eaSTom Stellard }
63345bb48eaSTom Stellard }
63445bb48eaSTom Stellard
63545bb48eaSTom Stellard return false;
63645bb48eaSTom Stellard }
63745bb48eaSTom Stellard
638f4ace637SKonstantin Zhuravlyov // TODO: Fold this into emitFunctionBodyStart.
initializeTargetID(const Module & M)639f4ace637SKonstantin Zhuravlyov void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
640f4ace637SKonstantin Zhuravlyov // In the beginning all features are either 'Any' or 'NotSupported',
641f4ace637SKonstantin Zhuravlyov // depending on global target features. This will cover empty modules.
642f4ace637SKonstantin Zhuravlyov getTargetStreamer()->initializeTargetID(
643f4ace637SKonstantin Zhuravlyov *getGlobalSTI(), getGlobalSTI()->getFeatureString());
644f4ace637SKonstantin Zhuravlyov
645f4ace637SKonstantin Zhuravlyov // If module is empty, we are done.
646f4ace637SKonstantin Zhuravlyov if (M.empty())
647f4ace637SKonstantin Zhuravlyov return;
648f4ace637SKonstantin Zhuravlyov
649f4ace637SKonstantin Zhuravlyov // If module is not empty, need to find first 'Off' or 'On' feature
650f4ace637SKonstantin Zhuravlyov // setting per feature from functions in module.
651f4ace637SKonstantin Zhuravlyov for (auto &F : M) {
652f4ace637SKonstantin Zhuravlyov auto &TSTargetID = getTargetStreamer()->getTargetID();
653f4ace637SKonstantin Zhuravlyov if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
654f4ace637SKonstantin Zhuravlyov (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
655f4ace637SKonstantin Zhuravlyov break;
656f4ace637SKonstantin Zhuravlyov
657f4ace637SKonstantin Zhuravlyov const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
658f4ace637SKonstantin Zhuravlyov const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
659f4ace637SKonstantin Zhuravlyov if (TSTargetID->isXnackSupported())
660f4ace637SKonstantin Zhuravlyov if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
661f4ace637SKonstantin Zhuravlyov TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
662f4ace637SKonstantin Zhuravlyov if (TSTargetID->isSramEccSupported())
663f4ace637SKonstantin Zhuravlyov if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
664f4ace637SKonstantin Zhuravlyov TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
665f4ace637SKonstantin Zhuravlyov }
666f4ace637SKonstantin Zhuravlyov }
667f4ace637SKonstantin Zhuravlyov
getFunctionCodeSize(const MachineFunction & MF) const668a3566f21SMatt Arsenault uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
6695bfbae5cSTom Stellard const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
67043e92fe3SMatt Arsenault const SIInstrInfo *TII = STM.getInstrInfo();
67145bb48eaSTom Stellard
672a3566f21SMatt Arsenault uint64_t CodeSize = 0;
673a3566f21SMatt Arsenault
67445bb48eaSTom Stellard for (const MachineBasicBlock &MBB : MF) {
67545bb48eaSTom Stellard for (const MachineInstr &MI : MBB) {
67645bb48eaSTom Stellard // TODO: CodeSize should account for multiple functions.
677c5746865SMatt Arsenault
678c5746865SMatt Arsenault // TODO: Should we count size of debug info?
679801bf7ebSShiva Chen if (MI.isDebugInstr())
680c5746865SMatt Arsenault continue;
681c5746865SMatt Arsenault
682a9720c67SMatt Arsenault CodeSize += TII->getInstSizeInBytes(MI);
683a3566f21SMatt Arsenault }
684a3566f21SMatt Arsenault }
68545bb48eaSTom Stellard
686a3566f21SMatt Arsenault return CodeSize;
687a3566f21SMatt Arsenault }
68845bb48eaSTom Stellard
getSIProgramInfo(SIProgramInfo & ProgInfo,const MachineFunction & MF)689b03dd8daSMatt Arsenault void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
690b03dd8daSMatt Arsenault const MachineFunction &MF) {
6912b08f6afSSebastian Neubauer const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
6922b08f6afSSebastian Neubauer ResourceUsage->getResourceInfo(&MF.getFunction());
693075bc48aSStanislav Mekhanoshin const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
694b03dd8daSMatt Arsenault
695075bc48aSStanislav Mekhanoshin ProgInfo.NumArchVGPR = Info.NumVGPR;
696075bc48aSStanislav Mekhanoshin ProgInfo.NumAccVGPR = Info.NumAGPR;
697075bc48aSStanislav Mekhanoshin ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
698a8d9d507SStanislav Mekhanoshin ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
699a8d9d507SStanislav Mekhanoshin ProgInfo.TgSplit = STM.isTgSplitEnabled();
700b03dd8daSMatt Arsenault ProgInfo.NumSGPR = Info.NumExplicitSGPR;
701b03dd8daSMatt Arsenault ProgInfo.ScratchSize = Info.PrivateSegmentSize;
702b03dd8daSMatt Arsenault ProgInfo.VCCUsed = Info.UsesVCC;
703b03dd8daSMatt Arsenault ProgInfo.FlatUsed = Info.UsesFlatScratch;
704b03dd8daSMatt Arsenault ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
705b03dd8daSMatt Arsenault
7061ed4caffSMatt Arsenault const uint64_t MaxScratchPerWorkitem =
707ff85d61aSJay Foad STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
7081ed4caffSMatt Arsenault if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
709f1caa283SMatthias Braun DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
710e4223438SArthur Eubanks ProgInfo.ScratchSize,
711e4223438SArthur Eubanks MaxScratchPerWorkitem, DS_Error);
712f1caa283SMatthias Braun MF.getFunction().getContext().diagnose(DiagStackSize);
7139ba465a9SMatt Arsenault }
7149ba465a9SMatt Arsenault
715b03dd8daSMatt Arsenault const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
716b03dd8daSMatt Arsenault
7173c874ce4Smadhur13490 // The calculations related to SGPR/VGPR blocks are
7181e8c2c70SScott Linder // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
7191e8c2c70SScott Linder // unified.
7201e8c2c70SScott Linder unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
7214cd9509eSMatt Arsenault &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
722f2f3d147SKonstantin Zhuravlyov
72391f22fbfSMarek Olsak // Check the addressable register limit before we add ExtraSGPRs.
72491f22fbfSMarek Olsak if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
72591f22fbfSMarek Olsak !STM.hasSGPRInitBug()) {
726e03b1d7bSKonstantin Zhuravlyov unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
727a3566f21SMatt Arsenault if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
72891f22fbfSMarek Olsak // This can happen due to a compiler bug or when using inline asm.
729f1caa283SMatthias Braun LLVMContext &Ctx = MF.getFunction().getContext();
730e4223438SArthur Eubanks DiagnosticInfoResourceLimit Diag(
731e4223438SArthur Eubanks MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
732e4223438SArthur Eubanks MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
73391f22fbfSMarek Olsak Ctx.diagnose(Diag);
734a3566f21SMatt Arsenault ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
73591f22fbfSMarek Olsak }
73691f22fbfSMarek Olsak }
73791f22fbfSMarek Olsak
7381d65026cSKonstantin Zhuravlyov // Account for extra SGPRs and VGPRs reserved for debugger use.
739a3566f21SMatt Arsenault ProgInfo.NumSGPR += ExtraSGPRs;
74045bb48eaSTom Stellard
74179f75468SMatt Arsenault const Function &F = MF.getFunction();
74279f75468SMatt Arsenault
743fd8d4af3STim Renouf // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
744fd8d4af3STim Renouf // dispatch registers are function args.
745fd8d4af3STim Renouf unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
74679f75468SMatt Arsenault
74779f75468SMatt Arsenault if (isShader(F.getCallingConv())) {
74869f7d81dSDavid Stuttard bool IsPixelShader =
74969f7d81dSDavid Stuttard F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
75069f7d81dSDavid Stuttard
75169f7d81dSDavid Stuttard // Calculate the number of VGPR registers based on the SPI input registers
75269f7d81dSDavid Stuttard uint32_t InputEna = 0;
75369f7d81dSDavid Stuttard uint32_t InputAddr = 0;
75469f7d81dSDavid Stuttard unsigned LastEna = 0;
75569f7d81dSDavid Stuttard
75669f7d81dSDavid Stuttard if (IsPixelShader) {
75769f7d81dSDavid Stuttard // Note for IsPixelShader:
75869f7d81dSDavid Stuttard // By this stage, all enabled inputs are tagged in InputAddr as well.
75969f7d81dSDavid Stuttard // We will use InputAddr to determine whether the input counts against the
76069f7d81dSDavid Stuttard // vgpr total and only use the InputEnable to determine the last input
76169f7d81dSDavid Stuttard // that is relevant - if extra arguments are used, then we have to honour
76269f7d81dSDavid Stuttard // the InputAddr for any intermediate non-enabled inputs.
76369f7d81dSDavid Stuttard InputEna = MFI->getPSInputEnable();
76469f7d81dSDavid Stuttard InputAddr = MFI->getPSInputAddr();
76569f7d81dSDavid Stuttard
76669f7d81dSDavid Stuttard // We only need to consider input args up to the last used arg.
76769f7d81dSDavid Stuttard assert((InputEna || InputAddr) &&
76869f7d81dSDavid Stuttard "PSInputAddr and PSInputEnable should "
76969f7d81dSDavid Stuttard "never both be 0 for AMDGPU_PS shaders");
77069f7d81dSDavid Stuttard // There are some rare circumstances where InputAddr is non-zero and
77169f7d81dSDavid Stuttard // InputEna can be set to 0. In this case we default to setting LastEna
77269f7d81dSDavid Stuttard // to 1.
77369f7d81dSDavid Stuttard LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
77469f7d81dSDavid Stuttard }
77569f7d81dSDavid Stuttard
77679f75468SMatt Arsenault // FIXME: We should be using the number of registers determined during
77779f75468SMatt Arsenault // calling convention lowering to legalize the types.
77879f75468SMatt Arsenault const DataLayout &DL = F.getParent()->getDataLayout();
77969f7d81dSDavid Stuttard unsigned PSArgCount = 0;
78069f7d81dSDavid Stuttard unsigned IntermediateVGPR = 0;
78179f75468SMatt Arsenault for (auto &Arg : F.args()) {
78279f75468SMatt Arsenault unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
78369f7d81dSDavid Stuttard if (Arg.hasAttribute(Attribute::InReg)) {
784fd8d4af3STim Renouf WaveDispatchNumSGPR += NumRegs;
78569f7d81dSDavid Stuttard } else {
78669f7d81dSDavid Stuttard // If this is a PS shader and we're processing the PS Input args (first
78769f7d81dSDavid Stuttard // 16 VGPR), use the InputEna and InputAddr bits to define how many
78869f7d81dSDavid Stuttard // VGPRs are actually used.
78969f7d81dSDavid Stuttard // Any extra VGPR arguments are handled as normal arguments (and
79069f7d81dSDavid Stuttard // contribute to the VGPR count whether they're used or not).
79169f7d81dSDavid Stuttard if (IsPixelShader && PSArgCount < 16) {
79269f7d81dSDavid Stuttard if ((1 << PSArgCount) & InputAddr) {
79369f7d81dSDavid Stuttard if (PSArgCount < LastEna)
79469f7d81dSDavid Stuttard WaveDispatchNumVGPR += NumRegs;
795fd8d4af3STim Renouf else
79669f7d81dSDavid Stuttard IntermediateVGPR += NumRegs;
79769f7d81dSDavid Stuttard }
79869f7d81dSDavid Stuttard PSArgCount++;
79969f7d81dSDavid Stuttard } else {
80069f7d81dSDavid Stuttard // If there are extra arguments we have to include the allocation for
80169f7d81dSDavid Stuttard // the non-used (but enabled with InputAddr) input arguments
80269f7d81dSDavid Stuttard if (IntermediateVGPR) {
80369f7d81dSDavid Stuttard WaveDispatchNumVGPR += IntermediateVGPR;
80469f7d81dSDavid Stuttard IntermediateVGPR = 0;
80569f7d81dSDavid Stuttard }
806fd8d4af3STim Renouf WaveDispatchNumVGPR += NumRegs;
807fd8d4af3STim Renouf }
80869f7d81dSDavid Stuttard }
80969f7d81dSDavid Stuttard }
810fd8d4af3STim Renouf ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
81169f7d81dSDavid Stuttard ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
81269f7d81dSDavid Stuttard ProgInfo.NumVGPR =
81369f7d81dSDavid Stuttard Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
81479f75468SMatt Arsenault }
815fd8d4af3STim Renouf
8161d65026cSKonstantin Zhuravlyov // Adjust number of registers used to meet default/requested minimum/maximum
8171d65026cSKonstantin Zhuravlyov // number of waves per execution unit request.
8181d65026cSKonstantin Zhuravlyov ProgInfo.NumSGPRsForWavesPerEU = std::max(
819a3566f21SMatt Arsenault std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
8201d65026cSKonstantin Zhuravlyov ProgInfo.NumVGPRsForWavesPerEU = std::max(
821a3566f21SMatt Arsenault std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
8221d65026cSKonstantin Zhuravlyov
82391f22fbfSMarek Olsak if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
82491f22fbfSMarek Olsak STM.hasSGPRInitBug()) {
8259f89ede1SKonstantin Zhuravlyov unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
8269f89ede1SKonstantin Zhuravlyov if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
8279f89ede1SKonstantin Zhuravlyov // This can happen due to a compiler bug or when using inline asm to use
8289f89ede1SKonstantin Zhuravlyov // the registers which are usually reserved for vcc etc.
829f1caa283SMatthias Braun LLVMContext &Ctx = MF.getFunction().getContext();
830e4223438SArthur Eubanks DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
831e4223438SArthur Eubanks ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
832e4223438SArthur Eubanks DS_Error, DK_ResourceLimit);
833ff98241fSMatt Arsenault Ctx.diagnose(Diag);
8349f89ede1SKonstantin Zhuravlyov ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
8359f89ede1SKonstantin Zhuravlyov ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
836417c93e3SMatt Arsenault }
83791f22fbfSMarek Olsak }
83845bb48eaSTom Stellard
8394eae3019SMatt Arsenault if (STM.hasSGPRInitBug()) {
8409f89ede1SKonstantin Zhuravlyov ProgInfo.NumSGPR =
8419f89ede1SKonstantin Zhuravlyov AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
8429f89ede1SKonstantin Zhuravlyov ProgInfo.NumSGPRsForWavesPerEU =
8439f89ede1SKonstantin Zhuravlyov AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
84445bb48eaSTom Stellard }
84545bb48eaSTom Stellard
846161e2b42SMatt Arsenault if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
847f1caa283SMatthias Braun LLVMContext &Ctx = MF.getFunction().getContext();
848f1caa283SMatthias Braun DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
849e4223438SArthur Eubanks MFI->getNumUserSGPRs(),
850e4223438SArthur Eubanks STM.getMaxNumUserSGPRs(), DS_Error);
851ff98241fSMatt Arsenault Ctx.diagnose(Diag);
85241003af2SMatt Arsenault }
85341003af2SMatt Arsenault
85452ef4019SMatt Arsenault if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
855f1caa283SMatthias Braun LLVMContext &Ctx = MF.getFunction().getContext();
856f1caa283SMatthias Braun DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
857e4223438SArthur Eubanks MFI->getLDSSize(),
858e4223438SArthur Eubanks STM.getLocalMemorySize(), DS_Error);
859ff98241fSMatt Arsenault Ctx.diagnose(Diag);
8601c4d0efeSMatt Arsenault }
8611c4d0efeSMatt Arsenault
8621e8c2c70SScott Linder ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
863be3d7ba9SDavid Stuttard &STM, ProgInfo.NumSGPRsForWavesPerEU);
8641e8c2c70SScott Linder ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
865be3d7ba9SDavid Stuttard &STM, ProgInfo.NumVGPRsForWavesPerEU);
866e03b1d7bSKonstantin Zhuravlyov
867db0ed3e4SMatt Arsenault const SIModeRegisterDefaults Mode = MFI->getMode();
868db0ed3e4SMatt Arsenault
86945bb48eaSTom Stellard // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
87045bb48eaSTom Stellard // register.
871db0ed3e4SMatt Arsenault ProgInfo.FloatMode = getFPMode(Mode);
87245bb48eaSTom Stellard
873055e4dceSMatt Arsenault ProgInfo.IEEEMode = Mode.IEEE;
87445bb48eaSTom Stellard
8757293f989SMatt Arsenault // Make clamp modifier on NaN input returns 0.
876055e4dceSMatt Arsenault ProgInfo.DX10Clamp = Mode.DX10Clamp;
87745bb48eaSTom Stellard
87845bb48eaSTom Stellard unsigned LDSAlignShift;
8795bfbae5cSTom Stellard if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
88045bb48eaSTom Stellard // LDS is allocated in 64 dword blocks.
88145bb48eaSTom Stellard LDSAlignShift = 8;
88245bb48eaSTom Stellard } else {
88345bb48eaSTom Stellard // LDS is allocated in 128 dword blocks.
88445bb48eaSTom Stellard LDSAlignShift = 9;
88545bb48eaSTom Stellard }
88645bb48eaSTom Stellard
88767357739SVang Thao ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
88867357739SVang Thao ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
88967357739SVang Thao
890bc7902f1SMatt Arsenault ProgInfo.LDSSize = MFI->getLDSSize();
89145bb48eaSTom Stellard ProgInfo.LDSBlocks =
892ef0fe1eeSAaron Ballman alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
89345bb48eaSTom Stellard
894ff85d61aSJay Foad // Scratch is allocated in 64-dword or 256-dword blocks.
895ff85d61aSJay Foad unsigned ScratchAlignShift =
896ff85d61aSJay Foad STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
89745bb48eaSTom Stellard // We need to program the hardware with the amount of scratch memory that
89845bb48eaSTom Stellard // is used by the entire wave. ProgInfo.ScratchSize is the amount of
89945bb48eaSTom Stellard // scratch memory used per thread.
900ff85d61aSJay Foad ProgInfo.ScratchBlocks = divideCeil(
901ff85d61aSJay Foad ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
90245bb48eaSTom Stellard
90341bbe101SStanislav Mekhanoshin if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
90441bbe101SStanislav Mekhanoshin ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
90541bbe101SStanislav Mekhanoshin ProgInfo.MemOrdered = 1;
90641bbe101SStanislav Mekhanoshin }
90741bbe101SStanislav Mekhanoshin
90826f8f3dbSMatt Arsenault // 0 = X, 1 = XY, 2 = XYZ
90926f8f3dbSMatt Arsenault unsigned TIDIGCompCnt = 0;
91026f8f3dbSMatt Arsenault if (MFI->hasWorkItemIDZ())
91126f8f3dbSMatt Arsenault TIDIGCompCnt = 2;
91226f8f3dbSMatt Arsenault else if (MFI->hasWorkItemIDY())
91326f8f3dbSMatt Arsenault TIDIGCompCnt = 1;
91426f8f3dbSMatt Arsenault
9150bdaef38SMatt Arsenault // The private segment wave byte offset is the last of the system SGPRs. We
9160bdaef38SMatt Arsenault // initially assumed it was allocated, and may have used it. It shouldn't harm
9170bdaef38SMatt Arsenault // anything to disable it if we know the stack isn't used here. We may still
9180bdaef38SMatt Arsenault // have emitted code reading it to initialize scratch, but if that's unused
9190bdaef38SMatt Arsenault // reading garbage should be OK.
9200bdaef38SMatt Arsenault const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
92145bb48eaSTom Stellard ProgInfo.ComputePGMRSrc2 =
9220bdaef38SMatt Arsenault S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
92326f8f3dbSMatt Arsenault S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
9242ca6b1f2SKonstantin Zhuravlyov // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
9252ca6b1f2SKonstantin Zhuravlyov S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) |
92626f8f3dbSMatt Arsenault S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
92726f8f3dbSMatt Arsenault S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
92826f8f3dbSMatt Arsenault S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
92926f8f3dbSMatt Arsenault S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
93026f8f3dbSMatt Arsenault S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
93126f8f3dbSMatt Arsenault S_00B84C_EXCP_EN_MSB(0) |
9326ccb076aSKonstantin Zhuravlyov // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
9336ccb076aSKonstantin Zhuravlyov S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
93426f8f3dbSMatt Arsenault S_00B84C_EXCP_EN(0);
9352594fa85SStanislav Mekhanoshin
936a8d9d507SStanislav Mekhanoshin if (STM.hasGFX90AInsts()) {
937a8d9d507SStanislav Mekhanoshin AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
938a8d9d507SStanislav Mekhanoshin amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
939a8d9d507SStanislav Mekhanoshin ProgInfo.AccumOffset);
940a8d9d507SStanislav Mekhanoshin AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
941a8d9d507SStanislav Mekhanoshin amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
942a8d9d507SStanislav Mekhanoshin ProgInfo.TgSplit);
943a8d9d507SStanislav Mekhanoshin }
944a8d9d507SStanislav Mekhanoshin
9456f09bb7dSMatt Arsenault ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
9462594fa85SStanislav Mekhanoshin ProgInfo.NumSGPRsForWavesPerEU,
9472594fa85SStanislav Mekhanoshin ProgInfo.NumVGPRsForWavesPerEU);
94845bb48eaSTom Stellard }
94945bb48eaSTom Stellard
getRsrcReg(CallingConv::ID CallConv)950df3a20cdSNicolai Haehnle static unsigned getRsrcReg(CallingConv::ID CallConv) {
951df3a20cdSNicolai Haehnle switch (CallConv) {
952cd1d5aafSJustin Bogner default: LLVM_FALLTHROUGH;
953df3a20cdSNicolai Haehnle case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
954ef1ae8ffSTim Renouf case CallingConv::AMDGPU_LS: return R_00B528_SPI_SHADER_PGM_RSRC1_LS;
955a302a736SMarek Olsak case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS;
956ef1ae8ffSTim Renouf case CallingConv::AMDGPU_ES: return R_00B328_SPI_SHADER_PGM_RSRC1_ES;
957df3a20cdSNicolai Haehnle case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
958df3a20cdSNicolai Haehnle case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
959ef1ae8ffSTim Renouf case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
96045bb48eaSTom Stellard }
96145bb48eaSTom Stellard }
96245bb48eaSTom Stellard
EmitProgramInfoSI(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo)96345bb48eaSTom Stellard void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
964b03dd8daSMatt Arsenault const SIProgramInfo &CurrentProgramInfo) {
96545bb48eaSTom Stellard const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
966ff85d61aSJay Foad const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
967f1caa283SMatthias Braun unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
96845bb48eaSTom Stellard
969f1caa283SMatthias Braun if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
970692e0c96SFangrui Song OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
97145bb48eaSTom Stellard
9721124bf4aSSebastian Neubauer OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
97345bb48eaSTom Stellard
974692e0c96SFangrui Song OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
975692e0c96SFangrui Song OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
97645bb48eaSTom Stellard
977692e0c96SFangrui Song OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
978ff85d61aSJay Foad OutStreamer->emitInt32(
979ff85d61aSJay Foad STM.getGeneration() >= AMDGPUSubtarget::GFX11
980ff85d61aSJay Foad ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
981ff85d61aSJay Foad : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
98245bb48eaSTom Stellard
98345bb48eaSTom Stellard // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
98445bb48eaSTom Stellard // 0" comment but I don't see a corresponding field in the register spec.
98545bb48eaSTom Stellard } else {
986692e0c96SFangrui Song OutStreamer->emitInt32(RsrcReg);
98777497103SFangrui Song OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
988b03dd8daSMatt Arsenault S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
989692e0c96SFangrui Song OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
990ff85d61aSJay Foad OutStreamer->emitInt32(
991ff85d61aSJay Foad STM.getGeneration() >= AMDGPUSubtarget::GFX11
992ff85d61aSJay Foad ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
993ff85d61aSJay Foad : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
994807ecc3dSTim Renouf }
995807ecc3dSTim Renouf
996f1caa283SMatthias Braun if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
997692e0c96SFangrui Song OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);
998929a8ad2SJay Foad unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
999929a8ad2SJay Foad ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1000929a8ad2SJay Foad : CurrentProgramInfo.LDSBlocks;
1001929a8ad2SJay Foad OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1002692e0c96SFangrui Song OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);
1003692e0c96SFangrui Song OutStreamer->emitInt32(MFI->getPSInputEnable());
1004692e0c96SFangrui Song OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);
1005692e0c96SFangrui Song OutStreamer->emitInt32(MFI->getPSInputAddr());
100645bb48eaSTom Stellard }
10070532c190SMarek Olsak
1008692e0c96SFangrui Song OutStreamer->emitInt32(R_SPILLED_SGPRS);
1009692e0c96SFangrui Song OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1010692e0c96SFangrui Song OutStreamer->emitInt32(R_SPILLED_VGPRS);
1011692e0c96SFangrui Song OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
101245bb48eaSTom Stellard }
101345bb48eaSTom Stellard
101472800f04STim Renouf // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
101572800f04STim Renouf // is AMDPAL. It stores each compute/SPI register setting and other PAL
1016d737b551STim Renouf // metadata items into the PALMD::Metadata, combining with any provided by the
1017d737b551STim Renouf // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1018d737b551STim Renouf // is then written as a single block in the .note section.
EmitPALMetadata(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo)1019c3beb6a0SKonstantin Zhuravlyov void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
102072800f04STim Renouf const SIProgramInfo &CurrentProgramInfo) {
102172800f04STim Renouf const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1022d737b551STim Renouf auto CC = MF.getFunction().getCallingConv();
1023d737b551STim Renouf auto MD = getTargetStreamer()->getPALMetadata();
1024d737b551STim Renouf
1025e7bd52f8STim Renouf MD->setEntryPoint(CC, MF.getFunction().getName());
1026d737b551STim Renouf MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
102774702444SJacob Lambert
102874702444SJacob Lambert // Only set AGPRs for supported devices
102974702444SJacob Lambert const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
103074702444SJacob Lambert if (STM.hasMAIInsts()) {
103174702444SJacob Lambert MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
103274702444SJacob Lambert }
103374702444SJacob Lambert
1034d737b551STim Renouf MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
10351124bf4aSSebastian Neubauer MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
10361124bf4aSSebastian Neubauer if (AMDGPU::isCompute(CC)) {
1037d737b551STim Renouf MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
103872800f04STim Renouf } else {
103972800f04STim Renouf if (CurrentProgramInfo.ScratchBlocks > 0)
1040d737b551STim Renouf MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
104172800f04STim Renouf }
1042d737b551STim Renouf // ScratchSize is in bytes, 16 aligned.
1043d737b551STim Renouf MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
1044f1caa283SMatthias Braun if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
1045929a8ad2SJay Foad unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1046929a8ad2SJay Foad ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1047929a8ad2SJay Foad : CurrentProgramInfo.LDSBlocks;
1048929a8ad2SJay Foad MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1049d737b551STim Renouf MD->setSpiPsInputEna(MFI->getPSInputEnable());
1050d737b551STim Renouf MD->setSpiPsInputAddr(MFI->getPSInputAddr());
105172800f04STim Renouf }
10525d00c306SStanislav Mekhanoshin
10535d00c306SStanislav Mekhanoshin if (STM.isWave32())
10545d00c306SStanislav Mekhanoshin MD->setWave32(MF.getFunction().getCallingConv());
105572800f04STim Renouf }
105672800f04STim Renouf
emitPALFunctionMetadata(const MachineFunction & MF)1057edd67564SSebastian Neubauer void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1058edd67564SSebastian Neubauer auto *MD = getTargetStreamer()->getPALMetadata();
1059edd67564SSebastian Neubauer const MachineFrameInfo &MFI = MF.getFrameInfo();
10605733167fSSebastian Neubauer MD->setFunctionScratchSize(MF, MFI.getStackSize());
1061db646de3SSebastian Neubauer
10625733167fSSebastian Neubauer // Set compute registers
10635733167fSSebastian Neubauer MD->setRsrc1(CallingConv::AMDGPU_CS,
10645733167fSSebastian Neubauer CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
10655733167fSSebastian Neubauer MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
1066db646de3SSebastian Neubauer
1067db646de3SSebastian Neubauer // Set optional info
1068db646de3SSebastian Neubauer MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
1069db646de3SSebastian Neubauer MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1070db646de3SSebastian Neubauer MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1071edd67564SSebastian Neubauer }
1072edd67564SSebastian Neubauer
107324ee0785SMatt Arsenault // This is supposed to be log2(Size)
getElementByteSizeValue(unsigned Size)107424ee0785SMatt Arsenault static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
107524ee0785SMatt Arsenault switch (Size) {
107624ee0785SMatt Arsenault case 4:
107724ee0785SMatt Arsenault return AMD_ELEMENT_4_BYTES;
107824ee0785SMatt Arsenault case 8:
107924ee0785SMatt Arsenault return AMD_ELEMENT_8_BYTES;
108024ee0785SMatt Arsenault case 16:
108124ee0785SMatt Arsenault return AMD_ELEMENT_16_BYTES;
108224ee0785SMatt Arsenault default:
108324ee0785SMatt Arsenault llvm_unreachable("invalid private_element_size");
108424ee0785SMatt Arsenault }
108524ee0785SMatt Arsenault }
108624ee0785SMatt Arsenault
getAmdKernelCode(amd_kernel_code_t & Out,const SIProgramInfo & CurrentProgramInfo,const MachineFunction & MF) const1087ca0e7f64SKonstantin Zhuravlyov void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1088b03dd8daSMatt Arsenault const SIProgramInfo &CurrentProgramInfo,
1089ca0e7f64SKonstantin Zhuravlyov const MachineFunction &MF) const {
10904bec7d42SMatt Arsenault const Function &F = MF.getFunction();
10914bec7d42SMatt Arsenault assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
10924bec7d42SMatt Arsenault F.getCallingConv() == CallingConv::SPIR_KERNEL);
10934bec7d42SMatt Arsenault
109445bb48eaSTom Stellard const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
10955bfbae5cSTom Stellard const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
109645bb48eaSTom Stellard
10974cd9509eSMatt Arsenault AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
109845bb48eaSTom Stellard
1099ca0e7f64SKonstantin Zhuravlyov Out.compute_pgm_resource_registers =
11001124bf4aSSebastian Neubauer CurrentProgramInfo.getComputePGMRSrc1() |
1101b03dd8daSMatt Arsenault (CurrentProgramInfo.ComputePGMRSrc2 << 32);
110241bbe101SStanislav Mekhanoshin Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
110326f8f3dbSMatt Arsenault
1104b03dd8daSMatt Arsenault if (CurrentProgramInfo.DynamicCallStack)
1105b03dd8daSMatt Arsenault Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
1106b03dd8daSMatt Arsenault
1107ca0e7f64SKonstantin Zhuravlyov AMD_HSA_BITS_SET(Out.code_properties,
110824ee0785SMatt Arsenault AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
1109cf6565f6SStanislav Mekhanoshin getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
111024ee0785SMatt Arsenault
111126f8f3dbSMatt Arsenault if (MFI->hasPrivateSegmentBuffer()) {
1112ca0e7f64SKonstantin Zhuravlyov Out.code_properties |=
111326f8f3dbSMatt Arsenault AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
111426f8f3dbSMatt Arsenault }
111526f8f3dbSMatt Arsenault
111626f8f3dbSMatt Arsenault if (MFI->hasDispatchPtr())
1117ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
111826f8f3dbSMatt Arsenault
11190f20a35bSChangpeng Fang if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
1120ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
112126f8f3dbSMatt Arsenault
112226f8f3dbSMatt Arsenault if (MFI->hasKernargSegmentPtr())
1123ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
112426f8f3dbSMatt Arsenault
112526f8f3dbSMatt Arsenault if (MFI->hasDispatchID())
1126ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
112726f8f3dbSMatt Arsenault
112826f8f3dbSMatt Arsenault if (MFI->hasFlatScratchInit())
1129ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
113026f8f3dbSMatt Arsenault
113148f29f21STom Stellard if (MFI->hasDispatchPtr())
1132ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
113348f29f21STom Stellard
11345b504976SNicolai Haehnle if (STM.isXNACKEnabled())
1135ca0e7f64SKonstantin Zhuravlyov Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
11365b504976SNicolai Haehnle
1137b65fa483SGuillaume Chatelet Align MaxKernArgAlign;
11384bec7d42SMatt Arsenault Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1139b03dd8daSMatt Arsenault Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1140b03dd8daSMatt Arsenault Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1141b03dd8daSMatt Arsenault Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1142b03dd8daSMatt Arsenault Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
114345bb48eaSTom Stellard
1144b65fa483SGuillaume Chatelet // kernarg_segment_alignment is specified as log of the alignment.
1145b65fa483SGuillaume Chatelet // The minimum alignment is 16.
114690ff1487SMatt Arsenault // FIXME: The metadata treats the minimum as 4?
1147b65fa483SGuillaume Chatelet Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
114845bb48eaSTom Stellard }
114945bb48eaSTom Stellard
PrintAsmOperand(const MachineInstr * MI,unsigned OpNo,const char * ExtraCode,raw_ostream & O)115045bb48eaSTom Stellard bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
115145bb48eaSTom Stellard const char *ExtraCode, raw_ostream &O) {
115236cd1859SMatt Arsenault // First try the generic code, which knows about modifiers like 'c' and 'n'.
11535277b3ffSNick Desaulniers if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
115436cd1859SMatt Arsenault return false;
115536cd1859SMatt Arsenault
115645bb48eaSTom Stellard if (ExtraCode && ExtraCode[0]) {
115745bb48eaSTom Stellard if (ExtraCode[1] != 0)
115845bb48eaSTom Stellard return true; // Unknown modifier.
115945bb48eaSTom Stellard
116045bb48eaSTom Stellard switch (ExtraCode[0]) {
116145bb48eaSTom Stellard case 'r':
116245bb48eaSTom Stellard break;
116336cd1859SMatt Arsenault default:
116436cd1859SMatt Arsenault return true;
116545bb48eaSTom Stellard }
116645bb48eaSTom Stellard }
116745bb48eaSTom Stellard
116836cd1859SMatt Arsenault // TODO: Should be able to support other operand types like globals.
116936cd1859SMatt Arsenault const MachineOperand &MO = MI->getOperand(OpNo);
117036cd1859SMatt Arsenault if (MO.isReg()) {
117136cd1859SMatt Arsenault AMDGPUInstPrinter::printRegOperand(MO.getReg(), O,
117236cd1859SMatt Arsenault *MF->getSubtarget().getRegisterInfo());
117345bb48eaSTom Stellard return false;
1174b087b91cSDmitry Preobrazhensky } else if (MO.isImm()) {
1175b087b91cSDmitry Preobrazhensky int64_t Val = MO.getImm();
1176b087b91cSDmitry Preobrazhensky if (AMDGPU::isInlinableIntLiteral(Val)) {
1177b087b91cSDmitry Preobrazhensky O << Val;
1178b087b91cSDmitry Preobrazhensky } else if (isUInt<16>(Val)) {
11791c9d6810SDmitry Preobrazhensky O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1180b087b91cSDmitry Preobrazhensky } else if (isUInt<32>(Val)) {
11811c9d6810SDmitry Preobrazhensky O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1182b087b91cSDmitry Preobrazhensky } else {
1183b087b91cSDmitry Preobrazhensky O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
118445bb48eaSTom Stellard }
1185b087b91cSDmitry Preobrazhensky return false;
1186b087b91cSDmitry Preobrazhensky }
118736cd1859SMatt Arsenault return true;
118836cd1859SMatt Arsenault }
11892b08f6afSSebastian Neubauer
getAnalysisUsage(AnalysisUsage & AU) const11902b08f6afSSebastian Neubauer void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
11912b08f6afSSebastian Neubauer AU.addRequired<AMDGPUResourceUsageAnalysis>();
11922b08f6afSSebastian Neubauer AU.addPreserved<AMDGPUResourceUsageAnalysis>();
11932b08f6afSSebastian Neubauer AsmPrinter::getAnalysisUsage(AU);
11942b08f6afSSebastian Neubauer }
119567357739SVang Thao
emitResourceUsageRemarks(const MachineFunction & MF,const SIProgramInfo & CurrentProgramInfo,bool isModuleEntryFunction,bool hasMAIInsts)119667357739SVang Thao void AMDGPUAsmPrinter::emitResourceUsageRemarks(
119767357739SVang Thao const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
119867357739SVang Thao bool isModuleEntryFunction, bool hasMAIInsts) {
119967357739SVang Thao if (!ORE)
120067357739SVang Thao return;
120167357739SVang Thao
120267357739SVang Thao const char *Name = "kernel-resource-usage";
120367357739SVang Thao const char *Indent = " ";
120467357739SVang Thao
120567357739SVang Thao // If the remark is not specifically enabled, do not output to yaml
120667357739SVang Thao LLVMContext &Ctx = MF.getFunction().getContext();
120767357739SVang Thao if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
120867357739SVang Thao return;
120967357739SVang Thao
121067357739SVang Thao auto EmitResourceUsageRemark = [&](StringRef RemarkName,
121167357739SVang Thao StringRef RemarkLabel, auto Argument) {
121267357739SVang Thao // Add an indent for every line besides the line with the kernel name. This
121367357739SVang Thao // makes it easier to tell which resource usage go with which kernel since
121467357739SVang Thao // the kernel name will always be displayed first.
121567357739SVang Thao std::string LabelStr = RemarkLabel.str() + ": ";
121667357739SVang Thao if (!RemarkName.equals("FunctionName"))
121767357739SVang Thao LabelStr = Indent + LabelStr;
121867357739SVang Thao
121967357739SVang Thao ORE->emit([&]() {
122067357739SVang Thao return MachineOptimizationRemarkAnalysis(Name, RemarkName,
122167357739SVang Thao MF.getFunction().getSubprogram(),
122267357739SVang Thao &MF.front())
122367357739SVang Thao << LabelStr << ore::NV(RemarkName, Argument);
122467357739SVang Thao });
122567357739SVang Thao };
122667357739SVang Thao
122767357739SVang Thao // FIXME: Formatting here is pretty nasty because clang does not accept
122867357739SVang Thao // newlines from diagnostics. This forces us to emit multiple diagnostic
122967357739SVang Thao // remarks to simulate newlines. If and when clang does accept newlines, this
123067357739SVang Thao // formatting should be aggregated into one remark with newlines to avoid
123167357739SVang Thao // printing multiple diagnostic location and diag opts.
123267357739SVang Thao EmitResourceUsageRemark("FunctionName", "Function Name",
123367357739SVang Thao MF.getFunction().getName());
123467357739SVang Thao EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
123567357739SVang Thao EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
123667357739SVang Thao if (hasMAIInsts)
123767357739SVang Thao EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
123867357739SVang Thao EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
123967357739SVang Thao CurrentProgramInfo.ScratchSize);
124067357739SVang Thao EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
124167357739SVang Thao CurrentProgramInfo.Occupancy);
124267357739SVang Thao EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
124367357739SVang Thao CurrentProgramInfo.SGPRSpill);
124467357739SVang Thao EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
124567357739SVang Thao CurrentProgramInfo.VGPRSpill);
124667357739SVang Thao if (isModuleEntryFunction)
124767357739SVang Thao EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
124867357739SVang Thao CurrentProgramInfo.LDSSize);
124967357739SVang Thao }
1250