1*c9157d92SDimitry Andric //===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==// 2*c9157d92SDimitry Andric // 3*c9157d92SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*c9157d92SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*c9157d92SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*c9157d92SDimitry Andric // 7*c9157d92SDimitry Andric //===----------------------------------------------------------------------===// 8*c9157d92SDimitry Andric // 9*c9157d92SDimitry Andric /// \file 10*c9157d92SDimitry Andric /// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU 11*c9157d92SDimitry Andric /// instructions that produce single-use VGPR values. If the value is forwarded 12*c9157d92SDimitry Andric /// to the consumer instruction prior to VGPR writeback, the hardware can 13*c9157d92SDimitry Andric /// then skip (kill) the VGPR write. 14*c9157d92SDimitry Andric // 15*c9157d92SDimitry Andric //===----------------------------------------------------------------------===// 16*c9157d92SDimitry Andric 17*c9157d92SDimitry Andric #include "AMDGPU.h" 18*c9157d92SDimitry Andric #include "GCNSubtarget.h" 19*c9157d92SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20*c9157d92SDimitry Andric #include "SIInstrInfo.h" 21*c9157d92SDimitry Andric #include "llvm/ADT/DenseMap.h" 22*c9157d92SDimitry Andric #include "llvm/ADT/STLExtras.h" 23*c9157d92SDimitry Andric #include "llvm/ADT/StringRef.h" 24*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 25*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 26*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 28*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 29*c9157d92SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 30*c9157d92SDimitry Andric #include "llvm/CodeGen/Register.h" 31*c9157d92SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 32*c9157d92SDimitry Andric #include "llvm/IR/DebugLoc.h" 33*c9157d92SDimitry Andric #include "llvm/MC/MCRegister.h" 34*c9157d92SDimitry Andric #include "llvm/Pass.h" 35*c9157d92SDimitry Andric 36*c9157d92SDimitry Andric using namespace llvm; 37*c9157d92SDimitry Andric 38*c9157d92SDimitry Andric #define DEBUG_TYPE "amdgpu-insert-single-use-vdst" 39*c9157d92SDimitry Andric 40*c9157d92SDimitry Andric namespace { 41*c9157d92SDimitry Andric class AMDGPUInsertSingleUseVDST : public MachineFunctionPass { 42*c9157d92SDimitry Andric private: 43*c9157d92SDimitry Andric const SIInstrInfo *SII; 44*c9157d92SDimitry Andric 45*c9157d92SDimitry Andric public: 46*c9157d92SDimitry Andric static char ID; 47*c9157d92SDimitry Andric AMDGPUInsertSingleUseVDST()48*c9157d92SDimitry Andric AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {} 49*c9157d92SDimitry Andric emitSingleUseVDST(MachineInstr & MI) const50*c9157d92SDimitry Andric void emitSingleUseVDST(MachineInstr &MI) const { 51*c9157d92SDimitry Andric // Mark the following instruction as a single-use producer: 52*c9157d92SDimitry Andric // s_singleuse_vdst { supr0: 1 } 53*c9157d92SDimitry Andric BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST)) 54*c9157d92SDimitry Andric .addImm(0x1); 55*c9157d92SDimitry Andric } 56*c9157d92SDimitry Andric runOnMachineFunction(MachineFunction & MF)57*c9157d92SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 58*c9157d92SDimitry Andric const auto &ST = MF.getSubtarget<GCNSubtarget>(); 59*c9157d92SDimitry Andric if (!ST.hasVGPRSingleUseHintInsts()) 60*c9157d92SDimitry Andric return false; 61*c9157d92SDimitry Andric 62*c9157d92SDimitry Andric SII = ST.getInstrInfo(); 63*c9157d92SDimitry Andric const auto *TRI = &SII->getRegisterInfo(); 64*c9157d92SDimitry Andric bool InstructionEmitted = false; 65*c9157d92SDimitry Andric 66*c9157d92SDimitry Andric for (MachineBasicBlock &MBB : MF) { 67*c9157d92SDimitry Andric DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits 68*c9157d92SDimitry Andric 69*c9157d92SDimitry Andric // Handle boundaries at the end of basic block separately to avoid 70*c9157d92SDimitry Andric // false positives. If they are live at the end of a basic block then 71*c9157d92SDimitry Andric // assume it has more uses later on. 72*c9157d92SDimitry Andric for (const auto &Liveouts : MBB.liveouts()) 73*c9157d92SDimitry Andric RegisterUseCount[Liveouts.PhysReg] = 2; 74*c9157d92SDimitry Andric 75*c9157d92SDimitry Andric for (MachineInstr &MI : reverse(MBB.instrs())) { 76*c9157d92SDimitry Andric // All registers in all operands need to be single use for an 77*c9157d92SDimitry Andric // instruction to be marked as a single use producer. 78*c9157d92SDimitry Andric bool AllProducerOperandsAreSingleUse = true; 79*c9157d92SDimitry Andric 80*c9157d92SDimitry Andric for (const auto &Operand : MI.operands()) { 81*c9157d92SDimitry Andric if (!Operand.isReg()) 82*c9157d92SDimitry Andric continue; 83*c9157d92SDimitry Andric const auto Reg = Operand.getReg(); 84*c9157d92SDimitry Andric 85*c9157d92SDimitry Andric // Count the number of times each register is read. 86*c9157d92SDimitry Andric if (Operand.readsReg()) 87*c9157d92SDimitry Andric RegisterUseCount[Reg]++; 88*c9157d92SDimitry Andric 89*c9157d92SDimitry Andric // Do not attempt to optimise across exec mask changes. 90*c9157d92SDimitry Andric if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) { 91*c9157d92SDimitry Andric for (auto &UsedReg : RegisterUseCount) 92*c9157d92SDimitry Andric UsedReg.second = 2; 93*c9157d92SDimitry Andric } 94*c9157d92SDimitry Andric 95*c9157d92SDimitry Andric // If we are at the point where the register first became live, 96*c9157d92SDimitry Andric // check if the operands are single use. 97*c9157d92SDimitry Andric if (!MI.modifiesRegister(Reg, TRI)) 98*c9157d92SDimitry Andric continue; 99*c9157d92SDimitry Andric if (RegisterUseCount[Reg] > 1) 100*c9157d92SDimitry Andric AllProducerOperandsAreSingleUse = false; 101*c9157d92SDimitry Andric // Reset uses count when a register is no longer live. 102*c9157d92SDimitry Andric RegisterUseCount.erase(Reg); 103*c9157d92SDimitry Andric } 104*c9157d92SDimitry Andric if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) { 105*c9157d92SDimitry Andric // TODO: Replace with candidate logging for instruction grouping 106*c9157d92SDimitry Andric // later. 107*c9157d92SDimitry Andric emitSingleUseVDST(MI); 108*c9157d92SDimitry Andric InstructionEmitted = true; 109*c9157d92SDimitry Andric } 110*c9157d92SDimitry Andric } 111*c9157d92SDimitry Andric } 112*c9157d92SDimitry Andric return InstructionEmitted; 113*c9157d92SDimitry Andric } 114*c9157d92SDimitry Andric }; 115*c9157d92SDimitry Andric } // namespace 116*c9157d92SDimitry Andric 117*c9157d92SDimitry Andric char AMDGPUInsertSingleUseVDST::ID = 0; 118*c9157d92SDimitry Andric 119*c9157d92SDimitry Andric char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID; 120*c9157d92SDimitry Andric 121*c9157d92SDimitry Andric INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE, 122*c9157d92SDimitry Andric "AMDGPU Insert SingleUseVDST", false, false) 123