//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-img-init"

using namespace llvm;

namespace {

class SIAddIMGInit : public MachineFunctionPass {
public:
  static char ID;

public:
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }

bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields.
        // There shouldn't be any at this point.
        assert(TFE && LWE && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE is non-zero.
          // We have to insert a suitable initialization of the result value
          // and tie this to the dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();

          int DstIdx =
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
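          // Worked example (illustration only, not from the original source):
          // the TFE/LWE status dword is written to the VGPR immediately after
          // the image data. With dmask = 0xB (three bits set) and no D16, the
          // data occupies the first three dwords of the result, so the status
          // dword is the fourth, i.e. sub-register index 4 (indices count
          // from 1). With packed D16 the data takes half as many dwords,
          // rounded up, so the status dword index becomes 3 in that case.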
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // Check that the dmask operand was found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case.
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1.
          // When D16 is used we want the next whole VGPR after the write data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5,
                        "Subreg indices different from expected");

          bool Packed = !ST.hasUnpackedD16VMem();

          unsigned InitIdx =
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon the attempt if the dst size isn't large enough
          // - this is in fact an error but it is picked up elsewhere and
          //   reported correctly.
          uint32_t DstSize =
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
            continue;

          // Create a register for the initialization value.
          unsigned PrevDst =
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // The final initialized value will be in here.

          // If the PRTStrictNull feature is enabled (the default), initialize
          // all the result registers to 0, otherwise just the error indication
          // register (VGPRn+1).
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

          if (DstSize == 1) {
            // In this case we can just initialize the result directly.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
                .addImm(0);
            NewDst = PrevDst;
          } else {
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
              NewDst =
                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
              // Initialize the dword.
              unsigned SubReg =
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
                  .addImm(0);
              // Insert it into the super-register.
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
                  .addReg(PrevDst)
                  .addReg(SubReg)
                  .addImm(CurrIdx);

              PrevDst = NewDst;
            }
          }

          // Add the initialized value as an implicit operand.
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just-added implicit operand to the dst.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

          Changed = true;
        }
      }
    }
  }

  return Changed;
}