//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instruction that uses tfe or lwe requires an initialization of
/// the result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
///
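/// For example, an image load with tfe set returns an extra dword of error
/// status after its result data. This pass zero-initializes the destination
/// register (every dword when the PRTStrictNull feature is enabled, otherwise
/// only the status dword) and ties the initialized value to the instruction's
/// vdata result so that the zeros end up in the instruction's result
/// registers.
///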
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-img-init"

using namespace llvm;

namespace {

class SIAddIMGInit : public MachineFunctionPass {
public:
  static char ID;

public:
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }

bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
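      // Consider only MIMG instructions that do not store, i.e. those whose
      // vdata operand is a result that may need initializing.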
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields.
        // There shouldn't be any at this point.
        assert(TFE && LWE && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE is non-zero, so we have to insert a
          // suitable initialization of the result value and tie this to the
          // dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();

          int DstIdx =
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // Check that the dmask operand is found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes, taking into account the
          // Gather4 special case.
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1.
          // When D16 is enabled we want the next whole VGPR after the write
          // data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5,
                        "Subreg indices different from expected");

          bool Packed = !ST.hasUnpackedD16VMem();

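          // With packed D16 two 16-bit components share each result dword, so
          // the data occupies ceil(ActiveLanes / 2) dwords; otherwise it
          // occupies ActiveLanes dwords. InitIdx is the subreg index (counted
          // from 1) of the TFE/LWE status dword that follows the data, i.e.
          // the total number of dwords that may need initializing.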
          unsigned InitIdx =
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon the attempt if the dst size isn't large enough - this is
          // in fact an error, but it is picked up elsewhere and reported
          // correctly.
          uint32_t DstSize =
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
            continue;

          // Create a register for the initialization value.
          unsigned PrevDst =
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here

          // If the PRTStrictNull feature is enabled (the default), initialize
          // all the result registers to 0; otherwise initialize just the
          // error indication register (VGPRn+1).
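          // SizeLeft counts how many dwords remain to be zeroed, and CurrIdx
          // is the 1-based subreg index of the next dword to initialize.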
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

          if (DstSize == 1) {
            // In this case we can just initialize the result directly
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
                .addImm(0);
            NewDst = PrevDst;
          } else {
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
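            // Build the initialized value one dword at a time: start from an
            // undefined super-register and insert a zeroed VGPR into each
            // dword that needs initializing via a chain of INSERT_SUBREGs.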
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
              NewDst =
                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
              // Initialize dword
              unsigned SubReg =
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
                  .addImm(0);
              // Insert into the super-reg
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
                  .addReg(PrevDst)
                  .addReg(SubReg)
                  .addImm(CurrIdx);

              PrevDst = NewDst;
            }
          }

          // Add as an implicit operand
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just-added implicit operand to the dst so the register
          // allocator assigns the initialized value and the instruction's
          // result to the same registers.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

          Changed = true;
        }
      }
    }
  }

  return Changed;
}