//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// The SIFixupVectorISel pass cleans up vector issues left behind by
/// instruction selection. Currently it converts GLOBAL_{LOAD|STORE}_*
/// and GLOBAL_ATOMIC_* instructions into their _SADDR variants,
/// feeding the SGPR into the saddr field of the new instruction.
/// We currently handle a REG_SEQUENCE feeding the vaddr
/// and decompose it into a base and index.
///
/// Transform:
///  %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
///  %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                     %24:vgpr_32, %19:sreg_64_xexec
///  %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
///  %11:vreg_64 = COPY %16:vreg_64
///  %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
///  %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
///  %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
///  %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fixup-vector-isel"

using namespace llvm;

// Command-line override; the transformation is off by default.
static cl::opt<bool> EnableGlobalSGPRAddr(
  "amdgpu-enable-global-sgpr-addr",
  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
  cl::init(false));

STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");

namespace {

class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
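
// INITIALIZE_PASS below registers the pass under DEBUG_TYPE
// ("si-fixup-vector-isel"), so it can be exercised in isolation on MIR
// input. An illustrative invocation (not a RUN line from an existing test):
//
//   llc -march=amdgcn -amdgpu-enable-global-sgpr-addr \
//       -run-pass=si-fixup-vector-isel -o - input.mir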

INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)

char SIFixupVectorISel::ID = 0;

char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;

FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}

// Walk the chain of defs feeding a global memop's vaddr operand, looking for
// the sgpr + vgpr add lowered from an ADD_64_PSEUDO. On success, return the
// 64-bit SGPR base in BaseReg and the 64-bit VGPR index in IndexReg.
static bool findSRegBaseAndIndex(MachineOperand *Op,
                                 unsigned &BaseReg,
                                 unsigned &IndexReg,
                                 MachineRegisterInfo &MRI,
                                 const SIRegisterInfo *TRI) {
  SmallVector<MachineOperand *, 8> Worklist;
  Worklist.push_back(Op);
  while (!Worklist.empty()) {
    MachineOperand *WOp = Worklist.pop_back_val();
    if (!WOp->isReg() ||
        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
      continue;
    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
    switch (DefInst->getOpcode()) {
    default:
      continue;
    case AMDGPU::COPY:
      Worklist.push_back(&DefInst->getOperand(1));
      break;
    case AMDGPU::REG_SEQUENCE:
      if (DefInst->getNumOperands() != 5)
        continue;
      Worklist.push_back(&DefInst->getOperand(1));
      Worklist.push_back(&DefInst->getOperand(3));
      break;
    case AMDGPU::V_ADD_I32_e64:
      // The V_ADD_* and its analogous V_ADDC_* are generated by
      // a previous pass which lowered from an ADD_64_PSEUDO,
      // which generates subregs to break up the 64 bit args.
      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      BaseReg = DefInst->getOperand(2).getReg();
      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      IndexReg = DefInst->getOperand(3).getReg();
      // Chase the IndexReg.
      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the reg class is 64 bit for Index.
      // If the Index register is a subreg, we want it to reference
      // a 64 bit register which we will use as the Index reg.
      const TargetRegisterClass *IdxRC, *BaseRC;
      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
        continue;
      IndexReg = MI->getOperand(1).getReg();
      // Chase the BaseReg.
      MI = MRI.getUniqueVRegDef(BaseReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64 bit for Base.
      BaseReg = MI->getOperand(1).getReg();
      BaseRC = MRI.getRegClass(BaseReg);
      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
        continue;
      // Make sure Base is SReg and Index is VReg.
      if (!TRI->isSGPRReg(MRI, BaseReg))
        return false;
      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
        return false;
      // Clear any kill flags on the Index and Base regs; both are used later.
      MRI.clearKillFlags(IndexReg);
      MRI.clearKillFlags(BaseReg);
      return true;
    }
  }
  return false;
}
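
// For reference, the operand order assembled for the new _SADDR form in
// fixupGlobalSaddr below is:
//   [vdst,] vaddr (VGPR index), [vdata,] saddr (SGPR base), offset,
//   [glc,] slc [, vdst_in]
// where vdst is present only for loads, vdata only for stores/atomics,
// glc only for non-atomics, and vdst_in only for _D16 loads.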

// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  if (!EnableGlobalSGPRAddr)
    return false;
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // Need a Base and Index, or we can't transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR memory instruction.
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob = nullptr;
    NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics don't have a GLC operand, so omit the field if it is absent.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 variants have a vdst_in operand; copy it in.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                   AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old global memop instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}

bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool FuncModified = false;
  for (MachineBasicBlock &MBB : MF) {
    // Clean up missed SADDR opportunities from ISel.
    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
  }
  return FuncModified;
}
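
// Pipeline note: the pass is constructed via createSIFixupVectorISelPass()
// above. In the in-tree AMDGPU target it is expected to be scheduled shortly
// after instruction selection; a sketch of a hypothetical PassConfig hook
// (not code from this file):
//
//   bool GCNPassConfig::addInstSelector() {
//     ...
//     addPass(createSIFixupVectorISelPass());
//     return false;
//   }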