//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

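// On AMDGPU a G_GEP is just a 64-bit add of the base pointer and the byte
// offset, so it is selected exactly like a G_ADD.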
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)
          .addImm(0)
          .addImm(0);

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

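// Walk the chain of G_GEPs feeding a load's address and record, for each one,
// its constant offset together with the SGPR and VGPR registers that make up
// the rest of the address. selectSMRD uses this to pick an addressing mode.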
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

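// Try to select a scalar (SMRD) load. This only applies to uniform loads from
// the constant address space whose address has no VGPR components; on failure
// the caller falls back to a FLAT load.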
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(EncodedImm)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(EncodedImm)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addReg(OffsetReg)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                           .addReg(PtrReg)
                           .addImm(0)
                           .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(0))
                           .addReg(PtrReg)
                           .addImm(0)
                           .addImm(0)
                           .addImm(0);

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}