//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}

// Return the low or high 32-bit half of a 64-bit operand, selected by SubIdx.
// Register operands are split with a subregister COPY; immediate operands are
// split arithmetically.
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

// Select a 64-bit G_ADD as an S_ADD_U32 / S_ADDC_U32 pair on the scalar unit
// and recombine the two halves with a REG_SEQUENCE.
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  DebugLoc DL = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size == 32) {
    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  assert(Size == 64);

  DebugLoc DL = I.getDebugLoc();
  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const APInt &Imm = I.getOperand(1).getCImm()->getValue();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

// Walk the chain of G_GEPs feeding this load and record, for each one, its
// constant offset and which address components live in SGPRs vs. VGPRs.
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

// Map a dword SMRD base opcode to the variant matching LoadSize (in bits).
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

// Try to select a uniform load from the constant address space as an SMRD
// instruction.  Returns false if the load does not qualify.
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode =
          getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addImm(EncodedImm)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addImm(EncodedImm)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addReg(OffsetReg)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(0)
                               .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)  // offset
                               .addImm(0)  // glc
                               .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

// Top-level entry point: dispatch the generic opcodes handled so far to the
// per-opcode selectors above.
bool AMDGPUInstructionSelector::select(MachineInstr &I) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}