//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

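/// Return a MachineOperand for the 32-bit half of a 64-bit operand selected
/// by \p SubIdx (AMDGPU::sub0 or AMDGPU::sub1). A register operand is split
/// by copying the requested subregister into a fresh SGPR_32 virtual
/// register; an immediate operand is split arithmetically into its low or
/// high 32 bits.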
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

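/// Select a 64-bit scalar G_ADD as an S_ADD_U32/S_ADDC_U32 pair whose results
/// are recombined with a REG_SEQUENCE; other widths are rejected here. A
/// rough sketch of the emitted MIR (register names are illustrative only):
///   %lo:sreg_32 = S_ADD_U32  %a.sub0, %b.sub0
///   %hi:sreg_32 = S_ADDC_U32 %a.sub1, %b.sub1
///   %dst        = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1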
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
  if (RC)
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();

  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
                                                 MachineInstr &I,
                                                 CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

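/// Materialize a 32- or 64-bit G_CONSTANT / G_FCONSTANT. The immediate is
/// first canonicalized from CImm/FPImm to a plain Imm operand. A 32-bit value
/// becomes a single S_MOV_B32 or V_MOV_B32_e32, chosen from the destination's
/// register bank (or class); a 64-bit value is built from two 32-bit moves
/// joined by a REG_SEQUENCE.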
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

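/// Walk the chain of G_GEPs feeding the pointer operand of \p Load and record
/// one GEPInfo entry per G_GEP: its constant offset (if any) plus the address
/// components living in SGPRs and VGPRs. selectSMRD() consumes this
/// information when choosing an SMRD addressing mode.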
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

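/// Try to select a uniform load from the constant address space as a scalar
/// (SMRD) load. Gives up if the load is not provably uniform or if any
/// address component lives in a VGPR. When the address comes from a
/// single-SGPR G_GEP, the offset is emitted as an encoded immediate, as a
/// CI-only 32-bit literal, or as a separate SGPR offset register, tried in
/// that order; otherwise the pointer operand is used directly with a zero
/// offset.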
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addImm(EncodedImm)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addImm(EncodedImm)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addReg(OffsetReg)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(0)
                               .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)  // offset
                               .addImm(0)  // glc
                               .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

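/// Main entry point: route pre-isel generic opcodes to the hand-written
/// handlers above and leave everything else to the TableGen-generated
/// selectImpl(). Non-generic instructions are accepted as-is, except that
/// plain COPYs are constrained through selectCOPY().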
bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}