//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
      ,AMDGPUASI(STI.getAMDGPUAS())
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(),
                                     MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  // Split the 64-bit add into an S_ADD_U32 of the low halves followed by an
  // S_ADDC_U32 of the high halves, then recombine the result with a
  // REG_SEQUENCE.
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
  if (RC)
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();

  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
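    // The kernarg segment pointer is a preloaded argument; the COPY above
    // reads it through its live-in virtual register, so the generic
    // intrinsic can now be erased.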
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ?
                                          &AMDGPU::SReg_32_XM0RegClass :
                                          &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  // If the address is a GEP with a single SGPR base, try to fold its constant
  // offset into the SMRD's immediate or SGPR offset operand.
  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addImm(EncodedImm)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addImm(EncodedImm)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR,
                             LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addReg(OffsetReg)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(0)
                               .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)  // offset
                               .addImm(0)  // glc
                               .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}