//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  if (Reg == AMDGPU::SCC)
    return true;

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC)
    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
           MRI.getType(Reg).getSizeInBits() == 1;

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
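  // A copy out of the scc bank with a 64-bit destination is really an
  // scc -> vcc copy, and is lowered below to a compare with zero; 32-bit
  // destinations fall through to the generic operand constraining loop.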
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);

    // We have a copy from a 32-bit to 64-bit register. This happens
    // when we are selecting scc->vcc copies.
    if (DstSize == 64) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                           .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                          .addDef(I.getOperand(0).getReg())
                          .addReg(I.getOperand(1).getReg())
                          .addReg(I.getOperand(2).getReg())
                          .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI =
        MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode = getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode = getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I,
    CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool
AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select.
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space.
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
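    // Copying through a fresh 32-bit SGPR before copying into the SCC physical
    // register keeps the register class of the original scc-producing def
    // unchanged.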
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect
    //
    // TODO: Use V_AND_B32 for unsigned when the constant mask is an inline
    // immediate, for smaller code size.
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
      .addReg(SrcReg)
      .addImm(SrcSize << 16);
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
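  // G_FCONSTANT carries an FPImm and G_CONSTANT a CImm; both are rewritten
  // below into a plain integer immediate so the rest of the function only has
  // to deal with one operand kind.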
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(0))
          .addReg(PtrReg)
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I,
                                            CoverageInfo);
  case TargetOpcode::G_ICMP:
    return selectG_ICMP(I);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
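  // Only offsets that can be encoded as an unsigned 32-bit literal are
  // selected here.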
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}