1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AMDGPU. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUInstructionSelector.h" 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUSubtarget.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 24 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 27 #include "llvm/CodeGen/GlobalISel/Utils.h" 28 #include "llvm/CodeGen/MachineBasicBlock.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineInstr.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #define DEBUG_TYPE "amdgpu-isel" 38 39 using namespace llvm; 40 using namespace MIPatternMatch; 41 42 #define GET_GLOBALISEL_IMPL 43 #define AMDGPUSubtarget GCNSubtarget 44 #include "AMDGPUGenGlobalISel.inc" 45 #undef GET_GLOBALISEL_IMPL 46 #undef AMDGPUSubtarget 47 48 AMDGPUInstructionSelector::AMDGPUInstructionSelector( 49 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, 50 const AMDGPUTargetMachine &TM) 51 : InstructionSelector(), TII(*STI.getInstrInfo()), 52 TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), 53 STI(STI), 54 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), 55 #define GET_GLOBALISEL_PREDICATES_INIT 56 #include "AMDGPUGenGlobalISel.inc" 57 #undef GET_GLOBALISEL_PREDICATES_INIT 58 #define GET_GLOBALISEL_TEMPORARIES_INIT 59 #include "AMDGPUGenGlobalISel.inc" 60 #undef GET_GLOBALISEL_TEMPORARIES_INIT 61 { 62 } 63 64 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } 65 66 void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB, 67 CodeGenCoverage &CoverageInfo) { 68 MRI = &MF.getRegInfo(); 69 InstructionSelector::setupMF(MF, KB, CoverageInfo); 70 } 71 72 static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { 73 if (Register::isPhysicalRegister(Reg)) 74 return Reg == AMDGPU::SCC; 75 76 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 77 const TargetRegisterClass *RC = 78 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 79 if (RC) { 80 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the 81 // context of the register bank has been lost. 
82 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID) 83 return false; 84 const LLT Ty = MRI.getType(Reg); 85 return Ty.isValid() && Ty.getSizeInBits() == 1; 86 } 87 88 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 89 return RB->getID() == AMDGPU::SCCRegBankID; 90 } 91 92 bool AMDGPUInstructionSelector::isVCC(Register Reg, 93 const MachineRegisterInfo &MRI) const { 94 if (Register::isPhysicalRegister(Reg)) 95 return Reg == TRI.getVCC(); 96 97 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 98 const TargetRegisterClass *RC = 99 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 100 if (RC) { 101 const LLT Ty = MRI.getType(Reg); 102 return RC->hasSuperClassEq(TRI.getBoolRC()) && 103 Ty.isValid() && Ty.getSizeInBits() == 1; 104 } 105 106 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 107 return RB->getID() == AMDGPU::VCCRegBankID; 108 } 109 110 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { 111 const DebugLoc &DL = I.getDebugLoc(); 112 MachineBasicBlock *BB = I.getParent(); 113 I.setDesc(TII.get(TargetOpcode::COPY)); 114 115 const MachineOperand &Src = I.getOperand(1); 116 MachineOperand &Dst = I.getOperand(0); 117 Register DstReg = Dst.getReg(); 118 Register SrcReg = Src.getReg(); 119 120 if (isVCC(DstReg, *MRI)) { 121 if (SrcReg == AMDGPU::SCC) { 122 const TargetRegisterClass *RC 123 = TRI.getConstrainedRegClassForOperand(Dst, *MRI); 124 if (!RC) 125 return true; 126 return RBI.constrainGenericRegister(DstReg, *RC, *MRI); 127 } 128 129 if (!isVCC(SrcReg, *MRI)) { 130 // TODO: Should probably leave the copy and let copyPhysReg expand it. 131 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI)) 132 return false; 133 134 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) 135 .addImm(0) 136 .addReg(SrcReg); 137 138 if (!MRI->getRegClassOrNull(SrcReg)) 139 MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI)); 140 I.eraseFromParent(); 141 return true; 142 } 143 144 const TargetRegisterClass *RC = 145 TRI.getConstrainedRegClassForOperand(Dst, *MRI); 146 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) 147 return false; 148 149 // Don't constrain the source register to a class so the def instruction 150 // handles it (unless it's undef). 151 // 152 // FIXME: This is a hack. When selecting the def, we neeed to know 153 // specifically know that the result is VCCRegBank, and not just an SGPR 154 // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 155 if (Src.isUndef()) { 156 const TargetRegisterClass *SrcRC = 157 TRI.getConstrainedRegClassForOperand(Src, *MRI); 158 if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) 159 return false; 160 } 161 162 return true; 163 } 164 165 for (const MachineOperand &MO : I.operands()) { 166 if (Register::isPhysicalRegister(MO.getReg())) 167 continue; 168 169 const TargetRegisterClass *RC = 170 TRI.getConstrainedRegClassForOperand(MO, *MRI); 171 if (!RC) 172 continue; 173 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); 174 } 175 return true; 176 } 177 178 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { 179 const Register DefReg = I.getOperand(0).getReg(); 180 const LLT DefTy = MRI->getType(DefReg); 181 182 // TODO: Verify this doesn't have insane operands (i.e. 
VGPR to SGPR copy) 183 184 const RegClassOrRegBank &RegClassOrBank = 185 MRI->getRegClassOrRegBank(DefReg); 186 187 const TargetRegisterClass *DefRC 188 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 189 if (!DefRC) { 190 if (!DefTy.isValid()) { 191 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 192 return false; 193 } 194 195 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 196 if (RB.getID() == AMDGPU::SCCRegBankID) { 197 LLVM_DEBUG(dbgs() << "illegal scc phi\n"); 198 return false; 199 } 200 201 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI); 202 if (!DefRC) { 203 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 204 return false; 205 } 206 } 207 208 I.setDesc(TII.get(TargetOpcode::PHI)); 209 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI); 210 } 211 212 MachineOperand 213 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, 214 const TargetRegisterClass &SubRC, 215 unsigned SubIdx) const { 216 217 MachineInstr *MI = MO.getParent(); 218 MachineBasicBlock *BB = MO.getParent()->getParent(); 219 Register DstReg = MRI->createVirtualRegister(&SubRC); 220 221 if (MO.isReg()) { 222 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); 223 Register Reg = MO.getReg(); 224 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) 225 .addReg(Reg, 0, ComposedSubIdx); 226 227 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), 228 MO.isKill(), MO.isDead(), MO.isUndef(), 229 MO.isEarlyClobber(), 0, MO.isDebug(), 230 MO.isInternalRead()); 231 } 232 233 assert(MO.isImm()); 234 235 APInt Imm(64, MO.getImm()); 236 237 switch (SubIdx) { 238 default: 239 llvm_unreachable("do not know to split immediate with this sub index."); 240 case AMDGPU::sub0: 241 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue()); 242 case AMDGPU::sub1: 243 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue()); 244 } 245 } 246 247 static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { 248 switch (Opc) { 249 case AMDGPU::G_AND: 250 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; 251 case AMDGPU::G_OR: 252 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; 253 case AMDGPU::G_XOR: 254 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32; 255 default: 256 llvm_unreachable("not a bit op"); 257 } 258 } 259 260 bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { 261 MachineOperand &Dst = I.getOperand(0); 262 MachineOperand &Src0 = I.getOperand(1); 263 MachineOperand &Src1 = I.getOperand(2); 264 Register DstReg = Dst.getReg(); 265 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); 266 267 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 268 if (DstRB->getID() == AMDGPU::VCCRegBankID) { 269 const TargetRegisterClass *RC = TRI.getBoolRC(); 270 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), 271 RC == &AMDGPU::SReg_64RegClass); 272 I.setDesc(TII.get(InstOpc)); 273 274 // FIXME: Hack to avoid turning the register bank into a register class. 275 // The selector for G_ICMP relies on seeing the register bank for the result 276 // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will 277 // be ambiguous whether it's a scalar or vector bool. 
278 if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg())) 279 MRI->setRegClass(Src0.getReg(), RC); 280 if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg())) 281 MRI->setRegClass(Src1.getReg(), RC); 282 283 return RBI.constrainGenericRegister(DstReg, *RC, *MRI); 284 } 285 286 // TODO: Should this allow an SCC bank result, and produce a copy from SCC for 287 // the result? 288 if (DstRB->getID() == AMDGPU::SGPRRegBankID) { 289 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); 290 I.setDesc(TII.get(InstOpc)); 291 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 292 } 293 294 return false; 295 } 296 297 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { 298 MachineBasicBlock *BB = I.getParent(); 299 MachineFunction *MF = BB->getParent(); 300 Register DstReg = I.getOperand(0).getReg(); 301 const DebugLoc &DL = I.getDebugLoc(); 302 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); 303 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 304 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; 305 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; 306 307 if (Size == 32) { 308 if (IsSALU) { 309 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 310 MachineInstr *Add = 311 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 312 .add(I.getOperand(1)) 313 .add(I.getOperand(2)); 314 I.eraseFromParent(); 315 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 316 } 317 318 if (STI.hasAddNoCarry()) { 319 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64; 320 I.setDesc(TII.get(Opc)); 321 I.addOperand(*MF, MachineOperand::CreateImm(0)); 322 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 323 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 324 } 325 326 const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; 327 328 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass()); 329 MachineInstr *Add 330 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 331 .addDef(UnusedCarry, RegState::Dead) 332 .add(I.getOperand(1)) 333 .add(I.getOperand(2)) 334 .addImm(0); 335 I.eraseFromParent(); 336 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 337 } 338 339 assert(!Sub && "illegal sub should not reach here"); 340 341 const TargetRegisterClass &RC 342 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; 343 const TargetRegisterClass &HalfRC 344 = IsSALU ? 
AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; 345 346 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); 347 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); 348 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); 349 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); 350 351 Register DstLo = MRI->createVirtualRegister(&HalfRC); 352 Register DstHi = MRI->createVirtualRegister(&HalfRC); 353 354 if (IsSALU) { 355 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) 356 .add(Lo1) 357 .add(Lo2); 358 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) 359 .add(Hi1) 360 .add(Hi2); 361 } else { 362 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); 363 Register CarryReg = MRI->createVirtualRegister(CarryRC); 364 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) 365 .addDef(CarryReg) 366 .add(Lo1) 367 .add(Lo2) 368 .addImm(0); 369 MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) 370 .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead) 371 .add(Hi1) 372 .add(Hi2) 373 .addReg(CarryReg, RegState::Kill) 374 .addImm(0); 375 376 if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI)) 377 return false; 378 } 379 380 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 381 .addReg(DstLo) 382 .addImm(AMDGPU::sub0) 383 .addReg(DstHi) 384 .addImm(AMDGPU::sub1); 385 386 387 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI)) 388 return false; 389 390 I.eraseFromParent(); 391 return true; 392 } 393 394 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { 395 MachineBasicBlock *BB = I.getParent(); 396 assert(I.getOperand(2).getImm() % 32 == 0); 397 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); 398 const DebugLoc &DL = I.getDebugLoc(); 399 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), 400 I.getOperand(0).getReg()) 401 .addReg(I.getOperand(1).getReg(), 0, SubReg); 402 403 for (const MachineOperand &MO : Copy->operands()) { 404 const TargetRegisterClass *RC = 405 TRI.getConstrainedRegClassForOperand(MO, *MRI); 406 if (!RC) 407 continue; 408 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); 409 } 410 I.eraseFromParent(); 411 return true; 412 } 413 414 bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { 415 MachineBasicBlock *BB = MI.getParent(); 416 Register DstReg = MI.getOperand(0).getReg(); 417 LLT DstTy = MRI->getType(DstReg); 418 LLT SrcTy = MRI->getType(MI.getOperand(1).getReg()); 419 420 const unsigned SrcSize = SrcTy.getSizeInBits(); 421 if (SrcSize < 32) 422 return false; 423 424 const DebugLoc &DL = MI.getDebugLoc(); 425 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); 426 const unsigned DstSize = DstTy.getSizeInBits(); 427 const TargetRegisterClass *DstRC = 428 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); 429 if (!DstRC) 430 return false; 431 432 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8); 433 MachineInstrBuilder MIB = 434 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg); 435 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) { 436 MachineOperand &Src = MI.getOperand(I + 1); 437 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef())); 438 MIB.addImm(SubRegs[I]); 439 440 const TargetRegisterClass *SrcRC 441 = TRI.getConstrainedRegClassForOperand(Src, *MRI); 442 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI)) 443 
return false; 444 } 445 446 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) 447 return false; 448 449 MI.eraseFromParent(); 450 return true; 451 } 452 453 bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { 454 MachineBasicBlock *BB = MI.getParent(); 455 const int NumDst = MI.getNumOperands() - 1; 456 457 MachineOperand &Src = MI.getOperand(NumDst); 458 459 Register SrcReg = Src.getReg(); 460 Register DstReg0 = MI.getOperand(0).getReg(); 461 LLT DstTy = MRI->getType(DstReg0); 462 LLT SrcTy = MRI->getType(SrcReg); 463 464 const unsigned DstSize = DstTy.getSizeInBits(); 465 const unsigned SrcSize = SrcTy.getSizeInBits(); 466 const DebugLoc &DL = MI.getDebugLoc(); 467 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); 468 469 const TargetRegisterClass *SrcRC = 470 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI); 471 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) 472 return false; 473 474 const unsigned SrcFlags = getUndefRegState(Src.isUndef()); 475 476 // Note we could have mixed SGPR and VGPR destination banks for an SGPR 477 // source, and this relies on the fact that the same subregister indices are 478 // used for both. 479 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); 480 for (int I = 0, E = NumDst; I != E; ++I) { 481 MachineOperand &Dst = MI.getOperand(I); 482 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) 483 .addReg(SrcReg, SrcFlags, SubRegs[I]); 484 485 const TargetRegisterClass *DstRC = 486 TRI.getConstrainedRegClassForOperand(Dst, *MRI); 487 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI)) 488 return false; 489 } 490 491 MI.eraseFromParent(); 492 return true; 493 } 494 495 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { 496 return selectG_ADD_SUB(I); 497 } 498 499 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { 500 const MachineOperand &MO = I.getOperand(0); 501 502 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The 503 // regbank check here is to know why getConstrainedRegClassForOperand failed. 
504 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI); 505 if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) || 506 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) { 507 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 508 return true; 509 } 510 511 return false; 512 } 513 514 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { 515 MachineBasicBlock *BB = I.getParent(); 516 Register Src0Reg = I.getOperand(1).getReg(); 517 Register Src1Reg = I.getOperand(2).getReg(); 518 LLT Src1Ty = MRI->getType(Src1Reg); 519 if (Src1Ty.getSizeInBits() != 32) 520 return false; 521 522 int64_t Offset = I.getOperand(3).getImm(); 523 if (Offset % 32 != 0) 524 return false; 525 526 unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); 527 const DebugLoc &DL = I.getDebugLoc(); 528 529 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) 530 .addDef(I.getOperand(0).getReg()) 531 .addReg(Src0Reg) 532 .addReg(Src1Reg) 533 .addImm(SubReg); 534 535 for (const MachineOperand &MO : Ins->operands()) { 536 if (!MO.isReg()) 537 continue; 538 if (Register::isPhysicalRegister(MO.getReg())) 539 continue; 540 541 const TargetRegisterClass *RC = 542 TRI.getConstrainedRegClassForOperand(MO, *MRI); 543 if (!RC) 544 continue; 545 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); 546 } 547 I.eraseFromParent(); 548 return true; 549 } 550 551 bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { 552 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); 553 switch (IntrinsicID) { 554 case Intrinsic::amdgcn_if_break: { 555 MachineBasicBlock *BB = I.getParent(); 556 557 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick 558 // SelectionDAG uses for wave32 vs wave64. 559 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) 560 .add(I.getOperand(0)) 561 .add(I.getOperand(2)) 562 .add(I.getOperand(3)); 563 564 Register DstReg = I.getOperand(0).getReg(); 565 Register Src0Reg = I.getOperand(2).getReg(); 566 Register Src1Reg = I.getOperand(3).getReg(); 567 568 I.eraseFromParent(); 569 570 for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { 571 if (!MRI->getRegClassOrNull(Reg)) 572 MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); 573 } 574 575 return true; 576 } 577 default: 578 return selectImpl(I, *CoverageInfo); 579 } 580 } 581 582 static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) { 583 if (Size != 32 && Size != 64) 584 return -1; 585 switch (P) { 586 default: 587 llvm_unreachable("Unknown condition code!"); 588 case CmpInst::ICMP_NE: 589 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64; 590 case CmpInst::ICMP_EQ: 591 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64; 592 case CmpInst::ICMP_SGT: 593 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64; 594 case CmpInst::ICMP_SGE: 595 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64; 596 case CmpInst::ICMP_SLT: 597 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64; 598 case CmpInst::ICMP_SLE: 599 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64; 600 case CmpInst::ICMP_UGT: 601 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64; 602 case CmpInst::ICMP_UGE: 603 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64; 604 case CmpInst::ICMP_ULT: 605 return Size == 32 ? 
AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64; 606 case CmpInst::ICMP_ULE: 607 return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64; 608 } 609 } 610 611 int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, 612 unsigned Size) const { 613 if (Size == 64) { 614 if (!STI.hasScalarCompareEq64()) 615 return -1; 616 617 switch (P) { 618 case CmpInst::ICMP_NE: 619 return AMDGPU::S_CMP_LG_U64; 620 case CmpInst::ICMP_EQ: 621 return AMDGPU::S_CMP_EQ_U64; 622 default: 623 return -1; 624 } 625 } 626 627 if (Size != 32) 628 return -1; 629 630 switch (P) { 631 case CmpInst::ICMP_NE: 632 return AMDGPU::S_CMP_LG_U32; 633 case CmpInst::ICMP_EQ: 634 return AMDGPU::S_CMP_EQ_U32; 635 case CmpInst::ICMP_SGT: 636 return AMDGPU::S_CMP_GT_I32; 637 case CmpInst::ICMP_SGE: 638 return AMDGPU::S_CMP_GE_I32; 639 case CmpInst::ICMP_SLT: 640 return AMDGPU::S_CMP_LT_I32; 641 case CmpInst::ICMP_SLE: 642 return AMDGPU::S_CMP_LE_I32; 643 case CmpInst::ICMP_UGT: 644 return AMDGPU::S_CMP_GT_U32; 645 case CmpInst::ICMP_UGE: 646 return AMDGPU::S_CMP_GE_U32; 647 case CmpInst::ICMP_ULT: 648 return AMDGPU::S_CMP_LT_U32; 649 case CmpInst::ICMP_ULE: 650 return AMDGPU::S_CMP_LE_U32; 651 default: 652 llvm_unreachable("Unknown condition code!"); 653 } 654 } 655 656 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { 657 MachineBasicBlock *BB = I.getParent(); 658 const DebugLoc &DL = I.getDebugLoc(); 659 660 Register SrcReg = I.getOperand(2).getReg(); 661 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); 662 663 auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 664 665 Register CCReg = I.getOperand(0).getReg(); 666 if (isSCC(CCReg, *MRI)) { 667 int Opcode = getS_CMPOpcode(Pred, Size); 668 if (Opcode == -1) 669 return false; 670 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) 671 .add(I.getOperand(2)) 672 .add(I.getOperand(3)); 673 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) 674 .addReg(AMDGPU::SCC); 675 bool Ret = 676 constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && 677 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI); 678 I.eraseFromParent(); 679 return Ret; 680 } 681 682 int Opcode = getV_CMPOpcode(Pred, Size); 683 if (Opcode == -1) 684 return false; 685 686 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), 687 I.getOperand(0).getReg()) 688 .add(I.getOperand(2)) 689 .add(I.getOperand(3)); 690 RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), 691 *TRI.getBoolRC(), *MRI); 692 bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); 693 I.eraseFromParent(); 694 return Ret; 695 } 696 697 static MachineInstr * 698 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, 699 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3, 700 unsigned VM, bool Compr, unsigned Enabled, bool Done) { 701 const DebugLoc &DL = Insert->getDebugLoc(); 702 MachineBasicBlock &BB = *Insert->getParent(); 703 unsigned Opcode = Done ? 
AMDGPU::EXP_DONE : AMDGPU::EXP; 704 return BuildMI(BB, Insert, DL, TII.get(Opcode)) 705 .addImm(Tgt) 706 .addReg(Reg0) 707 .addReg(Reg1) 708 .addReg(Reg2) 709 .addReg(Reg3) 710 .addImm(VM) 711 .addImm(Compr) 712 .addImm(Enabled); 713 } 714 715 static bool isZero(Register Reg, MachineRegisterInfo &MRI) { 716 int64_t C; 717 if (mi_match(Reg, MRI, m_ICst(C)) && C == 0) 718 return true; 719 720 // FIXME: matcher should ignore copies 721 return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; 722 } 723 724 static unsigned extractGLC(unsigned CachePolicy) { 725 return CachePolicy & 1; 726 } 727 728 static unsigned extractSLC(unsigned CachePolicy) { 729 return (CachePolicy >> 1) & 1; 730 } 731 732 static unsigned extractDLC(unsigned CachePolicy) { 733 return (CachePolicy >> 2) & 1; 734 } 735 736 // Returns Base register, constant offset, and offset def point. 737 static std::tuple<Register, unsigned, MachineInstr *> 738 getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { 739 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); 740 if (!Def) 741 return std::make_tuple(Reg, 0, nullptr); 742 743 if (Def->getOpcode() == AMDGPU::G_CONSTANT) { 744 unsigned Offset; 745 const MachineOperand &Op = Def->getOperand(1); 746 if (Op.isImm()) 747 Offset = Op.getImm(); 748 else 749 Offset = Op.getCImm()->getZExtValue(); 750 751 return std::make_tuple(Register(), Offset, Def); 752 } 753 754 int64_t Offset; 755 if (Def->getOpcode() == AMDGPU::G_ADD) { 756 // TODO: Handle G_OR used for add case 757 if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset))) 758 return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); 759 760 // FIXME: matcher should ignore copies 761 if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset)))) 762 return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); 763 } 764 765 return std::make_tuple(Reg, 0, Def); 766 } 767 768 static unsigned getBufferStoreOpcode(LLT Ty, 769 const unsigned MemSize, 770 const bool Offen) { 771 const int Size = Ty.getSizeInBits(); 772 switch (8 * MemSize) { 773 case 8: 774 return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : 775 AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; 776 case 16: 777 return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : 778 AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; 779 default: 780 unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : 781 AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; 782 if (Size > 32) 783 Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); 784 return Opc; 785 } 786 } 787 788 static unsigned getBufferStoreFormatOpcode(LLT Ty, 789 const unsigned MemSize, 790 const bool Offen) { 791 bool IsD16Packed = Ty.getScalarSizeInBits() == 16; 792 bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits(); 793 int NumElts = Ty.isVector() ? Ty.getNumElements() : 1; 794 795 if (IsD16Packed) { 796 switch (NumElts) { 797 case 1: 798 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : 799 AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; 800 case 2: 801 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact : 802 AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact; 803 case 3: 804 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact : 805 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact; 806 case 4: 807 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact : 808 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact; 809 default: 810 return -1; 811 } 812 } 813 814 if (IsD16Unpacked) { 815 switch (NumElts) { 816 case 1: 817 return Offen ? 
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : 818 AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; 819 case 2: 820 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact : 821 AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact; 822 case 3: 823 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact : 824 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact; 825 case 4: 826 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact : 827 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact; 828 default: 829 return -1; 830 } 831 } 832 833 switch (NumElts) { 834 case 1: 835 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact : 836 AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact; 837 case 2: 838 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact : 839 AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact; 840 case 3: 841 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact : 842 AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact; 843 case 4: 844 return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact : 845 AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact; 846 default: 847 return -1; 848 } 849 850 llvm_unreachable("unhandled buffer store"); 851 } 852 853 // TODO: Move this to combiner 854 // Returns base register, imm offset, total constant offset. 855 std::tuple<Register, unsigned, unsigned> 856 AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B, 857 Register OrigOffset) const { 858 const unsigned MaxImm = 4095; 859 Register BaseReg; 860 unsigned TotalConstOffset; 861 MachineInstr *OffsetDef; 862 863 std::tie(BaseReg, TotalConstOffset, OffsetDef) 864 = getBaseWithConstantOffset(*MRI, OrigOffset); 865 866 unsigned ImmOffset = TotalConstOffset; 867 868 // If the immediate value is too big for the immoffset field, put the value 869 // and -4096 into the immoffset field so that the value that is copied/added 870 // for the voffset field is a multiple of 4096, and it stands more chance 871 // of being CSEd with the copy/add for another similar load/store.f 872 // However, do not do that rounding down to a multiple of 4096 if that is a 873 // negative number, as it appears to be illegal to have a negative offset 874 // in the vgpr, even if adding the immediate offset makes it positive. 875 unsigned Overflow = ImmOffset & ~MaxImm; 876 ImmOffset -= Overflow; 877 if ((int32_t)Overflow < 0) { 878 Overflow += ImmOffset; 879 ImmOffset = 0; 880 } 881 882 if (Overflow != 0) { 883 // In case this is in a waterfall loop, insert offset code at the def point 884 // of the offset, not inside the loop. 
885 MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); 886 MachineBasicBlock &OldMBB = B.getMBB(); 887 B.setInstr(*OffsetDef); 888 889 if (!BaseReg) { 890 BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 891 B.buildInstr(AMDGPU::V_MOV_B32_e32) 892 .addDef(BaseReg) 893 .addImm(Overflow); 894 } else { 895 Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 896 B.buildInstr(AMDGPU::V_MOV_B32_e32) 897 .addDef(OverflowVal) 898 .addImm(Overflow); 899 900 Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 901 TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) 902 .addReg(BaseReg) 903 .addReg(OverflowVal, RegState::Kill) 904 .addImm(0); 905 BaseReg = NewBaseReg; 906 } 907 908 B.setInsertPt(OldMBB, OldInsPt); 909 } 910 911 return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset); 912 } 913 914 bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, 915 bool IsFormat) const { 916 MachineIRBuilder B(MI); 917 MachineFunction &MF = B.getMF(); 918 Register VData = MI.getOperand(1).getReg(); 919 LLT Ty = MRI->getType(VData); 920 921 int Size = Ty.getSizeInBits(); 922 if (Size % 32 != 0) 923 return false; 924 925 // FIXME: Verifier should enforce 1 MMO for these intrinsics. 926 MachineMemOperand *MMO = *MI.memoperands_begin(); 927 const int MemSize = MMO->getSize(); 928 929 Register RSrc = MI.getOperand(2).getReg(); 930 Register VOffset = MI.getOperand(3).getReg(); 931 Register SOffset = MI.getOperand(4).getReg(); 932 unsigned CachePolicy = MI.getOperand(5).getImm(); 933 unsigned ImmOffset; 934 unsigned TotalOffset; 935 936 std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); 937 if (TotalOffset != 0) 938 MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); 939 940 const bool Offen = !isZero(VOffset, *MRI); 941 942 int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : 943 getBufferStoreOpcode(Ty, MemSize, Offen); 944 if (Opc == -1) 945 return false; 946 947 MachineInstrBuilder MIB = B.buildInstr(Opc) 948 .addUse(VData); 949 950 if (Offen) 951 MIB.addUse(VOffset); 952 953 MIB.addUse(RSrc) 954 .addUse(SOffset) 955 .addImm(ImmOffset) 956 .addImm(extractGLC(CachePolicy)) 957 .addImm(extractSLC(CachePolicy)) 958 .addImm(0) // tfe: FIXME: Remove from inst 959 .addImm(extractDLC(CachePolicy)) 960 .addMemOperand(MMO); 961 962 MI.eraseFromParent(); 963 964 return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 965 } 966 967 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( 968 MachineInstr &I) const { 969 MachineBasicBlock *BB = I.getParent(); 970 unsigned IntrinsicID = I.getIntrinsicID(); 971 switch (IntrinsicID) { 972 case Intrinsic::amdgcn_exp: { 973 int64_t Tgt = I.getOperand(1).getImm(); 974 int64_t Enabled = I.getOperand(2).getImm(); 975 int64_t Done = I.getOperand(7).getImm(); 976 int64_t VM = I.getOperand(8).getImm(); 977 978 MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), 979 I.getOperand(4).getReg(), 980 I.getOperand(5).getReg(), 981 I.getOperand(6).getReg(), 982 VM, false, Enabled, Done); 983 984 I.eraseFromParent(); 985 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 986 } 987 case Intrinsic::amdgcn_exp_compr: { 988 const DebugLoc &DL = I.getDebugLoc(); 989 int64_t Tgt = I.getOperand(1).getImm(); 990 int64_t Enabled = I.getOperand(2).getImm(); 991 Register Reg0 = I.getOperand(3).getReg(); 992 Register Reg1 = I.getOperand(4).getReg(); 993 Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 994 int64_t Done = I.getOperand(5).getImm(); 995 int64_t VM = I.getOperand(6).getImm(); 996 997 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); 998 MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, 999 true, Enabled, Done); 1000 1001 I.eraseFromParent(); 1002 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 1003 } 1004 case Intrinsic::amdgcn_end_cf: { 1005 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick 1006 // SelectionDAG uses for wave32 vs wave64. 1007 BuildMI(*BB, &I, I.getDebugLoc(), 1008 TII.get(AMDGPU::SI_END_CF)) 1009 .add(I.getOperand(1)); 1010 1011 Register Reg = I.getOperand(1).getReg(); 1012 I.eraseFromParent(); 1013 1014 if (!MRI->getRegClassOrNull(Reg)) 1015 MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); 1016 return true; 1017 } 1018 case Intrinsic::amdgcn_raw_buffer_store: 1019 return selectStoreIntrinsic(I, false); 1020 case Intrinsic::amdgcn_raw_buffer_store_format: 1021 return selectStoreIntrinsic(I, true); 1022 default: 1023 return selectImpl(I, *CoverageInfo); 1024 } 1025 } 1026 1027 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { 1028 MachineBasicBlock *BB = I.getParent(); 1029 const DebugLoc &DL = I.getDebugLoc(); 1030 1031 Register DstReg = I.getOperand(0).getReg(); 1032 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); 1033 assert(Size <= 32 || Size == 64); 1034 const MachineOperand &CCOp = I.getOperand(1); 1035 Register CCReg = CCOp.getReg(); 1036 if (isSCC(CCReg, *MRI)) { 1037 unsigned SelectOpcode = Size == 64 ? 
AMDGPU::S_CSELECT_B64 : 1038 AMDGPU::S_CSELECT_B32; 1039 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 1040 .addReg(CCReg); 1041 1042 // The generic constrainSelectedInstRegOperands doesn't work for the scc register 1043 // bank, because it does not cover the register class that we used to represent 1044 // for it. So we need to manually set the register class here. 1045 if (!MRI->getRegClassOrNull(CCReg)) 1046 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); 1047 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) 1048 .add(I.getOperand(2)) 1049 .add(I.getOperand(3)); 1050 1051 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) | 1052 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); 1053 I.eraseFromParent(); 1054 return Ret; 1055 } 1056 1057 // Wide VGPR select should have been split in RegBankSelect. 1058 if (Size > 32) 1059 return false; 1060 1061 MachineInstr *Select = 1062 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 1063 .addImm(0) 1064 .add(I.getOperand(3)) 1065 .addImm(0) 1066 .add(I.getOperand(2)) 1067 .add(I.getOperand(1)); 1068 1069 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); 1070 I.eraseFromParent(); 1071 return Ret; 1072 } 1073 1074 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { 1075 initM0(I); 1076 return selectImpl(I, *CoverageInfo); 1077 } 1078 1079 static int sizeToSubRegIndex(unsigned Size) { 1080 switch (Size) { 1081 case 32: 1082 return AMDGPU::sub0; 1083 case 64: 1084 return AMDGPU::sub0_sub1; 1085 case 96: 1086 return AMDGPU::sub0_sub1_sub2; 1087 case 128: 1088 return AMDGPU::sub0_sub1_sub2_sub3; 1089 case 256: 1090 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; 1091 default: 1092 if (Size < 32) 1093 return AMDGPU::sub0; 1094 if (Size > 256) 1095 return -1; 1096 return sizeToSubRegIndex(PowerOf2Ceil(Size)); 1097 } 1098 } 1099 1100 bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { 1101 Register DstReg = I.getOperand(0).getReg(); 1102 Register SrcReg = I.getOperand(1).getReg(); 1103 const LLT DstTy = MRI->getType(DstReg); 1104 const LLT SrcTy = MRI->getType(SrcReg); 1105 if (!DstTy.isScalar()) 1106 return false; 1107 1108 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 1109 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); 1110 if (SrcRB != DstRB) 1111 return false; 1112 1113 unsigned DstSize = DstTy.getSizeInBits(); 1114 unsigned SrcSize = SrcTy.getSizeInBits(); 1115 1116 const TargetRegisterClass *SrcRC 1117 = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI); 1118 const TargetRegisterClass *DstRC 1119 = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI); 1120 1121 if (SrcSize > 32) { 1122 int SubRegIdx = sizeToSubRegIndex(DstSize); 1123 if (SubRegIdx == -1) 1124 return false; 1125 1126 // Deal with weird cases where the class only partially supports the subreg 1127 // index. 1128 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx); 1129 if (!SrcRC) 1130 return false; 1131 1132 I.getOperand(1).setSubReg(SubRegIdx); 1133 } 1134 1135 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || 1136 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) { 1137 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); 1138 return false; 1139 } 1140 1141 I.setDesc(TII.get(TargetOpcode::COPY)); 1142 return true; 1143 } 1144 1145 /// \returns true if a bitmask for \p Size bits will be an inline immediate. 
1146 static bool shouldUseAndMask(unsigned Size, unsigned &Mask) { 1147 Mask = maskTrailingOnes<unsigned>(Size); 1148 int SignedMask = static_cast<int>(Mask); 1149 return SignedMask >= -16 && SignedMask <= 64; 1150 } 1151 1152 bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { 1153 bool Signed = I.getOpcode() == AMDGPU::G_SEXT; 1154 const DebugLoc &DL = I.getDebugLoc(); 1155 MachineBasicBlock &MBB = *I.getParent(); 1156 const Register DstReg = I.getOperand(0).getReg(); 1157 const Register SrcReg = I.getOperand(1).getReg(); 1158 1159 const LLT DstTy = MRI->getType(DstReg); 1160 const LLT SrcTy = MRI->getType(SrcReg); 1161 const LLT S1 = LLT::scalar(1); 1162 const unsigned SrcSize = SrcTy.getSizeInBits(); 1163 const unsigned DstSize = DstTy.getSizeInBits(); 1164 if (!DstTy.isScalar()) 1165 return false; 1166 1167 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); 1168 1169 if (SrcBank->getID() == AMDGPU::SCCRegBankID) { 1170 if (SrcTy != S1 || DstSize > 64) // Invalid 1171 return false; 1172 1173 unsigned Opcode = 1174 DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; 1175 const TargetRegisterClass *DstRC = 1176 DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass; 1177 1178 // FIXME: Create an extra copy to avoid incorrectly constraining the result 1179 // of the scc producer. 1180 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 1181 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) 1182 .addReg(SrcReg); 1183 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 1184 .addReg(TmpReg); 1185 1186 // The instruction operands are backwards from what you would expect. 1187 BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) 1188 .addImm(0) 1189 .addImm(Signed ? -1 : 1); 1190 I.eraseFromParent(); 1191 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); 1192 } 1193 1194 if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { 1195 if (SrcTy != S1) // Invalid 1196 return false; 1197 1198 MachineInstr *ExtI = 1199 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 1200 .addImm(0) // src0_modifiers 1201 .addImm(0) // src0 1202 .addImm(0) // src1_modifiers 1203 .addImm(Signed ? -1 : 1) // src1 1204 .addUse(SrcReg); 1205 I.eraseFromParent(); 1206 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1207 } 1208 1209 if (I.getOpcode() == AMDGPU::G_ANYEXT) 1210 return selectCOPY(I); 1211 1212 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { 1213 // 64-bit should have been split up in RegBankSelect 1214 1215 // Try to use an and with a mask if it will save code size. 1216 unsigned Mask; 1217 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1218 MachineInstr *ExtI = 1219 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) 1220 .addImm(Mask) 1221 .addReg(SrcReg); 1222 I.eraseFromParent(); 1223 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1224 } 1225 1226 const unsigned BFE = Signed ? 
AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32; 1227 MachineInstr *ExtI = 1228 BuildMI(MBB, I, DL, TII.get(BFE), DstReg) 1229 .addReg(SrcReg) 1230 .addImm(0) // Offset 1231 .addImm(SrcSize); // Width 1232 I.eraseFromParent(); 1233 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1234 } 1235 1236 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { 1237 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI)) 1238 return false; 1239 1240 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { 1241 const unsigned SextOpc = SrcSize == 8 ? 1242 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; 1243 BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) 1244 .addReg(SrcReg); 1245 I.eraseFromParent(); 1246 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); 1247 } 1248 1249 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; 1250 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1251 1252 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. 1253 if (DstSize > 32 && SrcSize <= 32) { 1254 // We need a 64-bit register source, but the high bits don't matter. 1255 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); 1256 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 1257 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 1258 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) 1259 .addReg(SrcReg) 1260 .addImm(AMDGPU::sub0) 1261 .addReg(UndefReg) 1262 .addImm(AMDGPU::sub1); 1263 1264 BuildMI(MBB, I, DL, TII.get(BFE64), DstReg) 1265 .addReg(ExtReg) 1266 .addImm(SrcSize << 16); 1267 1268 I.eraseFromParent(); 1269 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI); 1270 } 1271 1272 unsigned Mask; 1273 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1274 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) 1275 .addReg(SrcReg) 1276 .addImm(Mask); 1277 } else { 1278 BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) 1279 .addReg(SrcReg) 1280 .addImm(SrcSize << 16); 1281 } 1282 1283 I.eraseFromParent(); 1284 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); 1285 } 1286 1287 return false; 1288 } 1289 1290 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { 1291 MachineBasicBlock *BB = I.getParent(); 1292 MachineOperand &ImmOp = I.getOperand(1); 1293 1294 // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 1295 if (ImmOp.isFPImm()) { 1296 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); 1297 ImmOp.ChangeToImmediate(Imm.getZExtValue()); 1298 } else if (ImmOp.isCImm()) { 1299 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); 1300 } 1301 1302 Register DstReg = I.getOperand(0).getReg(); 1303 unsigned Size; 1304 bool IsSgpr; 1305 const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg()); 1306 if (RB) { 1307 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; 1308 Size = MRI->getType(DstReg).getSizeInBits(); 1309 } else { 1310 const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg); 1311 IsSgpr = TRI.isSGPRClass(RC); 1312 Size = TRI.getRegSizeInBits(*RC); 1313 } 1314 1315 if (Size != 32 && Size != 64) 1316 return false; 1317 1318 unsigned Opcode = IsSgpr ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 1319 if (Size == 32) { 1320 I.setDesc(TII.get(Opcode)); 1321 I.addImplicitDefUseOperands(*MF); 1322 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1323 } 1324 1325 DebugLoc DL = I.getDebugLoc(); 1326 const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : 1327 &AMDGPU::VGPR_32RegClass; 1328 Register LoReg = MRI->createVirtualRegister(RC); 1329 Register HiReg = MRI->createVirtualRegister(RC); 1330 const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); 1331 1332 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) 1333 .addImm(Imm.trunc(32).getZExtValue()); 1334 1335 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) 1336 .addImm(Imm.ashr(32).getZExtValue()); 1337 1338 const MachineInstr *RS = 1339 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 1340 .addReg(LoReg) 1341 .addImm(AMDGPU::sub0) 1342 .addReg(HiReg) 1343 .addImm(AMDGPU::sub1); 1344 1345 // We can't call constrainSelectedInstRegOperands here, because it doesn't 1346 // work for target independent opcodes 1347 I.eraseFromParent(); 1348 const TargetRegisterClass *DstRC = 1349 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), *MRI); 1350 if (!DstRC) 1351 return true; 1352 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); 1353 } 1354 1355 static bool isConstant(const MachineInstr &MI) { 1356 return MI.getOpcode() == TargetOpcode::G_CONSTANT; 1357 } 1358 1359 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, 1360 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { 1361 1362 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); 1363 1364 assert(PtrMI); 1365 1366 if (PtrMI->getOpcode() != TargetOpcode::G_GEP) 1367 return; 1368 1369 GEPInfo GEPInfo(*PtrMI); 1370 1371 for (unsigned i = 1; i != 3; ++i) { 1372 const MachineOperand &GEPOp = PtrMI->getOperand(i); 1373 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); 1374 assert(OpDef); 1375 if (i == 2 && isConstant(*OpDef)) { 1376 // TODO: Could handle constant base + variable offset, but a combine 1377 // probably should have commuted it. 1378 assert(GEPInfo.Imm == 0); 1379 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); 1380 continue; 1381 } 1382 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); 1383 if (OpBank->getID() == AMDGPU::SGPRRegBankID) 1384 GEPInfo.SgprParts.push_back(GEPOp.getReg()); 1385 else 1386 GEPInfo.VgprParts.push_back(GEPOp.getReg()); 1387 } 1388 1389 AddrInfo.push_back(GEPInfo); 1390 getAddrModeInfo(*PtrMI, MRI, AddrInfo); 1391 } 1392 1393 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { 1394 if (!MI.hasOneMemOperand()) 1395 return false; 1396 1397 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1398 const Value *Ptr = MMO->getValue(); 1399 1400 // UndefValue means this is a load of a kernel input. These are uniform. 1401 // Sometimes LDS instructions have constant pointers. 1402 // If Ptr is null, then that means this mem operand contains a 1403 // PseudoSourceValue like GOT. 
1404 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 1405 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 1406 return true; 1407 1408 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) 1409 return true; 1410 1411 const Instruction *I = dyn_cast<Instruction>(Ptr); 1412 return I && I->getMetadata("amdgpu.uniform"); 1413 } 1414 1415 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const { 1416 for (const GEPInfo &GEPInfo : AddrInfo) { 1417 if (!GEPInfo.VgprParts.empty()) 1418 return true; 1419 } 1420 return false; 1421 } 1422 1423 void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { 1424 MachineBasicBlock *BB = I.getParent(); 1425 1426 const LLT PtrTy = MRI->getType(I.getOperand(1).getReg()); 1427 unsigned AS = PtrTy.getAddressSpace(); 1428 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && 1429 STI.ldsRequiresM0Init()) { 1430 // If DS instructions require M0 initializtion, insert it before selecting. 1431 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) 1432 .addImm(-1); 1433 } 1434 } 1435 1436 bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const { 1437 initM0(I); 1438 return selectImpl(I, *CoverageInfo); 1439 } 1440 1441 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { 1442 MachineBasicBlock *BB = I.getParent(); 1443 MachineOperand &CondOp = I.getOperand(0); 1444 Register CondReg = CondOp.getReg(); 1445 const DebugLoc &DL = I.getDebugLoc(); 1446 1447 unsigned BrOpcode; 1448 Register CondPhysReg; 1449 const TargetRegisterClass *ConstrainRC; 1450 1451 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide 1452 // whether the branch is uniform when selecting the instruction. In 1453 // GlobalISel, we should push that decision into RegBankSelect. Assume for now 1454 // RegBankSelect knows what it's doing if the branch condition is scc, even 1455 // though it currently does not. 1456 if (isSCC(CondReg, *MRI)) { 1457 CondPhysReg = AMDGPU::SCC; 1458 BrOpcode = AMDGPU::S_CBRANCH_SCC1; 1459 ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; 1460 } else if (isVCC(CondReg, *MRI)) { 1461 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? 1462 // We sort of know that a VCC producer based on the register bank, that ands 1463 // inactive lanes with 0. What if there was a logical operation with vcc 1464 // producers in different blocks/with different exec masks? 1465 // FIXME: Should scc->vcc copies and with exec? 1466 CondPhysReg = TRI.getVCC(); 1467 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; 1468 ConstrainRC = TRI.getBoolRC(); 1469 } else 1470 return false; 1471 1472 if (!MRI->getRegClassOrNull(CondReg)) 1473 MRI->setRegClass(CondReg, ConstrainRC); 1474 1475 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) 1476 .addReg(CondReg); 1477 BuildMI(*BB, &I, DL, TII.get(BrOpcode)) 1478 .addMBB(I.getOperand(1).getMBB()); 1479 1480 I.eraseFromParent(); 1481 return true; 1482 } 1483 1484 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { 1485 Register DstReg = I.getOperand(0).getReg(); 1486 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 1487 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 1488 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); 1489 if (IsVGPR) 1490 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 1491 1492 return RBI.constrainGenericRegister( 1493 DstReg, IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI); 1494 } 1495 1496 bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const { 1497 uint64_t Align = I.getOperand(2).getImm(); 1498 const uint64_t Mask = ~((UINT64_C(1) << Align) - 1); 1499 1500 MachineBasicBlock *BB = I.getParent(); 1501 1502 Register DstReg = I.getOperand(0).getReg(); 1503 Register SrcReg = I.getOperand(1).getReg(); 1504 1505 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 1506 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); 1507 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 1508 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; 1509 unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; 1510 const TargetRegisterClass &RegRC 1511 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; 1512 1513 LLT Ty = MRI->getType(DstReg); 1514 1515 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB, 1516 *MRI); 1517 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB, 1518 *MRI); 1519 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || 1520 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) 1521 return false; 1522 1523 const DebugLoc &DL = I.getDebugLoc(); 1524 Register ImmReg = MRI->createVirtualRegister(&RegRC); 1525 BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg) 1526 .addImm(Mask); 1527 1528 if (Ty.getSizeInBits() == 32) { 1529 BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) 1530 .addReg(SrcReg) 1531 .addReg(ImmReg); 1532 I.eraseFromParent(); 1533 return true; 1534 } 1535 1536 Register HiReg = MRI->createVirtualRegister(&RegRC); 1537 Register LoReg = MRI->createVirtualRegister(&RegRC); 1538 Register MaskLo = MRI->createVirtualRegister(&RegRC); 1539 1540 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg) 1541 .addReg(SrcReg, 0, AMDGPU::sub0); 1542 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg) 1543 .addReg(SrcReg, 0, AMDGPU::sub1); 1544 1545 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo) 1546 .addReg(LoReg) 1547 .addReg(ImmReg); 1548 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 1549 .addReg(MaskLo) 1550 .addImm(AMDGPU::sub0) 1551 .addReg(HiReg) 1552 .addImm(AMDGPU::sub1); 1553 I.eraseFromParent(); 1554 return true; 1555 } 1556 1557 bool AMDGPUInstructionSelector::select(MachineInstr &I) { 1558 if (I.isPHI()) 1559 return selectPHI(I); 1560 1561 if (!isPreISelGenericOpcode(I.getOpcode())) { 1562 if (I.isCopy()) 1563 return selectCOPY(I); 1564 return true; 1565 } 1566 1567 switch (I.getOpcode()) { 1568 case TargetOpcode::G_AND: 1569 case TargetOpcode::G_OR: 1570 case TargetOpcode::G_XOR: 1571 if (selectG_AND_OR_XOR(I)) 1572 return true; 1573 return selectImpl(I, *CoverageInfo); 1574 case TargetOpcode::G_ADD: 1575 case TargetOpcode::G_SUB: 1576 if (selectImpl(I, *CoverageInfo)) 1577 return true; 1578 return selectG_ADD_SUB(I); 1579 case TargetOpcode::G_INTTOPTR: 1580 case TargetOpcode::G_BITCAST: 1581 return selectCOPY(I); 1582 case TargetOpcode::G_CONSTANT: 1583 case TargetOpcode::G_FCONSTANT: 1584 return selectG_CONSTANT(I); 1585 case TargetOpcode::G_EXTRACT: 1586 return selectG_EXTRACT(I); 1587 case TargetOpcode::G_MERGE_VALUES: 1588 case TargetOpcode::G_BUILD_VECTOR: 1589 case TargetOpcode::G_CONCAT_VECTORS: 1590 return selectG_MERGE_VALUES(I); 1591 case TargetOpcode::G_UNMERGE_VALUES: 1592 return selectG_UNMERGE_VALUES(I); 1593 case TargetOpcode::G_GEP: 1594 return selectG_GEP(I); 1595 case TargetOpcode::G_IMPLICIT_DEF: 1596 return selectG_IMPLICIT_DEF(I); 
1597 case TargetOpcode::G_INSERT: 1598 return selectG_INSERT(I); 1599 case TargetOpcode::G_INTRINSIC: 1600 return selectG_INTRINSIC(I); 1601 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 1602 return selectG_INTRINSIC_W_SIDE_EFFECTS(I); 1603 case TargetOpcode::G_ICMP: 1604 if (selectG_ICMP(I)) 1605 return true; 1606 return selectImpl(I, *CoverageInfo); 1607 case TargetOpcode::G_LOAD: 1608 case TargetOpcode::G_ATOMIC_CMPXCHG: 1609 case TargetOpcode::G_ATOMICRMW_XCHG: 1610 case TargetOpcode::G_ATOMICRMW_ADD: 1611 case TargetOpcode::G_ATOMICRMW_SUB: 1612 case TargetOpcode::G_ATOMICRMW_AND: 1613 case TargetOpcode::G_ATOMICRMW_OR: 1614 case TargetOpcode::G_ATOMICRMW_XOR: 1615 case TargetOpcode::G_ATOMICRMW_MIN: 1616 case TargetOpcode::G_ATOMICRMW_MAX: 1617 case TargetOpcode::G_ATOMICRMW_UMIN: 1618 case TargetOpcode::G_ATOMICRMW_UMAX: 1619 case TargetOpcode::G_ATOMICRMW_FADD: 1620 return selectG_LOAD_ATOMICRMW(I); 1621 case TargetOpcode::G_SELECT: 1622 return selectG_SELECT(I); 1623 case TargetOpcode::G_STORE: 1624 return selectG_STORE(I); 1625 case TargetOpcode::G_TRUNC: 1626 return selectG_TRUNC(I); 1627 case TargetOpcode::G_SEXT: 1628 case TargetOpcode::G_ZEXT: 1629 case TargetOpcode::G_ANYEXT: 1630 return selectG_SZA_EXT(I); 1631 case TargetOpcode::G_BRCOND: 1632 return selectG_BRCOND(I); 1633 case TargetOpcode::G_FRAME_INDEX: 1634 return selectG_FRAME_INDEX(I); 1635 case TargetOpcode::G_FENCE: 1636 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and 1637 // is checking for G_CONSTANT 1638 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); 1639 return true; 1640 case TargetOpcode::G_PTR_MASK: 1641 return selectG_PTR_MASK(I); 1642 default: 1643 return selectImpl(I, *CoverageInfo); 1644 } 1645 return false; 1646 } 1647 1648 InstructionSelector::ComplexRendererFns 1649 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { 1650 return {{ 1651 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1652 }}; 1653 1654 } 1655 1656 std::pair<Register, unsigned> 1657 AMDGPUInstructionSelector::selectVOP3ModsImpl( 1658 Register Src) const { 1659 unsigned Mods = 0; 1660 MachineInstr *MI = MRI->getVRegDef(Src); 1661 1662 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { 1663 Src = MI->getOperand(1).getReg(); 1664 Mods |= SISrcMods::NEG; 1665 MI = MRI->getVRegDef(Src); 1666 } 1667 1668 if (MI && MI->getOpcode() == AMDGPU::G_FABS) { 1669 Src = MI->getOperand(1).getReg(); 1670 Mods |= SISrcMods::ABS; 1671 } 1672 1673 return std::make_pair(Src, Mods); 1674 } 1675 1676 /// 1677 /// This will select either an SGPR or VGPR operand and will save us from 1678 /// having to write an extra tablegen pattern. 
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // clamp
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
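// Summary comment (not from the original source): the selectSmrd* renderers
// feed the scalar memory (SMRD/SMEM) addressing modes. selectSmrdImm above
// produces a {base SGPR, encoded immediate} pair, selectSmrdImm32 below
// handles the variant with a 32-bit literal offset available on some
// subtargets, and selectSmrdSgpr falls back to materializing the offset in
// an SGPR. The raw byte offset from the G_GEP is translated by
// AMDGPU::getSMRDEncodedOffset, whose result is subtarget-dependent; for
// example, on subtargets that encode the offset in dwords, a byte offset of
// 16 is stored as 4.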
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
  }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}
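// Sketch of what the flat-offset folding above buys (illustrative MIR only,
// assuming a subtarget where STI.hasFlatInstOffsets() is true and the
// constant is a legal FLAT offset for the address space):
//   %ptr:vgpr(p1) = G_GEP %base, %c      ; %c = G_CONSTANT i64 16
//   %val:vgpr(s32) = G_LOAD %ptr
// selects with vaddr = %base and an immediate offset of 16 rather than a
// separate pointer add; otherwise the Default renderer keeps the original
// pointer with a zero offset.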
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, *MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
          RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}
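// Offset-splitting example for the constant-address path above (illustrative
// numbers): the constant address is split into a 4096-aligned part that is
// materialized into a VGPR and a low part that fits the 12-bit MUBUF offset
// field. For Offset = 5000:
//   Offset & ~4095 = 4096  -> V_MOV_B32 feeding the vaddr operand
//   Offset &  4095 =  904  -> immediate offset field
// The soffset operand is the stack pointer for accesses known to be
// stack-relative, and the scratch wave offset otherwise.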
bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return KnownBits->signBitIsZero(Base.getReg());
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, *MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
        }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO: Not yet handled.
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO: Not yet handled.
  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
  }};
}
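// DS addressing example for selectDS1Addr1Offset (hypothetical MIR): for
//   %addr:vgpr(p3) = G_GEP %base, %c     ; %c = G_CONSTANT i32 64
// the (add n0, c0) case returns {%base, 64}, since 64 fits the 16-bit
// unsigned ds_* offset checked by isDSOffsetLegal. Anything else falls back
// to using the full address with a zero offset.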