1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AMDGPU. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUInstructionSelector.h" 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUSubtarget.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 25 #include "llvm/CodeGen/GlobalISel/Utils.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/IR/Type.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 #define DEBUG_TYPE "amdgpu-isel" 36 37 using namespace llvm; 38 using namespace MIPatternMatch; 39 40 #define GET_GLOBALISEL_IMPL 41 #define AMDGPUSubtarget GCNSubtarget 42 #include "AMDGPUGenGlobalISel.inc" 43 #undef GET_GLOBALISEL_IMPL 44 #undef AMDGPUSubtarget 45 46 AMDGPUInstructionSelector::AMDGPUInstructionSelector( 47 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, 48 const AMDGPUTargetMachine &TM) 49 : InstructionSelector(), TII(*STI.getInstrInfo()), 50 TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), 51 STI(STI), 52 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), 53 #define GET_GLOBALISEL_PREDICATES_INIT 54 #include "AMDGPUGenGlobalISel.inc" 55 #undef GET_GLOBALISEL_PREDICATES_INIT 56 #define GET_GLOBALISEL_TEMPORARIES_INIT 57 #include "AMDGPUGenGlobalISel.inc" 58 #undef GET_GLOBALISEL_TEMPORARIES_INIT 59 { 60 } 61 62 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } 63 64 static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { 65 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 66 return Reg == AMDGPU::SCC; 67 68 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 69 const TargetRegisterClass *RC = 70 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 71 if (RC) { 72 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the 73 // context of the register bank has been lost. 
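// For now, assume a 1-bit value constrained to SReg_32_XM0 was meant to be SCC.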
74 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
75 return false;
76 const LLT Ty = MRI.getType(Reg);
77 return Ty.isValid() && Ty.getSizeInBits() == 1;
78 }
79
80 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
81 return RB->getID() == AMDGPU::SCCRegBankID;
82 }
83
84 bool AMDGPUInstructionSelector::isVCC(Register Reg,
85 const MachineRegisterInfo &MRI) const {
86 if (TargetRegisterInfo::isPhysicalRegister(Reg))
87 return Reg == TRI.getVCC();
88
89 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
90 const TargetRegisterClass *RC =
91 RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
92 if (RC) {
93 const LLT Ty = MRI.getType(Reg);
94 return RC->hasSuperClassEq(TRI.getBoolRC()) &&
95 Ty.isValid() && Ty.getSizeInBits() == 1;
96 }
97
98 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
99 return RB->getID() == AMDGPU::VCCRegBankID;
100 }
101
102 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
103 const DebugLoc &DL = I.getDebugLoc();
104 MachineBasicBlock *BB = I.getParent();
105 MachineFunction *MF = BB->getParent();
106 MachineRegisterInfo &MRI = MF->getRegInfo();
107 I.setDesc(TII.get(TargetOpcode::COPY));
108
109 const MachineOperand &Src = I.getOperand(1);
110 MachineOperand &Dst = I.getOperand(0);
111 Register DstReg = Dst.getReg();
112 Register SrcReg = Src.getReg();
113
114 if (isVCC(DstReg, MRI)) {
115 if (SrcReg == AMDGPU::SCC) {
116 const TargetRegisterClass *RC
117 = TRI.getConstrainedRegClassForOperand(Dst, MRI);
118 if (!RC)
119 return true;
120 return RBI.constrainGenericRegister(DstReg, *RC, MRI);
121 }
122
123 if (!isVCC(SrcReg, MRI)) {
124 // TODO: Should probably leave the copy and let copyPhysReg expand it.
125 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
126 return false;
127
128 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
129 .addImm(0)
130 .addReg(SrcReg);
131
132 if (!MRI.getRegClassOrNull(SrcReg))
133 MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
134 I.eraseFromParent();
135 return true;
136 }
137
138 const TargetRegisterClass *RC =
139 TRI.getConstrainedRegClassForOperand(Dst, MRI);
140 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
141 return false;
142
143 // Don't constrain the source register to a class so the def instruction
144 // handles it (unless it's undef).
145 //
146 // FIXME: This is a hack. When selecting the def, we need to know
147 // specifically that the result is VCCRegBank, and not just an SGPR
148 // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
149 if (Src.isUndef()) {
150 const TargetRegisterClass *SrcRC =
151 TRI.getConstrainedRegClassForOperand(Src, MRI);
152 if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
153 return false;
154 }
155
156 return true;
157 }
158
159 for (const MachineOperand &MO : I.operands()) {
160 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
161 continue;
162
163 const TargetRegisterClass *RC =
164 TRI.getConstrainedRegClassForOperand(MO, MRI);
165 if (!RC)
166 continue;
167 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
168 }
169 return true;
170 }
171
172 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
173 MachineBasicBlock *BB = I.getParent();
174 MachineFunction *MF = BB->getParent();
175 MachineRegisterInfo &MRI = MF->getRegInfo();
176
177 const Register DefReg = I.getOperand(0).getReg();
178 const LLT DefTy = MRI.getType(DefReg);
179
180 // TODO: Verify this doesn't have insane operands (i.e.
VGPR to SGPR copy)
181
182 const RegClassOrRegBank &RegClassOrBank =
183 MRI.getRegClassOrRegBank(DefReg);
184
185 const TargetRegisterClass *DefRC
186 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
187 if (!DefRC) {
188 if (!DefTy.isValid()) {
189 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
190 return false;
191 }
192
193 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
194 if (RB.getID() == AMDGPU::SCCRegBankID) {
195 LLVM_DEBUG(dbgs() << "illegal scc phi\n");
196 return false;
197 }
198
199 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
200 if (!DefRC) {
201 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
202 return false;
203 }
204 }
205
206 I.setDesc(TII.get(TargetOpcode::PHI));
207 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
208 }
209
210 MachineOperand
211 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
212 const TargetRegisterClass &SubRC,
213 unsigned SubIdx) const {
214
215 MachineInstr *MI = MO.getParent();
216 MachineBasicBlock *BB = MO.getParent()->getParent();
217 MachineFunction *MF = BB->getParent();
218 MachineRegisterInfo &MRI = MF->getRegInfo();
219 Register DstReg = MRI.createVirtualRegister(&SubRC);
220
221 if (MO.isReg()) {
222 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
223 unsigned Reg = MO.getReg();
224 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
225 .addReg(Reg, 0, ComposedSubIdx);
226
227 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
228 MO.isKill(), MO.isDead(), MO.isUndef(),
229 MO.isEarlyClobber(), 0, MO.isDebug(),
230 MO.isInternalRead());
231 }
232
233 assert(MO.isImm());
234
235 APInt Imm(64, MO.getImm());
236
237 switch (SubIdx) {
238 default:
239 llvm_unreachable("do not know how to split immediate with this sub index.");
240 case AMDGPU::sub0:
241 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
242 case AMDGPU::sub1:
243 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
244 }
245 }
246
247 static int64_t getConstant(const MachineInstr *MI) {
248 return MI->getOperand(1).getCImm()->getSExtValue();
249 }
250
251 static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
252 switch (Opc) {
253 case AMDGPU::G_AND:
254 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
255 case AMDGPU::G_OR:
256 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
257 case AMDGPU::G_XOR:
258 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
259 default:
260 llvm_unreachable("not a bit op");
261 }
262 }
263
264 bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
265 MachineBasicBlock *BB = I.getParent();
266 MachineFunction *MF = BB->getParent();
267 MachineRegisterInfo &MRI = MF->getRegInfo();
268 MachineOperand &Dst = I.getOperand(0);
269 MachineOperand &Src0 = I.getOperand(1);
270 MachineOperand &Src1 = I.getOperand(2);
271 Register DstReg = Dst.getReg();
272 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
273
274 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
275 if (DstRB->getID() == AMDGPU::VCCRegBankID) {
276 const TargetRegisterClass *RC = TRI.getBoolRC();
277 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
278 RC == &AMDGPU::SReg_64RegClass);
279 I.setDesc(TII.get(InstOpc));
280
281 // FIXME: Hack to avoid turning the register bank into a register class.
282 // The selector for G_ICMP relies on seeing that the register bank for the result
283 // is VCC.
In wave32 if we constrain the registers to SReg_32 here, it will 284 // be ambiguous whether it's a scalar or vector bool. 285 if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg())) 286 MRI.setRegClass(Src0.getReg(), RC); 287 if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg())) 288 MRI.setRegClass(Src1.getReg(), RC); 289 290 return RBI.constrainGenericRegister(DstReg, *RC, MRI); 291 } 292 293 // TODO: Should this allow an SCC bank result, and produce a copy from SCC for 294 // the result? 295 if (DstRB->getID() == AMDGPU::SGPRRegBankID) { 296 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); 297 I.setDesc(TII.get(InstOpc)); 298 299 const TargetRegisterClass *RC 300 = TRI.getConstrainedRegClassForOperand(Dst, MRI); 301 if (!RC) 302 return false; 303 return RBI.constrainGenericRegister(DstReg, *RC, MRI) && 304 RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && 305 RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); 306 } 307 308 return false; 309 } 310 311 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { 312 MachineBasicBlock *BB = I.getParent(); 313 MachineFunction *MF = BB->getParent(); 314 MachineRegisterInfo &MRI = MF->getRegInfo(); 315 Register DstReg = I.getOperand(0).getReg(); 316 const DebugLoc &DL = I.getDebugLoc(); 317 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 318 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 319 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; 320 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; 321 322 if (Size == 32) { 323 if (IsSALU) { 324 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 325 MachineInstr *Add = 326 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 327 .add(I.getOperand(1)) 328 .add(I.getOperand(2)); 329 I.eraseFromParent(); 330 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 331 } 332 333 if (STI.hasAddNoCarry()) { 334 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64; 335 I.setDesc(TII.get(Opc)); 336 I.addOperand(*MF, MachineOperand::CreateImm(0)); 337 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 338 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 339 } 340 341 const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; 342 343 Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); 344 MachineInstr *Add 345 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 346 .addDef(UnusedCarry, RegState::Dead) 347 .add(I.getOperand(1)) 348 .add(I.getOperand(2)) 349 .addImm(0); 350 I.eraseFromParent(); 351 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 352 } 353 354 assert(!Sub && "illegal sub should not reach here"); 355 356 const TargetRegisterClass &RC 357 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; 358 const TargetRegisterClass &HalfRC 359 = IsSALU ? 
AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; 360 361 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); 362 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); 363 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); 364 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); 365 366 Register DstLo = MRI.createVirtualRegister(&HalfRC); 367 Register DstHi = MRI.createVirtualRegister(&HalfRC); 368 369 if (IsSALU) { 370 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) 371 .add(Lo1) 372 .add(Lo2); 373 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) 374 .add(Hi1) 375 .add(Hi2); 376 } else { 377 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); 378 Register CarryReg = MRI.createVirtualRegister(CarryRC); 379 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) 380 .addDef(CarryReg) 381 .add(Lo1) 382 .add(Lo2) 383 .addImm(0); 384 MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) 385 .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) 386 .add(Hi1) 387 .add(Hi2) 388 .addReg(CarryReg, RegState::Kill) 389 .addImm(0); 390 391 if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI)) 392 return false; 393 } 394 395 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 396 .addReg(DstLo) 397 .addImm(AMDGPU::sub0) 398 .addReg(DstHi) 399 .addImm(AMDGPU::sub1); 400 401 402 if (!RBI.constrainGenericRegister(DstReg, RC, MRI)) 403 return false; 404 405 I.eraseFromParent(); 406 return true; 407 } 408 409 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { 410 MachineBasicBlock *BB = I.getParent(); 411 MachineFunction *MF = BB->getParent(); 412 MachineRegisterInfo &MRI = MF->getRegInfo(); 413 assert(I.getOperand(2).getImm() % 32 == 0); 414 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); 415 const DebugLoc &DL = I.getDebugLoc(); 416 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), 417 I.getOperand(0).getReg()) 418 .addReg(I.getOperand(1).getReg(), 0, SubReg); 419 420 for (const MachineOperand &MO : Copy->operands()) { 421 const TargetRegisterClass *RC = 422 TRI.getConstrainedRegClassForOperand(MO, MRI); 423 if (!RC) 424 continue; 425 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 426 } 427 I.eraseFromParent(); 428 return true; 429 } 430 431 bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { 432 MachineBasicBlock *BB = MI.getParent(); 433 MachineFunction *MF = BB->getParent(); 434 MachineRegisterInfo &MRI = MF->getRegInfo(); 435 Register DstReg = MI.getOperand(0).getReg(); 436 LLT DstTy = MRI.getType(DstReg); 437 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 438 439 const unsigned SrcSize = SrcTy.getSizeInBits(); 440 if (SrcSize < 32) 441 return false; 442 443 const DebugLoc &DL = MI.getDebugLoc(); 444 const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI); 445 const unsigned DstSize = DstTy.getSizeInBits(); 446 const TargetRegisterClass *DstRC = 447 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI); 448 if (!DstRC) 449 return false; 450 451 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8); 452 MachineInstrBuilder MIB = 453 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg); 454 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) { 455 MachineOperand &Src = MI.getOperand(I + 1); 456 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef())); 457 MIB.addImm(SubRegs[I]); 458 
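// Constrain each source operand where a class can be derived; the REG_SEQUENCE
// result itself is constrained after the loop.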
459 const TargetRegisterClass *SrcRC 460 = TRI.getConstrainedRegClassForOperand(Src, MRI); 461 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI)) 462 return false; 463 } 464 465 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) 466 return false; 467 468 MI.eraseFromParent(); 469 return true; 470 } 471 472 bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { 473 MachineBasicBlock *BB = MI.getParent(); 474 MachineFunction *MF = BB->getParent(); 475 MachineRegisterInfo &MRI = MF->getRegInfo(); 476 const int NumDst = MI.getNumOperands() - 1; 477 478 MachineOperand &Src = MI.getOperand(NumDst); 479 480 Register SrcReg = Src.getReg(); 481 Register DstReg0 = MI.getOperand(0).getReg(); 482 LLT DstTy = MRI.getType(DstReg0); 483 LLT SrcTy = MRI.getType(SrcReg); 484 485 const unsigned DstSize = DstTy.getSizeInBits(); 486 const unsigned SrcSize = SrcTy.getSizeInBits(); 487 const DebugLoc &DL = MI.getDebugLoc(); 488 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 489 490 const TargetRegisterClass *SrcRC = 491 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI); 492 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) 493 return false; 494 495 const unsigned SrcFlags = getUndefRegState(Src.isUndef()); 496 497 // Note we could have mixed SGPR and VGPR destination banks for an SGPR 498 // source, and this relies on the fact that the same subregister indices are 499 // used for both. 500 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); 501 for (int I = 0, E = NumDst; I != E; ++I) { 502 MachineOperand &Dst = MI.getOperand(I); 503 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) 504 .addReg(SrcReg, SrcFlags, SubRegs[I]); 505 506 const TargetRegisterClass *DstRC = 507 TRI.getConstrainedRegClassForOperand(Dst, MRI); 508 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI)) 509 return false; 510 } 511 512 MI.eraseFromParent(); 513 return true; 514 } 515 516 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { 517 return selectG_ADD_SUB(I); 518 } 519 520 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { 521 MachineBasicBlock *BB = I.getParent(); 522 MachineFunction *MF = BB->getParent(); 523 MachineRegisterInfo &MRI = MF->getRegInfo(); 524 const MachineOperand &MO = I.getOperand(0); 525 526 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The 527 // regbank check here is to know why getConstrainedRegClassForOperand failed. 
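// Keep the IMPLICIT_DEF if the operand has neither a class nor a bank assigned
// yet, or if the derived class can be applied successfully.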
528 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
529 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
530 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
531 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
532 return true;
533 }
534
535 return false;
536 }
537
538 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
539 MachineBasicBlock *BB = I.getParent();
540 MachineFunction *MF = BB->getParent();
541 MachineRegisterInfo &MRI = MF->getRegInfo();
542 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
543 DebugLoc DL = I.getDebugLoc();
544 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
545 .addDef(I.getOperand(0).getReg())
546 .addReg(I.getOperand(1).getReg())
547 .addReg(I.getOperand(2).getReg())
548 .addImm(SubReg);
549
550 for (const MachineOperand &MO : Ins->operands()) {
551 if (!MO.isReg())
552 continue;
553 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
554 continue;
555
556 const TargetRegisterClass *RC =
557 TRI.getConstrainedRegClassForOperand(MO, MRI);
558 if (!RC)
559 continue;
560 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
561 }
562 I.eraseFromParent();
563 return true;
564 }
565
566 bool AMDGPUInstructionSelector::selectG_INTRINSIC(
567 MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
568 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
569 switch (IntrinsicID) {
570 case Intrinsic::amdgcn_if_break: {
571 MachineBasicBlock *BB = I.getParent();
572 MachineFunction *MF = BB->getParent();
573 MachineRegisterInfo &MRI = MF->getRegInfo();
574
575 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
576 // SelectionDAG uses for wave32 vs wave64.
577 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
578 .add(I.getOperand(0))
579 .add(I.getOperand(2))
580 .add(I.getOperand(3));
581
582 Register DstReg = I.getOperand(0).getReg();
583 Register Src0Reg = I.getOperand(2).getReg();
584 Register Src1Reg = I.getOperand(3).getReg();
585
586 I.eraseFromParent();
587
588 for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
589 if (!MRI.getRegClassOrNull(Reg))
590 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
591 }
592
593 return true;
594 }
595 default:
596 return selectImpl(I, CoverageInfo);
597 }
598 }
599
600 static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
601 if (Size != 32 && Size != 64)
602 return -1;
603 switch (P) {
604 default:
605 llvm_unreachable("Unknown condition code!");
606 case CmpInst::ICMP_NE:
607 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
608 case CmpInst::ICMP_EQ:
609 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
610 case CmpInst::ICMP_SGT:
611 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
612 case CmpInst::ICMP_SGE:
613 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
614 case CmpInst::ICMP_SLT:
615 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
616 case CmpInst::ICMP_SLE:
617 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
618 case CmpInst::ICMP_UGT:
619 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
620 case CmpInst::ICMP_UGE:
621 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
622 case CmpInst::ICMP_ULT:
623 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
624 case CmpInst::ICMP_ULE:
625 return Size == 32 ?
AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64; 626 } 627 } 628 629 int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, 630 unsigned Size) const { 631 if (Size == 64) { 632 if (!STI.hasScalarCompareEq64()) 633 return -1; 634 635 switch (P) { 636 case CmpInst::ICMP_NE: 637 return AMDGPU::S_CMP_LG_U64; 638 case CmpInst::ICMP_EQ: 639 return AMDGPU::S_CMP_EQ_U64; 640 default: 641 return -1; 642 } 643 } 644 645 if (Size != 32) 646 return -1; 647 648 switch (P) { 649 case CmpInst::ICMP_NE: 650 return AMDGPU::S_CMP_LG_U32; 651 case CmpInst::ICMP_EQ: 652 return AMDGPU::S_CMP_EQ_U32; 653 case CmpInst::ICMP_SGT: 654 return AMDGPU::S_CMP_GT_I32; 655 case CmpInst::ICMP_SGE: 656 return AMDGPU::S_CMP_GE_I32; 657 case CmpInst::ICMP_SLT: 658 return AMDGPU::S_CMP_LT_I32; 659 case CmpInst::ICMP_SLE: 660 return AMDGPU::S_CMP_LE_I32; 661 case CmpInst::ICMP_UGT: 662 return AMDGPU::S_CMP_GT_U32; 663 case CmpInst::ICMP_UGE: 664 return AMDGPU::S_CMP_GE_U32; 665 case CmpInst::ICMP_ULT: 666 return AMDGPU::S_CMP_LT_U32; 667 case CmpInst::ICMP_ULE: 668 return AMDGPU::S_CMP_LE_U32; 669 default: 670 llvm_unreachable("Unknown condition code!"); 671 } 672 } 673 674 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { 675 MachineBasicBlock *BB = I.getParent(); 676 MachineFunction *MF = BB->getParent(); 677 MachineRegisterInfo &MRI = MF->getRegInfo(); 678 const DebugLoc &DL = I.getDebugLoc(); 679 680 unsigned SrcReg = I.getOperand(2).getReg(); 681 unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI); 682 683 auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 684 685 unsigned CCReg = I.getOperand(0).getReg(); 686 if (isSCC(CCReg, MRI)) { 687 int Opcode = getS_CMPOpcode(Pred, Size); 688 if (Opcode == -1) 689 return false; 690 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) 691 .add(I.getOperand(2)) 692 .add(I.getOperand(3)); 693 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) 694 .addReg(AMDGPU::SCC); 695 bool Ret = 696 constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && 697 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI); 698 I.eraseFromParent(); 699 return Ret; 700 } 701 702 int Opcode = getV_CMPOpcode(Pred, Size); 703 if (Opcode == -1) 704 return false; 705 706 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), 707 I.getOperand(0).getReg()) 708 .add(I.getOperand(2)) 709 .add(I.getOperand(3)); 710 RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), 711 *TRI.getBoolRC(), MRI); 712 bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); 713 I.eraseFromParent(); 714 return Ret; 715 } 716 717 static MachineInstr * 718 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, 719 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3, 720 unsigned VM, bool Compr, unsigned Enabled, bool Done) { 721 const DebugLoc &DL = Insert->getDebugLoc(); 722 MachineBasicBlock &BB = *Insert->getParent(); 723 unsigned Opcode = Done ? 
AMDGPU::EXP_DONE : AMDGPU::EXP;
724 return BuildMI(BB, Insert, DL, TII.get(Opcode))
725 .addImm(Tgt)
726 .addReg(Reg0)
727 .addReg(Reg1)
728 .addReg(Reg2)
729 .addReg(Reg3)
730 .addImm(VM)
731 .addImm(Compr)
732 .addImm(Enabled);
733 }
734
735 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
736 MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
737 MachineBasicBlock *BB = I.getParent();
738 MachineFunction *MF = BB->getParent();
739 MachineRegisterInfo &MRI = MF->getRegInfo();
740
741 unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
742 switch (IntrinsicID) {
743 case Intrinsic::amdgcn_exp: {
744 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
745 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
746 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
747 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
748
749 MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
750 I.getOperand(4).getReg(),
751 I.getOperand(5).getReg(),
752 I.getOperand(6).getReg(),
753 VM, false, Enabled, Done);
754
755 I.eraseFromParent();
756 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
757 }
758 case Intrinsic::amdgcn_exp_compr: {
759 const DebugLoc &DL = I.getDebugLoc();
760 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
761 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
762 unsigned Reg0 = I.getOperand(3).getReg();
763 unsigned Reg1 = I.getOperand(4).getReg();
764 unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
765 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
766 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
767
768 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
769 MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
770 true, Enabled, Done);
771
772 I.eraseFromParent();
773 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
774 }
775 case Intrinsic::amdgcn_end_cf: {
776 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
777 // SelectionDAG uses for wave32 vs wave64.
778 BuildMI(*BB, &I, I.getDebugLoc(),
779 TII.get(AMDGPU::SI_END_CF))
780 .add(I.getOperand(1));
781
782 Register Reg = I.getOperand(1).getReg();
783 I.eraseFromParent();
784
785 if (!MRI.getRegClassOrNull(Reg))
786 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
787 return true;
788 }
789 default:
790 return selectImpl(I, CoverageInfo);
791 }
792 }
793
794 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
795 MachineBasicBlock *BB = I.getParent();
796 MachineFunction *MF = BB->getParent();
797 MachineRegisterInfo &MRI = MF->getRegInfo();
798 const DebugLoc &DL = I.getDebugLoc();
799
800 unsigned DstReg = I.getOperand(0).getReg();
801 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
802 assert(Size <= 32 || Size == 64);
803 const MachineOperand &CCOp = I.getOperand(1);
804 unsigned CCReg = CCOp.getReg();
805 if (isSCC(CCReg, MRI)) {
806 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
807 AMDGPU::S_CSELECT_B32;
808 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
809 .addReg(CCReg);
810
811 // The generic constrainSelectedInstRegOperands doesn't work for the scc register
812 // bank, because it doesn't know about the register class we use to represent
813 // it. So we need to manually set the register class here.
814 if (!MRI.getRegClassOrNull(CCReg))
815 MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
816 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
817 .add(I.getOperand(2))
818 .add(I.getOperand(3));
819
820 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
821 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
822 I.eraseFromParent();
823 return Ret;
824 }
825
826 // Wide VGPR select should have been split in RegBankSelect.
827 if (Size > 32)
828 return false;
829
830 MachineInstr *Select =
831 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
832 .addImm(0)
833 .add(I.getOperand(3))
834 .addImm(0)
835 .add(I.getOperand(2))
836 .add(I.getOperand(1));
837
838 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
839 I.eraseFromParent();
840 return Ret;
841 }
842
843 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
844 MachineBasicBlock *BB = I.getParent();
845 MachineFunction *MF = BB->getParent();
846 MachineRegisterInfo &MRI = MF->getRegInfo();
847 DebugLoc DL = I.getDebugLoc();
848 unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
849 if (PtrSize != 64) {
850 LLVM_DEBUG(dbgs() << "Unhandled address space\n");
851 return false;
852 }
853
854 unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
855 unsigned Opcode;
856
857 // FIXME: Remove this when integers > s32 are naturally selected.
858 switch (StoreSize) {
859 default:
860 return false;
861 case 32:
862 Opcode = AMDGPU::FLAT_STORE_DWORD;
863 break;
864 case 64:
865 Opcode = AMDGPU::FLAT_STORE_DWORDX2;
866 break;
867 case 96:
868 Opcode = AMDGPU::FLAT_STORE_DWORDX3;
869 break;
870 case 128:
871 Opcode = AMDGPU::FLAT_STORE_DWORDX4;
872 break;
873 }
874
875 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
876 .add(I.getOperand(1))
877 .add(I.getOperand(0))
878 .addImm(0) // offset
879 .addImm(0) // glc
880 .addImm(0) // slc
881 .addImm(0); // dlc
882
883
884 // Now that we selected an opcode, we need to constrain the register
885 // operands to use appropriate classes.
886 bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); 887 888 I.eraseFromParent(); 889 return Ret; 890 } 891 892 static int sizeToSubRegIndex(unsigned Size) { 893 switch (Size) { 894 case 32: 895 return AMDGPU::sub0; 896 case 64: 897 return AMDGPU::sub0_sub1; 898 case 96: 899 return AMDGPU::sub0_sub1_sub2; 900 case 128: 901 return AMDGPU::sub0_sub1_sub2_sub3; 902 case 256: 903 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; 904 default: 905 if (Size < 32) 906 return AMDGPU::sub0; 907 if (Size > 256) 908 return -1; 909 return sizeToSubRegIndex(PowerOf2Ceil(Size)); 910 } 911 } 912 913 bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { 914 MachineBasicBlock *BB = I.getParent(); 915 MachineFunction *MF = BB->getParent(); 916 MachineRegisterInfo &MRI = MF->getRegInfo(); 917 918 unsigned DstReg = I.getOperand(0).getReg(); 919 unsigned SrcReg = I.getOperand(1).getReg(); 920 const LLT DstTy = MRI.getType(DstReg); 921 const LLT SrcTy = MRI.getType(SrcReg); 922 if (!DstTy.isScalar()) 923 return false; 924 925 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 926 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI); 927 if (SrcRB != DstRB) 928 return false; 929 930 unsigned DstSize = DstTy.getSizeInBits(); 931 unsigned SrcSize = SrcTy.getSizeInBits(); 932 933 const TargetRegisterClass *SrcRC 934 = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI); 935 const TargetRegisterClass *DstRC 936 = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI); 937 938 if (SrcSize > 32) { 939 int SubRegIdx = sizeToSubRegIndex(DstSize); 940 if (SubRegIdx == -1) 941 return false; 942 943 // Deal with weird cases where the class only partially supports the subreg 944 // index. 945 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx); 946 if (!SrcRC) 947 return false; 948 949 I.getOperand(1).setSubReg(SubRegIdx); 950 } 951 952 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 953 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 954 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); 955 return false; 956 } 957 958 I.setDesc(TII.get(TargetOpcode::COPY)); 959 return true; 960 } 961 962 /// \returns true if a bitmask for \p Size bits will be an inline immediate. 963 static bool shouldUseAndMask(unsigned Size, unsigned &Mask) { 964 Mask = maskTrailingOnes<unsigned>(Size); 965 int SignedMask = static_cast<int>(Mask); 966 return SignedMask >= -16 && SignedMask <= 64; 967 } 968 969 bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { 970 bool Signed = I.getOpcode() == AMDGPU::G_SEXT; 971 const DebugLoc &DL = I.getDebugLoc(); 972 MachineBasicBlock &MBB = *I.getParent(); 973 MachineFunction &MF = *MBB.getParent(); 974 MachineRegisterInfo &MRI = MF.getRegInfo(); 975 const unsigned DstReg = I.getOperand(0).getReg(); 976 const unsigned SrcReg = I.getOperand(1).getReg(); 977 978 const LLT DstTy = MRI.getType(DstReg); 979 const LLT SrcTy = MRI.getType(SrcReg); 980 const LLT S1 = LLT::scalar(1); 981 const unsigned SrcSize = SrcTy.getSizeInBits(); 982 const unsigned DstSize = DstTy.getSizeInBits(); 983 if (!DstTy.isScalar()) 984 return false; 985 986 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 987 988 if (SrcBank->getID() == AMDGPU::SCCRegBankID) { 989 if (SrcTy != S1 || DstSize > 64) // Invalid 990 return false; 991 992 unsigned Opcode = 993 DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; 994 const TargetRegisterClass *DstRC = 995 DstSize > 32 ? 
&AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass; 996 997 // FIXME: Create an extra copy to avoid incorrectly constraining the result 998 // of the scc producer. 999 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1000 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) 1001 .addReg(SrcReg); 1002 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 1003 .addReg(TmpReg); 1004 1005 // The instruction operands are backwards from what you would expect. 1006 BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) 1007 .addImm(0) 1008 .addImm(Signed ? -1 : 1); 1009 I.eraseFromParent(); 1010 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 1011 } 1012 1013 if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { 1014 if (SrcTy != S1) // Invalid 1015 return false; 1016 1017 MachineInstr *ExtI = 1018 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 1019 .addImm(0) // src0_modifiers 1020 .addImm(0) // src0 1021 .addImm(0) // src1_modifiers 1022 .addImm(Signed ? -1 : 1) // src1 1023 .addUse(SrcReg); 1024 I.eraseFromParent(); 1025 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1026 } 1027 1028 if (I.getOpcode() == AMDGPU::G_ANYEXT) 1029 return selectCOPY(I); 1030 1031 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { 1032 // 64-bit should have been split up in RegBankSelect 1033 1034 // Try to use an and with a mask if it will save code size. 1035 unsigned Mask; 1036 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1037 MachineInstr *ExtI = 1038 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) 1039 .addImm(Mask) 1040 .addReg(SrcReg); 1041 I.eraseFromParent(); 1042 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1043 } 1044 1045 const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32; 1046 MachineInstr *ExtI = 1047 BuildMI(MBB, I, DL, TII.get(BFE), DstReg) 1048 .addReg(SrcReg) 1049 .addImm(0) // Offset 1050 .addImm(SrcSize); // Width 1051 I.eraseFromParent(); 1052 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1053 } 1054 1055 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { 1056 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI)) 1057 return false; 1058 1059 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { 1060 const unsigned SextOpc = SrcSize == 8 ? 1061 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; 1062 BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) 1063 .addReg(SrcReg); 1064 I.eraseFromParent(); 1065 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1066 } 1067 1068 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; 1069 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1070 1071 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. 1072 if (DstSize > 32 && SrcSize <= 32) { 1073 // We need a 64-bit register source, but the high bits don't matter. 
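// Pack the 32-bit source into sub0 of a fresh 64-bit SGPR pair, with an
// IMPLICIT_DEF in sub1, before feeding it to the 64-bit BFE.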
1074 unsigned ExtReg 1075 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 1076 unsigned UndefReg 1077 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1078 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 1079 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) 1080 .addReg(SrcReg) 1081 .addImm(AMDGPU::sub0) 1082 .addReg(UndefReg) 1083 .addImm(AMDGPU::sub1); 1084 1085 BuildMI(MBB, I, DL, TII.get(BFE64), DstReg) 1086 .addReg(ExtReg) 1087 .addImm(SrcSize << 16); 1088 1089 I.eraseFromParent(); 1090 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); 1091 } 1092 1093 unsigned Mask; 1094 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1095 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) 1096 .addReg(SrcReg) 1097 .addImm(Mask); 1098 } else { 1099 BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) 1100 .addReg(SrcReg) 1101 .addImm(SrcSize << 16); 1102 } 1103 1104 I.eraseFromParent(); 1105 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1106 } 1107 1108 return false; 1109 } 1110 1111 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { 1112 MachineBasicBlock *BB = I.getParent(); 1113 MachineFunction *MF = BB->getParent(); 1114 MachineRegisterInfo &MRI = MF->getRegInfo(); 1115 MachineOperand &ImmOp = I.getOperand(1); 1116 1117 // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 1118 if (ImmOp.isFPImm()) { 1119 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); 1120 ImmOp.ChangeToImmediate(Imm.getZExtValue()); 1121 } else if (ImmOp.isCImm()) { 1122 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); 1123 } 1124 1125 unsigned DstReg = I.getOperand(0).getReg(); 1126 unsigned Size; 1127 bool IsSgpr; 1128 const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); 1129 if (RB) { 1130 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; 1131 Size = MRI.getType(DstReg).getSizeInBits(); 1132 } else { 1133 const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); 1134 IsSgpr = TRI.isSGPRClass(RC); 1135 Size = TRI.getRegSizeInBits(*RC); 1136 } 1137 1138 if (Size != 32 && Size != 64) 1139 return false; 1140 1141 unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 1142 if (Size == 32) { 1143 I.setDesc(TII.get(Opcode)); 1144 I.addImplicitDefUseOperands(*MF); 1145 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1146 } 1147 1148 DebugLoc DL = I.getDebugLoc(); 1149 const TargetRegisterClass *RC = IsSgpr ? 
&AMDGPU::SReg_32_XM0RegClass : 1150 &AMDGPU::VGPR_32RegClass; 1151 unsigned LoReg = MRI.createVirtualRegister(RC); 1152 unsigned HiReg = MRI.createVirtualRegister(RC); 1153 const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); 1154 1155 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) 1156 .addImm(Imm.trunc(32).getZExtValue()); 1157 1158 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) 1159 .addImm(Imm.ashr(32).getZExtValue()); 1160 1161 const MachineInstr *RS = 1162 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 1163 .addReg(LoReg) 1164 .addImm(AMDGPU::sub0) 1165 .addReg(HiReg) 1166 .addImm(AMDGPU::sub1); 1167 1168 // We can't call constrainSelectedInstRegOperands here, because it doesn't 1169 // work for target independent opcodes 1170 I.eraseFromParent(); 1171 const TargetRegisterClass *DstRC = 1172 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); 1173 if (!DstRC) 1174 return true; 1175 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 1176 } 1177 1178 static bool isConstant(const MachineInstr &MI) { 1179 return MI.getOpcode() == TargetOpcode::G_CONSTANT; 1180 } 1181 1182 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, 1183 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { 1184 1185 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); 1186 1187 assert(PtrMI); 1188 1189 if (PtrMI->getOpcode() != TargetOpcode::G_GEP) 1190 return; 1191 1192 GEPInfo GEPInfo(*PtrMI); 1193 1194 for (unsigned i = 1, e = 3; i < e; ++i) { 1195 const MachineOperand &GEPOp = PtrMI->getOperand(i); 1196 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); 1197 assert(OpDef); 1198 if (isConstant(*OpDef)) { 1199 // FIXME: Is it possible to have multiple Imm parts? Maybe if we 1200 // are lacking other optimizations. 1201 assert(GEPInfo.Imm == 0); 1202 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); 1203 continue; 1204 } 1205 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); 1206 if (OpBank->getID() == AMDGPU::SGPRRegBankID) 1207 GEPInfo.SgprParts.push_back(GEPOp.getReg()); 1208 else 1209 GEPInfo.VgprParts.push_back(GEPOp.getReg()); 1210 } 1211 1212 AddrInfo.push_back(GEPInfo); 1213 getAddrModeInfo(*PtrMI, MRI, AddrInfo); 1214 } 1215 1216 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { 1217 if (!MI.hasOneMemOperand()) 1218 return false; 1219 1220 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1221 const Value *Ptr = MMO->getValue(); 1222 1223 // UndefValue means this is a load of a kernel input. These are uniform. 1224 // Sometimes LDS instructions have constant pointers. 1225 // If Ptr is null, then that means this mem operand contains a 1226 // PseudoSourceValue like GOT. 1227 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 1228 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 1229 return true; 1230 1231 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) 1232 return true; 1233 1234 const Instruction *I = dyn_cast<Instruction>(Ptr); 1235 return I && I->getMetadata("amdgpu.uniform"); 1236 } 1237 1238 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const { 1239 for (const GEPInfo &GEPInfo : AddrInfo) { 1240 if (!GEPInfo.VgprParts.empty()) 1241 return true; 1242 } 1243 return false; 1244 } 1245 1246 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { 1247 // TODO: Can/should we insert m0 initialization here for DS instructions and 1248 // call the normal selector? 
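// Note: the G_LOAD case in select() currently goes straight to selectImpl(),
// so this hook is not reached from there.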
1249 return false;
1250 }
1251
1252 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
1253 MachineBasicBlock *BB = I.getParent();
1254 MachineFunction *MF = BB->getParent();
1255 MachineRegisterInfo &MRI = MF->getRegInfo();
1256 MachineOperand &CondOp = I.getOperand(0);
1257 Register CondReg = CondOp.getReg();
1258 const DebugLoc &DL = I.getDebugLoc();
1259
1260 unsigned BrOpcode;
1261 Register CondPhysReg;
1262 const TargetRegisterClass *ConstrainRC;
1263
1264 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1265 // whether the branch is uniform when selecting the instruction. In
1266 // GlobalISel, we should push that decision into RegBankSelect. Assume for now
1267 // RegBankSelect knows what it's doing if the branch condition is scc, even
1268 // though it currently does not.
1269 if (isSCC(CondReg, MRI)) {
1270 CondPhysReg = AMDGPU::SCC;
1271 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
1272 ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
1273 } else if (isVCC(CondReg, MRI)) {
1274 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1275 // Based on the register bank, we sort of know that a VCC producer ands
1276 // inactive lanes with 0. What if there was a logical operation with vcc
1277 // producers in different blocks/with different exec masks?
1278 // FIXME: Should scc->vcc copies and with exec?
1279 CondPhysReg = TRI.getVCC();
1280 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
1281 ConstrainRC = TRI.getBoolRC();
1282 } else
1283 return false;
1284
1285 if (!MRI.getRegClassOrNull(CondReg))
1286 MRI.setRegClass(CondReg, ConstrainRC);
1287
1288 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
1289 .addReg(CondReg);
1290 BuildMI(*BB, &I, DL, TII.get(BrOpcode))
1291 .addMBB(I.getOperand(1).getMBB());
1292
1293 I.eraseFromParent();
1294 return true;
1295 }
1296
1297 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1298 MachineBasicBlock *BB = I.getParent();
1299 MachineFunction *MF = BB->getParent();
1300 MachineRegisterInfo &MRI = MF->getRegInfo();
1301
1302 Register DstReg = I.getOperand(0).getReg();
1303 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1304 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1305 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
1306 if (IsVGPR)
1307 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1308
1309 return RBI.constrainGenericRegister(
1310 DstReg, IsVGPR ?
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI); 1311 } 1312 1313 bool AMDGPUInstructionSelector::select(MachineInstr &I, 1314 CodeGenCoverage &CoverageInfo) const { 1315 if (I.isPHI()) 1316 return selectPHI(I); 1317 1318 if (!isPreISelGenericOpcode(I.getOpcode())) { 1319 if (I.isCopy()) 1320 return selectCOPY(I); 1321 return true; 1322 } 1323 1324 switch (I.getOpcode()) { 1325 case TargetOpcode::G_AND: 1326 case TargetOpcode::G_OR: 1327 case TargetOpcode::G_XOR: 1328 if (selectG_AND_OR_XOR(I)) 1329 return true; 1330 return selectImpl(I, CoverageInfo); 1331 case TargetOpcode::G_ADD: 1332 case TargetOpcode::G_SUB: 1333 if (selectG_ADD_SUB(I)) 1334 return true; 1335 LLVM_FALLTHROUGH; 1336 default: 1337 return selectImpl(I, CoverageInfo); 1338 case TargetOpcode::G_INTTOPTR: 1339 case TargetOpcode::G_BITCAST: 1340 return selectCOPY(I); 1341 case TargetOpcode::G_CONSTANT: 1342 case TargetOpcode::G_FCONSTANT: 1343 return selectG_CONSTANT(I); 1344 case TargetOpcode::G_EXTRACT: 1345 return selectG_EXTRACT(I); 1346 case TargetOpcode::G_MERGE_VALUES: 1347 case TargetOpcode::G_BUILD_VECTOR: 1348 case TargetOpcode::G_CONCAT_VECTORS: 1349 return selectG_MERGE_VALUES(I); 1350 case TargetOpcode::G_UNMERGE_VALUES: 1351 return selectG_UNMERGE_VALUES(I); 1352 case TargetOpcode::G_GEP: 1353 return selectG_GEP(I); 1354 case TargetOpcode::G_IMPLICIT_DEF: 1355 return selectG_IMPLICIT_DEF(I); 1356 case TargetOpcode::G_INSERT: 1357 return selectG_INSERT(I); 1358 case TargetOpcode::G_INTRINSIC: 1359 return selectG_INTRINSIC(I, CoverageInfo); 1360 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 1361 return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); 1362 case TargetOpcode::G_ICMP: 1363 if (selectG_ICMP(I)) 1364 return true; 1365 return selectImpl(I, CoverageInfo); 1366 case TargetOpcode::G_LOAD: 1367 return selectImpl(I, CoverageInfo); 1368 case TargetOpcode::G_SELECT: 1369 return selectG_SELECT(I); 1370 case TargetOpcode::G_STORE: 1371 if (selectImpl(I, CoverageInfo)) 1372 return true; 1373 return selectG_STORE(I); 1374 case TargetOpcode::G_TRUNC: 1375 return selectG_TRUNC(I); 1376 case TargetOpcode::G_SEXT: 1377 case TargetOpcode::G_ZEXT: 1378 case TargetOpcode::G_ANYEXT: 1379 return selectG_SZA_EXT(I); 1380 case TargetOpcode::G_BRCOND: 1381 return selectG_BRCOND(I); 1382 case TargetOpcode::G_FRAME_INDEX: 1383 return selectG_FRAME_INDEX(I); 1384 case TargetOpcode::G_FENCE: 1385 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and 1386 // is checking for G_CONSTANT 1387 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); 1388 return true; 1389 } 1390 return false; 1391 } 1392 1393 InstructionSelector::ComplexRendererFns 1394 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { 1395 return {{ 1396 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1397 }}; 1398 1399 } 1400 1401 std::pair<Register, unsigned> 1402 AMDGPUInstructionSelector::selectVOP3ModsImpl( 1403 Register Src, const MachineRegisterInfo &MRI) const { 1404 unsigned Mods = 0; 1405 MachineInstr *MI = MRI.getVRegDef(Src); 1406 1407 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { 1408 Src = MI->getOperand(1).getReg(); 1409 Mods |= SISrcMods::NEG; 1410 MI = MRI.getVRegDef(Src); 1411 } 1412 1413 if (MI && MI->getOpcode() == AMDGPU::G_FABS) { 1414 Src = MI->getOperand(1).getReg(); 1415 Mods |= SISrcMods::ABS; 1416 } 1417 1418 return std::make_pair(Src, Mods); 1419 } 1420 1421 /// 1422 /// This will select either an SGPR or VGPR operand and will save us from 1423 /// having to write an extra tablegen pattern. 
1424 InstructionSelector::ComplexRendererFns 1425 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { 1426 return {{ 1427 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1428 }}; 1429 } 1430 1431 InstructionSelector::ComplexRendererFns 1432 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { 1433 MachineRegisterInfo &MRI 1434 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1435 1436 Register Src; 1437 unsigned Mods; 1438 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1439 1440 return {{ 1441 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1442 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 1443 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1444 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1445 }}; 1446 } 1447 InstructionSelector::ComplexRendererFns 1448 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { 1449 return {{ 1450 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 1451 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1452 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1453 }}; 1454 } 1455 1456 InstructionSelector::ComplexRendererFns 1457 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { 1458 MachineRegisterInfo &MRI 1459 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1460 1461 Register Src; 1462 unsigned Mods; 1463 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1464 1465 return {{ 1466 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1467 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 1468 }}; 1469 } 1470 1471 InstructionSelector::ComplexRendererFns 1472 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { 1473 MachineRegisterInfo &MRI = 1474 Root.getParent()->getParent()->getParent()->getRegInfo(); 1475 1476 SmallVector<GEPInfo, 4> AddrInfo; 1477 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1478 1479 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1480 return None; 1481 1482 const GEPInfo &GEPInfo = AddrInfo[0]; 1483 1484 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm)) 1485 return None; 1486 1487 unsigned PtrReg = GEPInfo.SgprParts[0]; 1488 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1489 return {{ 1490 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1491 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1492 }}; 1493 } 1494 1495 InstructionSelector::ComplexRendererFns 1496 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { 1497 MachineRegisterInfo &MRI = 1498 Root.getParent()->getParent()->getParent()->getRegInfo(); 1499 1500 SmallVector<GEPInfo, 4> AddrInfo; 1501 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1502 1503 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1504 return None; 1505 1506 const GEPInfo &GEPInfo = AddrInfo[0]; 1507 unsigned PtrReg = GEPInfo.SgprParts[0]; 1508 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1509 if (!isUInt<32>(EncodedImm)) 1510 return None; 1511 1512 return {{ 1513 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1514 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1515 }}; 1516 } 1517 1518 InstructionSelector::ComplexRendererFns 1519 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { 1520 MachineInstr *MI = Root.getParent(); 1521 MachineBasicBlock *MBB = MI->getParent(); 1522 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1523 1524 
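// Walk the G_GEP chain feeding this SMRD address to find an SGPR base and a
// constant offset.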
SmallVector<GEPInfo, 4> AddrInfo;
1525 getAddrModeInfo(*MI, MRI, AddrInfo);
1526
1527 // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
1528 // then we can select all ptr + 32-bit offsets not just immediate offsets.
1529 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1530 return None;
1531
1532 const GEPInfo &GEPInfo = AddrInfo[0];
1533 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1534 return None;
1535
1536 // If we make it this far we have a load with a 32-bit immediate offset.
1537 // It is OK to select this using a sgpr offset, because we have already
1538 // failed trying to select this load into one of the _IMM variants since
1539 // the _IMM Patterns are considered before the _SGPR patterns.
1540 unsigned PtrReg = GEPInfo.SgprParts[0];
1541 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1542 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1543 .addImm(GEPInfo.Imm);
1544 return {{
1545 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1546 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1547 }};
1548 }
1549
1550 template <bool Signed>
1551 InstructionSelector::ComplexRendererFns
1552 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
1553 MachineInstr *MI = Root.getParent();
1554 MachineBasicBlock *MBB = MI->getParent();
1555 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1556
1557 InstructionSelector::ComplexRendererFns Default = {{
1558 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1559 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
1560 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1561 }};
1562
1563 if (!STI.hasFlatInstOffsets())
1564 return Default;
1565
1566 const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
1567 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
1568 return Default;
1569
1570 Optional<int64_t> Offset =
1571 getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
1572 if (!Offset.hasValue())
1573 return Default;
1574
1575 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
1576 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
1577 return Default;
1578
1579 Register BasePtr = OpDef->getOperand(1).getReg();
1580
1581 return {{
1582 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
1583 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
1584 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1585 }};
1586 }
1587
1588 InstructionSelector::ComplexRendererFns
1589 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
1590 return selectFlatOffsetImpl<false>(Root);
1591 }
1592
1593 InstructionSelector::ComplexRendererFns
1594 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
1595 return selectFlatOffsetImpl<true>(Root);
1596 }
1597
1598 // FIXME: Implement
1599 static bool signBitIsZero(const MachineOperand &Op,
1600 const MachineRegisterInfo &MRI) {
1601 return false;
1602 }
1603
1604 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1605 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1606 return PSV && PSV->isStack();
1607 }
1608
1609 InstructionSelector::ComplexRendererFns
1610 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
1611 MachineInstr *MI = Root.getParent();
1612 MachineBasicBlock *MBB = MI->getParent();
1613 MachineFunction *MF = MBB->getParent();
1614 MachineRegisterInfo &MRI =
MF->getRegInfo(); 1615 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1616 1617 int64_t Offset = 0; 1618 if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { 1619 Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 1620 1621 // TODO: Should this be inside the render function? The iterator seems to 1622 // move. 1623 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), 1624 HighBits) 1625 .addImm(Offset & ~4095); 1626 1627 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1628 MIB.addReg(Info->getScratchRSrcReg()); 1629 }, 1630 [=](MachineInstrBuilder &MIB) { // vaddr 1631 MIB.addReg(HighBits); 1632 }, 1633 [=](MachineInstrBuilder &MIB) { // soffset 1634 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1635 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1636 1637 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1638 ? Info->getStackPtrOffsetReg() 1639 : Info->getScratchWaveOffsetReg(); 1640 MIB.addReg(SOffsetReg); 1641 }, 1642 [=](MachineInstrBuilder &MIB) { // offset 1643 MIB.addImm(Offset & 4095); 1644 }}}; 1645 } 1646 1647 assert(Offset == 0); 1648 1649 // Try to fold a frame index directly into the MUBUF vaddr field, and any 1650 // offsets. 1651 Optional<int> FI; 1652 Register VAddr = Root.getReg(); 1653 if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { 1654 if (isBaseWithConstantOffset(Root, MRI)) { 1655 const MachineOperand &LHS = RootDef->getOperand(1); 1656 const MachineOperand &RHS = RootDef->getOperand(2); 1657 const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 1658 const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 1659 if (LHSDef && RHSDef) { 1660 int64_t PossibleOffset = 1661 RHSDef->getOperand(1).getCImm()->getSExtValue(); 1662 if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && 1663 (!STI.privateMemoryResourceIsRangeChecked() || 1664 signBitIsZero(LHS, MRI))) { 1665 if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) 1666 FI = LHSDef->getOperand(1).getIndex(); 1667 else 1668 VAddr = LHS.getReg(); 1669 Offset = PossibleOffset; 1670 } 1671 } 1672 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { 1673 FI = RootDef->getOperand(1).getIndex(); 1674 } 1675 } 1676 1677 // If we don't know this private access is a local stack object, it needs to 1678 // be relative to the entry point's scratch wave offset register. 1679 // TODO: Should split large offsets that don't fit like above. 1680 // TODO: Don't use scratch wave offset just because the offset didn't fit. 1681 Register SOffset = FI.hasValue() ? 
Info->getStackPtrOffsetReg() 1682 : Info->getScratchWaveOffsetReg(); 1683 1684 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1685 MIB.addReg(Info->getScratchRSrcReg()); 1686 }, 1687 [=](MachineInstrBuilder &MIB) { // vaddr 1688 if (FI.hasValue()) 1689 MIB.addFrameIndex(FI.getValue()); 1690 else 1691 MIB.addReg(VAddr); 1692 }, 1693 [=](MachineInstrBuilder &MIB) { // soffset 1694 MIB.addReg(SOffset); 1695 }, 1696 [=](MachineInstrBuilder &MIB) { // offset 1697 MIB.addImm(Offset); 1698 }}}; 1699 } 1700 1701 InstructionSelector::ComplexRendererFns 1702 AMDGPUInstructionSelector::selectMUBUFScratchOffset( 1703 MachineOperand &Root) const { 1704 MachineInstr *MI = Root.getParent(); 1705 MachineBasicBlock *MBB = MI->getParent(); 1706 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1707 1708 int64_t Offset = 0; 1709 if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || 1710 !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) 1711 return {}; 1712 1713 const MachineFunction *MF = MBB->getParent(); 1714 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1715 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1716 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1717 1718 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1719 ? Info->getStackPtrOffsetReg() 1720 : Info->getScratchWaveOffsetReg(); 1721 return {{ 1722 [=](MachineInstrBuilder &MIB) { 1723 MIB.addReg(Info->getScratchRSrcReg()); 1724 }, // rsrc 1725 [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset 1726 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset 1727 }}; 1728 } 1729