//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
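// Helpers for classifying a boolean condition register: SCC-like (a uniform,
// scalar condition) vs. VCC-like (a divergent, per-lane mask). The check is
// based on whichever of a register class or a register bank has been assigned
// to the register so far.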
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to specifically
    // know that the result is VCCRegBank, and not just an SGPR with size 1.
    // An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32, if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}
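// Select G_ADD/G_SUB. 32-bit operations map directly onto a single scalar or
// vector add/sub; 64-bit adds are decomposed below into lo/hi 32-bit halves
// chained through the carry bit (S_ADD_U32/S_ADDC_U32 or
// V_ADD_I32_e64/V_ADDC_U32_e64).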
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
          .add(I.getOperand(1))
          .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
          .addDef(UnusedCarry, RegState::Dead)
          .add(I.getOperand(1))
          .add(I.getOperand(2))
          .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);


  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(2))
      .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
      constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
      RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
    .add(I.getOperand(2))
    .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
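// Helper to emit an EXP or EXP_DONE export with the given target, four source
// registers, and the vm/compr/en fields as immediates.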
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
    .addImm(Tgt)
    .addReg(Reg0)
    .addReg(Reg1)
    .addReg(Reg2)
    .addReg(Reg3)
    .addImm(VM)
    .addImm(Compr)
    .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the SCC
    // register bank, because it does not cover the register class we use to
    // represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)
      .add(I.getOperand(3))
      .addImm(0)
      .add(I.getOperand(2))
      .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
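// For example, a zero-extend from 4 bits can be done with a plain AND against
// 0xF (15), which falls in the integer inline constant range [-16, 64] checked
// above; a 16-bit mask (0xFFFF) is not an inline constant, so the BFE forms
// below are used instead.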
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
      DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
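// A memory access is treated as uniform when its pointer is known to be
// uniform: kernel arguments, constants/globals, the 32-bit constant address
// space, or IR annotated with !amdgpu.uniform metadata.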
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}
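// Select G_BRCOND: a uniform (SCC) condition becomes S_CBRANCH_SCC1 and a
// divergent (VCC) condition becomes S_CBRANCH_VCCNZ, with the virtual
// condition register first copied into the corresponding physical register.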
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
  uint64_t Align = I.getOperand(2).getImm();
  const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  LLT Ty = MRI.getType(DstReg);

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
                                                                  MRI);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
                                                                  MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  Register ImmReg = MRI.createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
    .addImm(Mask);

  if (Ty.getSizeInBits() == 32) {
    BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
      .addReg(SrcReg)
      .addReg(ImmReg);
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI.createVirtualRegister(&RegRC);
  Register LoReg = MRI.createVirtualRegister(&RegRC);
  Register MaskLo = MRI.createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(SrcReg, 0, AMDGPU::sub1);

  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
    .addReg(LoReg)
    .addReg(ImmReg);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(MaskLo)
    .addImm(AMDGPU::sub0)
    .addReg(HiReg)
    .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT.
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  case TargetOpcode::G_PTR_MASK:
    return selectG_PTR_MASK(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};

}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}
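// selectVOP3ModsImpl (above) folds G_FNEG/G_FABS producers into the VOP3
// source modifier bits (SISrcMods::NEG/ABS), returning the underlying source
// register together with the accumulated modifier mask.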
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // clamp
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
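               // The constant was split above: everything above the low 12
               // bits was materialized into HighBits (the vaddr operand) and
               // the low 12 bits become the immediate offset rendered below,
               // so soffset only supplies the stack/wave base register.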
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
          RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit, like above.
  // TODO: Don't use the scratch wave offset just because the offset didn't
  // fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return KnownBits->signBitIsZero(Base.getReg());
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
  MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
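  // Root matched a bare, legal constant, so no vaddr is needed: only rsrc,
  // soffset and the immediate offset are rendered below (this feeds the
  // offset-only MUBUF scratch patterns).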
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO: Handle the (sub n0, c0) case; for now fall through and use a
    // zero offset.
  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO: Materialize the constant address; for now fall through and use a
    // zero offset.
  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}
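
// Illustrative walk-through of selectDS1Addr1Offset above (register names and
// values are hypothetical): given local-memory MIR such as
//   %base:vgpr(p3) = COPY $vgpr0
//   %c:vgpr(s32) = G_CONSTANT i32 64
//   %addr:vgpr(p3) = G_GEP %base, %c
//   G_STORE %val(s32), %addr :: (store 4, addrspace 3)
// isBaseWithConstantOffset() recognizes the add, isDSOffsetLegal() accepts 64
// as an unsigned 16-bit offset, and the renderers return (%base, 64) so the
// DS patterns can encode the offset directly instead of adding it with a
// separate VALU instruction.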