1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AMDGPU. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUInstructionSelector.h" 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUSubtarget.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 25 #include "llvm/CodeGen/GlobalISel/Utils.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/IR/Type.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/raw_ostream.h" 34 35 #define DEBUG_TYPE "amdgpu-isel" 36 37 using namespace llvm; 38 using namespace MIPatternMatch; 39 40 #define GET_GLOBALISEL_IMPL 41 #define AMDGPUSubtarget GCNSubtarget 42 #include "AMDGPUGenGlobalISel.inc" 43 #undef GET_GLOBALISEL_IMPL 44 #undef AMDGPUSubtarget 45 46 AMDGPUInstructionSelector::AMDGPUInstructionSelector( 47 const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, 48 const AMDGPUTargetMachine &TM) 49 : InstructionSelector(), TII(*STI.getInstrInfo()), 50 TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), 51 STI(STI), 52 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), 53 #define GET_GLOBALISEL_PREDICATES_INIT 54 #include "AMDGPUGenGlobalISel.inc" 55 #undef GET_GLOBALISEL_PREDICATES_INIT 56 #define GET_GLOBALISEL_TEMPORARIES_INIT 57 #include "AMDGPUGenGlobalISel.inc" 58 #undef GET_GLOBALISEL_TEMPORARIES_INIT 59 { 60 } 61 62 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } 63 64 static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { 65 if (Register::isPhysicalRegister(Reg)) 66 return Reg == AMDGPU::SCC; 67 68 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 69 const TargetRegisterClass *RC = 70 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 71 if (RC) { 72 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the 73 // context of the register bank has been lost. 
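// Treat a 1-bit value constrained to SReg_32_XM0 as an SCC result; any other register class is not SCC.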
74 if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID) 75 return false; 76 const LLT Ty = MRI.getType(Reg); 77 return Ty.isValid() && Ty.getSizeInBits() == 1; 78 } 79 80 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 81 return RB->getID() == AMDGPU::SCCRegBankID; 82 } 83 84 bool AMDGPUInstructionSelector::isVCC(Register Reg, 85 const MachineRegisterInfo &MRI) const { 86 if (Register::isPhysicalRegister(Reg)) 87 return Reg == TRI.getVCC(); 88 89 auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 90 const TargetRegisterClass *RC = 91 RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 92 if (RC) { 93 const LLT Ty = MRI.getType(Reg); 94 return RC->hasSuperClassEq(TRI.getBoolRC()) && 95 Ty.isValid() && Ty.getSizeInBits() == 1; 96 } 97 98 const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 99 return RB->getID() == AMDGPU::VCCRegBankID; 100 } 101 102 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { 103 const DebugLoc &DL = I.getDebugLoc(); 104 MachineBasicBlock *BB = I.getParent(); 105 MachineFunction *MF = BB->getParent(); 106 MachineRegisterInfo &MRI = MF->getRegInfo(); 107 I.setDesc(TII.get(TargetOpcode::COPY)); 108 109 const MachineOperand &Src = I.getOperand(1); 110 MachineOperand &Dst = I.getOperand(0); 111 Register DstReg = Dst.getReg(); 112 Register SrcReg = Src.getReg(); 113 114 if (isVCC(DstReg, MRI)) { 115 if (SrcReg == AMDGPU::SCC) { 116 const TargetRegisterClass *RC 117 = TRI.getConstrainedRegClassForOperand(Dst, MRI); 118 if (!RC) 119 return true; 120 return RBI.constrainGenericRegister(DstReg, *RC, MRI); 121 } 122 123 if (!isVCC(SrcReg, MRI)) { 124 // TODO: Should probably leave the copy and let copyPhysReg expand it. 125 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) 126 return false; 127 128 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) 129 .addImm(0) 130 .addReg(SrcReg); 131 132 if (!MRI.getRegClassOrNull(SrcReg)) 133 MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); 134 I.eraseFromParent(); 135 return true; 136 } 137 138 const TargetRegisterClass *RC = 139 TRI.getConstrainedRegClassForOperand(Dst, MRI); 140 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI)) 141 return false; 142 143 // Don't constrain the source register to a class so the def instruction 144 // handles it (unless it's undef). 145 // 146 // FIXME: This is a hack. When selecting the def, we need to know 147 // specifically that the result is VCCRegBank, and not just an SGPR 148 // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 149 if (Src.isUndef()) { 150 const TargetRegisterClass *SrcRC = 151 TRI.getConstrainedRegClassForOperand(Src, MRI); 152 if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) 153 return false; 154 } 155 156 return true; 157 } 158 159 for (const MachineOperand &MO : I.operands()) { 160 if (Register::isPhysicalRegister(MO.getReg())) 161 continue; 162 163 const TargetRegisterClass *RC = 164 TRI.getConstrainedRegClassForOperand(MO, MRI); 165 if (!RC) 166 continue; 167 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 168 } 169 return true; 170 } 171 172 bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { 173 MachineBasicBlock *BB = I.getParent(); 174 MachineFunction *MF = BB->getParent(); 175 MachineRegisterInfo &MRI = MF->getRegInfo(); 176 177 const Register DefReg = I.getOperand(0).getReg(); 178 const LLT DefTy = MRI.getType(DefReg); 179 180 // TODO: Verify this doesn't have insane operands (i.e.
VGPR to SGPR copy) 181 182 const RegClassOrRegBank &RegClassOrBank = 183 MRI.getRegClassOrRegBank(DefReg); 184 185 const TargetRegisterClass *DefRC 186 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 187 if (!DefRC) { 188 if (!DefTy.isValid()) { 189 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 190 return false; 191 } 192 193 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 194 if (RB.getID() == AMDGPU::SCCRegBankID) { 195 LLVM_DEBUG(dbgs() << "illegal scc phi\n"); 196 return false; 197 } 198 199 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI); 200 if (!DefRC) { 201 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 202 return false; 203 } 204 } 205 206 I.setDesc(TII.get(TargetOpcode::PHI)); 207 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 208 } 209 210 MachineOperand 211 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, 212 const TargetRegisterClass &SubRC, 213 unsigned SubIdx) const { 214 215 MachineInstr *MI = MO.getParent(); 216 MachineBasicBlock *BB = MO.getParent()->getParent(); 217 MachineFunction *MF = BB->getParent(); 218 MachineRegisterInfo &MRI = MF->getRegInfo(); 219 Register DstReg = MRI.createVirtualRegister(&SubRC); 220 221 if (MO.isReg()) { 222 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); 223 unsigned Reg = MO.getReg(); 224 BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) 225 .addReg(Reg, 0, ComposedSubIdx); 226 227 return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), 228 MO.isKill(), MO.isDead(), MO.isUndef(), 229 MO.isEarlyClobber(), 0, MO.isDebug(), 230 MO.isInternalRead()); 231 } 232 233 assert(MO.isImm()); 234 235 APInt Imm(64, MO.getImm()); 236 237 switch (SubIdx) { 238 default: 239 llvm_unreachable("do not know how to split immediate with this sub index."); 240 case AMDGPU::sub0: 241 return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue()); 242 case AMDGPU::sub1: 243 return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue()); 244 } 245 } 246 247 static int64_t getConstant(const MachineInstr *MI) { 248 return MI->getOperand(1).getCImm()->getSExtValue(); 249 } 250 251 static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { 252 switch (Opc) { 253 case AMDGPU::G_AND: 254 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; 255 case AMDGPU::G_OR: 256 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; 257 case AMDGPU::G_XOR: 258 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32; 259 default: 260 llvm_unreachable("not a bit op"); 261 } 262 } 263 264 bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { 265 MachineBasicBlock *BB = I.getParent(); 266 MachineFunction *MF = BB->getParent(); 267 MachineRegisterInfo &MRI = MF->getRegInfo(); 268 MachineOperand &Dst = I.getOperand(0); 269 MachineOperand &Src0 = I.getOperand(1); 270 MachineOperand &Src1 = I.getOperand(2); 271 Register DstReg = Dst.getReg(); 272 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 273 274 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 275 if (DstRB->getID() == AMDGPU::VCCRegBankID) { 276 const TargetRegisterClass *RC = TRI.getBoolRC(); 277 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), 278 RC == &AMDGPU::SReg_64RegClass); 279 I.setDesc(TII.get(InstOpc)); 280 281 // FIXME: Hack to avoid turning the register bank into a register class. 282 // The selector for G_ICMP relies on seeing the register bank for the result 283 // is VCC.
In wave32 if we constrain the registers to SReg_32 here, it will 284 // be ambiguous whether it's a scalar or vector bool. 285 if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg())) 286 MRI.setRegClass(Src0.getReg(), RC); 287 if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg())) 288 MRI.setRegClass(Src1.getReg(), RC); 289 290 return RBI.constrainGenericRegister(DstReg, *RC, MRI); 291 } 292 293 // TODO: Should this allow an SCC bank result, and produce a copy from SCC for 294 // the result? 295 if (DstRB->getID() == AMDGPU::SGPRRegBankID) { 296 unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); 297 I.setDesc(TII.get(InstOpc)); 298 299 const TargetRegisterClass *RC 300 = TRI.getConstrainedRegClassForOperand(Dst, MRI); 301 if (!RC) 302 return false; 303 return RBI.constrainGenericRegister(DstReg, *RC, MRI) && 304 RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && 305 RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); 306 } 307 308 return false; 309 } 310 311 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { 312 MachineBasicBlock *BB = I.getParent(); 313 MachineFunction *MF = BB->getParent(); 314 MachineRegisterInfo &MRI = MF->getRegInfo(); 315 Register DstReg = I.getOperand(0).getReg(); 316 const DebugLoc &DL = I.getDebugLoc(); 317 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 318 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 319 const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; 320 const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; 321 322 if (Size == 32) { 323 if (IsSALU) { 324 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 325 MachineInstr *Add = 326 BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 327 .add(I.getOperand(1)) 328 .add(I.getOperand(2)); 329 I.eraseFromParent(); 330 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 331 } 332 333 if (STI.hasAddNoCarry()) { 334 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64; 335 I.setDesc(TII.get(Opc)); 336 I.addOperand(*MF, MachineOperand::CreateImm(0)); 337 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 338 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 339 } 340 341 const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; 342 343 Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); 344 MachineInstr *Add 345 = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 346 .addDef(UnusedCarry, RegState::Dead) 347 .add(I.getOperand(1)) 348 .add(I.getOperand(2)) 349 .addImm(0); 350 I.eraseFromParent(); 351 return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 352 } 353 354 assert(!Sub && "illegal sub should not reach here"); 355 356 const TargetRegisterClass &RC 357 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; 358 const TargetRegisterClass &HalfRC 359 = IsSALU ? 
AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; 360 361 MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); 362 MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); 363 MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); 364 MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); 365 366 Register DstLo = MRI.createVirtualRegister(&HalfRC); 367 Register DstHi = MRI.createVirtualRegister(&HalfRC); 368 369 if (IsSALU) { 370 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) 371 .add(Lo1) 372 .add(Lo2); 373 BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) 374 .add(Hi1) 375 .add(Hi2); 376 } else { 377 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); 378 Register CarryReg = MRI.createVirtualRegister(CarryRC); 379 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) 380 .addDef(CarryReg) 381 .add(Lo1) 382 .add(Lo2) 383 .addImm(0); 384 MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) 385 .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) 386 .add(Hi1) 387 .add(Hi2) 388 .addReg(CarryReg, RegState::Kill) 389 .addImm(0); 390 391 if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI)) 392 return false; 393 } 394 395 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 396 .addReg(DstLo) 397 .addImm(AMDGPU::sub0) 398 .addReg(DstHi) 399 .addImm(AMDGPU::sub1); 400 401 402 if (!RBI.constrainGenericRegister(DstReg, RC, MRI)) 403 return false; 404 405 I.eraseFromParent(); 406 return true; 407 } 408 409 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { 410 MachineBasicBlock *BB = I.getParent(); 411 MachineFunction *MF = BB->getParent(); 412 MachineRegisterInfo &MRI = MF->getRegInfo(); 413 assert(I.getOperand(2).getImm() % 32 == 0); 414 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); 415 const DebugLoc &DL = I.getDebugLoc(); 416 MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), 417 I.getOperand(0).getReg()) 418 .addReg(I.getOperand(1).getReg(), 0, SubReg); 419 420 for (const MachineOperand &MO : Copy->operands()) { 421 const TargetRegisterClass *RC = 422 TRI.getConstrainedRegClassForOperand(MO, MRI); 423 if (!RC) 424 continue; 425 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 426 } 427 I.eraseFromParent(); 428 return true; 429 } 430 431 bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { 432 MachineBasicBlock *BB = MI.getParent(); 433 MachineFunction *MF = BB->getParent(); 434 MachineRegisterInfo &MRI = MF->getRegInfo(); 435 Register DstReg = MI.getOperand(0).getReg(); 436 LLT DstTy = MRI.getType(DstReg); 437 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); 438 439 const unsigned SrcSize = SrcTy.getSizeInBits(); 440 if (SrcSize < 32) 441 return false; 442 443 const DebugLoc &DL = MI.getDebugLoc(); 444 const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI); 445 const unsigned DstSize = DstTy.getSizeInBits(); 446 const TargetRegisterClass *DstRC = 447 TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI); 448 if (!DstRC) 449 return false; 450 451 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8); 452 MachineInstrBuilder MIB = 453 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg); 454 for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) { 455 MachineOperand &Src = MI.getOperand(I + 1); 456 MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef())); 457 MIB.addImm(SubRegs[I]); 458 
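// Constrain each source to its required register class when one is known; fail if a constraint cannot be satisfied.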
459 const TargetRegisterClass *SrcRC 460 = TRI.getConstrainedRegClassForOperand(Src, MRI); 461 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI)) 462 return false; 463 } 464 465 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) 466 return false; 467 468 MI.eraseFromParent(); 469 return true; 470 } 471 472 bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { 473 MachineBasicBlock *BB = MI.getParent(); 474 MachineFunction *MF = BB->getParent(); 475 MachineRegisterInfo &MRI = MF->getRegInfo(); 476 const int NumDst = MI.getNumOperands() - 1; 477 478 MachineOperand &Src = MI.getOperand(NumDst); 479 480 Register SrcReg = Src.getReg(); 481 Register DstReg0 = MI.getOperand(0).getReg(); 482 LLT DstTy = MRI.getType(DstReg0); 483 LLT SrcTy = MRI.getType(SrcReg); 484 485 const unsigned DstSize = DstTy.getSizeInBits(); 486 const unsigned SrcSize = SrcTy.getSizeInBits(); 487 const DebugLoc &DL = MI.getDebugLoc(); 488 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 489 490 const TargetRegisterClass *SrcRC = 491 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI); 492 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) 493 return false; 494 495 const unsigned SrcFlags = getUndefRegState(Src.isUndef()); 496 497 // Note we could have mixed SGPR and VGPR destination banks for an SGPR 498 // source, and this relies on the fact that the same subregister indices are 499 // used for both. 500 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); 501 for (int I = 0, E = NumDst; I != E; ++I) { 502 MachineOperand &Dst = MI.getOperand(I); 503 BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) 504 .addReg(SrcReg, SrcFlags, SubRegs[I]); 505 506 const TargetRegisterClass *DstRC = 507 TRI.getConstrainedRegClassForOperand(Dst, MRI); 508 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI)) 509 return false; 510 } 511 512 MI.eraseFromParent(); 513 return true; 514 } 515 516 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { 517 return selectG_ADD_SUB(I); 518 } 519 520 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { 521 MachineBasicBlock *BB = I.getParent(); 522 MachineFunction *MF = BB->getParent(); 523 MachineRegisterInfo &MRI = MF->getRegInfo(); 524 const MachineOperand &MO = I.getOperand(0); 525 526 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The 527 // regbank check here is to know why getConstrainedRegClassForOperand failed. 
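// Keep the IMPLICIT_DEF if the result has no class or bank assigned yet, or if it can be constrained to the class implied by the operand.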
528 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI); 529 if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) || 530 (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) { 531 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 532 return true; 533 } 534 535 return false; 536 } 537 538 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { 539 MachineBasicBlock *BB = I.getParent(); 540 MachineFunction *MF = BB->getParent(); 541 MachineRegisterInfo &MRI = MF->getRegInfo(); 542 unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32); 543 DebugLoc DL = I.getDebugLoc(); 544 MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) 545 .addDef(I.getOperand(0).getReg()) 546 .addReg(I.getOperand(1).getReg()) 547 .addReg(I.getOperand(2).getReg()) 548 .addImm(SubReg); 549 550 for (const MachineOperand &MO : Ins->operands()) { 551 if (!MO.isReg()) 552 continue; 553 if (Register::isPhysicalRegister(MO.getReg())) 554 continue; 555 556 const TargetRegisterClass *RC = 557 TRI.getConstrainedRegClassForOperand(MO, MRI); 558 if (!RC) 559 continue; 560 RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); 561 } 562 I.eraseFromParent(); 563 return true; 564 } 565 566 bool AMDGPUInstructionSelector::selectG_INTRINSIC( 567 MachineInstr &I, CodeGenCoverage &CoverageInfo) const { 568 unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); 569 switch (IntrinsicID) { 570 case Intrinsic::amdgcn_if_break: { 571 MachineBasicBlock *BB = I.getParent(); 572 MachineFunction *MF = BB->getParent(); 573 MachineRegisterInfo &MRI = MF->getRegInfo(); 574 575 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 576 // SelectionDAG uses for wave32 vs wave64. 577 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) 578 .add(I.getOperand(0)) 579 .add(I.getOperand(2)) 580 .add(I.getOperand(3)); 581 582 Register DstReg = I.getOperand(0).getReg(); 583 Register Src0Reg = I.getOperand(2).getReg(); 584 Register Src1Reg = I.getOperand(3).getReg(); 585 586 I.eraseFromParent(); 587 588 for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { 589 if (!MRI.getRegClassOrNull(Reg)) 590 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); 591 } 592 593 return true; 594 } 595 default: 596 return selectImpl(I, CoverageInfo); 597 } 598 } 599 600 static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) { 601 if (Size != 32 && Size != 64) 602 return -1; 603 switch (P) { 604 default: 605 llvm_unreachable("Unknown condition code!"); 606 case CmpInst::ICMP_NE: 607 return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64; 608 case CmpInst::ICMP_EQ: 609 return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64; 610 case CmpInst::ICMP_SGT: 611 return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64; 612 case CmpInst::ICMP_SGE: 613 return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64; 614 case CmpInst::ICMP_SLT: 615 return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64; 616 case CmpInst::ICMP_SLE: 617 return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64; 618 case CmpInst::ICMP_UGT: 619 return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64; 620 case CmpInst::ICMP_UGE: 621 return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64; 622 case CmpInst::ICMP_ULT: 623 return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64; 624 case CmpInst::ICMP_ULE: 625 return Size == 32 ?
AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64; 626 } 627 } 628 629 int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, 630 unsigned Size) const { 631 if (Size == 64) { 632 if (!STI.hasScalarCompareEq64()) 633 return -1; 634 635 switch (P) { 636 case CmpInst::ICMP_NE: 637 return AMDGPU::S_CMP_LG_U64; 638 case CmpInst::ICMP_EQ: 639 return AMDGPU::S_CMP_EQ_U64; 640 default: 641 return -1; 642 } 643 } 644 645 if (Size != 32) 646 return -1; 647 648 switch (P) { 649 case CmpInst::ICMP_NE: 650 return AMDGPU::S_CMP_LG_U32; 651 case CmpInst::ICMP_EQ: 652 return AMDGPU::S_CMP_EQ_U32; 653 case CmpInst::ICMP_SGT: 654 return AMDGPU::S_CMP_GT_I32; 655 case CmpInst::ICMP_SGE: 656 return AMDGPU::S_CMP_GE_I32; 657 case CmpInst::ICMP_SLT: 658 return AMDGPU::S_CMP_LT_I32; 659 case CmpInst::ICMP_SLE: 660 return AMDGPU::S_CMP_LE_I32; 661 case CmpInst::ICMP_UGT: 662 return AMDGPU::S_CMP_GT_U32; 663 case CmpInst::ICMP_UGE: 664 return AMDGPU::S_CMP_GE_U32; 665 case CmpInst::ICMP_ULT: 666 return AMDGPU::S_CMP_LT_U32; 667 case CmpInst::ICMP_ULE: 668 return AMDGPU::S_CMP_LE_U32; 669 default: 670 llvm_unreachable("Unknown condition code!"); 671 } 672 } 673 674 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { 675 MachineBasicBlock *BB = I.getParent(); 676 MachineFunction *MF = BB->getParent(); 677 MachineRegisterInfo &MRI = MF->getRegInfo(); 678 const DebugLoc &DL = I.getDebugLoc(); 679 680 unsigned SrcReg = I.getOperand(2).getReg(); 681 unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI); 682 683 auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 684 685 unsigned CCReg = I.getOperand(0).getReg(); 686 if (isSCC(CCReg, MRI)) { 687 int Opcode = getS_CMPOpcode(Pred, Size); 688 if (Opcode == -1) 689 return false; 690 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) 691 .add(I.getOperand(2)) 692 .add(I.getOperand(3)); 693 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) 694 .addReg(AMDGPU::SCC); 695 bool Ret = 696 constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && 697 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI); 698 I.eraseFromParent(); 699 return Ret; 700 } 701 702 int Opcode = getV_CMPOpcode(Pred, Size); 703 if (Opcode == -1) 704 return false; 705 706 MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), 707 I.getOperand(0).getReg()) 708 .add(I.getOperand(2)) 709 .add(I.getOperand(3)); 710 RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), 711 *TRI.getBoolRC(), MRI); 712 bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); 713 I.eraseFromParent(); 714 return Ret; 715 } 716 717 static MachineInstr * 718 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, 719 unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3, 720 unsigned VM, bool Compr, unsigned Enabled, bool Done) { 721 const DebugLoc &DL = Insert->getDebugLoc(); 722 MachineBasicBlock &BB = *Insert->getParent(); 723 unsigned Opcode = Done ? 
AMDGPU::EXP_DONE : AMDGPU::EXP; 724 return BuildMI(BB, Insert, DL, TII.get(Opcode)) 725 .addImm(Tgt) 726 .addReg(Reg0) 727 .addReg(Reg1) 728 .addReg(Reg2) 729 .addReg(Reg3) 730 .addImm(VM) 731 .addImm(Compr) 732 .addImm(Enabled); 733 } 734 735 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( 736 MachineInstr &I, CodeGenCoverage &CoverageInfo) const { 737 MachineBasicBlock *BB = I.getParent(); 738 MachineFunction *MF = BB->getParent(); 739 MachineRegisterInfo &MRI = MF->getRegInfo(); 740 741 unsigned IntrinsicID = I.getOperand(0).getIntrinsicID(); 742 switch (IntrinsicID) { 743 case Intrinsic::amdgcn_exp: { 744 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); 745 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); 746 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); 747 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); 748 749 MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), 750 I.getOperand(4).getReg(), 751 I.getOperand(5).getReg(), 752 I.getOperand(6).getReg(), 753 VM, false, Enabled, Done); 754 755 I.eraseFromParent(); 756 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 757 } 758 case Intrinsic::amdgcn_exp_compr: { 759 const DebugLoc &DL = I.getDebugLoc(); 760 int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); 761 int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); 762 unsigned Reg0 = I.getOperand(3).getReg(); 763 unsigned Reg1 = I.getOperand(4).getReg(); 764 unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 765 int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); 766 int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); 767 768 BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); 769 MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, 770 true, Enabled, Done); 771 772 I.eraseFromParent(); 773 return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI); 774 } 775 case Intrinsic::amdgcn_end_cf: { 776 // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 777 // SelectionDAG uses for wave32 vs wave64. 778 BuildMI(*BB, &I, I.getDebugLoc(), 779 TII.get(AMDGPU::SI_END_CF)) 780 .add(I.getOperand(1)); 781 782 Register Reg = I.getOperand(1).getReg(); 783 I.eraseFromParent(); 784 785 if (!MRI.getRegClassOrNull(Reg)) 786 MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); 787 return true; 788 } 789 default: 790 return selectImpl(I, CoverageInfo); 791 } 792 } 793 794 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { 795 MachineBasicBlock *BB = I.getParent(); 796 MachineFunction *MF = BB->getParent(); 797 MachineRegisterInfo &MRI = MF->getRegInfo(); 798 const DebugLoc &DL = I.getDebugLoc(); 799 800 unsigned DstReg = I.getOperand(0).getReg(); 801 unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); 802 assert(Size <= 32 || Size == 64); 803 const MachineOperand &CCOp = I.getOperand(1); 804 unsigned CCReg = CCOp.getReg(); 805 if (isSCC(CCReg, MRI)) { 806 unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 : 807 AMDGPU::S_CSELECT_B32; 808 MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 809 .addReg(CCReg); 810 811 // The generic constrainSelectedInstRegOperands doesn't work for the scc register 812 // bank, because it does not cover the register class that we use to 813 // represent it. So we need to manually set the register class here.
814 if (!MRI.getRegClassOrNull(CCReg)) 815 MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI)); 816 MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) 817 .add(I.getOperand(2)) 818 .add(I.getOperand(3)); 819 820 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) | 821 constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); 822 I.eraseFromParent(); 823 return Ret; 824 } 825 826 // Wide VGPR select should have been split in RegBankSelect. 827 if (Size > 32) 828 return false; 829 830 MachineInstr *Select = 831 BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 832 .addImm(0) 833 .add(I.getOperand(3)) 834 .addImm(0) 835 .add(I.getOperand(2)) 836 .add(I.getOperand(1)); 837 838 bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); 839 I.eraseFromParent(); 840 return Ret; 841 } 842 843 bool AMDGPUInstructionSelector::selectG_STORE( 844 MachineInstr &I, CodeGenCoverage &CoverageInfo) const { 845 initM0(I); 846 return selectImpl(I, CoverageInfo); 847 } 848 849 static int sizeToSubRegIndex(unsigned Size) { 850 switch (Size) { 851 case 32: 852 return AMDGPU::sub0; 853 case 64: 854 return AMDGPU::sub0_sub1; 855 case 96: 856 return AMDGPU::sub0_sub1_sub2; 857 case 128: 858 return AMDGPU::sub0_sub1_sub2_sub3; 859 case 256: 860 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; 861 default: 862 if (Size < 32) 863 return AMDGPU::sub0; 864 if (Size > 256) 865 return -1; 866 return sizeToSubRegIndex(PowerOf2Ceil(Size)); 867 } 868 } 869 870 bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { 871 MachineBasicBlock *BB = I.getParent(); 872 MachineFunction *MF = BB->getParent(); 873 MachineRegisterInfo &MRI = MF->getRegInfo(); 874 875 unsigned DstReg = I.getOperand(0).getReg(); 876 unsigned SrcReg = I.getOperand(1).getReg(); 877 const LLT DstTy = MRI.getType(DstReg); 878 const LLT SrcTy = MRI.getType(SrcReg); 879 if (!DstTy.isScalar()) 880 return false; 881 882 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 883 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI); 884 if (SrcRB != DstRB) 885 return false; 886 887 unsigned DstSize = DstTy.getSizeInBits(); 888 unsigned SrcSize = SrcTy.getSizeInBits(); 889 890 const TargetRegisterClass *SrcRC 891 = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI); 892 const TargetRegisterClass *DstRC 893 = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI); 894 895 if (SrcSize > 32) { 896 int SubRegIdx = sizeToSubRegIndex(DstSize); 897 if (SubRegIdx == -1) 898 return false; 899 900 // Deal with weird cases where the class only partially supports the subreg 901 // index. 902 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx); 903 if (!SrcRC) 904 return false; 905 906 I.getOperand(1).setSubReg(SubRegIdx); 907 } 908 909 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 910 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 911 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); 912 return false; 913 } 914 915 I.setDesc(TII.get(TargetOpcode::COPY)); 916 return true; 917 } 918 919 /// \returns true if a bitmask for \p Size bits will be an inline immediate. 
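/// For example, a 4-bit mask (0xf) can be encoded as an inline immediate, while a 16-bit mask (0xffff) cannot.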
920 static bool shouldUseAndMask(unsigned Size, unsigned &Mask) { 921 Mask = maskTrailingOnes<unsigned>(Size); 922 int SignedMask = static_cast<int>(Mask); 923 return SignedMask >= -16 && SignedMask <= 64; 924 } 925 926 bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { 927 bool Signed = I.getOpcode() == AMDGPU::G_SEXT; 928 const DebugLoc &DL = I.getDebugLoc(); 929 MachineBasicBlock &MBB = *I.getParent(); 930 MachineFunction &MF = *MBB.getParent(); 931 MachineRegisterInfo &MRI = MF.getRegInfo(); 932 const unsigned DstReg = I.getOperand(0).getReg(); 933 const unsigned SrcReg = I.getOperand(1).getReg(); 934 935 const LLT DstTy = MRI.getType(DstReg); 936 const LLT SrcTy = MRI.getType(SrcReg); 937 const LLT S1 = LLT::scalar(1); 938 const unsigned SrcSize = SrcTy.getSizeInBits(); 939 const unsigned DstSize = DstTy.getSizeInBits(); 940 if (!DstTy.isScalar()) 941 return false; 942 943 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); 944 945 if (SrcBank->getID() == AMDGPU::SCCRegBankID) { 946 if (SrcTy != S1 || DstSize > 64) // Invalid 947 return false; 948 949 unsigned Opcode = 950 DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; 951 const TargetRegisterClass *DstRC = 952 DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass; 953 954 // FIXME: Create an extra copy to avoid incorrectly constraining the result 955 // of the scc producer. 956 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 957 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) 958 .addReg(SrcReg); 959 BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 960 .addReg(TmpReg); 961 962 // The instruction operands are backwards from what you would expect. 963 BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) 964 .addImm(0) 965 .addImm(Signed ? -1 : 1); 966 I.eraseFromParent(); 967 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 968 } 969 970 if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { 971 if (SrcTy != S1) // Invalid 972 return false; 973 974 MachineInstr *ExtI = 975 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 976 .addImm(0) // src0_modifiers 977 .addImm(0) // src0 978 .addImm(0) // src1_modifiers 979 .addImm(Signed ? -1 : 1) // src1 980 .addUse(SrcReg); 981 I.eraseFromParent(); 982 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 983 } 984 985 if (I.getOpcode() == AMDGPU::G_ANYEXT) 986 return selectCOPY(I); 987 988 if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { 989 // 64-bit should have been split up in RegBankSelect 990 991 // Try to use an and with a mask if it will save code size. 992 unsigned Mask; 993 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 994 MachineInstr *ExtI = 995 BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) 996 .addImm(Mask) 997 .addReg(SrcReg); 998 I.eraseFromParent(); 999 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1000 } 1001 1002 const unsigned BFE = Signed ? 
AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32; 1003 MachineInstr *ExtI = 1004 BuildMI(MBB, I, DL, TII.get(BFE), DstReg) 1005 .addReg(SrcReg) 1006 .addImm(0) // Offset 1007 .addImm(SrcSize); // Width 1008 I.eraseFromParent(); 1009 return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 1010 } 1011 1012 if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { 1013 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI)) 1014 return false; 1015 1016 if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { 1017 const unsigned SextOpc = SrcSize == 8 ? 1018 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; 1019 BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) 1020 .addReg(SrcReg); 1021 I.eraseFromParent(); 1022 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1023 } 1024 1025 const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; 1026 const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1027 1028 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. 1029 if (DstSize > 32 && SrcSize <= 32) { 1030 // We need a 64-bit register source, but the high bits don't matter. 1031 unsigned ExtReg 1032 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 1033 unsigned UndefReg 1034 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1035 BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 1036 BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) 1037 .addReg(SrcReg) 1038 .addImm(AMDGPU::sub0) 1039 .addReg(UndefReg) 1040 .addImm(AMDGPU::sub1); 1041 1042 BuildMI(MBB, I, DL, TII.get(BFE64), DstReg) 1043 .addReg(ExtReg) 1044 .addImm(SrcSize << 16); 1045 1046 I.eraseFromParent(); 1047 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); 1048 } 1049 1050 unsigned Mask; 1051 if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 1052 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) 1053 .addReg(SrcReg) 1054 .addImm(Mask); 1055 } else { 1056 BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) 1057 .addReg(SrcReg) 1058 .addImm(SrcSize << 16); 1059 } 1060 1061 I.eraseFromParent(); 1062 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); 1063 } 1064 1065 return false; 1066 } 1067 1068 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { 1069 MachineBasicBlock *BB = I.getParent(); 1070 MachineFunction *MF = BB->getParent(); 1071 MachineRegisterInfo &MRI = MF->getRegInfo(); 1072 MachineOperand &ImmOp = I.getOperand(1); 1073 1074 // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 1075 if (ImmOp.isFPImm()) { 1076 const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); 1077 ImmOp.ChangeToImmediate(Imm.getZExtValue()); 1078 } else if (ImmOp.isCImm()) { 1079 ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); 1080 } 1081 1082 unsigned DstReg = I.getOperand(0).getReg(); 1083 unsigned Size; 1084 bool IsSgpr; 1085 const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); 1086 if (RB) { 1087 IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; 1088 Size = MRI.getType(DstReg).getSizeInBits(); 1089 } else { 1090 const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); 1091 IsSgpr = TRI.isSGPRClass(RC); 1092 Size = TRI.getRegSizeInBits(*RC); 1093 } 1094 1095 if (Size != 32 && Size != 64) 1096 return false; 1097 1098 unsigned Opcode = IsSgpr ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 1099 if (Size == 32) { 1100 I.setDesc(TII.get(Opcode)); 1101 I.addImplicitDefUseOperands(*MF); 1102 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1103 } 1104 1105 DebugLoc DL = I.getDebugLoc(); 1106 const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : 1107 &AMDGPU::VGPR_32RegClass; 1108 unsigned LoReg = MRI.createVirtualRegister(RC); 1109 unsigned HiReg = MRI.createVirtualRegister(RC); 1110 const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); 1111 1112 BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) 1113 .addImm(Imm.trunc(32).getZExtValue()); 1114 1115 BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) 1116 .addImm(Imm.ashr(32).getZExtValue()); 1117 1118 const MachineInstr *RS = 1119 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 1120 .addReg(LoReg) 1121 .addImm(AMDGPU::sub0) 1122 .addReg(HiReg) 1123 .addImm(AMDGPU::sub1); 1124 1125 // We can't call constrainSelectedInstRegOperands here, because it doesn't 1126 // work for target independent opcodes 1127 I.eraseFromParent(); 1128 const TargetRegisterClass *DstRC = 1129 TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); 1130 if (!DstRC) 1131 return true; 1132 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 1133 } 1134 1135 static bool isConstant(const MachineInstr &MI) { 1136 return MI.getOpcode() == TargetOpcode::G_CONSTANT; 1137 } 1138 1139 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, 1140 const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { 1141 1142 const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); 1143 1144 assert(PtrMI); 1145 1146 if (PtrMI->getOpcode() != TargetOpcode::G_GEP) 1147 return; 1148 1149 GEPInfo GEPInfo(*PtrMI); 1150 1151 for (unsigned i = 1, e = 3; i < e; ++i) { 1152 const MachineOperand &GEPOp = PtrMI->getOperand(i); 1153 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); 1154 assert(OpDef); 1155 if (isConstant(*OpDef)) { 1156 // FIXME: Is it possible to have multiple Imm parts? Maybe if we 1157 // are lacking other optimizations. 1158 assert(GEPInfo.Imm == 0); 1159 GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); 1160 continue; 1161 } 1162 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); 1163 if (OpBank->getID() == AMDGPU::SGPRRegBankID) 1164 GEPInfo.SgprParts.push_back(GEPOp.getReg()); 1165 else 1166 GEPInfo.VgprParts.push_back(GEPOp.getReg()); 1167 } 1168 1169 AddrInfo.push_back(GEPInfo); 1170 getAddrModeInfo(*PtrMI, MRI, AddrInfo); 1171 } 1172 1173 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { 1174 if (!MI.hasOneMemOperand()) 1175 return false; 1176 1177 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1178 const Value *Ptr = MMO->getValue(); 1179 1180 // UndefValue means this is a load of a kernel input. These are uniform. 1181 // Sometimes LDS instructions have constant pointers. 1182 // If Ptr is null, then that means this mem operand contains a 1183 // PseudoSourceValue like GOT. 
1184 if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 1185 isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 1186 return true; 1187 1188 if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) 1189 return true; 1190 1191 const Instruction *I = dyn_cast<Instruction>(Ptr); 1192 return I && I->getMetadata("amdgpu.uniform"); 1193 } 1194 1195 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const { 1196 for (const GEPInfo &GEPInfo : AddrInfo) { 1197 if (!GEPInfo.VgprParts.empty()) 1198 return true; 1199 } 1200 return false; 1201 } 1202 1203 void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { 1204 MachineBasicBlock *BB = I.getParent(); 1205 MachineFunction *MF = BB->getParent(); 1206 MachineRegisterInfo &MRI = MF->getRegInfo(); 1207 1208 const LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 1209 unsigned AS = PtrTy.getAddressSpace(); 1210 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && 1211 STI.ldsRequiresM0Init()) { 1212 // If DS instructions require M0 initialization, insert it before selecting. 1213 BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) 1214 .addImm(-1); 1215 } 1216 } 1217 1218 bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I, 1219 CodeGenCoverage &CoverageInfo) const { 1220 initM0(I); 1221 return selectImpl(I, CoverageInfo); 1222 } 1223 1224 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { 1225 MachineBasicBlock *BB = I.getParent(); 1226 MachineFunction *MF = BB->getParent(); 1227 MachineRegisterInfo &MRI = MF->getRegInfo(); 1228 MachineOperand &CondOp = I.getOperand(0); 1229 Register CondReg = CondOp.getReg(); 1230 const DebugLoc &DL = I.getDebugLoc(); 1231 1232 unsigned BrOpcode; 1233 Register CondPhysReg; 1234 const TargetRegisterClass *ConstrainRC; 1235 1236 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide 1237 // whether the branch is uniform when selecting the instruction. In 1238 // GlobalISel, we should push that decision into RegBankSelect. Assume for now 1239 // RegBankSelect knows what it's doing if the branch condition is scc, even 1240 // though it currently does not. 1241 if (isSCC(CondReg, MRI)) { 1242 CondPhysReg = AMDGPU::SCC; 1243 BrOpcode = AMDGPU::S_CBRANCH_SCC1; 1244 ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; 1245 } else if (isVCC(CondReg, MRI)) { 1246 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? 1247 // We sort of know, based on the register bank, that a VCC producer ands 1248 // inactive lanes with 0. What if there was a logical operation with vcc 1249 // producers in different blocks/with different exec masks? 1250 // FIXME: Should scc->vcc copies and with exec?
1251 CondPhysReg = TRI.getVCC(); 1252 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; 1253 ConstrainRC = TRI.getBoolRC(); 1254 } else 1255 return false; 1256 1257 if (!MRI.getRegClassOrNull(CondReg)) 1258 MRI.setRegClass(CondReg, ConstrainRC); 1259 1260 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) 1261 .addReg(CondReg); 1262 BuildMI(*BB, &I, DL, TII.get(BrOpcode)) 1263 .addMBB(I.getOperand(1).getMBB()); 1264 1265 I.eraseFromParent(); 1266 return true; 1267 } 1268 1269 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { 1270 MachineBasicBlock *BB = I.getParent(); 1271 MachineFunction *MF = BB->getParent(); 1272 MachineRegisterInfo &MRI = MF->getRegInfo(); 1273 1274 Register DstReg = I.getOperand(0).getReg(); 1275 const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); 1276 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 1277 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); 1278 if (IsVGPR) 1279 I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 1280 1281 return RBI.constrainGenericRegister( 1282 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI); 1283 } 1284 1285 bool AMDGPUInstructionSelector::select(MachineInstr &I, 1286 CodeGenCoverage &CoverageInfo) const { 1287 if (I.isPHI()) 1288 return selectPHI(I); 1289 1290 if (!isPreISelGenericOpcode(I.getOpcode())) { 1291 if (I.isCopy()) 1292 return selectCOPY(I); 1293 return true; 1294 } 1295 1296 switch (I.getOpcode()) { 1297 case TargetOpcode::G_AND: 1298 case TargetOpcode::G_OR: 1299 case TargetOpcode::G_XOR: 1300 if (selectG_AND_OR_XOR(I)) 1301 return true; 1302 return selectImpl(I, CoverageInfo); 1303 case TargetOpcode::G_ADD: 1304 case TargetOpcode::G_SUB: 1305 if (selectG_ADD_SUB(I)) 1306 return true; 1307 LLVM_FALLTHROUGH; 1308 default: 1309 return selectImpl(I, CoverageInfo); 1310 case TargetOpcode::G_INTTOPTR: 1311 case TargetOpcode::G_BITCAST: 1312 return selectCOPY(I); 1313 case TargetOpcode::G_CONSTANT: 1314 case TargetOpcode::G_FCONSTANT: 1315 return selectG_CONSTANT(I); 1316 case TargetOpcode::G_EXTRACT: 1317 return selectG_EXTRACT(I); 1318 case TargetOpcode::G_MERGE_VALUES: 1319 case TargetOpcode::G_BUILD_VECTOR: 1320 case TargetOpcode::G_CONCAT_VECTORS: 1321 return selectG_MERGE_VALUES(I); 1322 case TargetOpcode::G_UNMERGE_VALUES: 1323 return selectG_UNMERGE_VALUES(I); 1324 case TargetOpcode::G_GEP: 1325 return selectG_GEP(I); 1326 case TargetOpcode::G_IMPLICIT_DEF: 1327 return selectG_IMPLICIT_DEF(I); 1328 case TargetOpcode::G_INSERT: 1329 return selectG_INSERT(I); 1330 case TargetOpcode::G_INTRINSIC: 1331 return selectG_INTRINSIC(I, CoverageInfo); 1332 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 1333 return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); 1334 case TargetOpcode::G_ICMP: 1335 if (selectG_ICMP(I)) 1336 return true; 1337 return selectImpl(I, CoverageInfo); 1338 case TargetOpcode::G_LOAD: 1339 case TargetOpcode::G_ATOMIC_CMPXCHG: 1340 case TargetOpcode::G_ATOMICRMW_XCHG: 1341 case TargetOpcode::G_ATOMICRMW_ADD: 1342 case TargetOpcode::G_ATOMICRMW_SUB: 1343 case TargetOpcode::G_ATOMICRMW_AND: 1344 case TargetOpcode::G_ATOMICRMW_OR: 1345 case TargetOpcode::G_ATOMICRMW_XOR: 1346 case TargetOpcode::G_ATOMICRMW_MIN: 1347 case TargetOpcode::G_ATOMICRMW_MAX: 1348 case TargetOpcode::G_ATOMICRMW_UMIN: 1349 case TargetOpcode::G_ATOMICRMW_UMAX: 1350 case TargetOpcode::G_ATOMICRMW_FADD: 1351 return selectG_LOAD_ATOMICRMW(I, CoverageInfo); 1352 case TargetOpcode::G_SELECT: 1353 return selectG_SELECT(I); 1354 case 
TargetOpcode::G_STORE: 1355 return selectG_STORE(I, CoverageInfo); 1356 case TargetOpcode::G_TRUNC: 1357 return selectG_TRUNC(I); 1358 case TargetOpcode::G_SEXT: 1359 case TargetOpcode::G_ZEXT: 1360 case TargetOpcode::G_ANYEXT: 1361 return selectG_SZA_EXT(I); 1362 case TargetOpcode::G_BRCOND: 1363 return selectG_BRCOND(I); 1364 case TargetOpcode::G_FRAME_INDEX: 1365 return selectG_FRAME_INDEX(I); 1366 case TargetOpcode::G_FENCE: 1367 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and 1368 // is checking for G_CONSTANT 1369 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); 1370 return true; 1371 } 1372 return false; 1373 } 1374 1375 InstructionSelector::ComplexRendererFns 1376 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { 1377 return {{ 1378 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1379 }}; 1380 1381 } 1382 1383 std::pair<Register, unsigned> 1384 AMDGPUInstructionSelector::selectVOP3ModsImpl( 1385 Register Src, const MachineRegisterInfo &MRI) const { 1386 unsigned Mods = 0; 1387 MachineInstr *MI = MRI.getVRegDef(Src); 1388 1389 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { 1390 Src = MI->getOperand(1).getReg(); 1391 Mods |= SISrcMods::NEG; 1392 MI = MRI.getVRegDef(Src); 1393 } 1394 1395 if (MI && MI->getOpcode() == AMDGPU::G_FABS) { 1396 Src = MI->getOperand(1).getReg(); 1397 Mods |= SISrcMods::ABS; 1398 } 1399 1400 return std::make_pair(Src, Mods); 1401 } 1402 1403 /// 1404 /// This will select either an SGPR or VGPR operand and will save us from 1405 /// having to write an extra tablegen pattern. 1406 InstructionSelector::ComplexRendererFns 1407 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { 1408 return {{ 1409 [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 1410 }}; 1411 } 1412 1413 InstructionSelector::ComplexRendererFns 1414 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { 1415 MachineRegisterInfo &MRI 1416 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1417 1418 Register Src; 1419 unsigned Mods; 1420 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1421 1422 return {{ 1423 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1424 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 1425 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1426 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1427 }}; 1428 } 1429 InstructionSelector::ComplexRendererFns 1430 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { 1431 return {{ 1432 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 1433 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 1434 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 1435 }}; 1436 } 1437 1438 InstructionSelector::ComplexRendererFns 1439 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { 1440 MachineRegisterInfo &MRI 1441 = Root.getParent()->getParent()->getParent()->getRegInfo(); 1442 1443 Register Src; 1444 unsigned Mods; 1445 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); 1446 1447 return {{ 1448 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 1449 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 1450 }}; 1451 } 1452 1453 InstructionSelector::ComplexRendererFns 1454 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { 1455 MachineRegisterInfo &MRI = 1456 Root.getParent()->getParent()->getParent()->getRegInfo(); 1457 1458 SmallVector<GEPInfo, 4> AddrInfo; 1459 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1460 
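// Only fold an addressing mode with exactly one SGPR base pointer and a legal immediate offset.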
1461 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1462 return None; 1463 1464 const GEPInfo &GEPInfo = AddrInfo[0]; 1465 1466 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm)) 1467 return None; 1468 1469 unsigned PtrReg = GEPInfo.SgprParts[0]; 1470 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1471 return {{ 1472 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1473 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1474 }}; 1475 } 1476 1477 InstructionSelector::ComplexRendererFns 1478 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { 1479 MachineRegisterInfo &MRI = 1480 Root.getParent()->getParent()->getParent()->getRegInfo(); 1481 1482 SmallVector<GEPInfo, 4> AddrInfo; 1483 getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); 1484 1485 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1486 return None; 1487 1488 const GEPInfo &GEPInfo = AddrInfo[0]; 1489 unsigned PtrReg = GEPInfo.SgprParts[0]; 1490 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); 1491 if (!isUInt<32>(EncodedImm)) 1492 return None; 1493 1494 return {{ 1495 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1496 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } 1497 }}; 1498 } 1499 1500 InstructionSelector::ComplexRendererFns 1501 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { 1502 MachineInstr *MI = Root.getParent(); 1503 MachineBasicBlock *MBB = MI->getParent(); 1504 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1505 1506 SmallVector<GEPInfo, 4> AddrInfo; 1507 getAddrModeInfo(*MI, MRI, AddrInfo); 1508 1509 // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, 1510 // then we can select all ptr + 32-bit offsets not just immediate offsets. 1511 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 1512 return None; 1513 1514 const GEPInfo &GEPInfo = AddrInfo[0]; 1515 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm)) 1516 return None; 1517 1518 // If we make it this far we have a load with a 32-bit immediate offset. 1519 // It is OK to select this using an sgpr offset, because we have already 1520 // failed trying to select this load into one of the _IMM variants since 1521 // the _IMM patterns are considered before the _SGPR patterns.
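// Materialize the 32-bit offset in an SGPR so it can be used as the soffset operand of the _SGPR form.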
1522 unsigned PtrReg = GEPInfo.SgprParts[0]; 1523 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 1524 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) 1525 .addImm(GEPInfo.Imm); 1526 return {{ 1527 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 1528 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); } 1529 }}; 1530 } 1531 1532 template <bool Signed> 1533 InstructionSelector::ComplexRendererFns 1534 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { 1535 MachineInstr *MI = Root.getParent(); 1536 MachineBasicBlock *MBB = MI->getParent(); 1537 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1538 1539 InstructionSelector::ComplexRendererFns Default = {{ 1540 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, 1541 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset 1542 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc 1543 }}; 1544 1545 if (!STI.hasFlatInstOffsets()) 1546 return Default; 1547 1548 const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); 1549 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) 1550 return Default; 1551 1552 Optional<int64_t> Offset = 1553 getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); 1554 if (!Offset.hasValue()) 1555 return Default; 1556 1557 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); 1558 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed)) 1559 return Default; 1560 1561 Register BasePtr = OpDef->getOperand(1).getReg(); 1562 1563 return {{ 1564 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); }, 1565 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); }, 1566 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc 1567 }}; 1568 } 1569 1570 InstructionSelector::ComplexRendererFns 1571 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const { 1572 return selectFlatOffsetImpl<false>(Root); 1573 } 1574 1575 InstructionSelector::ComplexRendererFns 1576 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { 1577 return selectFlatOffsetImpl<true>(Root); 1578 } 1579 1580 // FIXME: Implement 1581 static bool signBitIsZero(const MachineOperand &Op, 1582 const MachineRegisterInfo &MRI) { 1583 return false; 1584 } 1585 1586 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { 1587 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>(); 1588 return PSV && PSV->isStack(); 1589 } 1590 1591 InstructionSelector::ComplexRendererFns 1592 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { 1593 MachineInstr *MI = Root.getParent(); 1594 MachineBasicBlock *MBB = MI->getParent(); 1595 MachineFunction *MF = MBB->getParent(); 1596 MachineRegisterInfo &MRI = MF->getRegInfo(); 1597 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1598 1599 int64_t Offset = 0; 1600 if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { 1601 Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 1602 1603 // TODO: Should this be inside the render function? The iterator seems to 1604 // move. 
1605 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), 1606 HighBits) 1607 .addImm(Offset & ~4095); 1608 1609 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1610 MIB.addReg(Info->getScratchRSrcReg()); 1611 }, 1612 [=](MachineInstrBuilder &MIB) { // vaddr 1613 MIB.addReg(HighBits); 1614 }, 1615 [=](MachineInstrBuilder &MIB) { // soffset 1616 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1617 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1618 1619 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1620 ? Info->getStackPtrOffsetReg() 1621 : Info->getScratchWaveOffsetReg(); 1622 MIB.addReg(SOffsetReg); 1623 }, 1624 [=](MachineInstrBuilder &MIB) { // offset 1625 MIB.addImm(Offset & 4095); 1626 }}}; 1627 } 1628 1629 assert(Offset == 0); 1630 1631 // Try to fold a frame index directly into the MUBUF vaddr field, and any 1632 // offsets. 1633 Optional<int> FI; 1634 Register VAddr = Root.getReg(); 1635 if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { 1636 if (isBaseWithConstantOffset(Root, MRI)) { 1637 const MachineOperand &LHS = RootDef->getOperand(1); 1638 const MachineOperand &RHS = RootDef->getOperand(2); 1639 const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 1640 const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 1641 if (LHSDef && RHSDef) { 1642 int64_t PossibleOffset = 1643 RHSDef->getOperand(1).getCImm()->getSExtValue(); 1644 if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && 1645 (!STI.privateMemoryResourceIsRangeChecked() || 1646 signBitIsZero(LHS, MRI))) { 1647 if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) 1648 FI = LHSDef->getOperand(1).getIndex(); 1649 else 1650 VAddr = LHS.getReg(); 1651 Offset = PossibleOffset; 1652 } 1653 } 1654 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { 1655 FI = RootDef->getOperand(1).getIndex(); 1656 } 1657 } 1658 1659 // If we don't know this private access is a local stack object, it needs to 1660 // be relative to the entry point's scratch wave offset register. 1661 // TODO: Should split large offsets that don't fit like above. 1662 // TODO: Don't use scratch wave offset just because the offset didn't fit. 1663 Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg() 1664 : Info->getScratchWaveOffsetReg(); 1665 1666 return {{[=](MachineInstrBuilder &MIB) { // rsrc 1667 MIB.addReg(Info->getScratchRSrcReg()); 1668 }, 1669 [=](MachineInstrBuilder &MIB) { // vaddr 1670 if (FI.hasValue()) 1671 MIB.addFrameIndex(FI.getValue()); 1672 else 1673 MIB.addReg(VAddr); 1674 }, 1675 [=](MachineInstrBuilder &MIB) { // soffset 1676 MIB.addReg(SOffset); 1677 }, 1678 [=](MachineInstrBuilder &MIB) { // offset 1679 MIB.addImm(Offset); 1680 }}}; 1681 } 1682 1683 bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, 1684 const MachineOperand &Base, 1685 int64_t Offset, 1686 unsigned OffsetBits) const { 1687 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 1688 (OffsetBits == 8 && !isUInt<8>(Offset))) 1689 return false; 1690 1691 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) 1692 return true; 1693 1694 // On Southern Islands, instructions with a negative base value and an offset 1695 // don't seem to work.
1696 return signBitIsZero(Base, MRI); 1697 } 1698 1699 InstructionSelector::ComplexRendererFns 1700 AMDGPUInstructionSelector::selectMUBUFScratchOffset( 1701 MachineOperand &Root) const { 1702 MachineInstr *MI = Root.getParent(); 1703 MachineBasicBlock *MBB = MI->getParent(); 1704 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1705 1706 int64_t Offset = 0; 1707 if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || 1708 !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) 1709 return {}; 1710 1711 const MachineFunction *MF = MBB->getParent(); 1712 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 1713 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1714 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); 1715 1716 Register SOffsetReg = isStackPtrRelative(PtrInfo) 1717 ? Info->getStackPtrOffsetReg() 1718 : Info->getScratchWaveOffsetReg(); 1719 return {{ 1720 [=](MachineInstrBuilder &MIB) { 1721 MIB.addReg(Info->getScratchRSrcReg()); 1722 }, // rsrc 1723 [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset 1724 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset 1725 }}; 1726 } 1727 1728 InstructionSelector::ComplexRendererFns 1729 AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { 1730 MachineInstr *MI = Root.getParent(); 1731 MachineBasicBlock *MBB = MI->getParent(); 1732 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1733 1734 const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 1735 if (!RootDef) { 1736 return {{ 1737 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 1738 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } 1739 }}; 1740 } 1741 1742 int64_t ConstAddr = 0; 1743 if (isBaseWithConstantOffset(Root, MRI)) { 1744 const MachineOperand &LHS = RootDef->getOperand(1); 1745 const MachineOperand &RHS = RootDef->getOperand(2); 1746 const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 1747 const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 1748 if (LHSDef && RHSDef) { 1749 int64_t PossibleOffset = 1750 RHSDef->getOperand(1).getCImm()->getSExtValue(); 1751 if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) { 1752 // (add n0, c0) 1753 return {{ 1754 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 1755 [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } 1756 }}; 1757 } 1758 } 1759 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { 1760 1761 1762 1763 } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) { 1764 1765 1766 } 1767 1768 return {{ 1769 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 1770 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } 1771 }}; 1772 } 1773