//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
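    // As a fallback, apply a narrow heuristic: treat an s1 value constrained
    // to SReg_32_XM0 as SCC; any other class, or a wider type, is not SCC.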
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to specifically
    // know that the result is VCCRegBank, and not just an SGPR with size 1.
    // An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32, if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC
  // for the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

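    // For illustration only (hand-written, not from a test): merging two s32
    // SGPR values into an s64 builds roughly
    //   %dst:sreg_64 = REG_SEQUENCE %src0, %subreg.sub0, %src1, %subreg.sub1
    // with each source constrained below.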
    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
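  // Two cases are accepted below: the def has neither a class nor a bank yet
  // (nothing to constrain), or a class was found and the register could be
  // constrained to it.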
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                         .add(I.getOperand(2))
                         .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
    .addImm(Tgt)
    .addReg(Reg0)
    .addReg(Reg1)
    .addReg(Reg2)
    .addReg(Reg3)
    .addImm(VM)
    .addImm(Compr)
    .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                              .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it. So we need to manually set the register class here.
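    // Roughly, for a 32-bit scalar select this expands to (illustrative
    // sketch, not from a test):
    //   $scc = COPY %cc
    //   %dst:sreg_32 = S_CSELECT_B32 %trueval, %falseval, implicit $scc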
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
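/// For example, a 6-bit mask (63) is an inline immediate, while a 16-bit mask
/// (0xffff) is not.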
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
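  // Otherwise fall back to the 32-bit constant address space check and the
  // amdgpu.uniform metadata on the pointer-producing instruction.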
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
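    // Sketch of the divergent case selected below (illustrative only):
    //   $vcc = COPY %cond
    //   S_CBRANCH_VCCNZ %bb.target, implicit $vcc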
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};

}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
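  // The offset is materialized into an SGPR below, roughly:
  //   %offset:sreg_32_xm0 = S_MOV_B32 <imm>
  // and used as the soffset operand of the _SGPR form.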
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

// FIXME: Implement
static bool signBitIsZero(const MachineOperand &Op,
                          const MachineRegisterInfo &MRI) {
  return false;
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
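    // The constant is split into a 4096-byte aligned base (Offset & ~4095),
    // materialized into a VGPR here, and a 12-bit remainder (Offset & 4095)
    // that goes in the MUBUF immediate offset field.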
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             signBitIsZero(LHS, MRI))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
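  // Note that signBitIsZero is currently a stub that always returns false (see
  // the FIXME above), so this conservatively rejects the fold on such targets.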
  return signBitIsZero(Base, MRI);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {

  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}