//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           MRI.getType(Reg).getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

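// Select a generic COPY. Copies whose source comes from the scc register bank
// need special handling, since that bank is modeled with 32-bit SGPRs; an
// scc->vcc copy is lowered to a V_CMP against zero instead of a plain copy.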
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();

    // Specially handle scc->vcc copies.
    if (isVCC(DstReg, MRI)) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
      MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

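// Build an operand referring to one 32-bit half (selected by \p SubIdx) of the
// 64-bit register or immediate operand \p MO. Register operands get a COPY of
// the corresponding subregister into a new virtual register of class \p SubRC.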
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

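// Select G_ADD. A 32-bit add maps directly onto S_ADD_U32 or a VALU add; a
// 64-bit add is split into a low add that produces a carry and a high add
// that consumes it, with the halves recombined by a REG_SEQUENCE.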
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;

  if (Size == 32) {
    if (IsSALU) {
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg)
          .add(I.getOperand(1))
          .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg)
        .addDef(UnusedCarry, RegState::Dead)
        .add(I.getOperand(1))
        .add(I.getOperand(2))
        .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                         .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

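// Select G_IMPLICIT_DEF by rewriting it to the target-independent
// IMPLICIT_DEF, constraining the result register when a class can be derived
// from its operand.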
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                        .addDef(I.getOperand(0).getReg())
                        .addReg(I.getOperand(1).getReg())
                        .addReg(I.getOperand(2).getReg())
                        .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);
  default:
    return selectImpl(I, CoverageInfo);
  }
}

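// Map an integer compare predicate and operand size to the corresponding VALU
// compare opcode, or return -1 if the operand size is not 32 or 64 bits.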
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                         .add(I.getOperand(2))
                         .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

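// Emit an EXP or EXP_DONE instruction that exports the four given registers to
// export target \p Tgt with the given enable, compression, and VM fields.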
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
           .addImm(Tgt)
           .addReg(Reg0)
           .addReg(Reg1)
           .addReg(Reg2)
           .addReg(Reg3)
           .addImm(VM)
           .addImm(Compr)
           .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

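// Select G_SELECT. An scc condition becomes S_CSELECT_B32/B64 with an explicit
// copy of the condition into SCC; otherwise a 32-bit VGPR select is lowered to
// V_CNDMASK_B32.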
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                              .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it doesn't cover the register class we use to
    // represent the scc bank. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

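// Select G_STORE. Only 64-bit pointers are currently handled, and every store
// is selected to a FLAT store of the matching width.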
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                         .add(I.getOperand(1))
                         .add(I.getOperand(0))
                         .addImm(0)  // offset
                         .addImm(0)  // glc
                         .addImm(0)  // slc
                         .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

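// Select G_SEXT/G_ZEXT/G_ANYEXT. The lowering depends on the bank of the
// source: scc and vcc sources become conditional selects, VGPR sources use
// V_BFE or an AND mask, and SGPR sources use S_SEXT/S_BFE/S_AND.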
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)               // src0_modifiers
        .addImm(0)               // src0
        .addImm(0)               // src1_modifiers
        .addImm(Signed ? -1 : 1) // src1
        .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
          .addImm(Mask)
          .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
        .addReg(SrcReg)
        .addImm(0)        // Offset
        .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

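// Select G_CONSTANT / G_FCONSTANT into S_MOV_B32 or V_MOV_B32_e32, splitting
// 64-bit materializations into two 32-bit moves joined by a REG_SEQUENCE.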
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(LoReg)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

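// Walk the chain of G_GEPs feeding the address operand of \p Load and record,
// for each one, its constant offset and which address components live in
// SGPRs or VGPRs. Used when selecting loads and SMRD addressing modes.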
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

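// Return true if the single memory operand of \p MI is known to be a uniform
// access (kernel argument, constant pointer, or a pointer explicitly marked
// with amdgpu.uniform metadata).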
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
    return false;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                         .add(I.getOperand(0))
                         .addReg(PtrReg)
                         .addImm(0)  // offset
                         .addImm(0)  // glc
                         .addImm(0)  // slc
                         .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

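// Select G_BRCOND by copying the condition into SCC or VCC and emitting the
// corresponding conditional branch.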
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_ADD:
    if (selectG_ADD(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    if (selectG_SZA_EXT(I)) {
      I.eraseFromParent();
      return true;
    }

    return false;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

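// Peel G_FNEG/G_FABS off of \p Src and return the underlying source register
// together with the accumulated SISrcMods bits.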
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

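// Variant of selectSmrdImm for the SMRD forms that take a 32-bit literal
// offset; fails if the encoded offset does not fit in 32 bits.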
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}