//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
  if (Reg == AMDGPU::SCC)
    return true;

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC)
    return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
           MRI.getType(Reg).getSizeInBits() == 1;

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  // Special case for COPY from the scc register bank. The scc register bank
  // is modeled using 32-bit sgprs.
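  // A copy of an scc-bank value into a 64-bit destination is really an
  // scc->vcc copy, so it is lowered below to a V_CMP_NE_U32_e64 against zero
  // rather than a plain COPY.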
  const MachineOperand &Src = I.getOperand(1);
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);

    // We have a copy from a 32-bit to 64-bit register. This happens
    // when we are selecting scc->vcc copies.
    if (DstSize == 64) {
      const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);
      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    .add(Lo1)
    .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    .add(Hi1)
    .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                           .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
  if (RC)
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  return true;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                          .addDef(I.getOperand(0).getReg())
                          .addReg(I.getOperand(1).getReg())
                          .addReg(I.getOperand(2).getReg())
                          .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC) =
        MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
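  // The result register bank decides the form of the compare: an scc-bank
  // destination is selected to an S_CMP_* followed by a copy from SCC, while
  // any other destination gets a V_CMP_*_e64 writing a 64-bit SGPR pair.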
  assert(Size == 32);

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode = getS_CMPOpcode(
        (CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
                             .addReg(AMDGPU::SCC);
    bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode = getV_CMPOpcode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
      .addImm(Tgt)
      .addReg(Reg0)
      .addReg(Reg1)
      .addReg(Reg2)
      .addReg(Reg3)
      .addImm(VM)
      .addImm(Compr)
      .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool
AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY),
                                    AMDGPU::SCC)
                                .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select.
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space.
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(1))
                           .add(I.getOperand(0))
                           .addImm(0)  // offset
                           .addImm(0)  // glc
                           .addImm(0)  // slc
                           .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
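  // FP constants are re-encoded below as their raw integer bit pattern, and
  // ConstantInt operands are converted to plain integer immediates before the
  // move is selected.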
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
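      // For now, only a single constant part is expected to feed the address,
      // and it is folded into Imm.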
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                           .add(I.getOperand(0))
                           .addReg(PtrReg)
                           .addImm(0)  // offset
                           .addImm(0)  // glc
                           .addImm(0)  // slc
                           .addImm(0); // dlc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_ICMP:
    return selectG_ICMP(I);
  case TargetOpcode::G_LOAD:
    if (selectImpl(I, CoverageInfo))
      return true;
    return selectG_LOAD(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
      .addImm(GEPInfo.Imm);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}