//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
      ,AMDGPUASI(STI.getAMDGPUAS())
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(1))
      .add(I.getOperand(0))
      .addImm(0)  // offset
      .addImm(0)  // glc
      .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
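  // FP constants are bitcast to their raw integer bits so the MOV-based
  // selection below only has to handle plain integer immediates.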
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
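    // Try the immediate-offset SMRD forms first (the generic IMM encoding,
    // then the CI-only 32-bit literal variant), and fall back to
    // materializing the offset in an SGPR when it cannot be encoded directly.
    // getSMRDEncodedOffset() is assumed to return the GEP's byte offset in
    // the offset units this subtarget's SMRD encoding expects.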
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addReg(OffsetReg)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
      .addReg(PtrReg)
      .addImm(0)
      .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(0))
      .addReg(PtrReg)
      .addImm(0)  // offset
      .addImm(0)  // glc
      .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_OR:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}