//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
      ,AMDGPUASI(STI.getAMDGPUAS())
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(1)) // vaddr
      .add(I.getOperand(0)) // vdata
      .addImm(0)            // offset
      .addImm(0)            // glc
      .addImm(0);           // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
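  // Convert FP and ConstantInt immediates to plain Imm operands up front so
  // the selection code below only has to deal with getImm().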
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(LoReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
      // are lacking other optimizations.
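      // For now, assume at most one constant offset per G_GEP and fold it
      // into Imm.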
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
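    // Try the SMRD forms in order: a legally encoded immediate offset, the
    // Sea Islands 32-bit literal offset variant, and finally a separate SGPR
    // offset register.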
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addImm(EncodedImm)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
          .addReg(PtrReg)
          .addReg(OffsetReg)
          .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
      .addReg(PtrReg)
      .addImm(0)  // offset
      .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(0)) // vdst
      .addReg(PtrReg)       // vaddr
      .addImm(0)            // offset
      .addImm(0)            // glc
      .addImm(0);           // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode()))
    return true;

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_OR:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}