//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
/// operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;


  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for the exec read check.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
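    // (For example a scratch access whose frame index has not been eliminated
    // yet; in that case there is no constant offset to compare, so give up.)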
180 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 181 return false; 182 183 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 184 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 185 return true; 186 } 187 188 return false; 189 } 190 191 static bool isStride64(unsigned Opc) { 192 switch (Opc) { 193 case AMDGPU::DS_READ2ST64_B32: 194 case AMDGPU::DS_READ2ST64_B64: 195 case AMDGPU::DS_WRITE2ST64_B32: 196 case AMDGPU::DS_WRITE2ST64_B64: 197 return true; 198 default: 199 return false; 200 } 201 } 202 203 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, 204 unsigned &Offset, 205 const TargetRegisterInfo *TRI) const { 206 unsigned Opc = LdSt->getOpcode(); 207 if (isDS(Opc)) { 208 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 209 AMDGPU::OpName::offset); 210 if (OffsetImm) { 211 // Normal, single offset LDS instruction. 212 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 213 AMDGPU::OpName::addr); 214 215 BaseReg = AddrReg->getReg(); 216 Offset = OffsetImm->getImm(); 217 return true; 218 } 219 220 // The 2 offset instructions use offset0 and offset1 instead. We can treat 221 // these as a load with a single offset if the 2 offsets are consecutive. We 222 // will use this for some partially aligned loads. 223 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, 224 AMDGPU::OpName::offset0); 225 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, 226 AMDGPU::OpName::offset1); 227 228 uint8_t Offset0 = Offset0Imm->getImm(); 229 uint8_t Offset1 = Offset1Imm->getImm(); 230 231 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 232 // Each of these offsets is in element sized units, so we need to convert 233 // to bytes of the individual reads. 234 235 unsigned EltSize; 236 if (LdSt->mayLoad()) 237 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; 238 else { 239 assert(LdSt->mayStore()); 240 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 241 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); 242 } 243 244 if (isStride64(Opc)) 245 EltSize *= 64; 246 247 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 248 AMDGPU::OpName::addr); 249 BaseReg = AddrReg->getReg(); 250 Offset = EltSize * Offset0; 251 return true; 252 } 253 254 return false; 255 } 256 257 if (isMUBUF(Opc) || isMTBUF(Opc)) { 258 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 259 return false; 260 261 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 262 AMDGPU::OpName::vaddr); 263 if (!AddrReg) 264 return false; 265 266 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 267 AMDGPU::OpName::offset); 268 BaseReg = AddrReg->getReg(); 269 Offset = OffsetImm->getImm(); 270 return true; 271 } 272 273 if (isSMRD(Opc)) { 274 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 275 AMDGPU::OpName::offset); 276 if (!OffsetImm) 277 return false; 278 279 const MachineOperand *SBaseReg = getNamedOperand(*LdSt, 280 AMDGPU::OpName::sbase); 281 BaseReg = SBaseReg->getReg(); 282 Offset = OffsetImm->getImm(); 283 return true; 284 } 285 286 return false; 287 } 288 289 bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, 290 MachineInstr *SecondLdSt, 291 unsigned NumLoads) const { 292 unsigned Opc0 = FirstLdSt->getOpcode(); 293 unsigned Opc1 = SecondLdSt->getOpcode(); 294 295 // TODO: This needs finer tuning 296 if (NumLoads > 4) 297 return false; 298 299 if (isDS(Opc0) && isDS(Opc1)) 300 return true; 301 302 if (isSMRD(Opc0) && isSMRD(Opc1)) 303 return true; 304 305 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) 
&& (isMUBUF(Opc1) || isMTBUF(Opc1))) 306 return true; 307 308 return false; 309 } 310 311 void 312 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 313 MachineBasicBlock::iterator MI, DebugLoc DL, 314 unsigned DestReg, unsigned SrcReg, 315 bool KillSrc) const { 316 317 // If we are trying to copy to or from SCC, there is a bug somewhere else in 318 // the backend. While it may be theoretically possible to do this, it should 319 // never be necessary. 320 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 321 322 static const int16_t Sub0_15[] = { 323 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 324 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 325 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 326 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 327 }; 328 329 static const int16_t Sub0_7[] = { 330 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 331 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 332 }; 333 334 static const int16_t Sub0_3[] = { 335 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 336 }; 337 338 static const int16_t Sub0_2[] = { 339 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 340 }; 341 342 static const int16_t Sub0_1[] = { 343 AMDGPU::sub0, AMDGPU::sub1, 0 344 }; 345 346 unsigned Opcode; 347 const int16_t *SubIndices; 348 349 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 350 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 351 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 352 .addReg(SrcReg, getKillRegState(KillSrc)); 353 return; 354 355 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 356 if (DestReg == AMDGPU::VCC) { 357 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 358 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 359 .addReg(SrcReg, getKillRegState(KillSrc)); 360 } else { 361 // FIXME: Hack until VReg_1 removed. 
362 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 363 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) 364 .addImm(0) 365 .addReg(SrcReg, getKillRegState(KillSrc)); 366 } 367 368 return; 369 } 370 371 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 372 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 373 .addReg(SrcReg, getKillRegState(KillSrc)); 374 return; 375 376 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 377 assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 378 Opcode = AMDGPU::S_MOV_B32; 379 SubIndices = Sub0_3; 380 381 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 382 assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 383 Opcode = AMDGPU::S_MOV_B32; 384 SubIndices = Sub0_7; 385 386 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 387 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 388 Opcode = AMDGPU::S_MOV_B32; 389 SubIndices = Sub0_15; 390 391 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 392 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 393 AMDGPU::SReg_32RegClass.contains(SrcReg)); 394 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 395 .addReg(SrcReg, getKillRegState(KillSrc)); 396 return; 397 398 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 399 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 400 AMDGPU::SReg_64RegClass.contains(SrcReg)); 401 Opcode = AMDGPU::V_MOV_B32_e32; 402 SubIndices = Sub0_1; 403 404 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { 405 assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 406 Opcode = AMDGPU::V_MOV_B32_e32; 407 SubIndices = Sub0_2; 408 409 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 410 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 411 AMDGPU::SReg_128RegClass.contains(SrcReg)); 412 Opcode = AMDGPU::V_MOV_B32_e32; 413 SubIndices = Sub0_3; 414 415 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 416 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 417 AMDGPU::SReg_256RegClass.contains(SrcReg)); 418 Opcode = AMDGPU::V_MOV_B32_e32; 419 SubIndices = Sub0_7; 420 421 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 422 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 423 AMDGPU::SReg_512RegClass.contains(SrcReg)); 424 Opcode = AMDGPU::V_MOV_B32_e32; 425 SubIndices = Sub0_15; 426 427 } else { 428 llvm_unreachable("Can't copy register!"); 429 } 430 431 while (unsigned SubIdx = *SubIndices++) { 432 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 433 get(Opcode), RI.getSubReg(DestReg, SubIdx)); 434 435 Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc)); 436 437 if (*SubIndices) 438 Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 439 } 440 } 441 442 int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 443 const unsigned Opcode = MI.getOpcode(); 444 445 int NewOpc; 446 447 // Try to map original to commuted opcode 448 NewOpc = AMDGPU::getCommuteRev(Opcode); 449 if (NewOpc != -1) 450 // Check if the commuted (REV) opcode exists on the target. 451 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 452 453 // Try to map commuted to original opcode 454 NewOpc = AMDGPU::getCommuteOrig(Opcode); 455 if (NewOpc != -1) 456 // Check if the original (non-REV) opcode exists on the target. 457 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 458 459 return Opcode; 460 } 461 462 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 463 464 if (DstRC->getSize() == 4) { 465 return RI.isSGPRClass(DstRC) ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 466 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 467 return AMDGPU::S_MOV_B64; 468 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 469 return AMDGPU::V_MOV_B64_PSEUDO; 470 } 471 return AMDGPU::COPY; 472 } 473 474 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 475 MachineBasicBlock::iterator MI, 476 unsigned SrcReg, bool isKill, 477 int FrameIndex, 478 const TargetRegisterClass *RC, 479 const TargetRegisterInfo *TRI) const { 480 MachineFunction *MF = MBB.getParent(); 481 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 482 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 483 DebugLoc DL = MBB.findDebugLoc(MI); 484 int Opcode = -1; 485 486 if (RI.isSGPRClass(RC)) { 487 // We are only allowed to create one new instruction when spilling 488 // registers, so we need to use pseudo instruction for spilling 489 // SGPRs. 490 switch (RC->getSize() * 8) { 491 case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; 492 case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; 493 case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; 494 case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; 495 case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; 496 } 497 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { 498 MFI->setHasSpilledVGPRs(); 499 500 switch(RC->getSize() * 8) { 501 case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break; 502 case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break; 503 case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break; 504 case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break; 505 case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break; 506 case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break; 507 } 508 } 509 510 if (Opcode != -1) { 511 FrameInfo->setObjectAlignment(FrameIndex, 4); 512 BuildMI(MBB, MI, DL, get(Opcode)) 513 .addReg(SrcReg) 514 .addFrameIndex(FrameIndex) 515 // Place-holder registers, these will be filled in by 516 // SIPrepareScratchRegs. 
517 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) 518 .addReg(AMDGPU::SGPR0, RegState::Undef); 519 } else { 520 LLVMContext &Ctx = MF->getFunction()->getContext(); 521 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 522 " spill register"); 523 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 524 .addReg(SrcReg); 525 } 526 } 527 528 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 529 MachineBasicBlock::iterator MI, 530 unsigned DestReg, int FrameIndex, 531 const TargetRegisterClass *RC, 532 const TargetRegisterInfo *TRI) const { 533 MachineFunction *MF = MBB.getParent(); 534 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 535 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 536 DebugLoc DL = MBB.findDebugLoc(MI); 537 int Opcode = -1; 538 539 if (RI.isSGPRClass(RC)){ 540 switch(RC->getSize() * 8) { 541 case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; 542 case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; 543 case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; 544 case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; 545 case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; 546 } 547 } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { 548 switch(RC->getSize() * 8) { 549 case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; 550 case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; 551 case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break; 552 case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break; 553 case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break; 554 case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break; 555 } 556 } 557 558 if (Opcode != -1) { 559 FrameInfo->setObjectAlignment(FrameIndex, 4); 560 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 561 .addFrameIndex(FrameIndex) 562 // Place-holder registers, these will be filled in by 563 // SIPrepareScratchRegs. 
      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
      .addReg(AMDGPU::SGPR0, RegState::Undef);

  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
  }
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;


    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
      unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
      unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
          .addReg(InputPtrReg)
          .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
          .addReg(InputPtrReg)
          .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
          .addReg(STmp1)
          .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
          .addReg(STmp1)
          .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
          .addReg(STmp0)
          .addReg(TIDIGYReg)
          .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
          .addReg(TIDReg)
          .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
          .addImm(-1)
          .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
          .addImm(-1)
          .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
        .addImm(2)
        .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

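  // Illustration of the address computed below (hypothetical numbers): with
  // MFI->LDSSize == 1024 bytes, FrameOffset == 8 and a 256-thread workgroup,
  // the spill area starts at LDSOffset = 1024 + 8 * 256 = 3072 bytes, and each
  // lane then adds its own TIDReg value (thread id scaled by 4 above) so the
  // per-lane 32-bit slots do not collide.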
661 // Add FrameIndex to LDS offset 662 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize); 663 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) 664 .addImm(LDSOffset) 665 .addReg(TIDReg); 666 667 return TmpReg; 668 } 669 670 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, 671 int Count) const { 672 while (Count > 0) { 673 int Arg; 674 if (Count >= 8) 675 Arg = 7; 676 else 677 Arg = Count - 1; 678 Count -= 8; 679 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP)) 680 .addImm(Arg); 681 } 682 } 683 684 bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { 685 MachineBasicBlock &MBB = *MI->getParent(); 686 DebugLoc DL = MBB.findDebugLoc(MI); 687 switch (MI->getOpcode()) { 688 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); 689 690 case AMDGPU::SI_CONSTDATA_PTR: { 691 unsigned Reg = MI->getOperand(0).getReg(); 692 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); 693 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); 694 695 BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); 696 697 // Add 32-bit offset from this instruction to the start of the constant data. 698 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) 699 .addReg(RegLo) 700 .addTargetIndex(AMDGPU::TI_CONSTDATA_START) 701 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); 702 BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) 703 .addReg(RegHi) 704 .addImm(0) 705 .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) 706 .addReg(AMDGPU::SCC, RegState::Implicit); 707 MI->eraseFromParent(); 708 break; 709 } 710 case AMDGPU::SGPR_USE: 711 // This is just a placeholder for register allocation. 712 MI->eraseFromParent(); 713 break; 714 715 case AMDGPU::V_MOV_B64_PSEUDO: { 716 unsigned Dst = MI->getOperand(0).getReg(); 717 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 718 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 719 720 const MachineOperand &SrcOp = MI->getOperand(1); 721 // FIXME: Will this work for 64-bit floating point immediates? 
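    // (ISel is expected to have bitcast FP immediates to integer immediates,
    // see the corresponding check in verifyInstruction(), hence the assert.)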
722 assert(!SrcOp.isFPImm()); 723 if (SrcOp.isImm()) { 724 APInt Imm(64, SrcOp.getImm()); 725 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 726 .addImm(Imm.getLoBits(32).getZExtValue()) 727 .addReg(Dst, RegState::Implicit); 728 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 729 .addImm(Imm.getHiBits(32).getZExtValue()) 730 .addReg(Dst, RegState::Implicit); 731 } else { 732 assert(SrcOp.isReg()); 733 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 734 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 735 .addReg(Dst, RegState::Implicit); 736 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 737 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 738 .addReg(Dst, RegState::Implicit); 739 } 740 MI->eraseFromParent(); 741 break; 742 } 743 744 case AMDGPU::V_CNDMASK_B64_PSEUDO: { 745 unsigned Dst = MI->getOperand(0).getReg(); 746 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 747 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 748 unsigned Src0 = MI->getOperand(1).getReg(); 749 unsigned Src1 = MI->getOperand(2).getReg(); 750 const MachineOperand &SrcCond = MI->getOperand(3); 751 752 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) 753 .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) 754 .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) 755 .addOperand(SrcCond); 756 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) 757 .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) 758 .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) 759 .addOperand(SrcCond); 760 MI->eraseFromParent(); 761 break; 762 } 763 } 764 return true; 765 } 766 767 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, 768 bool NewMI) const { 769 770 if (MI->getNumOperands() < 3) 771 return nullptr; 772 773 int CommutedOpcode = commuteOpcode(*MI); 774 if (CommutedOpcode == -1) 775 return nullptr; 776 777 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 778 AMDGPU::OpName::src0); 779 assert(Src0Idx != -1 && "Should always have src0 operand"); 780 781 MachineOperand &Src0 = MI->getOperand(Src0Idx); 782 if (!Src0.isReg()) 783 return nullptr; 784 785 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 786 AMDGPU::OpName::src1); 787 if (Src1Idx == -1) 788 return nullptr; 789 790 MachineOperand &Src1 = MI->getOperand(Src1Idx); 791 792 // Make sure it's legal to commute operands for VOP2. 793 if (isVOP2(MI->getOpcode()) && 794 (!isOperandLegal(MI, Src0Idx, &Src1) || 795 !isOperandLegal(MI, Src1Idx, &Src0))) { 796 return nullptr; 797 } 798 799 if (!Src1.isReg()) { 800 // Allow commuting instructions with Imm operands. 801 if (NewMI || !Src1.isImm() || 802 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { 803 return nullptr; 804 } 805 806 // Be sure to copy the source modifiers to the right place. 807 if (MachineOperand *Src0Mods 808 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { 809 MachineOperand *Src1Mods 810 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); 811 812 int Src0ModsVal = Src0Mods->getImm(); 813 if (!Src1Mods && Src0ModsVal != 0) 814 return nullptr; 815 816 // XXX - This assert might be a lie. It might be useful to have a neg 817 // modifier with 0.0. 
818 int Src1ModsVal = Src1Mods->getImm(); 819 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 820 821 Src1Mods->setImm(Src0ModsVal); 822 Src0Mods->setImm(Src1ModsVal); 823 } 824 825 unsigned Reg = Src0.getReg(); 826 unsigned SubReg = Src0.getSubReg(); 827 if (Src1.isImm()) 828 Src0.ChangeToImmediate(Src1.getImm()); 829 else 830 llvm_unreachable("Should only have immediates"); 831 832 Src1.ChangeToRegister(Reg, false); 833 Src1.setSubReg(SubReg); 834 } else { 835 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 836 } 837 838 if (MI) 839 MI->setDesc(get(CommutedOpcode)); 840 841 return MI; 842 } 843 844 // This needs to be implemented because the source modifiers may be inserted 845 // between the true commutable operands, and the base 846 // TargetInstrInfo::commuteInstruction uses it. 847 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, 848 unsigned &SrcOpIdx1, 849 unsigned &SrcOpIdx2) const { 850 const MCInstrDesc &MCID = MI->getDesc(); 851 if (!MCID.isCommutable()) 852 return false; 853 854 unsigned Opc = MI->getOpcode(); 855 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 856 if (Src0Idx == -1) 857 return false; 858 859 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 860 // immediate. 861 if (!MI->getOperand(Src0Idx).isReg()) 862 return false; 863 864 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 865 if (Src1Idx == -1) 866 return false; 867 868 if (!MI->getOperand(Src1Idx).isReg()) 869 return false; 870 871 // If any source modifiers are set, the generic instruction commuting won't 872 // understand how to copy the source modifiers. 873 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || 874 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) 875 return false; 876 877 SrcOpIdx1 = Src0Idx; 878 SrcOpIdx2 = Src1Idx; 879 return true; 880 } 881 882 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, 883 MachineBasicBlock::iterator I, 884 unsigned DstReg, 885 unsigned SrcReg) const { 886 return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32), 887 DstReg) .addReg(SrcReg); 888 } 889 890 bool SIInstrInfo::isMov(unsigned Opcode) const { 891 switch(Opcode) { 892 default: return false; 893 case AMDGPU::S_MOV_B32: 894 case AMDGPU::S_MOV_B64: 895 case AMDGPU::V_MOV_B32_e32: 896 case AMDGPU::V_MOV_B32_e64: 897 return true; 898 } 899 } 900 901 bool 902 SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 903 return RC != &AMDGPU::EXECRegRegClass; 904 } 905 906 static void removeModOperands(MachineInstr &MI) { 907 unsigned Opc = MI.getOpcode(); 908 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, 909 AMDGPU::OpName::src0_modifiers); 910 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, 911 AMDGPU::OpName::src1_modifiers); 912 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, 913 AMDGPU::OpName::src2_modifiers); 914 915 MI.RemoveOperand(Src2ModIdx); 916 MI.RemoveOperand(Src1ModIdx); 917 MI.RemoveOperand(Src0ModIdx); 918 } 919 920 bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, 921 unsigned Reg, MachineRegisterInfo *MRI) const { 922 if (!MRI->hasOneNonDBGUse(Reg)) 923 return false; 924 925 unsigned Opc = UseMI->getOpcode(); 926 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { 927 // Don't fold if we are using source modifiers. The new VOP2 instructions 928 // don't have them. 
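    // For reference: v_madmk_f32 computes dst = src0 * K + src1 and
    // v_madak_f32 computes dst = src0 * src1 + K, where K is a 32-bit literal;
    // neither VOP2 form carries source-modifier operands, hence this check.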
929 if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || 930 hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) || 931 hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) { 932 return false; 933 } 934 935 MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0); 936 MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1); 937 MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2); 938 939 // Multiplied part is the constant: Use v_madmk_f32 940 // We should only expect these to be on src0 due to canonicalizations. 941 if (Src0->isReg() && Src0->getReg() == Reg) { 942 if (!Src1->isReg() || 943 (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) 944 return false; 945 946 if (!Src2->isReg() || 947 (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))) 948 return false; 949 950 // We need to do some weird looking operand shuffling since the madmk 951 // operands are out of the normal expected order with the multiplied 952 // constant as the last operand. 953 // 954 // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1 955 // src0 -> src2 K 956 // src1 -> src0 957 // src2 -> src1 958 959 const int64_t Imm = DefMI->getOperand(1).getImm(); 960 961 // FIXME: This would be a lot easier if we could return a new instruction 962 // instead of having to modify in place. 963 964 // Remove these first since they are at the end. 965 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 966 AMDGPU::OpName::omod)); 967 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 968 AMDGPU::OpName::clamp)); 969 970 unsigned Src1Reg = Src1->getReg(); 971 unsigned Src1SubReg = Src1->getSubReg(); 972 unsigned Src2Reg = Src2->getReg(); 973 unsigned Src2SubReg = Src2->getSubReg(); 974 Src0->setReg(Src1Reg); 975 Src0->setSubReg(Src1SubReg); 976 Src0->setIsKill(Src1->isKill()); 977 978 Src1->setReg(Src2Reg); 979 Src1->setSubReg(Src2SubReg); 980 Src1->setIsKill(Src2->isKill()); 981 982 if (Opc == AMDGPU::V_MAC_F32_e64) { 983 UseMI->untieRegOperand( 984 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 985 } 986 987 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 988 AMDGPU::OpName::src2)); 989 // ChangingToImmediate adds Src2 back to the instruction. 990 Src2->ChangeToImmediate(Imm); 991 992 removeModOperands(*UseMI); 993 UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); 994 995 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 996 if (DeleteDef) 997 DefMI->eraseFromParent(); 998 999 return true; 1000 } 1001 1002 // Added part is the constant: Use v_madak_f32 1003 if (Src2->isReg() && Src2->getReg() == Reg) { 1004 // Not allowed to use constant bus for another operand. 1005 // We can however allow an inline immediate as src0. 1006 if (!Src0->isImm() && 1007 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1008 return false; 1009 1010 if (!Src1->isReg() || 1011 (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) 1012 return false; 1013 1014 const int64_t Imm = DefMI->getOperand(1).getImm(); 1015 1016 // FIXME: This would be a lot easier if we could return a new instruction 1017 // instead of having to modify in place. 1018 1019 // Remove these first since they are at the end. 
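      // (Removing the trailing operands first keeps the indices returned by
      // getNamedOperandIdx for the remaining operands valid.)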
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      // ChangingToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADAK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }
  }

  return false;
}

static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, Offset0;
  unsigned BaseReg1, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  unsigned Opc0 = MIa->getOpcode();
  unsigned Opc1 = MIb->getOpcode();

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
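  // The dispatch below reasons pairwise about the memory paths: two accesses
  // through the same path (DS/DS, buffer/buffer, SMRD/SMRD) are only disjoint
  // if their base registers match and their offset ranges do not overlap;
  // accesses through different paths are disjoint unless one of them is FLAT
  // (which can touch any address space) or the scalar and vector buffer paths
  // could reach the same memory.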
  if (isDS(Opc0)) {
    if (isDS(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1);
  }

  if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
    if (isMUBUF(Opc1) || isMTBUF(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isSMRD(Opc1);
  }

  if (isSMRD(Opc0)) {
    if (isSMRD(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isMUBUF(Opc1) && !isMTBUF(Opc1);
  }

  if (isFLAT(Opc0)) {
    if (isFLAT(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineBasicBlock::iterator &MI,
                                                 LiveVariables *LV) const {

  switch (MI->getOpcode()) {
  default: return nullptr;
  case AMDGPU::V_MAC_F32_e64: break;
  case AMDGPU::V_MAC_F32_e32: {
    const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
    if (Src0->isImm() && !isInlineConstant(*Src0, 4))
      return nullptr;
    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
  const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
  const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);

  return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
      .addOperand(*Dst)
      .addImm(0) // Src0 mods
      .addOperand(*Src0)
      .addImm(0) // Src1 mods
      .addOperand(*Src1)
      .addImm(0) // Src2 mods
      .addOperand(*Src2)
      .addImm(0) // clamp
      .addImm(0); // omod
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int64_t SVal = Imm.getSExtValue();
  if (SVal >= -16 && SVal <= 64)
    return true;

  if (Imm.getBitWidth() == 64) {
    uint64_t Val = Imm.getZExtValue();
    return (DoubleToBits(0.0) == Val) ||
           (DoubleToBits(1.0) == Val) ||
           (DoubleToBits(-1.0) == Val) ||
           (DoubleToBits(0.5) == Val) ||
           (DoubleToBits(-0.5) == Val) ||
           (DoubleToBits(2.0) == Val) ||
           (DoubleToBits(-2.0) == Val) ||
           (DoubleToBits(4.0) == Val) ||
           (DoubleToBits(-4.0) == Val);
  }

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = Imm.getZExtValue();

  return (FloatToBits(0.0f) == Val) ||
         (FloatToBits(1.0f) == Val) ||
         (FloatToBits(-1.0f) == Val) ||
         (FloatToBits(0.5f) == Val) ||
         (FloatToBits(-0.5f) == Val) ||
         (FloatToBits(2.0f) == Val) ||
         (FloatToBits(-2.0f) == Val) ||
         (FloatToBits(4.0f) == Val) ||
         (FloatToBits(-4.0f) == Val);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   unsigned OpSize) const {
  if (MO.isImm()) {
    // MachineOperand provides no way to tell the true operand size, since it
    // only records a 64-bit value. We need to know the size to determine if a
    // 32-bit floating point immediate bit pattern is legal for an integer
    // immediate.
It would be for any 32-bit integer operand, but would not be 1208 // for a 64-bit one. 1209 1210 unsigned BitSize = 8 * OpSize; 1211 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 1212 } 1213 1214 return false; 1215 } 1216 1217 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1218 unsigned OpSize) const { 1219 return MO.isImm() && !isInlineConstant(MO, OpSize); 1220 } 1221 1222 static bool compareMachineOp(const MachineOperand &Op0, 1223 const MachineOperand &Op1) { 1224 if (Op0.getType() != Op1.getType()) 1225 return false; 1226 1227 switch (Op0.getType()) { 1228 case MachineOperand::MO_Register: 1229 return Op0.getReg() == Op1.getReg(); 1230 case MachineOperand::MO_Immediate: 1231 return Op0.getImm() == Op1.getImm(); 1232 default: 1233 llvm_unreachable("Didn't expect to be comparing these operand types"); 1234 } 1235 } 1236 1237 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, 1238 const MachineOperand &MO) const { 1239 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; 1240 1241 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1242 1243 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1244 return true; 1245 1246 if (OpInfo.RegClass < 0) 1247 return false; 1248 1249 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1250 if (isLiteralConstant(MO, OpSize)) 1251 return RI.opCanUseLiteralConstant(OpInfo.OperandType); 1252 1253 return RI.opCanUseInlineConstant(OpInfo.OperandType); 1254 } 1255 1256 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1257 int Op32 = AMDGPU::getVOPe32(Opcode); 1258 if (Op32 == -1) 1259 return false; 1260 1261 return pseudoToMCOpcode(Op32) != -1; 1262 } 1263 1264 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1265 // The src0_modifier operand is present on all instructions 1266 // that have modifiers. 1267 1268 return AMDGPU::getNamedOperandIdx(Opcode, 1269 AMDGPU::OpName::src0_modifiers) != -1; 1270 } 1271 1272 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 1273 unsigned OpName) const { 1274 const MachineOperand *Mods = getNamedOperand(MI, OpName); 1275 return Mods && Mods->getImm(); 1276 } 1277 1278 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1279 const MachineOperand &MO, 1280 unsigned OpSize) const { 1281 // Literal constants use the constant bus. 1282 if (isLiteralConstant(MO, OpSize)) 1283 return true; 1284 1285 if (!MO.isReg() || !MO.isUse()) 1286 return false; 1287 1288 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 1289 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 1290 1291 // FLAT_SCR is just an SGPR pair. 1292 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 1293 return true; 1294 1295 // EXEC register uses the constant bus. 
1296 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1297 return true; 1298 1299 // SGPRs use the constant bus 1300 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC || 1301 (!MO.isImplicit() && 1302 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1303 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) { 1304 return true; 1305 } 1306 1307 return false; 1308 } 1309 1310 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, 1311 StringRef &ErrInfo) const { 1312 uint16_t Opcode = MI->getOpcode(); 1313 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1314 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1315 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1316 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1317 1318 // Make sure the number of operands is correct. 1319 const MCInstrDesc &Desc = get(Opcode); 1320 if (!Desc.isVariadic() && 1321 Desc.getNumOperands() != MI->getNumExplicitOperands()) { 1322 ErrInfo = "Instruction has wrong number of operands."; 1323 return false; 1324 } 1325 1326 // Make sure the register classes are correct 1327 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1328 if (MI->getOperand(i).isFPImm()) { 1329 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " 1330 "all fp values to integers."; 1331 return false; 1332 } 1333 1334 int RegClass = Desc.OpInfo[i].RegClass; 1335 1336 switch (Desc.OpInfo[i].OperandType) { 1337 case MCOI::OPERAND_REGISTER: 1338 if (MI->getOperand(i).isImm()) { 1339 ErrInfo = "Illegal immediate value for operand."; 1340 return false; 1341 } 1342 break; 1343 case AMDGPU::OPERAND_REG_IMM32: 1344 break; 1345 case AMDGPU::OPERAND_REG_INLINE_C: 1346 if (isLiteralConstant(MI->getOperand(i), 1347 RI.getRegClass(RegClass)->getSize())) { 1348 ErrInfo = "Illegal immediate value for operand."; 1349 return false; 1350 } 1351 break; 1352 case MCOI::OPERAND_IMMEDIATE: 1353 // Check if this operand is an immediate. 1354 // FrameIndex operands will be replaced by immediates, so they are 1355 // allowed. 1356 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { 1357 ErrInfo = "Expected immediate, but got non-immediate"; 1358 return false; 1359 } 1360 // Fall-through 1361 default: 1362 continue; 1363 } 1364 1365 if (!MI->getOperand(i).isReg()) 1366 continue; 1367 1368 if (RegClass != -1) { 1369 unsigned Reg = MI->getOperand(i).getReg(); 1370 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1371 continue; 1372 1373 const TargetRegisterClass *RC = RI.getRegClass(RegClass); 1374 if (!RC->contains(Reg)) { 1375 ErrInfo = "Operand has incorrect register class."; 1376 return false; 1377 } 1378 } 1379 } 1380 1381 1382 // Verify VOP* 1383 if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { 1384 // Only look at the true operands. Only a real operand can use the constant 1385 // bus, and we don't want to check pseudo-operands like the source modifier 1386 // flags. 
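    // For illustration, something like
    //   v_add_f32_e64 v0, s0, s1
    // would need two different SGPRs on the single constant bus in one
    // instruction and is rejected, while
    //   v_add_f32_e64 v0, s0, s0
    // reads only one SGPR and is fine. A literal constant counts as a
    // constant bus use as well.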
1387 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1388 1389 unsigned ConstantBusCount = 0; 1390 unsigned SGPRUsed = AMDGPU::NoRegister; 1391 for (int OpIdx : OpIndices) { 1392 if (OpIdx == -1) 1393 break; 1394 const MachineOperand &MO = MI->getOperand(OpIdx); 1395 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 1396 if (MO.isReg()) { 1397 if (MO.getReg() != SGPRUsed) 1398 ++ConstantBusCount; 1399 SGPRUsed = MO.getReg(); 1400 } else { 1401 ++ConstantBusCount; 1402 } 1403 } 1404 } 1405 if (ConstantBusCount > 1) { 1406 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1407 return false; 1408 } 1409 } 1410 1411 // Verify misc. restrictions on specific instructions. 1412 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1413 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1414 const MachineOperand &Src0 = MI->getOperand(Src0Idx); 1415 const MachineOperand &Src1 = MI->getOperand(Src1Idx); 1416 const MachineOperand &Src2 = MI->getOperand(Src2Idx); 1417 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1418 if (!compareMachineOp(Src0, Src1) && 1419 !compareMachineOp(Src0, Src2)) { 1420 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1421 return false; 1422 } 1423 } 1424 } 1425 1426 return true; 1427 } 1428 1429 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1430 switch (MI.getOpcode()) { 1431 default: return AMDGPU::INSTRUCTION_LIST_END; 1432 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1433 case AMDGPU::COPY: return AMDGPU::COPY; 1434 case AMDGPU::PHI: return AMDGPU::PHI; 1435 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 1436 case AMDGPU::S_MOV_B32: 1437 return MI.getOperand(1).isReg() ? 1438 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 1439 case AMDGPU::S_ADD_I32: 1440 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 1441 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 1442 case AMDGPU::S_SUB_I32: 1443 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 1444 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 1445 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 1446 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 1447 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 1448 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 1449 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 1450 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 1451 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 1452 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1453 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1454 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1455 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1456 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1457 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1458 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1459 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1460 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1461 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1462 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 1463 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1464 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1465 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1466 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1467 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1468 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1469 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1470 case 
AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; 1471 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1472 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1473 case AMDGPU::S_LOAD_DWORD_IMM: 1474 case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; 1475 case AMDGPU::S_LOAD_DWORDX2_IMM: 1476 case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; 1477 case AMDGPU::S_LOAD_DWORDX4_IMM: 1478 case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; 1479 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1480 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1481 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1482 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1483 } 1484 } 1485 1486 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1487 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1488 } 1489 1490 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1491 unsigned OpNo) const { 1492 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1493 const MCInstrDesc &Desc = get(MI.getOpcode()); 1494 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1495 Desc.OpInfo[OpNo].RegClass == -1) { 1496 unsigned Reg = MI.getOperand(OpNo).getReg(); 1497 1498 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1499 return MRI.getRegClass(Reg); 1500 return RI.getPhysRegClass(Reg); 1501 } 1502 1503 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1504 return RI.getRegClass(RCID); 1505 } 1506 1507 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1508 switch (MI.getOpcode()) { 1509 case AMDGPU::COPY: 1510 case AMDGPU::REG_SEQUENCE: 1511 case AMDGPU::PHI: 1512 case AMDGPU::INSERT_SUBREG: 1513 return RI.hasVGPRs(getOpRegClass(MI, 0)); 1514 default: 1515 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1516 } 1517 } 1518 1519 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { 1520 MachineBasicBlock::iterator I = MI; 1521 MachineBasicBlock *MBB = MI->getParent(); 1522 MachineOperand &MO = MI->getOperand(OpIdx); 1523 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1524 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; 1525 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1526 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1527 if (MO.isReg()) 1528 Opcode = AMDGPU::COPY; 1529 else if (RI.isSGPRClass(RC)) 1530 Opcode = AMDGPU::S_MOV_B32; 1531 1532 1533 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1534 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 1535 VRC = &AMDGPU::VReg_64RegClass; 1536 else 1537 VRC = &AMDGPU::VGPR_32RegClass; 1538 1539 unsigned Reg = MRI.createVirtualRegister(VRC); 1540 DebugLoc DL = MBB->findDebugLoc(I); 1541 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) 1542 .addOperand(MO); 1543 MO.ChangeToRegister(Reg, false); 1544 } 1545 1546 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1547 MachineRegisterInfo &MRI, 1548 MachineOperand &SuperReg, 1549 const TargetRegisterClass *SuperRC, 1550 unsigned SubIdx, 1551 const TargetRegisterClass *SubRC) 1552 const { 1553 assert(SuperReg.isReg()); 1554 1555 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 1556 unsigned SubReg = MRI.createVirtualRegister(SubRC); 1557 1558 // Just in case the super register is itself a sub-register, copy it to a new 1559 // value so we don't need to worry about merging its subreg index 
with the 1560 // SubIdx passed to this function. The register coalescer should be able to 1561 // eliminate this extra copy. 1562 MachineBasicBlock *MBB = MI->getParent(); 1563 DebugLoc DL = MI->getDebugLoc(); 1564 1565 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 1566 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 1567 1568 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1569 .addReg(NewSuperReg, 0, SubIdx); 1570 1571 return SubReg; 1572 } 1573 1574 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1575 MachineBasicBlock::iterator MII, 1576 MachineRegisterInfo &MRI, 1577 MachineOperand &Op, 1578 const TargetRegisterClass *SuperRC, 1579 unsigned SubIdx, 1580 const TargetRegisterClass *SubRC) const { 1581 if (Op.isImm()) { 1582 // XXX - Is there a better way to do this? 1583 if (SubIdx == AMDGPU::sub0) 1584 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 1585 if (SubIdx == AMDGPU::sub1) 1586 return MachineOperand::CreateImm(Op.getImm() >> 32); 1587 1588 llvm_unreachable("Unhandled register index for immediate"); 1589 } 1590 1591 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 1592 SubIdx, SubRC); 1593 return MachineOperand::CreateReg(SubReg, false); 1594 } 1595 1596 unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist, 1597 MachineBasicBlock::iterator MI, 1598 MachineRegisterInfo &MRI, 1599 const TargetRegisterClass *RC, 1600 const MachineOperand &Op) const { 1601 MachineBasicBlock *MBB = MI->getParent(); 1602 DebugLoc DL = MI->getDebugLoc(); 1603 unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1604 unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 1605 unsigned Dst = MRI.createVirtualRegister(RC); 1606 1607 MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), 1608 LoDst) 1609 .addImm(Op.getImm() & 0xFFFFFFFF); 1610 MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), 1611 HiDst) 1612 .addImm(Op.getImm() >> 32); 1613 1614 BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst) 1615 .addReg(LoDst) 1616 .addImm(AMDGPU::sub0) 1617 .addReg(HiDst) 1618 .addImm(AMDGPU::sub1); 1619 1620 Worklist.push_back(Lo); 1621 Worklist.push_back(Hi); 1622 1623 return Dst; 1624 } 1625 1626 // Change the order of operands from (0, 1, 2) to (0, 2, 1) 1627 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { 1628 assert(Inst->getNumExplicitOperands() == 3); 1629 MachineOperand Op1 = Inst->getOperand(1); 1630 Inst->RemoveOperand(1); 1631 Inst->addOperand(Op1); 1632 } 1633 1634 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, 1635 const MachineOperand *MO) const { 1636 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1637 const MCInstrDesc &InstDesc = get(MI->getOpcode()); 1638 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 1639 const TargetRegisterClass *DefinedRC = 1640 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 1641 if (!MO) 1642 MO = &MI->getOperand(OpIdx); 1643 1644 if (isVALU(InstDesc.Opcode) && 1645 usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 1646 unsigned SGPRUsed = 1647 MO->isReg() ? 
MO->getReg() : (unsigned)AMDGPU::NoRegister; 1648 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1649 if (i == OpIdx) 1650 continue; 1651 const MachineOperand &Op = MI->getOperand(i); 1652 if (Op.isReg() && Op.getReg() != SGPRUsed && 1653 usesConstantBus(MRI, Op, getOpSize(*MI, i))) { 1654 return false; 1655 } 1656 } 1657 } 1658 1659 if (MO->isReg()) { 1660 assert(DefinedRC); 1661 const TargetRegisterClass *RC = 1662 TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? 1663 MRI.getRegClass(MO->getReg()) : 1664 RI.getPhysRegClass(MO->getReg()); 1665 1666 // In order to be legal, the common sub-class must be equal to the 1667 // class of the current operand. For example: 1668 // 1669 // v_mov_b32 s0 ; Operand defined as vsrc_32 1670 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1671 // 1672 // s_sendmsg 0, s0 ; Operand defined as m0reg 1673 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1674 1675 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1676 } 1677 1678 1679 // Handle non-register types that are treated like immediates. 1680 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 1681 1682 if (!DefinedRC) { 1683 // This operand expects an immediate. 1684 return true; 1685 } 1686 1687 return isImmOperandLegal(MI, OpIdx, *MO); 1688 } 1689 1690 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { 1691 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1692 1693 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1694 AMDGPU::OpName::src0); 1695 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1696 AMDGPU::OpName::src1); 1697 int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 1698 AMDGPU::OpName::src2); 1699 1700 // Legalize VOP2 1701 if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { 1702 // Legalize src0 1703 if (!isOperandLegal(MI, Src0Idx)) 1704 legalizeOpWithMove(MI, Src0Idx); 1705 1706 // Legalize src1 1707 if (isOperandLegal(MI, Src1Idx)) 1708 return; 1709 1710 // Usually src0 of VOP2 instructions allow more types of inputs 1711 // than src1, so try to commute the instruction to decrease our 1712 // chances of having to insert a MOV instruction to legalize src1. 1713 if (MI->isCommutable()) { 1714 if (commuteInstruction(MI)) 1715 // If we are successful in commuting, then we know MI is legal, so 1716 // we are done. 1717 return; 1718 } 1719 1720 legalizeOpWithMove(MI, Src1Idx); 1721 return; 1722 } 1723 1724 // XXX - Do any VOP3 instructions read VCC? 1725 // Legalize VOP3 1726 if (isVOP3(MI->getOpcode())) { 1727 int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; 1728 1729 // Find the one SGPR operand we are allowed to use. 1730 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); 1731 1732 for (unsigned i = 0; i < 3; ++i) { 1733 int Idx = VOP3Idx[i]; 1734 if (Idx == -1) 1735 break; 1736 MachineOperand &MO = MI->getOperand(Idx); 1737 1738 if (MO.isReg()) { 1739 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 1740 continue; // VGPRs are legal 1741 1742 assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction"); 1743 1744 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { 1745 SGPRReg = MO.getReg(); 1746 // We can use one SGPR in each VOP3 instruction. 1747 continue; 1748 } 1749 } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { 1750 // If it is not a register and not a literal constant, then it must be 1751 // an inline constant which is always legal. 
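        // (Inline constants are the values the hardware encodes directly in
        // the instruction word, e.g. the integers -16..64 and a handful of
        // floating-point values such as 0.5, 1.0, 2.0 and 4.0; see
        // isInlineConstant() above.)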
1752 continue;
1753 }
1754 // If we make it this far, then the operand is not legal and we must
1755 // legalize it.
1756 legalizeOpWithMove(MI, Idx);
1757 }
1758 }
1759
1760 // Legalize REG_SEQUENCE and PHI
1761 // The register class of the operands must be the same type as the register
1762 // class of the output.
1763 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
1764 MI->getOpcode() == AMDGPU::PHI) {
1765 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
1766 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1767 if (!MI->getOperand(i).isReg() ||
1768 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1769 continue;
1770 const TargetRegisterClass *OpRC =
1771 MRI.getRegClass(MI->getOperand(i).getReg());
1772 if (RI.hasVGPRs(OpRC)) {
1773 VRC = OpRC;
1774 } else {
1775 SRC = OpRC;
1776 }
1777 }
1778
1779 // If any of the operands are VGPR registers, then they all must be VGPRs;
1780 // otherwise we will create illegal VGPR->SGPR copies when legalizing
1781 // them.
1782 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
1783 if (!VRC) {
1784 assert(SRC);
1785 VRC = RI.getEquivalentVGPRClass(SRC);
1786 }
1787 RC = VRC;
1788 } else {
1789 RC = SRC;
1790 }
1791
1792 // Update all the operands so they have the same type.
1793 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
1794 if (!MI->getOperand(i).isReg() ||
1795 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
1796 continue;
1797 unsigned DstReg = MRI.createVirtualRegister(RC);
1798 MachineBasicBlock *InsertBB;
1799 MachineBasicBlock::iterator Insert;
1800 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
1801 InsertBB = MI->getParent();
1802 Insert = MI;
1803 } else {
1804 // MI is a PHI instruction.
1805 InsertBB = MI->getOperand(i + 1).getMBB();
1806 Insert = InsertBB->getFirstTerminator();
1807 }
1808 BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
1809 get(AMDGPU::COPY), DstReg)
1810 .addOperand(MI->getOperand(i));
1811 MI->getOperand(i).setReg(DstReg);
1812 }
1813 }
1814
1815 // Legalize INSERT_SUBREG
1816 // src0 must have the same register class as dst
1817 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
1818 unsigned Dst = MI->getOperand(0).getReg();
1819 unsigned Src0 = MI->getOperand(1).getReg();
1820 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
1821 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
1822 if (DstRC != Src0RC) {
1823 MachineBasicBlock &MBB = *MI->getParent();
1824 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
1825 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
1826 .addReg(Src0);
1827 MI->getOperand(1).setReg(NewSrc0);
1828 }
1829 return;
1830 }
1831
1832 // Legalize MUBUF* instructions
1833 // FIXME: If we start using the non-addr64 instructions for compute, we
1834 // may need to legalize them here.
1835 int SRsrcIdx =
1836 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
1837 if (SRsrcIdx != -1) {
1838 // We have an MUBUF instruction
1839 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
1840 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
1841 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
1842 RI.getRegClass(SRsrcRC))) {
1843 // The operands are legal.
1844 // FIXME: We may need to legalize operands besides srsrc.
1845 return;
1846 }
1847
1848 MachineBasicBlock &MBB = *MI->getParent();
1849 // Extract the ptr from the resource descriptor.
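// The 64-bit base pointer is stripped out of the descriptor and folded into
// the vaddr operand below, while a replacement descriptor is built with a
// zero base: dwords 0-1 = 0, dword 2 = RSRC_DATA_FORMAT{31-0},
// dword 3 = RSRC_DATA_FORMAT{63-32}.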
1850
1851 // SRsrcPtrLo = srsrc:sub0
1852 unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
1853 &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
1854
1855 // SRsrcPtrHi = srsrc:sub1
1856 unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
1857 &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
1858
1859 // Create an empty resource descriptor
1860 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1861 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1862 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1863 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1864 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
1865
1866 // Zero64 = 0
1867 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
1868 Zero64)
1869 .addImm(0);
1870
1871 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
1872 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1873 SRsrcFormatLo)
1874 .addImm(RsrcDataFormat & 0xFFFFFFFF);
1875
1876 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
1877 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1878 SRsrcFormatHi)
1879 .addImm(RsrcDataFormat >> 32);
1880
1881 // NewSRsrc = {Zero64, SRsrcFormat}
1882 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1883 NewSRsrc)
1884 .addReg(Zero64)
1885 .addImm(AMDGPU::sub0_sub1)
1886 .addReg(SRsrcFormatLo)
1887 .addImm(AMDGPU::sub2)
1888 .addReg(SRsrcFormatHi)
1889 .addImm(AMDGPU::sub3);
1890
1891 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
1892 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
1893 unsigned NewVAddrLo;
1894 unsigned NewVAddrHi;
1895 if (VAddr) {
1896 // This is already an ADDR64 instruction so we need to add the pointer
1897 // extracted from the resource descriptor to the current value of VAddr.
1898 NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1899 NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1900
1901 // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
1902 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
1903 NewVAddrLo)
1904 .addReg(SRsrcPtrLo)
1905 .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
1906 .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
1907
1908 // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
1909 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
1910 NewVAddrHi)
1911 .addReg(SRsrcPtrHi)
1912 .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
1913 .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
1914 .addReg(AMDGPU::VCC, RegState::Implicit);
1915
1916 } else {
1917 // This instruction is the _OFFSET variant, so we need to convert it to
1918 // ADDR64.
1919 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
1920 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
1921 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
1922
1923 // Create the new instruction.
1924 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
1925 MachineInstr *Addr64 =
1926 BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
1927 .addOperand(*VData)
1928 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
1929 // This will be replaced later
1930 // with the new value of vaddr.
1931 .addOperand(*SRsrc) 1932 .addOperand(*SOffset) 1933 .addOperand(*Offset) 1934 .addImm(0) // glc 1935 .addImm(0) // slc 1936 .addImm(0); // tfe 1937 1938 MI->removeFromParent(); 1939 MI = Addr64; 1940 1941 NewVAddrLo = SRsrcPtrLo; 1942 NewVAddrHi = SRsrcPtrHi; 1943 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); 1944 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); 1945 } 1946 1947 // NewVaddr = {NewVaddrHi, NewVaddrLo} 1948 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 1949 NewVAddr) 1950 .addReg(NewVAddrLo) 1951 .addImm(AMDGPU::sub0) 1952 .addReg(NewVAddrHi) 1953 .addImm(AMDGPU::sub1); 1954 1955 1956 // Update the instruction to use NewVaddr 1957 VAddr->setReg(NewVAddr); 1958 // Update the instruction to use NewSRsrc 1959 SRsrc->setReg(NewSRsrc); 1960 } 1961 } 1962 1963 void SIInstrInfo::splitSMRD(MachineInstr *MI, 1964 const TargetRegisterClass *HalfRC, 1965 unsigned HalfImmOp, unsigned HalfSGPROp, 1966 MachineInstr *&Lo, MachineInstr *&Hi) const { 1967 1968 DebugLoc DL = MI->getDebugLoc(); 1969 MachineBasicBlock *MBB = MI->getParent(); 1970 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1971 unsigned RegLo = MRI.createVirtualRegister(HalfRC); 1972 unsigned RegHi = MRI.createVirtualRegister(HalfRC); 1973 unsigned HalfSize = HalfRC->getSize(); 1974 const MachineOperand *OffOp = 1975 getNamedOperand(*MI, AMDGPU::OpName::offset); 1976 const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); 1977 1978 // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes 1979 // on VI. 1980 1981 bool IsKill = SBase->isKill(); 1982 if (OffOp) { 1983 bool isVI = 1984 MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >= 1985 AMDGPUSubtarget::VOLCANIC_ISLANDS; 1986 unsigned OffScale = isVI ? 1 : 4; 1987 // Handle the _IMM variant 1988 unsigned LoOffset = OffOp->getImm() * OffScale; 1989 unsigned HiOffset = LoOffset + HalfSize; 1990 Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo) 1991 // Use addReg instead of addOperand 1992 // to make sure kill flag is cleared. 1993 .addReg(SBase->getReg(), 0, SBase->getSubReg()) 1994 .addImm(LoOffset / OffScale); 1995 1996 if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) { 1997 unsigned OffsetSGPR = 1998 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 1999 BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR) 2000 .addImm(HiOffset); // The offset in register is in bytes. 
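// HiOffset no longer fits the _IMM encoding (8-bit dword offset on SI,
// 20-bit byte offset on VI), so it was materialized into OffsetSGPR above
// and the _SGPR form is used for the high half of the load.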
2001 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
2002 .addReg(SBase->getReg(), getKillRegState(IsKill),
2003 SBase->getSubReg())
2004 .addReg(OffsetSGPR);
2005 } else {
2006 Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
2007 .addReg(SBase->getReg(), getKillRegState(IsKill),
2008 SBase->getSubReg())
2009 .addImm(HiOffset / OffScale);
2010 }
2011 } else {
2012 // Handle the _SGPR variant
2013 MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
2014 Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
2015 .addReg(SBase->getReg(), 0, SBase->getSubReg())
2016 .addOperand(*SOff);
2017 unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2018 BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
2019 .addOperand(*SOff)
2020 .addImm(HalfSize);
2021 // Define RegHi here as well; it feeds the REG_SEQUENCE below.
2021 Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
2022 .addReg(SBase->getReg(), getKillRegState(IsKill),
2023 SBase->getSubReg())
2024 .addReg(OffsetSGPR);
2025 }
2026
2027 unsigned SubLo, SubHi;
2028 switch (HalfSize) {
2029 case 4:
2030 SubLo = AMDGPU::sub0;
2031 SubHi = AMDGPU::sub1;
2032 break;
2033 case 8:
2034 SubLo = AMDGPU::sub0_sub1;
2035 SubHi = AMDGPU::sub2_sub3;
2036 break;
2037 case 16:
2038 SubLo = AMDGPU::sub0_sub1_sub2_sub3;
2039 SubHi = AMDGPU::sub4_sub5_sub6_sub7;
2040 break;
2041 case 32:
2042 SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
2043 SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
2044 break;
2045 default:
2046 llvm_unreachable("Unhandled HalfSize");
2047 }
2048
2049 BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
2050 .addOperand(MI->getOperand(0))
2051 .addReg(RegLo)
2052 .addImm(SubLo)
2053 .addReg(RegHi)
2054 .addImm(SubHi);
2055 }
2056
2057 void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
2058 MachineBasicBlock *MBB = MI->getParent();
2059 int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
2060 assert(DstIdx != -1);
2061 unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
2062 switch(RI.getRegClass(DstRCID)->getSize()) {
2063 case 4:
2064 case 8:
2065 case 16: {
2066 unsigned NewOpcode = getVALUOp(*MI);
2067 unsigned RegOffset;
2068 unsigned ImmOffset;
2069
2070 if (MI->getOperand(2).isReg()) {
2071 RegOffset = MI->getOperand(2).getReg();
2072 ImmOffset = 0;
2073 } else {
2074 assert(MI->getOperand(2).isImm());
2075 // SMRD instructions take a dword offset on SI and a byte offset on VI,
2076 // and MUBUF instructions always take a byte offset.
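// For example, on SI an SMRD immediate offset of 4 (dwords) corresponds to a
// MUBUF offset of 16 bytes, hence the << 2 below for SEA_ISLANDS and older.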
2077 ImmOffset = MI->getOperand(2).getImm(); 2078 if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <= 2079 AMDGPUSubtarget::SEA_ISLANDS) 2080 ImmOffset <<= 2; 2081 RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2082 2083 if (isUInt<12>(ImmOffset)) { 2084 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 2085 RegOffset) 2086 .addImm(0); 2087 } else { 2088 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), 2089 RegOffset) 2090 .addImm(ImmOffset); 2091 ImmOffset = 0; 2092 } 2093 } 2094 2095 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 2096 unsigned DWord0 = RegOffset; 2097 unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2098 unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2099 unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2100 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); 2101 2102 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) 2103 .addImm(0); 2104 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) 2105 .addImm(RsrcDataFormat & 0xFFFFFFFF); 2106 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) 2107 .addImm(RsrcDataFormat >> 32); 2108 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) 2109 .addReg(DWord0) 2110 .addImm(AMDGPU::sub0) 2111 .addReg(DWord1) 2112 .addImm(AMDGPU::sub1) 2113 .addReg(DWord2) 2114 .addImm(AMDGPU::sub2) 2115 .addReg(DWord3) 2116 .addImm(AMDGPU::sub3); 2117 MI->setDesc(get(NewOpcode)); 2118 if (MI->getOperand(2).isReg()) { 2119 MI->getOperand(2).setReg(SRsrc); 2120 } else { 2121 MI->getOperand(2).ChangeToRegister(SRsrc, false); 2122 } 2123 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); 2124 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); 2125 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc 2126 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc 2127 MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe 2128 2129 const TargetRegisterClass *NewDstRC = 2130 RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); 2131 2132 unsigned DstReg = MI->getOperand(0).getReg(); 2133 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); 2134 MRI.replaceRegWith(DstReg, NewDstReg); 2135 break; 2136 } 2137 case 32: { 2138 MachineInstr *Lo, *Hi; 2139 splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM, 2140 AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi); 2141 MI->eraseFromParent(); 2142 moveSMRDToVALU(Lo, MRI); 2143 moveSMRDToVALU(Hi, MRI); 2144 break; 2145 } 2146 2147 case 64: { 2148 MachineInstr *Lo, *Hi; 2149 splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM, 2150 AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi); 2151 MI->eraseFromParent(); 2152 moveSMRDToVALU(Lo, MRI); 2153 moveSMRDToVALU(Hi, MRI); 2154 break; 2155 } 2156 } 2157 } 2158 2159 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { 2160 SmallVector<MachineInstr *, 128> Worklist; 2161 Worklist.push_back(&TopInst); 2162 2163 while (!Worklist.empty()) { 2164 MachineInstr *Inst = Worklist.pop_back_val(); 2165 MachineBasicBlock *MBB = Inst->getParent(); 2166 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 2167 2168 unsigned Opcode = Inst->getOpcode(); 2169 unsigned NewOpcode = getVALUOp(*Inst); 2170 2171 // Handle some special cases 2172 switch (Opcode) { 2173 default: 2174 if (isSMRD(Inst->getOpcode())) { 2175 moveSMRDToVALU(Inst, MRI); 2176 } 2177 break; 
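// S_MOV_B64 has no single VALU equivalent: when the source is a register it
// is lowered to a COPY, otherwise the 64-bit immediate is split into two
// 32-bit moves (see split64BitImm).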
2178 case AMDGPU::S_MOV_B64: { 2179 DebugLoc DL = Inst->getDebugLoc(); 2180 2181 // If the source operand is a register we can replace this with a 2182 // copy. 2183 if (Inst->getOperand(1).isReg()) { 2184 MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY)) 2185 .addOperand(Inst->getOperand(0)) 2186 .addOperand(Inst->getOperand(1)); 2187 Worklist.push_back(Copy); 2188 } else { 2189 // Otherwise, we need to split this into two movs, because there is 2190 // no 64-bit VALU move instruction. 2191 unsigned Reg = Inst->getOperand(0).getReg(); 2192 unsigned Dst = split64BitImm(Worklist, 2193 Inst, 2194 MRI, 2195 MRI.getRegClass(Reg), 2196 Inst->getOperand(1)); 2197 MRI.replaceRegWith(Reg, Dst); 2198 } 2199 Inst->eraseFromParent(); 2200 continue; 2201 } 2202 case AMDGPU::S_AND_B64: 2203 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); 2204 Inst->eraseFromParent(); 2205 continue; 2206 2207 case AMDGPU::S_OR_B64: 2208 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); 2209 Inst->eraseFromParent(); 2210 continue; 2211 2212 case AMDGPU::S_XOR_B64: 2213 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); 2214 Inst->eraseFromParent(); 2215 continue; 2216 2217 case AMDGPU::S_NOT_B64: 2218 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); 2219 Inst->eraseFromParent(); 2220 continue; 2221 2222 case AMDGPU::S_BCNT1_I32_B64: 2223 splitScalar64BitBCNT(Worklist, Inst); 2224 Inst->eraseFromParent(); 2225 continue; 2226 2227 case AMDGPU::S_BFE_I64: { 2228 splitScalar64BitBFE(Worklist, Inst); 2229 Inst->eraseFromParent(); 2230 continue; 2231 } 2232 2233 case AMDGPU::S_LSHL_B32: 2234 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2235 NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 2236 swapOperands(Inst); 2237 } 2238 break; 2239 case AMDGPU::S_ASHR_I32: 2240 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2241 NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 2242 swapOperands(Inst); 2243 } 2244 break; 2245 case AMDGPU::S_LSHR_B32: 2246 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2247 NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 2248 swapOperands(Inst); 2249 } 2250 break; 2251 case AMDGPU::S_LSHL_B64: 2252 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2253 NewOpcode = AMDGPU::V_LSHLREV_B64; 2254 swapOperands(Inst); 2255 } 2256 break; 2257 case AMDGPU::S_ASHR_I64: 2258 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2259 NewOpcode = AMDGPU::V_ASHRREV_I64; 2260 swapOperands(Inst); 2261 } 2262 break; 2263 case AMDGPU::S_LSHR_B64: 2264 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2265 NewOpcode = AMDGPU::V_LSHRREV_B64; 2266 swapOperands(Inst); 2267 } 2268 break; 2269 2270 case AMDGPU::S_BFE_U64: 2271 case AMDGPU::S_BFM_B64: 2272 llvm_unreachable("Moving this op to VALU not implemented"); 2273 } 2274 2275 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 2276 // We cannot move this instruction to the VALU, so we should try to 2277 // legalize its operands instead. 2278 legalizeOperands(Inst); 2279 continue; 2280 } 2281 2282 // Use the new VALU Opcode. 2283 const MCInstrDesc &NewDesc = get(NewOpcode); 2284 Inst->setDesc(NewDesc); 2285 2286 // Remove any references to SCC. Vector instructions can't read from it, and 2287 // We're just about to add the implicit use / defs of VCC, and we don't want 2288 // both. 
2289 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) { 2290 MachineOperand &Op = Inst->getOperand(i); 2291 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) 2292 Inst->RemoveOperand(i); 2293 } 2294 2295 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 2296 // We are converting these to a BFE, so we need to add the missing 2297 // operands for the size and offset. 2298 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 2299 Inst->addOperand(MachineOperand::CreateImm(0)); 2300 Inst->addOperand(MachineOperand::CreateImm(Size)); 2301 2302 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 2303 // The VALU version adds the second operand to the result, so insert an 2304 // extra 0 operand. 2305 Inst->addOperand(MachineOperand::CreateImm(0)); 2306 } 2307 2308 addDescImplicitUseDef(NewDesc, Inst); 2309 2310 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 2311 const MachineOperand &OffsetWidthOp = Inst->getOperand(2); 2312 // If we need to move this to VGPRs, we need to unpack the second operand 2313 // back into the 2 separate ones for bit offset and width. 2314 assert(OffsetWidthOp.isImm() && 2315 "Scalar BFE is only implemented for constant width and offset"); 2316 uint32_t Imm = OffsetWidthOp.getImm(); 2317 2318 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2319 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2320 Inst->RemoveOperand(2); // Remove old immediate. 2321 Inst->addOperand(MachineOperand::CreateImm(Offset)); 2322 Inst->addOperand(MachineOperand::CreateImm(BitWidth)); 2323 } 2324 2325 // Update the destination register class. 2326 2327 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); 2328 2329 switch (Opcode) { 2330 // For target instructions, getOpRegClass just returns the virtual 2331 // register class associated with the operand, so we need to find an 2332 // equivalent VGPR register class in order to move the instruction to the 2333 // VALU. 
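// For example, a PHI or REG_SEQUENCE that produced an SReg_64 is retargeted
// to VReg_64 here so that its now VALU-produced inputs remain legal.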
2334 case AMDGPU::COPY: 2335 case AMDGPU::PHI: 2336 case AMDGPU::REG_SEQUENCE: 2337 case AMDGPU::INSERT_SUBREG: 2338 if (RI.hasVGPRs(NewDstRC)) 2339 continue; 2340 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2341 if (!NewDstRC) 2342 continue; 2343 break; 2344 default: 2345 break; 2346 } 2347 2348 unsigned DstReg = Inst->getOperand(0).getReg(); 2349 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); 2350 MRI.replaceRegWith(DstReg, NewDstReg); 2351 2352 // Legalize the operands 2353 legalizeOperands(Inst); 2354 2355 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), 2356 E = MRI.use_end(); I != E; ++I) { 2357 MachineInstr &UseMI = *I->getParent(); 2358 if (!canReadVGPR(UseMI, I.getOperandNo())) { 2359 Worklist.push_back(&UseMI); 2360 } 2361 } 2362 } 2363 } 2364 2365 //===----------------------------------------------------------------------===// 2366 // Indirect addressing callbacks 2367 //===----------------------------------------------------------------------===// 2368 2369 unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, 2370 unsigned Channel) const { 2371 assert(Channel == 0); 2372 return RegIndex; 2373 } 2374 2375 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2376 return &AMDGPU::VGPR_32RegClass; 2377 } 2378 2379 void SIInstrInfo::splitScalar64BitUnaryOp( 2380 SmallVectorImpl<MachineInstr *> &Worklist, 2381 MachineInstr *Inst, 2382 unsigned Opcode) const { 2383 MachineBasicBlock &MBB = *Inst->getParent(); 2384 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2385 2386 MachineOperand &Dest = Inst->getOperand(0); 2387 MachineOperand &Src0 = Inst->getOperand(1); 2388 DebugLoc DL = Inst->getDebugLoc(); 2389 2390 MachineBasicBlock::iterator MII = Inst; 2391 2392 const MCInstrDesc &InstDesc = get(Opcode); 2393 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2394 MRI.getRegClass(Src0.getReg()) : 2395 &AMDGPU::SGPR_32RegClass; 2396 2397 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2398 2399 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2400 AMDGPU::sub0, Src0SubRC); 2401 2402 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2403 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); 2404 2405 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); 2406 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2407 .addOperand(SrcReg0Sub0); 2408 2409 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2410 AMDGPU::sub1, Src0SubRC); 2411 2412 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); 2413 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2414 .addOperand(SrcReg0Sub1); 2415 2416 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); 2417 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2418 .addReg(DestSub0) 2419 .addImm(AMDGPU::sub0) 2420 .addReg(DestSub1) 2421 .addImm(AMDGPU::sub1); 2422 2423 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2424 2425 // Try to legalize the operands in case we need to swap the order to keep it 2426 // valid. 
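// Both halves are still scalar instructions (e.g. S_NOT_B32); push them onto
// the worklist so the main moveToVALU loop converts each to its VALU form.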
2427 Worklist.push_back(LoHalf); 2428 Worklist.push_back(HiHalf); 2429 } 2430 2431 void SIInstrInfo::splitScalar64BitBinaryOp( 2432 SmallVectorImpl<MachineInstr *> &Worklist, 2433 MachineInstr *Inst, 2434 unsigned Opcode) const { 2435 MachineBasicBlock &MBB = *Inst->getParent(); 2436 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2437 2438 MachineOperand &Dest = Inst->getOperand(0); 2439 MachineOperand &Src0 = Inst->getOperand(1); 2440 MachineOperand &Src1 = Inst->getOperand(2); 2441 DebugLoc DL = Inst->getDebugLoc(); 2442 2443 MachineBasicBlock::iterator MII = Inst; 2444 2445 const MCInstrDesc &InstDesc = get(Opcode); 2446 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2447 MRI.getRegClass(Src0.getReg()) : 2448 &AMDGPU::SGPR_32RegClass; 2449 2450 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2451 const TargetRegisterClass *Src1RC = Src1.isReg() ? 2452 MRI.getRegClass(Src1.getReg()) : 2453 &AMDGPU::SGPR_32RegClass; 2454 2455 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); 2456 2457 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2458 AMDGPU::sub0, Src0SubRC); 2459 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 2460 AMDGPU::sub0, Src1SubRC); 2461 2462 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2463 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); 2464 2465 unsigned DestSub0 = MRI.createVirtualRegister(DestRC); 2466 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2467 .addOperand(SrcReg0Sub0) 2468 .addOperand(SrcReg1Sub0); 2469 2470 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2471 AMDGPU::sub1, Src0SubRC); 2472 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 2473 AMDGPU::sub1, Src1SubRC); 2474 2475 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); 2476 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2477 .addOperand(SrcReg0Sub1) 2478 .addOperand(SrcReg1Sub1); 2479 2480 unsigned FullDestReg = MRI.createVirtualRegister(DestRC); 2481 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2482 .addReg(DestSub0) 2483 .addImm(AMDGPU::sub0) 2484 .addReg(DestSub1) 2485 .addImm(AMDGPU::sub1); 2486 2487 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2488 2489 // Try to legalize the operands in case we need to swap the order to keep it 2490 // valid. 2491 Worklist.push_back(LoHalf); 2492 Worklist.push_back(HiHalf); 2493 } 2494 2495 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, 2496 MachineInstr *Inst) const { 2497 MachineBasicBlock &MBB = *Inst->getParent(); 2498 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2499 2500 MachineBasicBlock::iterator MII = Inst; 2501 DebugLoc DL = Inst->getDebugLoc(); 2502 2503 MachineOperand &Dest = Inst->getOperand(0); 2504 MachineOperand &Src = Inst->getOperand(1); 2505 2506 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 2507 const TargetRegisterClass *SrcRC = Src.isReg() ? 
2508 MRI.getRegClass(Src.getReg()) :
2509 &AMDGPU::SGPR_32RegClass;
2510
2511 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2512 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2513
2514 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2515
2516 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2517 AMDGPU::sub0, SrcSubRC);
2518 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2519 AMDGPU::sub1, SrcSubRC);
2520
2521 MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
2522 .addOperand(SrcRegSub0)
2523 .addImm(0);
2524
2525 MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2526 .addOperand(SrcRegSub1)
2527 .addReg(MidReg);
2528
2529 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2530
2531 Worklist.push_back(First);
2532 Worklist.push_back(Second);
2533 }
2534
2535 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2536 MachineInstr *Inst) const {
2537 MachineBasicBlock &MBB = *Inst->getParent();
2538 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2539 MachineBasicBlock::iterator MII = Inst;
2540 DebugLoc DL = Inst->getDebugLoc();
2541
2542 MachineOperand &Dest = Inst->getOperand(0);
2543 uint32_t Imm = Inst->getOperand(2).getImm();
2544 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2545 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2546
2547 (void) Offset;
2548
2549 // Only sext_inreg cases handled.
2550 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
2551 BitWidth <= 32 &&
2552 Offset == 0 &&
2553 "Not implemented");
2554
2555 if (BitWidth < 32) {
2556 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2557 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2558 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2559
2560 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2561 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2562 .addImm(0)
2563 .addImm(BitWidth);
2564
2565 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2566 .addImm(31)
2567 .addReg(MidRegLo);
2568
2569 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2570 .addReg(MidRegLo)
2571 .addImm(AMDGPU::sub0)
2572 .addReg(MidRegHi)
2573 .addImm(AMDGPU::sub1);
2574
2575 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2576 return;
2577 }
2578
2579 MachineOperand &Src = Inst->getOperand(1);
2580 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2581 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2582
2583 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2584 .addImm(31)
2585 .addReg(Src.getReg(), 0, AMDGPU::sub0);
2586
2587 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2588 .addReg(Src.getReg(), 0, AMDGPU::sub0)
2589 .addImm(AMDGPU::sub0)
2590 .addReg(TmpReg)
2591 .addImm(AMDGPU::sub1);
2592
2593 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2594 }
2595
2596 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
2597 MachineInstr *Inst) const {
2598 // Add the implicit register uses and definitions from the new descriptor.
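// For example, VALU opcodes typically carry an implicit use of EXEC, and
// carry-writing forms such as V_ADD_I32_e32 also list VCC as an implicit def.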
2599 if (NewDesc.ImplicitUses) { 2600 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { 2601 unsigned Reg = NewDesc.ImplicitUses[i]; 2602 Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); 2603 } 2604 } 2605 2606 if (NewDesc.ImplicitDefs) { 2607 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { 2608 unsigned Reg = NewDesc.ImplicitDefs[i]; 2609 Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); 2610 } 2611 } 2612 } 2613 2614 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, 2615 int OpIndices[3]) const { 2616 const MCInstrDesc &Desc = get(MI->getOpcode()); 2617 2618 // Find the one SGPR operand we are allowed to use. 2619 unsigned SGPRReg = AMDGPU::NoRegister; 2620 2621 // First we need to consider the instruction's operand requirements before 2622 // legalizing. Some operands are required to be SGPRs, such as implicit uses 2623 // of VCC, but we are still bound by the constant bus requirement to only use 2624 // one. 2625 // 2626 // If the operand's class is an SGPR, we can never move it. 2627 2628 for (const MachineOperand &MO : MI->implicit_operands()) { 2629 // We only care about reads. 2630 if (MO.isDef()) 2631 continue; 2632 2633 if (MO.getReg() == AMDGPU::VCC) 2634 return AMDGPU::VCC; 2635 2636 if (MO.getReg() == AMDGPU::FLAT_SCR) 2637 return AMDGPU::FLAT_SCR; 2638 } 2639 2640 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2641 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 2642 2643 for (unsigned i = 0; i < 3; ++i) { 2644 int Idx = OpIndices[i]; 2645 if (Idx == -1) 2646 break; 2647 2648 const MachineOperand &MO = MI->getOperand(Idx); 2649 if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) 2650 SGPRReg = MO.getReg(); 2651 2652 if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 2653 UsedSGPRs[i] = MO.getReg(); 2654 } 2655 2656 if (SGPRReg != AMDGPU::NoRegister) 2657 return SGPRReg; 2658 2659 // We don't have a required SGPR operand, so we have a bit more freedom in 2660 // selecting operands to move. 2661 2662 // Try to select the most used SGPR. If an SGPR is equal to one of the 2663 // others, we choose that. 2664 // 2665 // e.g. 
2666 // V_FMA_F32 v0, s0, s0, s0 -> No moves 2667 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 2668 2669 if (UsedSGPRs[0] != AMDGPU::NoRegister) { 2670 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 2671 SGPRReg = UsedSGPRs[0]; 2672 } 2673 2674 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { 2675 if (UsedSGPRs[1] == UsedSGPRs[2]) 2676 SGPRReg = UsedSGPRs[1]; 2677 } 2678 2679 return SGPRReg; 2680 } 2681 2682 MachineInstrBuilder SIInstrInfo::buildIndirectWrite( 2683 MachineBasicBlock *MBB, 2684 MachineBasicBlock::iterator I, 2685 unsigned ValueReg, 2686 unsigned Address, unsigned OffsetReg) const { 2687 const DebugLoc &DL = MBB->findDebugLoc(I); 2688 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( 2689 getIndirectIndexBegin(*MBB->getParent())); 2690 2691 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1)) 2692 .addReg(IndirectBaseReg, RegState::Define) 2693 .addOperand(I->getOperand(0)) 2694 .addReg(IndirectBaseReg) 2695 .addReg(OffsetReg) 2696 .addImm(0) 2697 .addReg(ValueReg); 2698 } 2699 2700 MachineInstrBuilder SIInstrInfo::buildIndirectRead( 2701 MachineBasicBlock *MBB, 2702 MachineBasicBlock::iterator I, 2703 unsigned ValueReg, 2704 unsigned Address, unsigned OffsetReg) const { 2705 const DebugLoc &DL = MBB->findDebugLoc(I); 2706 unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( 2707 getIndirectIndexBegin(*MBB->getParent())); 2708 2709 return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) 2710 .addOperand(I->getOperand(0)) 2711 .addOperand(I->getOperand(1)) 2712 .addReg(IndirectBaseReg) 2713 .addReg(OffsetReg) 2714 .addImm(0); 2715 2716 } 2717 2718 void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, 2719 const MachineFunction &MF) const { 2720 int End = getIndirectIndexEnd(MF); 2721 int Begin = getIndirectIndexBegin(MF); 2722 2723 if (End == -1) 2724 return; 2725 2726 2727 for (int Index = Begin; Index <= End; ++Index) 2728 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index)); 2729 2730 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) 2731 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); 2732 2733 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) 2734 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); 2735 2736 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index) 2737 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); 2738 2739 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index) 2740 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); 2741 2742 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) 2743 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); 2744 } 2745 2746 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 2747 unsigned OperandName) const { 2748 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 2749 if (Idx == -1) 2750 return nullptr; 2751 2752 return &MI.getOperand(Idx); 2753 } 2754 2755 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 2756 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 2757 if (ST.isAmdHsaOS()) { 2758 RsrcDataFormat |= (1ULL << 56); 2759 2760 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 2761 // Set MTYPE = 2 2762 RsrcDataFormat |= (2ULL << 59); 2763 } 2764 2765 return RsrcDataFormat; 2766 } 2767