1 //===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI Implementation of TargetInstrInfo. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include "SIInstrInfo.h" 17 #include "AMDGPUTargetMachine.h" 18 #include "GCNHazardRecognizer.h" 19 #include "SIDefines.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/ScheduleDAG.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 #include "llvm/MC/MCInstrDesc.h" 28 #include "llvm/Support/Debug.h" 29 30 using namespace llvm; 31 32 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) 33 : AMDGPUInstrInfo(st), RI() {} 34 35 //===----------------------------------------------------------------------===// 36 // TargetInstrInfo callbacks 37 //===----------------------------------------------------------------------===// 38 39 static unsigned getNumOperandsNoGlue(SDNode *Node) { 40 unsigned N = Node->getNumOperands(); 41 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) 42 --N; 43 return N; 44 } 45 46 static SDValue findChainOperand(SDNode *Load) { 47 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); 48 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); 49 return LastOp; 50 } 51 52 /// \brief Returns true if both nodes have the same value for the given 53 /// operand \p Op, or if both nodes do not have this operand. 54 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { 55 unsigned Opc0 = N0->getMachineOpcode(); 56 unsigned Opc1 = N1->getMachineOpcode(); 57 58 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); 59 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); 60 61 if (Op0Idx == -1 && Op1Idx == -1) 62 return true; 63 64 65 if ((Op0Idx == -1 && Op1Idx != -1) || 66 (Op1Idx == -1 && Op0Idx != -1)) 67 return false; 68 69 // getNamedOperandIdx returns the index for the MachineInstr's operands, 70 // which includes the result as the first operand. We are indexing into the 71 // MachineSDNode's operands, so we need to skip the result operand to get 72 // the real index. 73 --Op0Idx; 74 --Op1Idx; 75 76 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); 77 } 78 79 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 80 AliasAnalysis *AA) const { 81 // TODO: The generic check fails for VALU instructions that should be 82 // rematerializable due to implicit reads of exec. We really want all of the 83 // generic logic for this except for this. 84 switch (MI->getOpcode()) { 85 case AMDGPU::V_MOV_B32_e32: 86 case AMDGPU::V_MOV_B32_e64: 87 case AMDGPU::V_MOV_B64_PSEUDO: 88 return true; 89 default: 90 return false; 91 } 92 } 93 94 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, 95 int64_t &Offset0, 96 int64_t &Offset1) const { 97 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) 98 return false; 99 100 unsigned Opc0 = Load0->getMachineOpcode(); 101 unsigned Opc1 = Load1->getMachineOpcode(); 102 103 // Make sure both are actually loads. 
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
183 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 184 return false; 185 186 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 187 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 188 return true; 189 } 190 191 return false; 192 } 193 194 static bool isStride64(unsigned Opc) { 195 switch (Opc) { 196 case AMDGPU::DS_READ2ST64_B32: 197 case AMDGPU::DS_READ2ST64_B64: 198 case AMDGPU::DS_WRITE2ST64_B32: 199 case AMDGPU::DS_WRITE2ST64_B64: 200 return true; 201 default: 202 return false; 203 } 204 } 205 206 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, 207 int64_t &Offset, 208 const TargetRegisterInfo *TRI) const { 209 unsigned Opc = LdSt->getOpcode(); 210 211 if (isDS(*LdSt)) { 212 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 213 AMDGPU::OpName::offset); 214 if (OffsetImm) { 215 // Normal, single offset LDS instruction. 216 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 217 AMDGPU::OpName::addr); 218 219 BaseReg = AddrReg->getReg(); 220 Offset = OffsetImm->getImm(); 221 return true; 222 } 223 224 // The 2 offset instructions use offset0 and offset1 instead. We can treat 225 // these as a load with a single offset if the 2 offsets are consecutive. We 226 // will use this for some partially aligned loads. 227 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, 228 AMDGPU::OpName::offset0); 229 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, 230 AMDGPU::OpName::offset1); 231 232 uint8_t Offset0 = Offset0Imm->getImm(); 233 uint8_t Offset1 = Offset1Imm->getImm(); 234 235 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 236 // Each of these offsets is in element sized units, so we need to convert 237 // to bytes of the individual reads. 238 239 unsigned EltSize; 240 if (LdSt->mayLoad()) 241 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; 242 else { 243 assert(LdSt->mayStore()); 244 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 245 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); 246 } 247 248 if (isStride64(Opc)) 249 EltSize *= 64; 250 251 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 252 AMDGPU::OpName::addr); 253 BaseReg = AddrReg->getReg(); 254 Offset = EltSize * Offset0; 255 return true; 256 } 257 258 return false; 259 } 260 261 if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) { 262 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 263 return false; 264 265 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 266 AMDGPU::OpName::vaddr); 267 if (!AddrReg) 268 return false; 269 270 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 271 AMDGPU::OpName::offset); 272 BaseReg = AddrReg->getReg(); 273 Offset = OffsetImm->getImm(); 274 return true; 275 } 276 277 if (isSMRD(*LdSt)) { 278 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 279 AMDGPU::OpName::offset); 280 if (!OffsetImm) 281 return false; 282 283 const MachineOperand *SBaseReg = getNamedOperand(*LdSt, 284 AMDGPU::OpName::sbase); 285 BaseReg = SBaseReg->getReg(); 286 Offset = OffsetImm->getImm(); 287 return true; 288 } 289 290 return false; 291 } 292 293 bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt, 294 MachineInstr *SecondLdSt, 295 unsigned NumLoads) const { 296 const MachineOperand *FirstDst = nullptr; 297 const MachineOperand *SecondDst = nullptr; 298 299 if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) { 300 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst); 301 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst); 302 } 303 304 if (isSMRD(*FirstLdSt) 
&& isSMRD(*SecondLdSt)) { 305 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst); 306 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst); 307 } 308 309 if ((isMUBUF(*FirstLdSt) && isMUBUF(*SecondLdSt)) || 310 (isMTBUF(*FirstLdSt) && isMTBUF(*SecondLdSt))) { 311 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata); 312 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata); 313 } 314 315 if (!FirstDst || !SecondDst) 316 return false; 317 318 // Try to limit clustering based on the total number of bytes loaded 319 // rather than the number of instructions. This is done to help reduce 320 // register pressure. The method used is somewhat inexact, though, 321 // because it assumes that all loads in the cluster will load the 322 // same number of bytes as FirstLdSt. 323 324 // The unit of this value is bytes. 325 // FIXME: This needs finer tuning. 326 unsigned LoadClusterThreshold = 16; 327 328 const MachineRegisterInfo &MRI = 329 FirstLdSt->getParent()->getParent()->getRegInfo(); 330 const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); 331 332 return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; 333 } 334 335 void 336 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 337 MachineBasicBlock::iterator MI, DebugLoc DL, 338 unsigned DestReg, unsigned SrcReg, 339 bool KillSrc) const { 340 341 // If we are trying to copy to or from SCC, there is a bug somewhere else in 342 // the backend. While it may be theoretically possible to do this, it should 343 // never be necessary. 344 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 345 346 static const int16_t Sub0_15[] = { 347 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 348 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 349 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 350 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 351 }; 352 353 static const int16_t Sub0_15_64[] = { 354 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 355 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 356 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 357 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 358 }; 359 360 static const int16_t Sub0_7[] = { 361 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 362 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 363 }; 364 365 static const int16_t Sub0_7_64[] = { 366 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 367 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 368 }; 369 370 static const int16_t Sub0_3[] = { 371 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 372 }; 373 374 static const int16_t Sub0_3_64[] = { 375 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 376 }; 377 378 static const int16_t Sub0_2[] = { 379 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 380 }; 381 382 static const int16_t Sub0_1[] = { 383 AMDGPU::sub0, AMDGPU::sub1, 384 }; 385 386 unsigned Opcode; 387 ArrayRef<int16_t> SubIndices; 388 bool Forward; 389 390 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 391 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 392 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 393 .addReg(SrcReg, getKillRegState(KillSrc)); 394 return; 395 396 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 397 if (DestReg == AMDGPU::VCC) { 398 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 399 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 400 .addReg(SrcReg, getKillRegState(KillSrc)); 401 } else { 402 // FIXME: Hack until VReg_1 removed. 
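        // Copy a boolean kept in a VGPR into VCC by comparing it against zero;
        // the VOPC e32 encoding writes its result implicitly to VCC.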
403 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 404 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) 405 .addImm(0) 406 .addReg(SrcReg, getKillRegState(KillSrc)); 407 } 408 409 return; 410 } 411 412 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 413 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 414 .addReg(SrcReg, getKillRegState(KillSrc)); 415 return; 416 417 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 418 assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 419 Opcode = AMDGPU::S_MOV_B64; 420 SubIndices = Sub0_3_64; 421 422 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 423 assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 424 Opcode = AMDGPU::S_MOV_B64; 425 SubIndices = Sub0_7_64; 426 427 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 428 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 429 Opcode = AMDGPU::S_MOV_B64; 430 SubIndices = Sub0_15_64; 431 432 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 433 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 434 AMDGPU::SReg_32RegClass.contains(SrcReg)); 435 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 436 .addReg(SrcReg, getKillRegState(KillSrc)); 437 return; 438 439 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 440 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 441 AMDGPU::SReg_64RegClass.contains(SrcReg)); 442 Opcode = AMDGPU::V_MOV_B32_e32; 443 SubIndices = Sub0_1; 444 445 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { 446 assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 447 Opcode = AMDGPU::V_MOV_B32_e32; 448 SubIndices = Sub0_2; 449 450 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 451 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 452 AMDGPU::SReg_128RegClass.contains(SrcReg)); 453 Opcode = AMDGPU::V_MOV_B32_e32; 454 SubIndices = Sub0_3; 455 456 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 457 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 458 AMDGPU::SReg_256RegClass.contains(SrcReg)); 459 Opcode = AMDGPU::V_MOV_B32_e32; 460 SubIndices = Sub0_7; 461 462 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 463 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 464 AMDGPU::SReg_512RegClass.contains(SrcReg)); 465 Opcode = AMDGPU::V_MOV_B32_e32; 466 SubIndices = Sub0_15; 467 468 } else { 469 llvm_unreachable("Can't copy register!"); 470 } 471 472 if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) 473 Forward = true; 474 else 475 Forward = false; 476 477 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 478 unsigned SubIdx; 479 if (Forward) 480 SubIdx = SubIndices[Idx]; 481 else 482 SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 483 484 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 485 get(Opcode), RI.getSubReg(DestReg, SubIdx)); 486 487 Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); 488 489 if (Idx == SubIndices.size() - 1) 490 Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit); 491 492 if (Idx == 0) 493 Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 494 } 495 } 496 497 int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 498 const unsigned Opcode = MI.getOpcode(); 499 500 int NewOpc; 501 502 // Try to map original to commuted opcode 503 NewOpc = AMDGPU::getCommuteRev(Opcode); 504 if (NewOpc != -1) 505 // Check if the commuted (REV) opcode exists on the target. 506 return pseudoToMCOpcode(NewOpc) != -1 ? 
NewOpc : -1; 507 508 // Try to map commuted to original opcode 509 NewOpc = AMDGPU::getCommuteOrig(Opcode); 510 if (NewOpc != -1) 511 // Check if the original (non-REV) opcode exists on the target. 512 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 513 514 return Opcode; 515 } 516 517 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 518 519 if (DstRC->getSize() == 4) { 520 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 521 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 522 return AMDGPU::S_MOV_B64; 523 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 524 return AMDGPU::V_MOV_B64_PSEUDO; 525 } 526 return AMDGPU::COPY; 527 } 528 529 static unsigned getSGPRSpillSaveOpcode(unsigned Size) { 530 switch (Size) { 531 case 4: 532 return AMDGPU::SI_SPILL_S32_SAVE; 533 case 8: 534 return AMDGPU::SI_SPILL_S64_SAVE; 535 case 16: 536 return AMDGPU::SI_SPILL_S128_SAVE; 537 case 32: 538 return AMDGPU::SI_SPILL_S256_SAVE; 539 case 64: 540 return AMDGPU::SI_SPILL_S512_SAVE; 541 default: 542 llvm_unreachable("unknown register size"); 543 } 544 } 545 546 static unsigned getVGPRSpillSaveOpcode(unsigned Size) { 547 switch (Size) { 548 case 4: 549 return AMDGPU::SI_SPILL_V32_SAVE; 550 case 8: 551 return AMDGPU::SI_SPILL_V64_SAVE; 552 case 12: 553 return AMDGPU::SI_SPILL_V96_SAVE; 554 case 16: 555 return AMDGPU::SI_SPILL_V128_SAVE; 556 case 32: 557 return AMDGPU::SI_SPILL_V256_SAVE; 558 case 64: 559 return AMDGPU::SI_SPILL_V512_SAVE; 560 default: 561 llvm_unreachable("unknown register size"); 562 } 563 } 564 565 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 566 MachineBasicBlock::iterator MI, 567 unsigned SrcReg, bool isKill, 568 int FrameIndex, 569 const TargetRegisterClass *RC, 570 const TargetRegisterInfo *TRI) const { 571 MachineFunction *MF = MBB.getParent(); 572 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 573 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 574 DebugLoc DL = MBB.findDebugLoc(MI); 575 576 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 577 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 578 MachinePointerInfo PtrInfo 579 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 580 MachineMemOperand *MMO 581 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 582 Size, Align); 583 584 if (RI.isSGPRClass(RC)) { 585 MFI->setHasSpilledSGPRs(); 586 587 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { 588 // m0 may not be allowed for readlane. 589 MachineRegisterInfo &MRI = MF->getRegInfo(); 590 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); 591 } 592 593 // We are only allowed to create one new instruction when spilling 594 // registers, so we need to use pseudo instruction for spilling 595 // SGPRs. 
596 unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); 597 BuildMI(MBB, MI, DL, get(Opcode)) 598 .addReg(SrcReg) // src 599 .addFrameIndex(FrameIndex) // frame_idx 600 .addMemOperand(MMO); 601 602 return; 603 } 604 605 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 606 LLVMContext &Ctx = MF->getFunction()->getContext(); 607 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 608 " spill register"); 609 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 610 .addReg(SrcReg); 611 612 return; 613 } 614 615 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 616 617 unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); 618 MFI->setHasSpilledVGPRs(); 619 BuildMI(MBB, MI, DL, get(Opcode)) 620 .addReg(SrcReg) // src 621 .addFrameIndex(FrameIndex) // frame_idx 622 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 623 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 624 .addImm(0) // offset 625 .addMemOperand(MMO); 626 } 627 628 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 629 switch (Size) { 630 case 4: 631 return AMDGPU::SI_SPILL_S32_RESTORE; 632 case 8: 633 return AMDGPU::SI_SPILL_S64_RESTORE; 634 case 16: 635 return AMDGPU::SI_SPILL_S128_RESTORE; 636 case 32: 637 return AMDGPU::SI_SPILL_S256_RESTORE; 638 case 64: 639 return AMDGPU::SI_SPILL_S512_RESTORE; 640 default: 641 llvm_unreachable("unknown register size"); 642 } 643 } 644 645 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 646 switch (Size) { 647 case 4: 648 return AMDGPU::SI_SPILL_V32_RESTORE; 649 case 8: 650 return AMDGPU::SI_SPILL_V64_RESTORE; 651 case 12: 652 return AMDGPU::SI_SPILL_V96_RESTORE; 653 case 16: 654 return AMDGPU::SI_SPILL_V128_RESTORE; 655 case 32: 656 return AMDGPU::SI_SPILL_V256_RESTORE; 657 case 64: 658 return AMDGPU::SI_SPILL_V512_RESTORE; 659 default: 660 llvm_unreachable("unknown register size"); 661 } 662 } 663 664 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 665 MachineBasicBlock::iterator MI, 666 unsigned DestReg, int FrameIndex, 667 const TargetRegisterClass *RC, 668 const TargetRegisterInfo *TRI) const { 669 MachineFunction *MF = MBB.getParent(); 670 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 671 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 672 DebugLoc DL = MBB.findDebugLoc(MI); 673 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 674 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 675 676 MachinePointerInfo PtrInfo 677 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 678 679 MachineMemOperand *MMO = MF->getMachineMemOperand( 680 PtrInfo, MachineMemOperand::MOLoad, Size, Align); 681 682 if (RI.isSGPRClass(RC)) { 683 // FIXME: Maybe this should not include a memoperand because it will be 684 // lowered to non-memory instructions. 685 unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); 686 687 if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { 688 // m0 may not be allowed for readlane. 
689 MachineRegisterInfo &MRI = MF->getRegInfo(); 690 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); 691 } 692 693 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 694 .addFrameIndex(FrameIndex) // frame_idx 695 .addMemOperand(MMO); 696 697 return; 698 } 699 700 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 701 LLVMContext &Ctx = MF->getFunction()->getContext(); 702 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" 703 " restore register"); 704 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); 705 706 return; 707 } 708 709 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 710 711 unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); 712 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 713 .addFrameIndex(FrameIndex) // frame_idx 714 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 715 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 716 .addImm(0) // offset 717 .addMemOperand(MMO); 718 } 719 720 /// \param @Offset Offset in bytes of the FrameIndex being spilled 721 unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, 722 MachineBasicBlock::iterator MI, 723 RegScavenger *RS, unsigned TmpReg, 724 unsigned FrameOffset, 725 unsigned Size) const { 726 MachineFunction *MF = MBB.getParent(); 727 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 728 const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>(); 729 const SIRegisterInfo *TRI = 730 static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); 731 DebugLoc DL = MBB.findDebugLoc(MI); 732 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); 733 unsigned WavefrontSize = ST.getWavefrontSize(); 734 735 unsigned TIDReg = MFI->getTIDReg(); 736 if (!MFI->hasCalculatedTID()) { 737 MachineBasicBlock &Entry = MBB.getParent()->front(); 738 MachineBasicBlock::iterator Insert = Entry.front(); 739 DebugLoc DL = Insert->getDebugLoc(); 740 741 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass); 742 if (TIDReg == AMDGPU::NoRegister) 743 return TIDReg; 744 745 746 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && 747 WorkGroupSize > WavefrontSize) { 748 749 unsigned TIDIGXReg 750 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); 751 unsigned TIDIGYReg 752 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); 753 unsigned TIDIGZReg 754 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); 755 unsigned InputPtrReg = 756 TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); 757 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { 758 if (!Entry.isLiveIn(Reg)) 759 Entry.addLiveIn(Reg); 760 } 761 762 RS->enterBasicBlock(Entry); 763 // FIXME: Can we scavenge an SReg_64 and access the subregs? 
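      // Scavenge two 32-bit SGPRs to hold the group counts loaded via the
      // kernel input pointer below.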
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
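    // The 64-bit move is split into two 32-bit V_MOV_B32 writes to the sub0
    // and sub1 halves of the destination.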
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI->getOperand(1).getReg();
    unsigned Src1 = MI->getOperand(2).getReg();
    const MachineOperand &SrcCond = MI->getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCond);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::SI_CONSTDATA_PTR: {
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                       .addReg(RegLo)
                       .addOperand(MI->getOperand(1)));
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                       .addReg(RegHi)
                       .addImm(0));

    llvm::finalizeBundle(MBB, Bundler.begin());

    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for a
/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still
/// fail to commute the operands; a null pointer is returned in such cases.
947 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, 948 bool NewMI, 949 unsigned OpIdx0, 950 unsigned OpIdx1) const { 951 int CommutedOpcode = commuteOpcode(*MI); 952 if (CommutedOpcode == -1) 953 return nullptr; 954 955 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 956 AMDGPU::OpName::src0); 957 MachineOperand &Src0 = MI->getOperand(Src0Idx); 958 if (!Src0.isReg()) 959 return nullptr; 960 961 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 962 AMDGPU::OpName::src1); 963 964 if ((OpIdx0 != static_cast<unsigned>(Src0Idx) || 965 OpIdx1 != static_cast<unsigned>(Src1Idx)) && 966 (OpIdx0 != static_cast<unsigned>(Src1Idx) || 967 OpIdx1 != static_cast<unsigned>(Src0Idx))) 968 return nullptr; 969 970 MachineOperand &Src1 = MI->getOperand(Src1Idx); 971 972 973 if (isVOP2(*MI) || isVOPC(*MI)) { 974 const MCInstrDesc &InstrDesc = MI->getDesc(); 975 // For VOP2 and VOPC instructions, any operand type is valid to use for 976 // src0. Make sure we can use the src0 as src1. 977 // 978 // We could be stricter here and only allow commuting if there is a reason 979 // to do so. i.e. if both operands are VGPRs there is no real benefit, 980 // although MachineCSE attempts to find matches by commuting. 981 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 982 if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) 983 return nullptr; 984 } 985 986 if (!Src1.isReg()) { 987 // Allow commuting instructions with Imm operands. 988 if (NewMI || !Src1.isImm() || 989 (!isVOP2(*MI) && !isVOP3(*MI))) { 990 return nullptr; 991 } 992 // Be sure to copy the source modifiers to the right place. 993 if (MachineOperand *Src0Mods 994 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { 995 MachineOperand *Src1Mods 996 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); 997 998 int Src0ModsVal = Src0Mods->getImm(); 999 if (!Src1Mods && Src0ModsVal != 0) 1000 return nullptr; 1001 1002 // XXX - This assert might be a lie. It might be useful to have a neg 1003 // modifier with 0.0. 1004 int Src1ModsVal = Src1Mods->getImm(); 1005 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 1006 1007 Src1Mods->setImm(Src0ModsVal); 1008 Src0Mods->setImm(Src1ModsVal); 1009 } 1010 1011 unsigned Reg = Src0.getReg(); 1012 unsigned SubReg = Src0.getSubReg(); 1013 if (Src1.isImm()) 1014 Src0.ChangeToImmediate(Src1.getImm()); 1015 else 1016 llvm_unreachable("Should only have immediates"); 1017 1018 Src1.ChangeToRegister(Reg, false); 1019 Src1.setSubReg(SubReg); 1020 } else { 1021 MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); 1022 } 1023 1024 if (MI) 1025 MI->setDesc(get(CommutedOpcode)); 1026 1027 return MI; 1028 } 1029 1030 // This needs to be implemented because the source modifiers may be inserted 1031 // between the true commutable operands, and the base 1032 // TargetInstrInfo::commuteInstruction uses it. 1033 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, 1034 unsigned &SrcOpIdx0, 1035 unsigned &SrcOpIdx1) const { 1036 const MCInstrDesc &MCID = MI->getDesc(); 1037 if (!MCID.isCommutable()) 1038 return false; 1039 1040 unsigned Opc = MI->getOpcode(); 1041 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 1042 if (Src0Idx == -1) 1043 return false; 1044 1045 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 1046 // immediate. 
Also, immediate src0 operand is not handled in 1047 // SIInstrInfo::commuteInstruction(); 1048 if (!MI->getOperand(Src0Idx).isReg()) 1049 return false; 1050 1051 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 1052 if (Src1Idx == -1) 1053 return false; 1054 1055 MachineOperand &Src1 = MI->getOperand(Src1Idx); 1056 if (Src1.isImm()) { 1057 // SIInstrInfo::commuteInstruction() does support commuting the immediate 1058 // operand src1 in 2 and 3 operand instructions. 1059 if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode())) 1060 return false; 1061 } else if (Src1.isReg()) { 1062 // If any source modifiers are set, the generic instruction commuting won't 1063 // understand how to copy the source modifiers. 1064 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || 1065 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) 1066 return false; 1067 } else 1068 return false; 1069 1070 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 1071 } 1072 1073 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 1074 switch (Cond) { 1075 case SIInstrInfo::SCC_TRUE: 1076 return AMDGPU::S_CBRANCH_SCC1; 1077 case SIInstrInfo::SCC_FALSE: 1078 return AMDGPU::S_CBRANCH_SCC0; 1079 case SIInstrInfo::VCCNZ: 1080 return AMDGPU::S_CBRANCH_VCCNZ; 1081 case SIInstrInfo::VCCZ: 1082 return AMDGPU::S_CBRANCH_VCCZ; 1083 case SIInstrInfo::EXECNZ: 1084 return AMDGPU::S_CBRANCH_EXECNZ; 1085 case SIInstrInfo::EXECZ: 1086 return AMDGPU::S_CBRANCH_EXECZ; 1087 default: 1088 llvm_unreachable("invalid branch predicate"); 1089 } 1090 } 1091 1092 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 1093 switch (Opcode) { 1094 case AMDGPU::S_CBRANCH_SCC0: 1095 return SCC_FALSE; 1096 case AMDGPU::S_CBRANCH_SCC1: 1097 return SCC_TRUE; 1098 case AMDGPU::S_CBRANCH_VCCNZ: 1099 return VCCNZ; 1100 case AMDGPU::S_CBRANCH_VCCZ: 1101 return VCCZ; 1102 case AMDGPU::S_CBRANCH_EXECNZ: 1103 return EXECNZ; 1104 case AMDGPU::S_CBRANCH_EXECZ: 1105 return EXECZ; 1106 default: 1107 return INVALID_BR; 1108 } 1109 } 1110 1111 bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 1112 MachineBasicBlock *&TBB, 1113 MachineBasicBlock *&FBB, 1114 SmallVectorImpl<MachineOperand> &Cond, 1115 bool AllowModify) const { 1116 MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1117 1118 if (I == MBB.end()) 1119 return false; 1120 1121 if (I->getOpcode() == AMDGPU::S_BRANCH) { 1122 // Unconditional Branch 1123 TBB = I->getOperand(0).getMBB(); 1124 return false; 1125 } 1126 1127 BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 1128 if (Pred == INVALID_BR) 1129 return true; 1130 1131 MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); 1132 Cond.push_back(MachineOperand::CreateImm(Pred)); 1133 1134 ++I; 1135 1136 if (I == MBB.end()) { 1137 // Conditional branch followed by fall-through. 
    TBB = CondBB;
    return false;
  }

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = CondBB;
    FBB = I->getOperand(0).getMBB();
    return false;
  }

  return true;
}

unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();

  unsigned Count = 0;
  while (I != MBB.end()) {
    MachineBasicBlock::iterator Next = std::next(I);
    I->eraseFromParent();
    ++Count;
    I = Next;
  }

  return Count;
}

unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   ArrayRef<MachineOperand> Cond,
                                   DebugLoc DL) const {

  if (!FBB && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
      .addMBB(TBB);
    return 1;
  }

  assert(TBB && Cond[0].isImm());

  unsigned Opcode
    = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (!FBB) {
    BuildMI(&MBB, DL, get(Opcode))
      .addMBB(TBB);
    return 1;
  }

  assert(TBB && FBB);

  BuildMI(&MBB, DL, get(Opcode))
    .addMBB(TBB);
  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
    .addMBB(FBB);

  return 2;
}

bool SIInstrInfo::ReverseBranchCondition(
  SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1);
  Cond[0].setImm(-Cond[0].getImm());
  return false;
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

// TODO: Maybe this should be removed and everything custom folded in
// SIFoldOperands?
bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI->getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    const MachineOperand &ImmOp = DefMI->getOperand(1);

    // If this is a free constant, there's no reason to do this.
    // TODO: We could fold this here instead of letting SIFoldOperands do it
    // later.
    if (isInlineConstant(ImmOp, 4))
      return false;

    MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_f32
    // We should only expect these to be on src0 due to canonicalizations.
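    // v_madmk_f32 computes src0 * K + src1 with K taken as a 32-bit literal,
    // so the register from src1 moves into src0 and the folded constant takes
    // the src1 slot.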
1250 if (Src0->isReg() && Src0->getReg() == Reg) { 1251 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1252 return false; 1253 1254 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 1255 return false; 1256 1257 // We need to swap operands 0 and 1 since madmk constant is at operand 1. 1258 1259 const int64_t Imm = DefMI->getOperand(1).getImm(); 1260 1261 // FIXME: This would be a lot easier if we could return a new instruction 1262 // instead of having to modify in place. 1263 1264 // Remove these first since they are at the end. 1265 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1266 AMDGPU::OpName::omod)); 1267 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1268 AMDGPU::OpName::clamp)); 1269 1270 unsigned Src1Reg = Src1->getReg(); 1271 unsigned Src1SubReg = Src1->getSubReg(); 1272 Src0->setReg(Src1Reg); 1273 Src0->setSubReg(Src1SubReg); 1274 Src0->setIsKill(Src1->isKill()); 1275 1276 if (Opc == AMDGPU::V_MAC_F32_e64) { 1277 UseMI->untieRegOperand( 1278 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1279 } 1280 1281 Src1->ChangeToImmediate(Imm); 1282 1283 removeModOperands(*UseMI); 1284 UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); 1285 1286 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1287 if (DeleteDef) 1288 DefMI->eraseFromParent(); 1289 1290 return true; 1291 } 1292 1293 // Added part is the constant: Use v_madak_f32 1294 if (Src2->isReg() && Src2->getReg() == Reg) { 1295 // Not allowed to use constant bus for another operand. 1296 // We can however allow an inline immediate as src0. 1297 if (!Src0->isImm() && 1298 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1299 return false; 1300 1301 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1302 return false; 1303 1304 const int64_t Imm = DefMI->getOperand(1).getImm(); 1305 1306 // FIXME: This would be a lot easier if we could return a new instruction 1307 // instead of having to modify in place. 1308 1309 // Remove these first since they are at the end. 1310 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1311 AMDGPU::OpName::omod)); 1312 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1313 AMDGPU::OpName::clamp)); 1314 1315 if (Opc == AMDGPU::V_MAC_F32_e64) { 1316 UseMI->untieRegOperand( 1317 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1318 } 1319 1320 // ChangingToImmediate adds Src2 back to the instruction. 1321 Src2->ChangeToImmediate(Imm); 1322 1323 // These come before src2. 1324 removeModOperands(*UseMI); 1325 UseMI->setDesc(get(AMDGPU::V_MADAK_F32)); 1326 1327 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1328 if (DeleteDef) 1329 DefMI->eraseFromParent(); 1330 1331 return true; 1332 } 1333 } 1334 1335 return false; 1336 } 1337 1338 static bool offsetsDoNotOverlap(int WidthA, int OffsetA, 1339 int WidthB, int OffsetB) { 1340 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 1341 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 1342 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 1343 return LowOffset + LowWidth <= HighOffset; 1344 } 1345 1346 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, 1347 MachineInstr *MIb) const { 1348 unsigned BaseReg0, BaseReg1; 1349 int64_t Offset0, Offset1; 1350 1351 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && 1352 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { 1353 1354 if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) { 1355 // FIXME: Handle ds_read2 / ds_write2. 
1356 return false; 1357 } 1358 unsigned Width0 = (*MIa->memoperands_begin())->getSize(); 1359 unsigned Width1 = (*MIb->memoperands_begin())->getSize(); 1360 if (BaseReg0 == BaseReg1 && 1361 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { 1362 return true; 1363 } 1364 } 1365 1366 return false; 1367 } 1368 1369 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, 1370 MachineInstr *MIb, 1371 AliasAnalysis *AA) const { 1372 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && 1373 "MIa must load from or modify a memory location"); 1374 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && 1375 "MIb must load from or modify a memory location"); 1376 1377 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects()) 1378 return false; 1379 1380 // XXX - Can we relax this between address spaces? 1381 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) 1382 return false; 1383 1384 // TODO: Should we check the address space from the MachineMemOperand? That 1385 // would allow us to distinguish objects we know don't alias based on the 1386 // underlying address space, even if it was lowered to a different one, 1387 // e.g. private accesses lowered to use MUBUF instructions on a scratch 1388 // buffer. 1389 if (isDS(*MIa)) { 1390 if (isDS(*MIb)) 1391 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1392 1393 return !isFLAT(*MIb); 1394 } 1395 1396 if (isMUBUF(*MIa) || isMTBUF(*MIa)) { 1397 if (isMUBUF(*MIb) || isMTBUF(*MIb)) 1398 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1399 1400 return !isFLAT(*MIb) && !isSMRD(*MIb); 1401 } 1402 1403 if (isSMRD(*MIa)) { 1404 if (isSMRD(*MIb)) 1405 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1406 1407 return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa); 1408 } 1409 1410 if (isFLAT(*MIa)) { 1411 if (isFLAT(*MIb)) 1412 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1413 1414 return false; 1415 } 1416 1417 return false; 1418 } 1419 1420 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, 1421 MachineBasicBlock::iterator &MI, 1422 LiveVariables *LV) const { 1423 1424 switch (MI->getOpcode()) { 1425 default: return nullptr; 1426 case AMDGPU::V_MAC_F32_e64: break; 1427 case AMDGPU::V_MAC_F32_e32: { 1428 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1429 if (Src0->isImm() && !isInlineConstant(*Src0, 4)) 1430 return nullptr; 1431 break; 1432 } 1433 } 1434 1435 const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst); 1436 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1437 const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); 1438 const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); 1439 1440 return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) 1441 .addOperand(*Dst) 1442 .addImm(0) // Src0 mods 1443 .addOperand(*Src0) 1444 .addImm(0) // Src1 mods 1445 .addOperand(*Src1) 1446 .addImm(0) // Src mods 1447 .addOperand(*Src2) 1448 .addImm(0) // clamp 1449 .addImm(0); // omod 1450 } 1451 1452 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI, 1453 const MachineBasicBlock *MBB, 1454 const MachineFunction &MF) const { 1455 // Target-independent instructions do not have an implicit-use of EXEC, even 1456 // when they operate on VGPRs. Treating EXEC modifications as scheduling 1457 // boundaries prevents incorrect movements of such instructions. 
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return true;

  return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int64_t SVal = Imm.getSExtValue();
  if (SVal >= -16 && SVal <= 64)
    return true;

  if (Imm.getBitWidth() == 64) {
    uint64_t Val = Imm.getZExtValue();
    return (DoubleToBits(0.0) == Val) ||
           (DoubleToBits(1.0) == Val) ||
           (DoubleToBits(-1.0) == Val) ||
           (DoubleToBits(0.5) == Val) ||
           (DoubleToBits(-0.5) == Val) ||
           (DoubleToBits(2.0) == Val) ||
           (DoubleToBits(-2.0) == Val) ||
           (DoubleToBits(4.0) == Val) ||
           (DoubleToBits(-4.0) == Val);
  }

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = Imm.getZExtValue();

  return (FloatToBits(0.0f) == Val) ||
         (FloatToBits(1.0f) == Val) ||
         (FloatToBits(-1.0f) == Val) ||
         (FloatToBits(0.5f) == Val) ||
         (FloatToBits(-0.5f) == Val) ||
         (FloatToBits(2.0f) == Val) ||
         (FloatToBits(-2.0f) == Val) ||
         (FloatToBits(4.0f) == Val) ||
         (FloatToBits(-4.0f) == Val);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   unsigned OpSize) const {
  if (MO.isImm()) {
    // MachineOperand provides no way to tell the true operand size, since it
    // only records a 64-bit value. We need to know the size to determine if a
    // 32-bit floating point immediate bit pattern is legal for an integer
    // immediate. It would be for any 32-bit integer operand, but would not be
    // for a 64-bit one.
1512 1513 unsigned BitSize = 8 * OpSize; 1514 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 1515 } 1516 1517 return false; 1518 } 1519 1520 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1521 unsigned OpSize) const { 1522 return MO.isImm() && !isInlineConstant(MO, OpSize); 1523 } 1524 1525 static bool compareMachineOp(const MachineOperand &Op0, 1526 const MachineOperand &Op1) { 1527 if (Op0.getType() != Op1.getType()) 1528 return false; 1529 1530 switch (Op0.getType()) { 1531 case MachineOperand::MO_Register: 1532 return Op0.getReg() == Op1.getReg(); 1533 case MachineOperand::MO_Immediate: 1534 return Op0.getImm() == Op1.getImm(); 1535 default: 1536 llvm_unreachable("Didn't expect to be comparing these operand types"); 1537 } 1538 } 1539 1540 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, 1541 const MachineOperand &MO) const { 1542 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; 1543 1544 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1545 1546 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1547 return true; 1548 1549 if (OpInfo.RegClass < 0) 1550 return false; 1551 1552 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1553 if (isLiteralConstant(MO, OpSize)) 1554 return RI.opCanUseLiteralConstant(OpInfo.OperandType); 1555 1556 return RI.opCanUseInlineConstant(OpInfo.OperandType); 1557 } 1558 1559 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1560 int Op32 = AMDGPU::getVOPe32(Opcode); 1561 if (Op32 == -1) 1562 return false; 1563 1564 return pseudoToMCOpcode(Op32) != -1; 1565 } 1566 1567 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1568 // The src0_modifier operand is present on all instructions 1569 // that have modifiers. 1570 1571 return AMDGPU::getNamedOperandIdx(Opcode, 1572 AMDGPU::OpName::src0_modifiers) != -1; 1573 } 1574 1575 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 1576 unsigned OpName) const { 1577 const MachineOperand *Mods = getNamedOperand(MI, OpName); 1578 return Mods && Mods->getImm(); 1579 } 1580 1581 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1582 const MachineOperand &MO, 1583 unsigned OpSize) const { 1584 // Literal constants use the constant bus. 1585 if (isLiteralConstant(MO, OpSize)) 1586 return true; 1587 1588 if (!MO.isReg() || !MO.isUse()) 1589 return false; 1590 1591 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 1592 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 1593 1594 // FLAT_SCR is just an SGPR pair. 1595 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 1596 return true; 1597 1598 // EXEC register uses the constant bus. 1599 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1600 return true; 1601 1602 // SGPRs use the constant bus 1603 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || 1604 (!MO.isImplicit() && 1605 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1606 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); 1607 } 1608 1609 static unsigned findImplicitSGPRRead(const MachineInstr &MI) { 1610 for (const MachineOperand &MO : MI.implicit_operands()) { 1611 // We only care about reads. 
1612 if (MO.isDef()) 1613 continue; 1614 1615 switch (MO.getReg()) { 1616 case AMDGPU::VCC: 1617 case AMDGPU::M0: 1618 case AMDGPU::FLAT_SCR: 1619 return MO.getReg(); 1620 1621 default: 1622 break; 1623 } 1624 } 1625 1626 return AMDGPU::NoRegister; 1627 } 1628 1629 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, 1630 StringRef &ErrInfo) const { 1631 uint16_t Opcode = MI->getOpcode(); 1632 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1633 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1634 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1635 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1636 1637 // Make sure the number of operands is correct. 1638 const MCInstrDesc &Desc = get(Opcode); 1639 if (!Desc.isVariadic() && 1640 Desc.getNumOperands() != MI->getNumExplicitOperands()) { 1641 ErrInfo = "Instruction has wrong number of operands."; 1642 return false; 1643 } 1644 1645 // Make sure the register classes are correct. 1646 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1647 if (MI->getOperand(i).isFPImm()) { 1648 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " 1649 "all fp values to integers."; 1650 return false; 1651 } 1652 1653 int RegClass = Desc.OpInfo[i].RegClass; 1654 1655 switch (Desc.OpInfo[i].OperandType) { 1656 case MCOI::OPERAND_REGISTER: 1657 if (MI->getOperand(i).isImm()) { 1658 ErrInfo = "Illegal immediate value for operand."; 1659 return false; 1660 } 1661 break; 1662 case AMDGPU::OPERAND_REG_IMM32: 1663 break; 1664 case AMDGPU::OPERAND_REG_INLINE_C: 1665 if (isLiteralConstant(MI->getOperand(i), 1666 RI.getRegClass(RegClass)->getSize())) { 1667 ErrInfo = "Illegal immediate value for operand."; 1668 return false; 1669 } 1670 break; 1671 case MCOI::OPERAND_IMMEDIATE: 1672 // Check if this operand is an immediate. 1673 // FrameIndex operands will be replaced by immediates, so they are 1674 // allowed. 1675 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { 1676 ErrInfo = "Expected immediate, but got non-immediate"; 1677 return false; 1678 } 1679 // Fall-through 1680 default: 1681 continue; 1682 } 1683 1684 if (!MI->getOperand(i).isReg()) 1685 continue; 1686 1687 if (RegClass != -1) { 1688 unsigned Reg = MI->getOperand(i).getReg(); 1689 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1690 continue; 1691 1692 const TargetRegisterClass *RC = RI.getRegClass(RegClass); 1693 if (!RC->contains(Reg)) { 1694 ErrInfo = "Operand has incorrect register class."; 1695 return false; 1696 } 1697 } 1698 } 1699 1700 1701 // Verify VOP* 1702 if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) { 1703 // Only look at the true operands. Only a real operand can use the constant 1704 // bus, and we don't want to check pseudo-operands like the source modifier 1705 // flags. 
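    // A VALU instruction may read at most one SGPR or literal constant
    // through the constant bus, so count such operand reads below.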
1706 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1707 1708 unsigned ConstantBusCount = 0; 1709 unsigned SGPRUsed = findImplicitSGPRRead(*MI); 1710 if (SGPRUsed != AMDGPU::NoRegister) 1711 ++ConstantBusCount; 1712 1713 for (int OpIdx : OpIndices) { 1714 if (OpIdx == -1) 1715 break; 1716 const MachineOperand &MO = MI->getOperand(OpIdx); 1717 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 1718 if (MO.isReg()) { 1719 if (MO.getReg() != SGPRUsed) 1720 ++ConstantBusCount; 1721 SGPRUsed = MO.getReg(); 1722 } else { 1723 ++ConstantBusCount; 1724 } 1725 } 1726 } 1727 if (ConstantBusCount > 1) { 1728 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1729 return false; 1730 } 1731 } 1732 1733 // Verify misc. restrictions on specific instructions. 1734 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1735 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1736 const MachineOperand &Src0 = MI->getOperand(Src0Idx); 1737 const MachineOperand &Src1 = MI->getOperand(Src1Idx); 1738 const MachineOperand &Src2 = MI->getOperand(Src2Idx); 1739 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1740 if (!compareMachineOp(Src0, Src1) && 1741 !compareMachineOp(Src0, Src2)) { 1742 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1743 return false; 1744 } 1745 } 1746 } 1747 1748 // Make sure we aren't losing exec uses in the td files. This mostly requires 1749 // being careful when using let Uses to try to add other use registers. 1750 if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) { 1751 if (!MI->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 1752 ErrInfo = "VALU instruction does not implicitly read exec mask"; 1753 return false; 1754 } 1755 } 1756 1757 return true; 1758 } 1759 1760 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1761 switch (MI.getOpcode()) { 1762 default: return AMDGPU::INSTRUCTION_LIST_END; 1763 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1764 case AMDGPU::COPY: return AMDGPU::COPY; 1765 case AMDGPU::PHI: return AMDGPU::PHI; 1766 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 1767 case AMDGPU::S_MOV_B32: 1768 return MI.getOperand(1).isReg() ? 
1769 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 1770 case AMDGPU::S_ADD_I32: 1771 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 1772 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 1773 case AMDGPU::S_SUB_I32: 1774 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 1775 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 1776 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 1777 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 1778 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 1779 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 1780 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 1781 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 1782 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 1783 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1784 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1785 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1786 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1787 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1788 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1789 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1790 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1791 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1792 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1793 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 1794 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1795 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1796 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1797 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1798 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1799 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1800 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1801 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; 1802 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1803 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1804 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; 1805 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; 1806 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; 1807 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; 1808 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; 1809 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; 1810 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1811 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1812 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1813 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1814 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 1815 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 1816 } 1817 } 1818 1819 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1820 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1821 } 1822 1823 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1824 unsigned OpNo) const { 1825 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1826 const MCInstrDesc &Desc = get(MI.getOpcode()); 1827 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1828 Desc.OpInfo[OpNo].RegClass == -1) { 1829 unsigned Reg = MI.getOperand(OpNo).getReg(); 1830 1831 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1832 return MRI.getRegClass(Reg); 1833 return RI.getPhysRegClass(Reg); 1834 } 1835 1836 unsigned RCID = 
Desc.OpInfo[OpNo].RegClass; 1837 return RI.getRegClass(RCID); 1838 } 1839 1840 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1841 switch (MI.getOpcode()) { 1842 case AMDGPU::COPY: 1843 case AMDGPU::REG_SEQUENCE: 1844 case AMDGPU::PHI: 1845 case AMDGPU::INSERT_SUBREG: 1846 return RI.hasVGPRs(getOpRegClass(MI, 0)); 1847 default: 1848 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1849 } 1850 } 1851 1852 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { 1853 MachineBasicBlock::iterator I = MI; 1854 MachineBasicBlock *MBB = MI->getParent(); 1855 MachineOperand &MO = MI->getOperand(OpIdx); 1856 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1857 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; 1858 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1859 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1860 if (MO.isReg()) 1861 Opcode = AMDGPU::COPY; 1862 else if (RI.isSGPRClass(RC)) 1863 Opcode = AMDGPU::S_MOV_B32; 1864 1865 1866 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1867 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 1868 VRC = &AMDGPU::VReg_64RegClass; 1869 else 1870 VRC = &AMDGPU::VGPR_32RegClass; 1871 1872 unsigned Reg = MRI.createVirtualRegister(VRC); 1873 DebugLoc DL = MBB->findDebugLoc(I); 1874 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) 1875 .addOperand(MO); 1876 MO.ChangeToRegister(Reg, false); 1877 } 1878 1879 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1880 MachineRegisterInfo &MRI, 1881 MachineOperand &SuperReg, 1882 const TargetRegisterClass *SuperRC, 1883 unsigned SubIdx, 1884 const TargetRegisterClass *SubRC) 1885 const { 1886 MachineBasicBlock *MBB = MI->getParent(); 1887 DebugLoc DL = MI->getDebugLoc(); 1888 unsigned SubReg = MRI.createVirtualRegister(SubRC); 1889 1890 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 1891 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1892 .addReg(SuperReg.getReg(), 0, SubIdx); 1893 return SubReg; 1894 } 1895 1896 // Just in case the super register is itself a sub-register, copy it to a new 1897 // value so we don't need to worry about merging its subreg index with the 1898 // SubIdx passed to this function. The register coalescer should be able to 1899 // eliminate this extra copy. 1900 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 1901 1902 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 1903 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 1904 1905 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1906 .addReg(NewSuperReg, 0, SubIdx); 1907 1908 return SubReg; 1909 } 1910 1911 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1912 MachineBasicBlock::iterator MII, 1913 MachineRegisterInfo &MRI, 1914 MachineOperand &Op, 1915 const TargetRegisterClass *SuperRC, 1916 unsigned SubIdx, 1917 const TargetRegisterClass *SubRC) const { 1918 if (Op.isImm()) { 1919 // XXX - Is there a better way to do this? 
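// Illustrative example: for Op.getImm() == 0x0000000100000002, sub0 yields
// 0x00000002 and sub1 yields 0x00000001.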
1920 if (SubIdx == AMDGPU::sub0) 1921 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 1922 if (SubIdx == AMDGPU::sub1) 1923 return MachineOperand::CreateImm(Op.getImm() >> 32); 1924 1925 llvm_unreachable("Unhandled register index for immediate"); 1926 } 1927 1928 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 1929 SubIdx, SubRC); 1930 return MachineOperand::CreateReg(SubReg, false); 1931 } 1932 1933 // Change the order of operands from (0, 1, 2) to (0, 2, 1) 1934 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { 1935 assert(Inst->getNumExplicitOperands() == 3); 1936 MachineOperand Op1 = Inst->getOperand(1); 1937 Inst->RemoveOperand(1); 1938 Inst->addOperand(Op1); 1939 } 1940 1941 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 1942 const MCOperandInfo &OpInfo, 1943 const MachineOperand &MO) const { 1944 if (!MO.isReg()) 1945 return false; 1946 1947 unsigned Reg = MO.getReg(); 1948 const TargetRegisterClass *RC = 1949 TargetRegisterInfo::isVirtualRegister(Reg) ? 1950 MRI.getRegClass(Reg) : 1951 RI.getPhysRegClass(Reg); 1952 1953 const SIRegisterInfo *TRI = 1954 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); 1955 RC = TRI->getSubRegClass(RC, MO.getSubReg()); 1956 1957 // In order to be legal, the common sub-class must be equal to the 1958 // class of the current operand. For example: 1959 // 1960 // v_mov_b32 s0 ; Operand defined as vsrc_32 1961 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1962 // 1963 // s_sendmsg 0, s0 ; Operand defined as m0reg 1964 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1965 1966 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1967 } 1968 1969 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1970 const MCOperandInfo &OpInfo, 1971 const MachineOperand &MO) const { 1972 if (MO.isReg()) 1973 return isLegalRegOperand(MRI, OpInfo, MO); 1974 1975 // Handle non-register types that are treated like immediates. 1976 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1977 return true; 1978 } 1979 1980 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, 1981 const MachineOperand *MO) const { 1982 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1983 const MCInstrDesc &InstDesc = MI->getDesc(); 1984 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 1985 const TargetRegisterClass *DefinedRC = 1986 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 1987 if (!MO) 1988 MO = &MI->getOperand(OpIdx); 1989 1990 if (isVALU(*MI) && 1991 usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 1992 1993 RegSubRegPair SGPRUsed; 1994 if (MO->isReg()) 1995 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); 1996 1997 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1998 if (i == OpIdx) 1999 continue; 2000 const MachineOperand &Op = MI->getOperand(i); 2001 if (Op.isReg() && 2002 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && 2003 usesConstantBus(MRI, Op, getOpSize(*MI, i))) { 2004 return false; 2005 } 2006 } 2007 } 2008 2009 if (MO->isReg()) { 2010 assert(DefinedRC); 2011 return isLegalRegOperand(MRI, OpInfo, *MO); 2012 } 2013 2014 2015 // Handle non-register types that are treated like immediates. 2016 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 2017 2018 if (!DefinedRC) { 2019 // This operand expects an immediate. 
2020 return true; 2021 } 2022 2023 return isImmOperandLegal(MI, OpIdx, *MO); 2024 } 2025 2026 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 2027 MachineInstr *MI) const { 2028 unsigned Opc = MI->getOpcode(); 2029 const MCInstrDesc &InstrDesc = get(Opc); 2030 2031 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 2032 MachineOperand &Src1 = MI->getOperand(Src1Idx); 2033 2034 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 2035 // we need to only have one constant bus use. 2036 // 2037 // Note we do not need to worry about literal constants here. They are 2038 // disabled for the operand type for instructions because they will always 2039 // violate the one constant bus use rule. 2040 bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister; 2041 if (HasImplicitSGPR) { 2042 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2043 MachineOperand &Src0 = MI->getOperand(Src0Idx); 2044 2045 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) 2046 legalizeOpWithMove(MI, Src0Idx); 2047 } 2048 2049 // VOP2 src0 instructions support all operand types, so we don't need to check 2050 // their legality. If src1 is already legal, we don't need to do anything. 2051 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) 2052 return; 2053 2054 // We do not use commuteInstruction here because it is too aggressive and will 2055 // commute if it is possible. We only want to commute here if it improves 2056 // legality. This can be called a fairly large number of times so don't waste 2057 // compile time pointlessly swapping and checking legality again. 2058 if (HasImplicitSGPR || !MI->isCommutable()) { 2059 legalizeOpWithMove(MI, Src1Idx); 2060 return; 2061 } 2062 2063 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2064 MachineOperand &Src0 = MI->getOperand(Src0Idx); 2065 2066 // If src0 can be used as src1, commuting will make the operands legal. 2067 // Otherwise we have to give up and insert a move. 2068 // 2069 // TODO: Other immediate-like operand kinds could be commuted if there was a 2070 // MachineOperand::ChangeTo* for them. 2071 if ((!Src1.isImm() && !Src1.isReg()) || 2072 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { 2073 legalizeOpWithMove(MI, Src1Idx); 2074 return; 2075 } 2076 2077 int CommutedOpc = commuteOpcode(*MI); 2078 if (CommutedOpc == -1) { 2079 legalizeOpWithMove(MI, Src1Idx); 2080 return; 2081 } 2082 2083 MI->setDesc(get(CommutedOpc)); 2084 2085 unsigned Src0Reg = Src0.getReg(); 2086 unsigned Src0SubReg = Src0.getSubReg(); 2087 bool Src0Kill = Src0.isKill(); 2088 2089 if (Src1.isImm()) 2090 Src0.ChangeToImmediate(Src1.getImm()); 2091 else if (Src1.isReg()) { 2092 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 2093 Src0.setSubReg(Src1.getSubReg()); 2094 } else 2095 llvm_unreachable("Should only have register or immediate operands"); 2096 2097 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 2098 Src1.setSubReg(Src0SubReg); 2099 } 2100 2101 // Legalize VOP3 operands. Because all operand types are supported for any 2102 // operand, and since literal constants are not allowed and should never be 2103 // seen, we only need to worry about inserting copies if we use multiple SGPR 2104 // operands. 
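// Illustrative example (mirroring the constant bus examples in findUsedSGPR
// below): V_FMA_F32 v0, s0, s0, s0 needs no copies, while
// V_FMA_F32 v0, s0, s1, s0 needs s1 copied into a VGPR first.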
2105 void SIInstrInfo::legalizeOperandsVOP3(
2106 MachineRegisterInfo &MRI,
2107 MachineInstr *MI) const {
2108 unsigned Opc = MI->getOpcode();
2109
2110 int VOP3Idx[3] = {
2111 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
2112 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
2113 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
2114 };
2115
2116 // Find the one SGPR operand we are allowed to use.
2117 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
2118
2119 for (unsigned i = 0; i < 3; ++i) {
2120 int Idx = VOP3Idx[i];
2121 if (Idx == -1)
2122 break;
2123 MachineOperand &MO = MI->getOperand(Idx);
2124
2125 // We should never see a VOP3 instruction with an illegal immediate operand.
2126 if (!MO.isReg())
2127 continue;
2128
2129 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2130 continue; // VGPRs are legal
2131
2132 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
2133 SGPRReg = MO.getReg();
2134 // We can use one SGPR in each VOP3 instruction.
2135 continue;
2136 }
2137
2138 // If we make it this far, then the operand is not legal and we must
2139 // legalize it.
2140 legalizeOpWithMove(MI, Idx);
2141 }
2142 }
2143
2144 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
2145 MachineRegisterInfo &MRI) const {
2146 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
2147 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
2148 unsigned DstReg = MRI.createVirtualRegister(SRC);
2149 unsigned SubRegs = VRC->getSize() / 4;
2150
2151 SmallVector<unsigned, 8> SRegs;
2152 for (unsigned i = 0; i < SubRegs; ++i) {
2153 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2154 BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
2155 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
2156 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
2157 SRegs.push_back(SGPR);
2158 }
2159
2160 MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
2161 UseMI->getDebugLoc(),
2162 get(AMDGPU::REG_SEQUENCE), DstReg);
2163 for (unsigned i = 0; i < SubRegs; ++i) {
2164 MIB.addReg(SRegs[i]);
2165 MIB.addImm(RI.getSubRegFromChannel(i));
2166 }
2167 return DstReg;
2168 }
2169
2170 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
2171 MachineInstr *MI) const {
2172
2173 // If the pointer is stored in VGPRs, then we need to move it to
2174 // SGPRs using v_readfirstlane. This is safe because we only select
2175 // loads with uniform pointers to SMRD instructions, so we know the
2176 // pointer value is uniform.
2177 MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
2178 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
2179 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
2180 SBase->setReg(SGPR);
2181 }
2182 }
2183
2184 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
2185 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2186
2187 // Legalize VOP2
2188 if (isVOP2(*MI) || isVOPC(*MI)) {
2189 legalizeOperandsVOP2(MRI, MI);
2190 return;
2191 }
2192
2193 // Legalize VOP3
2194 if (isVOP3(*MI)) {
2195 legalizeOperandsVOP3(MRI, MI);
2196 return;
2197 }
2198
2199 // Legalize SMRD
2200 if (isSMRD(*MI)) {
2201 legalizeOperandsSMRD(MRI, MI);
2202 return;
2203 }
2204
2205 // Legalize REG_SEQUENCE and PHI
2206 // The register class of the operands must match the register
2207 // class of the output.
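// Illustrative example (virtual register names are placeholders):
//   %dst = PHI %a:SReg_64, <BB#0>, %b:VReg_64, <BB#1>
// has a VGPR input, so %a is copied into a VReg_64 register before the
// terminator of BB#0, avoiding an illegal VGPR->SGPR copy when the PHI is
// legalized.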
2208 if (MI->getOpcode() == AMDGPU::PHI) {
2209 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
2210 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
2211 if (!MI->getOperand(i).isReg() ||
2212 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
2213 continue;
2214 const TargetRegisterClass *OpRC =
2215 MRI.getRegClass(MI->getOperand(i).getReg());
2216 if (RI.hasVGPRs(OpRC)) {
2217 VRC = OpRC;
2218 } else {
2219 SRC = OpRC;
2220 }
2221 }
2222
2223 // If any of the operands are VGPR registers, then they all must be VGPRs;
2224 // otherwise we will create illegal VGPR->SGPR copies when legalizing
2225 // them.
2226 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
2227 if (!VRC) {
2228 assert(SRC);
2229 VRC = RI.getEquivalentVGPRClass(SRC);
2230 }
2231 RC = VRC;
2232 } else {
2233 RC = SRC;
2234 }
2235
2236 // Update all the operands so they have the same type.
2237 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2238 MachineOperand &Op = MI->getOperand(I);
2239 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2240 continue;
2241 unsigned DstReg = MRI.createVirtualRegister(RC);
2242
2243 // MI is a PHI instruction.
2244 MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
2245 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2246
2247 BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2248 .addOperand(Op);
2249 Op.setReg(DstReg);
2250 }
2251 }
2252
2253 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2254 // VGPR dest type and SGPR sources, insert copies so all operands are
2255 // VGPRs. This seems to help operand folding / the register coalescer.
2256 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
2257 MachineBasicBlock *MBB = MI->getParent();
2258 const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
2259 if (RI.hasVGPRs(DstRC)) {
2260 // Update all the operands so they are VGPR register classes. These may
2261 // not be the same register class because REG_SEQUENCE supports mixing
2262 // subregister index types e.g.
sub0_sub1 + sub2 + sub3
2263 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2264 MachineOperand &Op = MI->getOperand(I);
2265 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2266 continue;
2267
2268 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2269 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2270 if (VRC == OpRC)
2271 continue;
2272
2273 unsigned DstReg = MRI.createVirtualRegister(VRC);
2274
2275 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2276 .addOperand(Op);
2277
2278 Op.setReg(DstReg);
2279 Op.setIsKill();
2280 }
2281 }
2282
2283 return;
2284 }
2285
2286 // Legalize INSERT_SUBREG
2287 // src0 must have the same register class as dst
2288 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
2289 unsigned Dst = MI->getOperand(0).getReg();
2290 unsigned Src0 = MI->getOperand(1).getReg();
2291 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2292 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2293 if (DstRC != Src0RC) {
2294 MachineBasicBlock &MBB = *MI->getParent();
2295 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
2296 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
2297 .addReg(Src0);
2298 MI->getOperand(1).setReg(NewSrc0);
2299 }
2300 return;
2301 }
2302
2303 // Legalize MIMG
2304 if (isMIMG(*MI)) {
2305 MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
2306 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2307 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2308 SRsrc->setReg(SGPR);
2309 }
2310
2311 MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
2312 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2313 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2314 SSamp->setReg(SGPR);
2315 }
2316 return;
2317 }
2318
2319 // Legalize MUBUF* instructions
2320 // FIXME: If we start using the non-addr64 instructions for compute, we
2321 // may need to legalize them here.
2322 int SRsrcIdx =
2323 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
2324 if (SRsrcIdx != -1) {
2325 // We have an MUBUF instruction
2326 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
2327 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
2328 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2329 RI.getRegClass(SRsrcRC))) {
2330 // The operands are legal.
2331 // FIXME: We may need to legalize operands besides srsrc.
2332 return;
2333 }
2334
2335 MachineBasicBlock &MBB = *MI->getParent();
2336
2337 // Extract the ptr from the resource descriptor.
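// Illustrative summary of the rewrite below: the 64-bit base pointer is
// pulled out of srsrc and folded into vaddr, while NewSRsrc is rebuilt as
// { 0, RSRC_DATA_FORMAT } so the descriptor itself no longer carries the
// pointer.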
2338 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2339 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
2340
2341 // Create an empty resource descriptor
2342 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2343 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2344 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2345 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
2346 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
2347
2348 // Zero64 = 0
2349 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
2350 Zero64)
2351 .addImm(0);
2352
2353 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2354 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2355 SRsrcFormatLo)
2356 .addImm(RsrcDataFormat & 0xFFFFFFFF);
2357
2358 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2359 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2360 SRsrcFormatHi)
2361 .addImm(RsrcDataFormat >> 32);
2362
2363 // NewSRsrc = {Zero64, SRsrcFormat}
2364 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2365 .addReg(Zero64)
2366 .addImm(AMDGPU::sub0_sub1)
2367 .addReg(SRsrcFormatLo)
2368 .addImm(AMDGPU::sub2)
2369 .addReg(SRsrcFormatHi)
2370 .addImm(AMDGPU::sub3);
2371
2372 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2373 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2374 if (VAddr) {
2375 // This is already an ADDR64 instruction so we need to add the pointer
2376 // extracted from the resource descriptor to the current value of VAddr.
2377 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2378 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2379
2380 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
2381 DebugLoc DL = MI->getDebugLoc();
2382 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
2383 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2384 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
2385
2386 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
2387 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
2388 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2389 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
2390
2391 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2392 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2393 .addReg(NewVAddrLo)
2394 .addImm(AMDGPU::sub0)
2395 .addReg(NewVAddrHi)
2396 .addImm(AMDGPU::sub1);
2397 } else {
2398 // This instruction is the _OFFSET variant, so we need to convert it to
2399 // ADDR64.
2400 assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
2401 < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
2402 "FIXME: Need to emit flat atomics here");
2403
2404 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
2405 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
2406 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
2407 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
2408
2409 // Atomics with return have an additional tied operand and are
2410 // missing some of the special bits.
2411 MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
2412 MachineInstr *Addr64;
2413
2414 if (!VDataIn) {
2415 // Regular buffer load / store.
2416 MachineInstrBuilder MIB
2417 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2418 .addOperand(*VData)
2419 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2420 // This will be replaced later 2421 // with the new value of vaddr. 2422 .addOperand(*SRsrc) 2423 .addOperand(*SOffset) 2424 .addOperand(*Offset); 2425 2426 // Atomics do not have this operand. 2427 if (const MachineOperand *GLC 2428 = getNamedOperand(*MI, AMDGPU::OpName::glc)) { 2429 MIB.addImm(GLC->getImm()); 2430 } 2431 2432 MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)); 2433 2434 if (const MachineOperand *TFE 2435 = getNamedOperand(*MI, AMDGPU::OpName::tfe)) { 2436 MIB.addImm(TFE->getImm()); 2437 } 2438 2439 MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 2440 Addr64 = MIB; 2441 } else { 2442 // Atomics with return. 2443 Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) 2444 .addOperand(*VData) 2445 .addOperand(*VDataIn) 2446 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 2447 // This will be replaced later 2448 // with the new value of vaddr. 2449 .addOperand(*SRsrc) 2450 .addOperand(*SOffset) 2451 .addOperand(*Offset) 2452 .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)) 2453 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 2454 } 2455 2456 MI->removeFromParent(); 2457 MI = Addr64; 2458 2459 // NewVaddr = {NewVaddrHi, NewVaddrLo} 2460 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) 2461 .addReg(SRsrcPtr, 0, AMDGPU::sub0) 2462 .addImm(AMDGPU::sub0) 2463 .addReg(SRsrcPtr, 0, AMDGPU::sub1) 2464 .addImm(AMDGPU::sub1); 2465 2466 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); 2467 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); 2468 } 2469 2470 // Update the instruction to use NewVaddr 2471 VAddr->setReg(NewVAddr); 2472 // Update the instruction to use NewSRsrc 2473 SRsrc->setReg(NewSRsrc); 2474 } 2475 } 2476 2477 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { 2478 SmallVector<MachineInstr *, 128> Worklist; 2479 Worklist.push_back(&TopInst); 2480 2481 while (!Worklist.empty()) { 2482 MachineInstr *Inst = Worklist.pop_back_val(); 2483 MachineBasicBlock *MBB = Inst->getParent(); 2484 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 2485 2486 unsigned Opcode = Inst->getOpcode(); 2487 unsigned NewOpcode = getVALUOp(*Inst); 2488 2489 // Handle some special cases 2490 switch (Opcode) { 2491 default: 2492 break; 2493 case AMDGPU::S_AND_B64: 2494 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); 2495 Inst->eraseFromParent(); 2496 continue; 2497 2498 case AMDGPU::S_OR_B64: 2499 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); 2500 Inst->eraseFromParent(); 2501 continue; 2502 2503 case AMDGPU::S_XOR_B64: 2504 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); 2505 Inst->eraseFromParent(); 2506 continue; 2507 2508 case AMDGPU::S_NOT_B64: 2509 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); 2510 Inst->eraseFromParent(); 2511 continue; 2512 2513 case AMDGPU::S_BCNT1_I32_B64: 2514 splitScalar64BitBCNT(Worklist, Inst); 2515 Inst->eraseFromParent(); 2516 continue; 2517 2518 case AMDGPU::S_BFE_I64: { 2519 splitScalar64BitBFE(Worklist, Inst); 2520 Inst->eraseFromParent(); 2521 continue; 2522 } 2523 2524 case AMDGPU::S_LSHL_B32: 2525 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2526 NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 2527 swapOperands(Inst); 2528 } 2529 break; 2530 case AMDGPU::S_ASHR_I32: 2531 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 2532 NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 2533 swapOperands(Inst); 2534 } 2535 break; 2536 case AMDGPU::S_LSHR_B32: 2537 if (ST.getGeneration() 
>= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2538 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2539 swapOperands(Inst);
2540 }
2541 break;
2542 case AMDGPU::S_LSHL_B64:
2543 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2544 NewOpcode = AMDGPU::V_LSHLREV_B64;
2545 swapOperands(Inst);
2546 }
2547 break;
2548 case AMDGPU::S_ASHR_I64:
2549 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2550 NewOpcode = AMDGPU::V_ASHRREV_I64;
2551 swapOperands(Inst);
2552 }
2553 break;
2554 case AMDGPU::S_LSHR_B64:
2555 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2556 NewOpcode = AMDGPU::V_LSHRREV_B64;
2557 swapOperands(Inst);
2558 }
2559 break;
2560
2561 case AMDGPU::S_ABS_I32:
2562 lowerScalarAbs(Worklist, Inst);
2563 Inst->eraseFromParent();
2564 continue;
2565
2566 case AMDGPU::S_CBRANCH_SCC0:
2567 case AMDGPU::S_CBRANCH_SCC1:
2568 // Clear unused bits of vcc
2569 BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
2570 .addReg(AMDGPU::EXEC)
2571 .addReg(AMDGPU::VCC);
2572 break;
2573
2574 case AMDGPU::S_BFE_U64:
2575 case AMDGPU::S_BFM_B64:
2576 llvm_unreachable("Moving this op to VALU not implemented");
2577 }
2578
2579 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2580 // We cannot move this instruction to the VALU, so we should try to
2581 // legalize its operands instead.
2582 legalizeOperands(Inst);
2583 continue;
2584 }
2585
2586 // Use the new VALU Opcode.
2587 const MCInstrDesc &NewDesc = get(NewOpcode);
2588 Inst->setDesc(NewDesc);
2589
2590 // Remove any references to SCC. Vector instructions can't read from it, and
2591 // we're just about to add the implicit use / defs of VCC, and we don't want
2592 // both.
2593 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
2594 MachineOperand &Op = Inst->getOperand(i);
2595 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
2596 Inst->RemoveOperand(i);
2597 addSCCDefUsersToVALUWorklist(Inst, Worklist);
2598 }
2599 }
2600
2601 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2602 // We are converting these to a BFE, so we need to add the missing
2603 // operands for the size and offset.
2604 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
2605 Inst->addOperand(MachineOperand::CreateImm(0));
2606 Inst->addOperand(MachineOperand::CreateImm(Size));
2607
2608 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
2609 // The VALU version adds the second operand to the result, so insert an
2610 // extra 0 operand.
2611 Inst->addOperand(MachineOperand::CreateImm(0));
2612 }
2613
2614 Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
2615
2616 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
2617 const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
2618 // If we need to move this to VGPRs, we need to unpack the second operand
2619 // back into the 2 separate ones for bit offset and width.
2620 assert(OffsetWidthOp.isImm() &&
2621 "Scalar BFE is only implemented for constant width and offset");
2622 uint32_t Imm = OffsetWidthOp.getImm();
2623
2624 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2625 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2626 Inst->RemoveOperand(2); // Remove old immediate.
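// Illustrative example: Imm == 0x00080006 decodes to Offset = 6 and
// BitWidth = 8; they are re-added as two separate immediate operands below.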
2627 Inst->addOperand(MachineOperand::CreateImm(Offset)); 2628 Inst->addOperand(MachineOperand::CreateImm(BitWidth)); 2629 } 2630 2631 bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef(); 2632 unsigned NewDstReg = AMDGPU::NoRegister; 2633 if (HasDst) { 2634 // Update the destination register class. 2635 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); 2636 if (!NewDstRC) 2637 continue; 2638 2639 unsigned DstReg = Inst->getOperand(0).getReg(); 2640 NewDstReg = MRI.createVirtualRegister(NewDstRC); 2641 MRI.replaceRegWith(DstReg, NewDstReg); 2642 } 2643 2644 // Legalize the operands 2645 legalizeOperands(Inst); 2646 2647 if (HasDst) 2648 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 2649 } 2650 } 2651 2652 //===----------------------------------------------------------------------===// 2653 // Indirect addressing callbacks 2654 //===----------------------------------------------------------------------===// 2655 2656 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2657 return &AMDGPU::VGPR_32RegClass; 2658 } 2659 2660 void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, 2661 MachineInstr *Inst) const { 2662 MachineBasicBlock &MBB = *Inst->getParent(); 2663 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2664 MachineBasicBlock::iterator MII = Inst; 2665 DebugLoc DL = Inst->getDebugLoc(); 2666 2667 MachineOperand &Dest = Inst->getOperand(0); 2668 MachineOperand &Src = Inst->getOperand(1); 2669 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2670 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2671 2672 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) 2673 .addImm(0) 2674 .addReg(Src.getReg()); 2675 2676 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 2677 .addReg(Src.getReg()) 2678 .addReg(TmpReg); 2679 2680 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2681 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2682 } 2683 2684 void SIInstrInfo::splitScalar64BitUnaryOp( 2685 SmallVectorImpl<MachineInstr *> &Worklist, 2686 MachineInstr *Inst, 2687 unsigned Opcode) const { 2688 MachineBasicBlock &MBB = *Inst->getParent(); 2689 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2690 2691 MachineOperand &Dest = Inst->getOperand(0); 2692 MachineOperand &Src0 = Inst->getOperand(1); 2693 DebugLoc DL = Inst->getDebugLoc(); 2694 2695 MachineBasicBlock::iterator MII = Inst; 2696 2697 const MCInstrDesc &InstDesc = get(Opcode); 2698 const TargetRegisterClass *Src0RC = Src0.isReg() ? 
2699 MRI.getRegClass(Src0.getReg()) : 2700 &AMDGPU::SGPR_32RegClass; 2701 2702 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2703 2704 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2705 AMDGPU::sub0, Src0SubRC); 2706 2707 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2708 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2709 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 2710 2711 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2712 BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2713 .addOperand(SrcReg0Sub0); 2714 2715 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2716 AMDGPU::sub1, Src0SubRC); 2717 2718 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2719 BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2720 .addOperand(SrcReg0Sub1); 2721 2722 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 2723 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2724 .addReg(DestSub0) 2725 .addImm(AMDGPU::sub0) 2726 .addReg(DestSub1) 2727 .addImm(AMDGPU::sub1); 2728 2729 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2730 2731 // We don't need to legalizeOperands here because for a single operand, src0 2732 // will support any kind of input. 2733 2734 // Move all users of this moved value. 2735 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 2736 } 2737 2738 void SIInstrInfo::splitScalar64BitBinaryOp( 2739 SmallVectorImpl<MachineInstr *> &Worklist, 2740 MachineInstr *Inst, 2741 unsigned Opcode) const { 2742 MachineBasicBlock &MBB = *Inst->getParent(); 2743 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2744 2745 MachineOperand &Dest = Inst->getOperand(0); 2746 MachineOperand &Src0 = Inst->getOperand(1); 2747 MachineOperand &Src1 = Inst->getOperand(2); 2748 DebugLoc DL = Inst->getDebugLoc(); 2749 2750 MachineBasicBlock::iterator MII = Inst; 2751 2752 const MCInstrDesc &InstDesc = get(Opcode); 2753 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2754 MRI.getRegClass(Src0.getReg()) : 2755 &AMDGPU::SGPR_32RegClass; 2756 2757 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2758 const TargetRegisterClass *Src1RC = Src1.isReg() ? 
2759 MRI.getRegClass(Src1.getReg()) :
2760 &AMDGPU::SGPR_32RegClass;
2761
2762 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
2763
2764 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2765 AMDGPU::sub0, Src0SubRC);
2766 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2767 AMDGPU::sub0, Src1SubRC);
2768
2769 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2770 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2771 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
2772
2773 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
2774 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2775 .addOperand(SrcReg0Sub0)
2776 .addOperand(SrcReg1Sub0);
2777
2778 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2779 AMDGPU::sub1, Src0SubRC);
2780 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2781 AMDGPU::sub1, Src1SubRC);
2782
2783 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
2784 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2785 .addOperand(SrcReg0Sub1)
2786 .addOperand(SrcReg1Sub1);
2787
2788 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
2789 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2790 .addReg(DestSub0)
2791 .addImm(AMDGPU::sub0)
2792 .addReg(DestSub1)
2793 .addImm(AMDGPU::sub1);
2794
2795 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2796
2797 // Try to legalize the operands in case we need to swap the order to keep it
2798 // valid.
2799 legalizeOperands(LoHalf);
2800 legalizeOperands(HiHalf);
2801
2802 // Move all users of this moved value.
2803 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
2804 }
2805
2806 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
2807 MachineInstr *Inst) const {
2808 MachineBasicBlock &MBB = *Inst->getParent();
2809 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2810
2811 MachineBasicBlock::iterator MII = Inst;
2812 DebugLoc DL = Inst->getDebugLoc();
2813
2814 MachineOperand &Dest = Inst->getOperand(0);
2815 MachineOperand &Src = Inst->getOperand(1);
2816
2817 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
2818 const TargetRegisterClass *SrcRC = Src.isReg() ?
2819 MRI.getRegClass(Src.getReg()) :
2820 &AMDGPU::SGPR_32RegClass;
2821
2822 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2823 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2824
2825 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2826
2827 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2828 AMDGPU::sub0, SrcSubRC);
2829 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2830 AMDGPU::sub1, SrcSubRC);
2831
2832 BuildMI(MBB, MII, DL, InstDesc, MidReg)
2833 .addOperand(SrcRegSub0)
2834 .addImm(0);
2835
2836 BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2837 .addOperand(SrcRegSub1)
2838 .addReg(MidReg);
2839
2840 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2841
2842 // We don't need to legalize operands here. src0 for either instruction can be
2843 // an SGPR, and the second input is unused or determined here.
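// Illustrative expansion (register names are placeholders):
//   s_bcnt1_i32_b64 s0, s[4:5]
// becomes roughly:
//   v_bcnt_u32_b32_e64 v_mid, s4, 0
//   v_bcnt_u32_b32_e64 v_dst, s5, v_mid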
2844 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2845 } 2846 2847 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, 2848 MachineInstr *Inst) const { 2849 MachineBasicBlock &MBB = *Inst->getParent(); 2850 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2851 MachineBasicBlock::iterator MII = Inst; 2852 DebugLoc DL = Inst->getDebugLoc(); 2853 2854 MachineOperand &Dest = Inst->getOperand(0); 2855 uint32_t Imm = Inst->getOperand(2).getImm(); 2856 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2857 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2858 2859 (void) Offset; 2860 2861 // Only sext_inreg cases handled. 2862 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 && 2863 BitWidth <= 32 && 2864 Offset == 0 && 2865 "Not implemented"); 2866 2867 if (BitWidth < 32) { 2868 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2869 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2870 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2871 2872 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) 2873 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0) 2874 .addImm(0) 2875 .addImm(BitWidth); 2876 2877 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 2878 .addImm(31) 2879 .addReg(MidRegLo); 2880 2881 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2882 .addReg(MidRegLo) 2883 .addImm(AMDGPU::sub0) 2884 .addReg(MidRegHi) 2885 .addImm(AMDGPU::sub1); 2886 2887 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2888 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2889 return; 2890 } 2891 2892 MachineOperand &Src = Inst->getOperand(1); 2893 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2894 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2895 2896 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 2897 .addImm(31) 2898 .addReg(Src.getReg(), 0, AMDGPU::sub0); 2899 2900 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2901 .addReg(Src.getReg(), 0, AMDGPU::sub0) 2902 .addImm(AMDGPU::sub0) 2903 .addReg(TmpReg) 2904 .addImm(AMDGPU::sub1); 2905 2906 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2907 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2908 } 2909 2910 void SIInstrInfo::addUsersToMoveToVALUWorklist( 2911 unsigned DstReg, 2912 MachineRegisterInfo &MRI, 2913 SmallVectorImpl<MachineInstr *> &Worklist) const { 2914 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 2915 E = MRI.use_end(); I != E; ++I) { 2916 MachineInstr &UseMI = *I->getParent(); 2917 if (!canReadVGPR(UseMI, I.getOperandNo())) { 2918 Worklist.push_back(&UseMI); 2919 } 2920 } 2921 } 2922 2923 void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst, 2924 SmallVectorImpl<MachineInstr *> &Worklist) const { 2925 // This assumes that all the users of SCC are in the same block 2926 // as the SCC def. 2927 for (MachineBasicBlock::iterator I = SCCDefInst, 2928 E = SCCDefInst->getParent()->end(); I != E; ++I) { 2929 2930 // Exit if we find another SCC def. 
2931 if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1) 2932 return; 2933 2934 if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1) 2935 Worklist.push_back(I); 2936 } 2937 } 2938 2939 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 2940 const MachineInstr &Inst) const { 2941 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 2942 2943 switch (Inst.getOpcode()) { 2944 // For target instructions, getOpRegClass just returns the virtual register 2945 // class associated with the operand, so we need to find an equivalent VGPR 2946 // register class in order to move the instruction to the VALU. 2947 case AMDGPU::COPY: 2948 case AMDGPU::PHI: 2949 case AMDGPU::REG_SEQUENCE: 2950 case AMDGPU::INSERT_SUBREG: 2951 if (RI.hasVGPRs(NewDstRC)) 2952 return nullptr; 2953 2954 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2955 if (!NewDstRC) 2956 return nullptr; 2957 return NewDstRC; 2958 default: 2959 return NewDstRC; 2960 } 2961 } 2962 2963 // Find the one SGPR operand we are allowed to use. 2964 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, 2965 int OpIndices[3]) const { 2966 const MCInstrDesc &Desc = MI->getDesc(); 2967 2968 // Find the one SGPR operand we are allowed to use. 2969 // 2970 // First we need to consider the instruction's operand requirements before 2971 // legalizing. Some operands are required to be SGPRs, such as implicit uses 2972 // of VCC, but we are still bound by the constant bus requirement to only use 2973 // one. 2974 // 2975 // If the operand's class is an SGPR, we can never move it. 2976 2977 unsigned SGPRReg = findImplicitSGPRRead(*MI); 2978 if (SGPRReg != AMDGPU::NoRegister) 2979 return SGPRReg; 2980 2981 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2982 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 2983 2984 for (unsigned i = 0; i < 3; ++i) { 2985 int Idx = OpIndices[i]; 2986 if (Idx == -1) 2987 break; 2988 2989 const MachineOperand &MO = MI->getOperand(Idx); 2990 if (!MO.isReg()) 2991 continue; 2992 2993 // Is this operand statically required to be an SGPR based on the operand 2994 // constraints? 2995 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); 2996 bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 2997 if (IsRequiredSGPR) 2998 return MO.getReg(); 2999 3000 // If this could be a VGPR or an SGPR, Check the dynamic register class. 3001 unsigned Reg = MO.getReg(); 3002 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 3003 if (RI.isSGPRClass(RegRC)) 3004 UsedSGPRs[i] = Reg; 3005 } 3006 3007 // We don't have a required SGPR operand, so we have a bit more freedom in 3008 // selecting operands to move. 3009 3010 // Try to select the most used SGPR. If an SGPR is equal to one of the 3011 // others, we choose that. 3012 // 3013 // e.g. 3014 // V_FMA_F32 v0, s0, s0, s0 -> No moves 3015 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 3016 3017 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 3018 // prefer those. 
3019 3020 if (UsedSGPRs[0] != AMDGPU::NoRegister) { 3021 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 3022 SGPRReg = UsedSGPRs[0]; 3023 } 3024 3025 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { 3026 if (UsedSGPRs[1] == UsedSGPRs[2]) 3027 SGPRReg = UsedSGPRs[1]; 3028 } 3029 3030 return SGPRReg; 3031 } 3032 3033 void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, 3034 const MachineFunction &MF) const { 3035 int End = getIndirectIndexEnd(MF); 3036 int Begin = getIndirectIndexBegin(MF); 3037 3038 if (End == -1) 3039 return; 3040 3041 3042 for (int Index = Begin; Index <= End; ++Index) 3043 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index)); 3044 3045 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) 3046 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); 3047 3048 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) 3049 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); 3050 3051 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index) 3052 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); 3053 3054 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index) 3055 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); 3056 3057 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) 3058 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); 3059 } 3060 3061 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 3062 unsigned OperandName) const { 3063 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 3064 if (Idx == -1) 3065 return nullptr; 3066 3067 return &MI.getOperand(Idx); 3068 } 3069 3070 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 3071 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 3072 if (ST.isAmdHsaOS()) { 3073 RsrcDataFormat |= (1ULL << 56); 3074 3075 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 3076 // Set MTYPE = 2 3077 RsrcDataFormat |= (2ULL << 59); 3078 } 3079 3080 return RsrcDataFormat; 3081 } 3082 3083 uint64_t SIInstrInfo::getScratchRsrcWords23() const { 3084 uint64_t Rsrc23 = getDefaultRsrcDataFormat() | 3085 AMDGPU::RSRC_TID_ENABLE | 3086 0xffffffff; // Size; 3087 3088 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; 3089 3090 Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT); 3091 3092 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 3093 // Clear them unless we want a huge stride. 
3094 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 3095 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; 3096 3097 return Rsrc23; 3098 } 3099 3100 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const { 3101 unsigned Opc = MI->getOpcode(); 3102 3103 return isSMRD(Opc); 3104 } 3105 3106 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const { 3107 unsigned Opc = MI->getOpcode(); 3108 3109 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); 3110 } 3111 3112 ArrayRef<std::pair<int, const char *>> 3113 SIInstrInfo::getSerializableTargetIndices() const { 3114 static const std::pair<int, const char *> TargetIndices[] = { 3115 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 3116 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 3117 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 3118 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 3119 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 3120 return makeArrayRef(TargetIndices); 3121 } 3122 3123 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 3124 /// post-RA version of misched uses CreateTargetMIHazardRecognizer. 3125 ScheduleHazardRecognizer * 3126 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 3127 const ScheduleDAG *DAG) const { 3128 return new GCNHazardRecognizer(DAG->MF); 3129 } 3130 3131 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 3132 /// pass. 3133 ScheduleHazardRecognizer * 3134 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 3135 return new GCNHazardRecognizer(MF); 3136 } 3137