1 //===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI Implementation of TargetInstrInfo. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "SIInstrInfo.h" 16 #include "AMDGPUTargetMachine.h" 17 #include "GCNHazardRecognizer.h" 18 #include "SIDefines.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/ScheduleDAG.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/MC/MCInstrDesc.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 SIInstrInfo::SIInstrInfo(const SISubtarget &ST) 32 : AMDGPUInstrInfo(ST), RI(), ST(ST) {} 33 34 //===----------------------------------------------------------------------===// 35 // TargetInstrInfo callbacks 36 //===----------------------------------------------------------------------===// 37 38 static unsigned getNumOperandsNoGlue(SDNode *Node) { 39 unsigned N = Node->getNumOperands(); 40 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) 41 --N; 42 return N; 43 } 44 45 static SDValue findChainOperand(SDNode *Load) { 46 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); 47 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); 48 return LastOp; 49 } 50 51 /// \brief Returns true if both nodes have the same value for the given 52 /// operand \p Op, or if both nodes do not have this operand. 53 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { 54 unsigned Opc0 = N0->getMachineOpcode(); 55 unsigned Opc1 = N1->getMachineOpcode(); 56 57 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); 58 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); 59 60 if (Op0Idx == -1 && Op1Idx == -1) 61 return true; 62 63 64 if ((Op0Idx == -1 && Op1Idx != -1) || 65 (Op1Idx == -1 && Op0Idx != -1)) 66 return false; 67 68 // getNamedOperandIdx returns the index for the MachineInstr's operands, 69 // which includes the result as the first operand. We are indexing into the 70 // MachineSDNode's operands, so we need to skip the result operand to get 71 // the real index. 72 --Op0Idx; 73 --Op1Idx; 74 75 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); 76 } 77 78 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, 79 AliasAnalysis *AA) const { 80 // TODO: The generic check fails for VALU instructions that should be 81 // rematerializable due to implicit reads of exec. We really want all of the 82 // generic logic for this except for this. 83 switch (MI.getOpcode()) { 84 case AMDGPU::V_MOV_B32_e32: 85 case AMDGPU::V_MOV_B32_e64: 86 case AMDGPU::V_MOV_B64_PSEUDO: 87 return true; 88 default: 89 return false; 90 } 91 } 92 93 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, 94 int64_t &Offset0, 95 int64_t &Offset1) const { 96 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) 97 return false; 98 99 unsigned Opc0 = Load0->getMachineOpcode(); 100 unsigned Opc1 = Load1->getMachineOpcode(); 101 102 // Make sure both are actually loads. 
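  // The cases below handle DS, SMRD, and MUBUF/MTBUF loads separately, since
  // each instruction class encodes its base register and offset operands
  // differently.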
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list but SDNodes don't, we need to
    // decrement the index by one.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
182 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 183 return false; 184 185 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 186 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 187 return true; 188 } 189 190 return false; 191 } 192 193 static bool isStride64(unsigned Opc) { 194 switch (Opc) { 195 case AMDGPU::DS_READ2ST64_B32: 196 case AMDGPU::DS_READ2ST64_B64: 197 case AMDGPU::DS_WRITE2ST64_B32: 198 case AMDGPU::DS_WRITE2ST64_B64: 199 return true; 200 default: 201 return false; 202 } 203 } 204 205 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, 206 int64_t &Offset, 207 const TargetRegisterInfo *TRI) const { 208 unsigned Opc = LdSt.getOpcode(); 209 210 if (isDS(LdSt)) { 211 const MachineOperand *OffsetImm = 212 getNamedOperand(LdSt, AMDGPU::OpName::offset); 213 if (OffsetImm) { 214 // Normal, single offset LDS instruction. 215 const MachineOperand *AddrReg = 216 getNamedOperand(LdSt, AMDGPU::OpName::addr); 217 218 BaseReg = AddrReg->getReg(); 219 Offset = OffsetImm->getImm(); 220 return true; 221 } 222 223 // The 2 offset instructions use offset0 and offset1 instead. We can treat 224 // these as a load with a single offset if the 2 offsets are consecutive. We 225 // will use this for some partially aligned loads. 226 const MachineOperand *Offset0Imm = 227 getNamedOperand(LdSt, AMDGPU::OpName::offset0); 228 const MachineOperand *Offset1Imm = 229 getNamedOperand(LdSt, AMDGPU::OpName::offset1); 230 231 uint8_t Offset0 = Offset0Imm->getImm(); 232 uint8_t Offset1 = Offset1Imm->getImm(); 233 234 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 235 // Each of these offsets is in element sized units, so we need to convert 236 // to bytes of the individual reads. 237 238 unsigned EltSize; 239 if (LdSt.mayLoad()) 240 EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; 241 else { 242 assert(LdSt.mayStore()); 243 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 244 EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); 245 } 246 247 if (isStride64(Opc)) 248 EltSize *= 64; 249 250 const MachineOperand *AddrReg = 251 getNamedOperand(LdSt, AMDGPU::OpName::addr); 252 BaseReg = AddrReg->getReg(); 253 Offset = EltSize * Offset0; 254 return true; 255 } 256 257 return false; 258 } 259 260 if (isMUBUF(LdSt) || isMTBUF(LdSt)) { 261 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 262 return false; 263 264 const MachineOperand *AddrReg = 265 getNamedOperand(LdSt, AMDGPU::OpName::vaddr); 266 if (!AddrReg) 267 return false; 268 269 const MachineOperand *OffsetImm = 270 getNamedOperand(LdSt, AMDGPU::OpName::offset); 271 BaseReg = AddrReg->getReg(); 272 Offset = OffsetImm->getImm(); 273 return true; 274 } 275 276 if (isSMRD(LdSt)) { 277 const MachineOperand *OffsetImm = 278 getNamedOperand(LdSt, AMDGPU::OpName::offset); 279 if (!OffsetImm) 280 return false; 281 282 const MachineOperand *SBaseReg = 283 getNamedOperand(LdSt, AMDGPU::OpName::sbase); 284 BaseReg = SBaseReg->getReg(); 285 Offset = OffsetImm->getImm(); 286 return true; 287 } 288 289 if (isFLAT(LdSt)) { 290 const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); 291 BaseReg = AddrReg->getReg(); 292 Offset = 0; 293 return true; 294 } 295 296 return false; 297 } 298 299 bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, 300 MachineInstr &SecondLdSt, 301 unsigned NumLoads) const { 302 const MachineOperand *FirstDst = nullptr; 303 const MachineOperand *SecondDst = nullptr; 304 305 if (isDS(FirstLdSt) && isDS(SecondLdSt)) { 306 
FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); 307 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); 308 } 309 310 if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { 311 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); 312 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); 313 } 314 315 if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || 316 (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) { 317 FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); 318 SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); 319 } 320 321 if (!FirstDst || !SecondDst) 322 return false; 323 324 // Try to limit clustering based on the total number of bytes loaded 325 // rather than the number of instructions. This is done to help reduce 326 // register pressure. The method used is somewhat inexact, though, 327 // because it assumes that all loads in the cluster will load the 328 // same number of bytes as FirstLdSt. 329 330 // The unit of this value is bytes. 331 // FIXME: This needs finer tuning. 332 unsigned LoadClusterThreshold = 16; 333 334 const MachineRegisterInfo &MRI = 335 FirstLdSt.getParent()->getParent()->getRegInfo(); 336 const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); 337 338 return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; 339 } 340 341 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 342 MachineBasicBlock::iterator MI, 343 const DebugLoc &DL, unsigned DestReg, 344 unsigned SrcReg, bool KillSrc) const { 345 346 // If we are trying to copy to or from SCC, there is a bug somewhere else in 347 // the backend. While it may be theoretically possible to do this, it should 348 // never be necessary. 349 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 350 351 static const int16_t Sub0_15[] = { 352 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 353 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 354 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 355 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 356 }; 357 358 static const int16_t Sub0_15_64[] = { 359 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 360 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 361 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 362 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 363 }; 364 365 static const int16_t Sub0_7[] = { 366 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 367 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 368 }; 369 370 static const int16_t Sub0_7_64[] = { 371 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 372 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 373 }; 374 375 static const int16_t Sub0_3[] = { 376 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 377 }; 378 379 static const int16_t Sub0_3_64[] = { 380 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 381 }; 382 383 static const int16_t Sub0_2[] = { 384 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 385 }; 386 387 static const int16_t Sub0_1[] = { 388 AMDGPU::sub0, AMDGPU::sub1, 389 }; 390 391 unsigned Opcode; 392 ArrayRef<int16_t> SubIndices; 393 bool Forward; 394 395 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 396 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 397 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 398 .addReg(SrcReg, getKillRegState(KillSrc)); 399 return; 400 401 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 402 if (DestReg == AMDGPU::VCC) { 403 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 404 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 405 .addReg(SrcReg, getKillRegState(KillSrc)); 406 } 
else { 407 // FIXME: Hack until VReg_1 removed. 408 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 409 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) 410 .addImm(0) 411 .addReg(SrcReg, getKillRegState(KillSrc)); 412 } 413 414 return; 415 } 416 417 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 418 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 419 .addReg(SrcReg, getKillRegState(KillSrc)); 420 return; 421 422 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 423 assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 424 Opcode = AMDGPU::S_MOV_B64; 425 SubIndices = Sub0_3_64; 426 427 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 428 assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 429 Opcode = AMDGPU::S_MOV_B64; 430 SubIndices = Sub0_7_64; 431 432 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 433 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 434 Opcode = AMDGPU::S_MOV_B64; 435 SubIndices = Sub0_15_64; 436 437 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 438 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 439 AMDGPU::SReg_32RegClass.contains(SrcReg)); 440 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 441 .addReg(SrcReg, getKillRegState(KillSrc)); 442 return; 443 444 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 445 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 446 AMDGPU::SReg_64RegClass.contains(SrcReg)); 447 Opcode = AMDGPU::V_MOV_B32_e32; 448 SubIndices = Sub0_1; 449 450 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { 451 assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 452 Opcode = AMDGPU::V_MOV_B32_e32; 453 SubIndices = Sub0_2; 454 455 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 456 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 457 AMDGPU::SReg_128RegClass.contains(SrcReg)); 458 Opcode = AMDGPU::V_MOV_B32_e32; 459 SubIndices = Sub0_3; 460 461 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 462 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 463 AMDGPU::SReg_256RegClass.contains(SrcReg)); 464 Opcode = AMDGPU::V_MOV_B32_e32; 465 SubIndices = Sub0_7; 466 467 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 468 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 469 AMDGPU::SReg_512RegClass.contains(SrcReg)); 470 Opcode = AMDGPU::V_MOV_B32_e32; 471 SubIndices = Sub0_15; 472 473 } else { 474 llvm_unreachable("Can't copy register!"); 475 } 476 477 if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) 478 Forward = true; 479 else 480 Forward = false; 481 482 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 483 unsigned SubIdx; 484 if (Forward) 485 SubIdx = SubIndices[Idx]; 486 else 487 SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 488 489 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 490 get(Opcode), RI.getSubReg(DestReg, SubIdx)); 491 492 Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); 493 494 if (Idx == SubIndices.size() - 1) 495 Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); 496 497 if (Idx == 0) 498 Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 499 } 500 } 501 502 int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 503 const unsigned Opcode = MI.getOpcode(); 504 505 int NewOpc; 506 507 // Try to map original to commuted opcode 508 NewOpc = AMDGPU::getCommuteRev(Opcode); 509 if (NewOpc != -1) 510 // Check if the commuted (REV) opcode exists on the target. 511 return pseudoToMCOpcode(NewOpc) != -1 ? 
NewOpc : -1; 512 513 // Try to map commuted to original opcode 514 NewOpc = AMDGPU::getCommuteOrig(Opcode); 515 if (NewOpc != -1) 516 // Check if the original (non-REV) opcode exists on the target. 517 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 518 519 return Opcode; 520 } 521 522 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 523 524 if (DstRC->getSize() == 4) { 525 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 526 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 527 return AMDGPU::S_MOV_B64; 528 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 529 return AMDGPU::V_MOV_B64_PSEUDO; 530 } 531 return AMDGPU::COPY; 532 } 533 534 static unsigned getSGPRSpillSaveOpcode(unsigned Size) { 535 switch (Size) { 536 case 4: 537 return AMDGPU::SI_SPILL_S32_SAVE; 538 case 8: 539 return AMDGPU::SI_SPILL_S64_SAVE; 540 case 16: 541 return AMDGPU::SI_SPILL_S128_SAVE; 542 case 32: 543 return AMDGPU::SI_SPILL_S256_SAVE; 544 case 64: 545 return AMDGPU::SI_SPILL_S512_SAVE; 546 default: 547 llvm_unreachable("unknown register size"); 548 } 549 } 550 551 static unsigned getVGPRSpillSaveOpcode(unsigned Size) { 552 switch (Size) { 553 case 4: 554 return AMDGPU::SI_SPILL_V32_SAVE; 555 case 8: 556 return AMDGPU::SI_SPILL_V64_SAVE; 557 case 12: 558 return AMDGPU::SI_SPILL_V96_SAVE; 559 case 16: 560 return AMDGPU::SI_SPILL_V128_SAVE; 561 case 32: 562 return AMDGPU::SI_SPILL_V256_SAVE; 563 case 64: 564 return AMDGPU::SI_SPILL_V512_SAVE; 565 default: 566 llvm_unreachable("unknown register size"); 567 } 568 } 569 570 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 571 MachineBasicBlock::iterator MI, 572 unsigned SrcReg, bool isKill, 573 int FrameIndex, 574 const TargetRegisterClass *RC, 575 const TargetRegisterInfo *TRI) const { 576 MachineFunction *MF = MBB.getParent(); 577 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 578 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 579 DebugLoc DL = MBB.findDebugLoc(MI); 580 581 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 582 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 583 MachinePointerInfo PtrInfo 584 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 585 MachineMemOperand *MMO 586 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 587 Size, Align); 588 589 if (RI.isSGPRClass(RC)) { 590 MFI->setHasSpilledSGPRs(); 591 592 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { 593 // m0 may not be allowed for readlane. 594 MachineRegisterInfo &MRI = MF->getRegInfo(); 595 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); 596 } 597 598 // We are only allowed to create one new instruction when spilling 599 // registers, so we need to use pseudo instruction for spilling 600 // SGPRs. 
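    // For example, spilling a 16-byte (128-bit) SGPR tuple selects
    // SI_SPILL_S128_SAVE (see getSGPRSpillSaveOpcode above).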
601 unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); 602 BuildMI(MBB, MI, DL, get(Opcode)) 603 .addReg(SrcReg, getKillRegState(isKill)) // src 604 .addFrameIndex(FrameIndex) // frame_idx 605 .addMemOperand(MMO); 606 607 return; 608 } 609 610 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 611 LLVMContext &Ctx = MF->getFunction()->getContext(); 612 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 613 " spill register"); 614 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 615 .addReg(SrcReg); 616 617 return; 618 } 619 620 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 621 622 unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); 623 MFI->setHasSpilledVGPRs(); 624 BuildMI(MBB, MI, DL, get(Opcode)) 625 .addReg(SrcReg, getKillRegState(isKill)) // src 626 .addFrameIndex(FrameIndex) // frame_idx 627 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 628 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 629 .addImm(0) // offset 630 .addMemOperand(MMO); 631 } 632 633 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 634 switch (Size) { 635 case 4: 636 return AMDGPU::SI_SPILL_S32_RESTORE; 637 case 8: 638 return AMDGPU::SI_SPILL_S64_RESTORE; 639 case 16: 640 return AMDGPU::SI_SPILL_S128_RESTORE; 641 case 32: 642 return AMDGPU::SI_SPILL_S256_RESTORE; 643 case 64: 644 return AMDGPU::SI_SPILL_S512_RESTORE; 645 default: 646 llvm_unreachable("unknown register size"); 647 } 648 } 649 650 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 651 switch (Size) { 652 case 4: 653 return AMDGPU::SI_SPILL_V32_RESTORE; 654 case 8: 655 return AMDGPU::SI_SPILL_V64_RESTORE; 656 case 12: 657 return AMDGPU::SI_SPILL_V96_RESTORE; 658 case 16: 659 return AMDGPU::SI_SPILL_V128_RESTORE; 660 case 32: 661 return AMDGPU::SI_SPILL_V256_RESTORE; 662 case 64: 663 return AMDGPU::SI_SPILL_V512_RESTORE; 664 default: 665 llvm_unreachable("unknown register size"); 666 } 667 } 668 669 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 670 MachineBasicBlock::iterator MI, 671 unsigned DestReg, int FrameIndex, 672 const TargetRegisterClass *RC, 673 const TargetRegisterInfo *TRI) const { 674 MachineFunction *MF = MBB.getParent(); 675 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 676 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 677 DebugLoc DL = MBB.findDebugLoc(MI); 678 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 679 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 680 681 MachinePointerInfo PtrInfo 682 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 683 684 MachineMemOperand *MMO = MF->getMachineMemOperand( 685 PtrInfo, MachineMemOperand::MOLoad, Size, Align); 686 687 if (RI.isSGPRClass(RC)) { 688 // FIXME: Maybe this should not include a memoperand because it will be 689 // lowered to non-memory instructions. 690 unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); 691 692 if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { 693 // m0 may not be allowed for readlane. 
694 MachineRegisterInfo &MRI = MF->getRegInfo(); 695 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); 696 } 697 698 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 699 .addFrameIndex(FrameIndex) // frame_idx 700 .addMemOperand(MMO); 701 702 return; 703 } 704 705 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 706 LLVMContext &Ctx = MF->getFunction()->getContext(); 707 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" 708 " restore register"); 709 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); 710 711 return; 712 } 713 714 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 715 716 unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); 717 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 718 .addFrameIndex(FrameIndex) // frame_idx 719 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 720 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 721 .addImm(0) // offset 722 .addMemOperand(MMO); 723 } 724 725 /// \param @Offset Offset in bytes of the FrameIndex being spilled 726 unsigned SIInstrInfo::calculateLDSSpillAddress( 727 MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, 728 unsigned FrameOffset, unsigned Size) const { 729 MachineFunction *MF = MBB.getParent(); 730 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 731 const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); 732 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 733 DebugLoc DL = MBB.findDebugLoc(MI); 734 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); 735 unsigned WavefrontSize = ST.getWavefrontSize(); 736 737 unsigned TIDReg = MFI->getTIDReg(); 738 if (!MFI->hasCalculatedTID()) { 739 MachineBasicBlock &Entry = MBB.getParent()->front(); 740 MachineBasicBlock::iterator Insert = Entry.front(); 741 DebugLoc DL = Insert->getDebugLoc(); 742 743 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass); 744 if (TIDReg == AMDGPU::NoRegister) 745 return TIDReg; 746 747 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && 748 WorkGroupSize > WavefrontSize) { 749 750 unsigned TIDIGXReg 751 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); 752 unsigned TIDIGYReg 753 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); 754 unsigned TIDIGZReg 755 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); 756 unsigned InputPtrReg = 757 TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); 758 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { 759 if (!Entry.isLiveIn(Reg)) 760 Entry.addLiveIn(Reg); 761 } 762 763 RS->enterBasicBlock(Entry); 764 // FIXME: Can we scavenge an SReg_64 and access the subregs? 
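      // The sequence built below folds the loaded NGROUPS values and the
      // preloaded ID registers into a single linear index in TIDReg; it is
      // shifted left by 2 afterwards, presumably to turn an element index
      // into a byte offset.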
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI.getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI.eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI.getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
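    // V_MOV_B64_PSEUDO has no hardware encoding, so it is expanded here into
    // two V_MOV_B32_e32 instructions, one per 32-bit half of the destination
    // register pair.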
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit | RegState::Define);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit | RegState::Define);
    }
    MI.eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI.getOperand(1).getReg();
    unsigned Src1 = MI.getOperand(2).getReg();
    const MachineOperand &SrcCond = MI.getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addReg(SrcCond.getReg())
      .addReg(Dst, RegState::Implicit | RegState::Define);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill()))
      .addReg(Dst, RegState::Implicit | RegState::Define);
    MI.eraseFromParent();
    break;
  }

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI.getOperand(0).getReg();
    unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add a 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                   .addReg(RegLo)
                   .addOperand(MI.getOperand(1)));
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                   .addReg(RegHi)
                   .addImm(0));

    llvm::finalizeBundle(MBB, Bundler.begin());

    MI.eraseFromParent();
    break;
  }
  }
  return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for a
/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still fail to
/// commute the operands; a null pointer is returned in such cases.
950 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, 951 unsigned OpIdx0, 952 unsigned OpIdx1) const { 953 int CommutedOpcode = commuteOpcode(MI); 954 if (CommutedOpcode == -1) 955 return nullptr; 956 957 int Src0Idx = 958 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); 959 MachineOperand &Src0 = MI.getOperand(Src0Idx); 960 if (!Src0.isReg()) 961 return nullptr; 962 963 int Src1Idx = 964 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); 965 966 if ((OpIdx0 != static_cast<unsigned>(Src0Idx) || 967 OpIdx1 != static_cast<unsigned>(Src1Idx)) && 968 (OpIdx0 != static_cast<unsigned>(Src1Idx) || 969 OpIdx1 != static_cast<unsigned>(Src0Idx))) 970 return nullptr; 971 972 MachineOperand &Src1 = MI.getOperand(Src1Idx); 973 974 if (isVOP2(MI) || isVOPC(MI)) { 975 const MCInstrDesc &InstrDesc = MI.getDesc(); 976 // For VOP2 and VOPC instructions, any operand type is valid to use for 977 // src0. Make sure we can use the src0 as src1. 978 // 979 // We could be stricter here and only allow commuting if there is a reason 980 // to do so. i.e. if both operands are VGPRs there is no real benefit, 981 // although MachineCSE attempts to find matches by commuting. 982 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 983 if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) 984 return nullptr; 985 } 986 987 MachineInstr *CommutedMI = &MI; 988 if (!Src1.isReg()) { 989 // Allow commuting instructions with Imm operands. 990 if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) { 991 return nullptr; 992 } 993 // Be sure to copy the source modifiers to the right place. 994 if (MachineOperand *Src0Mods = 995 getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) { 996 MachineOperand *Src1Mods = 997 getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); 998 999 int Src0ModsVal = Src0Mods->getImm(); 1000 if (!Src1Mods && Src0ModsVal != 0) 1001 return nullptr; 1002 1003 // XXX - This assert might be a lie. It might be useful to have a neg 1004 // modifier with 0.0. 1005 int Src1ModsVal = Src1Mods->getImm(); 1006 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 1007 1008 Src1Mods->setImm(Src0ModsVal); 1009 Src0Mods->setImm(Src1ModsVal); 1010 } 1011 1012 unsigned Reg = Src0.getReg(); 1013 unsigned SubReg = Src0.getSubReg(); 1014 if (Src1.isImm()) 1015 Src0.ChangeToImmediate(Src1.getImm()); 1016 else 1017 llvm_unreachable("Should only have immediates"); 1018 1019 Src1.ChangeToRegister(Reg, false); 1020 Src1.setSubReg(SubReg); 1021 } else { 1022 CommutedMI = 1023 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); 1024 } 1025 1026 if (CommutedMI) 1027 CommutedMI->setDesc(get(CommutedOpcode)); 1028 1029 return CommutedMI; 1030 } 1031 1032 // This needs to be implemented because the source modifiers may be inserted 1033 // between the true commutable operands, and the base 1034 // TargetInstrInfo::commuteInstruction uses it. 1035 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, 1036 unsigned &SrcOpIdx1) const { 1037 const MCInstrDesc &MCID = MI.getDesc(); 1038 if (!MCID.isCommutable()) 1039 return false; 1040 1041 unsigned Opc = MI.getOpcode(); 1042 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 1043 if (Src0Idx == -1) 1044 return false; 1045 1046 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 1047 // immediate. 
Also, immediate src0 operand is not handled in 1048 // SIInstrInfo::commuteInstruction(); 1049 if (!MI.getOperand(Src0Idx).isReg()) 1050 return false; 1051 1052 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 1053 if (Src1Idx == -1) 1054 return false; 1055 1056 MachineOperand &Src1 = MI.getOperand(Src1Idx); 1057 if (Src1.isImm()) { 1058 // SIInstrInfo::commuteInstruction() does support commuting the immediate 1059 // operand src1 in 2 and 3 operand instructions. 1060 if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode())) 1061 return false; 1062 } else if (Src1.isReg()) { 1063 // If any source modifiers are set, the generic instruction commuting won't 1064 // understand how to copy the source modifiers. 1065 if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || 1066 hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)) 1067 return false; 1068 } else 1069 return false; 1070 1071 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 1072 } 1073 1074 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 1075 switch (Cond) { 1076 case SIInstrInfo::SCC_TRUE: 1077 return AMDGPU::S_CBRANCH_SCC1; 1078 case SIInstrInfo::SCC_FALSE: 1079 return AMDGPU::S_CBRANCH_SCC0; 1080 case SIInstrInfo::VCCNZ: 1081 return AMDGPU::S_CBRANCH_VCCNZ; 1082 case SIInstrInfo::VCCZ: 1083 return AMDGPU::S_CBRANCH_VCCZ; 1084 case SIInstrInfo::EXECNZ: 1085 return AMDGPU::S_CBRANCH_EXECNZ; 1086 case SIInstrInfo::EXECZ: 1087 return AMDGPU::S_CBRANCH_EXECZ; 1088 default: 1089 llvm_unreachable("invalid branch predicate"); 1090 } 1091 } 1092 1093 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 1094 switch (Opcode) { 1095 case AMDGPU::S_CBRANCH_SCC0: 1096 return SCC_FALSE; 1097 case AMDGPU::S_CBRANCH_SCC1: 1098 return SCC_TRUE; 1099 case AMDGPU::S_CBRANCH_VCCNZ: 1100 return VCCNZ; 1101 case AMDGPU::S_CBRANCH_VCCZ: 1102 return VCCZ; 1103 case AMDGPU::S_CBRANCH_EXECNZ: 1104 return EXECNZ; 1105 case AMDGPU::S_CBRANCH_EXECZ: 1106 return EXECZ; 1107 default: 1108 return INVALID_BR; 1109 } 1110 } 1111 1112 bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 1113 MachineBasicBlock *&TBB, 1114 MachineBasicBlock *&FBB, 1115 SmallVectorImpl<MachineOperand> &Cond, 1116 bool AllowModify) const { 1117 MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1118 1119 if (I == MBB.end()) 1120 return false; 1121 1122 if (I->getOpcode() == AMDGPU::S_BRANCH) { 1123 // Unconditional Branch 1124 TBB = I->getOperand(0).getMBB(); 1125 return false; 1126 } 1127 1128 BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 1129 if (Pred == INVALID_BR) 1130 return true; 1131 1132 MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); 1133 Cond.push_back(MachineOperand::CreateImm(Pred)); 1134 1135 ++I; 1136 1137 if (I == MBB.end()) { 1138 // Conditional branch followed by fall-through. 
    TBB = CondBB;
    return false;
  }

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = CondBB;
    FBB = I->getOperand(0).getMBB();
    return false;
  }

  return true;
}

unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();

  unsigned Count = 0;
  while (I != MBB.end()) {
    MachineBasicBlock::iterator Next = std::next(I);
    I->eraseFromParent();
    ++Count;
    I = Next;
  }

  return Count;
}

unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   ArrayRef<MachineOperand> Cond,
                                   const DebugLoc &DL) const {

  if (!FBB && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
      .addMBB(TBB);
    return 1;
  }

  assert(TBB && Cond[0].isImm());

  unsigned Opcode
    = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (!FBB) {
    BuildMI(&MBB, DL, get(Opcode))
      .addMBB(TBB);
    return 1;
  }

  assert(TBB && FBB);

  BuildMI(&MBB, DL, get(Opcode))
    .addMBB(TBB);
  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
    .addMBB(FBB);

  return 2;
}

bool SIInstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1);
  Cond[0].setImm(-Cond[0].getImm());
  return false;
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

// TODO: Maybe this should be removed and all of this folding should be done
// in SIFoldOperands instead?
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    const MachineOperand &ImmOp = DefMI.getOperand(1);

    // If this is a free constant, there's no reason to do this.
    // TODO: We could fold this here instead of letting SIFoldOperands do it
    // later.
    if (isInlineConstant(ImmOp, 4))
      return false;

    MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_f32.
    // We should only expect these to be on src0 due to canonicalizations.
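    // Roughly (illustrative registers only): "v_mov_b32 v0, <lit>" followed by
    // "v_mad_f32 vd, v0, v1, v2" is rewritten as "v_madmk_f32 vd, v1, <lit>, v2",
    // with the literal moving into the dedicated madmk constant slot.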
1251 if (Src0->isReg() && Src0->getReg() == Reg) { 1252 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1253 return false; 1254 1255 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 1256 return false; 1257 1258 // We need to swap operands 0 and 1 since madmk constant is at operand 1. 1259 1260 const int64_t Imm = DefMI.getOperand(1).getImm(); 1261 1262 // FIXME: This would be a lot easier if we could return a new instruction 1263 // instead of having to modify in place. 1264 1265 // Remove these first since they are at the end. 1266 UseMI.RemoveOperand( 1267 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); 1268 UseMI.RemoveOperand( 1269 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); 1270 1271 unsigned Src1Reg = Src1->getReg(); 1272 unsigned Src1SubReg = Src1->getSubReg(); 1273 Src0->setReg(Src1Reg); 1274 Src0->setSubReg(Src1SubReg); 1275 Src0->setIsKill(Src1->isKill()); 1276 1277 if (Opc == AMDGPU::V_MAC_F32_e64) { 1278 UseMI.untieRegOperand( 1279 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1280 } 1281 1282 Src1->ChangeToImmediate(Imm); 1283 1284 removeModOperands(UseMI); 1285 UseMI.setDesc(get(AMDGPU::V_MADMK_F32)); 1286 1287 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1288 if (DeleteDef) 1289 DefMI.eraseFromParent(); 1290 1291 return true; 1292 } 1293 1294 // Added part is the constant: Use v_madak_f32 1295 if (Src2->isReg() && Src2->getReg() == Reg) { 1296 // Not allowed to use constant bus for another operand. 1297 // We can however allow an inline immediate as src0. 1298 if (!Src0->isImm() && 1299 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1300 return false; 1301 1302 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1303 return false; 1304 1305 const int64_t Imm = DefMI.getOperand(1).getImm(); 1306 1307 // FIXME: This would be a lot easier if we could return a new instruction 1308 // instead of having to modify in place. 1309 1310 // Remove these first since they are at the end. 1311 UseMI.RemoveOperand( 1312 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); 1313 UseMI.RemoveOperand( 1314 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); 1315 1316 if (Opc == AMDGPU::V_MAC_F32_e64) { 1317 UseMI.untieRegOperand( 1318 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1319 } 1320 1321 // ChangingToImmediate adds Src2 back to the instruction. 1322 Src2->ChangeToImmediate(Imm); 1323 1324 // These come before src2. 1325 removeModOperands(UseMI); 1326 UseMI.setDesc(get(AMDGPU::V_MADAK_F32)); 1327 1328 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1329 if (DeleteDef) 1330 DefMI.eraseFromParent(); 1331 1332 return true; 1333 } 1334 } 1335 1336 return false; 1337 } 1338 1339 static bool offsetsDoNotOverlap(int WidthA, int OffsetA, 1340 int WidthB, int OffsetB) { 1341 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 1342 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 1343 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 1344 return LowOffset + LowWidth <= HighOffset; 1345 } 1346 1347 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, 1348 MachineInstr &MIb) const { 1349 unsigned BaseReg0, BaseReg1; 1350 int64_t Offset0, Offset1; 1351 1352 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && 1353 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { 1354 1355 if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { 1356 // FIXME: Handle ds_read2 / ds_write2. 
1357 return false; 1358 } 1359 unsigned Width0 = (*MIa.memoperands_begin())->getSize(); 1360 unsigned Width1 = (*MIb.memoperands_begin())->getSize(); 1361 if (BaseReg0 == BaseReg1 && 1362 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { 1363 return true; 1364 } 1365 } 1366 1367 return false; 1368 } 1369 1370 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa, 1371 MachineInstr &MIb, 1372 AliasAnalysis *AA) const { 1373 assert((MIa.mayLoad() || MIa.mayStore()) && 1374 "MIa must load from or modify a memory location"); 1375 assert((MIb.mayLoad() || MIb.mayStore()) && 1376 "MIb must load from or modify a memory location"); 1377 1378 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) 1379 return false; 1380 1381 // XXX - Can we relax this between address spaces? 1382 if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 1383 return false; 1384 1385 // TODO: Should we check the address space from the MachineMemOperand? That 1386 // would allow us to distinguish objects we know don't alias based on the 1387 // underlying address space, even if it was lowered to a different one, 1388 // e.g. private accesses lowered to use MUBUF instructions on a scratch 1389 // buffer. 1390 if (isDS(MIa)) { 1391 if (isDS(MIb)) 1392 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1393 1394 return !isFLAT(MIb); 1395 } 1396 1397 if (isMUBUF(MIa) || isMTBUF(MIa)) { 1398 if (isMUBUF(MIb) || isMTBUF(MIb)) 1399 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1400 1401 return !isFLAT(MIb) && !isSMRD(MIb); 1402 } 1403 1404 if (isSMRD(MIa)) { 1405 if (isSMRD(MIb)) 1406 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1407 1408 return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa); 1409 } 1410 1411 if (isFLAT(MIa)) { 1412 if (isFLAT(MIb)) 1413 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1414 1415 return false; 1416 } 1417 1418 return false; 1419 } 1420 1421 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, 1422 MachineInstr &MI, 1423 LiveVariables *LV) const { 1424 1425 switch (MI.getOpcode()) { 1426 default: 1427 return nullptr; 1428 case AMDGPU::V_MAC_F32_e64: 1429 break; 1430 case AMDGPU::V_MAC_F32_e32: { 1431 const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 1432 if (Src0->isImm() && !isInlineConstant(*Src0, 4)) 1433 return nullptr; 1434 break; 1435 } 1436 } 1437 1438 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 1439 const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 1440 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 1441 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 1442 1443 return BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::V_MAD_F32)) 1444 .addOperand(*Dst) 1445 .addImm(0) // Src0 mods 1446 .addOperand(*Src0) 1447 .addImm(0) // Src1 mods 1448 .addOperand(*Src1) 1449 .addImm(0) // Src mods 1450 .addOperand(*Src2) 1451 .addImm(0) // clamp 1452 .addImm(0); // omod 1453 } 1454 1455 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 1456 const MachineBasicBlock *MBB, 1457 const MachineFunction &MF) const { 1458 // Target-independent instructions do not have an implicit-use of EXEC, even 1459 // when they operate on VGPRs. Treating EXEC modifications as scheduling 1460 // boundaries prevents incorrect movements of such instructions. 
1461 const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); 1462 if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) 1463 return true; 1464 1465 return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF); 1466 } 1467 1468 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 1469 int64_t SVal = Imm.getSExtValue(); 1470 if (SVal >= -16 && SVal <= 64) 1471 return true; 1472 1473 if (Imm.getBitWidth() == 64) { 1474 uint64_t Val = Imm.getZExtValue(); 1475 return (DoubleToBits(0.0) == Val) || 1476 (DoubleToBits(1.0) == Val) || 1477 (DoubleToBits(-1.0) == Val) || 1478 (DoubleToBits(0.5) == Val) || 1479 (DoubleToBits(-0.5) == Val) || 1480 (DoubleToBits(2.0) == Val) || 1481 (DoubleToBits(-2.0) == Val) || 1482 (DoubleToBits(4.0) == Val) || 1483 (DoubleToBits(-4.0) == Val); 1484 } 1485 1486 // The actual type of the operand does not seem to matter as long 1487 // as the bits match one of the inline immediate values. For example: 1488 // 1489 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 1490 // so it is a legal inline immediate. 1491 // 1492 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 1493 // floating-point, so it is a legal inline immediate. 1494 uint32_t Val = Imm.getZExtValue(); 1495 1496 return (FloatToBits(0.0f) == Val) || 1497 (FloatToBits(1.0f) == Val) || 1498 (FloatToBits(-1.0f) == Val) || 1499 (FloatToBits(0.5f) == Val) || 1500 (FloatToBits(-0.5f) == Val) || 1501 (FloatToBits(2.0f) == Val) || 1502 (FloatToBits(-2.0f) == Val) || 1503 (FloatToBits(4.0f) == Val) || 1504 (FloatToBits(-4.0f) == Val); 1505 } 1506 1507 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 1508 unsigned OpSize) const { 1509 if (MO.isImm()) { 1510 // MachineOperand provides no way to tell the true operand size, since it 1511 // only records a 64-bit value. We need to know the size to determine if a 1512 // 32-bit floating point immediate bit pattern is legal for an integer 1513 // immediate. It would be for any 32-bit integer operand, but would not be 1514 // for a 64-bit one. 
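    // For example, the bit pattern 0x3f800000 (1.0f) is an inline constant for
    // a 32-bit operand but only a literal constant for a 64-bit operand.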
1515 1516 unsigned BitSize = 8 * OpSize; 1517 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 1518 } 1519 1520 return false; 1521 } 1522 1523 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1524 unsigned OpSize) const { 1525 return MO.isImm() && !isInlineConstant(MO, OpSize); 1526 } 1527 1528 static bool compareMachineOp(const MachineOperand &Op0, 1529 const MachineOperand &Op1) { 1530 if (Op0.getType() != Op1.getType()) 1531 return false; 1532 1533 switch (Op0.getType()) { 1534 case MachineOperand::MO_Register: 1535 return Op0.getReg() == Op1.getReg(); 1536 case MachineOperand::MO_Immediate: 1537 return Op0.getImm() == Op1.getImm(); 1538 default: 1539 llvm_unreachable("Didn't expect to be comparing these operand types"); 1540 } 1541 } 1542 1543 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 1544 const MachineOperand &MO) const { 1545 const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; 1546 1547 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1548 1549 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1550 return true; 1551 1552 if (OpInfo.RegClass < 0) 1553 return false; 1554 1555 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1556 if (isLiteralConstant(MO, OpSize)) 1557 return RI.opCanUseLiteralConstant(OpInfo.OperandType); 1558 1559 return RI.opCanUseInlineConstant(OpInfo.OperandType); 1560 } 1561 1562 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1563 int Op32 = AMDGPU::getVOPe32(Opcode); 1564 if (Op32 == -1) 1565 return false; 1566 1567 return pseudoToMCOpcode(Op32) != -1; 1568 } 1569 1570 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1571 // The src0_modifier operand is present on all instructions 1572 // that have modifiers. 1573 1574 return AMDGPU::getNamedOperandIdx(Opcode, 1575 AMDGPU::OpName::src0_modifiers) != -1; 1576 } 1577 1578 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 1579 unsigned OpName) const { 1580 const MachineOperand *Mods = getNamedOperand(MI, OpName); 1581 return Mods && Mods->getImm(); 1582 } 1583 1584 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1585 const MachineOperand &MO, 1586 unsigned OpSize) const { 1587 // Literal constants use the constant bus. 1588 if (isLiteralConstant(MO, OpSize)) 1589 return true; 1590 1591 if (!MO.isReg() || !MO.isUse()) 1592 return false; 1593 1594 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 1595 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 1596 1597 // FLAT_SCR is just an SGPR pair. 1598 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 1599 return true; 1600 1601 // EXEC register uses the constant bus. 1602 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1603 return true; 1604 1605 // SGPRs use the constant bus 1606 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || 1607 (!MO.isImplicit() && 1608 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1609 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); 1610 } 1611 1612 static unsigned findImplicitSGPRRead(const MachineInstr &MI) { 1613 for (const MachineOperand &MO : MI.implicit_operands()) { 1614 // We only care about reads. 
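    // VCC, M0, and FLAT_SCR are the implicitly read SGPRs that count against
    // the constant bus limit, so they are the only ones reported here.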
1615 if (MO.isDef()) 1616 continue; 1617 1618 switch (MO.getReg()) { 1619 case AMDGPU::VCC: 1620 case AMDGPU::M0: 1621 case AMDGPU::FLAT_SCR: 1622 return MO.getReg(); 1623 1624 default: 1625 break; 1626 } 1627 } 1628 1629 return AMDGPU::NoRegister; 1630 } 1631 1632 static bool shouldReadExec(const MachineInstr &MI) { 1633 if (SIInstrInfo::isVALU(MI)) { 1634 switch (MI.getOpcode()) { 1635 case AMDGPU::V_READLANE_B32: 1636 case AMDGPU::V_READLANE_B32_si: 1637 case AMDGPU::V_READLANE_B32_vi: 1638 case AMDGPU::V_WRITELANE_B32: 1639 case AMDGPU::V_WRITELANE_B32_si: 1640 case AMDGPU::V_WRITELANE_B32_vi: 1641 return false; 1642 } 1643 1644 return true; 1645 } 1646 1647 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || 1648 SIInstrInfo::isSALU(MI) || 1649 SIInstrInfo::isSMRD(MI)) 1650 return false; 1651 1652 return true; 1653 } 1654 1655 bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, 1656 StringRef &ErrInfo) const { 1657 uint16_t Opcode = MI.getOpcode(); 1658 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1659 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1660 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1661 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1662 1663 // Make sure the number of operands is correct. 1664 const MCInstrDesc &Desc = get(Opcode); 1665 if (!Desc.isVariadic() && 1666 Desc.getNumOperands() != MI.getNumExplicitOperands()) { 1667 ErrInfo = "Instruction has wrong number of operands."; 1668 return false; 1669 } 1670 1671 // Make sure the register classes are correct. 1672 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1673 if (MI.getOperand(i).isFPImm()) { 1674 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " 1675 "all fp values to integers."; 1676 return false; 1677 } 1678 1679 int RegClass = Desc.OpInfo[i].RegClass; 1680 1681 switch (Desc.OpInfo[i].OperandType) { 1682 case MCOI::OPERAND_REGISTER: 1683 if (MI.getOperand(i).isImm()) { 1684 ErrInfo = "Illegal immediate value for operand."; 1685 return false; 1686 } 1687 break; 1688 case AMDGPU::OPERAND_REG_IMM32: 1689 break; 1690 case AMDGPU::OPERAND_REG_INLINE_C: 1691 if (isLiteralConstant(MI.getOperand(i), 1692 RI.getRegClass(RegClass)->getSize())) { 1693 ErrInfo = "Illegal immediate value for operand."; 1694 return false; 1695 } 1696 break; 1697 case MCOI::OPERAND_IMMEDIATE: 1698 // Check if this operand is an immediate. 1699 // FrameIndex operands will be replaced by immediates, so they are 1700 // allowed. 1701 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { 1702 ErrInfo = "Expected immediate, but got non-immediate"; 1703 return false; 1704 } 1705 // Fall-through 1706 default: 1707 continue; 1708 } 1709 1710 if (!MI.getOperand(i).isReg()) 1711 continue; 1712 1713 if (RegClass != -1) { 1714 unsigned Reg = MI.getOperand(i).getReg(); 1715 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1716 continue; 1717 1718 const TargetRegisterClass *RC = RI.getRegClass(RegClass); 1719 if (!RC->contains(Reg)) { 1720 ErrInfo = "Operand has incorrect register class."; 1721 return false; 1722 } 1723 } 1724 } 1725 1726 // Verify VOP* 1727 if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { 1728 // Only look at the true operands. Only a real operand can use the constant 1729 // bus, and we don't want to check pseudo-operands like the source modifier 1730 // flags. 
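    // The constant bus can only be read once per VALU instruction, so count
    // SGPR and literal-constant uses (plus any implicit SGPR read) and reject
    // anything that needs more than one.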
1731 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1732 1733 unsigned ConstantBusCount = 0; 1734 unsigned SGPRUsed = findImplicitSGPRRead(MI); 1735 if (SGPRUsed != AMDGPU::NoRegister) 1736 ++ConstantBusCount; 1737 1738 for (int OpIdx : OpIndices) { 1739 if (OpIdx == -1) 1740 break; 1741 const MachineOperand &MO = MI.getOperand(OpIdx); 1742 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 1743 if (MO.isReg()) { 1744 if (MO.getReg() != SGPRUsed) 1745 ++ConstantBusCount; 1746 SGPRUsed = MO.getReg(); 1747 } else { 1748 ++ConstantBusCount; 1749 } 1750 } 1751 } 1752 if (ConstantBusCount > 1) { 1753 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1754 return false; 1755 } 1756 } 1757 1758 // Verify misc. restrictions on specific instructions. 1759 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1760 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1761 const MachineOperand &Src0 = MI.getOperand(Src0Idx); 1762 const MachineOperand &Src1 = MI.getOperand(Src1Idx); 1763 const MachineOperand &Src2 = MI.getOperand(Src2Idx); 1764 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1765 if (!compareMachineOp(Src0, Src1) && 1766 !compareMachineOp(Src0, Src2)) { 1767 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1768 return false; 1769 } 1770 } 1771 } 1772 1773 // Make sure we aren't losing exec uses in the td files. This mostly requires 1774 // being careful when using let Uses to try to add other use registers. 1775 if (shouldReadExec(MI)) { 1776 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 1777 ErrInfo = "VALU instruction does not implicitly read exec mask"; 1778 return false; 1779 } 1780 } 1781 1782 return true; 1783 } 1784 1785 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1786 switch (MI.getOpcode()) { 1787 default: return AMDGPU::INSTRUCTION_LIST_END; 1788 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1789 case AMDGPU::COPY: return AMDGPU::COPY; 1790 case AMDGPU::PHI: return AMDGPU::PHI; 1791 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 1792 case AMDGPU::S_MOV_B32: 1793 return MI.getOperand(1).isReg() ? 
1794 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 1795 case AMDGPU::S_ADD_I32: 1796 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 1797 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 1798 case AMDGPU::S_SUB_I32: 1799 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 1800 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 1801 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 1802 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 1803 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 1804 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 1805 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 1806 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 1807 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 1808 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1809 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1810 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1811 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1812 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1813 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1814 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1815 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1816 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1817 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1818 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 1819 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1820 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1821 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1822 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1823 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1824 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1825 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1826 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; 1827 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1828 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1829 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; 1830 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; 1831 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; 1832 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; 1833 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; 1834 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; 1835 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1836 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1837 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1838 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1839 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 1840 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 1841 } 1842 } 1843 1844 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1845 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1846 } 1847 1848 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1849 unsigned OpNo) const { 1850 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1851 const MCInstrDesc &Desc = get(MI.getOpcode()); 1852 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1853 Desc.OpInfo[OpNo].RegClass == -1) { 1854 unsigned Reg = MI.getOperand(OpNo).getReg(); 1855 1856 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1857 return MRI.getRegClass(Reg); 1858 return RI.getPhysRegClass(Reg); 1859 } 1860 1861 unsigned RCID = 
Desc.OpInfo[OpNo].RegClass; 1862 return RI.getRegClass(RCID); 1863 } 1864 1865 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1866 switch (MI.getOpcode()) { 1867 case AMDGPU::COPY: 1868 case AMDGPU::REG_SEQUENCE: 1869 case AMDGPU::PHI: 1870 case AMDGPU::INSERT_SUBREG: 1871 return RI.hasVGPRs(getOpRegClass(MI, 0)); 1872 default: 1873 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1874 } 1875 } 1876 1877 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { 1878 MachineBasicBlock::iterator I = MI; 1879 MachineBasicBlock *MBB = MI.getParent(); 1880 MachineOperand &MO = MI.getOperand(OpIdx); 1881 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1882 unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; 1883 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1884 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1885 if (MO.isReg()) 1886 Opcode = AMDGPU::COPY; 1887 else if (RI.isSGPRClass(RC)) 1888 Opcode = AMDGPU::S_MOV_B32; 1889 1890 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1891 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 1892 VRC = &AMDGPU::VReg_64RegClass; 1893 else 1894 VRC = &AMDGPU::VGPR_32RegClass; 1895 1896 unsigned Reg = MRI.createVirtualRegister(VRC); 1897 DebugLoc DL = MBB->findDebugLoc(I); 1898 BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO); 1899 MO.ChangeToRegister(Reg, false); 1900 } 1901 1902 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1903 MachineRegisterInfo &MRI, 1904 MachineOperand &SuperReg, 1905 const TargetRegisterClass *SuperRC, 1906 unsigned SubIdx, 1907 const TargetRegisterClass *SubRC) 1908 const { 1909 MachineBasicBlock *MBB = MI->getParent(); 1910 DebugLoc DL = MI->getDebugLoc(); 1911 unsigned SubReg = MRI.createVirtualRegister(SubRC); 1912 1913 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 1914 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1915 .addReg(SuperReg.getReg(), 0, SubIdx); 1916 return SubReg; 1917 } 1918 1919 // Just in case the super register is itself a sub-register, copy it to a new 1920 // value so we don't need to worry about merging its subreg index with the 1921 // SubIdx passed to this function. The register coalescer should be able to 1922 // eliminate this extra copy. 1923 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 1924 1925 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 1926 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 1927 1928 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1929 .addReg(NewSuperReg, 0, SubIdx); 1930 1931 return SubReg; 1932 } 1933 1934 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1935 MachineBasicBlock::iterator MII, 1936 MachineRegisterInfo &MRI, 1937 MachineOperand &Op, 1938 const TargetRegisterClass *SuperRC, 1939 unsigned SubIdx, 1940 const TargetRegisterClass *SubRC) const { 1941 if (Op.isImm()) { 1942 // XXX - Is there a better way to do this? 
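// For a 64-bit immediate, sub0 selects the low 32 bits and sub1 the high 32
// bits, matching the lo/hi split used for register operands below.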
1943 if (SubIdx == AMDGPU::sub0) 1944 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 1945 if (SubIdx == AMDGPU::sub1) 1946 return MachineOperand::CreateImm(Op.getImm() >> 32); 1947 1948 llvm_unreachable("Unhandled register index for immediate"); 1949 } 1950 1951 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 1952 SubIdx, SubRC); 1953 return MachineOperand::CreateReg(SubReg, false); 1954 } 1955 1956 // Change the order of operands from (0, 1, 2) to (0, 2, 1) 1957 void SIInstrInfo::swapOperands(MachineInstr &Inst) const { 1958 assert(Inst.getNumExplicitOperands() == 3); 1959 MachineOperand Op1 = Inst.getOperand(1); 1960 Inst.RemoveOperand(1); 1961 Inst.addOperand(Op1); 1962 } 1963 1964 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 1965 const MCOperandInfo &OpInfo, 1966 const MachineOperand &MO) const { 1967 if (!MO.isReg()) 1968 return false; 1969 1970 unsigned Reg = MO.getReg(); 1971 const TargetRegisterClass *RC = 1972 TargetRegisterInfo::isVirtualRegister(Reg) ? 1973 MRI.getRegClass(Reg) : 1974 RI.getPhysRegClass(Reg); 1975 1976 const SIRegisterInfo *TRI = 1977 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); 1978 RC = TRI->getSubRegClass(RC, MO.getSubReg()); 1979 1980 // In order to be legal, the common sub-class must be equal to the 1981 // class of the current operand. For example: 1982 // 1983 // v_mov_b32 s0 ; Operand defined as vsrc_32 1984 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1985 // 1986 // s_sendmsg 0, s0 ; Operand defined as m0reg 1987 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1988 1989 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1990 } 1991 1992 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1993 const MCOperandInfo &OpInfo, 1994 const MachineOperand &MO) const { 1995 if (MO.isReg()) 1996 return isLegalRegOperand(MRI, OpInfo, MO); 1997 1998 // Handle non-register types that are treated like immediates. 1999 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 2000 return true; 2001 } 2002 2003 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 2004 const MachineOperand *MO) const { 2005 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2006 const MCInstrDesc &InstDesc = MI.getDesc(); 2007 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 2008 const TargetRegisterClass *DefinedRC = 2009 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 2010 if (!MO) 2011 MO = &MI.getOperand(OpIdx); 2012 2013 if (isVALU(MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 2014 2015 RegSubRegPair SGPRUsed; 2016 if (MO->isReg()) 2017 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); 2018 2019 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 2020 if (i == OpIdx) 2021 continue; 2022 const MachineOperand &Op = MI.getOperand(i); 2023 if (Op.isReg() && 2024 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && 2025 usesConstantBus(MRI, Op, getOpSize(MI, i))) { 2026 return false; 2027 } 2028 } 2029 } 2030 2031 if (MO->isReg()) { 2032 assert(DefinedRC); 2033 return isLegalRegOperand(MRI, OpInfo, *MO); 2034 } 2035 2036 // Handle non-register types that are treated like immediates. 2037 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 2038 2039 if (!DefinedRC) { 2040 // This operand expects an immediate. 
2041 return true; 2042 } 2043 2044 return isImmOperandLegal(MI, OpIdx, *MO); 2045 } 2046 2047 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 2048 MachineInstr &MI) const { 2049 unsigned Opc = MI.getOpcode(); 2050 const MCInstrDesc &InstrDesc = get(Opc); 2051 2052 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 2053 MachineOperand &Src1 = MI.getOperand(Src1Idx); 2054 2055 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 2056 // we need to only have one constant bus use. 2057 // 2058 // Note we do not need to worry about literal constants here. They are 2059 // disabled for the operand type for instructions because they will always 2060 // violate the one constant bus use rule. 2061 bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; 2062 if (HasImplicitSGPR) { 2063 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2064 MachineOperand &Src0 = MI.getOperand(Src0Idx); 2065 2066 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) 2067 legalizeOpWithMove(MI, Src0Idx); 2068 } 2069 2070 // VOP2 src0 instructions support all operand types, so we don't need to check 2071 // their legality. If src1 is already legal, we don't need to do anything. 2072 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) 2073 return; 2074 2075 // We do not use commuteInstruction here because it is too aggressive and will 2076 // commute if it is possible. We only want to commute here if it improves 2077 // legality. This can be called a fairly large number of times so don't waste 2078 // compile time pointlessly swapping and checking legality again. 2079 if (HasImplicitSGPR || !MI.isCommutable()) { 2080 legalizeOpWithMove(MI, Src1Idx); 2081 return; 2082 } 2083 2084 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2085 MachineOperand &Src0 = MI.getOperand(Src0Idx); 2086 2087 // If src0 can be used as src1, commuting will make the operands legal. 2088 // Otherwise we have to give up and insert a move. 2089 // 2090 // TODO: Other immediate-like operand kinds could be commuted if there was a 2091 // MachineOperand::ChangeTo* for them. 2092 if ((!Src1.isImm() && !Src1.isReg()) || 2093 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { 2094 legalizeOpWithMove(MI, Src1Idx); 2095 return; 2096 } 2097 2098 int CommutedOpc = commuteOpcode(MI); 2099 if (CommutedOpc == -1) { 2100 legalizeOpWithMove(MI, Src1Idx); 2101 return; 2102 } 2103 2104 MI.setDesc(get(CommutedOpc)); 2105 2106 unsigned Src0Reg = Src0.getReg(); 2107 unsigned Src0SubReg = Src0.getSubReg(); 2108 bool Src0Kill = Src0.isKill(); 2109 2110 if (Src1.isImm()) 2111 Src0.ChangeToImmediate(Src1.getImm()); 2112 else if (Src1.isReg()) { 2113 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 2114 Src0.setSubReg(Src1.getSubReg()); 2115 } else 2116 llvm_unreachable("Should only have register or immediate operands"); 2117 2118 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 2119 Src1.setSubReg(Src0SubReg); 2120 } 2121 2122 // Legalize VOP3 operands. Because all operand types are supported for any 2123 // operand, and since literal constants are not allowed and should never be 2124 // seen, we only need to worry about inserting copies if we use multiple SGPR 2125 // operands. 
2126 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
2127 MachineInstr &MI) const {
2128 unsigned Opc = MI.getOpcode();
2129
2130 int VOP3Idx[3] = {
2131 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
2132 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
2133 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
2134 };
2135
2136 // Find the one SGPR operand we are allowed to use.
2137 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
2138
2139 for (unsigned i = 0; i < 3; ++i) {
2140 int Idx = VOP3Idx[i];
2141 if (Idx == -1)
2142 break;
2143 MachineOperand &MO = MI.getOperand(Idx);
2144
2145 // We should never see a VOP3 instruction with an illegal immediate operand.
2146 if (!MO.isReg())
2147 continue;
2148
2149 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
2150 continue; // VGPRs are legal
2151
2152 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
2153 SGPRReg = MO.getReg();
2154 // We can use one SGPR in each VOP3 instruction.
2155 continue;
2156 }
2157
2158 // If we make it this far, then the operand is not legal and we must
2159 // legalize it.
2160 legalizeOpWithMove(MI, Idx);
2161 }
2162 }
2163
2164 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
2165 MachineRegisterInfo &MRI) const {
2166 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
2167 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
2168 unsigned DstReg = MRI.createVirtualRegister(SRC);
2169 unsigned SubRegs = VRC->getSize() / 4;
2170
2171 SmallVector<unsigned, 8> SRegs;
2172 for (unsigned i = 0; i < SubRegs; ++i) {
2173 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2174 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
2175 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
2176 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
2177 SRegs.push_back(SGPR);
2178 }
2179
2180 MachineInstrBuilder MIB =
2181 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
2182 get(AMDGPU::REG_SEQUENCE), DstReg);
2183 for (unsigned i = 0; i < SubRegs; ++i) {
2184 MIB.addReg(SRegs[i]);
2185 MIB.addImm(RI.getSubRegFromChannel(i));
2186 }
2187 return DstReg;
2188 }
2189
2190 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
2191 MachineInstr &MI) const {
2192
2193 // If the pointer is stored in VGPRs, then we need to move it to
2194 // SGPRs using v_readfirstlane. This is safe because we only select
2195 // loads with uniform pointers to SMRD instructions, so we know the
2196 // pointer value is uniform.
2197 MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
2198 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
2199 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
2200 SBase->setReg(SGPR);
2201 }
2202 }
2203
2204 void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
2205 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2206
2207 // Legalize VOP2
2208 if (isVOP2(MI) || isVOPC(MI)) {
2209 legalizeOperandsVOP2(MRI, MI);
2210 return;
2211 }
2212
2213 // Legalize VOP3
2214 if (isVOP3(MI)) {
2215 legalizeOperandsVOP3(MRI, MI);
2216 return;
2217 }
2218
2219 // Legalize SMRD
2220 if (isSMRD(MI)) {
2221 legalizeOperandsSMRD(MRI, MI);
2222 return;
2223 }
2224
2225 // Legalize REG_SEQUENCE and PHI
2226 // The register class of the operands must be the same type as the register
2227 // class of the output.
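// For example, if a PHI produces a VGPR value, its SGPR inputs cannot be used
// directly; below, the inputs are copied into registers of the chosen common
// class in their predecessor blocks (just before the terminator) and the PHI
// is rewritten to use the copies.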
2228 if (MI.getOpcode() == AMDGPU::PHI) {
2229 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
2230 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
2231 if (!MI.getOperand(i).isReg() ||
2232 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
2233 continue;
2234 const TargetRegisterClass *OpRC =
2235 MRI.getRegClass(MI.getOperand(i).getReg());
2236 if (RI.hasVGPRs(OpRC)) {
2237 VRC = OpRC;
2238 } else {
2239 SRC = OpRC;
2240 }
2241 }
2242
2243 // If any of the operands are VGPR registers, then they all must be,
2244 // otherwise we will create illegal VGPR->SGPR copies when legalizing
2245 // them.
2246 if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
2247 if (!VRC) {
2248 assert(SRC);
2249 VRC = RI.getEquivalentVGPRClass(SRC);
2250 }
2251 RC = VRC;
2252 } else {
2253 RC = SRC;
2254 }
2255
2256 // Update all the operands so they have the same type.
2257 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2258 MachineOperand &Op = MI.getOperand(I);
2259 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2260 continue;
2261 unsigned DstReg = MRI.createVirtualRegister(RC);
2262
2263 // MI is a PHI instruction.
2264 MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
2265 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2266
2267 BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
2268 .addOperand(Op);
2269 Op.setReg(DstReg);
2270 }
2271 }
2272
2273 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2274 // VGPR dest type and SGPR sources, insert copies so all operands are
2275 // VGPRs. This seems to help operand folding / the register coalescer.
2276 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
2277 MachineBasicBlock *MBB = MI.getParent();
2278 const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
2279 if (RI.hasVGPRs(DstRC)) {
2280 // Update all the operands so they are VGPR register classes. These may
2281 // not be the same register class because REG_SEQUENCE supports mixing
2282 // subregister index types, e.g. sub0_sub1 + sub2 + sub3
2283 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2284 MachineOperand &Op = MI.getOperand(I);
2285 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2286 continue;
2287
2288 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2289 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2290 if (VRC == OpRC)
2291 continue;
2292
2293 unsigned DstReg = MRI.createVirtualRegister(VRC);
2294
2295 BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
2296 .addOperand(Op);
2297
2298 Op.setReg(DstReg);
2299 Op.setIsKill();
2300 }
2301 }
2302
2303 return;
2304 }
2305
2306 // Legalize INSERT_SUBREG
2307 // src0 must have the same register class as dst
2308 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
2309 unsigned Dst = MI.getOperand(0).getReg();
2310 unsigned Src0 = MI.getOperand(1).getReg();
2311 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2312 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2313 if (DstRC != Src0RC) {
2314 MachineBasicBlock &MBB = *MI.getParent();
2315 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
2316 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
2317 .addReg(Src0);
2318 MI.getOperand(1).setReg(NewSrc0);
2319 }
2320 return;
2321 }
2322
2323 // Legalize MIMG
2324 if (isMIMG(MI)) {
2325 MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
2326 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2327 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2328 SRsrc->setReg(SGPR);
2329 }
2330
2331 MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
2332 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2333 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2334 SSamp->setReg(SGPR);
2335 }
2336 return;
2337 }
2338
2339 // Legalize MUBUF* instructions
2340 // FIXME: If we start using the non-addr64 instructions for compute, we
2341 // may need to legalize them here.
2342 int SRsrcIdx =
2343 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
2344 if (SRsrcIdx != -1) {
2345 // We have an MUBUF instruction
2346 MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
2347 unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
2348 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2349 RI.getRegClass(SRsrcRC))) {
2350 // The operands are legal.
2351 // FIXME: We may need to legalize operands besides srsrc.
2352 return;
2353 }
2354
2355 MachineBasicBlock &MBB = *MI.getParent();
2356
2357 // Extract the ptr from the resource descriptor.
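// The 64-bit base pointer lives in the low two dwords (sub0_sub1) of the
// 128-bit resource descriptor. It is extracted below so it can be added into
// vaddr of an ADDR64 form, and a replacement descriptor with a zero base and
// the default data format is built in its place.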
2358 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2359 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
2360
2361 // Create an empty resource descriptor
2362 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2363 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2364 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2365 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
2366 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
2367
2368 // Zero64 = 0
2369 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
2370 .addImm(0);
2371
2372 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2373 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
2374 .addImm(RsrcDataFormat & 0xFFFFFFFF);
2375
2376 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2377 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
2378 .addImm(RsrcDataFormat >> 32);
2379
2380 // NewSRsrc = {Zero64, SRsrcFormat}
2381 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2382 .addReg(Zero64)
2383 .addImm(AMDGPU::sub0_sub1)
2384 .addReg(SRsrcFormatLo)
2385 .addImm(AMDGPU::sub2)
2386 .addReg(SRsrcFormatHi)
2387 .addImm(AMDGPU::sub3);
2388
2389 MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
2390 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2391 if (VAddr) {
2392 // This is already an ADDR64 instruction so we need to add the pointer
2393 // extracted from the resource descriptor to the current value of VAddr.
2394 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2395 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2396
2397 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
2398 DebugLoc DL = MI.getDebugLoc();
2399 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
2400 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2401 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
2402
2403 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
2404 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
2405 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2406 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
2407
2408 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2409 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2410 .addReg(NewVAddrLo)
2411 .addImm(AMDGPU::sub0)
2412 .addReg(NewVAddrHi)
2413 .addImm(AMDGPU::sub1);
2414 } else {
2415 // This instruction is the _OFFSET variant, so we need to convert it to
2416 // ADDR64.
2417 assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
2418 < SISubtarget::VOLCANIC_ISLANDS &&
2419 "FIXME: Need to emit flat atomics here");
2420
2421 MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
2422 MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
2423 MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
2424 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
2425
2426 // Atomics with return have an additional tied operand and are
2427 // missing some of the special bits.
2428 MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
2429 MachineInstr *Addr64;
2430
2431 if (!VDataIn) {
2432 // Regular buffer load / store.
2433 MachineInstrBuilder MIB =
2434 BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
2435 .addOperand(*VData)
2436 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2437 // This will be replaced later 2438 // with the new value of vaddr. 2439 .addOperand(*SRsrc) 2440 .addOperand(*SOffset) 2441 .addOperand(*Offset); 2442 2443 // Atomics do not have this operand. 2444 if (const MachineOperand *GLC = 2445 getNamedOperand(MI, AMDGPU::OpName::glc)) { 2446 MIB.addImm(GLC->getImm()); 2447 } 2448 2449 MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); 2450 2451 if (const MachineOperand *TFE = 2452 getNamedOperand(MI, AMDGPU::OpName::tfe)) { 2453 MIB.addImm(TFE->getImm()); 2454 } 2455 2456 MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 2457 Addr64 = MIB; 2458 } else { 2459 // Atomics with return. 2460 Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) 2461 .addOperand(*VData) 2462 .addOperand(*VDataIn) 2463 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 2464 // This will be replaced later 2465 // with the new value of vaddr. 2466 .addOperand(*SRsrc) 2467 .addOperand(*SOffset) 2468 .addOperand(*Offset) 2469 .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) 2470 .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 2471 } 2472 2473 MI.removeFromParent(); 2474 2475 // NewVaddr = {NewVaddrHi, NewVaddrLo} 2476 BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 2477 NewVAddr) 2478 .addReg(SRsrcPtr, 0, AMDGPU::sub0) 2479 .addImm(AMDGPU::sub0) 2480 .addReg(SRsrcPtr, 0, AMDGPU::sub1) 2481 .addImm(AMDGPU::sub1); 2482 2483 VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr); 2484 SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc); 2485 } 2486 2487 // Update the instruction to use NewVaddr 2488 VAddr->setReg(NewVAddr); 2489 // Update the instruction to use NewSRsrc 2490 SRsrc->setReg(NewSRsrc); 2491 } 2492 } 2493 2494 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { 2495 SmallVector<MachineInstr *, 128> Worklist; 2496 Worklist.push_back(&TopInst); 2497 2498 while (!Worklist.empty()) { 2499 MachineInstr &Inst = *Worklist.pop_back_val(); 2500 MachineBasicBlock *MBB = Inst.getParent(); 2501 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 2502 2503 unsigned Opcode = Inst.getOpcode(); 2504 unsigned NewOpcode = getVALUOp(Inst); 2505 2506 // Handle some special cases 2507 switch (Opcode) { 2508 default: 2509 break; 2510 case AMDGPU::S_AND_B64: 2511 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); 2512 Inst.eraseFromParent(); 2513 continue; 2514 2515 case AMDGPU::S_OR_B64: 2516 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); 2517 Inst.eraseFromParent(); 2518 continue; 2519 2520 case AMDGPU::S_XOR_B64: 2521 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); 2522 Inst.eraseFromParent(); 2523 continue; 2524 2525 case AMDGPU::S_NOT_B64: 2526 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); 2527 Inst.eraseFromParent(); 2528 continue; 2529 2530 case AMDGPU::S_BCNT1_I32_B64: 2531 splitScalar64BitBCNT(Worklist, Inst); 2532 Inst.eraseFromParent(); 2533 continue; 2534 2535 case AMDGPU::S_BFE_I64: { 2536 splitScalar64BitBFE(Worklist, Inst); 2537 Inst.eraseFromParent(); 2538 continue; 2539 } 2540 2541 case AMDGPU::S_LSHL_B32: 2542 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2543 NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 2544 swapOperands(Inst); 2545 } 2546 break; 2547 case AMDGPU::S_ASHR_I32: 2548 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2549 NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 2550 swapOperands(Inst); 2551 } 2552 break; 2553 case AMDGPU::S_LSHR_B32: 2554 if (ST.getGeneration() >= 
SISubtarget::VOLCANIC_ISLANDS) { 2555 NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 2556 swapOperands(Inst); 2557 } 2558 break; 2559 case AMDGPU::S_LSHL_B64: 2560 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2561 NewOpcode = AMDGPU::V_LSHLREV_B64; 2562 swapOperands(Inst); 2563 } 2564 break; 2565 case AMDGPU::S_ASHR_I64: 2566 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2567 NewOpcode = AMDGPU::V_ASHRREV_I64; 2568 swapOperands(Inst); 2569 } 2570 break; 2571 case AMDGPU::S_LSHR_B64: 2572 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2573 NewOpcode = AMDGPU::V_LSHRREV_B64; 2574 swapOperands(Inst); 2575 } 2576 break; 2577 2578 case AMDGPU::S_ABS_I32: 2579 lowerScalarAbs(Worklist, Inst); 2580 Inst.eraseFromParent(); 2581 continue; 2582 2583 case AMDGPU::S_CBRANCH_SCC0: 2584 case AMDGPU::S_CBRANCH_SCC1: 2585 // Clear unused bits of vcc 2586 BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), 2587 AMDGPU::VCC) 2588 .addReg(AMDGPU::EXEC) 2589 .addReg(AMDGPU::VCC); 2590 break; 2591 2592 case AMDGPU::S_BFE_U64: 2593 case AMDGPU::S_BFM_B64: 2594 llvm_unreachable("Moving this op to VALU not implemented"); 2595 } 2596 2597 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 2598 // We cannot move this instruction to the VALU, so we should try to 2599 // legalize its operands instead. 2600 legalizeOperands(Inst); 2601 continue; 2602 } 2603 2604 // Use the new VALU Opcode. 2605 const MCInstrDesc &NewDesc = get(NewOpcode); 2606 Inst.setDesc(NewDesc); 2607 2608 // Remove any references to SCC. Vector instructions can't read from it, and 2609 // We're just about to add the implicit use / defs of VCC, and we don't want 2610 // both. 2611 for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { 2612 MachineOperand &Op = Inst.getOperand(i); 2613 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { 2614 Inst.RemoveOperand(i); 2615 addSCCDefUsersToVALUWorklist(Inst, Worklist); 2616 } 2617 } 2618 2619 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 2620 // We are converting these to a BFE, so we need to add the missing 2621 // operands for the size and offset. 2622 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 2623 Inst.addOperand(MachineOperand::CreateImm(0)); 2624 Inst.addOperand(MachineOperand::CreateImm(Size)); 2625 2626 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 2627 // The VALU version adds the second operand to the result, so insert an 2628 // extra 0 operand. 2629 Inst.addOperand(MachineOperand::CreateImm(0)); 2630 } 2631 2632 Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); 2633 2634 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 2635 const MachineOperand &OffsetWidthOp = Inst.getOperand(2); 2636 // If we need to move this to VGPRs, we need to unpack the second operand 2637 // back into the 2 separate ones for bit offset and width. 2638 assert(OffsetWidthOp.isImm() && 2639 "Scalar BFE is only implemented for constant width and offset"); 2640 uint32_t Imm = OffsetWidthOp.getImm(); 2641 2642 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2643 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2644 Inst.RemoveOperand(2); // Remove old immediate. 2645 Inst.addOperand(MachineOperand::CreateImm(Offset)); 2646 Inst.addOperand(MachineOperand::CreateImm(BitWidth)); 2647 } 2648 2649 bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); 2650 unsigned NewDstReg = AMDGPU::NoRegister; 2651 if (HasDst) { 2652 // Update the destination register class. 
2653 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); 2654 if (!NewDstRC) 2655 continue; 2656 2657 unsigned DstReg = Inst.getOperand(0).getReg(); 2658 NewDstReg = MRI.createVirtualRegister(NewDstRC); 2659 MRI.replaceRegWith(DstReg, NewDstReg); 2660 } 2661 2662 // Legalize the operands 2663 legalizeOperands(Inst); 2664 2665 if (HasDst) 2666 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 2667 } 2668 } 2669 2670 //===----------------------------------------------------------------------===// 2671 // Indirect addressing callbacks 2672 //===----------------------------------------------------------------------===// 2673 2674 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2675 return &AMDGPU::VGPR_32RegClass; 2676 } 2677 2678 void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, 2679 MachineInstr &Inst) const { 2680 MachineBasicBlock &MBB = *Inst.getParent(); 2681 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2682 MachineBasicBlock::iterator MII = Inst; 2683 DebugLoc DL = Inst.getDebugLoc(); 2684 2685 MachineOperand &Dest = Inst.getOperand(0); 2686 MachineOperand &Src = Inst.getOperand(1); 2687 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2688 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2689 2690 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) 2691 .addImm(0) 2692 .addReg(Src.getReg()); 2693 2694 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 2695 .addReg(Src.getReg()) 2696 .addReg(TmpReg); 2697 2698 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2699 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2700 } 2701 2702 void SIInstrInfo::splitScalar64BitUnaryOp( 2703 SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, 2704 unsigned Opcode) const { 2705 MachineBasicBlock &MBB = *Inst.getParent(); 2706 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2707 2708 MachineOperand &Dest = Inst.getOperand(0); 2709 MachineOperand &Src0 = Inst.getOperand(1); 2710 DebugLoc DL = Inst.getDebugLoc(); 2711 2712 MachineBasicBlock::iterator MII = Inst; 2713 2714 const MCInstrDesc &InstDesc = get(Opcode); 2715 const TargetRegisterClass *Src0RC = Src0.isReg() ? 
2716 MRI.getRegClass(Src0.getReg()) : 2717 &AMDGPU::SGPR_32RegClass; 2718 2719 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2720 2721 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2722 AMDGPU::sub0, Src0SubRC); 2723 2724 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2725 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2726 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 2727 2728 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2729 BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2730 .addOperand(SrcReg0Sub0); 2731 2732 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2733 AMDGPU::sub1, Src0SubRC); 2734 2735 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2736 BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2737 .addOperand(SrcReg0Sub1); 2738 2739 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 2740 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2741 .addReg(DestSub0) 2742 .addImm(AMDGPU::sub0) 2743 .addReg(DestSub1) 2744 .addImm(AMDGPU::sub1); 2745 2746 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2747 2748 // We don't need to legalizeOperands here because for a single operand, src0 2749 // will support any kind of input. 2750 2751 // Move all users of this moved value. 2752 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 2753 } 2754 2755 void SIInstrInfo::splitScalar64BitBinaryOp( 2756 SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, 2757 unsigned Opcode) const { 2758 MachineBasicBlock &MBB = *Inst.getParent(); 2759 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2760 2761 MachineOperand &Dest = Inst.getOperand(0); 2762 MachineOperand &Src0 = Inst.getOperand(1); 2763 MachineOperand &Src1 = Inst.getOperand(2); 2764 DebugLoc DL = Inst.getDebugLoc(); 2765 2766 MachineBasicBlock::iterator MII = Inst; 2767 2768 const MCInstrDesc &InstDesc = get(Opcode); 2769 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2770 MRI.getRegClass(Src0.getReg()) : 2771 &AMDGPU::SGPR_32RegClass; 2772 2773 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2774 const TargetRegisterClass *Src1RC = Src1.isReg() ? 
2775 MRI.getRegClass(Src1.getReg()) :
2776 &AMDGPU::SGPR_32RegClass;
2777
2778 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
2779
2780 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2781 AMDGPU::sub0, Src0SubRC);
2782 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2783 AMDGPU::sub0, Src1SubRC);
2784
2785 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
2786 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2787 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
2788
2789 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
2790 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
2791 .addOperand(SrcReg0Sub0)
2792 .addOperand(SrcReg1Sub0);
2793
2794 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2795 AMDGPU::sub1, Src0SubRC);
2796 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2797 AMDGPU::sub1, Src1SubRC);
2798
2799 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
2800 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
2801 .addOperand(SrcReg0Sub1)
2802 .addOperand(SrcReg1Sub1);
2803
2804 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
2805 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2806 .addReg(DestSub0)
2807 .addImm(AMDGPU::sub0)
2808 .addReg(DestSub1)
2809 .addImm(AMDGPU::sub1);
2810
2811 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2812
2813 // Try to legalize the operands in case we need to swap the order to keep it
2814 // valid.
2815 legalizeOperands(LoHalf);
2816 legalizeOperands(HiHalf);
2817
2818 // Move all users of this moved value.
2819 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
2820 }
2821
2822 void SIInstrInfo::splitScalar64BitBCNT(
2823 SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
2824 MachineBasicBlock &MBB = *Inst.getParent();
2825 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2826
2827 MachineBasicBlock::iterator MII = Inst;
2828 DebugLoc DL = Inst.getDebugLoc();
2829
2830 MachineOperand &Dest = Inst.getOperand(0);
2831 MachineOperand &Src = Inst.getOperand(1);
2832
2833 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
2834 const TargetRegisterClass *SrcRC = Src.isReg() ?
2835 MRI.getRegClass(Src.getReg()) :
2836 &AMDGPU::SGPR_32RegClass;
2837
2838 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2839 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2840
2841 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2842
2843 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2844 AMDGPU::sub0, SrcSubRC);
2845 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2846 AMDGPU::sub1, SrcSubRC);
2847
2848 BuildMI(MBB, MII, DL, InstDesc, MidReg)
2849 .addOperand(SrcRegSub0)
2850 .addImm(0);
2851
2852 BuildMI(MBB, MII, DL, InstDesc, ResultReg)
2853 .addOperand(SrcRegSub1)
2854 .addReg(MidReg);
2855
2856 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2857
2858 // We don't need to legalize operands here. src0 for either instruction can be
2859 // an SGPR, and the second input is unused or determined here.
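// The 64-bit population count is formed as bcnt(hi, bcnt(lo, 0)): the first
// V_BCNT counts the low half starting from 0, and the second adds the count
// of the high half onto MidReg.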
2860 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2861 } 2862 2863 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, 2864 MachineInstr &Inst) const { 2865 MachineBasicBlock &MBB = *Inst.getParent(); 2866 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2867 MachineBasicBlock::iterator MII = Inst; 2868 DebugLoc DL = Inst.getDebugLoc(); 2869 2870 MachineOperand &Dest = Inst.getOperand(0); 2871 uint32_t Imm = Inst.getOperand(2).getImm(); 2872 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2873 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2874 2875 (void) Offset; 2876 2877 // Only sext_inreg cases handled. 2878 assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && 2879 Offset == 0 && "Not implemented"); 2880 2881 if (BitWidth < 32) { 2882 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2883 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2884 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2885 2886 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) 2887 .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) 2888 .addImm(0) 2889 .addImm(BitWidth); 2890 2891 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 2892 .addImm(31) 2893 .addReg(MidRegLo); 2894 2895 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2896 .addReg(MidRegLo) 2897 .addImm(AMDGPU::sub0) 2898 .addReg(MidRegHi) 2899 .addImm(AMDGPU::sub1); 2900 2901 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2902 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2903 return; 2904 } 2905 2906 MachineOperand &Src = Inst.getOperand(1); 2907 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2908 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2909 2910 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 2911 .addImm(31) 2912 .addReg(Src.getReg(), 0, AMDGPU::sub0); 2913 2914 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2915 .addReg(Src.getReg(), 0, AMDGPU::sub0) 2916 .addImm(AMDGPU::sub0) 2917 .addReg(TmpReg) 2918 .addImm(AMDGPU::sub1); 2919 2920 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2921 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2922 } 2923 2924 void SIInstrInfo::addUsersToMoveToVALUWorklist( 2925 unsigned DstReg, 2926 MachineRegisterInfo &MRI, 2927 SmallVectorImpl<MachineInstr *> &Worklist) const { 2928 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 2929 E = MRI.use_end(); I != E; ++I) { 2930 MachineInstr &UseMI = *I->getParent(); 2931 if (!canReadVGPR(UseMI, I.getOperandNo())) { 2932 Worklist.push_back(&UseMI); 2933 } 2934 } 2935 } 2936 2937 void SIInstrInfo::addSCCDefUsersToVALUWorklist( 2938 MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const { 2939 // This assumes that all the users of SCC are in the same block 2940 // as the SCC def. 2941 for (MachineBasicBlock::iterator I = SCCDefInst, 2942 E = SCCDefInst.getParent()->end(); 2943 I != E; ++I) { 2944 // Exit if we find another SCC def. 
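// Any instruction past such a def reads the newly defined SCC value, so it
// is not a user of SCCDefInst's result.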
2945 if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1) 2946 return; 2947 2948 if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1) 2949 Worklist.push_back(I); 2950 } 2951 } 2952 2953 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 2954 const MachineInstr &Inst) const { 2955 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 2956 2957 switch (Inst.getOpcode()) { 2958 // For target instructions, getOpRegClass just returns the virtual register 2959 // class associated with the operand, so we need to find an equivalent VGPR 2960 // register class in order to move the instruction to the VALU. 2961 case AMDGPU::COPY: 2962 case AMDGPU::PHI: 2963 case AMDGPU::REG_SEQUENCE: 2964 case AMDGPU::INSERT_SUBREG: 2965 if (RI.hasVGPRs(NewDstRC)) 2966 return nullptr; 2967 2968 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2969 if (!NewDstRC) 2970 return nullptr; 2971 return NewDstRC; 2972 default: 2973 return NewDstRC; 2974 } 2975 } 2976 2977 // Find the one SGPR operand we are allowed to use. 2978 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, 2979 int OpIndices[3]) const { 2980 const MCInstrDesc &Desc = MI.getDesc(); 2981 2982 // Find the one SGPR operand we are allowed to use. 2983 // 2984 // First we need to consider the instruction's operand requirements before 2985 // legalizing. Some operands are required to be SGPRs, such as implicit uses 2986 // of VCC, but we are still bound by the constant bus requirement to only use 2987 // one. 2988 // 2989 // If the operand's class is an SGPR, we can never move it. 2990 2991 unsigned SGPRReg = findImplicitSGPRRead(MI); 2992 if (SGPRReg != AMDGPU::NoRegister) 2993 return SGPRReg; 2994 2995 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2996 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2997 2998 for (unsigned i = 0; i < 3; ++i) { 2999 int Idx = OpIndices[i]; 3000 if (Idx == -1) 3001 break; 3002 3003 const MachineOperand &MO = MI.getOperand(Idx); 3004 if (!MO.isReg()) 3005 continue; 3006 3007 // Is this operand statically required to be an SGPR based on the operand 3008 // constraints? 3009 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); 3010 bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 3011 if (IsRequiredSGPR) 3012 return MO.getReg(); 3013 3014 // If this could be a VGPR or an SGPR, Check the dynamic register class. 3015 unsigned Reg = MO.getReg(); 3016 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 3017 if (RI.isSGPRClass(RegRC)) 3018 UsedSGPRs[i] = Reg; 3019 } 3020 3021 // We don't have a required SGPR operand, so we have a bit more freedom in 3022 // selecting operands to move. 3023 3024 // Try to select the most used SGPR. If an SGPR is equal to one of the 3025 // others, we choose that. 3026 // 3027 // e.g. 3028 // V_FMA_F32 v0, s0, s0, s0 -> No moves 3029 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 3030 3031 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 3032 // prefer those. 
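// Reusing one SGPR across several sources still counts as a single constant
// bus use, so picking the repeated SGPR (if any) avoids inserting any moves.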
3033
3034 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
3035 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
3036 SGPRReg = UsedSGPRs[0];
3037 }
3038
3039 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
3040 if (UsedSGPRs[1] == UsedSGPRs[2])
3041 SGPRReg = UsedSGPRs[1];
3042 }
3043
3044 return SGPRReg;
3045 }
3046
3047 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
3048 unsigned OperandName) const {
3049 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
3050 if (Idx == -1)
3051 return nullptr;
3052
3053 return &MI.getOperand(Idx);
3054 }
3055
3056 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
3057 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
3058 if (ST.isAmdHsaOS()) {
3059 RsrcDataFormat |= (1ULL << 56);
3060
3061 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
3062 // Set MTYPE = 2
3063 RsrcDataFormat |= (2ULL << 59);
3064 }
3065
3066 return RsrcDataFormat;
3067 }
3068
3069 uint64_t SIInstrInfo::getScratchRsrcWords23() const {
3070 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
3071 AMDGPU::RSRC_TID_ENABLE |
3072 0xffffffff; // Size;
3073
3074 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
3075
3076 Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
3077 // IndexStride = 64
3078 (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
3079
3080 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
3081 // Clear them unless we want a huge stride.
3082 if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
3083 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
3084
3085 return Rsrc23;
3086 }
3087
3088 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
3089 unsigned Opc = MI.getOpcode();
3090
3091 return isSMRD(Opc);
3092 }
3093
3094 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
3095 unsigned Opc = MI.getOpcode();
3096
3097 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
3098 }
3099
3100 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
3101 unsigned Opc = MI.getOpcode();
3102 const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
3103 unsigned DescSize = Desc.getSize();
3104
3105 // If we have a definitive size, we can use it. Otherwise we need to inspect
3106 // the operands to know the size.
3107 if (DescSize == 8 || DescSize == 4)
3108 return DescSize;
3109
3110 assert(DescSize == 0);
3111
3112 // 4-byte instructions may have a 32-bit literal encoded after them. Check
3113 // operands that could ever be literals.
3114 if (isVALU(MI) || isSALU(MI)) {
3115 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3116 if (Src0Idx == -1)
3117 return 4; // No operands.
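// A literal constant is encoded as an extra 32-bit dword following the
// 4-byte instruction word, so any VALU/SALU source that needs a literal
// bumps the size to 8 bytes.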
3118 3119 if (isLiteralConstant(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx))) 3120 return 8; 3121 3122 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 3123 if (Src1Idx == -1) 3124 return 4; 3125 3126 if (isLiteralConstant(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx))) 3127 return 8; 3128 3129 return 4; 3130 } 3131 3132 switch (Opc) { 3133 case TargetOpcode::IMPLICIT_DEF: 3134 case TargetOpcode::KILL: 3135 case TargetOpcode::DBG_VALUE: 3136 case TargetOpcode::BUNDLE: 3137 case TargetOpcode::EH_LABEL: 3138 return 0; 3139 case TargetOpcode::INLINEASM: { 3140 const MachineFunction *MF = MI.getParent()->getParent(); 3141 const char *AsmStr = MI.getOperand(0).getSymbolName(); 3142 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); 3143 } 3144 default: 3145 llvm_unreachable("unable to find instruction size"); 3146 } 3147 } 3148 3149 ArrayRef<std::pair<int, const char *>> 3150 SIInstrInfo::getSerializableTargetIndices() const { 3151 static const std::pair<int, const char *> TargetIndices[] = { 3152 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 3153 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 3154 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 3155 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 3156 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 3157 return makeArrayRef(TargetIndices); 3158 } 3159 3160 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 3161 /// post-RA version of misched uses CreateTargetMIHazardRecognizer. 3162 ScheduleHazardRecognizer * 3163 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 3164 const ScheduleDAG *DAG) const { 3165 return new GCNHazardRecognizer(DAG->MF); 3166 } 3167 3168 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 3169 /// pass. 3170 ScheduleHazardRecognizer * 3171 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 3172 return new GCNHazardRecognizer(MF); 3173 } 3174