1 //===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI Implementation of TargetInstrInfo. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include "SIInstrInfo.h" 17 #include "AMDGPUTargetMachine.h" 18 #include "GCNHazardRecognizer.h" 19 #include "SIDefines.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/ScheduleDAG.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 #include "llvm/MC/MCInstrDesc.h" 28 #include "llvm/Support/Debug.h" 29 30 using namespace llvm; 31 32 SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) 33 : AMDGPUInstrInfo(st), RI() {} 34 35 //===----------------------------------------------------------------------===// 36 // TargetInstrInfo callbacks 37 //===----------------------------------------------------------------------===// 38 39 static unsigned getNumOperandsNoGlue(SDNode *Node) { 40 unsigned N = Node->getNumOperands(); 41 while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) 42 --N; 43 return N; 44 } 45 46 static SDValue findChainOperand(SDNode *Load) { 47 SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); 48 assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); 49 return LastOp; 50 } 51 52 /// \brief Returns true if both nodes have the same value for the given 53 /// operand \p Op, or if both nodes do not have this operand. 
54 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { 55 unsigned Opc0 = N0->getMachineOpcode(); 56 unsigned Opc1 = N1->getMachineOpcode(); 57 58 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); 59 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); 60 61 if (Op0Idx == -1 && Op1Idx == -1) 62 return true; 63 64 65 if ((Op0Idx == -1 && Op1Idx != -1) || 66 (Op1Idx == -1 && Op0Idx != -1)) 67 return false; 68 69 // getNamedOperandIdx returns the index for the MachineInstr's operands, 70 // which includes the result as the first operand. We are indexing into the 71 // MachineSDNode's operands, so we need to skip the result operand to get 72 // the real index. 73 --Op0Idx; 74 --Op1Idx; 75 76 return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); 77 } 78 79 bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, 80 AliasAnalysis *AA) const { 81 // TODO: The generic check fails for VALU instructions that should be 82 // rematerializable due to implicit reads of exec. We really want all of the 83 // generic logic for this except for this. 84 switch (MI->getOpcode()) { 85 case AMDGPU::V_MOV_B32_e32: 86 case AMDGPU::V_MOV_B32_e64: 87 case AMDGPU::V_MOV_B64_PSEUDO: 88 return true; 89 default: 90 return false; 91 } 92 } 93 94 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, 95 int64_t &Offset0, 96 int64_t &Offset1) const { 97 if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) 98 return false; 99 100 unsigned Opc0 = Load0->getMachineOpcode(); 101 unsigned Opc1 = Load1->getMachineOpcode(); 102 103 // Make sure both are actually loads. 104 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) 105 return false; 106 107 if (isDS(Opc0) && isDS(Opc1)) { 108 109 // FIXME: Handle this case: 110 if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) 111 return false; 112 113 // Check base reg. 114 if (Load0->getOperand(1) != Load1->getOperand(1)) 115 return false; 116 117 // Check chain. 
118 if (findChainOperand(Load0) != findChainOperand(Load1)) 119 return false; 120 121 // Skip read2 / write2 variants for simplicity. 122 // TODO: We should report true if the used offsets are adjacent (excluded 123 // st64 versions). 124 if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || 125 AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) 126 return false; 127 128 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); 129 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); 130 return true; 131 } 132 133 if (isSMRD(Opc0) && isSMRD(Opc1)) { 134 assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); 135 136 // Check base reg. 137 if (Load0->getOperand(0) != Load1->getOperand(0)) 138 return false; 139 140 const ConstantSDNode *Load0Offset = 141 dyn_cast<ConstantSDNode>(Load0->getOperand(1)); 142 const ConstantSDNode *Load1Offset = 143 dyn_cast<ConstantSDNode>(Load1->getOperand(1)); 144 145 if (!Load0Offset || !Load1Offset) 146 return false; 147 148 // Check chain. 149 if (findChainOperand(Load0) != findChainOperand(Load1)) 150 return false; 151 152 Offset0 = Load0Offset->getZExtValue(); 153 Offset1 = Load1Offset->getZExtValue(); 154 return true; 155 } 156 157 // MUBUF and MTBUF can access the same addresses. 158 if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { 159 160 // MUBUF and MTBUF have vaddr at different indices. 
161 if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || 162 findChainOperand(Load0) != findChainOperand(Load1) || 163 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || 164 !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) 165 return false; 166 167 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); 168 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); 169 170 if (OffIdx0 == -1 || OffIdx1 == -1) 171 return false; 172 173 // getNamedOperandIdx returns the index for MachineInstrs. Since they 174 // inlcude the output in the operand list, but SDNodes don't, we need to 175 // subtract the index by one. 176 --OffIdx0; 177 --OffIdx1; 178 179 SDValue Off0 = Load0->getOperand(OffIdx0); 180 SDValue Off1 = Load1->getOperand(OffIdx1); 181 182 // The offset might be a FrameIndexSDNode. 183 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 184 return false; 185 186 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 187 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 188 return true; 189 } 190 191 return false; 192 } 193 194 static bool isStride64(unsigned Opc) { 195 switch (Opc) { 196 case AMDGPU::DS_READ2ST64_B32: 197 case AMDGPU::DS_READ2ST64_B64: 198 case AMDGPU::DS_WRITE2ST64_B32: 199 case AMDGPU::DS_WRITE2ST64_B64: 200 return true; 201 default: 202 return false; 203 } 204 } 205 206 bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, 207 int64_t &Offset, 208 const TargetRegisterInfo *TRI) const { 209 unsigned Opc = LdSt->getOpcode(); 210 211 if (isDS(*LdSt)) { 212 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 213 AMDGPU::OpName::offset); 214 if (OffsetImm) { 215 // Normal, single offset LDS instruction. 
216 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 217 AMDGPU::OpName::addr); 218 219 BaseReg = AddrReg->getReg(); 220 Offset = OffsetImm->getImm(); 221 return true; 222 } 223 224 // The 2 offset instructions use offset0 and offset1 instead. We can treat 225 // these as a load with a single offset if the 2 offsets are consecutive. We 226 // will use this for some partially aligned loads. 227 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, 228 AMDGPU::OpName::offset0); 229 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, 230 AMDGPU::OpName::offset1); 231 232 uint8_t Offset0 = Offset0Imm->getImm(); 233 uint8_t Offset1 = Offset1Imm->getImm(); 234 235 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 236 // Each of these offsets is in element sized units, so we need to convert 237 // to bytes of the individual reads. 238 239 unsigned EltSize; 240 if (LdSt->mayLoad()) 241 EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; 242 else { 243 assert(LdSt->mayStore()); 244 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 245 EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); 246 } 247 248 if (isStride64(Opc)) 249 EltSize *= 64; 250 251 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 252 AMDGPU::OpName::addr); 253 BaseReg = AddrReg->getReg(); 254 Offset = EltSize * Offset0; 255 return true; 256 } 257 258 return false; 259 } 260 261 if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) { 262 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 263 return false; 264 265 const MachineOperand *AddrReg = getNamedOperand(*LdSt, 266 AMDGPU::OpName::vaddr); 267 if (!AddrReg) 268 return false; 269 270 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 271 AMDGPU::OpName::offset); 272 BaseReg = AddrReg->getReg(); 273 Offset = OffsetImm->getImm(); 274 return true; 275 } 276 277 if (isSMRD(*LdSt)) { 278 const MachineOperand *OffsetImm = getNamedOperand(*LdSt, 279 AMDGPU::OpName::offset); 280 if (!OffsetImm) 281 return false; 
282 283 const MachineOperand *SBaseReg = getNamedOperand(*LdSt, 284 AMDGPU::OpName::sbase); 285 BaseReg = SBaseReg->getReg(); 286 Offset = OffsetImm->getImm(); 287 return true; 288 } 289 290 if (isFLAT(*LdSt)) { 291 const MachineOperand *AddrReg = getNamedOperand(*LdSt, AMDGPU::OpName::addr); 292 BaseReg = AddrReg->getReg(); 293 Offset = 0; 294 return true; 295 } 296 297 return false; 298 } 299 300 bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt, 301 MachineInstr *SecondLdSt, 302 unsigned NumLoads) const { 303 const MachineOperand *FirstDst = nullptr; 304 const MachineOperand *SecondDst = nullptr; 305 306 if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) { 307 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst); 308 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst); 309 } 310 311 if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) { 312 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst); 313 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst); 314 } 315 316 if ((isMUBUF(*FirstLdSt) && isMUBUF(*SecondLdSt)) || 317 (isMTBUF(*FirstLdSt) && isMTBUF(*SecondLdSt))) { 318 FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata); 319 SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata); 320 } 321 322 if (!FirstDst || !SecondDst) 323 return false; 324 325 // Try to limit clustering based on the total number of bytes loaded 326 // rather than the number of instructions. This is done to help reduce 327 // register pressure. The method used is somewhat inexact, though, 328 // because it assumes that all loads in the cluster will load the 329 // same number of bytes as FirstLdSt. 330 331 // The unit of this value is bytes. 332 // FIXME: This needs finer tuning. 
333 unsigned LoadClusterThreshold = 16; 334 335 const MachineRegisterInfo &MRI = 336 FirstLdSt->getParent()->getParent()->getRegInfo(); 337 const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); 338 339 return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; 340 } 341 342 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 343 MachineBasicBlock::iterator MI, 344 const DebugLoc &DL, unsigned DestReg, 345 unsigned SrcReg, bool KillSrc) const { 346 347 // If we are trying to copy to or from SCC, there is a bug somewhere else in 348 // the backend. While it may be theoretically possible to do this, it should 349 // never be necessary. 350 assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 351 352 static const int16_t Sub0_15[] = { 353 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 354 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 355 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 356 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 357 }; 358 359 static const int16_t Sub0_15_64[] = { 360 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 361 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 362 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 363 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 364 }; 365 366 static const int16_t Sub0_7[] = { 367 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 368 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 369 }; 370 371 static const int16_t Sub0_7_64[] = { 372 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 373 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 374 }; 375 376 static const int16_t Sub0_3[] = { 377 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 378 }; 379 380 static const int16_t Sub0_3_64[] = { 381 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 382 }; 383 384 static const int16_t Sub0_2[] = { 385 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 386 }; 387 388 static const int16_t Sub0_1[] = { 389 AMDGPU::sub0, AMDGPU::sub1, 390 }; 391 392 unsigned Opcode; 393 ArrayRef<int16_t> SubIndices; 394 bool 
Forward; 395 396 if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 397 assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 398 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 399 .addReg(SrcReg, getKillRegState(KillSrc)); 400 return; 401 402 } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 403 if (DestReg == AMDGPU::VCC) { 404 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 405 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 406 .addReg(SrcReg, getKillRegState(KillSrc)); 407 } else { 408 // FIXME: Hack until VReg_1 removed. 409 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 410 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) 411 .addImm(0) 412 .addReg(SrcReg, getKillRegState(KillSrc)); 413 } 414 415 return; 416 } 417 418 assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 419 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 420 .addReg(SrcReg, getKillRegState(KillSrc)); 421 return; 422 423 } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 424 assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 425 Opcode = AMDGPU::S_MOV_B64; 426 SubIndices = Sub0_3_64; 427 428 } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 429 assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 430 Opcode = AMDGPU::S_MOV_B64; 431 SubIndices = Sub0_7_64; 432 433 } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 434 assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 435 Opcode = AMDGPU::S_MOV_B64; 436 SubIndices = Sub0_15_64; 437 438 } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 439 assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 440 AMDGPU::SReg_32RegClass.contains(SrcReg)); 441 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 442 .addReg(SrcReg, getKillRegState(KillSrc)); 443 return; 444 445 } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 446 assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 447 AMDGPU::SReg_64RegClass.contains(SrcReg)); 448 Opcode = AMDGPU::V_MOV_B32_e32; 449 SubIndices = Sub0_1; 450 451 } else if 
(AMDGPU::VReg_96RegClass.contains(DestReg)) { 452 assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 453 Opcode = AMDGPU::V_MOV_B32_e32; 454 SubIndices = Sub0_2; 455 456 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 457 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 458 AMDGPU::SReg_128RegClass.contains(SrcReg)); 459 Opcode = AMDGPU::V_MOV_B32_e32; 460 SubIndices = Sub0_3; 461 462 } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 463 assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 464 AMDGPU::SReg_256RegClass.contains(SrcReg)); 465 Opcode = AMDGPU::V_MOV_B32_e32; 466 SubIndices = Sub0_7; 467 468 } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 469 assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 470 AMDGPU::SReg_512RegClass.contains(SrcReg)); 471 Opcode = AMDGPU::V_MOV_B32_e32; 472 SubIndices = Sub0_15; 473 474 } else { 475 llvm_unreachable("Can't copy register!"); 476 } 477 478 if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) 479 Forward = true; 480 else 481 Forward = false; 482 483 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 484 unsigned SubIdx; 485 if (Forward) 486 SubIdx = SubIndices[Idx]; 487 else 488 SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 489 490 MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 491 get(Opcode), RI.getSubReg(DestReg, SubIdx)); 492 493 Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); 494 495 if (Idx == SubIndices.size() - 1) 496 Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); 497 498 if (Idx == 0) 499 Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 500 } 501 } 502 503 int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 504 const unsigned Opcode = MI.getOpcode(); 505 506 int NewOpc; 507 508 // Try to map original to commuted opcode 509 NewOpc = AMDGPU::getCommuteRev(Opcode); 510 if (NewOpc != -1) 511 // Check if the commuted (REV) opcode exists on the target. 512 return pseudoToMCOpcode(NewOpc) != -1 ? 
NewOpc : -1; 513 514 // Try to map commuted to original opcode 515 NewOpc = AMDGPU::getCommuteOrig(Opcode); 516 if (NewOpc != -1) 517 // Check if the original (non-REV) opcode exists on the target. 518 return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 519 520 return Opcode; 521 } 522 523 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 524 525 if (DstRC->getSize() == 4) { 526 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 527 } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 528 return AMDGPU::S_MOV_B64; 529 } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 530 return AMDGPU::V_MOV_B64_PSEUDO; 531 } 532 return AMDGPU::COPY; 533 } 534 535 static unsigned getSGPRSpillSaveOpcode(unsigned Size) { 536 switch (Size) { 537 case 4: 538 return AMDGPU::SI_SPILL_S32_SAVE; 539 case 8: 540 return AMDGPU::SI_SPILL_S64_SAVE; 541 case 16: 542 return AMDGPU::SI_SPILL_S128_SAVE; 543 case 32: 544 return AMDGPU::SI_SPILL_S256_SAVE; 545 case 64: 546 return AMDGPU::SI_SPILL_S512_SAVE; 547 default: 548 llvm_unreachable("unknown register size"); 549 } 550 } 551 552 static unsigned getVGPRSpillSaveOpcode(unsigned Size) { 553 switch (Size) { 554 case 4: 555 return AMDGPU::SI_SPILL_V32_SAVE; 556 case 8: 557 return AMDGPU::SI_SPILL_V64_SAVE; 558 case 12: 559 return AMDGPU::SI_SPILL_V96_SAVE; 560 case 16: 561 return AMDGPU::SI_SPILL_V128_SAVE; 562 case 32: 563 return AMDGPU::SI_SPILL_V256_SAVE; 564 case 64: 565 return AMDGPU::SI_SPILL_V512_SAVE; 566 default: 567 llvm_unreachable("unknown register size"); 568 } 569 } 570 571 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 572 MachineBasicBlock::iterator MI, 573 unsigned SrcReg, bool isKill, 574 int FrameIndex, 575 const TargetRegisterClass *RC, 576 const TargetRegisterInfo *TRI) const { 577 MachineFunction *MF = MBB.getParent(); 578 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 579 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 
580 DebugLoc DL = MBB.findDebugLoc(MI); 581 582 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 583 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 584 MachinePointerInfo PtrInfo 585 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 586 MachineMemOperand *MMO 587 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 588 Size, Align); 589 590 if (RI.isSGPRClass(RC)) { 591 MFI->setHasSpilledSGPRs(); 592 593 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { 594 // m0 may not be allowed for readlane. 595 MachineRegisterInfo &MRI = MF->getRegInfo(); 596 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); 597 } 598 599 // We are only allowed to create one new instruction when spilling 600 // registers, so we need to use pseudo instruction for spilling 601 // SGPRs. 602 unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); 603 BuildMI(MBB, MI, DL, get(Opcode)) 604 .addReg(SrcReg) // src 605 .addFrameIndex(FrameIndex) // frame_idx 606 .addMemOperand(MMO); 607 608 return; 609 } 610 611 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 612 LLVMContext &Ctx = MF->getFunction()->getContext(); 613 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 614 " spill register"); 615 BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 616 .addReg(SrcReg); 617 618 return; 619 } 620 621 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 622 623 unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); 624 MFI->setHasSpilledVGPRs(); 625 BuildMI(MBB, MI, DL, get(Opcode)) 626 .addReg(SrcReg) // src 627 .addFrameIndex(FrameIndex) // frame_idx 628 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 629 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 630 .addImm(0) // offset 631 .addMemOperand(MMO); 632 } 633 634 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 635 switch (Size) { 636 case 4: 637 return AMDGPU::SI_SPILL_S32_RESTORE; 638 case 8: 639 return AMDGPU::SI_SPILL_S64_RESTORE; 640 case 
16: 641 return AMDGPU::SI_SPILL_S128_RESTORE; 642 case 32: 643 return AMDGPU::SI_SPILL_S256_RESTORE; 644 case 64: 645 return AMDGPU::SI_SPILL_S512_RESTORE; 646 default: 647 llvm_unreachable("unknown register size"); 648 } 649 } 650 651 static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 652 switch (Size) { 653 case 4: 654 return AMDGPU::SI_SPILL_V32_RESTORE; 655 case 8: 656 return AMDGPU::SI_SPILL_V64_RESTORE; 657 case 12: 658 return AMDGPU::SI_SPILL_V96_RESTORE; 659 case 16: 660 return AMDGPU::SI_SPILL_V128_RESTORE; 661 case 32: 662 return AMDGPU::SI_SPILL_V256_RESTORE; 663 case 64: 664 return AMDGPU::SI_SPILL_V512_RESTORE; 665 default: 666 llvm_unreachable("unknown register size"); 667 } 668 } 669 670 void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 671 MachineBasicBlock::iterator MI, 672 unsigned DestReg, int FrameIndex, 673 const TargetRegisterClass *RC, 674 const TargetRegisterInfo *TRI) const { 675 MachineFunction *MF = MBB.getParent(); 676 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 677 MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 678 DebugLoc DL = MBB.findDebugLoc(MI); 679 unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 680 unsigned Size = FrameInfo->getObjectSize(FrameIndex); 681 682 MachinePointerInfo PtrInfo 683 = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 684 685 MachineMemOperand *MMO = MF->getMachineMemOperand( 686 PtrInfo, MachineMemOperand::MOLoad, Size, Align); 687 688 if (RI.isSGPRClass(RC)) { 689 // FIXME: Maybe this should not include a memoperand because it will be 690 // lowered to non-memory instructions. 691 unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); 692 693 if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { 694 // m0 may not be allowed for readlane. 
695 MachineRegisterInfo &MRI = MF->getRegInfo(); 696 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); 697 } 698 699 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 700 .addFrameIndex(FrameIndex) // frame_idx 701 .addMemOperand(MMO); 702 703 return; 704 } 705 706 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 707 LLVMContext &Ctx = MF->getFunction()->getContext(); 708 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" 709 " restore register"); 710 BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); 711 712 return; 713 } 714 715 assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 716 717 unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); 718 BuildMI(MBB, MI, DL, get(Opcode), DestReg) 719 .addFrameIndex(FrameIndex) // frame_idx 720 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 721 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 722 .addImm(0) // offset 723 .addMemOperand(MMO); 724 } 725 726 /// \param @Offset Offset in bytes of the FrameIndex being spilled 727 unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, 728 MachineBasicBlock::iterator MI, 729 RegScavenger *RS, unsigned TmpReg, 730 unsigned FrameOffset, 731 unsigned Size) const { 732 MachineFunction *MF = MBB.getParent(); 733 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 734 const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>(); 735 const SIRegisterInfo *TRI = 736 static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); 737 DebugLoc DL = MBB.findDebugLoc(MI); 738 unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); 739 unsigned WavefrontSize = ST.getWavefrontSize(); 740 741 unsigned TIDReg = MFI->getTIDReg(); 742 if (!MFI->hasCalculatedTID()) { 743 MachineBasicBlock &Entry = MBB.getParent()->front(); 744 MachineBasicBlock::iterator Insert = Entry.front(); 745 DebugLoc DL = Insert->getDebugLoc(); 746 747 TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass); 748 if 
(TIDReg == AMDGPU::NoRegister) 749 return TIDReg; 750 751 752 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && 753 WorkGroupSize > WavefrontSize) { 754 755 unsigned TIDIGXReg 756 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); 757 unsigned TIDIGYReg 758 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); 759 unsigned TIDIGZReg 760 = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); 761 unsigned InputPtrReg = 762 TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); 763 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { 764 if (!Entry.isLiveIn(Reg)) 765 Entry.addLiveIn(Reg); 766 } 767 768 RS->enterBasicBlock(Entry); 769 // FIXME: Can we scavenge an SReg_64 and access the subregs? 770 unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); 771 unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); 772 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) 773 .addReg(InputPtrReg) 774 .addImm(SI::KernelInputOffsets::NGROUPS_Z); 775 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) 776 .addReg(InputPtrReg) 777 .addImm(SI::KernelInputOffsets::NGROUPS_Y); 778 779 // NGROUPS.X * NGROUPS.Y 780 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) 781 .addReg(STmp1) 782 .addReg(STmp0); 783 // (NGROUPS.X * NGROUPS.Y) * TIDIG.X 784 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) 785 .addReg(STmp1) 786 .addReg(TIDIGXReg); 787 // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X) 788 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) 789 .addReg(STmp0) 790 .addReg(TIDIGYReg) 791 .addReg(TIDReg); 792 // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z 793 BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg) 794 .addReg(TIDReg) 795 .addReg(TIDIGZReg); 796 } else { 797 // Get the wave id 798 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), 799 TIDReg) 800 .addImm(-1) 801 .addImm(0); 
802 803 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), 804 TIDReg) 805 .addImm(-1) 806 .addReg(TIDReg); 807 } 808 809 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), 810 TIDReg) 811 .addImm(2) 812 .addReg(TIDReg); 813 MFI->setTIDReg(TIDReg); 814 } 815 816 // Add FrameIndex to LDS offset 817 unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize); 818 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) 819 .addImm(LDSOffset) 820 .addReg(TIDReg); 821 822 return TmpReg; 823 } 824 825 void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, 826 MachineBasicBlock::iterator MI, 827 int Count) const { 828 DebugLoc DL = MBB.findDebugLoc(MI); 829 while (Count > 0) { 830 int Arg; 831 if (Count >= 8) 832 Arg = 7; 833 else 834 Arg = Count - 1; 835 Count -= 8; 836 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)) 837 .addImm(Arg); 838 } 839 } 840 841 void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, 842 MachineBasicBlock::iterator MI) const { 843 insertWaitStates(MBB, MI, 1); 844 } 845 846 unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { 847 switch (MI.getOpcode()) { 848 default: return 1; // FIXME: Do wait states equal cycles? 849 850 case AMDGPU::S_NOP: 851 return MI.getOperand(0).getImm() + 1; 852 } 853 } 854 855 bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { 856 MachineBasicBlock &MBB = *MI->getParent(); 857 DebugLoc DL = MBB.findDebugLoc(MI); 858 switch (MI->getOpcode()) { 859 default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); 860 861 case AMDGPU::SGPR_USE: 862 // This is just a placeholder for register allocation. 863 MI->eraseFromParent(); 864 break; 865 866 case AMDGPU::V_MOV_B64_PSEUDO: { 867 unsigned Dst = MI->getOperand(0).getReg(); 868 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 869 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 870 871 const MachineOperand &SrcOp = MI->getOperand(1); 872 // FIXME: Will this work for 64-bit floating point immediates? 
873 assert(!SrcOp.isFPImm()); 874 if (SrcOp.isImm()) { 875 APInt Imm(64, SrcOp.getImm()); 876 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 877 .addImm(Imm.getLoBits(32).getZExtValue()) 878 .addReg(Dst, RegState::Implicit | RegState::Define); 879 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 880 .addImm(Imm.getHiBits(32).getZExtValue()) 881 .addReg(Dst, RegState::Implicit | RegState::Define); 882 } else { 883 assert(SrcOp.isReg()); 884 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 885 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 886 .addReg(Dst, RegState::Implicit | RegState::Define); 887 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 888 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 889 .addReg(Dst, RegState::Implicit | RegState::Define); 890 } 891 MI->eraseFromParent(); 892 break; 893 } 894 895 case AMDGPU::V_CNDMASK_B64_PSEUDO: { 896 unsigned Dst = MI->getOperand(0).getReg(); 897 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 898 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 899 unsigned Src0 = MI->getOperand(1).getReg(); 900 unsigned Src1 = MI->getOperand(2).getReg(); 901 const MachineOperand &SrcCond = MI->getOperand(3); 902 903 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) 904 .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) 905 .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) 906 .addReg(SrcCond.getReg()) 907 .addReg(Dst, RegState::Implicit | RegState::Define); 908 BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) 909 .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) 910 .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) 911 .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill())) 912 .addReg(Dst, RegState::Implicit | RegState::Define); 913 MI->eraseFromParent(); 914 break; 915 } 916 917 case AMDGPU::SI_CONSTDATA_PTR: { 918 const SIRegisterInfo *TRI = 919 static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); 920 MachineFunction &MF = *MBB.getParent(); 921 unsigned Reg = MI->getOperand(0).getReg(); 
922 unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); 923 unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); 924 925 // Create a bundle so these instructions won't be re-ordered by the 926 // post-RA scheduler. 927 MIBundleBuilder Bundler(MBB, MI); 928 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); 929 930 // Add 32-bit offset from this instruction to the start of the 931 // constant data. 932 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) 933 .addReg(RegLo) 934 .addOperand(MI->getOperand(1))); 935 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) 936 .addReg(RegHi) 937 .addImm(0)); 938 939 llvm::finalizeBundle(MBB, Bundler.begin()); 940 941 MI->eraseFromParent(); 942 break; 943 } 944 } 945 return true; 946 } 947 948 /// Commutes the operands in the given instruction. 949 /// The commutable operands are specified by their indices OpIdx0 and OpIdx1. 950 /// 951 /// Do not call this method for a non-commutable instruction or for 952 /// non-commutable pair of operand indices OpIdx0 and OpIdx1. 953 /// Even though the instruction is commutable, the method may still 954 /// fail to commute the operands, null pointer is returned in such cases. 
955 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, 956 bool NewMI, 957 unsigned OpIdx0, 958 unsigned OpIdx1) const { 959 int CommutedOpcode = commuteOpcode(*MI); 960 if (CommutedOpcode == -1) 961 return nullptr; 962 963 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 964 AMDGPU::OpName::src0); 965 MachineOperand &Src0 = MI->getOperand(Src0Idx); 966 if (!Src0.isReg()) 967 return nullptr; 968 969 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 970 AMDGPU::OpName::src1); 971 972 if ((OpIdx0 != static_cast<unsigned>(Src0Idx) || 973 OpIdx1 != static_cast<unsigned>(Src1Idx)) && 974 (OpIdx0 != static_cast<unsigned>(Src1Idx) || 975 OpIdx1 != static_cast<unsigned>(Src0Idx))) 976 return nullptr; 977 978 MachineOperand &Src1 = MI->getOperand(Src1Idx); 979 980 981 if (isVOP2(*MI) || isVOPC(*MI)) { 982 const MCInstrDesc &InstrDesc = MI->getDesc(); 983 // For VOP2 and VOPC instructions, any operand type is valid to use for 984 // src0. Make sure we can use the src0 as src1. 985 // 986 // We could be stricter here and only allow commuting if there is a reason 987 // to do so. i.e. if both operands are VGPRs there is no real benefit, 988 // although MachineCSE attempts to find matches by commuting. 989 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 990 if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) 991 return nullptr; 992 } 993 994 if (!Src1.isReg()) { 995 // Allow commuting instructions with Imm operands. 996 if (NewMI || !Src1.isImm() || 997 (!isVOP2(*MI) && !isVOP3(*MI))) { 998 return nullptr; 999 } 1000 // Be sure to copy the source modifiers to the right place. 
1001 if (MachineOperand *Src0Mods 1002 = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { 1003 MachineOperand *Src1Mods 1004 = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); 1005 1006 int Src0ModsVal = Src0Mods->getImm(); 1007 if (!Src1Mods && Src0ModsVal != 0) 1008 return nullptr; 1009 1010 // XXX - This assert might be a lie. It might be useful to have a neg 1011 // modifier with 0.0. 1012 int Src1ModsVal = Src1Mods->getImm(); 1013 assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 1014 1015 Src1Mods->setImm(Src0ModsVal); 1016 Src0Mods->setImm(Src1ModsVal); 1017 } 1018 1019 unsigned Reg = Src0.getReg(); 1020 unsigned SubReg = Src0.getSubReg(); 1021 if (Src1.isImm()) 1022 Src0.ChangeToImmediate(Src1.getImm()); 1023 else 1024 llvm_unreachable("Should only have immediates"); 1025 1026 Src1.ChangeToRegister(Reg, false); 1027 Src1.setSubReg(SubReg); 1028 } else { 1029 MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); 1030 } 1031 1032 if (MI) 1033 MI->setDesc(get(CommutedOpcode)); 1034 1035 return MI; 1036 } 1037 1038 // This needs to be implemented because the source modifiers may be inserted 1039 // between the true commutable operands, and the base 1040 // TargetInstrInfo::commuteInstruction uses it. 1041 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, 1042 unsigned &SrcOpIdx0, 1043 unsigned &SrcOpIdx1) const { 1044 const MCInstrDesc &MCID = MI->getDesc(); 1045 if (!MCID.isCommutable()) 1046 return false; 1047 1048 unsigned Opc = MI->getOpcode(); 1049 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 1050 if (Src0Idx == -1) 1051 return false; 1052 1053 // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 1054 // immediate. 
Also, immediate src0 operand is not handled in 1055 // SIInstrInfo::commuteInstruction(); 1056 if (!MI->getOperand(Src0Idx).isReg()) 1057 return false; 1058 1059 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 1060 if (Src1Idx == -1) 1061 return false; 1062 1063 MachineOperand &Src1 = MI->getOperand(Src1Idx); 1064 if (Src1.isImm()) { 1065 // SIInstrInfo::commuteInstruction() does support commuting the immediate 1066 // operand src1 in 2 and 3 operand instructions. 1067 if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode())) 1068 return false; 1069 } else if (Src1.isReg()) { 1070 // If any source modifiers are set, the generic instruction commuting won't 1071 // understand how to copy the source modifiers. 1072 if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || 1073 hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) 1074 return false; 1075 } else 1076 return false; 1077 1078 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 1079 } 1080 1081 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 1082 switch (Cond) { 1083 case SIInstrInfo::SCC_TRUE: 1084 return AMDGPU::S_CBRANCH_SCC1; 1085 case SIInstrInfo::SCC_FALSE: 1086 return AMDGPU::S_CBRANCH_SCC0; 1087 case SIInstrInfo::VCCNZ: 1088 return AMDGPU::S_CBRANCH_VCCNZ; 1089 case SIInstrInfo::VCCZ: 1090 return AMDGPU::S_CBRANCH_VCCZ; 1091 case SIInstrInfo::EXECNZ: 1092 return AMDGPU::S_CBRANCH_EXECNZ; 1093 case SIInstrInfo::EXECZ: 1094 return AMDGPU::S_CBRANCH_EXECZ; 1095 default: 1096 llvm_unreachable("invalid branch predicate"); 1097 } 1098 } 1099 1100 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 1101 switch (Opcode) { 1102 case AMDGPU::S_CBRANCH_SCC0: 1103 return SCC_FALSE; 1104 case AMDGPU::S_CBRANCH_SCC1: 1105 return SCC_TRUE; 1106 case AMDGPU::S_CBRANCH_VCCNZ: 1107 return VCCNZ; 1108 case AMDGPU::S_CBRANCH_VCCZ: 1109 return VCCZ; 1110 case AMDGPU::S_CBRANCH_EXECNZ: 1111 return EXECNZ; 1112 case 
AMDGPU::S_CBRANCH_EXECZ: 1113 return EXECZ; 1114 default: 1115 return INVALID_BR; 1116 } 1117 } 1118 1119 bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 1120 MachineBasicBlock *&TBB, 1121 MachineBasicBlock *&FBB, 1122 SmallVectorImpl<MachineOperand> &Cond, 1123 bool AllowModify) const { 1124 MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1125 1126 if (I == MBB.end()) 1127 return false; 1128 1129 if (I->getOpcode() == AMDGPU::S_BRANCH) { 1130 // Unconditional Branch 1131 TBB = I->getOperand(0).getMBB(); 1132 return false; 1133 } 1134 1135 BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 1136 if (Pred == INVALID_BR) 1137 return true; 1138 1139 MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); 1140 Cond.push_back(MachineOperand::CreateImm(Pred)); 1141 1142 ++I; 1143 1144 if (I == MBB.end()) { 1145 // Conditional branch followed by fall-through. 1146 TBB = CondBB; 1147 return false; 1148 } 1149 1150 if (I->getOpcode() == AMDGPU::S_BRANCH) { 1151 TBB = CondBB; 1152 FBB = I->getOperand(0).getMBB(); 1153 return false; 1154 } 1155 1156 return true; 1157 } 1158 1159 unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 1160 MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1161 1162 unsigned Count = 0; 1163 while (I != MBB.end()) { 1164 MachineBasicBlock::iterator Next = std::next(I); 1165 I->eraseFromParent(); 1166 ++Count; 1167 I = Next; 1168 } 1169 1170 return Count; 1171 } 1172 1173 unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB, 1174 MachineBasicBlock *TBB, 1175 MachineBasicBlock *FBB, 1176 ArrayRef<MachineOperand> Cond, 1177 const DebugLoc &DL) const { 1178 1179 if (!FBB && Cond.empty()) { 1180 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 1181 .addMBB(TBB); 1182 return 1; 1183 } 1184 1185 assert(TBB && Cond[0].isImm()); 1186 1187 unsigned Opcode 1188 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); 1189 1190 if (!FBB) { 1191 BuildMI(&MBB, DL, get(Opcode)) 1192 .addMBB(TBB); 1193 return 1; 
1194 } 1195 1196 assert(TBB && FBB); 1197 1198 BuildMI(&MBB, DL, get(Opcode)) 1199 .addMBB(TBB); 1200 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 1201 .addMBB(FBB); 1202 1203 return 2; 1204 } 1205 1206 bool SIInstrInfo::ReverseBranchCondition( 1207 SmallVectorImpl<MachineOperand> &Cond) const { 1208 assert(Cond.size() == 1); 1209 Cond[0].setImm(-Cond[0].getImm()); 1210 return false; 1211 } 1212 1213 static void removeModOperands(MachineInstr &MI) { 1214 unsigned Opc = MI.getOpcode(); 1215 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1216 AMDGPU::OpName::src0_modifiers); 1217 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1218 AMDGPU::OpName::src1_modifiers); 1219 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1220 AMDGPU::OpName::src2_modifiers); 1221 1222 MI.RemoveOperand(Src2ModIdx); 1223 MI.RemoveOperand(Src1ModIdx); 1224 MI.RemoveOperand(Src0ModIdx); 1225 } 1226 1227 // TODO: Maybe this should be removed this and custom fold everything in 1228 // SIFoldOperands? 1229 bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, 1230 unsigned Reg, MachineRegisterInfo *MRI) const { 1231 if (!MRI->hasOneNonDBGUse(Reg)) 1232 return false; 1233 1234 unsigned Opc = UseMI->getOpcode(); 1235 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { 1236 // Don't fold if we are using source modifiers. The new VOP2 instructions 1237 // don't have them. 1238 if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || 1239 hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) || 1240 hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) { 1241 return false; 1242 } 1243 1244 const MachineOperand &ImmOp = DefMI->getOperand(1); 1245 1246 // If this is a free constant, there's no reason to do this. 1247 // TODO: We could fold this here instead of letting SIFoldOperands do it 1248 // later. 
1249 if (isInlineConstant(ImmOp, 4)) 1250 return false; 1251 1252 MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0); 1253 MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1); 1254 MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2); 1255 1256 // Multiplied part is the constant: Use v_madmk_f32 1257 // We should only expect these to be on src0 due to canonicalizations. 1258 if (Src0->isReg() && Src0->getReg() == Reg) { 1259 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1260 return false; 1261 1262 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 1263 return false; 1264 1265 // We need to swap operands 0 and 1 since madmk constant is at operand 1. 1266 1267 const int64_t Imm = DefMI->getOperand(1).getImm(); 1268 1269 // FIXME: This would be a lot easier if we could return a new instruction 1270 // instead of having to modify in place. 1271 1272 // Remove these first since they are at the end. 1273 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1274 AMDGPU::OpName::omod)); 1275 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1276 AMDGPU::OpName::clamp)); 1277 1278 unsigned Src1Reg = Src1->getReg(); 1279 unsigned Src1SubReg = Src1->getSubReg(); 1280 Src0->setReg(Src1Reg); 1281 Src0->setSubReg(Src1SubReg); 1282 Src0->setIsKill(Src1->isKill()); 1283 1284 if (Opc == AMDGPU::V_MAC_F32_e64) { 1285 UseMI->untieRegOperand( 1286 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1287 } 1288 1289 Src1->ChangeToImmediate(Imm); 1290 1291 removeModOperands(*UseMI); 1292 UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); 1293 1294 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1295 if (DeleteDef) 1296 DefMI->eraseFromParent(); 1297 1298 return true; 1299 } 1300 1301 // Added part is the constant: Use v_madak_f32 1302 if (Src2->isReg() && Src2->getReg() == Reg) { 1303 // Not allowed to use constant bus for another operand. 
1304 // We can however allow an inline immediate as src0. 1305 if (!Src0->isImm() && 1306 (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1307 return false; 1308 1309 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1310 return false; 1311 1312 const int64_t Imm = DefMI->getOperand(1).getImm(); 1313 1314 // FIXME: This would be a lot easier if we could return a new instruction 1315 // instead of having to modify in place. 1316 1317 // Remove these first since they are at the end. 1318 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1319 AMDGPU::OpName::omod)); 1320 UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, 1321 AMDGPU::OpName::clamp)); 1322 1323 if (Opc == AMDGPU::V_MAC_F32_e64) { 1324 UseMI->untieRegOperand( 1325 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1326 } 1327 1328 // ChangingToImmediate adds Src2 back to the instruction. 1329 Src2->ChangeToImmediate(Imm); 1330 1331 // These come before src2. 1332 removeModOperands(*UseMI); 1333 UseMI->setDesc(get(AMDGPU::V_MADAK_F32)); 1334 1335 bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1336 if (DeleteDef) 1337 DefMI->eraseFromParent(); 1338 1339 return true; 1340 } 1341 } 1342 1343 return false; 1344 } 1345 1346 static bool offsetsDoNotOverlap(int WidthA, int OffsetA, 1347 int WidthB, int OffsetB) { 1348 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 1349 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 1350 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 1351 return LowOffset + LowWidth <= HighOffset; 1352 } 1353 1354 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, 1355 MachineInstr *MIb) const { 1356 unsigned BaseReg0, BaseReg1; 1357 int64_t Offset0, Offset1; 1358 1359 if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && 1360 getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { 1361 1362 if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) { 1363 // FIXME: Handle ds_read2 / ds_write2. 
1364 return false; 1365 } 1366 unsigned Width0 = (*MIa->memoperands_begin())->getSize(); 1367 unsigned Width1 = (*MIb->memoperands_begin())->getSize(); 1368 if (BaseReg0 == BaseReg1 && 1369 offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { 1370 return true; 1371 } 1372 } 1373 1374 return false; 1375 } 1376 1377 bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, 1378 MachineInstr *MIb, 1379 AliasAnalysis *AA) const { 1380 assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && 1381 "MIa must load from or modify a memory location"); 1382 assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && 1383 "MIb must load from or modify a memory location"); 1384 1385 if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects()) 1386 return false; 1387 1388 // XXX - Can we relax this between address spaces? 1389 if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) 1390 return false; 1391 1392 // TODO: Should we check the address space from the MachineMemOperand? That 1393 // would allow us to distinguish objects we know don't alias based on the 1394 // underlying address space, even if it was lowered to a different one, 1395 // e.g. private accesses lowered to use MUBUF instructions on a scratch 1396 // buffer. 
1397 if (isDS(*MIa)) { 1398 if (isDS(*MIb)) 1399 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1400 1401 return !isFLAT(*MIb); 1402 } 1403 1404 if (isMUBUF(*MIa) || isMTBUF(*MIa)) { 1405 if (isMUBUF(*MIb) || isMTBUF(*MIb)) 1406 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1407 1408 return !isFLAT(*MIb) && !isSMRD(*MIb); 1409 } 1410 1411 if (isSMRD(*MIa)) { 1412 if (isSMRD(*MIb)) 1413 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1414 1415 return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa); 1416 } 1417 1418 if (isFLAT(*MIa)) { 1419 if (isFLAT(*MIb)) 1420 return checkInstOffsetsDoNotOverlap(MIa, MIb); 1421 1422 return false; 1423 } 1424 1425 return false; 1426 } 1427 1428 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, 1429 MachineBasicBlock::iterator &MI, 1430 LiveVariables *LV) const { 1431 1432 switch (MI->getOpcode()) { 1433 default: return nullptr; 1434 case AMDGPU::V_MAC_F32_e64: break; 1435 case AMDGPU::V_MAC_F32_e32: { 1436 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1437 if (Src0->isImm() && !isInlineConstant(*Src0, 4)) 1438 return nullptr; 1439 break; 1440 } 1441 } 1442 1443 const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst); 1444 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); 1445 const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); 1446 const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); 1447 1448 return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) 1449 .addOperand(*Dst) 1450 .addImm(0) // Src0 mods 1451 .addOperand(*Src0) 1452 .addImm(0) // Src1 mods 1453 .addOperand(*Src1) 1454 .addImm(0) // Src mods 1455 .addOperand(*Src2) 1456 .addImm(0) // clamp 1457 .addImm(0); // omod 1458 } 1459 1460 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI, 1461 const MachineBasicBlock *MBB, 1462 const MachineFunction &MF) const { 1463 // Target-independent instructions do not have an 
implicit-use of EXEC, even 1464 // when they operate on VGPRs. Treating EXEC modifications as scheduling 1465 // boundaries prevents incorrect movements of such instructions. 1466 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1467 if (MI->modifiesRegister(AMDGPU::EXEC, TRI)) 1468 return true; 1469 1470 return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF); 1471 } 1472 1473 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 1474 int64_t SVal = Imm.getSExtValue(); 1475 if (SVal >= -16 && SVal <= 64) 1476 return true; 1477 1478 if (Imm.getBitWidth() == 64) { 1479 uint64_t Val = Imm.getZExtValue(); 1480 return (DoubleToBits(0.0) == Val) || 1481 (DoubleToBits(1.0) == Val) || 1482 (DoubleToBits(-1.0) == Val) || 1483 (DoubleToBits(0.5) == Val) || 1484 (DoubleToBits(-0.5) == Val) || 1485 (DoubleToBits(2.0) == Val) || 1486 (DoubleToBits(-2.0) == Val) || 1487 (DoubleToBits(4.0) == Val) || 1488 (DoubleToBits(-4.0) == Val); 1489 } 1490 1491 // The actual type of the operand does not seem to matter as long 1492 // as the bits match one of the inline immediate values. For example: 1493 // 1494 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 1495 // so it is a legal inline immediate. 1496 // 1497 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 1498 // floating-point, so it is a legal inline immediate. 
1499 uint32_t Val = Imm.getZExtValue(); 1500 1501 return (FloatToBits(0.0f) == Val) || 1502 (FloatToBits(1.0f) == Val) || 1503 (FloatToBits(-1.0f) == Val) || 1504 (FloatToBits(0.5f) == Val) || 1505 (FloatToBits(-0.5f) == Val) || 1506 (FloatToBits(2.0f) == Val) || 1507 (FloatToBits(-2.0f) == Val) || 1508 (FloatToBits(4.0f) == Val) || 1509 (FloatToBits(-4.0f) == Val); 1510 } 1511 1512 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 1513 unsigned OpSize) const { 1514 if (MO.isImm()) { 1515 // MachineOperand provides no way to tell the true operand size, since it 1516 // only records a 64-bit value. We need to know the size to determine if a 1517 // 32-bit floating point immediate bit pattern is legal for an integer 1518 // immediate. It would be for any 32-bit integer operand, but would not be 1519 // for a 64-bit one. 1520 1521 unsigned BitSize = 8 * OpSize; 1522 return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 1523 } 1524 1525 return false; 1526 } 1527 1528 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1529 unsigned OpSize) const { 1530 return MO.isImm() && !isInlineConstant(MO, OpSize); 1531 } 1532 1533 static bool compareMachineOp(const MachineOperand &Op0, 1534 const MachineOperand &Op1) { 1535 if (Op0.getType() != Op1.getType()) 1536 return false; 1537 1538 switch (Op0.getType()) { 1539 case MachineOperand::MO_Register: 1540 return Op0.getReg() == Op1.getReg(); 1541 case MachineOperand::MO_Immediate: 1542 return Op0.getImm() == Op1.getImm(); 1543 default: 1544 llvm_unreachable("Didn't expect to be comparing these operand types"); 1545 } 1546 } 1547 1548 bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, 1549 const MachineOperand &MO) const { 1550 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; 1551 1552 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1553 1554 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 1555 return true; 1556 1557 if (OpInfo.RegClass < 0) 1558 return 
false; 1559 1560 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1561 if (isLiteralConstant(MO, OpSize)) 1562 return RI.opCanUseLiteralConstant(OpInfo.OperandType); 1563 1564 return RI.opCanUseInlineConstant(OpInfo.OperandType); 1565 } 1566 1567 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1568 int Op32 = AMDGPU::getVOPe32(Opcode); 1569 if (Op32 == -1) 1570 return false; 1571 1572 return pseudoToMCOpcode(Op32) != -1; 1573 } 1574 1575 bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 1576 // The src0_modifier operand is present on all instructions 1577 // that have modifiers. 1578 1579 return AMDGPU::getNamedOperandIdx(Opcode, 1580 AMDGPU::OpName::src0_modifiers) != -1; 1581 } 1582 1583 bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 1584 unsigned OpName) const { 1585 const MachineOperand *Mods = getNamedOperand(MI, OpName); 1586 return Mods && Mods->getImm(); 1587 } 1588 1589 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1590 const MachineOperand &MO, 1591 unsigned OpSize) const { 1592 // Literal constants use the constant bus. 1593 if (isLiteralConstant(MO, OpSize)) 1594 return true; 1595 1596 if (!MO.isReg() || !MO.isUse()) 1597 return false; 1598 1599 if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 1600 return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 1601 1602 // FLAT_SCR is just an SGPR pair. 1603 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 1604 return true; 1605 1606 // EXEC register uses the constant bus. 
1607 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 1608 return true; 1609 1610 // SGPRs use the constant bus 1611 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || 1612 (!MO.isImplicit() && 1613 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1614 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); 1615 } 1616 1617 static unsigned findImplicitSGPRRead(const MachineInstr &MI) { 1618 for (const MachineOperand &MO : MI.implicit_operands()) { 1619 // We only care about reads. 1620 if (MO.isDef()) 1621 continue; 1622 1623 switch (MO.getReg()) { 1624 case AMDGPU::VCC: 1625 case AMDGPU::M0: 1626 case AMDGPU::FLAT_SCR: 1627 return MO.getReg(); 1628 1629 default: 1630 break; 1631 } 1632 } 1633 1634 return AMDGPU::NoRegister; 1635 } 1636 1637 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, 1638 StringRef &ErrInfo) const { 1639 uint16_t Opcode = MI->getOpcode(); 1640 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1641 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 1642 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 1643 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 1644 1645 // Make sure the number of operands is correct. 1646 const MCInstrDesc &Desc = get(Opcode); 1647 if (!Desc.isVariadic() && 1648 Desc.getNumOperands() != MI->getNumExplicitOperands()) { 1649 ErrInfo = "Instruction has wrong number of operands."; 1650 return false; 1651 } 1652 1653 // Make sure the register classes are correct. 1654 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1655 if (MI->getOperand(i).isFPImm()) { 1656 ErrInfo = "FPImm Machine Operands are not supported. 
ISel should bitcast " 1657 "all fp values to integers."; 1658 return false; 1659 } 1660 1661 int RegClass = Desc.OpInfo[i].RegClass; 1662 1663 switch (Desc.OpInfo[i].OperandType) { 1664 case MCOI::OPERAND_REGISTER: 1665 if (MI->getOperand(i).isImm()) { 1666 ErrInfo = "Illegal immediate value for operand."; 1667 return false; 1668 } 1669 break; 1670 case AMDGPU::OPERAND_REG_IMM32: 1671 break; 1672 case AMDGPU::OPERAND_REG_INLINE_C: 1673 if (isLiteralConstant(MI->getOperand(i), 1674 RI.getRegClass(RegClass)->getSize())) { 1675 ErrInfo = "Illegal immediate value for operand."; 1676 return false; 1677 } 1678 break; 1679 case MCOI::OPERAND_IMMEDIATE: 1680 // Check if this operand is an immediate. 1681 // FrameIndex operands will be replaced by immediates, so they are 1682 // allowed. 1683 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { 1684 ErrInfo = "Expected immediate, but got non-immediate"; 1685 return false; 1686 } 1687 // Fall-through 1688 default: 1689 continue; 1690 } 1691 1692 if (!MI->getOperand(i).isReg()) 1693 continue; 1694 1695 if (RegClass != -1) { 1696 unsigned Reg = MI->getOperand(i).getReg(); 1697 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1698 continue; 1699 1700 const TargetRegisterClass *RC = RI.getRegClass(RegClass); 1701 if (!RC->contains(Reg)) { 1702 ErrInfo = "Operand has incorrect register class."; 1703 return false; 1704 } 1705 } 1706 } 1707 1708 1709 // Verify VOP* 1710 if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) { 1711 // Only look at the true operands. Only a real operand can use the constant 1712 // bus, and we don't want to check pseudo-operands like the source modifier 1713 // flags. 
1714 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1715 1716 unsigned ConstantBusCount = 0; 1717 unsigned SGPRUsed = findImplicitSGPRRead(*MI); 1718 if (SGPRUsed != AMDGPU::NoRegister) 1719 ++ConstantBusCount; 1720 1721 for (int OpIdx : OpIndices) { 1722 if (OpIdx == -1) 1723 break; 1724 const MachineOperand &MO = MI->getOperand(OpIdx); 1725 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 1726 if (MO.isReg()) { 1727 if (MO.getReg() != SGPRUsed) 1728 ++ConstantBusCount; 1729 SGPRUsed = MO.getReg(); 1730 } else { 1731 ++ConstantBusCount; 1732 } 1733 } 1734 } 1735 if (ConstantBusCount > 1) { 1736 ErrInfo = "VOP* instruction uses the constant bus more than once"; 1737 return false; 1738 } 1739 } 1740 1741 // Verify misc. restrictions on specific instructions. 1742 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1743 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1744 const MachineOperand &Src0 = MI->getOperand(Src0Idx); 1745 const MachineOperand &Src1 = MI->getOperand(Src1Idx); 1746 const MachineOperand &Src2 = MI->getOperand(Src2Idx); 1747 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1748 if (!compareMachineOp(Src0, Src1) && 1749 !compareMachineOp(Src0, Src2)) { 1750 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1751 return false; 1752 } 1753 } 1754 } 1755 1756 // Make sure we aren't losing exec uses in the td files. This mostly requires 1757 // being careful when using let Uses to try to add other use registers. 
1758 if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) { 1759 if (!MI->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 1760 ErrInfo = "VALU instruction does not implicitly read exec mask"; 1761 return false; 1762 } 1763 } 1764 1765 return true; 1766 } 1767 1768 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1769 switch (MI.getOpcode()) { 1770 default: return AMDGPU::INSTRUCTION_LIST_END; 1771 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1772 case AMDGPU::COPY: return AMDGPU::COPY; 1773 case AMDGPU::PHI: return AMDGPU::PHI; 1774 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 1775 case AMDGPU::S_MOV_B32: 1776 return MI.getOperand(1).isReg() ? 1777 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 1778 case AMDGPU::S_ADD_I32: 1779 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 1780 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 1781 case AMDGPU::S_SUB_I32: 1782 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 1783 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 1784 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 1785 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 1786 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 1787 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 1788 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 1789 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 1790 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 1791 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1792 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1793 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1794 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1795 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1796 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1797 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1798 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1799 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1800 case 
AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1801 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 1802 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1803 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1804 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1805 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1806 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1807 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1808 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1809 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; 1810 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1811 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1812 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; 1813 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; 1814 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; 1815 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; 1816 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; 1817 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; 1818 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1819 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1820 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 1821 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1822 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 1823 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 1824 } 1825 } 1826 1827 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1828 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1829 } 1830 1831 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1832 unsigned OpNo) const { 1833 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1834 const MCInstrDesc &Desc = get(MI.getOpcode()); 1835 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1836 
Desc.OpInfo[OpNo].RegClass == -1) { 1837 unsigned Reg = MI.getOperand(OpNo).getReg(); 1838 1839 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1840 return MRI.getRegClass(Reg); 1841 return RI.getPhysRegClass(Reg); 1842 } 1843 1844 unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1845 return RI.getRegClass(RCID); 1846 } 1847 1848 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1849 switch (MI.getOpcode()) { 1850 case AMDGPU::COPY: 1851 case AMDGPU::REG_SEQUENCE: 1852 case AMDGPU::PHI: 1853 case AMDGPU::INSERT_SUBREG: 1854 return RI.hasVGPRs(getOpRegClass(MI, 0)); 1855 default: 1856 return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1857 } 1858 } 1859 1860 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { 1861 MachineBasicBlock::iterator I = MI; 1862 MachineBasicBlock *MBB = MI->getParent(); 1863 MachineOperand &MO = MI->getOperand(OpIdx); 1864 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1865 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; 1866 const TargetRegisterClass *RC = RI.getRegClass(RCID); 1867 unsigned Opcode = AMDGPU::V_MOV_B32_e32; 1868 if (MO.isReg()) 1869 Opcode = AMDGPU::COPY; 1870 else if (RI.isSGPRClass(RC)) 1871 Opcode = AMDGPU::S_MOV_B32; 1872 1873 1874 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 1875 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 1876 VRC = &AMDGPU::VReg_64RegClass; 1877 else 1878 VRC = &AMDGPU::VGPR_32RegClass; 1879 1880 unsigned Reg = MRI.createVirtualRegister(VRC); 1881 DebugLoc DL = MBB->findDebugLoc(I); 1882 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) 1883 .addOperand(MO); 1884 MO.ChangeToRegister(Reg, false); 1885 } 1886 1887 unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 1888 MachineRegisterInfo &MRI, 1889 MachineOperand &SuperReg, 1890 const TargetRegisterClass *SuperRC, 1891 unsigned SubIdx, 1892 const TargetRegisterClass *SubRC) 1893 const { 1894 MachineBasicBlock *MBB = 
MI->getParent(); 1895 DebugLoc DL = MI->getDebugLoc(); 1896 unsigned SubReg = MRI.createVirtualRegister(SubRC); 1897 1898 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 1899 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1900 .addReg(SuperReg.getReg(), 0, SubIdx); 1901 return SubReg; 1902 } 1903 1904 // Just in case the super register is itself a sub-register, copy it to a new 1905 // value so we don't need to worry about merging its subreg index with the 1906 // SubIdx passed to this function. The register coalescer should be able to 1907 // eliminate this extra copy. 1908 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 1909 1910 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 1911 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 1912 1913 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1914 .addReg(NewSuperReg, 0, SubIdx); 1915 1916 return SubReg; 1917 } 1918 1919 MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 1920 MachineBasicBlock::iterator MII, 1921 MachineRegisterInfo &MRI, 1922 MachineOperand &Op, 1923 const TargetRegisterClass *SuperRC, 1924 unsigned SubIdx, 1925 const TargetRegisterClass *SubRC) const { 1926 if (Op.isImm()) { 1927 // XXX - Is there a better way to do this? 
1928 if (SubIdx == AMDGPU::sub0) 1929 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 1930 if (SubIdx == AMDGPU::sub1) 1931 return MachineOperand::CreateImm(Op.getImm() >> 32); 1932 1933 llvm_unreachable("Unhandled register index for immediate"); 1934 } 1935 1936 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 1937 SubIdx, SubRC); 1938 return MachineOperand::CreateReg(SubReg, false); 1939 } 1940 1941 // Change the order of operands from (0, 1, 2) to (0, 2, 1) 1942 void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { 1943 assert(Inst->getNumExplicitOperands() == 3); 1944 MachineOperand Op1 = Inst->getOperand(1); 1945 Inst->RemoveOperand(1); 1946 Inst->addOperand(Op1); 1947 } 1948 1949 bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 1950 const MCOperandInfo &OpInfo, 1951 const MachineOperand &MO) const { 1952 if (!MO.isReg()) 1953 return false; 1954 1955 unsigned Reg = MO.getReg(); 1956 const TargetRegisterClass *RC = 1957 TargetRegisterInfo::isVirtualRegister(Reg) ? 1958 MRI.getRegClass(Reg) : 1959 RI.getPhysRegClass(Reg); 1960 1961 const SIRegisterInfo *TRI = 1962 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); 1963 RC = TRI->getSubRegClass(RC, MO.getSubReg()); 1964 1965 // In order to be legal, the common sub-class must be equal to the 1966 // class of the current operand. 
For example: 1967 // 1968 // v_mov_b32 s0 ; Operand defined as vsrc_32 1969 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1970 // 1971 // s_sendmsg 0, s0 ; Operand defined as m0reg 1972 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1973 1974 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1975 } 1976 1977 bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1978 const MCOperandInfo &OpInfo, 1979 const MachineOperand &MO) const { 1980 if (MO.isReg()) 1981 return isLegalRegOperand(MRI, OpInfo, MO); 1982 1983 // Handle non-register types that are treated like immediates. 1984 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1985 return true; 1986 } 1987 1988 bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, 1989 const MachineOperand *MO) const { 1990 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 1991 const MCInstrDesc &InstDesc = MI->getDesc(); 1992 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 1993 const TargetRegisterClass *DefinedRC = 1994 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 1995 if (!MO) 1996 MO = &MI->getOperand(OpIdx); 1997 1998 if (isVALU(*MI) && 1999 usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 2000 2001 RegSubRegPair SGPRUsed; 2002 if (MO->isReg()) 2003 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); 2004 2005 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2006 if (i == OpIdx) 2007 continue; 2008 const MachineOperand &Op = MI->getOperand(i); 2009 if (Op.isReg() && 2010 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && 2011 usesConstantBus(MRI, Op, getOpSize(*MI, i))) { 2012 return false; 2013 } 2014 } 2015 } 2016 2017 if (MO->isReg()) { 2018 assert(DefinedRC); 2019 return isLegalRegOperand(MRI, OpInfo, *MO); 2020 } 2021 2022 2023 // Handle non-register types that are treated like immediates. 
2024 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 2025 2026 if (!DefinedRC) { 2027 // This operand expects an immediate. 2028 return true; 2029 } 2030 2031 return isImmOperandLegal(MI, OpIdx, *MO); 2032 } 2033 2034 void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 2035 MachineInstr *MI) const { 2036 unsigned Opc = MI->getOpcode(); 2037 const MCInstrDesc &InstrDesc = get(Opc); 2038 2039 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 2040 MachineOperand &Src1 = MI->getOperand(Src1Idx); 2041 2042 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 2043 // we need to only have one constant bus use. 2044 // 2045 // Note we do not need to worry about literal constants here. They are 2046 // disabled for the operand type for instructions because they will always 2047 // violate the one constant bus use rule. 2048 bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister; 2049 if (HasImplicitSGPR) { 2050 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2051 MachineOperand &Src0 = MI->getOperand(Src0Idx); 2052 2053 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) 2054 legalizeOpWithMove(MI, Src0Idx); 2055 } 2056 2057 // VOP2 src0 instructions support all operand types, so we don't need to check 2058 // their legality. If src1 is already legal, we don't need to do anything. 2059 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) 2060 return; 2061 2062 // We do not use commuteInstruction here because it is too aggressive and will 2063 // commute if it is possible. We only want to commute here if it improves 2064 // legality. This can be called a fairly large number of times so don't waste 2065 // compile time pointlessly swapping and checking legality again. 
2066 if (HasImplicitSGPR || !MI->isCommutable()) { 2067 legalizeOpWithMove(MI, Src1Idx); 2068 return; 2069 } 2070 2071 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2072 MachineOperand &Src0 = MI->getOperand(Src0Idx); 2073 2074 // If src0 can be used as src1, commuting will make the operands legal. 2075 // Otherwise we have to give up and insert a move. 2076 // 2077 // TODO: Other immediate-like operand kinds could be commuted if there was a 2078 // MachineOperand::ChangeTo* for them. 2079 if ((!Src1.isImm() && !Src1.isReg()) || 2080 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { 2081 legalizeOpWithMove(MI, Src1Idx); 2082 return; 2083 } 2084 2085 int CommutedOpc = commuteOpcode(*MI); 2086 if (CommutedOpc == -1) { 2087 legalizeOpWithMove(MI, Src1Idx); 2088 return; 2089 } 2090 2091 MI->setDesc(get(CommutedOpc)); 2092 2093 unsigned Src0Reg = Src0.getReg(); 2094 unsigned Src0SubReg = Src0.getSubReg(); 2095 bool Src0Kill = Src0.isKill(); 2096 2097 if (Src1.isImm()) 2098 Src0.ChangeToImmediate(Src1.getImm()); 2099 else if (Src1.isReg()) { 2100 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 2101 Src0.setSubReg(Src1.getSubReg()); 2102 } else 2103 llvm_unreachable("Should only have register or immediate operands"); 2104 2105 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 2106 Src1.setSubReg(Src0SubReg); 2107 } 2108 2109 // Legalize VOP3 operands. Because all operand types are supported for any 2110 // operand, and since literal constants are not allowed and should never be 2111 // seen, we only need to worry about inserting copies if we use multiple SGPR 2112 // operands. 
2113 void SIInstrInfo::legalizeOperandsVOP3( 2114 MachineRegisterInfo &MRI, 2115 MachineInstr *MI) const { 2116 unsigned Opc = MI->getOpcode(); 2117 2118 int VOP3Idx[3] = { 2119 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), 2120 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), 2121 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) 2122 }; 2123 2124 // Find the one SGPR operand we are allowed to use. 2125 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); 2126 2127 for (unsigned i = 0; i < 3; ++i) { 2128 int Idx = VOP3Idx[i]; 2129 if (Idx == -1) 2130 break; 2131 MachineOperand &MO = MI->getOperand(Idx); 2132 2133 // We should never see a VOP3 instruction with an illegal immediate operand. 2134 if (!MO.isReg()) 2135 continue; 2136 2137 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 2138 continue; // VGPRs are legal 2139 2140 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { 2141 SGPRReg = MO.getReg(); 2142 // We can use one SGPR in each VOP3 instruction. 2143 continue; 2144 } 2145 2146 // If we make it this far, then the operand is not legal and we must 2147 // legalize it. 
2148 legalizeOpWithMove(MI, Idx); 2149 } 2150 } 2151 2152 unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI, 2153 MachineRegisterInfo &MRI) const { 2154 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); 2155 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); 2156 unsigned DstReg = MRI.createVirtualRegister(SRC); 2157 unsigned SubRegs = VRC->getSize() / 4; 2158 2159 SmallVector<unsigned, 8> SRegs; 2160 for (unsigned i = 0; i < SubRegs; ++i) { 2161 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2162 BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), 2163 get(AMDGPU::V_READFIRSTLANE_B32), SGPR) 2164 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); 2165 SRegs.push_back(SGPR); 2166 } 2167 2168 MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI, 2169 UseMI->getDebugLoc(), 2170 get(AMDGPU::REG_SEQUENCE), DstReg); 2171 for (unsigned i = 0; i < SubRegs; ++i) { 2172 MIB.addReg(SRegs[i]); 2173 MIB.addImm(RI.getSubRegFromChannel(i)); 2174 } 2175 return DstReg; 2176 } 2177 2178 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, 2179 MachineInstr *MI) const { 2180 2181 // If the pointer is store in VGPRs, then we need to move them to 2182 // SGPRs using v_readfirstlane. This is safe because we only select 2183 // loads with uniform pointers to SMRD instruction so we know the 2184 // pointer value is uniform. 
2185 MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); 2186 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { 2187 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); 2188 SBase->setReg(SGPR); 2189 } 2190 } 2191 2192 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { 2193 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 2194 2195 // Legalize VOP2 2196 if (isVOP2(*MI) || isVOPC(*MI)) { 2197 legalizeOperandsVOP2(MRI, MI); 2198 return; 2199 } 2200 2201 // Legalize VOP3 2202 if (isVOP3(*MI)) { 2203 legalizeOperandsVOP3(MRI, MI); 2204 return; 2205 } 2206 2207 // Legalize SMRD 2208 if (isSMRD(*MI)) { 2209 legalizeOperandsSMRD(MRI, MI); 2210 return; 2211 } 2212 2213 // Legalize REG_SEQUENCE and PHI 2214 // The register class of the operands much be the same type as the register 2215 // class of the output. 2216 if (MI->getOpcode() == AMDGPU::PHI) { 2217 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; 2218 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { 2219 if (!MI->getOperand(i).isReg() || 2220 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) 2221 continue; 2222 const TargetRegisterClass *OpRC = 2223 MRI.getRegClass(MI->getOperand(i).getReg()); 2224 if (RI.hasVGPRs(OpRC)) { 2225 VRC = OpRC; 2226 } else { 2227 SRC = OpRC; 2228 } 2229 } 2230 2231 // If any of the operands are VGPR registers, then they all most be 2232 // otherwise we will create illegal VGPR->SGPR copies when legalizing 2233 // them. 2234 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) { 2235 if (!VRC) { 2236 assert(SRC); 2237 VRC = RI.getEquivalentVGPRClass(SRC); 2238 } 2239 RC = VRC; 2240 } else { 2241 RC = SRC; 2242 } 2243 2244 // Update all the operands so they have the same type. 
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
      MachineOperand &Op = MI->getOperand(I);
      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);

      // MI is a PHI instruction, so operand I + 1 is the block the value at
      // operand I comes from. The copy is placed in that block, before its
      // first terminator.
      MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
      MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();

      BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
        .addOperand(Op);
      Op.setReg(DstReg);
    }
  }

  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
  // VGPR dest type and SGPR sources, insert copies so all operands are
  // VGPRs. This seems to help operand folding / the register coalescer.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    MachineBasicBlock *MBB = MI->getParent();
    const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
    if (RI.hasVGPRs(DstRC)) {
      // Update all the operands so they are VGPR register classes. These may
      // not be the same register class because REG_SEQUENCE supports mixing
      // subregister index types e.g. sub0_sub1 + sub2 + sub3
      for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
        MachineOperand &Op = MI->getOperand(I);
        if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
          continue;

        // Operands whose class is already its own VGPR equivalent need no
        // copy.
        const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
        const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
        if (VRC == OpRC)
          continue;

        unsigned DstReg = MRI.createVirtualRegister(VRC);

        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
          .addOperand(Op);

        Op.setReg(DstReg);
        Op.setIsKill();
      }
    }

    return;
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      // Copy src0 into a fresh register of the destination's class and use
      // that instead.
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MIMG
  // The srsrc and ssamp operands, when present and not already in an SGPR
  // class, are moved to SGPRs with readlaneVGPRToSGPR().
  if (isMIMG(*MI)) {
    MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
      SRsrc->setReg(SGPR);
    }

    MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
      SSamp->setReg(SGPR);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.
  int SRsrcIdx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (SRsrcIdx != -1) {
    // We have an MUBUF instruction
    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
                                             RI.getRegClass(SRsrcRC))) {
      // The operands are legal.
      // FIXME: We may need to legalize operands besides srsrc.
      return;
    }

    MachineBasicBlock &MBB = *MI->getParent();

    // Extract the ptr from the resource descriptor.
    unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
      &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

    // Create an empty resource descriptor
    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    // Zero64 = 0
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
            Zero64)
            .addImm(0);

    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatLo)
            .addImm(RsrcDataFormat & 0xFFFFFFFF);

    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatHi)
            .addImm(RsrcDataFormat >> 32);

    // NewSRsrc = {Zero64, SRsrcFormat}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
      .addReg(Zero64)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(SRsrcFormatLo)
      .addImm(AMDGPU::sub2)
      .addReg(SRsrcFormatHi)
      .addImm(AMDGPU::sub3);

    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    if (VAddr) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
      DebugLoc DL = MI->getDebugLoc();
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
        .addReg(SRsrcPtr, 0, AMDGPU::sub0)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub0);

      // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
        .addReg(SRsrcPtr, 0, AMDGPU::sub1)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub1);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
        .addReg(NewVAddrLo)
        .addImm(AMDGPU::sub0)
        .addReg(NewVAddrHi)
        .addImm(AMDGPU::sub1);
    } else {
      // This instruction is the _OFFSET variant, so we need to convert it to
      // ADDR64.
      assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
             < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
             "FIXME: Need to emit flat atomics here");

      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());

      // Atomics with return have an additional tied operand and are
      // missing some of the special bits.
      MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
      MachineInstr *Addr64;

      if (!VDataIn) {
        // Regular buffer load / store.
        MachineInstrBuilder MIB
          = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
          .addOperand(*VData)
          .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                      // This will be replaced later
                                      // with the new value of vaddr.
          .addOperand(*SRsrc)
          .addOperand(*SOffset)
          .addOperand(*Offset);

        // Atomics do not have this operand.
        if (const MachineOperand *GLC
            = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
          MIB.addImm(GLC->getImm());
        }

        MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));

        // tfe is only copied over when the original instruction has it.
        if (const MachineOperand *TFE
            = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
          MIB.addImm(TFE->getImm());
        }

        MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
        Addr64 = MIB;
      } else {
        // Atomics with return.
        Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
          .addOperand(*VData)
          .addOperand(*VDataIn)
          .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                      // This will be replaced later
                                      // with the new value of vaddr.
          .addOperand(*SRsrc)
          .addOperand(*SOffset)
          .addOperand(*Offset)
          .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
          .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
      }

      // The old instruction is unlinked from its block (not erased) and the
      // rest of this function operates on the new ADDR64 instruction.
      MI->removeFromParent();
      MI = Addr64;

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
        .addReg(SRsrcPtr, 0, AMDGPU::sub0)
        .addImm(AMDGPU::sub0)
        .addReg(SRsrcPtr, 0, AMDGPU::sub1)
        .addImm(AMDGPU::sub1);

      // Re-fetch the operands: they now have to point into the new
      // instruction.
      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    }

    // Update the instruction to use NewVaddr
    VAddr->setReg(NewVAddr);
    // Update the instruction to use NewSRsrc
    SRsrc->setReg(NewSRsrc);
  }
}

/// \brief Rewrite \p TopInst as a VALU instruction, then keep processing the
/// instructions that get pushed onto the worklist as a consequence until the
/// worklist is empty.
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    // Cases that end in 'continue' replace Inst with new instructions and
    // erase it; cases that end in 'break' fall through to the generic
    // conversion below.
    switch (Opcode) {
    default:
      break;
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_I64: {
      splitScalar64BitBFE(Worklist, Inst);
      Inst->eraseFromParent();
      continue;
    }

    // For the shifts below, VOLCANIC_ISLANDS and newer subtargets switch to
    // the *REV opcode and swap the two source operands to match.
    case AMDGPU::S_LSHL_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHL_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
      break;

    case AMDGPU::S_ABS_I32:
      lowerScalarAbs(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_CBRANCH_SCC0:
    case AMDGPU::S_CBRANCH_SCC1:
      // Clear unused bits of vcc
      BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
              .addReg(AMDGPU::EXEC)
              .addReg(AMDGPU::VCC);
      break;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it, and
    // We're just about to add the implicit use / defs of VCC, and we don't want
    // both.
    // The scan runs from the last operand down to operand 1 so that removing
    // an operand does not shift the ones still to be visited.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
        Inst->RemoveOperand(i);
        addSCCDefUsersToVALUWorklist(Inst, Worklist);
      }
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
      Inst->RemoveOperand(2); // Remove old immediate.
2635 Inst->addOperand(MachineOperand::CreateImm(Offset)); 2636 Inst->addOperand(MachineOperand::CreateImm(BitWidth)); 2637 } 2638 2639 bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef(); 2640 unsigned NewDstReg = AMDGPU::NoRegister; 2641 if (HasDst) { 2642 // Update the destination register class. 2643 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); 2644 if (!NewDstRC) 2645 continue; 2646 2647 unsigned DstReg = Inst->getOperand(0).getReg(); 2648 NewDstReg = MRI.createVirtualRegister(NewDstRC); 2649 MRI.replaceRegWith(DstReg, NewDstReg); 2650 } 2651 2652 // Legalize the operands 2653 legalizeOperands(Inst); 2654 2655 if (HasDst) 2656 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 2657 } 2658 } 2659 2660 //===----------------------------------------------------------------------===// 2661 // Indirect addressing callbacks 2662 //===----------------------------------------------------------------------===// 2663 2664 const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { 2665 return &AMDGPU::VGPR_32RegClass; 2666 } 2667 2668 void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, 2669 MachineInstr *Inst) const { 2670 MachineBasicBlock &MBB = *Inst->getParent(); 2671 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2672 MachineBasicBlock::iterator MII = Inst; 2673 DebugLoc DL = Inst->getDebugLoc(); 2674 2675 MachineOperand &Dest = Inst->getOperand(0); 2676 MachineOperand &Src = Inst->getOperand(1); 2677 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2678 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2679 2680 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) 2681 .addImm(0) 2682 .addReg(Src.getReg()); 2683 2684 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 2685 .addReg(Src.getReg()) 2686 .addReg(TmpReg); 2687 2688 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2689 
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2690 } 2691 2692 void SIInstrInfo::splitScalar64BitUnaryOp( 2693 SmallVectorImpl<MachineInstr *> &Worklist, 2694 MachineInstr *Inst, 2695 unsigned Opcode) const { 2696 MachineBasicBlock &MBB = *Inst->getParent(); 2697 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2698 2699 MachineOperand &Dest = Inst->getOperand(0); 2700 MachineOperand &Src0 = Inst->getOperand(1); 2701 DebugLoc DL = Inst->getDebugLoc(); 2702 2703 MachineBasicBlock::iterator MII = Inst; 2704 2705 const MCInstrDesc &InstDesc = get(Opcode); 2706 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2707 MRI.getRegClass(Src0.getReg()) : 2708 &AMDGPU::SGPR_32RegClass; 2709 2710 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2711 2712 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2713 AMDGPU::sub0, Src0SubRC); 2714 2715 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2716 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2717 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 2718 2719 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2720 BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2721 .addOperand(SrcReg0Sub0); 2722 2723 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2724 AMDGPU::sub1, Src0SubRC); 2725 2726 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2727 BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2728 .addOperand(SrcReg0Sub1); 2729 2730 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 2731 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2732 .addReg(DestSub0) 2733 .addImm(AMDGPU::sub0) 2734 .addReg(DestSub1) 2735 .addImm(AMDGPU::sub1); 2736 2737 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2738 2739 // We don't need to legalizeOperands here because for a single operand, src0 2740 // will support any kind of 
input. 2741 2742 // Move all users of this moved value. 2743 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 2744 } 2745 2746 void SIInstrInfo::splitScalar64BitBinaryOp( 2747 SmallVectorImpl<MachineInstr *> &Worklist, 2748 MachineInstr *Inst, 2749 unsigned Opcode) const { 2750 MachineBasicBlock &MBB = *Inst->getParent(); 2751 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2752 2753 MachineOperand &Dest = Inst->getOperand(0); 2754 MachineOperand &Src0 = Inst->getOperand(1); 2755 MachineOperand &Src1 = Inst->getOperand(2); 2756 DebugLoc DL = Inst->getDebugLoc(); 2757 2758 MachineBasicBlock::iterator MII = Inst; 2759 2760 const MCInstrDesc &InstDesc = get(Opcode); 2761 const TargetRegisterClass *Src0RC = Src0.isReg() ? 2762 MRI.getRegClass(Src0.getReg()) : 2763 &AMDGPU::SGPR_32RegClass; 2764 2765 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2766 const TargetRegisterClass *Src1RC = Src1.isReg() ? 2767 MRI.getRegClass(Src1.getReg()) : 2768 &AMDGPU::SGPR_32RegClass; 2769 2770 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); 2771 2772 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2773 AMDGPU::sub0, Src0SubRC); 2774 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 2775 AMDGPU::sub0, Src1SubRC); 2776 2777 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2778 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2779 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 2780 2781 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2782 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2783 .addOperand(SrcReg0Sub0) 2784 .addOperand(SrcReg1Sub0); 2785 2786 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2787 AMDGPU::sub1, Src0SubRC); 2788 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, 
Src1RC, 2789 AMDGPU::sub1, Src1SubRC); 2790 2791 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2792 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2793 .addOperand(SrcReg0Sub1) 2794 .addOperand(SrcReg1Sub1); 2795 2796 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 2797 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2798 .addReg(DestSub0) 2799 .addImm(AMDGPU::sub0) 2800 .addReg(DestSub1) 2801 .addImm(AMDGPU::sub1); 2802 2803 MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2804 2805 // Try to legalize the operands in case we need to swap the order to keep it 2806 // valid. 2807 legalizeOperands(LoHalf); 2808 legalizeOperands(HiHalf); 2809 2810 // Move all users of this moved vlaue. 2811 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 2812 } 2813 2814 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, 2815 MachineInstr *Inst) const { 2816 MachineBasicBlock &MBB = *Inst->getParent(); 2817 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2818 2819 MachineBasicBlock::iterator MII = Inst; 2820 DebugLoc DL = Inst->getDebugLoc(); 2821 2822 MachineOperand &Dest = Inst->getOperand(0); 2823 MachineOperand &Src = Inst->getOperand(1); 2824 2825 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 2826 const TargetRegisterClass *SrcRC = Src.isReg() ? 
2827 MRI.getRegClass(Src.getReg()) : 2828 &AMDGPU::SGPR_32RegClass; 2829 2830 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2831 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2832 2833 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); 2834 2835 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 2836 AMDGPU::sub0, SrcSubRC); 2837 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 2838 AMDGPU::sub1, SrcSubRC); 2839 2840 BuildMI(MBB, MII, DL, InstDesc, MidReg) 2841 .addOperand(SrcRegSub0) 2842 .addImm(0); 2843 2844 BuildMI(MBB, MII, DL, InstDesc, ResultReg) 2845 .addOperand(SrcRegSub1) 2846 .addReg(MidReg); 2847 2848 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2849 2850 // We don't need to legalize operands here. src0 for etiher instruction can be 2851 // an SGPR, and the second input is unused or determined here. 2852 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2853 } 2854 2855 void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, 2856 MachineInstr *Inst) const { 2857 MachineBasicBlock &MBB = *Inst->getParent(); 2858 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2859 MachineBasicBlock::iterator MII = Inst; 2860 DebugLoc DL = Inst->getDebugLoc(); 2861 2862 MachineOperand &Dest = Inst->getOperand(0); 2863 uint32_t Imm = Inst->getOperand(2).getImm(); 2864 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2865 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2866 2867 (void) Offset; 2868 2869 // Only sext_inreg cases handled. 
2870 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 && 2871 BitWidth <= 32 && 2872 Offset == 0 && 2873 "Not implemented"); 2874 2875 if (BitWidth < 32) { 2876 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2877 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2878 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2879 2880 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) 2881 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0) 2882 .addImm(0) 2883 .addImm(BitWidth); 2884 2885 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 2886 .addImm(31) 2887 .addReg(MidRegLo); 2888 2889 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2890 .addReg(MidRegLo) 2891 .addImm(AMDGPU::sub0) 2892 .addReg(MidRegHi) 2893 .addImm(AMDGPU::sub1); 2894 2895 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2896 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2897 return; 2898 } 2899 2900 MachineOperand &Src = Inst->getOperand(1); 2901 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2902 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 2903 2904 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 2905 .addImm(31) 2906 .addReg(Src.getReg(), 0, AMDGPU::sub0); 2907 2908 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 2909 .addReg(Src.getReg(), 0, AMDGPU::sub0) 2910 .addImm(AMDGPU::sub0) 2911 .addReg(TmpReg) 2912 .addImm(AMDGPU::sub1); 2913 2914 MRI.replaceRegWith(Dest.getReg(), ResultReg); 2915 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2916 } 2917 2918 void SIInstrInfo::addUsersToMoveToVALUWorklist( 2919 unsigned DstReg, 2920 MachineRegisterInfo &MRI, 2921 SmallVectorImpl<MachineInstr *> &Worklist) const { 2922 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 2923 E = MRI.use_end(); I != E; ++I) { 2924 MachineInstr &UseMI = *I->getParent(); 2925 if (!canReadVGPR(UseMI, 
I.getOperandNo())) { 2926 Worklist.push_back(&UseMI); 2927 } 2928 } 2929 } 2930 2931 void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst, 2932 SmallVectorImpl<MachineInstr *> &Worklist) const { 2933 // This assumes that all the users of SCC are in the same block 2934 // as the SCC def. 2935 for (MachineBasicBlock::iterator I = SCCDefInst, 2936 E = SCCDefInst->getParent()->end(); I != E; ++I) { 2937 2938 // Exit if we find another SCC def. 2939 if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1) 2940 return; 2941 2942 if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1) 2943 Worklist.push_back(I); 2944 } 2945 } 2946 2947 const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 2948 const MachineInstr &Inst) const { 2949 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 2950 2951 switch (Inst.getOpcode()) { 2952 // For target instructions, getOpRegClass just returns the virtual register 2953 // class associated with the operand, so we need to find an equivalent VGPR 2954 // register class in order to move the instruction to the VALU. 2955 case AMDGPU::COPY: 2956 case AMDGPU::PHI: 2957 case AMDGPU::REG_SEQUENCE: 2958 case AMDGPU::INSERT_SUBREG: 2959 if (RI.hasVGPRs(NewDstRC)) 2960 return nullptr; 2961 2962 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2963 if (!NewDstRC) 2964 return nullptr; 2965 return NewDstRC; 2966 default: 2967 return NewDstRC; 2968 } 2969 } 2970 2971 // Find the one SGPR operand we are allowed to use. 2972 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, 2973 int OpIndices[3]) const { 2974 const MCInstrDesc &Desc = MI->getDesc(); 2975 2976 // Find the one SGPR operand we are allowed to use. 2977 // 2978 // First we need to consider the instruction's operand requirements before 2979 // legalizing. Some operands are required to be SGPRs, such as implicit uses 2980 // of VCC, but we are still bound by the constant bus requirement to only use 2981 // one. 
2982 // 2983 // If the operand's class is an SGPR, we can never move it. 2984 2985 unsigned SGPRReg = findImplicitSGPRRead(*MI); 2986 if (SGPRReg != AMDGPU::NoRegister) 2987 return SGPRReg; 2988 2989 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2990 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 2991 2992 for (unsigned i = 0; i < 3; ++i) { 2993 int Idx = OpIndices[i]; 2994 if (Idx == -1) 2995 break; 2996 2997 const MachineOperand &MO = MI->getOperand(Idx); 2998 if (!MO.isReg()) 2999 continue; 3000 3001 // Is this operand statically required to be an SGPR based on the operand 3002 // constraints? 3003 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); 3004 bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 3005 if (IsRequiredSGPR) 3006 return MO.getReg(); 3007 3008 // If this could be a VGPR or an SGPR, Check the dynamic register class. 3009 unsigned Reg = MO.getReg(); 3010 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 3011 if (RI.isSGPRClass(RegRC)) 3012 UsedSGPRs[i] = Reg; 3013 } 3014 3015 // We don't have a required SGPR operand, so we have a bit more freedom in 3016 // selecting operands to move. 3017 3018 // Try to select the most used SGPR. If an SGPR is equal to one of the 3019 // others, we choose that. 3020 // 3021 // e.g. 3022 // V_FMA_F32 v0, s0, s0, s0 -> No moves 3023 // V_FMA_F32 v0, s0, s1, s0 -> Move s1 3024 3025 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 3026 // prefer those. 
3027 3028 if (UsedSGPRs[0] != AMDGPU::NoRegister) { 3029 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 3030 SGPRReg = UsedSGPRs[0]; 3031 } 3032 3033 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { 3034 if (UsedSGPRs[1] == UsedSGPRs[2]) 3035 SGPRReg = UsedSGPRs[1]; 3036 } 3037 3038 return SGPRReg; 3039 } 3040 3041 void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved, 3042 const MachineFunction &MF) const { 3043 int End = getIndirectIndexEnd(MF); 3044 int Begin = getIndirectIndexBegin(MF); 3045 3046 if (End == -1) 3047 return; 3048 3049 3050 for (int Index = Begin; Index <= End; ++Index) 3051 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index)); 3052 3053 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index) 3054 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index)); 3055 3056 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index) 3057 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index)); 3058 3059 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index) 3060 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index)); 3061 3062 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index) 3063 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index)); 3064 3065 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index) 3066 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index)); 3067 } 3068 3069 MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 3070 unsigned OperandName) const { 3071 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 3072 if (Idx == -1) 3073 return nullptr; 3074 3075 return &MI.getOperand(Idx); 3076 } 3077 3078 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 3079 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 3080 if (ST.isAmdHsaOS()) { 3081 RsrcDataFormat |= (1ULL << 56); 3082 3083 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 3084 // Set MTYPE = 2 3085 RsrcDataFormat |= (2ULL << 59); 3086 
} 3087 3088 return RsrcDataFormat; 3089 } 3090 3091 uint64_t SIInstrInfo::getScratchRsrcWords23() const { 3092 uint64_t Rsrc23 = getDefaultRsrcDataFormat() | 3093 AMDGPU::RSRC_TID_ENABLE | 3094 0xffffffff; // Size; 3095 3096 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; 3097 3098 Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) | 3099 // IndexStride = 64 3100 (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT); 3101 3102 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 3103 // Clear them unless we want a huge stride. 3104 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 3105 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; 3106 3107 return Rsrc23; 3108 } 3109 3110 bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const { 3111 unsigned Opc = MI->getOpcode(); 3112 3113 return isSMRD(Opc); 3114 } 3115 3116 bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const { 3117 unsigned Opc = MI->getOpcode(); 3118 3119 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); 3120 } 3121 3122 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 3123 unsigned Opc = MI.getOpcode(); 3124 const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); 3125 unsigned DescSize = Desc.getSize(); 3126 3127 // If we have a definitive size, we can use it. Otherwise we need to inspect 3128 // the operands to know the size. 3129 if (DescSize == 8 || DescSize == 4) 3130 return DescSize; 3131 3132 assert(DescSize == 0); 3133 3134 // 4-byte instructions may have a 32-bit literal encoded after them. Check 3135 // operands that coud ever be literals. 3136 if (isVALU(MI) || isSALU(MI)) { 3137 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3138 if (Src0Idx == -1) 3139 return 4; // No operands. 
3140 3141 if (isLiteralConstant(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx))) 3142 return 8; 3143 3144 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 3145 if (Src1Idx == -1) 3146 return 4; 3147 3148 if (isLiteralConstant(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx))) 3149 return 8; 3150 3151 return 4; 3152 } 3153 3154 switch (Opc) { 3155 case TargetOpcode::IMPLICIT_DEF: 3156 case TargetOpcode::KILL: 3157 case TargetOpcode::DBG_VALUE: 3158 case TargetOpcode::BUNDLE: 3159 case TargetOpcode::EH_LABEL: 3160 return 0; 3161 case TargetOpcode::INLINEASM: { 3162 const MachineFunction *MF = MI.getParent()->getParent(); 3163 const char *AsmStr = MI.getOperand(0).getSymbolName(); 3164 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); 3165 } 3166 default: 3167 llvm_unreachable("unable to find instruction size"); 3168 } 3169 } 3170 3171 ArrayRef<std::pair<int, const char *>> 3172 SIInstrInfo::getSerializableTargetIndices() const { 3173 static const std::pair<int, const char *> TargetIndices[] = { 3174 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 3175 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 3176 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 3177 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 3178 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 3179 return makeArrayRef(TargetIndices); 3180 } 3181 3182 /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 3183 /// post-RA version of misched uses CreateTargetMIHazardRecognizer. 3184 ScheduleHazardRecognizer * 3185 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 3186 const ScheduleDAG *DAG) const { 3187 return new GCNHazardRecognizer(DAG->MF); 3188 } 3189 3190 /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 3191 /// pass. 
3192 ScheduleHazardRecognizer * 3193 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 3194 return new GCNHazardRecognizer(MF); 3195 } 3196