//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
///        operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;


  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for that one check.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                        unsigned &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();
  if (isDS(Opc)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive. We
    // will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt->mayLoad())
        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
      else {
        assert(LdSt->mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(Opc) || isMTBUF(Opc)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(Opc)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}

bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                     MachineInstr *SecondLdSt,
                                     unsigned NumLoads) const {
  unsigned Opc0 = FirstLdSt->getOpcode();
  unsigned Opc1 = SecondLdSt->getOpcode();

  // TODO: This needs finer tuning
  if (NumLoads > 4)
    return false;

  if (isDS(Opc0) && isDS(Opc1))
    return true;

  if (isSMRD(Opc0) && isSMRD(Opc1))
    return true;

  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
    return true;

  return false;
}

void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();

  int NewOpc;

  // Try to map original to commuted opcode
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {

  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  int Opcode = -1;

  if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling
    // SGPRs.
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_S32_SAVE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    }
  } else if (RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
    MFI->setHasSpilledVGPRs();

    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_V32_SAVE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_V64_SAVE;  break;
    case 96:  Opcode = AMDGPU::SI_SPILL_V96_SAVE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
    }
  }

  if (Opcode != -1) {
    FrameInfo->setObjectAlignment(FrameIndex, 4);
    BuildMI(MBB, MI, DL, get(Opcode))
      .addReg(SrcReg)
      .addFrameIndex(FrameIndex)
      // Place-holder registers; these will be filled in by
      // SIPrepareScratchRegs.
      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
      .addReg(AMDGPU::SGPR0, RegState::Undef);
  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  int Opcode = -1;

  if (RI.isSGPRClass(RC)) {
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    }
  } else if (RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_V32_RESTORE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_V64_RESTORE;  break;
    case 96:  Opcode = AMDGPU::SI_SPILL_V96_RESTORE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
    }
  }

  if (Opcode != -1) {
    FrameInfo->setObjectAlignment(FrameIndex, 4);
    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
      .addFrameIndex(FrameIndex)
      // Place-holder registers; these will be filled in by
      // SIPrepareScratchRegs.
      .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
      .addReg(AMDGPU::SGPR0, RegState::Undef);

  } else {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
  }
}

/// \param @Offset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;


    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {

      unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
      unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
      unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SI_CONSTDATA_PTR: {
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);

    // Add 32-bit offset from this instruction to the start of the constant data.
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
      .addReg(RegLo)
      .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
      .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
      .addReg(RegHi)
      .addImm(0)
      .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
      .addReg(AMDGPU::SCC, RegState::Implicit);
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI->getOperand(1).getReg();
    unsigned Src1 = MI->getOperand(2).getReg();
    const MachineOperand &SrcCond = MI->getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCond);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  if (MI->getNumOperands() < 3)
    return nullptr;

  int CommutedOpcode = commuteOpcode(*MI);
  if (CommutedOpcode == -1)
    return nullptr;

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  assert(Src0Idx != -1 && "Should always have src0 operand");

  MachineOperand &Src0 = MI->getOperand(Src0Idx);
  if (!Src0.isReg())
    return nullptr;

  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return nullptr;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  // Make sure it's legal to commute operands for VOP2.
  if (isVOP2(MI->getOpcode()) &&
      (!isOperandLegal(MI, Src0Idx, &Src1) ||
       !isOperandLegal(MI, Src1Idx, &Src0))) {
    return nullptr;
  }

  if (!Src1.isReg()) {
    // Allow commuting instructions with Imm operands.
    if (NewMI || !Src1.isImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // Be sure to copy the source modifiers to the right place.
    if (MachineOperand *Src0Mods
          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      MachineOperand *Src1Mods
        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);

      int Src0ModsVal = Src0Mods->getImm();
      if (!Src1Mods && Src0ModsVal != 0)
        return nullptr;

      // XXX - This assert might be a lie. It might be useful to have a neg
      // modifier with 0.0.
      int Src1ModsVal = Src1Mods->getImm();
      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

      Src1Mods->setImm(Src0ModsVal);
      Src0Mods->setImm(Src1ModsVal);
    }

    unsigned Reg = Src0.getReg();
    unsigned SubReg = Src0.getSubReg();
    if (Src1.isImm())
      Src0.ChangeToImmediate(Src1.getImm());
    else
      llvm_unreachable("Should only have immediates");

    Src1.ChangeToRegister(Reg, false);
    Src1.setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(CommutedOpcode));

  return MI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
                                        unsigned &SrcOpIdx1,
                                        unsigned &SrcOpIdx2) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isCommutable())
    return false;

  unsigned Opc = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
  // immediate.
  if (!MI->getOperand(Src0Idx).isReg())
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  if (!MI->getOperand(Src1Idx).isReg())
    return false;

  // If any source modifiers are set, the generic instruction commuting won't
  // understand how to copy the source modifiers.
  if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
      hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
    return false;

  SrcOpIdx1 = Src0Idx;
  SrcOpIdx2 = Src1Idx;
  return true;
}

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI->getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_f32
    // We should only expect these to be on src0 due to canonicalizations.
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() ||
          (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
        return false;

      if (!Src2->isReg() ||
          (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
        return false;

      // We need to do some weird looking operand shuffling since the madmk
      // operands are out of the normal expected order with the multiplied
      // constant as the last operand.
      //
      // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
      // src0 -> src2 K
      // src1 -> src0
      // src2 -> src1

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      unsigned Src2Reg = Src2->getReg();
      unsigned Src2SubReg = Src2->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      Src1->setReg(Src2Reg);
      Src1->setSubReg(Src2SubReg);
      Src1->setIsKill(Src2->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::src2));
      // ChangeToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADMK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }

    // Added part is the constant: Use v_madak_f32
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      if (!Src0->isImm() &&
          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
        return false;

      if (!Src1->isReg() ||
          (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
        return false;

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      // ChangeToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADAK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }
  }

  return false;
}

static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, Offset0;
  unsigned BaseReg1, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  unsigned Opc0 = MIa->getOpcode();
  unsigned Opc1 = MIb->getOpcode();

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(Opc0)) {
    if (isDS(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1);
  }

  if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
    if (isMUBUF(Opc1) || isMTBUF(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isSMRD(Opc1);
  }

  if (isSMRD(Opc0)) {
    if (isSMRD(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
  }

  if (isFLAT(Opc0)) {
    if (isFLAT(Opc1))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineBasicBlock::iterator &MI,
                                                 LiveVariables *LV) const {

  switch (MI->getOpcode()) {
  default: return nullptr;
  case AMDGPU::V_MAC_F32_e64: break;
  case AMDGPU::V_MAC_F32_e32: {
    const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
    if (Src0->isImm() && !isInlineConstant(*Src0, 4))
      return nullptr;
    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
  const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
  const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);

  return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
      .addOperand(*Dst)
      .addImm(0) // Src0 mods
      .addOperand(*Src0)
      .addImm(0) // Src1 mods
      .addOperand(*Src1)
      .addImm(0) // Src2 mods
      .addOperand(*Src2)
      .addImm(0)  // clamp
      .addImm(0); // omod
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int64_t SVal = Imm.getSExtValue();
  if (SVal >= -16 && SVal <= 64)
    return true;

  if (Imm.getBitWidth() == 64) {
    uint64_t Val = Imm.getZExtValue();
    return (DoubleToBits(0.0) == Val) ||
           (DoubleToBits(1.0) == Val) ||
           (DoubleToBits(-1.0) == Val) ||
           (DoubleToBits(0.5) == Val) ||
           (DoubleToBits(-0.5) == Val) ||
           (DoubleToBits(2.0) == Val) ||
           (DoubleToBits(-2.0) == Val) ||
           (DoubleToBits(4.0) == Val) ||
           (DoubleToBits(-4.0) == Val);
  }

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = Imm.getZExtValue();

  return (FloatToBits(0.0f) == Val) ||
         (FloatToBits(1.0f) == Val) ||
         (FloatToBits(-1.0f) == Val) ||
         (FloatToBits(0.5f) == Val) ||
         (FloatToBits(-0.5f) == Val) ||
         (FloatToBits(2.0f) == Val) ||
         (FloatToBits(-2.0f) == Val) ||
         (FloatToBits(4.0f) == Val) ||
         (FloatToBits(-4.0f) == Val);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   unsigned OpSize) const {
  if (MO.isImm()) {
    // MachineOperand provides no way to tell the true operand size, since it
    // only records a 64-bit value. We need to know the size to determine if a
    // 32-bit floating point immediate bit pattern is legal for an integer
    // immediate. It would be for any 32-bit integer operand, but would not be
    // for a 64-bit one.

    unsigned BitSize = 8 * OpSize;
    return isInlineConstant(APInt(BitSize, MO.getImm(), true));
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
                                    unsigned OpSize) const {
  return MO.isImm() && !isInlineConstant(MO, OpSize);
}

static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                                    const MachineOperand &MO) const {
  const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];

  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());

  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
    return true;

  if (OpInfo.RegClass < 0)
    return false;

  unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
  if (isLiteralConstant(MO, OpSize))
    return RI.opCanUseLiteralConstant(OpInfo.OperandType);

  return RI.opCanUseInlineConstant(OpInfo.OperandType);
}

bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
  int Op32 = AMDGPU::getVOPe32(Opcode);
  if (Op32 == -1)
    return false;

  return pseudoToMCOpcode(Op32) != -1;
}

bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
  // The src0_modifier operand is present on all instructions
  // that have modifiers.

  return AMDGPU::getNamedOperandIdx(Opcode,
                                    AMDGPU::OpName::src0_modifiers) != -1;
}

bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
                                  unsigned OpName) const {
  const MachineOperand *Mods = getNamedOperand(MI, OpName);
  return Mods && Mods->getImm();
}

bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                  const MachineOperand &MO,
                                  unsigned OpSize) const {
  // Literal constants use the constant bus.
  if (isLiteralConstant(MO, OpSize))
    return true;

  if (!MO.isReg() || !MO.isUse())
    return false;

  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));

  // FLAT_SCR is just an SGPR pair.
  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
    return true;

  // EXEC register uses the constant bus.
  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
    return true;

  // SGPRs use the constant bus
  if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
      (!MO.isImplicit() &&
       (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
        AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
    return true;
  }

  return false;
}

bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).isFPImm()) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
      return false;
    }

    int RegClass = Desc.OpInfo[i].RegClass;

    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER:
      if (MI->getOperand(i).isImm()) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case AMDGPU::OPERAND_REG_IMM32:
      break;
    case AMDGPU::OPERAND_REG_INLINE_C:
      if (isLiteralConstant(MI->getOperand(i),
                            RI.getRegClass(RegClass)->getSize())) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      // Check if this operand is an immediate.
      // FrameIndex operands will be replaced by immediates, so they are
      // allowed.
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }


  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    // Only look at the true operands. Only a real operand can use the constant
    // bus, and we don't want to check pseudo-operands like the source modifier
    // flags.
    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int OpIdx : OpIndices) {
      if (OpIdx == -1)
        break;
      const MachineOperand &MO = MI->getOperand(OpIdx);
      if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
        if (MO.isReg()) {
          if (MO.getReg() != SGPRUsed)
            ++ConstantBusCount;
          SGPRUsed = MO.getReg();
        } else {
          ++ConstantBusCount;
        }
      }
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI->getOperand(Src0Idx);
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    const MachineOperand &Src2 = MI->getOperand(Src2Idx);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  return true;
}

unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1) {
    unsigned Reg = MI.getOperand(OpNo).getReg();

    if (TargetRegisterInfo::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return RI.getPhysRegClass(Reg);
  }

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineBasicBlock *MBB = MI->getParent();
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg())
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = AMDGPU::S_MOV_B32;


  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
    VRC = &AMDGPU::VReg_64RegClass;
  else
    VRC = &AMDGPU::VGPR_32RegClass;

  unsigned Reg = MRI.createVirtualRegister(VRC);
  DebugLoc DL = MBB->findDebugLoc(I);
  BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
    .addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
    .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
    .addReg(NewSuperReg, 0, SubIdx);

  return SubReg;
}

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
    .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
    .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
    .addReg(LoDst)
    .addImm(AMDGPU::sub0)
    .addReg(HiDst)
    .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}

// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
  assert(Inst->getNumExplicitOperands() == 3);
  MachineOperand Op1 = Inst->getOperand(1);
  Inst->RemoveOperand(1);
  Inst->addOperand(Op1);
}

bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
                                 const MachineOperand *MO) const {
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  const MCInstrDesc &InstDesc = get(MI->getOpcode());
  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
  const TargetRegisterClass *DefinedRC =
      OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
  if (!MO)
    MO = &MI->getOperand(OpIdx);

  if (isVALU(InstDesc.Opcode) &&
      usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
    unsigned SGPRUsed =
        MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (i == OpIdx)
        continue;
      const MachineOperand &Op = MI->getOperand(i);
      if (Op.isReg() && Op.getReg() != SGPRUsed &&
          usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
        return false;
      }
    }
  }

  if (MO->isReg()) {
    assert(DefinedRC);
    const TargetRegisterClass *RC =
        TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
            MRI.getRegClass(MO->getReg()) :
            RI.getPhysRegClass(MO->getReg());

    // In order to be legal, the common sub-class must be equal to the
    // class of the current operand. For example:
    //
    // v_mov_b32 s0 ; Operand defined as vsrc_32
    //              ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
    //
    // s_sendmsg 0, s0 ; Operand defined as m0reg
    //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL

    return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
  }


  // Handle non-register types that are treated like immediates.
  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());

  if (!DefinedRC) {
    // This operand expects an immediate.
    return true;
  }

  return isImmOperandLegal(MI, OpIdx, *MO);
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    // Legalize src0
    if (!isOperandLegal(MI, Src0Idx))
      legalizeOpWithMove(MI, Src0Idx);

    // Legalize src1
    if (isOperandLegal(MI, Src1Idx))
      return;

    // Usually src0 of VOP2 instructions allow more types of inputs
    // than src1, so try to commute the instruction to decrease our
    // chances of having to insert a MOV instruction to legalize src1.
    if (MI->isCommutable()) {
      if (commuteInstruction(MI))
        // If we are successful in commuting, then we know MI is legal, so
        // we are done.
        return;
    }

    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };

    // Find the one SGPR operand we are allowed to use.
    unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);

    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        break;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    // Legalize src0
    if (!isOperandLegal(MI, Src0Idx))
      legalizeOpWithMove(MI, Src0Idx);

    // Legalize src1
    if (isOperandLegal(MI, Src1Idx))
      return;

    // Usually src0 of VOP2 instructions allows more types of inputs
    // than src1, so try to commute the instruction to decrease our
    // chances of having to insert a MOV instruction to legalize src1.
    if (MI->isCommutable()) {
      if (commuteInstruction(MI))
        // If we are successful in commuting, then we know MI is legal, so
        // we are done.
        return;
    }

    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };

    // Find the one SGPR operand we are allowed to use.
    unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);

    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        break;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be VGPRs;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
        .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
        .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.
  int SRsrcIdx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (SRsrcIdx != -1) {
    // We have an MUBUF instruction
    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
                             RI.getRegClass(SRsrcRC))) {
      // The operands are legal.
      // FIXME: We may need to legalize operands besides srsrc.
      return;
    }

    MachineBasicBlock &MBB = *MI->getParent();

    // Extract the ptr from the resource descriptor.

    // SRsrcPtrLo = srsrc:sub0
    unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
        &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);

    // SRsrcPtrHi = srsrc:sub1
    unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
        &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);

    // Create an empty resource descriptor
    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    // Zero64 = 0
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
      .addImm(0);

    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
      .addImm(RsrcDataFormat & 0xFFFFFFFF);

    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
      .addImm(RsrcDataFormat >> 32);

    // NewSRsrc = {Zero64, SRsrcFormat}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
      .addReg(Zero64)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(SRsrcFormatLo)
      .addImm(AMDGPU::sub2)
      .addReg(SRsrcFormatHi)
      .addImm(AMDGPU::sub3);

    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    unsigned NewVAddrLo;
    unsigned NewVAddrHi;
    if (VAddr) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
      NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
        .addReg(SRsrcPtrLo)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
        .addReg(AMDGPU::VCC, RegState::ImplicitDefine);

      // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
        .addReg(SRsrcPtrHi)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
        .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
        .addReg(AMDGPU::VCC, RegState::Implicit);

    } else {
      // This instruction is the _OFFSET variant, so we need to convert it to
      // ADDR64.
      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);

      // Create the new instruction.
      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
      MachineInstr *Addr64 =
          BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
            .addOperand(*VData)
            .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                        // This will be replaced later
                                        // with the new value of vaddr.
            .addOperand(*SRsrc)
            .addOperand(*SOffset)
            .addOperand(*Offset)
            .addImm(0) // glc
            .addImm(0) // slc
            .addImm(0); // tfe

      MI->removeFromParent();
      MI = Addr64;

      NewVAddrLo = SRsrcPtrLo;
      NewVAddrHi = SRsrcPtrHi;
      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    }

    // NewVaddr = {NewVaddrHi, NewVaddrLo}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
      .addReg(NewVAddrLo)
      .addImm(AMDGPU::sub0)
      .addReg(NewVAddrHi)
      .addImm(AMDGPU::sub1);

    // Update the instruction to use NewVaddr
    VAddr->setReg(NewVAddr);
    // Update the instruction to use NewSRsrc
    SRsrc->setReg(NewSRsrc);
  }
}
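// splitSMRD() below breaks one wide scalar load into two loads of half the
// width and recombines the halves with a REG_SEQUENCE. A loose sketch for the
// _IMM form on SI (invented registers and offsets, not actual output):
//
//   s_load_dwordx8 s[0:7], s[8:9], 0x0
//
// becomes roughly
//
//   s_load_dwordx4 s[0:3], s[8:9], 0x0
//   s_load_dwordx4 s[4:7], s[8:9], 0x4   ; +16 bytes = 4 dwords on SI
//
// with a REG_SEQUENCE gluing the two halves back together.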
void SIInstrInfo::splitSMRD(MachineInstr *MI,
                            const TargetRegisterClass *HalfRC,
                            unsigned HalfImmOp, unsigned HalfSGPROp,
                            MachineInstr *&Lo, MachineInstr *&Hi) const {

  DebugLoc DL = MI->getDebugLoc();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RegLo = MRI.createVirtualRegister(HalfRC);
  unsigned RegHi = MRI.createVirtualRegister(HalfRC);
  unsigned HalfSize = HalfRC->getSize();
  const MachineOperand *OffOp =
      getNamedOperand(*MI, AMDGPU::OpName::offset);
  const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);

  // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
  // on VI.

  bool IsKill = SBase->isKill();
  if (OffOp) {
    bool isVI =
        MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
        AMDGPUSubtarget::VOLCANIC_ISLANDS;
    unsigned OffScale = isVI ? 1 : 4;
    // Handle the _IMM variant
    unsigned LoOffset = OffOp->getImm() * OffScale;
    unsigned HiOffset = LoOffset + HalfSize;
    Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
           // Use addReg instead of addOperand
           // to make sure kill flag is cleared.
           .addReg(SBase->getReg(), 0, SBase->getSubReg())
           .addImm(LoOffset / OffScale);

    if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
      unsigned OffsetSGPR =
          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
        .addImm(HiOffset); // The offset in register is in bytes.
      Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
             .addReg(SBase->getReg(), getKillRegState(IsKill),
                     SBase->getSubReg())
             .addReg(OffsetSGPR);
    } else {
      Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
             .addReg(SBase->getReg(), getKillRegState(IsKill),
                     SBase->getSubReg())
             .addImm(HiOffset / OffScale);
    }
  } else {
    // Handle the _SGPR variant
    MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
    Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
           .addReg(SBase->getReg(), 0, SBase->getSubReg())
           .addOperand(*SOff);
    unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
      .addOperand(*SOff)
      .addImm(HalfSize);
    Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
           .addReg(SBase->getReg(), getKillRegState(IsKill),
                   SBase->getSubReg())
           .addReg(OffsetSGPR);
  }

  unsigned SubLo, SubHi;
  switch (HalfSize) {
  case 4:
    SubLo = AMDGPU::sub0;
    SubHi = AMDGPU::sub1;
    break;
  case 8:
    SubLo = AMDGPU::sub0_sub1;
    SubHi = AMDGPU::sub2_sub3;
    break;
  case 16:
    SubLo = AMDGPU::sub0_sub1_sub2_sub3;
    SubHi = AMDGPU::sub4_sub5_sub6_sub7;
    break;
  case 32:
    SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
    break;
  default:
    llvm_unreachable("Unhandled HalfSize");
  }

  BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
    .addOperand(MI->getOperand(0))
    .addReg(RegLo)
    .addImm(SubLo)
    .addReg(RegHi)
    .addImm(SubHi);
}
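// moveSMRDToVALU() below rewrites a scalar load whose result must live in
// VGPRs as a MUBUF access: the opcode is switched with getVALUOp() and a
// 128-bit resource descriptor is synthesized for srsrc. Summarizing the code
// below (not a hardware reference):
//
//   dword0 = register offset (or 0, or an oversized immediate offset)
//   dword1 = 0
//   dword2 = low 32 bits of getDefaultRsrcDataFormat()
//   dword3 = high 32 bits of getDefaultRsrcDataFormat()
//
// Loads wider than 128 bits are first split with splitSMRD() and the halves
// are converted recursively.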
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
  assert(DstIdx != -1);
  unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
  switch (RI.getRegClass(DstRCID)->getSize()) {
  case 4:
  case 8:
  case 16: {
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;

    if (MI->getOperand(2).isReg()) {
      RegOffset = MI->getOperand(2).getReg();
      ImmOffset = 0;
    } else {
      assert(MI->getOperand(2).isImm());
      // SMRD instructions take a dword offset on SI and a byte offset on VI,
      // and MUBUF instructions always take a byte offset.
      ImmOffset = MI->getOperand(2).getImm();
      if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
          AMDGPUSubtarget::SEA_ISLANDS)
        ImmOffset <<= 2;
      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

      if (isUInt<12>(ImmOffset)) {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
          .addImm(0);
      } else {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
          .addImm(ImmOffset);
        ImmOffset = 0;
      }
    }

    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    unsigned DWord0 = RegOffset;
    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
      .addImm(0);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
      .addImm(RsrcDataFormat & 0xFFFFFFFF);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
      .addImm(RsrcDataFormat >> 32);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
      .addReg(DWord0)
      .addImm(AMDGPU::sub0)
      .addReg(DWord1)
      .addImm(AMDGPU::sub1)
      .addReg(DWord2)
      .addImm(AMDGPU::sub2)
      .addReg(DWord3)
      .addImm(AMDGPU::sub3);
    MI->setDesc(get(NewOpcode));
    if (MI->getOperand(2).isReg()) {
      MI->getOperand(2).setReg(SRsrc);
    } else {
      MI->getOperand(2).ChangeToRegister(SRsrc, false);
    }
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe

    const TargetRegisterClass *NewDstRC =
        RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);

    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    break;
  }
  case 32: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
              AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }

  case 64: {
    MachineInstr *Lo, *Hi;
    splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
              AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
    MI->eraseFromParent();
    moveSMRDToVALU(Lo, MRI);
    moveSMRDToVALU(Hi, MRI);
    break;
  }
  }
}
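// moveToVALU() below is a worklist algorithm: TopInst is rewritten to its
// VALU form, and any user of the rewritten result that cannot read a VGPR is
// queued so it gets converted (or has its operands legalized) as well. A
// condensed sketch of the loop as implemented below:
//
//   Worklist = { TopInst }
//   while Worklist is not empty:
//     Inst = pop()
//     expand the special cases (64-bit ops, SMRD, *REV shifts on VI, ...)
//     switch Inst to the VALU opcode, drop SCC uses, patch up operands
//     retarget the result to a VGPR class and legalizeOperands(Inst)
//     push users of the result that still cannot read a VGPR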
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      if (isSMRD(Inst->getOpcode())) {
        moveSMRDToVALU(Inst, MRI);
      }
      break;
    case AMDGPU::S_MOV_B64: {
      DebugLoc DL = Inst->getDebugLoc();

      // If the source operand is a register we can replace this with a
      // copy.
      if (Inst->getOperand(1).isReg()) {
        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
          .addOperand(Inst->getOperand(0))
          .addOperand(Inst->getOperand(1));
        Worklist.push_back(Copy);
      } else {
        // Otherwise, we need to split this into two movs, because there is
        // no 64-bit VALU move instruction.
        unsigned Reg = Inst->getOperand(0).getReg();
        unsigned Dst = split64BitImm(Worklist,
                                     Inst,
                                     MRI,
                                     MRI.getRegClass(Reg),
                                     Inst->getOperand(1));
        MRI.replaceRegWith(Reg, Dst);
      }
      Inst->eraseFromParent();
      continue;
    }
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_I64: {
      splitScalar64BitBFE(Worklist, Inst);
      Inst->eraseFromParent();
      continue;
    }

    case AMDGPU::S_LSHL_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHL_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
      break;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it, and
    // we're just about to add the implicit use / defs of VCC; we don't want
    // both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }
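    // Illustration of the fix-ups below (invented registers, not real
    // output): a scalar sign-extension such as
    //   s_sext_i32_i8 s1, s0
    // becomes a bitfield extract once it reaches the VALU, so the missing
    // offset/width operands have to be appended, giving roughly
    //   v_bfe_i32 v1, v0, 0, 8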
    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f;               // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
      Inst->RemoveOperand(2);                     // Remove old immediate.
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
    }

    // Update the destination register class.

    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Opcode) {
    // For target instructions, getOpRegClass just returns the virtual
    // register class associated with the operand, so we need to find an
    // equivalent VGPR register class in order to move the instruction to the
    // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Legalize the operands
    legalizeOperands(Inst);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
         E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VGPR_32RegClass;
}
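// The split helpers below share one pattern: a 64-bit scalar operation is
// split into two 32-bit operations on the sub0/sub1 halves, the halves are
// queued on the worklist (so moveToVALU() converts them in turn), and a
// REG_SEQUENCE reassembles the full result. As a loose illustration for the
// binary case (invented registers, not actual output), splitting and then
// converting
//
//   s_and_b64 s[2:3], s[0:1], s[4:5]
//
// ends up as something like
//
//   v_and_b32 v2, v0, v4   ; low halves  (sub0)
//   v_and_b32 v3, v1, v5   ; high halves (sub1)
//
// plus a REG_SEQUENCE packing v2/v3 back into one 64-bit value.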
void SIInstrInfo::splitScalar64BitUnaryOp(
    SmallVectorImpl<MachineInstr *> &Worklist,
    MachineInstr *Inst,
    unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
      MRI.getRegClass(Src0.getReg()) :
      &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

void SIInstrInfo::splitScalar64BitBinaryOp(
    SmallVectorImpl<MachineInstr *> &Worklist,
    MachineInstr *Inst,
    unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
      MRI.getRegClass(Src0.getReg()) :
      &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
      MRI.getRegClass(Src1.getReg()) :
      &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                                       MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
      MRI.getRegClass(Src.getReg()) :
      &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  Worklist.push_back(First);
  Worklist.push_back(Second);
}
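// S_BFE_* packs its bit offset and bit width into one immediate: the offset
// lives in bits [5:0] and the width in bits [22:16]. For example, an
// immediate of 0x100000 decodes as offset 0 and width 16, i.e. a plain
// sign-extension of the low 16 bits, which is the only shape the helper below
// handles (offset 0, width <= 32, S_BFE_I64 only).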
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                                      MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  uint32_t Imm = Inst->getOperand(2).getImm();
  uint32_t Offset = Imm & 0x3f;               // Extract bits [5:0].
  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

  (void) Offset;

  // Only sext_inreg cases handled.
  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
         BitWidth <= 32 &&
         Offset == 0 &&
         "Not implemented");

  if (BitWidth < 32) {
    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
      .addImm(0)
      .addImm(BitWidth);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
      .addImm(31)
      .addReg(MidRegLo);

    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
      .addReg(MidRegLo)
      .addImm(AMDGPU::sub0)
      .addReg(MidRegHi)
      .addImm(AMDGPU::sub1);

    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    return;
  }

  MachineOperand &Src = Inst->getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
    .addReg(Src.getReg(), 0, AMDGPU::sub0);

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
    .addImm(AMDGPU::sub0)
    .addReg(TmpReg)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
}

unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
                                   int OpIndices[3]) const {
  const MCInstrDesc &Desc = get(MI->getOpcode());

  // Find the one SGPR operand we are allowed to use.
  unsigned SGPRReg = AMDGPU::NoRegister;

  // First we need to consider the instruction's operand requirements before
  // legalizing. Some operands are required to be SGPRs, such as implicit uses
  // of VCC, but we are still bound by the constant bus requirement to only use
  // one.
  //
  // If the operand's class is an SGPR, we can never move it.

  for (const MachineOperand &MO : MI->implicit_operands()) {
    // We only care about reads.
    if (MO.isDef())
      continue;

    if (MO.getReg() == AMDGPU::VCC)
      return AMDGPU::VCC;

    if (MO.getReg() == AMDGPU::FLAT_SCR)
      return AMDGPU::FLAT_SCR;
  }

  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    if (Idx == -1)
      break;

    const MachineOperand &MO = MI->getOperand(Idx);
    if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
      SGPRReg = MO.getReg();

    if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
      UsedSGPRs[i] = MO.getReg();
  }

  if (SGPRReg != AMDGPU::NoRegister)
    return SGPRReg;

  // We don't have a required SGPR operand, so we have a bit more freedom in
  // selecting operands to move.

  // Try to select the most used SGPR. If an SGPR is equal to one of the
  // others, we choose that.
  //
  // e.g.
  // V_FMA_F32 v0, s0, s0, s0 -> No moves
  // V_FMA_F32 v0, s0, s1, s0 -> Move s1

  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[0];
  }

  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }

  return SGPRReg;
}
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
    MachineBasicBlock *MBB,
    MachineBasicBlock::iterator I,
    unsigned ValueReg,
    unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
    .addReg(IndirectBaseReg, RegState::Define)
    .addOperand(I->getOperand(0))
    .addReg(IndirectBaseReg)
    .addReg(OffsetReg)
    .addImm(0)
    .addReg(ValueReg);
}

MachineInstrBuilder SIInstrInfo::buildIndirectRead(
    MachineBasicBlock *MBB,
    MachineBasicBlock::iterator I,
    unsigned ValueReg,
    unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
    .addOperand(I->getOperand(0))
    .addOperand(I->getOperand(1))
    .addReg(IndirectBaseReg)
    .addReg(OffsetReg)
    .addImm(0);
}
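// The loops below reserve every register tuple that overlaps the VGPR range
// [Begin, End] used for indirect addressing. A tuple of N consecutive 32-bit
// registers that starts up to N - 1 registers before Begin still reaches into
// that range, which is presumably why the start index backs up by 1, 2, 3, 7
// and 15 for the 64-, 96-, 128-, 256- and 512-bit classes respectively.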
void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}

MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                             unsigned OperandName) const {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  if (Idx == -1)
    return nullptr;

  return &MI.getOperand(Idx);
}

uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
  if (ST.isAmdHsaOS()) {
    RsrcDataFormat |= (1ULL << 56);

    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      // Set MTYPE = 2
      RsrcDataFormat |= (2ULL << 59);
  }

  return RsrcDataFormat;
}