1 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// R600 Implementation of TargetInstrInfo. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "R600InstrInfo.h" 16 #include "AMDGPU.h" 17 #include "AMDGPUInstrInfo.h" 18 #include "AMDGPUSubtarget.h" 19 #include "R600Defines.h" 20 #include "R600FrameLowering.h" 21 #include "R600RegisterInfo.h" 22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 23 #include "Utils/AMDGPUBaseInfo.h" 24 #include "llvm/ADT/BitVector.h" 25 #include "llvm/ADT/SmallSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/CodeGen/MachineBasicBlock.h" 28 #include "llvm/CodeGen/MachineFrameInfo.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineInstr.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include "llvm/CodeGen/MachineOperand.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/TargetRegisterInfo.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include <algorithm> 38 #include <cassert> 39 #include <cstdint> 40 #include <cstring> 41 #include <iterator> 42 #include <utility> 43 #include <vector> 44 45 using namespace llvm; 46 47 #define GET_INSTRINFO_CTOR_DTOR 48 #include "AMDGPUGenDFAPacketizer.inc" 49 50 R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) 51 : AMDGPUInstrInfo(ST), RI(), ST(ST) {} 52 53 bool R600InstrInfo::isVector(const MachineInstr &MI) const { 54 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 55 } 56 57 void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 58 MachineBasicBlock::iterator MI, 59 const DebugLoc &DL, unsigned DestReg, 
60 unsigned SrcReg, bool KillSrc) const { 61 unsigned VectorComponents = 0; 62 if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || 63 AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && 64 (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || 65 AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { 66 VectorComponents = 4; 67 } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || 68 AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && 69 (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || 70 AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { 71 VectorComponents = 2; 72 } 73 74 if (VectorComponents > 0) { 75 for (unsigned I = 0; I < VectorComponents; I++) { 76 unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I); 77 buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 78 RI.getSubReg(DestReg, SubRegIndex), 79 RI.getSubReg(SrcReg, SubRegIndex)) 80 .addReg(DestReg, 81 RegState::Define | RegState::Implicit); 82 } 83 } else { 84 MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 85 DestReg, SrcReg); 86 NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) 87 .setIsKill(KillSrc); 88 } 89 } 90 91 /// \returns true if \p MBBI can be moved into a new basic. 
92 bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, 93 MachineBasicBlock::iterator MBBI) const { 94 for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), 95 E = MBBI->operands_end(); I != E; ++I) { 96 if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) && 97 I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg())) 98 return false; 99 } 100 return true; 101 } 102 103 bool R600InstrInfo::isMov(unsigned Opcode) const { 104 switch(Opcode) { 105 default: 106 return false; 107 case AMDGPU::MOV: 108 case AMDGPU::MOV_IMM_F32: 109 case AMDGPU::MOV_IMM_I32: 110 return true; 111 } 112 } 113 114 bool R600InstrInfo::isReductionOp(unsigned Opcode) const { 115 return false; 116 } 117 118 bool R600InstrInfo::isCubeOp(unsigned Opcode) const { 119 switch(Opcode) { 120 default: return false; 121 case AMDGPU::CUBE_r600_pseudo: 122 case AMDGPU::CUBE_r600_real: 123 case AMDGPU::CUBE_eg_pseudo: 124 case AMDGPU::CUBE_eg_real: 125 return true; 126 } 127 } 128 129 bool R600InstrInfo::isALUInstr(unsigned Opcode) const { 130 unsigned TargetFlags = get(Opcode).TSFlags; 131 132 return (TargetFlags & R600_InstFlag::ALU_INST); 133 } 134 135 bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { 136 unsigned TargetFlags = get(Opcode).TSFlags; 137 138 return ((TargetFlags & R600_InstFlag::OP1) | 139 (TargetFlags & R600_InstFlag::OP2) | 140 (TargetFlags & R600_InstFlag::OP3)); 141 } 142 143 bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { 144 unsigned TargetFlags = get(Opcode).TSFlags; 145 146 return ((TargetFlags & R600_InstFlag::LDS_1A) | 147 (TargetFlags & R600_InstFlag::LDS_1A1D) | 148 (TargetFlags & R600_InstFlag::LDS_1A2D)); 149 } 150 151 bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { 152 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; 153 } 154 155 bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { 156 if (isALUInstr(MI.getOpcode())) 157 return true; 158 if (isVector(MI) 
|| isCubeOp(MI.getOpcode())) 159 return true; 160 switch (MI.getOpcode()) { 161 case AMDGPU::PRED_X: 162 case AMDGPU::INTERP_PAIR_XY: 163 case AMDGPU::INTERP_PAIR_ZW: 164 case AMDGPU::INTERP_VEC_LOAD: 165 case AMDGPU::COPY: 166 case AMDGPU::DOT_4: 167 return true; 168 default: 169 return false; 170 } 171 } 172 173 bool R600InstrInfo::isTransOnly(unsigned Opcode) const { 174 if (ST.hasCaymanISA()) 175 return false; 176 return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); 177 } 178 179 bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { 180 return isTransOnly(MI.getOpcode()); 181 } 182 183 bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { 184 return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); 185 } 186 187 bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { 188 return isVectorOnly(MI.getOpcode()); 189 } 190 191 bool R600InstrInfo::isExport(unsigned Opcode) const { 192 return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); 193 } 194 195 bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { 196 return ST.hasVertexCache() && IS_VTX(get(Opcode)); 197 } 198 199 bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { 200 const MachineFunction *MF = MI.getParent()->getParent(); 201 return !AMDGPU::isCompute(MF->getFunction().getCallingConv()) && 202 usesVertexCache(MI.getOpcode()); 203 } 204 205 bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { 206 return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); 207 } 208 209 bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { 210 const MachineFunction *MF = MI.getParent()->getParent(); 211 return (AMDGPU::isCompute(MF->getFunction().getCallingConv()) && 212 usesVertexCache(MI.getOpcode())) || 213 usesTextureCache(MI.getOpcode()); 214 } 215 216 bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { 217 switch (Opcode) { 218 case AMDGPU::KILLGT: 219 case AMDGPU::GROUP_BARRIER: 220 return true; 221 
default: 222 return false; 223 } 224 } 225 226 bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { 227 return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; 228 } 229 230 bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { 231 return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; 232 } 233 234 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { 235 if (!isALUInstr(MI.getOpcode())) { 236 return false; 237 } 238 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 239 E = MI.operands_end(); 240 I != E; ++I) { 241 if (!I->isReg() || !I->isUse() || 242 TargetRegisterInfo::isVirtualRegister(I->getReg())) 243 continue; 244 245 if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) 246 return true; 247 } 248 return false; 249 } 250 251 int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { 252 static const unsigned SrcSelTable[][2] = { 253 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 254 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 255 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 256 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 257 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 258 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 259 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 260 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 261 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 262 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 263 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} 264 }; 265 266 for (const auto &Row : SrcSelTable) { 267 if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) { 268 return getOperandIdx(Opcode, Row[1]); 269 } 270 } 271 return -1; 272 } 273 274 SmallVector<std::pair<MachineOperand *, int64_t>, 3> 275 R600InstrInfo::getSrcs(MachineInstr &MI) const { 276 SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; 277 278 if (MI.getOpcode() == AMDGPU::DOT_4) { 279 static const unsigned OpTable[8][2] 
= { 280 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 281 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 282 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 283 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 284 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 285 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 286 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 287 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, 288 }; 289 290 for (unsigned j = 0; j < 8; j++) { 291 MachineOperand &MO = 292 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); 293 unsigned Reg = MO.getReg(); 294 if (Reg == AMDGPU::ALU_CONST) { 295 MachineOperand &Sel = 296 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); 297 Result.push_back(std::make_pair(&MO, Sel.getImm())); 298 continue; 299 } 300 301 } 302 return Result; 303 } 304 305 static const unsigned OpTable[3][2] = { 306 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 307 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 308 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 309 }; 310 311 for (unsigned j = 0; j < 3; j++) { 312 int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]); 313 if (SrcIdx < 0) 314 break; 315 MachineOperand &MO = MI.getOperand(SrcIdx); 316 unsigned Reg = MO.getReg(); 317 if (Reg == AMDGPU::ALU_CONST) { 318 MachineOperand &Sel = 319 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); 320 Result.push_back(std::make_pair(&MO, Sel.getImm())); 321 continue; 322 } 323 if (Reg == AMDGPU::ALU_LITERAL_X) { 324 MachineOperand &Operand = 325 MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); 326 if (Operand.isImm()) { 327 Result.push_back(std::make_pair(&MO, Operand.getImm())); 328 continue; 329 } 330 assert(Operand.isGlobal()); 331 } 332 Result.push_back(std::make_pair(&MO, 0)); 333 } 334 return Result; 335 } 336 337 std::vector<std::pair<int, unsigned>> 338 R600InstrInfo::ExtractSrcs(MachineInstr &MI, 339 const 
DenseMap<unsigned, unsigned> &PV, 340 unsigned &ConstCount) const { 341 ConstCount = 0; 342 const std::pair<int, unsigned> DummyPair(-1, 0); 343 std::vector<std::pair<int, unsigned>> Result; 344 unsigned i = 0; 345 for (const auto &Src : getSrcs(MI)) { 346 ++i; 347 unsigned Reg = Src.first->getReg(); 348 int Index = RI.getEncodingValue(Reg) & 0xff; 349 if (Reg == AMDGPU::OQAP) { 350 Result.push_back(std::make_pair(Index, 0U)); 351 } 352 if (PV.find(Reg) != PV.end()) { 353 // 255 is used to tells its a PS/PV reg 354 Result.push_back(std::make_pair(255, 0U)); 355 continue; 356 } 357 if (Index > 127) { 358 ConstCount++; 359 Result.push_back(DummyPair); 360 continue; 361 } 362 unsigned Chan = RI.getHWRegChan(Reg); 363 Result.push_back(std::make_pair(Index, Chan)); 364 } 365 for (; i < 3; ++i) 366 Result.push_back(DummyPair); 367 return Result; 368 } 369 370 static std::vector<std::pair<int, unsigned>> 371 Swizzle(std::vector<std::pair<int, unsigned>> Src, 372 R600InstrInfo::BankSwizzle Swz) { 373 if (Src[0] == Src[1]) 374 Src[1].first = -1; 375 switch (Swz) { 376 case R600InstrInfo::ALU_VEC_012_SCL_210: 377 break; 378 case R600InstrInfo::ALU_VEC_021_SCL_122: 379 std::swap(Src[1], Src[2]); 380 break; 381 case R600InstrInfo::ALU_VEC_102_SCL_221: 382 std::swap(Src[0], Src[1]); 383 break; 384 case R600InstrInfo::ALU_VEC_120_SCL_212: 385 std::swap(Src[0], Src[1]); 386 std::swap(Src[0], Src[2]); 387 break; 388 case R600InstrInfo::ALU_VEC_201: 389 std::swap(Src[0], Src[2]); 390 std::swap(Src[0], Src[1]); 391 break; 392 case R600InstrInfo::ALU_VEC_210: 393 std::swap(Src[0], Src[2]); 394 break; 395 } 396 return Src; 397 } 398 399 static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { 400 switch (Swz) { 401 case R600InstrInfo::ALU_VEC_012_SCL_210: { 402 unsigned Cycles[3] = { 2, 1, 0}; 403 return Cycles[Op]; 404 } 405 case R600InstrInfo::ALU_VEC_021_SCL_122: { 406 unsigned Cycles[3] = { 1, 2, 2}; 407 return Cycles[Op]; 408 } 409 case 
R600InstrInfo::ALU_VEC_120_SCL_212: { 410 unsigned Cycles[3] = { 2, 1, 2}; 411 return Cycles[Op]; 412 } 413 case R600InstrInfo::ALU_VEC_102_SCL_221: { 414 unsigned Cycles[3] = { 2, 2, 1}; 415 return Cycles[Op]; 416 } 417 default: 418 llvm_unreachable("Wrong Swizzle for Trans Slot"); 419 } 420 } 421 422 /// returns how many MIs (whose inputs are represented by IGSrcs) can be packed 423 /// in the same Instruction Group while meeting read port limitations given a 424 /// Swz swizzle sequence. 425 unsigned R600InstrInfo::isLegalUpTo( 426 const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs, 427 const std::vector<R600InstrInfo::BankSwizzle> &Swz, 428 const std::vector<std::pair<int, unsigned>> &TransSrcs, 429 R600InstrInfo::BankSwizzle TransSwz) const { 430 int Vector[4][3]; 431 memset(Vector, -1, sizeof(Vector)); 432 for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { 433 const std::vector<std::pair<int, unsigned>> &Srcs = 434 Swizzle(IGSrcs[i], Swz[i]); 435 for (unsigned j = 0; j < 3; j++) { 436 const std::pair<int, unsigned> &Src = Srcs[j]; 437 if (Src.first < 0 || Src.first == 255) 438 continue; 439 if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { 440 if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && 441 Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { 442 // The value from output queue A (denoted by register OQAP) can 443 // only be fetched during the first cycle. 
444 return false; 445 } 446 // OQAP does not count towards the normal read port restrictions 447 continue; 448 } 449 if (Vector[Src.second][j] < 0) 450 Vector[Src.second][j] = Src.first; 451 if (Vector[Src.second][j] != Src.first) 452 return i; 453 } 454 } 455 // Now check Trans Alu 456 for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { 457 const std::pair<int, unsigned> &Src = TransSrcs[i]; 458 unsigned Cycle = getTransSwizzle(TransSwz, i); 459 if (Src.first < 0) 460 continue; 461 if (Src.first == 255) 462 continue; 463 if (Vector[Src.second][Cycle] < 0) 464 Vector[Src.second][Cycle] = Src.first; 465 if (Vector[Src.second][Cycle] != Src.first) 466 return IGSrcs.size() - 1; 467 } 468 return IGSrcs.size(); 469 } 470 471 /// Given a swizzle sequence SwzCandidate and an index Idx, returns the next 472 /// (in lexicographic term) swizzle sequence assuming that all swizzles after 473 /// Idx can be skipped 474 static bool 475 NextPossibleSolution( 476 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 477 unsigned Idx) { 478 assert(Idx < SwzCandidate.size()); 479 int ResetIdx = Idx; 480 while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) 481 ResetIdx --; 482 for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { 483 SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; 484 } 485 if (ResetIdx == -1) 486 return false; 487 int NextSwizzle = SwzCandidate[ResetIdx] + 1; 488 SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; 489 return true; 490 } 491 492 /// Enumerate all possible Swizzle sequence to find one that can meet all 493 /// read port requirements. 
494 bool R600InstrInfo::FindSwizzleForVectorSlot( 495 const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs, 496 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 497 const std::vector<std::pair<int, unsigned>> &TransSrcs, 498 R600InstrInfo::BankSwizzle TransSwz) const { 499 unsigned ValidUpTo = 0; 500 do { 501 ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); 502 if (ValidUpTo == IGSrcs.size()) 503 return true; 504 } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); 505 return false; 506 } 507 508 /// Instructions in Trans slot can't read gpr at cycle 0 if they also read 509 /// a const, and can't read a gpr at cycle 1 if they read 2 const. 510 static bool 511 isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, 512 const std::vector<std::pair<int, unsigned>> &TransOps, 513 unsigned ConstCount) { 514 // TransALU can't read 3 constants 515 if (ConstCount > 2) 516 return false; 517 for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { 518 const std::pair<int, unsigned> &Src = TransOps[i]; 519 unsigned Cycle = getTransSwizzle(TransSwz, i); 520 if (Src.first < 0) 521 continue; 522 if (ConstCount > 0 && Cycle == 0) 523 return false; 524 if (ConstCount > 1 && Cycle == 1) 525 return false; 526 } 527 return true; 528 } 529 530 bool 531 R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, 532 const DenseMap<unsigned, unsigned> &PV, 533 std::vector<BankSwizzle> &ValidSwizzle, 534 bool isLastAluTrans) 535 const { 536 //Todo : support shared src0 - src1 operand 537 538 std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs; 539 ValidSwizzle.clear(); 540 unsigned ConstCount; 541 BankSwizzle TransBS = ALU_VEC_012_SCL_210; 542 for (unsigned i = 0, e = IG.size(); i < e; ++i) { 543 IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); 544 unsigned Op = getOperandIdx(IG[i]->getOpcode(), 545 AMDGPU::OpName::bank_swizzle); 546 ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) 547 
IG[i]->getOperand(Op).getImm()); 548 } 549 std::vector<std::pair<int, unsigned>> TransOps; 550 if (!isLastAluTrans) 551 return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); 552 553 TransOps = std::move(IGSrcs.back()); 554 IGSrcs.pop_back(); 555 ValidSwizzle.pop_back(); 556 557 static const R600InstrInfo::BankSwizzle TransSwz[] = { 558 ALU_VEC_012_SCL_210, 559 ALU_VEC_021_SCL_122, 560 ALU_VEC_120_SCL_212, 561 ALU_VEC_102_SCL_221 562 }; 563 for (unsigned i = 0; i < 4; i++) { 564 TransBS = TransSwz[i]; 565 if (!isConstCompatible(TransBS, TransOps, ConstCount)) 566 continue; 567 bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, 568 TransBS); 569 if (Result) { 570 ValidSwizzle.push_back(TransBS); 571 return true; 572 } 573 } 574 575 return false; 576 } 577 578 bool 579 R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) 580 const { 581 assert (Consts.size() <= 12 && "Too many operands in instructions group"); 582 unsigned Pair1 = 0, Pair2 = 0; 583 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 584 unsigned ReadConstHalf = Consts[i] & 2; 585 unsigned ReadConstIndex = Consts[i] & (~3); 586 unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; 587 if (!Pair1) { 588 Pair1 = ReadHalfConst; 589 continue; 590 } 591 if (Pair1 == ReadHalfConst) 592 continue; 593 if (!Pair2) { 594 Pair2 = ReadHalfConst; 595 continue; 596 } 597 if (Pair2 != ReadHalfConst) 598 return false; 599 } 600 return true; 601 } 602 603 bool 604 R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) 605 const { 606 std::vector<unsigned> Consts; 607 SmallSet<int64_t, 4> Literals; 608 for (unsigned i = 0, n = MIs.size(); i < n; i++) { 609 MachineInstr &MI = *MIs[i]; 610 if (!isALUInstr(MI.getOpcode())) 611 continue; 612 613 for (const auto &Src : getSrcs(MI)) { 614 if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) 615 Literals.insert(Src.second); 616 if (Literals.size() > 4) 617 return false; 618 if 
(Src.first->getReg() == AMDGPU::ALU_CONST) 619 Consts.push_back(Src.second); 620 if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || 621 AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { 622 unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; 623 unsigned Chan = RI.getHWRegChan(Src.first->getReg()); 624 Consts.push_back((Index << 2) | Chan); 625 } 626 } 627 } 628 return fitsConstReadLimitations(Consts); 629 } 630 631 DFAPacketizer * 632 R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const { 633 const InstrItineraryData *II = STI.getInstrItineraryData(); 634 return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II); 635 } 636 637 static bool 638 isPredicateSetter(unsigned Opcode) { 639 switch (Opcode) { 640 case AMDGPU::PRED_X: 641 return true; 642 default: 643 return false; 644 } 645 } 646 647 static MachineInstr * 648 findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 649 MachineBasicBlock::iterator I) { 650 while (I != MBB.begin()) { 651 --I; 652 MachineInstr &MI = *I; 653 if (isPredicateSetter(MI.getOpcode())) 654 return &MI; 655 } 656 657 return nullptr; 658 } 659 660 static 661 bool isJump(unsigned Opcode) { 662 return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; 663 } 664 665 static bool isBranch(unsigned Opcode) { 666 return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || 667 Opcode == AMDGPU::BRANCH_COND_f32; 668 } 669 670 bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 671 MachineBasicBlock *&TBB, 672 MachineBasicBlock *&FBB, 673 SmallVectorImpl<MachineOperand> &Cond, 674 bool AllowModify) const { 675 // Most of the following comes from the ARM implementation of AnalyzeBranch 676 677 // If the block has no terminators, it just falls into the block after it. 
678 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 679 if (I == MBB.end()) 680 return false; 681 682 // AMDGPU::BRANCH* instructions are only available after isel and are not 683 // handled 684 if (isBranch(I->getOpcode())) 685 return true; 686 if (!isJump(I->getOpcode())) { 687 return false; 688 } 689 690 // Remove successive JUMP 691 while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) { 692 MachineBasicBlock::iterator PriorI = std::prev(I); 693 if (AllowModify) 694 I->removeFromParent(); 695 I = PriorI; 696 } 697 MachineInstr &LastInst = *I; 698 699 // If there is only one terminator instruction, process it. 700 unsigned LastOpc = LastInst.getOpcode(); 701 if (I == MBB.begin() || !isJump((--I)->getOpcode())) { 702 if (LastOpc == AMDGPU::JUMP) { 703 TBB = LastInst.getOperand(0).getMBB(); 704 return false; 705 } else if (LastOpc == AMDGPU::JUMP_COND) { 706 auto predSet = I; 707 while (!isPredicateSetter(predSet->getOpcode())) { 708 predSet = --I; 709 } 710 TBB = LastInst.getOperand(0).getMBB(); 711 Cond.push_back(predSet->getOperand(1)); 712 Cond.push_back(predSet->getOperand(2)); 713 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 714 return false; 715 } 716 return true; // Can't handle indirect branch. 717 } 718 719 // Get the instruction before it if it is a terminator. 720 MachineInstr &SecondLastInst = *I; 721 unsigned SecondLastOpc = SecondLastInst.getOpcode(); 722 723 // If the block ends with a B and a Bcc, handle it. 
724 if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { 725 auto predSet = --I; 726 while (!isPredicateSetter(predSet->getOpcode())) { 727 predSet = --I; 728 } 729 TBB = SecondLastInst.getOperand(0).getMBB(); 730 FBB = LastInst.getOperand(0).getMBB(); 731 Cond.push_back(predSet->getOperand(1)); 732 Cond.push_back(predSet->getOperand(2)); 733 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 734 return false; 735 } 736 737 // Otherwise, can't handle this. 738 return true; 739 } 740 741 static 742 MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { 743 for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); 744 It != E; ++It) { 745 if (It->getOpcode() == AMDGPU::CF_ALU || 746 It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 747 return It.getReverse(); 748 } 749 return MBB.end(); 750 } 751 752 unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, 753 MachineBasicBlock *TBB, 754 MachineBasicBlock *FBB, 755 ArrayRef<MachineOperand> Cond, 756 const DebugLoc &DL, 757 int *BytesAdded) const { 758 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 759 assert(!BytesAdded && "code size not handled"); 760 761 if (!FBB) { 762 if (Cond.empty()) { 763 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); 764 return 1; 765 } else { 766 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 767 assert(PredSet && "No previous predicate !"); 768 addFlag(*PredSet, 0, MO_FLAG_PUSH); 769 PredSet->getOperand(2).setImm(Cond[1].getImm()); 770 771 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 772 .addMBB(TBB) 773 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 774 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 775 if (CfAlu == MBB.end()) 776 return 1; 777 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 778 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 779 return 1; 780 } 781 } else { 782 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 783 
assert(PredSet && "No previous predicate !"); 784 addFlag(*PredSet, 0, MO_FLAG_PUSH); 785 PredSet->getOperand(2).setImm(Cond[1].getImm()); 786 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 787 .addMBB(TBB) 788 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 789 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); 790 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 791 if (CfAlu == MBB.end()) 792 return 2; 793 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 794 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 795 return 2; 796 } 797 } 798 799 unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, 800 int *BytesRemoved) const { 801 assert(!BytesRemoved && "code size not handled"); 802 803 // Note : we leave PRED* instructions there. 804 // They may be needed when predicating instructions. 805 806 MachineBasicBlock::iterator I = MBB.end(); 807 808 if (I == MBB.begin()) { 809 return 0; 810 } 811 --I; 812 switch (I->getOpcode()) { 813 default: 814 return 0; 815 case AMDGPU::JUMP_COND: { 816 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 817 clearFlag(*predSet, 0, MO_FLAG_PUSH); 818 I->eraseFromParent(); 819 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 820 if (CfAlu == MBB.end()) 821 break; 822 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 823 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 824 break; 825 } 826 case AMDGPU::JUMP: 827 I->eraseFromParent(); 828 break; 829 } 830 I = MBB.end(); 831 832 if (I == MBB.begin()) { 833 return 1; 834 } 835 --I; 836 switch (I->getOpcode()) { 837 // FIXME: only one case?? 
838 default: 839 return 1; 840 case AMDGPU::JUMP_COND: { 841 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 842 clearFlag(*predSet, 0, MO_FLAG_PUSH); 843 I->eraseFromParent(); 844 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 845 if (CfAlu == MBB.end()) 846 break; 847 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 848 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 849 break; 850 } 851 case AMDGPU::JUMP: 852 I->eraseFromParent(); 853 break; 854 } 855 return 2; 856 } 857 858 bool R600InstrInfo::isPredicated(const MachineInstr &MI) const { 859 int idx = MI.findFirstPredOperandIdx(); 860 if (idx < 0) 861 return false; 862 863 unsigned Reg = MI.getOperand(idx).getReg(); 864 switch (Reg) { 865 default: return false; 866 case AMDGPU::PRED_SEL_ONE: 867 case AMDGPU::PRED_SEL_ZERO: 868 case AMDGPU::PREDICATE_BIT: 869 return true; 870 } 871 } 872 873 bool R600InstrInfo::isPredicable(const MachineInstr &MI) const { 874 // XXX: KILL* instructions can be predicated, but they must be the last 875 // instruction in a clause, so this means any instructions after them cannot 876 // be predicated. Until we have proper support for instruction clauses in the 877 // backend, we will mark KILL* instructions as unpredicable. 878 879 if (MI.getOpcode() == AMDGPU::KILLGT) { 880 return false; 881 } else if (MI.getOpcode() == AMDGPU::CF_ALU) { 882 // If the clause start in the middle of MBB then the MBB has more 883 // than a single clause, unable to predicate several clauses. 
    // (Continuation of R600InstrInfo::isPredicable — the function's opening,
    // including the branch this block belongs to, is above this chunk.)
    // Only the first instruction of a block may be predicated here.
    if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    // NOTE(review): operands 3 and 4 are presumably the two kcache-bank
    // fields of CF_ALU — confirm against the instruction definition.
    return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
  } else if (isVector(MI)) {
    // Vector (trans-slot) instructions are never predicable.
    return false;
  } else {
    // Defer to the generic target-independent check.
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

// If-conversion is always considered profitable on R600: branches are
// expensive relative to predicated ALU work, so accept unconditionally.
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   BranchProbability Probability) const{
  return true;
}

// Diamond (true/false block) if-conversion: likewise always profitable.
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   BranchProbability Probability) const {
  return true;
}

// Duplicating a block to enable if-conversion is always accepted.
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         BranchProbability Probability)
                                         const {
  return true;
}

// Never unpredicate: once predicated, instructions stay predicated.
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

/// Invert a branch condition in place.
/// Cond[1] holds the predicate-setter opcode (swap SETE <-> SETNE) and
/// Cond[2] holds the predicate-select register (swap SEL_ZERO <-> SEL_ONE).
/// Per the TargetInstrInfo contract, returns false on success and true when
/// the condition cannot be reversed (unknown opcode or select register).
bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case AMDGPU::PRED_SETE_INT:
    MO.setImm(AMDGPU::PRED_SETNE_INT);
    break;
  case AMDGPU::PRED_SETNE_INT:
    MO.setImm(AMDGPU::PRED_SETE_INT);
    break;
  case AMDGPU::PRED_SETE:
    MO.setImm(AMDGPU::PRED_SETNE);
    break;
  case AMDGPU::PRED_SETNE:
    MO.setImm(AMDGPU::PRED_SETE);
    break;
  default:
    return true; // Unknown comparison — cannot reverse.
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true; // Unknown predicate select — cannot reverse.
  }
  return false;
}

/// \returns true if \p MI writes the predicate; the \p Pred vector is not
/// populated — only the predicate-setter property is reported.
bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI.getOpcode());
}

/// Convert \p MI into its predicated form using Pred[2] as the predicate
/// select register. Handles three shapes:
///  - CF_ALU: flips a single immediate operand (presumably the clause's
///    predication/enable field — operand 8; confirm against the TD file).
///  - DOT_4: sets the per-channel pred_sel operands (X/Y/Z/W) and adds an
///    implicit use of PREDICATE_BIT.
///  - Anything with a generic predicate operand: sets it via
///    findFirstPredOperandIdx() and adds the implicit PREDICATE_BIT use.
/// \returns true if the instruction was predicated.
bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI.findFirstPredOperandIdx();

  if (MI.getOpcode() == AMDGPU::CF_ALU) {
    MI.getOperand(8).setImm(0);
    return true;
  }

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

// Flat cost model: predication costs 2 cycles for every instruction.
unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
  return 2;
}

// Flat latency model: every instruction takes 2 cycles, predicated or not.
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr &,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

/// Map a (register index, channel) pair to an indirect address.
/// Only channel 0 is supported; the address is simply the register index.
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

/// Lower post-RA pseudo instructions:
///  - register load/store pseudos (the default case) become either a plain
///    MOV (when the offset register is INDIRECT_BASE_ADDR, i.e. a constant
///    address) or an indirect read/write through AR_X;
///  - R600_EXTRACT_ELT_* / R600_INSERT_ELT_* become indirect reads/writes
///    addressed by the vector register's HW index and channel.
/// \returns true if \p MI was expanded (and erased), false otherwise.
bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: {
    MachineBasicBlock *MBB = MI.getParent();
    int OffsetOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
    // addr is a custom operand with multiple MI operands, and only the
    // first MI operand is given a name.
    int RegOpIdx = OffsetOpIdx + 1;
    int ChanOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
    if (isRegisterLoad(MI)) {
      int DstOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        // Constant address: a direct register copy suffices.
        buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
                          OffsetReg);
      }
    } else if (isRegisterStore(MI)) {
      int ValOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI.getOperand(ValOpIdx).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      // Not a load/store pseudo we know how to expand.
      return false;
    }

    // Default case erases here and returns; the cases below instead fall
    // through to the eraseFromParent() after the switch.
    MBB->erase(MI);
    return true;
  }
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
                      RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
                      MI.getOperand(2).getReg(),
                      RI.getHWRegChan(MI.getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
                       MI.getOperand(3).getReg(),                     // Offset
                       RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
    break;
  }
  MI.eraseFromParent();
  return true;
}

/// Reserve the T-registers used for indirect addressing so the allocator
/// does not hand them out: for every index in [getIndirectIndexBegin,
/// getIndirectIndexEnd] reserve StackWidth channels of the corresponding
/// TReg32 tuple (4 channels per index).
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF,
                                             const R600RegisterInfo &TRI) const {
  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return; // No indirect addressing in this function.

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      TRI.reserveRegisterTuples(Reserved, Reg);
    }
  }
}

// Indirect addressing always goes through the X channel of the T registers.
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

// Convenience overload: indirect write through address channel 0.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

/// Emit an indirect register write:
///   MOVA_INT AR_X, OffsetReg   (load the address register; write disabled)
///   MOV AddrReg, ValueReg      (dst_rel = 1 -> destination relative to AR_X)
/// AddrReg is picked from the per-channel Addr register class for AddrChan.
/// \returns the builder for the final MOV.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  // MOVA only updates AR_X; suppress the GPR write.
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

// Convenience overload: indirect read through address channel 0.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

/// Emit an indirect register read — mirror image of buildIndirectWrite:
///   MOVA_INT AR_X, OffsetReg   (load the address register; write disabled)
///   MOV ValueReg, AddrReg      (src0_rel = 1 -> source relative to AR_X)
/// \returns the builder for the final MOV.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
  default: llvm_unreachable("Invalid Channel");
  case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
  case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
  case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
  case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

/// \returns the first T-register index available for indirect addressing:
/// one past the highest live-in register of the indirect register class,
/// 0 when there are no live-ins, or -1 when the function has no frame
/// objects (indirect addressing unused).
int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (std::pair<unsigned, unsigned> LI : MRI.liveins()) {
    unsigned Reg = LI.first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    // Linear scan to find Reg's index within the indirect register class.
    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
         ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

/// \returns the last T-register index used for indirect addressing
/// (begin index plus the frame size from frame lowering), or -1 when the
/// function has variable-sized or no frame objects.
int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  if (MFI.hasVarSizedObjects()) {
    return -1;
  }

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  // FI = -1: query the reference point just past the last fixed object.
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

// Hardware limit on the number of ALU instructions in one clause.
unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

/// Build an ALU instruction with every modifier operand set to its default.
/// The addImm/addReg sequence below must match the TableGen operand order
/// for R600 ALU instructions exactly — do not reorder.
/// Src1Reg == 0 means a single-source (OP1-style) instruction; three-source
/// OP3 instructions additionally carry $update_exec_mask/$update_predicate.
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);           // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);      // $src1_sel
  }

  //XXX: The r600g finalizer expects this to be 1, once we've moved the
  //scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)        // $last
      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
      .addImm(0)         // $literal
      .addImm(0);        // $bank_swizzle

  return MIB;
}

// Expand an operand name into its per-slot (X/Y/Z/W) variant table and
// index it by Slot. Relies on the _X/_Y/_Z/_W enum suffix convention.
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

/// \returns the slot-specific (X/Y/Z/W) variant of operand name \p Op.
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

/// Split one slot of a DOT_4 into a standalone DOT4 instruction
/// (DOT4_r600 on <= R700, DOT4_eg on Evergreen+), copying the slot's
/// per-channel modifiers and pred_sel from the packed instruction.
/// Only DOT_4 is supported.
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= R600Subtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  // Modifier operands copied slot-to-instruction, one by one below.
  static const unsigned  Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(*MIB, Operands[i], MO.getImm());
  }
  // NOTE(review): operand 20 — presumably the $last flag that
  // buildDefaultInstruction sets to 1; cleared so the slot does not end
  // the instruction group. Confirm against the DOT4 operand layout.
  MIB->getOperand(20).setImm(0);
  return MIB;
}

/// Build a MOV of immediate \p Imm into \p DstReg via ALU_LITERAL_X.
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

/// Build a register-to-register MOV with default modifiers.
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

// Convenience wrapper around the opcode-based overload below.
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

// \returns the operand index of named operand \p Op, or -1 if absent.
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

/// Set the named immediate operand \p Op of \p MI to \p Imm.
/// Asserts that the operand exists and is an immediate.
void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI.getOperand(Idx).isImm());
  MI.getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

/// \returns the operand carrying flag \p Flag for source \p SrcIdx.
/// With Flag != 0 the instruction must use native operand encoding and the
/// flag maps to a named modifier operand (clamp/write/last/negate/abs).
/// With Flag == 0 the legacy per-instruction flag operand (located via
/// GET_FLAG_OPERAND_IDX from TSFlags) is returned instead.
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we are want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
        break;
      case 2:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
        break;
      }
      break;

    case MO_FLAG_ABS:
      // OP3 instructions have no absolute-value modifier operands.
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
        break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
         "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI.getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

/// Set flag \p Flag on operand index \p Operand of \p MI.
/// Native encoding: MO_FLAG_NOT_LAST clears LAST, MO_FLAG_MASK clears the
/// write bit, anything else sets the modifier operand to 1.
/// Legacy encoding: OR the flag into the packed per-operand flag field.
void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      // Masking means suppressing the write: clear rather than set.
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    // Legacy packed encoding: NUM_MO_FLAGS bits per operand.
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

/// Clear flag \p Flag on operand index \p Operand of \p MI — inverse of
/// addFlag for both the native and legacy packed encodings.
void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}

/// Map a pseudo-source kind to an address space: stack-like kinds live in
/// private memory, everything else (constants, GOT, jump tables, call
/// entries) in constant memory.
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
    PseudoSourceValue::PSVKind Kind) const {
  switch (Kind) {
  case PseudoSourceValue::Stack:
  case PseudoSourceValue::FixedStack:
    return AMDGPUASI.PRIVATE_ADDRESS;
  case PseudoSourceValue::ConstantPool:
  case PseudoSourceValue::GOT:
  case PseudoSourceValue::JumpTable:
  case PseudoSourceValue::GlobalValueCallEntry:
  case PseudoSourceValue::ExternalSymbolCallEntry:
  case PseudoSourceValue::TargetCustom:
    return AMDGPUASI.CONSTANT_ADDRESS;
  }
  // Not reached: the switch above covers all kinds.
  llvm_unreachable("Invalid pseudo source kind");
  return AMDGPUASI.PRIVATE_ADDRESS;
}