//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}
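
// The static decoder callbacks below are what the TableGen'd decoder tables
// (AMDGPUGenDisassemblerTables.inc, included further down) dispatch to; each
// one recovers the AMDGPUDisassembler instance from the opaque Decoder
// pointer that decodeInstruction() threads through.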

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
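
// For reference: a SOPP branch target is a signed 16-bit dword offset
// relative to the instruction after the branch, so the absolute target
// computed above is Addr + 4 + sext(simm16) * 4. For example, an encoded
// immediate of 3 at address 0x100 resolves to 0x100 + 4 + 12 = 0x110.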

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
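
// As a concrete example, DECODE_OPERAND_REG(VGPR_32) above expands to:
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
//                                                  uint64_t /*Addr*/,
//                                                  const void *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
//     return addOperand(Inst, DAsm->decodeOperand_VGPR_32(Imm));
//   }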

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
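
// Note that tryDecodeInst snapshots and restores Bytes on failure: decoding a
// candidate may pull a trailing 32-bit literal out of Bytes (see
// decodeLiteralConstant), and that consumption must be undone before the next
// decoder table is tried.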

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch encoding length using some bit
    // predicate, but it is unknown yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve the conflict with the VOP1
    // and VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 encodings for FMA variants. Try to decode using
      // this special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Fall back to a 64-bit instruction built from the remaining bytes.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
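
// Note on the Size computation above: on success it reports everything
// consumed from Bytes_, so a 32-bit instruction followed by a 32-bit literal
// pulled in by decodeLiteralConstant yields Size == 8, not 4.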

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// Note that the MIMG format provides no information about the VADDR size.
// Consequently, decoded instructions always show the address as if it were
// one dword, which may not really be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support.
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsAtomic) {
    if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) {
      NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
    }
    if (NewOpcode == -1) return MCDisassembler::Success;
  } else if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
    assert(NewOpcode != -1 &&
           "could not find matching mimg channel instruction");
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get the first subregister of VData.
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // VAddr will always appear as a single VGPR. This will look different from
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data).
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}
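
// For example, an image_load decoded with dmask = 0b0111 has three enabled
// channels, so the logic above rewrites the opcode via getMaskedMIMGOp with
// DstSize == 3 and widens the vdata operand from the encoded single VGPR to
// the matching three-register tuple (e.g. v[4:6]).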

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Here we accept as much as we can; let the assembler sort it out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The TableGen-generated disassembler doesn't care about operand types,
  // only register classes, so an SSrc_32 operand turns into SReg_32;
  // therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
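
// Worked example, assuming the usual GCN enum values (INLINE_INTEGER_C_MIN =
// 128, INLINE_INTEGER_C_POSITIVE_MAX = 192, INLINE_INTEGER_C_MAX = 208):
// encoding 128 decodes to 0, 129..192 decode to 1..64, and 193..208 decode
// to -1..-16.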

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) is allowed only on VI.
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}
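
// getInlineImmVal16 above returns raw IEEE half-precision bit patterns rather
// than host-float conversions: 0x3800 is 0.5, 0xB800 is -0.5, 0x3C00 is 1.0,
// and so on, mirroring the f32/f64 tables.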

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
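
// For orientation: decodeSrcOp below resolves the 9-bit GCN source operand
// encoding by range. Assuming the usual EncValues (SGPRs from 0, inline
// integers 128..208, inline floats 240..248, LITERAL_CONST == 255, VGPRs
// 256..511), anything not claimed by those ranges or the ttmp window falls
// through to the special registers (VCC, EXEC, M0, ...).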

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and would cause
                           // a compiler warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and would cause
                           // a compiler warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
    // TODO: SRC_POPS_EXITING_WAVE_ID
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110: assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: static_cast<int> is needed to avoid a "comparison is always true"
    // warning.
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}
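
// SDWA VOPC destination encoding on GFX9: if VOPC_DST_VCC_MASK is clear, the
// compare writes the implicit VCC; otherwise the remaining bits (after
// masking with VOPC_DST_SGPR_MASK) select an explicit SGPR pair or trap
// temporary, or fall through to a 64-bit special register.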

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                               return std::get<0>(Val) ==
                                          static_cast<uint64_t>(Value) &&
                                      std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}