//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
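
// A worked example of the branch-target math above (illustrative values):
// the 16-bit Imm counts dwords relative to the end of the branch, so with
// Addr = 0x100 and Imm = 3, SignedOffset = 12 and the symbolized target is
// 0x100 + 4 + 12 = 0x110. An Imm of 0xFFFF (-1) yields the branch's own
// address, 0x100.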

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"
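
// For illustration, DECODE_OPERAND_REG(VGPR_32) above expands to a decoder
// named DecodeVGPR_32RegisterClass that forwards Imm to
// AMDGPUDisassembler::decodeOperand_VGPR_32 through addOperand; these are
// the DecoderMethod names referenced by the generated tables included just
// above.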

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  // Save Bytes: operand decoders may consume a trailing literal constant,
  // and on failure the stream must be restored for the next table to try.
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch encoding length using some bit
    // predicate, but it is unknown yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve the conflict with VOP1 and
    // VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 encodings for FMA variants. Try to decode using
      // this special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Fall back to the 64-bit encodings, reading a second dword.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
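
// A note on the Size computation above: eatBytes advances Bytes, so on
// success Size is however much was consumed, including any trailing 32-bit
// literal pulled in by decodeLiteralConstant (e.g. a 4-byte opcode followed
// by a literal reports Size = 8).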

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// Note that the MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show the address as if it has
// 1 dword, which may not be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
    if (NewOpcode == -1)
      return MCDisassembler::Success;
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get first subregister of VData
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different than
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data)
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}
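
// A worked example for convertMIMGInst (illustrative registers): dmask =
// 0b0111 enables three channels, so DstSize = 3, getMaskedMIMGOp selects the
// three-channel opcode variant, and a vdata of v4 is widened to v[4:6]; with
// packed D16 the three halves fit in two dwords, so DstSize becomes
// (3 + 1) / 2 = 2.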

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it
  // out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}
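
// A worked example for createSRegOperand above: 64-bit scalar classes use
// shift = 1, so an encoded Val of 6 denotes the aligned pair s[6:7]
// (register index 6 >> 1 = 3 in the 64-bit class); an odd Val would trigger
// the misalignment warning.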

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The TableGen-generated disassembler doesn't care about operand types;
  // only the register class is left, so an SSrc_32 operand turns into
  // SReg_32, and therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The TableGen-generated disassembler doesn't care about operand types;
  // only the register class is left, so an SSrc_32 operand turns into
  // SReg_32, and therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}
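
// Worked examples for decodeIntImmed: with INLINE_INTEGER_C_MIN = 128 and
// INLINE_INTEGER_C_POSITIVE_MAX = 192 (the values defined in SIDefines.h),
// encoding 128 decodes to 0, 129 to 1, 192 to 64, and 193..208 to -1..-16.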

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}
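
// The constants returned by getInlineImmVal16 above are IEEE half-precision
// bit patterns: 0x3800 is 0.5, 0x3C00 is 1.0, 0x4000 is 2.0, 0x4400 is 4.0,
// the 0x8000 sign bit gives the negated forms, and 0x3118 is approximately
// 1/(2*PI) = 0.15915...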

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
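
// A rough map of the 9-bit source encoding handled above (ranges from
// EncValues in SIDefines.h): plain SGPRs from 0 up to SGPR_MAX, ttmp
// registers in the subtarget-dependent window checked by getTTmpIdx, inline
// integers at 128..208 (0, 1..64, -1..-16), inline floats at 240..248, the
// 32-bit literal marker at 255, VGPRs at 256..511, and special registers in
// the remaining gaps, handled by decodeSpecialReg32/64.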

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
    // TODO: SRC_POPS_EXITING_WAVE_ID
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110: assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: static_cast<int> is needed to avoid a "comparison with unsigned
    // is always true" warning.
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}
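
// Note on the GFX9 SDWA path above: source encodings at or above
// SRC_SGPR_MIN reuse the ordinary scalar-source value space, so once the
// VGPR/SGPR/ttmp range checks fail, subtracting SRC_SGPR_MIN recovers a
// plain 9-bit-style encoding (SVal) that the inline-constant and
// special-register decoders already understand.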

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}
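
// In decodeSDWAVopcDst above, the VOPC_DST_VCC_MASK bit distinguishes an
// explicitly encoded scalar destination (bit set: the masked value selects
// an SGPR pair, ttmp pair, or special register) from the implicit default
// destination of VCC (bit clear).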

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find symbol name for specified label
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                               return std::get<0>(Val) == static_cast<uint64_t>(Value)
                                   && std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
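
// A quick way to exercise this disassembler once registered (a sketch; flag
// spellings follow the existing AMDGPU disassembler tests) is via llvm-mc,
// e.g.:
//   llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < bytes.txt
// where bytes.txt contains raw little-endian instruction bytes such as
// "0x00 0x00 0x81 0xbf" (s_endpgm).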