//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                            : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx,
                                       MCInstrInfo const *MCII) :
  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
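  // For example (a sketch): an encoded Imm of 0xFFFF sign-extends to a byte
  // offset of -4, so the computed target is Addr + 4 - 4 == Addr, i.e. the
  // branch instruction itself.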
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
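  // In other words, only the GCN3 (VI) and GFX10 encodings are handled below;
  // the older SI/CI encodings differ and are rejected up front.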
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
    report_fatal_error("Disassembly not yet supported for subtarget");

  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 for FMA variants. Try to decode using this
      // special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    // Otherwise try a 64-bit instruction whose low dword is DW.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
  } while (false);

  // A 12-byte consumption implies an 8-byte encoding plus a 32-bit literal;
  // trust it only for VOP3 instructions that actually read a literal, and
  // otherwise report an 8-byte instruction.
  if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral ||
        !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) {
    MaxInstBytesNum = 8;
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
    eatBytes<uint64_t>(Bytes);
  }

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
                    decodeOperand_VGPR_32(Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                             MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
          MCOperand::createReg(MI.getOperand(Tied).getReg()),
          AMDGPU::OpName::vdst_in);
    }
  }

  // If the opcode was not recognized, assume a size of 4 bytes (unless there
  // are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not be the case.
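//
// For example (a sketch): an image load decoded with dmask == 0b0111 has
// three enabled channels, so convertMIMGInst below rewrites the opcode to the
// variant whose vdata is a 3-dword register class and widens the vdata
// operand accordingly.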
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());

    AddrSize = BaseOpcode->NumExtraArgs +
               (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
               (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
               (BaseOpcode->LodOrClampOrMip ? 1 : 0);
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
    if (!IsNSA) {
      if (AddrSize > 8)
        AddrSize = 16;
      else if (AddrSize > 4)
        AddrSize = 8;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the
        // combination of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
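      // (A hypothetical example: a vdata base register near the top of the
      // VGPR file with several channels enabled has no wide enough matching
      // super-register, so we keep the instruction as decoded.)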
      return MCDisassembler::Success;
    }
  }

  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data).
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Wide scalar registers must be encoded aligned: with shift == 1 the raw
  // value must be even, with shift == 2 a multiple of four.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
    unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
    unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
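
// A literal constant is the extra 32-bit dword that follows the instruction
// encoding; it is read at most once per instruction and cached in
// HasLiteral/Literal so that repeated operands reuse the same value.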
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  // IEEE half-precision bit patterns for the same constants as above.
  switch (Imm) {
  case 240:
    return 0x3800; // 0.5
  case 241:
    return 0xB800; // -0.5
  case 242:
    return 0x3C00; // 1.0
  case 243:
    return 0xBC00; // -1.0
  case 244:
    return 0x4000; // 2.0
  case 245:
    return 0xC000; // -2.0
  case 246:
    return 0x4400; // 4.0
  case 247:
    return 0xC400; // -4.0
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}
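
// getSgprClassId and getTtmpClassId below mirror getVgprClassId above: each
// maps an operand width to the scalar or trap-temp register class of that
// width (e.g. OPW64 selects SGPR_64 or TTMP_64), with decodeSrcOp choosing
// among them based on the range the encoded value falls in.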
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin =
      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
  unsigned TTmpMax =
      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
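
// For reference, a few worked examples of decodeSrcOp's enum9 ranges above (a
// sketch, assuming the usual EncValues layout with inline integers starting
// at 128, the literal marker at 255 and VGPRs from 256): Val == 129 decodes
// to the inline integer 1, Val == 243 to the inline float -1.0, Val == 255 to
// a trailing literal constant, and Val == 256 to v0.
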
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  // ToDo: no support for vccz register
  case 251: break;
  // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // XXX: the cast to int is needed to avoid a tautological-compare warning
    // (a comparison with unsigned would always be true).
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                          : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
         "SDWAVopcDst should be present only on GFX9+");

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool AMDGPUDisassembler::isGFX10() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find symbol name for specified label
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy &Val) {
                               return std::get<0>(Val) ==
                                          static_cast<uint64_t>(Value) &&
                                      std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
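
// A minimal usage sketch (not part of this file): once this target is linked
// in, the disassembler can be exercised from the command line, for example
//   llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding \
//       <<< "0x00,0x03,0x00,0x7e"
// which should decode a VOP1 dword to something like v_mov_b32_e32 v0, v0
// (the exact flag spelling and byte syntax depend on the llvm-mc version).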