1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 //
12 /// \file
13 ///
14 /// This file contains definition for AMDGPU ISA disassembler
15 //
16 //===----------------------------------------------------------------------===//
17 
18 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
19 
20 #include "Disassembler/AMDGPUDisassembler.h"
21 #include "AMDGPU.h"
22 #include "AMDGPURegisterInfo.h"
23 #include "SIDefines.h"
24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm-c/Disassembler.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/BinaryFormat/ELF.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
33 #include "llvm/MC/MCExpr.h"
34 #include "llvm/MC/MCFixedLenDisassembler.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/MC/MCSubtargetInfo.h"
37 #include "llvm/Support/Endian.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <cstdint>
46 #include <iterator>
47 #include <tuple>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-disassembler"
53 
54 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
55 
56 inline static MCDisassembler::DecodeStatus
57 addOperand(MCInst &Inst, const MCOperand& Opnd) {
58   Inst.addOperand(Opnd);
59   return Opnd.isValid() ?
60     MCDisassembler::Success :
61     MCDisassembler::SoftFail;
62 }
63 
64 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
65                                 uint16_t NameIdx) {
66   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
67   if (OpIdx != -1) {
68     auto I = MI.begin();
69     std::advance(I, OpIdx);
70     MI.insert(I, Op);
71   }
72   return OpIdx;
73 }
74 
// Decode the 16-bit branch offset of an SOPP branch. The encoded immediate
// counts dwords (hence the *4); the target is relative to the address of the
// following instruction (hence the +4 added to Addr).
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // 18 bits hold the 16-bit signed word offset after scaling by 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  // Prefer a symbolic operand (label) when the symbolizer can resolve the
  // target address; otherwise fall back to the raw immediate.
  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
86 
// Boilerplate generator: defines a static decoder thunk named
// StaticDecoderName that forwards the raw encoding to the
// AMDGPUDisassembler member function DecoderName.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                       unsigned Imm, \
                                       uint64_t /*Addr*/, \
                                       const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

// Common case: the thunk and the member decoder are both named after the
// register class (Decode<RC>RegisterClass / decodeOperand_<RC>).
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

// Vector (VGPR) and mixed vector/scalar source operands.
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

// Scalar register operands of various widths and restrictions.
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)
116 
117 static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
118                                          unsigned Imm,
119                                          uint64_t Addr,
120                                          const void *Decoder) {
121   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
122   return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
123 }
124 
125 static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
126                                          unsigned Imm,
127                                          uint64_t Addr,
128                                          const void *Decoder) {
129   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
130   return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
131 }
132 
// SDWA operand decoders: static thunk and member decoder share the same name.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
139 
140 #include "AMDGPUGenDisassemblerTables.inc"
141 
142 //===----------------------------------------------------------------------===//
143 //
144 //===----------------------------------------------------------------------===//
145 
146 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
147   assert(Bytes.size() >= sizeof(T));
148   const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
149   Bytes = Bytes.slice(sizeof(T));
150   return Res;
151 }
152 
// Attempt to decode Inst against a single generated decoder table. On success
// the decoded instruction is copied into MI; on failure MI stays empty and
// the Bytes cursor is restored, since a failed attempt may already have
// consumed a trailing literal dword (see decodeLiteralConstant).
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false; // reset the per-attempt literal cache
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
169 
// Top-level entry point: decode one instruction starting at Bytes_ by trying
// the candidate decoder tables in a fixed priority order (64-bit DPP/SDWA
// first, then 32-bit, then generic 64-bit encodings). On success, Size is the
// number of bytes actually consumed, including any literal constant.
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  // An instruction is at most 8 bytes (64-bit encoding plus nothing, or
  // 32-bit encoding plus a 32-bit literal).
  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Fall back to 64-bit encodings: append the next dword as the high half.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  // These MAC/FMAC e64 opcodes have a src2_modifiers operand in the MCInst
  // layout that is not present in the encoding; insert a dummy so operand
  // indices line up for printing.
  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  // MIMG instructions need post-processing to pick the opcode variant
  // matching the enabled channels (dmask).
  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
267 
// Post-process an SDWA instruction: the MCInst layout expects operands that
// are implicit (not encoded) in some SDWA variants, so insert them here.
// Note the GFX9 check comes first; a subtarget may not match either branch.
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
286 
287 // Note that MIMG format provides no information about VADDR size.
288 // Consequently, decoded instructions always show address
289 // as if it has 1 dword, which could be not really so.
// Note that MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show address
// as if it has 1 dword, which could be not really so.
//
// Rewrites a decoded MIMG instruction to the opcode variant whose vdata width
// matches the number of channels enabled by dmask (and d16 packing), then
// widens the vdata register operand accordingly. Returns Success even when no
// rewrite is possible; the instruction is simply left as decoded.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  // Atomics are recognized by the presence of a vdst operand.
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  // Gather4 always writes 4 channels regardless of dmask.
  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  // Packed d16 halves the number of result dwords (rounded up).
  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsAtomic) {
    if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) {
      NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
    }
    if (NewOpcode == -1) return MCDisassembler::Success;
  } else if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
    assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get first subregister of VData
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different than
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data)
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}
376 
377 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
378   return getContext().getRegisterInfo()->
379     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
380 }
381 
// Emit a decode-error diagnostic into the comment stream and return an
// invalid MCOperand, which addOperand() turns into SoftFail.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
391 
// Wrap a target register in an MCOperand, translating through getMCReg to
// pick the subtarget-specific register number.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}
396 
397 inline
398 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
399                                                unsigned Val) const {
400   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
401   if (Val >= RegCl.getNumRegs())
402     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
403                            ": unknown register " + Twine(Val));
404   return createRegOperand(RegCl.getRegister(Val));
405 }
406 
// Build a scalar-register operand. The encoding counts 32-bit register
// units, so multi-dword classes divide Val by the class width (via shift)
// and warn on misaligned encodings.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Warn (but still decode) when the encoding is not aligned to the
  // register-class width.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
446 
// VS_* operands may be VGPRs, SGPRs, inline constants or a literal; all of
// that is handled by decodeSrcOp, parameterized only by operand width.
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// 16-bit and packed-16 sources share the scalar source decoding path.
MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}
466 
// Decode a plain 32-bit VGPR operand (never an SGPR/immediate).
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}
475 
// Multi-dword VGPR tuple operands: the encoding is the first VGPR index.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
487 
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI; the restriction is not
  // enforced here, decoding falls through to the generic SReg_32 path.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// Wide scalar destinations only allow SGPR/TTMP tuples, hence decodeDstOp.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
526 
// Read (once) the 32-bit literal dword that follows the instruction. The
// value is cached in HasLiteral/Literal because several operands may refer
// to the same literal; only the first call consumes bytes.
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}
541 
// Decode an inline integer constant. Encodings up to
// INLINE_INTEGER_C_POSITIVE_MAX map to 0..64; the remainder map to the
// negative values -1..-16.
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // Cast to int64_t before subtracting prevents unsigned wraparound for the
  // negative range.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
551 
552 static int64_t getInlineImmVal32(unsigned Imm) {
553   switch (Imm) {
554   case 240:
555     return FloatToBits(0.5f);
556   case 241:
557     return FloatToBits(-0.5f);
558   case 242:
559     return FloatToBits(1.0f);
560   case 243:
561     return FloatToBits(-1.0f);
562   case 244:
563     return FloatToBits(2.0f);
564   case 245:
565     return FloatToBits(-2.0f);
566   case 246:
567     return FloatToBits(4.0f);
568   case 247:
569     return FloatToBits(-4.0f);
570   case 248: // 1 / (2 * PI)
571     return 0x3e22f983;
572   default:
573     llvm_unreachable("invalid fp inline imm");
574   }
575 }
576 
577 static int64_t getInlineImmVal64(unsigned Imm) {
578   switch (Imm) {
579   case 240:
580     return DoubleToBits(0.5);
581   case 241:
582     return DoubleToBits(-0.5);
583   case 242:
584     return DoubleToBits(1.0);
585   case 243:
586     return DoubleToBits(-1.0);
587   case 244:
588     return DoubleToBits(2.0);
589   case 245:
590     return DoubleToBits(-2.0);
591   case 246:
592     return DoubleToBits(4.0);
593   case 247:
594     return DoubleToBits(-4.0);
595   case 248: // 1 / (2 * PI)
596     return 0x3fc45f306dc9c882;
597   default:
598     llvm_unreachable("invalid fp inline imm");
599   }
600 }
601 
602 static int64_t getInlineImmVal16(unsigned Imm) {
603   switch (Imm) {
604   case 240:
605     return 0x3800;
606   case 241:
607     return 0xB800;
608   case 242:
609     return 0x3C00;
610   case 243:
611     return 0xBC00;
612   case 244:
613     return 0x4000;
614   case 245:
615     return 0xC000;
616   case 246:
617     return 0x4400;
618   case 247:
619     return 0xC400;
620   case 248: // 1 / (2 * PI)
621     return 0x3118;
622   default:
623     llvm_unreachable("invalid fp inline imm");
624   }
625 }
626 
// Decode an inline floating-point constant for the given operand width.
// The immediate operand carries the raw bit pattern of the constant.
MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    // Packed 16-bit operands reuse the scalar half-float patterns.
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}
644 
645 unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
646   using namespace AMDGPU;
647 
648   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
649   switch (Width) {
650   default: // fall
651   case OPW32:
652   case OPW16:
653   case OPWV216:
654     return VGPR_32RegClassID;
655   case OPW64: return VReg_64RegClassID;
656   case OPW128: return VReg_128RegClassID;
657   }
658 }
659 
660 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
661   using namespace AMDGPU;
662 
663   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
664   switch (Width) {
665   default: // fall
666   case OPW32:
667   case OPW16:
668   case OPWV216:
669     return SGPR_32RegClassID;
670   case OPW64: return SGPR_64RegClassID;
671   case OPW128: return SGPR_128RegClassID;
672   case OPW256: return SGPR_256RegClassID;
673   case OPW512: return SGPR_512RegClassID;
674   }
675 }
676 
677 unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
678   using namespace AMDGPU;
679 
680   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
681   switch (Width) {
682   default: // fall
683   case OPW32:
684   case OPW16:
685   case OPWV216:
686     return TTMP_32RegClassID;
687   case OPW64: return TTMP_64RegClassID;
688   case OPW128: return TTMP_128RegClassID;
689   case OPW256: return TTMP_256RegClassID;
690   case OPW512: return TTMP_512RegClassID;
691   }
692 }
693 
694 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
695   using namespace AMDGPU::EncValues;
696 
697   unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
698   unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;
699 
700   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
701 }
702 
// Decode a generic 9-bit source operand: VGPR, SGPR, TTMP, inline integer or
// FP constant, literal, or a special register. The checks are ordered by the
// encoding ranges, so the order below is significant.
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  // Anything left is a special/architectural register (vcc, exec, ...).
  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
741 
// Decode a wide scalar destination (256/512-bit): only SGPR and TTMP tuples
// are representable; any other encoding is a decoder bug.
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}
760 
// Decode a 32-bit special/architectural register from its scalar source
// encoding. Encodings with no register representation yet (vccz/execz) and
// unknown values produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  // TBA/TMA encodings were removed on GFX9; assert rather than decode them.
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
    // TODO: SRC_POPS_EXITING_WAVE_ID
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
792 
// Decode a 64-bit special/architectural register pair from its scalar source
// encoding; unknown values produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  // TBA/TMA encodings were removed on GFX9; assert rather than decode them.
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110: assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
807 
// Decode an SDWA source operand. On GFX9 the SDWA src field has its own
// sub-ranges for VGPRs, SGPRs and TTMPs; values past those ranges are
// rebased by SRC_SGPR_MIN and interpreted like a regular scalar source
// (inline constants or special registers). On VI the field is a plain VGPR.
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: static_cast<int> is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining encodings are offset by SRC_SGPR_MIN and follow the regular
    // scalar-source numbering.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}
846 
// Width-specific wrappers around decodeSDWASrc.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}
854 
// Decode the sdst field of a GFX9 SDWA VOPC instruction. When the VCC bit is
// clear the destination is implicitly VCC; otherwise the remaining bits name
// an SGPR pair, TTMP pair, or 64-bit special register.
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}
875 
// Subtarget generation predicates used to select encoding-specific paths.
bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
883 
884 //===----------------------------------------------------------------------===//
885 // AMDGPUSymbolizer
886 //===----------------------------------------------------------------------===//
887 
888 // Try to find symbol name for specified label
889 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
890                                 raw_ostream &/*cStream*/, int64_t Value,
891                                 uint64_t /*Address*/, bool IsBranch,
892                                 uint64_t /*Offset*/, uint64_t /*InstSize*/) {
893   using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
894   using SectionSymbolsTy = std::vector<SymbolInfoTy>;
895 
896   if (!IsBranch) {
897     return false;
898   }
899 
900   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
901   if (!Symbols)
902     return false;
903 
904   auto Result = std::find_if(Symbols->begin(), Symbols->end(),
905                              [Value](const SymbolInfoTy& Val) {
906                                 return std::get<0>(Val) == static_cast<uint64_t>(Value)
907                                     && std::get<2>(Val) == ELF::STT_NOTYPE;
908                              });
909   if (Result != Symbols->end()) {
910     auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
911     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
912     Inst.addOperand(MCOperand::createExpr(Add));
913     return true;
914   }
915   return false;
916 }
917 
// PC-relative load annotation is not applicable to this target yet; the
// MCSymbolizer interface requires an override regardless.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
923 
924 //===----------------------------------------------------------------------===//
925 // Initialization
926 //===----------------------------------------------------------------------===//
927 
// Factory registered with the TargetRegistry; DisInfo is forwarded verbatim
// and later interpreted by tryAddingSymbolicOperand.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}
936 
// Factory registered with the TargetRegistry for the GCN target.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
942 
// Library entry point: register the disassembler and symbolizer factories
// for the GCN target with the global TargetRegistry.
extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
949