//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
171a427287SDan Gohman 
181a427287SDan Gohman #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
196bda14b3SChandler Carruth #include "WebAssembly.h"
201a427287SDan Gohman #include "llvm/MC/MCContext.h"
21c50b8907SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h"
2216c16827SSam Clegg #include "llvm/MC/MCFixedLenDisassembler.h"
231a427287SDan Gohman #include "llvm/MC/MCInst.h"
241a427287SDan Gohman #include "llvm/MC/MCInstrInfo.h"
251a427287SDan Gohman #include "llvm/MC/MCSubtargetInfo.h"
261a427287SDan Gohman #include "llvm/MC/MCSymbol.h"
271a427287SDan Gohman #include "llvm/Support/Endian.h"
2816c16827SSam Clegg #include "llvm/Support/LEB128.h"
291a427287SDan Gohman #include "llvm/Support/TargetRegistry.h"
3016c16827SSam Clegg 
311a427287SDan Gohman using namespace llvm;
321a427287SDan Gohman 
331a427287SDan Gohman #define DEBUG_TYPE "wasm-disassembler"
341a427287SDan Gohman 
3516c16827SSam Clegg using DecodeStatus = MCDisassembler::DecodeStatus;
3616c16827SSam Clegg 
3716c16827SSam Clegg #include "WebAssemblyGenDisassemblerTables.inc"
3816c16827SSam Clegg 
391a427287SDan Gohman namespace {
401a427287SDan Gohman class WebAssemblyDisassembler final : public MCDisassembler {
411a427287SDan Gohman   std::unique_ptr<const MCInstrInfo> MCII;
421a427287SDan Gohman 
431a427287SDan Gohman   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
441a427287SDan Gohman                               ArrayRef<uint8_t> Bytes, uint64_t Address,
451a427287SDan Gohman                               raw_ostream &VStream,
461a427287SDan Gohman                               raw_ostream &CStream) const override;
471a427287SDan Gohman 
481a427287SDan Gohman public:
491a427287SDan Gohman   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
501a427287SDan Gohman                           std::unique_ptr<const MCInstrInfo> MCII)
511a427287SDan Gohman       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
521a427287SDan Gohman };
531a427287SDan Gohman } // end anonymous namespace
541a427287SDan Gohman 
551a427287SDan Gohman static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
561a427287SDan Gohman                                                      const MCSubtargetInfo &STI,
571a427287SDan Gohman                                                      MCContext &Ctx) {
581a427287SDan Gohman   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
591a427287SDan Gohman   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
601a427287SDan Gohman }
611a427287SDan Gohman 
621a427287SDan Gohman extern "C" void LLVMInitializeWebAssemblyDisassembler() {
631a427287SDan Gohman   // Register the disassembler for each target.
64f42454b9SMehdi Amini   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
651a427287SDan Gohman                                          createWebAssemblyDisassembler);
66f42454b9SMehdi Amini   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
671a427287SDan Gohman                                          createWebAssemblyDisassembler);
681a427287SDan Gohman }
691a427287SDan Gohman 
7016c16827SSam Clegg static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
7116c16827SSam Clegg   if (Size >= Bytes.size())
7216c16827SSam Clegg     return -1;
7316c16827SSam Clegg   auto V = Bytes[Size];
7416c16827SSam Clegg   Size++;
7516c16827SSam Clegg   return V;
7616c16827SSam Clegg }
7716c16827SSam Clegg 
7816c16827SSam Clegg static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
7916c16827SSam Clegg                               ArrayRef<uint8_t> Bytes, bool Signed) {
8016c16827SSam Clegg   unsigned N = 0;
8116c16827SSam Clegg   const char *Error = nullptr;
8216c16827SSam Clegg   auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
8316c16827SSam Clegg                                     Bytes.data() + Bytes.size(), &Error)
8416c16827SSam Clegg                     : static_cast<int64_t>(
8516c16827SSam Clegg                           decodeULEB128(Bytes.data() + Size, &N,
8616c16827SSam Clegg                                         Bytes.data() + Bytes.size(), &Error));
8716c16827SSam Clegg   if (Error)
8816c16827SSam Clegg     return false;
8916c16827SSam Clegg   Size += N;
9016c16827SSam Clegg   MI.addOperand(MCOperand::createImm(Val));
9116c16827SSam Clegg   return true;
9216c16827SSam Clegg }
9316c16827SSam Clegg 
9416c16827SSam Clegg template <typename T>
95*22442924SThomas Lively bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
9616c16827SSam Clegg   if (Size + sizeof(T) > Bytes.size())
9716c16827SSam Clegg     return false;
9816c16827SSam Clegg   T Val;
9916c16827SSam Clegg   memcpy(&Val, Bytes.data() + Size, sizeof(T));
10016c16827SSam Clegg   support::endian::byte_swap<T, support::endianness::little>(Val);
10116c16827SSam Clegg   Size += sizeof(T);
102*22442924SThomas Lively   if (std::is_floating_point<T>::value) {
10316c16827SSam Clegg     MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
104*22442924SThomas Lively   } else {
105*22442924SThomas Lively     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
106*22442924SThomas Lively   }
10716c16827SSam Clegg   return true;
10816c16827SSam Clegg }
10916c16827SSam Clegg 
1101a427287SDan Gohman MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
1111a427287SDan Gohman     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
11216c16827SSam Clegg     raw_ostream & /*OS*/, raw_ostream &CS) const {
11316c16827SSam Clegg   CommentStream = &CS;
11416c16827SSam Clegg   Size = 0;
11516c16827SSam Clegg   auto Opc = nextByte(Bytes, Size);
11616c16827SSam Clegg   if (Opc < 0)
1171a427287SDan Gohman     return MCDisassembler::Fail;
11816c16827SSam Clegg   const auto *WasmInst = &InstructionTable0[Opc];
11916c16827SSam Clegg   // If this is a prefix byte, indirect to another table.
12016c16827SSam Clegg   if (WasmInst->ET == ET_Prefix) {
12116c16827SSam Clegg     WasmInst = nullptr;
12216c16827SSam Clegg     // Linear search, so far only 2 entries.
12316c16827SSam Clegg     for (auto PT = PrefixTable; PT->Table; PT++) {
12416c16827SSam Clegg       if (PT->Prefix == Opc) {
12516c16827SSam Clegg         WasmInst = PT->Table;
12616c16827SSam Clegg         break;
12716c16827SSam Clegg       }
12816c16827SSam Clegg     }
12916c16827SSam Clegg     if (!WasmInst)
13016c16827SSam Clegg       return MCDisassembler::Fail;
13116c16827SSam Clegg     Opc = nextByte(Bytes, Size);
13216c16827SSam Clegg     if (Opc < 0)
13316c16827SSam Clegg       return MCDisassembler::Fail;
13416c16827SSam Clegg     WasmInst += Opc;
13516c16827SSam Clegg   }
13616c16827SSam Clegg   if (WasmInst->ET == ET_Unused)
13716c16827SSam Clegg     return MCDisassembler::Fail;
13816c16827SSam Clegg   // At this point we must have a valid instruction to decode.
13916c16827SSam Clegg   assert(WasmInst->ET == ET_Instruction);
14016c16827SSam Clegg   MI.setOpcode(WasmInst->Opcode);
14116c16827SSam Clegg   // Parse any operands.
14216c16827SSam Clegg   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
14316c16827SSam Clegg     switch (WasmInst->Operands[OPI]) {
14416c16827SSam Clegg     // ULEB operands:
14516c16827SSam Clegg     case WebAssembly::OPERAND_BASIC_BLOCK:
14616c16827SSam Clegg     case WebAssembly::OPERAND_LOCAL:
14716c16827SSam Clegg     case WebAssembly::OPERAND_GLOBAL:
14816c16827SSam Clegg     case WebAssembly::OPERAND_FUNCTION32:
14916c16827SSam Clegg     case WebAssembly::OPERAND_OFFSET32:
15016c16827SSam Clegg     case WebAssembly::OPERAND_P2ALIGN:
15116c16827SSam Clegg     case WebAssembly::OPERAND_TYPEINDEX:
15216c16827SSam Clegg     case MCOI::OPERAND_IMMEDIATE: {
15316c16827SSam Clegg       if (!parseLEBImmediate(MI, Size, Bytes, false))
15416c16827SSam Clegg         return MCDisassembler::Fail;
15516c16827SSam Clegg       break;
15616c16827SSam Clegg     }
15716c16827SSam Clegg     // SLEB operands:
15816c16827SSam Clegg     case WebAssembly::OPERAND_I32IMM:
15916c16827SSam Clegg     case WebAssembly::OPERAND_I64IMM:
16016c16827SSam Clegg     case WebAssembly::OPERAND_SIGNATURE: {
16116c16827SSam Clegg       if (!parseLEBImmediate(MI, Size, Bytes, true))
16216c16827SSam Clegg         return MCDisassembler::Fail;
16316c16827SSam Clegg       break;
16416c16827SSam Clegg     }
16516c16827SSam Clegg     // FP operands.
16616c16827SSam Clegg     case WebAssembly::OPERAND_F32IMM: {
167*22442924SThomas Lively       if (!parseImmediate<float>(MI, Size, Bytes))
16816c16827SSam Clegg         return MCDisassembler::Fail;
16916c16827SSam Clegg       break;
17016c16827SSam Clegg     }
17116c16827SSam Clegg     case WebAssembly::OPERAND_F64IMM: {
172*22442924SThomas Lively       if (!parseImmediate<double>(MI, Size, Bytes))
173*22442924SThomas Lively         return MCDisassembler::Fail;
174*22442924SThomas Lively       break;
175*22442924SThomas Lively     }
176*22442924SThomas Lively     // Vector lane operands (not LEB encoded).
177*22442924SThomas Lively     case WebAssembly::OPERAND_VEC_I8IMM: {
178*22442924SThomas Lively       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
179*22442924SThomas Lively         return MCDisassembler::Fail;
180*22442924SThomas Lively       break;
181*22442924SThomas Lively     }
182*22442924SThomas Lively     case WebAssembly::OPERAND_VEC_I16IMM: {
183*22442924SThomas Lively       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
184*22442924SThomas Lively         return MCDisassembler::Fail;
185*22442924SThomas Lively       break;
186*22442924SThomas Lively     }
187*22442924SThomas Lively     case WebAssembly::OPERAND_VEC_I32IMM: {
188*22442924SThomas Lively       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
189*22442924SThomas Lively         return MCDisassembler::Fail;
190*22442924SThomas Lively       break;
191*22442924SThomas Lively     }
192*22442924SThomas Lively     case WebAssembly::OPERAND_VEC_I64IMM: {
193*22442924SThomas Lively       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
19416c16827SSam Clegg         return MCDisassembler::Fail;
19516c16827SSam Clegg       break;
19616c16827SSam Clegg     }
19716c16827SSam Clegg     case MCOI::OPERAND_REGISTER: {
19816c16827SSam Clegg       // These are NOT actually in the instruction stream, but MC is going to
19916c16827SSam Clegg       // expect operands to be present for them!
20016c16827SSam Clegg       // FIXME: can MC re-generate register assignments or do we have to
20116c16827SSam Clegg       // do this? Since this function decodes a single instruction, we don't
20216c16827SSam Clegg       // have the proper context for tracking an operand stack here.
20316c16827SSam Clegg       MI.addOperand(MCOperand::createReg(0));
20416c16827SSam Clegg       break;
20516c16827SSam Clegg     }
20616c16827SSam Clegg     default:
20716c16827SSam Clegg       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
20816c16827SSam Clegg     }
20916c16827SSam Clegg   }
21016c16827SSam Clegg   return MCDisassembler::Success;
2111a427287SDan Gohman }
212