//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstring>
#include <type_traits>

using namespace llvm;

#define DEBUG_TYPE "wasm-disassembler"

using DecodeStatus = MCDisassembler::DecodeStatus;

#include "WebAssemblyGenDisassemblerTables.inc"

391a427287SDan Gohman namespace { 401a427287SDan Gohman class WebAssemblyDisassembler final : public MCDisassembler { 411a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII; 421a427287SDan Gohman 431a427287SDan Gohman DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 441a427287SDan Gohman ArrayRef<uint8_t> Bytes, uint64_t Address, 451a427287SDan Gohman raw_ostream &VStream, 461a427287SDan Gohman raw_ostream &CStream) const override; 471a427287SDan Gohman 481a427287SDan Gohman public: 491a427287SDan Gohman WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 501a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII) 511a427287SDan Gohman : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 521a427287SDan Gohman }; 531a427287SDan Gohman } // end anonymous namespace 541a427287SDan Gohman 551a427287SDan Gohman static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 561a427287SDan Gohman const MCSubtargetInfo &STI, 571a427287SDan Gohman MCContext &Ctx) { 581a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 591a427287SDan Gohman return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 601a427287SDan Gohman } 611a427287SDan Gohman 621a427287SDan Gohman extern "C" void LLVMInitializeWebAssemblyDisassembler() { 631a427287SDan Gohman // Register the disassembler for each target. 
64f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 651a427287SDan Gohman createWebAssemblyDisassembler); 66f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 671a427287SDan Gohman createWebAssemblyDisassembler); 681a427287SDan Gohman } 691a427287SDan Gohman 7016c16827SSam Clegg static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 7116c16827SSam Clegg if (Size >= Bytes.size()) 7216c16827SSam Clegg return -1; 7316c16827SSam Clegg auto V = Bytes[Size]; 7416c16827SSam Clegg Size++; 7516c16827SSam Clegg return V; 7616c16827SSam Clegg } 7716c16827SSam Clegg 7816c16827SSam Clegg static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 7916c16827SSam Clegg ArrayRef<uint8_t> Bytes, bool Signed) { 8016c16827SSam Clegg unsigned N = 0; 8116c16827SSam Clegg const char *Error = nullptr; 8216c16827SSam Clegg auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 8316c16827SSam Clegg Bytes.data() + Bytes.size(), &Error) 8416c16827SSam Clegg : static_cast<int64_t>( 8516c16827SSam Clegg decodeULEB128(Bytes.data() + Size, &N, 8616c16827SSam Clegg Bytes.data() + Bytes.size(), &Error)); 8716c16827SSam Clegg if (Error) 8816c16827SSam Clegg return false; 8916c16827SSam Clegg Size += N; 9016c16827SSam Clegg MI.addOperand(MCOperand::createImm(Val)); 9116c16827SSam Clegg return true; 9216c16827SSam Clegg } 9316c16827SSam Clegg 9416c16827SSam Clegg template <typename T> 95*22442924SThomas Lively bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 9616c16827SSam Clegg if (Size + sizeof(T) > Bytes.size()) 9716c16827SSam Clegg return false; 9816c16827SSam Clegg T Val; 9916c16827SSam Clegg memcpy(&Val, Bytes.data() + Size, sizeof(T)); 10016c16827SSam Clegg support::endian::byte_swap<T, support::endianness::little>(Val); 10116c16827SSam Clegg Size += sizeof(T); 102*22442924SThomas Lively if (std::is_floating_point<T>::value) { 10316c16827SSam Clegg 
MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 104*22442924SThomas Lively } else { 105*22442924SThomas Lively MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 106*22442924SThomas Lively } 10716c16827SSam Clegg return true; 10816c16827SSam Clegg } 10916c16827SSam Clegg 1101a427287SDan Gohman MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1111a427287SDan Gohman MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 11216c16827SSam Clegg raw_ostream & /*OS*/, raw_ostream &CS) const { 11316c16827SSam Clegg CommentStream = &CS; 11416c16827SSam Clegg Size = 0; 11516c16827SSam Clegg auto Opc = nextByte(Bytes, Size); 11616c16827SSam Clegg if (Opc < 0) 1171a427287SDan Gohman return MCDisassembler::Fail; 11816c16827SSam Clegg const auto *WasmInst = &InstructionTable0[Opc]; 11916c16827SSam Clegg // If this is a prefix byte, indirect to another table. 12016c16827SSam Clegg if (WasmInst->ET == ET_Prefix) { 12116c16827SSam Clegg WasmInst = nullptr; 12216c16827SSam Clegg // Linear search, so far only 2 entries. 12316c16827SSam Clegg for (auto PT = PrefixTable; PT->Table; PT++) { 12416c16827SSam Clegg if (PT->Prefix == Opc) { 12516c16827SSam Clegg WasmInst = PT->Table; 12616c16827SSam Clegg break; 12716c16827SSam Clegg } 12816c16827SSam Clegg } 12916c16827SSam Clegg if (!WasmInst) 13016c16827SSam Clegg return MCDisassembler::Fail; 13116c16827SSam Clegg Opc = nextByte(Bytes, Size); 13216c16827SSam Clegg if (Opc < 0) 13316c16827SSam Clegg return MCDisassembler::Fail; 13416c16827SSam Clegg WasmInst += Opc; 13516c16827SSam Clegg } 13616c16827SSam Clegg if (WasmInst->ET == ET_Unused) 13716c16827SSam Clegg return MCDisassembler::Fail; 13816c16827SSam Clegg // At this point we must have a valid instruction to decode. 13916c16827SSam Clegg assert(WasmInst->ET == ET_Instruction); 14016c16827SSam Clegg MI.setOpcode(WasmInst->Opcode); 14116c16827SSam Clegg // Parse any operands. 
14216c16827SSam Clegg for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 14316c16827SSam Clegg switch (WasmInst->Operands[OPI]) { 14416c16827SSam Clegg // ULEB operands: 14516c16827SSam Clegg case WebAssembly::OPERAND_BASIC_BLOCK: 14616c16827SSam Clegg case WebAssembly::OPERAND_LOCAL: 14716c16827SSam Clegg case WebAssembly::OPERAND_GLOBAL: 14816c16827SSam Clegg case WebAssembly::OPERAND_FUNCTION32: 14916c16827SSam Clegg case WebAssembly::OPERAND_OFFSET32: 15016c16827SSam Clegg case WebAssembly::OPERAND_P2ALIGN: 15116c16827SSam Clegg case WebAssembly::OPERAND_TYPEINDEX: 15216c16827SSam Clegg case MCOI::OPERAND_IMMEDIATE: { 15316c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, false)) 15416c16827SSam Clegg return MCDisassembler::Fail; 15516c16827SSam Clegg break; 15616c16827SSam Clegg } 15716c16827SSam Clegg // SLEB operands: 15816c16827SSam Clegg case WebAssembly::OPERAND_I32IMM: 15916c16827SSam Clegg case WebAssembly::OPERAND_I64IMM: 16016c16827SSam Clegg case WebAssembly::OPERAND_SIGNATURE: { 16116c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, true)) 16216c16827SSam Clegg return MCDisassembler::Fail; 16316c16827SSam Clegg break; 16416c16827SSam Clegg } 16516c16827SSam Clegg // FP operands. 16616c16827SSam Clegg case WebAssembly::OPERAND_F32IMM: { 167*22442924SThomas Lively if (!parseImmediate<float>(MI, Size, Bytes)) 16816c16827SSam Clegg return MCDisassembler::Fail; 16916c16827SSam Clegg break; 17016c16827SSam Clegg } 17116c16827SSam Clegg case WebAssembly::OPERAND_F64IMM: { 172*22442924SThomas Lively if (!parseImmediate<double>(MI, Size, Bytes)) 173*22442924SThomas Lively return MCDisassembler::Fail; 174*22442924SThomas Lively break; 175*22442924SThomas Lively } 176*22442924SThomas Lively // Vector lane operands (not LEB encoded). 
177*22442924SThomas Lively case WebAssembly::OPERAND_VEC_I8IMM: { 178*22442924SThomas Lively if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 179*22442924SThomas Lively return MCDisassembler::Fail; 180*22442924SThomas Lively break; 181*22442924SThomas Lively } 182*22442924SThomas Lively case WebAssembly::OPERAND_VEC_I16IMM: { 183*22442924SThomas Lively if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 184*22442924SThomas Lively return MCDisassembler::Fail; 185*22442924SThomas Lively break; 186*22442924SThomas Lively } 187*22442924SThomas Lively case WebAssembly::OPERAND_VEC_I32IMM: { 188*22442924SThomas Lively if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 189*22442924SThomas Lively return MCDisassembler::Fail; 190*22442924SThomas Lively break; 191*22442924SThomas Lively } 192*22442924SThomas Lively case WebAssembly::OPERAND_VEC_I64IMM: { 193*22442924SThomas Lively if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 19416c16827SSam Clegg return MCDisassembler::Fail; 19516c16827SSam Clegg break; 19616c16827SSam Clegg } 19716c16827SSam Clegg case MCOI::OPERAND_REGISTER: { 19816c16827SSam Clegg // These are NOT actually in the instruction stream, but MC is going to 19916c16827SSam Clegg // expect operands to be present for them! 20016c16827SSam Clegg // FIXME: can MC re-generate register assignments or do we have to 20116c16827SSam Clegg // do this? Since this function decodes a single instruction, we don't 20216c16827SSam Clegg // have the proper context for tracking an operand stack here. 20316c16827SSam Clegg MI.addOperand(MCOperand::createReg(0)); 20416c16827SSam Clegg break; 20516c16827SSam Clegg } 20616c16827SSam Clegg default: 20716c16827SSam Clegg llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 20816c16827SSam Clegg } 20916c16827SSam Clegg } 21016c16827SSam Clegg return MCDisassembler::Success; 2111a427287SDan Gohman } 212