11a427287SDan Gohman //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 21a427287SDan Gohman // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61a427287SDan Gohman // 71a427287SDan Gohman //===----------------------------------------------------------------------===// 81a427287SDan Gohman /// 91a427287SDan Gohman /// \file 105f8f34e4SAdrian Prantl /// This file is part of the WebAssembly Disassembler. 111a427287SDan Gohman /// 121a427287SDan Gohman /// It contains code to translate the data produced by the decoder into 131a427287SDan Gohman /// MCInsts. 141a427287SDan Gohman /// 151a427287SDan Gohman //===----------------------------------------------------------------------===// 161a427287SDan Gohman 17a263aa25SDavid L. Jones #include "MCTargetDesc/WebAssemblyInstPrinter.h" 181a427287SDan Gohman #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 19c6c42137SRichard Trieu #include "TargetInfo/WebAssemblyTargetInfo.h" 201a427287SDan Gohman #include "llvm/MC/MCContext.h" 21c50b8907SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h" 2216c16827SSam Clegg #include "llvm/MC/MCFixedLenDisassembler.h" 231a427287SDan Gohman #include "llvm/MC/MCInst.h" 241a427287SDan Gohman #include "llvm/MC/MCInstrInfo.h" 251a427287SDan Gohman #include "llvm/MC/MCSubtargetInfo.h" 261a427287SDan Gohman #include "llvm/MC/MCSymbol.h" 272cb27072SThomas Lively #include "llvm/MC/MCSymbolWasm.h" 281a427287SDan Gohman #include "llvm/Support/Endian.h" 2916c16827SSam Clegg #include "llvm/Support/LEB128.h" 301a427287SDan Gohman #include "llvm/Support/TargetRegistry.h" 3116c16827SSam Clegg 321a427287SDan Gohman using namespace llvm; 331a427287SDan Gohman 341a427287SDan Gohman #define DEBUG_TYPE "wasm-disassembler" 351a427287SDan Gohman 3616c16827SSam Clegg using DecodeStatus = MCDisassembler::DecodeStatus; 3716c16827SSam Clegg 3816c16827SSam Clegg #include "WebAssemblyGenDisassemblerTables.inc" 3916c16827SSam Clegg 401a427287SDan Gohman namespace { 4149550663SFangrui Song static constexpr int WebAssemblyInstructionTableSize = 256; 4249550663SFangrui Song 431a427287SDan Gohman class WebAssemblyDisassembler final : public MCDisassembler { 441a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII; 451a427287SDan Gohman 461a427287SDan Gohman DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 471a427287SDan Gohman ArrayRef<uint8_t> Bytes, uint64_t Address, 481a427287SDan Gohman raw_ostream &CStream) const override; 495bd33de9SRonak Chauhan Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 50480a16d5SRonak Chauhan ArrayRef<uint8_t> Bytes, 51480a16d5SRonak Chauhan uint64_t Address, 52f3b762a0SWouter van Oortmerssen raw_ostream &CStream) const override; 531a427287SDan Gohman 541a427287SDan Gohman public: 551a427287SDan Gohman WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 561a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII) 571a427287SDan Gohman : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 581a427287SDan Gohman }; 591a427287SDan Gohman } // end anonymous namespace 601a427287SDan Gohman 611a427287SDan Gohman static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 621a427287SDan Gohman const MCSubtargetInfo &STI, 631a427287SDan Gohman MCContext &Ctx) { 641a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 651a427287SDan Gohman return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 661a427287SDan Gohman } 671a427287SDan Gohman 680dbcb363STom Stellard extern "C" LLVM_EXTERNAL_VISIBILITY void 690dbcb363STom Stellard LLVMInitializeWebAssemblyDisassembler() { 701a427287SDan Gohman // Register the disassembler for each target. 71f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 721a427287SDan Gohman createWebAssemblyDisassembler); 73f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 741a427287SDan Gohman createWebAssemblyDisassembler); 751a427287SDan Gohman } 761a427287SDan Gohman 77fc3163b6SThomas Lively static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 7816c16827SSam Clegg if (Size >= Bytes.size()) 7916c16827SSam Clegg return -1; 8016c16827SSam Clegg auto V = Bytes[Size]; 8116c16827SSam Clegg Size++; 8216c16827SSam Clegg return V; 8316c16827SSam Clegg } 8416c16827SSam Clegg 852faf0794SThomas Lively static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 86f3b762a0SWouter van Oortmerssen bool Signed) { 8716c16827SSam Clegg unsigned N = 0; 8816c16827SSam Clegg const char *Error = nullptr; 892faf0794SThomas Lively Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 9016c16827SSam Clegg Bytes.data() + Bytes.size(), &Error) 912faf0794SThomas Lively : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 922faf0794SThomas Lively Bytes.data() + Bytes.size(), 932faf0794SThomas Lively &Error)); 9416c16827SSam Clegg if (Error) 9516c16827SSam Clegg return false; 9616c16827SSam Clegg Size += N; 972faf0794SThomas Lively return true; 982faf0794SThomas Lively } 992faf0794SThomas Lively 1002faf0794SThomas Lively static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 1012faf0794SThomas Lively ArrayRef<uint8_t> Bytes, bool Signed) { 1022faf0794SThomas Lively int64_t Val; 1032faf0794SThomas Lively if (!nextLEB(Val, Bytes, Size, Signed)) 1042faf0794SThomas Lively return false; 10516c16827SSam Clegg MI.addOperand(MCOperand::createImm(Val)); 10616c16827SSam Clegg return true; 10716c16827SSam Clegg } 10816c16827SSam Clegg 10916c16827SSam Clegg template <typename T> 11022442924SThomas Lively bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 11116c16827SSam Clegg if (Size + sizeof(T) > Bytes.size()) 11216c16827SSam Clegg return false; 113df6770f0SHeejin Ahn T Val = support::endian::read<T, support::endianness::little, 1>( 1140d9f3f7fSWouter van Oortmerssen Bytes.data() + Size); 11516c16827SSam Clegg Size += sizeof(T); 11622442924SThomas Lively if (std::is_floating_point<T>::value) { 117*e5269da9SFangrui Song MI.addOperand( 118*e5269da9SFangrui Song MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 11922442924SThomas Lively } else { 12022442924SThomas Lively MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 12122442924SThomas Lively } 12216c16827SSam Clegg return true; 12316c16827SSam Clegg } 12416c16827SSam Clegg 125480a16d5SRonak Chauhan Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart( 1265bd33de9SRonak Chauhan SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 1275bd33de9SRonak Chauhan uint64_t Address, raw_ostream &CStream) const { 128f3b762a0SWouter van Oortmerssen Size = 0; 129f3b762a0SWouter van Oortmerssen if (Address == 0) { 130f3b762a0SWouter van Oortmerssen // Start of a code section: we're parsing only the function count. 131f3b762a0SWouter van Oortmerssen int64_t FunctionCount; 132f3b762a0SWouter van Oortmerssen if (!nextLEB(FunctionCount, Bytes, Size, false)) 133480a16d5SRonak Chauhan return None; 134f3b762a0SWouter van Oortmerssen outs() << " # " << FunctionCount << " functions in section."; 135f3b762a0SWouter van Oortmerssen } else { 136f3b762a0SWouter van Oortmerssen // Parse the start of a single function. 137f3b762a0SWouter van Oortmerssen int64_t BodySize, LocalEntryCount; 138f3b762a0SWouter van Oortmerssen if (!nextLEB(BodySize, Bytes, Size, false) || 139f3b762a0SWouter van Oortmerssen !nextLEB(LocalEntryCount, Bytes, Size, false)) 140480a16d5SRonak Chauhan return None; 141f3b762a0SWouter van Oortmerssen if (LocalEntryCount) { 142f3b762a0SWouter van Oortmerssen outs() << " .local "; 143f3b762a0SWouter van Oortmerssen for (int64_t I = 0; I < LocalEntryCount; I++) { 144f3b762a0SWouter van Oortmerssen int64_t Count, Type; 145f3b762a0SWouter van Oortmerssen if (!nextLEB(Count, Bytes, Size, false) || 146f3b762a0SWouter van Oortmerssen !nextLEB(Type, Bytes, Size, false)) 147480a16d5SRonak Chauhan return None; 148f3b762a0SWouter van Oortmerssen for (int64_t J = 0; J < Count; J++) { 149f3b762a0SWouter van Oortmerssen if (I || J) 150f3b762a0SWouter van Oortmerssen outs() << ", "; 151f3b762a0SWouter van Oortmerssen outs() << WebAssembly::anyTypeToString(Type); 152f3b762a0SWouter van Oortmerssen } 153f3b762a0SWouter van Oortmerssen } 154f3b762a0SWouter van Oortmerssen } 155f3b762a0SWouter van Oortmerssen } 156f3b762a0SWouter van Oortmerssen outs() << "\n"; 157f3b762a0SWouter van Oortmerssen return MCDisassembler::Success; 158f3b762a0SWouter van Oortmerssen } 159f3b762a0SWouter van Oortmerssen 1601a427287SDan Gohman MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1611a427287SDan Gohman MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 1626fdd6a7bSFangrui Song raw_ostream &CS) const { 16316c16827SSam Clegg CommentStream = &CS; 16416c16827SSam Clegg Size = 0; 165fc3163b6SThomas Lively int Opc = nextByte(Bytes, Size); 16616c16827SSam Clegg if (Opc < 0) 1671a427287SDan Gohman return MCDisassembler::Fail; 16816c16827SSam Clegg const auto *WasmInst = &InstructionTable0[Opc]; 16916c16827SSam Clegg // If this is a prefix byte, indirect to another table. 17016c16827SSam Clegg if (WasmInst->ET == ET_Prefix) { 17116c16827SSam Clegg WasmInst = nullptr; 17216c16827SSam Clegg // Linear search, so far only 2 entries. 17316c16827SSam Clegg for (auto PT = PrefixTable; PT->Table; PT++) { 17416c16827SSam Clegg if (PT->Prefix == Opc) { 17516c16827SSam Clegg WasmInst = PT->Table; 17616c16827SSam Clegg break; 17716c16827SSam Clegg } 17816c16827SSam Clegg } 17916c16827SSam Clegg if (!WasmInst) 18016c16827SSam Clegg return MCDisassembler::Fail; 1812faf0794SThomas Lively int64_t PrefixedOpc; 182f3b762a0SWouter van Oortmerssen if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 18316c16827SSam Clegg return MCDisassembler::Fail; 1842faf0794SThomas Lively if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 1852faf0794SThomas Lively return MCDisassembler::Fail; 1862faf0794SThomas Lively WasmInst += PrefixedOpc; 18716c16827SSam Clegg } 18816c16827SSam Clegg if (WasmInst->ET == ET_Unused) 18916c16827SSam Clegg return MCDisassembler::Fail; 19016c16827SSam Clegg // At this point we must have a valid instruction to decode. 19116c16827SSam Clegg assert(WasmInst->ET == ET_Instruction); 19216c16827SSam Clegg MI.setOpcode(WasmInst->Opcode); 19316c16827SSam Clegg // Parse any operands. 19416c16827SSam Clegg for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 195820c6263SWouter van Oortmerssen auto OT = OperandTable[WasmInst->OperandStart + OPI]; 196820c6263SWouter van Oortmerssen switch (OT) { 19716c16827SSam Clegg // ULEB operands: 19816c16827SSam Clegg case WebAssembly::OPERAND_BASIC_BLOCK: 19916c16827SSam Clegg case WebAssembly::OPERAND_LOCAL: 20016c16827SSam Clegg case WebAssembly::OPERAND_GLOBAL: 20116c16827SSam Clegg case WebAssembly::OPERAND_FUNCTION32: 20269e2797eSPaulo Matos case WebAssembly::OPERAND_TABLE: 20316c16827SSam Clegg case WebAssembly::OPERAND_OFFSET32: 204d9e0bbd1SWouter van Oortmerssen case WebAssembly::OPERAND_OFFSET64: 20516c16827SSam Clegg case WebAssembly::OPERAND_P2ALIGN: 20616c16827SSam Clegg case WebAssembly::OPERAND_TYPEINDEX: 2078a28ce1aSWouter van Oortmerssen case WebAssembly::OPERAND_EVENT: 20816c16827SSam Clegg case MCOI::OPERAND_IMMEDIATE: { 20916c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, false)) 21016c16827SSam Clegg return MCDisassembler::Fail; 21116c16827SSam Clegg break; 21216c16827SSam Clegg } 21316c16827SSam Clegg // SLEB operands: 21416c16827SSam Clegg case WebAssembly::OPERAND_I32IMM: 215ad72f685SWouter van Oortmerssen case WebAssembly::OPERAND_I64IMM: { 21616c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, true)) 21716c16827SSam Clegg return MCDisassembler::Fail; 21816c16827SSam Clegg break; 21916c16827SSam Clegg } 2202cb27072SThomas Lively // block_type operands: 221ad72f685SWouter van Oortmerssen case WebAssembly::OPERAND_SIGNATURE: { 2222cb27072SThomas Lively int64_t Val; 2232cb27072SThomas Lively uint64_t PrevSize = Size; 2242cb27072SThomas Lively if (!nextLEB(Val, Bytes, Size, true)) 225ad72f685SWouter van Oortmerssen return MCDisassembler::Fail; 2262cb27072SThomas Lively if (Val < 0) { 2272cb27072SThomas Lively // Negative values are single septet value types or empty types 2282cb27072SThomas Lively if (Size != PrevSize + 1) { 2292cb27072SThomas Lively MI.addOperand( 2302cb27072SThomas Lively MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 2312cb27072SThomas Lively } else { 2322cb27072SThomas Lively MI.addOperand(MCOperand::createImm(Val & 0x7f)); 2332cb27072SThomas Lively } 2342cb27072SThomas Lively } else { 2352cb27072SThomas Lively // We don't have access to the signature, so create a symbol without one 2362cb27072SThomas Lively MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 2372cb27072SThomas Lively auto *WasmSym = cast<MCSymbolWasm>(Sym); 2382cb27072SThomas Lively WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 2392cb27072SThomas Lively const MCExpr *Expr = MCSymbolRefExpr::create( 2402cb27072SThomas Lively WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 2412cb27072SThomas Lively MI.addOperand(MCOperand::createExpr(Expr)); 2422cb27072SThomas Lively } 243ad72f685SWouter van Oortmerssen break; 244ad72f685SWouter van Oortmerssen } 245107c3a12SAndy Wingo // heap_type operands, for e.g. ref.null: 246107c3a12SAndy Wingo case WebAssembly::OPERAND_HEAPTYPE: { 247107c3a12SAndy Wingo int64_t Val; 248107c3a12SAndy Wingo uint64_t PrevSize = Size; 249107c3a12SAndy Wingo if (!nextLEB(Val, Bytes, Size, true)) 250107c3a12SAndy Wingo return MCDisassembler::Fail; 251107c3a12SAndy Wingo if (Val < 0 && Size == PrevSize + 1) { 252107c3a12SAndy Wingo // The HeapType encoding is like BlockType, in that encodings that 253107c3a12SAndy Wingo // decode as negative values indicate ValTypes. In practice we expect 254107c3a12SAndy Wingo // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here. 255107c3a12SAndy Wingo // 256107c3a12SAndy Wingo // The positive SLEB values are reserved for future expansion and are 257107c3a12SAndy Wingo // expected to be type indices in the typed function references 258107c3a12SAndy Wingo // proposal, and should disassemble as MCSymbolRefExpr as in BlockType 259107c3a12SAndy Wingo // above. 260107c3a12SAndy Wingo MI.addOperand(MCOperand::createImm(Val & 0x7f)); 261107c3a12SAndy Wingo } else { 262107c3a12SAndy Wingo MI.addOperand( 263107c3a12SAndy Wingo MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid))); 264107c3a12SAndy Wingo } 265107c3a12SAndy Wingo break; 266107c3a12SAndy Wingo } 26716c16827SSam Clegg // FP operands. 26816c16827SSam Clegg case WebAssembly::OPERAND_F32IMM: { 26922442924SThomas Lively if (!parseImmediate<float>(MI, Size, Bytes)) 27016c16827SSam Clegg return MCDisassembler::Fail; 27116c16827SSam Clegg break; 27216c16827SSam Clegg } 27316c16827SSam Clegg case WebAssembly::OPERAND_F64IMM: { 27422442924SThomas Lively if (!parseImmediate<double>(MI, Size, Bytes)) 27522442924SThomas Lively return MCDisassembler::Fail; 27622442924SThomas Lively break; 27722442924SThomas Lively } 27822442924SThomas Lively // Vector lane operands (not LEB encoded). 27922442924SThomas Lively case WebAssembly::OPERAND_VEC_I8IMM: { 28022442924SThomas Lively if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 28122442924SThomas Lively return MCDisassembler::Fail; 28222442924SThomas Lively break; 28322442924SThomas Lively } 28422442924SThomas Lively case WebAssembly::OPERAND_VEC_I16IMM: { 28522442924SThomas Lively if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 28622442924SThomas Lively return MCDisassembler::Fail; 28722442924SThomas Lively break; 28822442924SThomas Lively } 28922442924SThomas Lively case WebAssembly::OPERAND_VEC_I32IMM: { 29022442924SThomas Lively if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 29122442924SThomas Lively return MCDisassembler::Fail; 29222442924SThomas Lively break; 29322442924SThomas Lively } 29422442924SThomas Lively case WebAssembly::OPERAND_VEC_I64IMM: { 29522442924SThomas Lively if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 29616c16827SSam Clegg return MCDisassembler::Fail; 29716c16827SSam Clegg break; 29816c16827SSam Clegg } 299820c6263SWouter van Oortmerssen case WebAssembly::OPERAND_BRLIST: { 300820c6263SWouter van Oortmerssen int64_t TargetTableLen; 301820c6263SWouter van Oortmerssen if (!nextLEB(TargetTableLen, Bytes, Size, false)) 302820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 303820c6263SWouter van Oortmerssen for (int64_t I = 0; I < TargetTableLen; I++) { 304820c6263SWouter van Oortmerssen if (!parseLEBImmediate(MI, Size, Bytes, false)) 305820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 306820c6263SWouter van Oortmerssen } 307820c6263SWouter van Oortmerssen // Default case. 308820c6263SWouter van Oortmerssen if (!parseLEBImmediate(MI, Size, Bytes, false)) 309820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 310820c6263SWouter van Oortmerssen break; 311820c6263SWouter van Oortmerssen } 312a733d08dSWouter van Oortmerssen case MCOI::OPERAND_REGISTER: 313a733d08dSWouter van Oortmerssen // The tablegen header currently does not have any register operands since 314a733d08dSWouter van Oortmerssen // we use only the stack (_S) instructions. 315a733d08dSWouter van Oortmerssen // If you hit this that probably means a bad instruction definition in 316a733d08dSWouter van Oortmerssen // tablegen. 317a733d08dSWouter van Oortmerssen llvm_unreachable("Register operand in WebAssemblyDisassembler"); 31816c16827SSam Clegg default: 31916c16827SSam Clegg llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 32016c16827SSam Clegg } 32116c16827SSam Clegg } 32216c16827SSam Clegg return MCDisassembler::Success; 3231a427287SDan Gohman } 324