1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file is part of the WebAssembly Assembler.
12 ///
13 /// It contains code to translate a parsed .s file into MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16 
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "wasm-asm-parser"
34 
35 namespace {
36 
37 /// WebAssemblyOperand - Instances of this class represent the operands in a
38 /// parsed WASM machine instruction.
39 struct WebAssemblyOperand : public MCParsedAsmOperand {
40   enum KindTy { Token, Integer, Float, Symbol } Kind;
41 
42   SMLoc StartLoc, EndLoc;
43 
44   struct TokOp {
45     StringRef Tok;
46   };
47 
48   struct IntOp {
49     int64_t Val;
50   };
51 
52   struct FltOp {
53     double Val;
54   };
55 
56   struct SymOp {
57     const MCExpr *Exp;
58   };
59 
60   union {
61     struct TokOp Tok;
62     struct IntOp Int;
63     struct FltOp Flt;
64     struct SymOp Sym;
65   };
66 
67   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
68       : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
69   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
70       : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
71   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
72       : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
73   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
74       : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
75 
76   bool isToken() const override { return Kind == Token; }
77   bool isImm() const override {
78     return Kind == Integer || Kind == Float || Kind == Symbol;
79   }
80   bool isMem() const override { return false; }
81   bool isReg() const override { return false; }
82 
83   unsigned getReg() const override {
84     llvm_unreachable("Assembly inspects a register operand");
85     return 0;
86   }
87 
88   StringRef getToken() const {
89     assert(isToken());
90     return Tok.Tok;
91   }
92 
93   SMLoc getStartLoc() const override { return StartLoc; }
94   SMLoc getEndLoc() const override { return EndLoc; }
95 
96   void addRegOperands(MCInst &, unsigned) const {
97     // Required by the assembly matcher.
98     llvm_unreachable("Assembly matcher creates register operands");
99   }
100 
101   void addImmOperands(MCInst &Inst, unsigned N) const {
102     assert(N == 1 && "Invalid number of operands!");
103     if (Kind == Integer)
104       Inst.addOperand(MCOperand::createImm(Int.Val));
105     else if (Kind == Float)
106       Inst.addOperand(MCOperand::createFPImm(Flt.Val));
107     else if (Kind == Symbol)
108       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
109     else
110       llvm_unreachable("Should be immediate or symbol!");
111   }
112 
113   void print(raw_ostream &OS) const override {
114     switch (Kind) {
115     case Token:
116       OS << "Tok:" << Tok.Tok;
117       break;
118     case Integer:
119       OS << "Int:" << Int.Val;
120       break;
121     case Float:
122       OS << "Flt:" << Flt.Val;
123       break;
124     case Symbol:
125       OS << "Sym:" << Sym.Exp;
126       break;
127     }
128   }
129 };
130 
131 class WebAssemblyAsmParser final : public MCTargetAsmParser {
132   MCAsmParser &Parser;
133   MCAsmLexer &Lexer;
134   MCSymbol *LastLabel;
135 
136 public:
137   WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
138                        const MCInstrInfo &mii, const MCTargetOptions &Options)
139       : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
140         Lexer(Parser.getLexer()), LastLabel(nullptr) {
141     setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
142   }
143 
144 #define GET_ASSEMBLER_HEADER
145 #include "WebAssemblyGenAsmMatcher.inc"
146 
147   // TODO: This is required to be implemented, but appears unused.
148   bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
149                      SMLoc & /*EndLoc*/) override {
150     llvm_unreachable("ParseRegister is not implemented.");
151   }
152 
153   bool Error(const StringRef &msg, const AsmToken &tok) {
154     return Parser.Error(tok.getLoc(), msg + tok.getString());
155   }
156 
157   bool IsNext(AsmToken::TokenKind Kind) {
158     auto ok = Lexer.is(Kind);
159     if (ok)
160       Parser.Lex();
161     return ok;
162   }
163 
164   bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
165     if (!IsNext(Kind))
166       return Error(std::string("Expected ") + KindName + ", instead got: ",
167                    Lexer.getTok());
168     return false;
169   }
170 
171   MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
172     // Derive type from .param .local decls, or the instruction itself.
173     return StringSwitch<MVT::SimpleValueType>(RegType)
174         .Case("i32", MVT::i32)
175         .Case("i64", MVT::i64)
176         .Case("f32", MVT::f32)
177         .Case("f64", MVT::f64)
178         .Case("i8x16", MVT::v16i8)
179         .Case("i16x8", MVT::v8i16)
180         .Case("i32x4", MVT::v4i32)
181         .Case("i64x2", MVT::v2i64)
182         .Case("f32x4", MVT::v4f32)
183         .Case("f64x2", MVT::v2f64)
184         // arbitrarily chosen vector type to associate with "v128"
185         // FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
186         // to accept more specific SIMD register types?
187         .Case("v128", MVT::v16i8)
188         .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
189   }
190 
191   void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
192     auto &Int = Lexer.getTok();
193     int64_t Val = Int.getIntVal();
194     if (IsNegative)
195       Val = -Val;
196     Operands.push_back(make_unique<WebAssemblyOperand>(
197         WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
198         WebAssemblyOperand::IntOp{Val}));
199     Parser.Lex();
200   }
201 
202   bool ParseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
203                                        StringRef InstName) {
204     ParseSingleInteger(IsNegative, Operands);
205     // FIXME: there is probably a cleaner way to do this.
206     auto IsLoadStore = InstName.startswith("load") ||
207                        InstName.startswith("store") ||
208                        InstName.startswith("atomic_load") ||
209                        InstName.startswith("atomic_store");
210     if (IsLoadStore) {
211       // Parse load/store operands of the form: offset align
212       auto &Offset = Lexer.getTok();
213       if (Offset.is(AsmToken::Integer)) {
214         ParseSingleInteger(false, Operands);
215       } else {
216         // Alignment not specified.
217         // FIXME: correctly derive a default from the instruction.
218         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
219         // an opcode until after the assembly matcher.
220         Operands.push_back(make_unique<WebAssemblyOperand>(
221             WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
222             WebAssemblyOperand::IntOp{0}));
223       }
224     }
225     return false;
226   }
227 
228   bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
229                         SMLoc NameLoc, OperandVector &Operands) override {
230     Operands.push_back(make_unique<WebAssemblyOperand>(
231         WebAssemblyOperand::Token, NameLoc,
232         SMLoc::getFromPointer(NameLoc.getPointer() + Name.size()),
233         WebAssemblyOperand::TokOp{
234             StringRef(NameLoc.getPointer(), Name.size())}));
235     auto NamePair = Name.split('.');
236     // If no '.', there is no type prefix.
237     if (NamePair.second.empty())
238       std::swap(NamePair.first, NamePair.second);
239     while (Lexer.isNot(AsmToken::EndOfStatement)) {
240       auto &Tok = Lexer.getTok();
241       switch (Tok.getKind()) {
242       case AsmToken::Identifier: {
243         auto &Id = Lexer.getTok();
244         const MCExpr *Val;
245         SMLoc End;
246         if (Parser.parsePrimaryExpr(Val, End))
247           return Error("Cannot parse symbol: ", Lexer.getTok());
248         Operands.push_back(make_unique<WebAssemblyOperand>(
249             WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
250             WebAssemblyOperand::SymOp{Val}));
251         break;
252       }
253       case AsmToken::Minus:
254         Parser.Lex();
255         if (Lexer.isNot(AsmToken::Integer))
256           return Error("Expected integer instead got: ", Lexer.getTok());
257         if (ParseOperandStartingWithInteger(true, Operands, NamePair.second))
258           return true;
259         break;
260       case AsmToken::Integer:
261         if (ParseOperandStartingWithInteger(false, Operands, NamePair.second))
262           return true;
263         break;
264       case AsmToken::Real: {
265         double Val;
266         if (Tok.getString().getAsDouble(Val, false))
267           return Error("Cannot parse real: ", Tok);
268         Operands.push_back(make_unique<WebAssemblyOperand>(
269             WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
270             WebAssemblyOperand::FltOp{Val}));
271         Parser.Lex();
272         break;
273       }
274       default:
275         return Error("Unexpected token in operand: ", Tok);
276       }
277       if (Lexer.isNot(AsmToken::EndOfStatement)) {
278         if (Expect(AsmToken::Comma, ","))
279           return true;
280       }
281     }
282     Parser.Lex();
283     // Block instructions require a signature index, but these are missing in
284     // assembly, so we add a dummy one explicitly (since we have no control
285     // over signature tables here, we assume these will be regenerated when
286     // the wasm module is generated).
287     if (NamePair.second == "block" || NamePair.second == "loop") {
288       Operands.push_back(make_unique<WebAssemblyOperand>(
289           WebAssemblyOperand::Integer, NameLoc, NameLoc,
290           WebAssemblyOperand::IntOp{-1}));
291     }
292     return false;
293   }
294 
295   void onLabelParsed(MCSymbol *Symbol) override { LastLabel = Symbol; }
296 
297   bool ParseDirective(AsmToken DirectiveID) override {
298     assert(DirectiveID.getKind() == AsmToken::Identifier);
299     auto &Out = getStreamer();
300     auto &TOut =
301         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
302     // TODO: we're just parsing the subset of directives we're interested in,
303     // and ignoring ones we don't recognise. We should ideally verify
304     // all directives here.
305     if (DirectiveID.getString() == ".type") {
306       // This could be the start of a function, check if followed by
307       // "label,@function"
308       if (!(IsNext(AsmToken::Identifier) && IsNext(AsmToken::Comma) &&
309             IsNext(AsmToken::At) && Lexer.is(AsmToken::Identifier)))
310         return Error("Expected label,@type declaration, got: ", Lexer.getTok());
311       Parser.Lex();
312       // Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
313     } else if (DirectiveID.getString() == ".param" ||
314                DirectiveID.getString() == ".local") {
315       // Track the number of locals, needed for correct virtual register
316       // assignment elsewhere.
317       // Also output a directive to the streamer.
318       std::vector<MVT> Params;
319       std::vector<MVT> Locals;
320       while (Lexer.is(AsmToken::Identifier)) {
321         auto RegType = ParseRegType(Lexer.getTok().getString());
322         if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE)
323           return true;
324         if (DirectiveID.getString() == ".param") {
325           Params.push_back(RegType);
326         } else {
327           Locals.push_back(RegType);
328         }
329         Parser.Lex();
330         if (!IsNext(AsmToken::Comma))
331           break;
332       }
333       assert(LastLabel);
334       TOut.emitParam(LastLabel, Params);
335       TOut.emitLocal(Locals);
336     } else {
337       // For now, ignore anydirective we don't recognize:
338       while (Lexer.isNot(AsmToken::EndOfStatement))
339         Parser.Lex();
340     }
341     return Expect(AsmToken::EndOfStatement, "EOL");
342   }
343 
344   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
345                                OperandVector &Operands, MCStreamer &Out,
346                                uint64_t &ErrorInfo,
347                                bool MatchingInlineAsm) override {
348     MCInst Inst;
349     unsigned MatchResult =
350         MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
351     switch (MatchResult) {
352     case Match_Success: {
353       Out.EmitInstruction(Inst, getSTI());
354       return false;
355     }
356     case Match_MissingFeature:
357       return Parser.Error(
358           IDLoc, "instruction requires a WASM feature not currently enabled");
359     case Match_MnemonicFail:
360       return Parser.Error(IDLoc, "invalid instruction");
361     case Match_NearMisses:
362       return Parser.Error(IDLoc, "ambiguous instruction");
363     case Match_InvalidTiedOperand:
364     case Match_InvalidOperand: {
365       SMLoc ErrorLoc = IDLoc;
366       if (ErrorInfo != ~0ULL) {
367         if (ErrorInfo >= Operands.size())
368           return Parser.Error(IDLoc, "too few operands for instruction");
369         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
370         if (ErrorLoc == SMLoc())
371           ErrorLoc = IDLoc;
372       }
373       return Parser.Error(ErrorLoc, "invalid operand for instruction");
374     }
375     }
376     llvm_unreachable("Implement any new match types added!");
377   }
378 };
379 } // end anonymous namespace
380 
381 // Force static initialization.
382 extern "C" void LLVMInitializeWebAssemblyAsmParser() {
383   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
384   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
385 }
386 
387 #define GET_REGISTER_MATCHER
388 #define GET_MATCHER_IMPLEMENTATION
389 #include "WebAssemblyGenAsmMatcher.inc"
390