1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file is part of the WebAssembly Assembler.
12 ///
13 /// It contains code to translate a parsed .s file into MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16 
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "wasm-asm-parser"
34 
35 namespace {
36 
37 /// WebAssemblyOperand - Instances of this class represent the operands in a
38 /// parsed WASM machine instruction.
39 struct WebAssemblyOperand : public MCParsedAsmOperand {
40   enum KindTy { Token, Integer, Float, Symbol } Kind;
41 
42   SMLoc StartLoc, EndLoc;
43 
44   struct TokOp {
45     StringRef Tok;
46   };
47 
48   struct IntOp {
49     int64_t Val;
50   };
51 
52   struct FltOp {
53     double Val;
54   };
55 
56   struct SymOp {
57     const MCExpr *Exp;
58   };
59 
60   union {
61     struct TokOp Tok;
62     struct IntOp Int;
63     struct FltOp Flt;
64     struct SymOp Sym;
65   };
66 
67   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
68     : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
69   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
70     : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
71   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
72     : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
73   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
74     : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
75 
76   bool isToken() const override { return Kind == Token; }
77   bool isImm() const override { return Kind == Integer ||
78                                        Kind == Float ||
79                                        Kind == Symbol; }
80   bool isMem() const override { return false; }
81   bool isReg() const override { return false; }
82 
83   unsigned getReg() const override {
84     llvm_unreachable("Assembly inspects a register operand");
85     return 0;
86   }
87 
88   StringRef getToken() const {
89     assert(isToken());
90     return Tok.Tok;
91   }
92 
93   SMLoc getStartLoc() const override { return StartLoc; }
94   SMLoc getEndLoc() const override { return EndLoc; }
95 
96   void addRegOperands(MCInst &, unsigned) const {
97     // Required by the assembly matcher.
98     llvm_unreachable("Assembly matcher creates register operands");
99   }
100 
101   void addImmOperands(MCInst &Inst, unsigned N) const {
102     assert(N == 1 && "Invalid number of operands!");
103     if (Kind == Integer)
104       Inst.addOperand(MCOperand::createImm(Int.Val));
105     else if (Kind == Float)
106       Inst.addOperand(MCOperand::createFPImm(Flt.Val));
107     else if (Kind == Symbol)
108       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
109     else
110       llvm_unreachable("Should be immediate or symbol!");
111   }
112 
113   void print(raw_ostream &OS) const override {
114     switch (Kind) {
115     case Token:
116       OS << "Tok:" << Tok.Tok;
117       break;
118     case Integer:
119       OS << "Int:" << Int.Val;
120       break;
121     case Float:
122       OS << "Flt:" << Flt.Val;
123       break;
124     case Symbol:
125       OS << "Sym:" << Sym.Exp;
126       break;
127     }
128   }
129 };
130 
131 class WebAssemblyAsmParser final : public MCTargetAsmParser {
132   MCAsmParser &Parser;
133   MCAsmLexer &Lexer;
134   MCSymbol *LastLabel;
135 
136 public:
137   WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
138                        const MCInstrInfo &mii, const MCTargetOptions &Options)
139       : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
140         Lexer(Parser.getLexer()), LastLabel(nullptr) {
141     setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
142   }
143 
144 #define GET_ASSEMBLER_HEADER
145 #include "WebAssemblyGenAsmMatcher.inc"
146 
147   // TODO: This is required to be implemented, but appears unused.
148   bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/,
149                      SMLoc &/*EndLoc*/) override {
150     llvm_unreachable("ParseRegister is not implemented.");
151   }
152 
153   bool Error(const StringRef &msg, const AsmToken &tok) {
154     return Parser.Error(tok.getLoc(), msg + tok.getString());
155   }
156 
157   bool IsNext(AsmToken::TokenKind Kind) {
158     auto ok = Lexer.is(Kind);
159     if (ok) Parser.Lex();
160     return ok;
161   }
162 
163   bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
164     if (!IsNext(Kind))
165       return Error(std::string("Expected ") + KindName + ", instead got: ",
166                    Lexer.getTok());
167     return false;
168   }
169 
170   MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
171     // Derive type from .param .local decls, or the instruction itself.
172     return StringSwitch<MVT::SimpleValueType>(RegType)
173         .Case("i32", MVT::i32)
174         .Case("i64", MVT::i64)
175         .Case("f32", MVT::f32)
176         .Case("f64", MVT::f64)
177         .Case("i8x16", MVT::v16i8)
178         .Case("i16x8", MVT::v8i16)
179         .Case("i32x4", MVT::v4i32)
180         .Case("i64x2", MVT::v2i64)
181         .Case("f32x4", MVT::v4f32)
182         .Case("f64x2", MVT::v2f64)
183         // arbitrarily chosen vector type to associate with "v128"
184         // FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
185         // to accept more specific SIMD register types?
186         .Case("v128", MVT::v16i8)
187         .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
188   }
189 
190   void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
191     auto &Int = Lexer.getTok();
192     int64_t Val = Int.getIntVal();
193     if (IsNegative) Val = -Val;
194     Operands.push_back(make_unique<WebAssemblyOperand>(
195                          WebAssemblyOperand::Integer, Int.getLoc(),
196                          Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
197     Parser.Lex();
198   }
199 
200   bool ParseOperandStartingWithInteger(bool IsNegative,
201                                        OperandVector &Operands,
202                                        StringRef InstName) {
203     ParseSingleInteger(IsNegative, Operands);
204     // FIXME: there is probably a cleaner way to do this.
205     auto IsLoadStore = InstName.startswith("load") ||
206                        InstName.startswith("store") ||
207                        InstName.startswith("atomic_load") ||
208                        InstName.startswith("atomic_store");
209     if (IsLoadStore) {
210       // Parse load/store operands of the form: offset align
211       auto &Offset = Lexer.getTok();
212       if (Offset.is(AsmToken::Integer)) {
213         ParseSingleInteger(false, Operands);
214       } else {
215         // Alignment not specified.
216         // FIXME: correctly derive a default from the instruction.
217         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
218         // an opcode until after the assembly matcher.
219         Operands.push_back(make_unique<WebAssemblyOperand>(
220                              WebAssemblyOperand::Integer, Offset.getLoc(),
221                              Offset.getEndLoc(), WebAssemblyOperand::IntOp{0}));
222       }
223     }
224     return false;
225   }
226 
227   bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name,
228                         SMLoc NameLoc, OperandVector &Operands) override {
229     Operands.push_back(
230           make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc,
231                                           SMLoc::getFromPointer(
232                                             NameLoc.getPointer() + Name.size()),
233                                           WebAssemblyOperand::TokOp{
234                                             StringRef(NameLoc.getPointer(),
235                                                     Name.size())}));
236     auto NamePair = Name.split('.');
237     // If no '.', there is no type prefix.
238     if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second);
239     while (Lexer.isNot(AsmToken::EndOfStatement)) {
240       auto &Tok = Lexer.getTok();
241       switch (Tok.getKind()) {
242       case AsmToken::Identifier: {
243         auto &Id = Lexer.getTok();
244         const MCExpr *Val;
245         SMLoc End;
246         if (Parser.parsePrimaryExpr(Val, End))
247           return Error("Cannot parse symbol: ", Lexer.getTok());
248         Operands.push_back(make_unique<WebAssemblyOperand>(
249                              WebAssemblyOperand::Symbol, Id.getLoc(),
250                              Id.getEndLoc(), WebAssemblyOperand::SymOp{Val}));
251         break;
252       }
253       case AsmToken::Minus:
254         Parser.Lex();
255         if (Lexer.isNot(AsmToken::Integer))
256           return Error("Expected integer instead got: ", Lexer.getTok());
257         if (ParseOperandStartingWithInteger(true, Operands, NamePair.second))
258           return true;
259         break;
260       case AsmToken::Integer:
261         if (ParseOperandStartingWithInteger(false, Operands, NamePair.second))
262           return true;
263         break;
264       case AsmToken::Real: {
265         double Val;
266         if (Tok.getString().getAsDouble(Val, false))
267           return Error("Cannot parse real: ", Tok);
268         Operands.push_back(make_unique<WebAssemblyOperand>(
269                              WebAssemblyOperand::Float, Tok.getLoc(),
270                              Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
271         Parser.Lex();
272         break;
273       }
274       default:
275         return Error("Unexpected token in operand: ", Tok);
276       }
277       if (Lexer.isNot(AsmToken::EndOfStatement)) {
278         if (Expect(AsmToken::Comma, ",")) return true;
279       }
280     }
281     Parser.Lex();
282     // Block instructions require a signature index, but these are missing in
283     // assembly, so we add a dummy one explicitly (since we have no control
284     // over signature tables here, we assume these will be regenerated when
285     // the wasm module is generated).
286     if (NamePair.second == "block" || NamePair.second == "loop") {
287       Operands.push_back(make_unique<WebAssemblyOperand>(
288                            WebAssemblyOperand::Integer, NameLoc,
289                            NameLoc, WebAssemblyOperand::IntOp{-1}));
290     }
291     return false;
292   }
293 
294   void onLabelParsed(MCSymbol *Symbol) override {
295     LastLabel = Symbol;
296   }
297 
298   bool ParseDirective(AsmToken DirectiveID) override {
299     assert(DirectiveID.getKind() == AsmToken::Identifier);
300     auto &Out = getStreamer();
301     auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
302                    *Out.getTargetStreamer());
303     // TODO: we're just parsing the subset of directives we're interested in,
304     // and ignoring ones we don't recognise. We should ideally verify
305     // all directives here.
306     if (DirectiveID.getString() == ".type") {
307       // This could be the start of a function, check if followed by
308       // "label,@function"
309       if (!(IsNext(AsmToken::Identifier) &&
310             IsNext(AsmToken::Comma) &&
311             IsNext(AsmToken::At) &&
312             Lexer.is(AsmToken::Identifier)))
313         return Error("Expected label,@type declaration, got: ", Lexer.getTok());
314       Parser.Lex();
315       //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
316     } else if (DirectiveID.getString() == ".param" ||
317                DirectiveID.getString() == ".local") {
318       // Track the number of locals, needed for correct virtual register
319       // assignment elsewhere.
320       // Also output a directive to the streamer.
321       std::vector<MVT> Params;
322       std::vector<MVT> Locals;
323       while (Lexer.is(AsmToken::Identifier)) {
324         auto RegType = ParseRegType(Lexer.getTok().getString());
325         if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true;
326         if (DirectiveID.getString() == ".param") {
327           Params.push_back(RegType);
328         } else {
329           Locals.push_back(RegType);
330         }
331         Parser.Lex();
332         if (!IsNext(AsmToken::Comma)) break;
333       }
334       assert(LastLabel);
335       TOut.emitParam(LastLabel, Params);
336       TOut.emitLocal(Locals);
337     } else {
338       // For now, ignore anydirective we don't recognize:
339       while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex();
340     }
341     return Expect(AsmToken::EndOfStatement, "EOL");
342   }
343 
344   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/,
345                                OperandVector &Operands,
346                                MCStreamer &Out, uint64_t &ErrorInfo,
347                                bool MatchingInlineAsm) override {
348     MCInst Inst;
349     unsigned MatchResult =
350         MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
351     switch (MatchResult) {
352     case Match_Success: {
353       Out.EmitInstruction(Inst, getSTI());
354       return false;
355     }
356     case Match_MissingFeature:
357       return Parser.Error(IDLoc,
358           "instruction requires a WASM feature not currently enabled");
359     case Match_MnemonicFail:
360       return Parser.Error(IDLoc, "invalid instruction");
361     case Match_NearMisses:
362       return Parser.Error(IDLoc, "ambiguous instruction");
363     case Match_InvalidTiedOperand:
364     case Match_InvalidOperand: {
365       SMLoc ErrorLoc = IDLoc;
366       if (ErrorInfo != ~0ULL) {
367         if (ErrorInfo >= Operands.size())
368           return Parser.Error(IDLoc, "too few operands for instruction");
369         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
370         if (ErrorLoc == SMLoc())
371           ErrorLoc = IDLoc;
372       }
373       return Parser.Error(ErrorLoc, "invalid operand for instruction");
374     }
375     }
376     llvm_unreachable("Implement any new match types added!");
377   }
378 };
379 } // end anonymous namespace
380 
381 // Force static initialization.
382 extern "C" void LLVMInitializeWebAssemblyAsmParser() {
383   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
384   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
385 }
386 
387 #define GET_REGISTER_MATCHER
388 #define GET_MATCHER_IMPLEMENTATION
389 #include "WebAssemblyGenAsmMatcher.inc"
390