1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file is part of the WebAssembly Assembler. 12 /// 13 /// It contains code to translate a parsed .s file into MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 18 #include "MCTargetDesc/WebAssemblyTargetStreamer.h" 19 #include "WebAssembly.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCInst.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 24 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 25 #include "llvm/MC/MCStreamer.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/MC/MCSymbol.h" 28 #include "llvm/MC/MCSymbolWasm.h" 29 #include "llvm/Support/Endian.h" 30 #include "llvm/Support/TargetRegistry.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "wasm-asm-parser" 35 36 namespace { 37 38 /// WebAssemblyOperand - Instances of this class represent the operands in a 39 /// parsed WASM machine instruction. 40 struct WebAssemblyOperand : public MCParsedAsmOperand { 41 enum KindTy { Token, Integer, Float, Symbol, BrList } Kind; 42 43 SMLoc StartLoc, EndLoc; 44 45 struct TokOp { 46 StringRef Tok; 47 }; 48 49 struct IntOp { 50 int64_t Val; 51 }; 52 53 struct FltOp { 54 double Val; 55 }; 56 57 struct SymOp { 58 const MCExpr *Exp; 59 }; 60 61 struct BrLOp { 62 std::vector<unsigned> List; 63 }; 64 65 union { 66 struct TokOp Tok; 67 struct IntOp Int; 68 struct FltOp Flt; 69 struct SymOp Sym; 70 struct BrLOp BrL; 71 }; 72 73 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T) 74 : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {} 75 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I) 76 : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {} 77 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F) 78 : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {} 79 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S) 80 : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {} 81 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End) 82 : Kind(K), StartLoc(Start), EndLoc(End), BrL() {} 83 84 ~WebAssemblyOperand() { 85 if (isBrList()) 86 BrL.~BrLOp(); 87 } 88 89 bool isToken() const override { return Kind == Token; } 90 bool isImm() const override { 91 return Kind == Integer || Kind == Float || Kind == Symbol; 92 } 93 bool isMem() const override { return false; } 94 bool isReg() const override { return false; } 95 bool isBrList() const { return Kind == BrList; } 96 97 unsigned getReg() const override { 98 llvm_unreachable("Assembly inspects a register operand"); 99 return 0; 100 } 101 102 StringRef getToken() const { 103 assert(isToken()); 104 return Tok.Tok; 105 } 106 107 SMLoc getStartLoc() const override { return StartLoc; } 108 SMLoc getEndLoc() const override { return EndLoc; } 109 110 void addRegOperands(MCInst &, unsigned) const { 111 // Required by the assembly matcher. 112 llvm_unreachable("Assembly matcher creates register operands"); 113 } 114 115 void addImmOperands(MCInst &Inst, unsigned N) const { 116 assert(N == 1 && "Invalid number of operands!"); 117 if (Kind == Integer) 118 Inst.addOperand(MCOperand::createImm(Int.Val)); 119 else if (Kind == Float) 120 Inst.addOperand(MCOperand::createFPImm(Flt.Val)); 121 else if (Kind == Symbol) 122 Inst.addOperand(MCOperand::createExpr(Sym.Exp)); 123 else 124 llvm_unreachable("Should be immediate or symbol!"); 125 } 126 127 void addBrListOperands(MCInst &Inst, unsigned N) const { 128 assert(N == 1 && isBrList() && "Invalid BrList!"); 129 for (auto Br : BrL.List) 130 Inst.addOperand(MCOperand::createImm(Br)); 131 } 132 133 void print(raw_ostream &OS) const override { 134 switch (Kind) { 135 case Token: 136 OS << "Tok:" << Tok.Tok; 137 break; 138 case Integer: 139 OS << "Int:" << Int.Val; 140 break; 141 case Float: 142 OS << "Flt:" << Flt.Val; 143 break; 144 case Symbol: 145 OS << "Sym:" << Sym.Exp; 146 break; 147 case BrList: 148 OS << "BrList:" << BrL.List.size(); 149 break; 150 } 151 } 152 }; 153 154 class WebAssemblyAsmParser final : public MCTargetAsmParser { 155 MCAsmParser &Parser; 156 MCAsmLexer &Lexer; 157 158 // Much like WebAssemblyAsmPrinter in the backend, we have to own these. 159 std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures; 160 161 // Order of labels, directives and instructions in a .s file have no 162 // syntactical enforcement. This class is a callback from the actual parser, 163 // and yet we have to be feeding data to the streamer in a very particular 164 // order to ensure a correct binary encoding that matches the regular backend 165 // (the streamer does not enforce this). This "state machine" enum helps 166 // guarantee that correct order. 167 enum ParserState { 168 FileStart, 169 Label, 170 FunctionStart, 171 FunctionLocals, 172 Instructions, 173 } CurrentState = FileStart; 174 175 // We track this to see if a .functype following a label is the same, 176 // as this is how we recognize the start of a function. 177 MCSymbol *LastLabel = nullptr; 178 179 public: 180 WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, 181 const MCInstrInfo &MII, const MCTargetOptions &Options) 182 : MCTargetAsmParser(Options, STI, MII), Parser(Parser), 183 Lexer(Parser.getLexer()) { 184 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 185 } 186 187 void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) { 188 Signatures.push_back(std::move(Sig)); 189 } 190 191 #define GET_ASSEMBLER_HEADER 192 #include "WebAssemblyGenAsmMatcher.inc" 193 194 // TODO: This is required to be implemented, but appears unused. 195 bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/, 196 SMLoc & /*EndLoc*/) override { 197 llvm_unreachable("ParseRegister is not implemented."); 198 } 199 200 bool error(const StringRef &Msg, const AsmToken &Tok) { 201 return Parser.Error(Tok.getLoc(), Msg + Tok.getString()); 202 } 203 204 bool isNext(AsmToken::TokenKind Kind) { 205 auto Ok = Lexer.is(Kind); 206 if (Ok) 207 Parser.Lex(); 208 return Ok; 209 } 210 211 bool expect(AsmToken::TokenKind Kind, const char *KindName) { 212 if (!isNext(Kind)) 213 return error(std::string("Expected ") + KindName + ", instead got: ", 214 Lexer.getTok()); 215 return false; 216 } 217 218 StringRef expectIdent() { 219 if (!Lexer.is(AsmToken::Identifier)) { 220 error("Expected identifier, got: ", Lexer.getTok()); 221 return StringRef(); 222 } 223 auto Name = Lexer.getTok().getString(); 224 Parser.Lex(); 225 return Name; 226 } 227 228 Optional<wasm::ValType> parseType(const StringRef &Type) { 229 // FIXME: can't use StringSwitch because wasm::ValType doesn't have a 230 // "invalid" value. 231 if (Type == "i32") 232 return wasm::ValType::I32; 233 if (Type == "i64") 234 return wasm::ValType::I64; 235 if (Type == "f32") 236 return wasm::ValType::F32; 237 if (Type == "f64") 238 return wasm::ValType::F64; 239 if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || 240 Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || 241 Type == "f64x2") 242 return wasm::ValType::V128; 243 return Optional<wasm::ValType>(); 244 } 245 246 bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) { 247 while (Lexer.is(AsmToken::Identifier)) { 248 auto Type = parseType(Lexer.getTok().getString()); 249 if (!Type) 250 return true; 251 Types.push_back(Type.getValue()); 252 Parser.Lex(); 253 if (!isNext(AsmToken::Comma)) 254 break; 255 } 256 return false; 257 } 258 259 void parseSingleInteger(bool IsNegative, OperandVector &Operands) { 260 auto &Int = Lexer.getTok(); 261 int64_t Val = Int.getIntVal(); 262 if (IsNegative) 263 Val = -Val; 264 Operands.push_back(make_unique<WebAssemblyOperand>( 265 WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), 266 WebAssemblyOperand::IntOp{Val})); 267 Parser.Lex(); 268 } 269 270 bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands, 271 StringRef InstName) { 272 parseSingleInteger(IsNegative, Operands); 273 // FIXME: there is probably a cleaner way to do this. 274 auto IsLoadStore = InstName.startswith("load") || 275 InstName.startswith("store") || 276 InstName.startswith("atomic_load") || 277 InstName.startswith("atomic_store"); 278 if (IsLoadStore) { 279 // Parse load/store operands of the form: offset align 280 auto &Offset = Lexer.getTok(); 281 if (Offset.is(AsmToken::Integer)) { 282 parseSingleInteger(false, Operands); 283 } else { 284 // Alignment not specified. 285 // FIXME: correctly derive a default from the instruction. 286 // We can't just call WebAssembly::GetDefaultP2Align since we don't have 287 // an opcode until after the assembly matcher. 288 Operands.push_back(make_unique<WebAssemblyOperand>( 289 WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(), 290 WebAssemblyOperand::IntOp{0})); 291 } 292 } 293 return false; 294 } 295 296 bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, 297 SMLoc NameLoc, OperandVector &Operands) override { 298 // Note: Name does NOT point into the sourcecode, but to a local, so 299 // use NameLoc instead. 300 Name = StringRef(NameLoc.getPointer(), Name.size()); 301 302 // WebAssembly has instructions with / in them, which AsmLexer parses 303 // as seperate tokens, so if we find such tokens immediately adjacent (no 304 // whitespace), expand the name to include them: 305 for (;;) { 306 auto &Sep = Lexer.getTok(); 307 if (Sep.getLoc().getPointer() != Name.end() || 308 Sep.getKind() != AsmToken::Slash) 309 break; 310 // Extend name with / 311 Name = StringRef(Name.begin(), Name.size() + Sep.getString().size()); 312 Parser.Lex(); 313 // We must now find another identifier, or error. 314 auto &Id = Lexer.getTok(); 315 if (Id.getKind() != AsmToken::Identifier || 316 Id.getLoc().getPointer() != Name.end()) 317 return error("Incomplete instruction name: ", Id); 318 Name = StringRef(Name.begin(), Name.size() + Id.getString().size()); 319 Parser.Lex(); 320 } 321 322 // Now construct the name as first operand. 323 Operands.push_back(make_unique<WebAssemblyOperand>( 324 WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), 325 WebAssemblyOperand::TokOp{Name})); 326 auto NamePair = Name.split('.'); 327 // If no '.', there is no type prefix. 328 auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second; 329 330 while (Lexer.isNot(AsmToken::EndOfStatement)) { 331 auto &Tok = Lexer.getTok(); 332 switch (Tok.getKind()) { 333 case AsmToken::Identifier: { 334 auto &Id = Lexer.getTok(); 335 const MCExpr *Val; 336 SMLoc End; 337 if (Parser.parsePrimaryExpr(Val, End)) 338 return error("Cannot parse symbol: ", Lexer.getTok()); 339 Operands.push_back(make_unique<WebAssemblyOperand>( 340 WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), 341 WebAssemblyOperand::SymOp{Val})); 342 break; 343 } 344 case AsmToken::Minus: 345 Parser.Lex(); 346 if (Lexer.isNot(AsmToken::Integer)) 347 return error("Expected integer instead got: ", Lexer.getTok()); 348 if (parseOperandStartingWithInteger(true, Operands, BaseName)) 349 return true; 350 break; 351 case AsmToken::Integer: 352 if (parseOperandStartingWithInteger(false, Operands, BaseName)) 353 return true; 354 break; 355 case AsmToken::Real: { 356 double Val; 357 if (Tok.getString().getAsDouble(Val, false)) 358 return error("Cannot parse real: ", Tok); 359 Operands.push_back(make_unique<WebAssemblyOperand>( 360 WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(), 361 WebAssemblyOperand::FltOp{Val})); 362 Parser.Lex(); 363 break; 364 } 365 case AsmToken::LCurly: { 366 Parser.Lex(); 367 auto Op = make_unique<WebAssemblyOperand>( 368 WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc()); 369 if (!Lexer.is(AsmToken::RCurly)) 370 for (;;) { 371 Op->BrL.List.push_back(Lexer.getTok().getIntVal()); 372 expect(AsmToken::Integer, "integer"); 373 if (!isNext(AsmToken::Comma)) 374 break; 375 } 376 expect(AsmToken::RCurly, "}"); 377 Operands.push_back(std::move(Op)); 378 break; 379 } 380 default: 381 return error("Unexpected token in operand: ", Tok); 382 } 383 if (Lexer.isNot(AsmToken::EndOfStatement)) { 384 if (expect(AsmToken::Comma, ",")) 385 return true; 386 } 387 } 388 Parser.Lex(); 389 390 // Block instructions require a signature index, but these are missing in 391 // assembly, so we add a dummy one explicitly (since we have no control 392 // over signature tables here, we assume these will be regenerated when 393 // the wasm module is generated). 394 if (BaseName == "block" || BaseName == "loop" || BaseName == "try") { 395 Operands.push_back(make_unique<WebAssemblyOperand>( 396 WebAssemblyOperand::Integer, NameLoc, NameLoc, 397 WebAssemblyOperand::IntOp{-1})); 398 } 399 return false; 400 } 401 402 void onLabelParsed(MCSymbol *Symbol) override { 403 LastLabel = Symbol; 404 CurrentState = Label; 405 } 406 407 bool parseSignature(wasm::WasmSignature *Signature) { 408 if (expect(AsmToken::LParen, "(")) 409 return true; 410 if (parseRegTypeList(Signature->Params)) 411 return true; 412 if (expect(AsmToken::RParen, ")")) 413 return true; 414 if (expect(AsmToken::MinusGreater, "->")) 415 return true; 416 if (expect(AsmToken::LParen, "(")) 417 return true; 418 if (parseRegTypeList(Signature->Returns)) 419 return true; 420 if (expect(AsmToken::RParen, ")")) 421 return true; 422 return false; 423 } 424 425 // This function processes wasm-specific directives streamed to 426 // WebAssemblyTargetStreamer, all others go to the generic parser 427 // (see WasmAsmParser). 428 bool ParseDirective(AsmToken DirectiveID) override { 429 // This function has a really weird return value behavior that is different 430 // from all the other parsing functions: 431 // - return true && no tokens consumed -> don't know this directive / let 432 // the generic parser handle it. 433 // - return true && tokens consumed -> a parsing error occurred. 434 // - return false -> processed this directive successfully. 435 assert(DirectiveID.getKind() == AsmToken::Identifier); 436 auto &Out = getStreamer(); 437 auto &TOut = 438 reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer()); 439 440 // TODO: any time we return an error, at least one token must have been 441 // consumed, otherwise this will not signal an error to the caller. 442 if (DirectiveID.getString() == ".globaltype") { 443 auto SymName = expectIdent(); 444 if (SymName.empty()) 445 return true; 446 if (expect(AsmToken::Comma, ",")) 447 return true; 448 auto TypeTok = Lexer.getTok(); 449 auto TypeName = expectIdent(); 450 if (TypeName.empty()) 451 return true; 452 auto Type = parseType(TypeName); 453 if (!Type) 454 return error("Unknown type in .globaltype directive: ", TypeTok); 455 // Now set this symbol with the correct type. 456 auto WasmSym = cast<MCSymbolWasm>( 457 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 458 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); 459 WasmSym->setGlobalType( 460 wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); 461 // And emit the directive again. 462 TOut.emitGlobalType(WasmSym); 463 return expect(AsmToken::EndOfStatement, "EOL"); 464 } 465 466 if (DirectiveID.getString() == ".functype") { 467 // This code has to send things to the streamer similar to 468 // WebAssemblyAsmPrinter::EmitFunctionBodyStart. 469 // TODO: would be good to factor this into a common function, but the 470 // assembler and backend really don't share any common code, and this code 471 // parses the locals seperately. 472 auto SymName = expectIdent(); 473 if (SymName.empty()) 474 return true; 475 auto WasmSym = cast<MCSymbolWasm>( 476 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 477 if (CurrentState == Label && WasmSym == LastLabel) { 478 // This .functype indicates a start of a function. 479 CurrentState = FunctionStart; 480 } 481 auto Signature = make_unique<wasm::WasmSignature>(); 482 if (parseSignature(Signature.get())) 483 return true; 484 WasmSym->setSignature(Signature.get()); 485 addSignature(std::move(Signature)); 486 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 487 TOut.emitFunctionType(WasmSym); 488 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 489 return expect(AsmToken::EndOfStatement, "EOL"); 490 } 491 492 if (DirectiveID.getString() == ".eventtype") { 493 auto SymName = expectIdent(); 494 if (SymName.empty()) 495 return true; 496 auto WasmSym = cast<MCSymbolWasm>( 497 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 498 auto Signature = make_unique<wasm::WasmSignature>(); 499 if (parseRegTypeList(Signature->Params)) 500 return true; 501 WasmSym->setSignature(Signature.get()); 502 addSignature(std::move(Signature)); 503 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT); 504 TOut.emitEventType(WasmSym); 505 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 506 return expect(AsmToken::EndOfStatement, "EOL"); 507 } 508 509 if (DirectiveID.getString() == ".local") { 510 if (CurrentState != FunctionStart) 511 return error(".local directive should follow the start of a function", 512 Lexer.getTok()); 513 SmallVector<wasm::ValType, 4> Locals; 514 if (parseRegTypeList(Locals)) 515 return true; 516 TOut.emitLocal(Locals); 517 CurrentState = FunctionLocals; 518 return expect(AsmToken::EndOfStatement, "EOL"); 519 } 520 521 return true; // We didn't process this directive. 522 } 523 524 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, 525 OperandVector &Operands, MCStreamer &Out, 526 uint64_t &ErrorInfo, 527 bool MatchingInlineAsm) override { 528 MCInst Inst; 529 unsigned MatchResult = 530 MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); 531 switch (MatchResult) { 532 case Match_Success: { 533 if (CurrentState == FunctionStart) { 534 // This is the first instruction in a function, but we haven't seen 535 // a .local directive yet. The streamer requires locals to be encoded 536 // as a prelude to the instructions, so emit an empty list of locals 537 // here. 538 auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>( 539 *Out.getTargetStreamer()); 540 TOut.emitLocal(SmallVector<wasm::ValType, 0>()); 541 } 542 CurrentState = Instructions; 543 Out.EmitInstruction(Inst, getSTI()); 544 return false; 545 } 546 case Match_MissingFeature: 547 return Parser.Error( 548 IDLoc, "instruction requires a WASM feature not currently enabled"); 549 case Match_MnemonicFail: 550 return Parser.Error(IDLoc, "invalid instruction"); 551 case Match_NearMisses: 552 return Parser.Error(IDLoc, "ambiguous instruction"); 553 case Match_InvalidTiedOperand: 554 case Match_InvalidOperand: { 555 SMLoc ErrorLoc = IDLoc; 556 if (ErrorInfo != ~0ULL) { 557 if (ErrorInfo >= Operands.size()) 558 return Parser.Error(IDLoc, "too few operands for instruction"); 559 ErrorLoc = Operands[ErrorInfo]->getStartLoc(); 560 if (ErrorLoc == SMLoc()) 561 ErrorLoc = IDLoc; 562 } 563 return Parser.Error(ErrorLoc, "invalid operand for instruction"); 564 } 565 } 566 llvm_unreachable("Implement any new match types added!"); 567 } 568 }; 569 } // end anonymous namespace 570 571 // Force static initialization. 572 extern "C" void LLVMInitializeWebAssemblyAsmParser() { 573 RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32()); 574 RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64()); 575 } 576 577 #define GET_REGISTER_MATCHER 578 #define GET_MATCHER_IMPLEMENTATION 579 #include "WebAssemblyGenAsmMatcher.inc" 580