1 //===- MILexer.cpp - Machine instructions lexer implementation ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the lexing of machine instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MILexer.h" 14 #include "llvm/ADT/APSInt.h" 15 #include "llvm/ADT/None.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/Twine.h" 21 #include <algorithm> 22 #include <cassert> 23 #include <cctype> 24 #include <string> 25 26 using namespace llvm; 27 28 namespace { 29 30 using ErrorCallbackType = 31 function_ref<void(StringRef::iterator Loc, const Twine &)>; 32 33 /// This class provides a way to iterate and get characters from the source 34 /// string. 35 class Cursor { 36 const char *Ptr = nullptr; 37 const char *End = nullptr; 38 39 public: 40 Cursor(NoneType) {} 41 42 explicit Cursor(StringRef Str) { 43 Ptr = Str.data(); 44 End = Ptr + Str.size(); 45 } 46 47 bool isEOF() const { return Ptr == End; } 48 49 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 50 51 void advance(unsigned I = 1) { Ptr += I; } 52 53 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 54 55 StringRef upto(Cursor C) const { 56 assert(C.Ptr >= Ptr && C.Ptr <= End); 57 return StringRef(Ptr, C.Ptr - Ptr); 58 } 59 60 StringRef::iterator location() const { return Ptr; } 61 62 operator bool() const { return Ptr != nullptr; } 63 }; 64 65 } // end anonymous namespace 66 67 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 68 this->Kind = Kind; 69 this->Range = Range; 70 return *this; 71 } 72 73 MIToken &MIToken::setStringValue(StringRef StrVal) { 74 StringValue = StrVal; 75 return *this; 76 } 77 78 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 79 StringValueStorage = std::move(StrVal); 80 StringValue = StringValueStorage; 81 return *this; 82 } 83 84 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 85 this->IntVal = std::move(IntVal); 86 return *this; 87 } 88 89 /// Skip the leading whitespace characters and return the updated cursor. 90 static Cursor skipWhitespace(Cursor C) { 91 while (isblank(C.peek())) 92 C.advance(); 93 return C; 94 } 95 96 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 97 98 /// Skip a line comment and return the updated cursor. 99 static Cursor skipComment(Cursor C) { 100 if (C.peek() != ';') 101 return C; 102 while (!isNewlineChar(C.peek()) && !C.isEOF()) 103 C.advance(); 104 return C; 105 } 106 107 /// Return true if the given character satisfies the following regular 108 /// expression: [-a-zA-Z$._0-9] 109 static bool isIdentifierChar(char C) { 110 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 111 C == '$'; 112 } 113 114 /// Unescapes the given string value. 115 /// 116 /// Expects the string value to be quoted. 117 static std::string unescapeQuotedString(StringRef Value) { 118 assert(Value.front() == '"' && Value.back() == '"'); 119 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 120 121 std::string Str; 122 Str.reserve(C.remaining().size()); 123 while (!C.isEOF()) { 124 char Char = C.peek(); 125 if (Char == '\\') { 126 if (C.peek(1) == '\\') { 127 // Two '\' become one 128 Str += '\\'; 129 C.advance(2); 130 continue; 131 } 132 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 133 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 134 C.advance(3); 135 continue; 136 } 137 } 138 Str += Char; 139 C.advance(); 140 } 141 return Str; 142 } 143 144 /// Lex a string constant using the following regular expression: \"[^\"]*\" 145 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { 146 assert(C.peek() == '"'); 147 for (C.advance(); C.peek() != '"'; C.advance()) { 148 if (C.isEOF() || isNewlineChar(C.peek())) { 149 ErrorCallback( 150 C.location(), 151 "end of machine instruction reached before the closing '\"'"); 152 return None; 153 } 154 } 155 C.advance(); 156 return C; 157 } 158 159 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, 160 unsigned PrefixLength, ErrorCallbackType ErrorCallback) { 161 auto Range = C; 162 C.advance(PrefixLength); 163 if (C.peek() == '"') { 164 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 165 StringRef String = Range.upto(R); 166 Token.reset(Type, String) 167 .setOwnedStringValue( 168 unescapeQuotedString(String.drop_front(PrefixLength))); 169 return R; 170 } 171 Token.reset(MIToken::Error, Range.remaining()); 172 return Range; 173 } 174 while (isIdentifierChar(C.peek())) 175 C.advance(); 176 Token.reset(Type, Range.upto(C)) 177 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 178 return C; 179 } 180 181 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 182 return StringSwitch<MIToken::TokenKind>(Identifier) 183 .Case("_", MIToken::underscore) 184 .Case("implicit", MIToken::kw_implicit) 185 .Case("implicit-def", MIToken::kw_implicit_define) 186 .Case("def", MIToken::kw_def) 187 .Case("dead", MIToken::kw_dead) 188 .Case("killed", MIToken::kw_killed) 189 .Case("undef", MIToken::kw_undef) 190 .Case("internal", MIToken::kw_internal) 191 .Case("early-clobber", MIToken::kw_early_clobber) 192 .Case("debug-use", MIToken::kw_debug_use) 193 .Case("renamable", MIToken::kw_renamable) 194 .Case("tied-def", MIToken::kw_tied_def) 195 .Case("frame-setup", MIToken::kw_frame_setup) 196 .Case("frame-destroy", MIToken::kw_frame_destroy) 197 .Case("nnan", MIToken::kw_nnan) 198 .Case("ninf", MIToken::kw_ninf) 199 .Case("nsz", MIToken::kw_nsz) 200 .Case("arcp", MIToken::kw_arcp) 201 .Case("contract", MIToken::kw_contract) 202 .Case("afn", MIToken::kw_afn) 203 .Case("reassoc", MIToken::kw_reassoc) 204 .Case("nuw" , MIToken::kw_nuw) 205 .Case("nsw" , MIToken::kw_nsw) 206 .Case("exact" , MIToken::kw_exact) 207 .Case("debug-location", MIToken::kw_debug_location) 208 .Case("same_value", MIToken::kw_cfi_same_value) 209 .Case("offset", MIToken::kw_cfi_offset) 210 .Case("rel_offset", MIToken::kw_cfi_rel_offset) 211 .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) 212 .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 213 .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) 214 .Case("escape", MIToken::kw_cfi_escape) 215 .Case("def_cfa", MIToken::kw_cfi_def_cfa) 216 .Case("remember_state", MIToken::kw_cfi_remember_state) 217 .Case("restore", MIToken::kw_cfi_restore) 218 .Case("restore_state", MIToken::kw_cfi_restore_state) 219 .Case("undefined", MIToken::kw_cfi_undefined) 220 .Case("register", MIToken::kw_cfi_register) 221 .Case("window_save", MIToken::kw_cfi_window_save) 222 .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) 223 .Case("blockaddress", MIToken::kw_blockaddress) 224 .Case("intrinsic", MIToken::kw_intrinsic) 225 .Case("target-index", MIToken::kw_target_index) 226 .Case("half", MIToken::kw_half) 227 .Case("float", MIToken::kw_float) 228 .Case("double", MIToken::kw_double) 229 .Case("x86_fp80", MIToken::kw_x86_fp80) 230 .Case("fp128", MIToken::kw_fp128) 231 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 232 .Case("target-flags", MIToken::kw_target_flags) 233 .Case("volatile", MIToken::kw_volatile) 234 .Case("non-temporal", MIToken::kw_non_temporal) 235 .Case("dereferenceable", MIToken::kw_dereferenceable) 236 .Case("invariant", MIToken::kw_invariant) 237 .Case("align", MIToken::kw_align) 238 .Case("addrspace", MIToken::kw_addrspace) 239 .Case("stack", MIToken::kw_stack) 240 .Case("got", MIToken::kw_got) 241 .Case("jump-table", MIToken::kw_jump_table) 242 .Case("constant-pool", MIToken::kw_constant_pool) 243 .Case("call-entry", MIToken::kw_call_entry) 244 .Case("liveout", MIToken::kw_liveout) 245 .Case("address-taken", MIToken::kw_address_taken) 246 .Case("landing-pad", MIToken::kw_landing_pad) 247 .Case("liveins", MIToken::kw_liveins) 248 .Case("successors", MIToken::kw_successors) 249 .Case("floatpred", MIToken::kw_floatpred) 250 .Case("intpred", MIToken::kw_intpred) 251 .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) 252 .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) 253 .Case("unknown-size", MIToken::kw_unknown_size) 254 .Default(MIToken::Identifier); 255 } 256 257 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 258 if (!isalpha(C.peek()) && C.peek() != '_') 259 return None; 260 auto Range = C; 261 while (isIdentifierChar(C.peek())) 262 C.advance(); 263 auto Identifier = Range.upto(C); 264 Token.reset(getIdentifierKind(Identifier), Identifier) 265 .setStringValue(Identifier); 266 return C; 267 } 268 269 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, 270 ErrorCallbackType ErrorCallback) { 271 bool IsReference = C.remaining().startswith("%bb."); 272 if (!IsReference && !C.remaining().startswith("bb.")) 273 return None; 274 auto Range = C; 275 unsigned PrefixLength = IsReference ? 4 : 3; 276 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 277 if (!isdigit(C.peek())) { 278 Token.reset(MIToken::Error, C.remaining()); 279 ErrorCallback(C.location(), "expected a number after '%bb.'"); 280 return C; 281 } 282 auto NumberRange = C; 283 while (isdigit(C.peek())) 284 C.advance(); 285 StringRef Number = NumberRange.upto(C); 286 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 287 // TODO: The format bb.<id>.<irname> is supported only when it's not a 288 // reference. Once we deprecate the format where the irname shows up, we 289 // should only lex forward if it is a reference. 290 if (C.peek() == '.') { 291 C.advance(); // Skip '.' 292 ++StringOffset; 293 while (isIdentifierChar(C.peek())) 294 C.advance(); 295 } 296 Token.reset(IsReference ? MIToken::MachineBasicBlock 297 : MIToken::MachineBasicBlockLabel, 298 Range.upto(C)) 299 .setIntegerValue(APSInt(Number)) 300 .setStringValue(Range.upto(C).drop_front(StringOffset)); 301 return C; 302 } 303 304 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 305 MIToken::TokenKind Kind) { 306 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 307 return None; 308 auto Range = C; 309 C.advance(Rule.size()); 310 auto NumberRange = C; 311 while (isdigit(C.peek())) 312 C.advance(); 313 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 314 return C; 315 } 316 317 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 318 MIToken::TokenKind Kind) { 319 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 320 return None; 321 auto Range = C; 322 C.advance(Rule.size()); 323 auto NumberRange = C; 324 while (isdigit(C.peek())) 325 C.advance(); 326 StringRef Number = NumberRange.upto(C); 327 unsigned StringOffset = Rule.size() + Number.size(); 328 if (C.peek() == '.') { 329 C.advance(); 330 ++StringOffset; 331 while (isIdentifierChar(C.peek())) 332 C.advance(); 333 } 334 Token.reset(Kind, Range.upto(C)) 335 .setIntegerValue(APSInt(Number)) 336 .setStringValue(Range.upto(C).drop_front(StringOffset)); 337 return C; 338 } 339 340 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 341 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 342 } 343 344 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 345 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 346 } 347 348 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 349 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 350 } 351 352 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 353 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 354 } 355 356 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, 357 ErrorCallbackType ErrorCallback) { 358 const StringRef Rule = "%subreg."; 359 if (!C.remaining().startswith(Rule)) 360 return None; 361 return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), 362 ErrorCallback); 363 } 364 365 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, 366 ErrorCallbackType ErrorCallback) { 367 const StringRef Rule = "%ir-block."; 368 if (!C.remaining().startswith(Rule)) 369 return None; 370 if (isdigit(C.peek(Rule.size()))) 371 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 372 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 373 } 374 375 static Cursor maybeLexIRValue(Cursor C, MIToken &Token, 376 ErrorCallbackType ErrorCallback) { 377 const StringRef Rule = "%ir."; 378 if (!C.remaining().startswith(Rule)) 379 return None; 380 if (isdigit(C.peek(Rule.size()))) 381 return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 382 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 383 } 384 385 static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, 386 ErrorCallbackType ErrorCallback) { 387 if (C.peek() != '"') 388 return None; 389 return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, 390 ErrorCallback); 391 } 392 393 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 394 auto Range = C; 395 C.advance(); // Skip '%' 396 auto NumberRange = C; 397 while (isdigit(C.peek())) 398 C.advance(); 399 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 400 .setIntegerValue(APSInt(NumberRange.upto(C))); 401 return C; 402 } 403 404 /// Returns true for a character allowed in a register name. 405 static bool isRegisterChar(char C) { 406 return isIdentifierChar(C) && C != '.'; 407 } 408 409 static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { 410 Cursor Range = C; 411 C.advance(); // Skip '%' 412 while (isRegisterChar(C.peek())) 413 C.advance(); 414 Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) 415 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 416 return C; 417 } 418 419 static Cursor maybeLexRegister(Cursor C, MIToken &Token, 420 ErrorCallbackType ErrorCallback) { 421 if (C.peek() != '%' && C.peek() != '$') 422 return None; 423 424 if (C.peek() == '%') { 425 if (isdigit(C.peek(1))) 426 return lexVirtualRegister(C, Token); 427 428 if (isRegisterChar(C.peek(1))) 429 return lexNamedVirtualRegister(C, Token); 430 431 return None; 432 } 433 434 assert(C.peek() == '$'); 435 auto Range = C; 436 C.advance(); // Skip '$' 437 while (isRegisterChar(C.peek())) 438 C.advance(); 439 Token.reset(MIToken::NamedRegister, Range.upto(C)) 440 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' 441 return C; 442 } 443 444 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, 445 ErrorCallbackType ErrorCallback) { 446 if (C.peek() != '@') 447 return None; 448 if (!isdigit(C.peek(1))) 449 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 450 ErrorCallback); 451 auto Range = C; 452 C.advance(1); // Skip the '@' 453 auto NumberRange = C; 454 while (isdigit(C.peek())) 455 C.advance(); 456 Token.reset(MIToken::GlobalValue, Range.upto(C)) 457 .setIntegerValue(APSInt(NumberRange.upto(C))); 458 return C; 459 } 460 461 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, 462 ErrorCallbackType ErrorCallback) { 463 if (C.peek() != '&') 464 return None; 465 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 466 ErrorCallback); 467 } 468 469 static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, 470 ErrorCallbackType ErrorCallback) { 471 const StringRef Rule = "<mcsymbol "; 472 if (!C.remaining().startswith(Rule)) 473 return None; 474 auto Start = C; 475 C.advance(Rule.size()); 476 477 // Try a simple unquoted name. 478 if (C.peek() != '"') { 479 while (isIdentifierChar(C.peek())) 480 C.advance(); 481 StringRef String = Start.upto(C).drop_front(Rule.size()); 482 if (C.peek() != '>') { 483 ErrorCallback(C.location(), 484 "expected the '<mcsymbol ...' to be closed by a '>'"); 485 Token.reset(MIToken::Error, Start.remaining()); 486 return Start; 487 } 488 C.advance(); 489 490 Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); 491 return C; 492 } 493 494 // Otherwise lex out a quoted name. 495 Cursor R = lexStringConstant(C, ErrorCallback); 496 if (!R) { 497 ErrorCallback(C.location(), 498 "unable to parse quoted string from opening quote"); 499 Token.reset(MIToken::Error, Start.remaining()); 500 return Start; 501 } 502 StringRef String = Start.upto(R).drop_front(Rule.size()); 503 if (R.peek() != '>') { 504 ErrorCallback(R.location(), 505 "expected the '<mcsymbol ...' to be closed by a '>'"); 506 Token.reset(MIToken::Error, Start.remaining()); 507 return Start; 508 } 509 R.advance(); 510 511 Token.reset(MIToken::MCSymbol, Start.upto(R)) 512 .setOwnedStringValue(unescapeQuotedString(String)); 513 return R; 514 } 515 516 static bool isValidHexFloatingPointPrefix(char C) { 517 return C == 'H' || C == 'K' || C == 'L' || C == 'M'; 518 } 519 520 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 521 C.advance(); 522 // Skip over [0-9]*([eE][-+]?[0-9]+)? 523 while (isdigit(C.peek())) 524 C.advance(); 525 if ((C.peek() == 'e' || C.peek() == 'E') && 526 (isdigit(C.peek(1)) || 527 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 528 C.advance(2); 529 while (isdigit(C.peek())) 530 C.advance(); 531 } 532 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 533 return C; 534 } 535 536 static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { 537 if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) 538 return None; 539 Cursor Range = C; 540 C.advance(2); 541 unsigned PrefLen = 2; 542 if (isValidHexFloatingPointPrefix(C.peek())) { 543 C.advance(); 544 PrefLen++; 545 } 546 while (isxdigit(C.peek())) 547 C.advance(); 548 StringRef StrVal = Range.upto(C); 549 if (StrVal.size() <= PrefLen) 550 return None; 551 if (PrefLen == 2) 552 Token.reset(MIToken::HexLiteral, Range.upto(C)); 553 else // It must be 3, which means that there was a floating-point prefix. 554 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 555 return C; 556 } 557 558 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 559 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 560 return None; 561 auto Range = C; 562 C.advance(); 563 while (isdigit(C.peek())) 564 C.advance(); 565 if (C.peek() == '.') 566 return lexFloatingPointLiteral(Range, C, Token); 567 StringRef StrVal = Range.upto(C); 568 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 569 return C; 570 } 571 572 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 573 return StringSwitch<MIToken::TokenKind>(Identifier) 574 .Case("!tbaa", MIToken::md_tbaa) 575 .Case("!alias.scope", MIToken::md_alias_scope) 576 .Case("!noalias", MIToken::md_noalias) 577 .Case("!range", MIToken::md_range) 578 .Case("!DIExpression", MIToken::md_diexpr) 579 .Case("!DILocation", MIToken::md_dilocation) 580 .Default(MIToken::Error); 581 } 582 583 static Cursor maybeLexExlaim(Cursor C, MIToken &Token, 584 ErrorCallbackType ErrorCallback) { 585 if (C.peek() != '!') 586 return None; 587 auto Range = C; 588 C.advance(1); 589 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 590 Token.reset(MIToken::exclaim, Range.upto(C)); 591 return C; 592 } 593 while (isIdentifierChar(C.peek())) 594 C.advance(); 595 StringRef StrVal = Range.upto(C); 596 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 597 if (Token.isError()) 598 ErrorCallback(Token.location(), 599 "use of unknown metadata keyword '" + StrVal + "'"); 600 return C; 601 } 602 603 static MIToken::TokenKind symbolToken(char C) { 604 switch (C) { 605 case ',': 606 return MIToken::comma; 607 case '.': 608 return MIToken::dot; 609 case '=': 610 return MIToken::equal; 611 case ':': 612 return MIToken::colon; 613 case '(': 614 return MIToken::lparen; 615 case ')': 616 return MIToken::rparen; 617 case '{': 618 return MIToken::lbrace; 619 case '}': 620 return MIToken::rbrace; 621 case '+': 622 return MIToken::plus; 623 case '-': 624 return MIToken::minus; 625 case '<': 626 return MIToken::less; 627 case '>': 628 return MIToken::greater; 629 default: 630 return MIToken::Error; 631 } 632 } 633 634 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 635 MIToken::TokenKind Kind; 636 unsigned Length = 1; 637 if (C.peek() == ':' && C.peek(1) == ':') { 638 Kind = MIToken::coloncolon; 639 Length = 2; 640 } else 641 Kind = symbolToken(C.peek()); 642 if (Kind == MIToken::Error) 643 return None; 644 auto Range = C; 645 C.advance(Length); 646 Token.reset(Kind, Range.upto(C)); 647 return C; 648 } 649 650 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 651 if (!isNewlineChar(C.peek())) 652 return None; 653 auto Range = C; 654 C.advance(); 655 Token.reset(MIToken::Newline, Range.upto(C)); 656 return C; 657 } 658 659 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, 660 ErrorCallbackType ErrorCallback) { 661 if (C.peek() != '`') 662 return None; 663 auto Range = C; 664 C.advance(); 665 auto StrRange = C; 666 while (C.peek() != '`') { 667 if (C.isEOF() || isNewlineChar(C.peek())) { 668 ErrorCallback( 669 C.location(), 670 "end of machine instruction reached before the closing '`'"); 671 Token.reset(MIToken::Error, Range.remaining()); 672 return C; 673 } 674 C.advance(); 675 } 676 StringRef Value = StrRange.upto(C); 677 C.advance(); 678 Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 679 return C; 680 } 681 682 StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, 683 ErrorCallbackType ErrorCallback) { 684 auto C = skipComment(skipWhitespace(Cursor(Source))); 685 if (C.isEOF()) { 686 Token.reset(MIToken::Eof, C.remaining()); 687 return C.remaining(); 688 } 689 690 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 691 return R.remaining(); 692 if (Cursor R = maybeLexIdentifier(C, Token)) 693 return R.remaining(); 694 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 695 return R.remaining(); 696 if (Cursor R = maybeLexStackObject(C, Token)) 697 return R.remaining(); 698 if (Cursor R = maybeLexFixedStackObject(C, Token)) 699 return R.remaining(); 700 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 701 return R.remaining(); 702 if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) 703 return R.remaining(); 704 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 705 return R.remaining(); 706 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 707 return R.remaining(); 708 if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) 709 return R.remaining(); 710 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 711 return R.remaining(); 712 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 713 return R.remaining(); 714 if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) 715 return R.remaining(); 716 if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) 717 return R.remaining(); 718 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 719 return R.remaining(); 720 if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) 721 return R.remaining(); 722 if (Cursor R = maybeLexSymbol(C, Token)) 723 return R.remaining(); 724 if (Cursor R = maybeLexNewline(C, Token)) 725 return R.remaining(); 726 if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 727 return R.remaining(); 728 if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) 729 return R.remaining(); 730 731 Token.reset(MIToken::Error, C.remaining()); 732 ErrorCallback(C.location(), 733 Twine("unexpected character '") + Twine(C.peek()) + "'"); 734 return C.remaining(); 735 } 736