1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 typedef function_ref<void(StringRef::iterator Loc, const Twine &)> 25 ErrorCallbackType; 26 27 /// This class provides a way to iterate and get characters from the source 28 /// string. 29 class Cursor { 30 const char *Ptr; 31 const char *End; 32 33 public: 34 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 35 36 explicit Cursor(StringRef Str) { 37 Ptr = Str.data(); 38 End = Ptr + Str.size(); 39 } 40 41 bool isEOF() const { return Ptr == End; } 42 43 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 44 45 void advance(unsigned I = 1) { Ptr += I; } 46 47 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 48 49 StringRef upto(Cursor C) const { 50 assert(C.Ptr >= Ptr && C.Ptr <= End); 51 return StringRef(Ptr, C.Ptr - Ptr); 52 } 53 54 StringRef::iterator location() const { return Ptr; } 55 56 operator bool() const { return Ptr != nullptr; } 57 }; 58 59 } // end anonymous namespace 60 61 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 62 this->Kind = Kind; 63 this->Range = Range; 64 return *this; 65 } 66 67 MIToken &MIToken::setStringValue(StringRef StrVal) { 68 StringValue = StrVal; 69 return *this; 70 } 71 72 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 73 StringValueStorage = std::move(StrVal); 74 StringValue = StringValueStorage; 75 return *this; 76 } 77 78 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 79 this->IntVal = std::move(IntVal); 80 return *this; 81 } 82 83 /// Skip the leading whitespace characters and return the updated cursor. 84 static Cursor skipWhitespace(Cursor C) { 85 while (isblank(C.peek())) 86 C.advance(); 87 return C; 88 } 89 90 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 91 92 /// Skip a line comment and return the updated cursor. 93 static Cursor skipComment(Cursor C) { 94 if (C.peek() != ';') 95 return C; 96 while (!isNewlineChar(C.peek()) && !C.isEOF()) 97 C.advance(); 98 return C; 99 } 100 101 /// Return true if the given character satisfies the following regular 102 /// expression: [-a-zA-Z$._0-9] 103 static bool isIdentifierChar(char C) { 104 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 105 C == '$'; 106 } 107 108 /// Unescapes the given string value. 109 /// 110 /// Expects the string value to be quoted. 111 static std::string unescapeQuotedString(StringRef Value) { 112 assert(Value.front() == '"' && Value.back() == '"'); 113 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 114 115 std::string Str; 116 Str.reserve(C.remaining().size()); 117 while (!C.isEOF()) { 118 char Char = C.peek(); 119 if (Char == '\\') { 120 if (C.peek(1) == '\\') { 121 // Two '\' become one 122 Str += '\\'; 123 C.advance(2); 124 continue; 125 } 126 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 127 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 128 C.advance(3); 129 continue; 130 } 131 } 132 Str += Char; 133 C.advance(); 134 } 135 return Str; 136 } 137 138 /// Lex a string constant using the following regular expression: \"[^\"]*\" 139 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { 140 assert(C.peek() == '"'); 141 for (C.advance(); C.peek() != '"'; C.advance()) { 142 if (C.isEOF() || isNewlineChar(C.peek())) { 143 ErrorCallback( 144 C.location(), 145 "end of machine instruction reached before the closing '\"'"); 146 return None; 147 } 148 } 149 C.advance(); 150 return C; 151 } 152 153 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, 154 unsigned PrefixLength, ErrorCallbackType ErrorCallback) { 155 auto Range = C; 156 C.advance(PrefixLength); 157 if (C.peek() == '"') { 158 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 159 StringRef String = Range.upto(R); 160 Token.reset(Type, String) 161 .setOwnedStringValue( 162 unescapeQuotedString(String.drop_front(PrefixLength))); 163 return R; 164 } 165 Token.reset(MIToken::Error, Range.remaining()); 166 return Range; 167 } 168 while (isIdentifierChar(C.peek())) 169 C.advance(); 170 Token.reset(Type, Range.upto(C)) 171 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 172 return C; 173 } 174 175 static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { 176 if (C.peek() != 'i' || !isdigit(C.peek(1))) 177 return None; 178 auto Range = C; 179 C.advance(); // Skip 'i' 180 while (isdigit(C.peek())) 181 C.advance(); 182 Token.reset(MIToken::IntegerType, Range.upto(C)); 183 return C; 184 } 185 186 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 187 return StringSwitch<MIToken::TokenKind>(Identifier) 188 .Case("_", MIToken::underscore) 189 .Case("implicit", MIToken::kw_implicit) 190 .Case("implicit-def", MIToken::kw_implicit_define) 191 .Case("def", MIToken::kw_def) 192 .Case("dead", MIToken::kw_dead) 193 .Case("killed", MIToken::kw_killed) 194 .Case("undef", MIToken::kw_undef) 195 .Case("internal", MIToken::kw_internal) 196 .Case("early-clobber", MIToken::kw_early_clobber) 197 .Case("debug-use", MIToken::kw_debug_use) 198 .Case("tied-def", MIToken::kw_tied_def) 199 .Case("frame-setup", MIToken::kw_frame_setup) 200 .Case("debug-location", MIToken::kw_debug_location) 201 .Case(".cfi_same_value", MIToken::kw_cfi_same_value) 202 .Case(".cfi_offset", MIToken::kw_cfi_offset) 203 .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) 204 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 205 .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) 206 .Case("blockaddress", MIToken::kw_blockaddress) 207 .Case("target-index", MIToken::kw_target_index) 208 .Case("half", MIToken::kw_half) 209 .Case("float", MIToken::kw_float) 210 .Case("double", MIToken::kw_double) 211 .Case("x86_fp80", MIToken::kw_x86_fp80) 212 .Case("fp128", MIToken::kw_fp128) 213 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 214 .Case("target-flags", MIToken::kw_target_flags) 215 .Case("volatile", MIToken::kw_volatile) 216 .Case("non-temporal", MIToken::kw_non_temporal) 217 .Case("invariant", MIToken::kw_invariant) 218 .Case("align", MIToken::kw_align) 219 .Case("stack", MIToken::kw_stack) 220 .Case("got", MIToken::kw_got) 221 .Case("jump-table", MIToken::kw_jump_table) 222 .Case("constant-pool", MIToken::kw_constant_pool) 223 .Case("call-entry", MIToken::kw_call_entry) 224 .Case("liveout", MIToken::kw_liveout) 225 .Case("address-taken", MIToken::kw_address_taken) 226 .Case("landing-pad", MIToken::kw_landing_pad) 227 .Case("liveins", MIToken::kw_liveins) 228 .Case("successors", MIToken::kw_successors) 229 .Default(MIToken::Identifier); 230 } 231 232 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 233 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 234 return None; 235 auto Range = C; 236 while (isIdentifierChar(C.peek())) 237 C.advance(); 238 auto Identifier = Range.upto(C); 239 Token.reset(getIdentifierKind(Identifier), Identifier) 240 .setStringValue(Identifier); 241 return C; 242 } 243 244 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, 245 ErrorCallbackType ErrorCallback) { 246 bool IsReference = C.remaining().startswith("%bb."); 247 if (!IsReference && !C.remaining().startswith("bb.")) 248 return None; 249 auto Range = C; 250 unsigned PrefixLength = IsReference ? 4 : 3; 251 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 252 if (!isdigit(C.peek())) { 253 Token.reset(MIToken::Error, C.remaining()); 254 ErrorCallback(C.location(), "expected a number after '%bb.'"); 255 return C; 256 } 257 auto NumberRange = C; 258 while (isdigit(C.peek())) 259 C.advance(); 260 StringRef Number = NumberRange.upto(C); 261 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 262 if (C.peek() == '.') { 263 C.advance(); // Skip '.' 264 ++StringOffset; 265 while (isIdentifierChar(C.peek())) 266 C.advance(); 267 } 268 Token.reset(IsReference ? MIToken::MachineBasicBlock 269 : MIToken::MachineBasicBlockLabel, 270 Range.upto(C)) 271 .setIntegerValue(APSInt(Number)) 272 .setStringValue(Range.upto(C).drop_front(StringOffset)); 273 return C; 274 } 275 276 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 277 MIToken::TokenKind Kind) { 278 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 279 return None; 280 auto Range = C; 281 C.advance(Rule.size()); 282 auto NumberRange = C; 283 while (isdigit(C.peek())) 284 C.advance(); 285 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 286 return C; 287 } 288 289 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 290 MIToken::TokenKind Kind) { 291 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 292 return None; 293 auto Range = C; 294 C.advance(Rule.size()); 295 auto NumberRange = C; 296 while (isdigit(C.peek())) 297 C.advance(); 298 StringRef Number = NumberRange.upto(C); 299 unsigned StringOffset = Rule.size() + Number.size(); 300 if (C.peek() == '.') { 301 C.advance(); 302 ++StringOffset; 303 while (isIdentifierChar(C.peek())) 304 C.advance(); 305 } 306 Token.reset(Kind, Range.upto(C)) 307 .setIntegerValue(APSInt(Number)) 308 .setStringValue(Range.upto(C).drop_front(StringOffset)); 309 return C; 310 } 311 312 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 313 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 314 } 315 316 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 317 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 318 } 319 320 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 321 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 322 } 323 324 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 325 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 326 } 327 328 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, 329 ErrorCallbackType ErrorCallback) { 330 const StringRef Rule = "%subreg."; 331 if (!C.remaining().startswith(Rule)) 332 return None; 333 return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), 334 ErrorCallback); 335 } 336 337 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, 338 ErrorCallbackType ErrorCallback) { 339 const StringRef Rule = "%ir-block."; 340 if (!C.remaining().startswith(Rule)) 341 return None; 342 if (isdigit(C.peek(Rule.size()))) 343 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 344 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 345 } 346 347 static Cursor maybeLexIRValue(Cursor C, MIToken &Token, 348 ErrorCallbackType ErrorCallback) { 349 const StringRef Rule = "%ir."; 350 if (!C.remaining().startswith(Rule)) 351 return None; 352 if (isdigit(C.peek(Rule.size()))) 353 return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 354 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 355 } 356 357 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 358 auto Range = C; 359 C.advance(); // Skip '%' 360 auto NumberRange = C; 361 while (isdigit(C.peek())) 362 C.advance(); 363 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 364 .setIntegerValue(APSInt(NumberRange.upto(C))); 365 return C; 366 } 367 368 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 369 if (C.peek() != '%') 370 return None; 371 if (isdigit(C.peek(1))) 372 return lexVirtualRegister(C, Token); 373 auto Range = C; 374 C.advance(); // Skip '%' 375 while (isIdentifierChar(C.peek())) 376 C.advance(); 377 Token.reset(MIToken::NamedRegister, Range.upto(C)) 378 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 379 return C; 380 } 381 382 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, 383 ErrorCallbackType ErrorCallback) { 384 if (C.peek() != '@') 385 return None; 386 if (!isdigit(C.peek(1))) 387 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 388 ErrorCallback); 389 auto Range = C; 390 C.advance(1); // Skip the '@' 391 auto NumberRange = C; 392 while (isdigit(C.peek())) 393 C.advance(); 394 Token.reset(MIToken::GlobalValue, Range.upto(C)) 395 .setIntegerValue(APSInt(NumberRange.upto(C))); 396 return C; 397 } 398 399 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, 400 ErrorCallbackType ErrorCallback) { 401 if (C.peek() != '$') 402 return None; 403 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 404 ErrorCallback); 405 } 406 407 static bool isValidHexFloatingPointPrefix(char C) { 408 return C == 'H' || C == 'K' || C == 'L' || C == 'M'; 409 } 410 411 static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { 412 if (C.peek() != '0' || C.peek(1) != 'x') 413 return None; 414 Cursor Range = C; 415 C.advance(2); // Skip '0x' 416 if (isValidHexFloatingPointPrefix(C.peek())) 417 C.advance(); 418 while (isxdigit(C.peek())) 419 C.advance(); 420 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 421 return C; 422 } 423 424 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 425 C.advance(); 426 // Skip over [0-9]*([eE][-+]?[0-9]+)? 427 while (isdigit(C.peek())) 428 C.advance(); 429 if ((C.peek() == 'e' || C.peek() == 'E') && 430 (isdigit(C.peek(1)) || 431 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 432 C.advance(2); 433 while (isdigit(C.peek())) 434 C.advance(); 435 } 436 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 437 return C; 438 } 439 440 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 441 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 442 return None; 443 auto Range = C; 444 C.advance(); 445 while (isdigit(C.peek())) 446 C.advance(); 447 if (C.peek() == '.') 448 return lexFloatingPointLiteral(Range, C, Token); 449 StringRef StrVal = Range.upto(C); 450 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 451 return C; 452 } 453 454 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 455 return StringSwitch<MIToken::TokenKind>(Identifier) 456 .Case("!tbaa", MIToken::md_tbaa) 457 .Case("!alias.scope", MIToken::md_alias_scope) 458 .Case("!noalias", MIToken::md_noalias) 459 .Case("!range", MIToken::md_range) 460 .Default(MIToken::Error); 461 } 462 463 static Cursor maybeLexExlaim(Cursor C, MIToken &Token, 464 ErrorCallbackType ErrorCallback) { 465 if (C.peek() != '!') 466 return None; 467 auto Range = C; 468 C.advance(1); 469 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 470 Token.reset(MIToken::exclaim, Range.upto(C)); 471 return C; 472 } 473 while (isIdentifierChar(C.peek())) 474 C.advance(); 475 StringRef StrVal = Range.upto(C); 476 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 477 if (Token.isError()) 478 ErrorCallback(Token.location(), 479 "use of unknown metadata keyword '" + StrVal + "'"); 480 return C; 481 } 482 483 static MIToken::TokenKind symbolToken(char C) { 484 switch (C) { 485 case ',': 486 return MIToken::comma; 487 case '=': 488 return MIToken::equal; 489 case ':': 490 return MIToken::colon; 491 case '(': 492 return MIToken::lparen; 493 case ')': 494 return MIToken::rparen; 495 case '{': 496 return MIToken::lbrace; 497 case '}': 498 return MIToken::rbrace; 499 case '+': 500 return MIToken::plus; 501 case '-': 502 return MIToken::minus; 503 case '<': 504 return MIToken::less; 505 case '>': 506 return MIToken::greater; 507 default: 508 return MIToken::Error; 509 } 510 } 511 512 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 513 MIToken::TokenKind Kind; 514 unsigned Length = 1; 515 if (C.peek() == ':' && C.peek(1) == ':') { 516 Kind = MIToken::coloncolon; 517 Length = 2; 518 } else 519 Kind = symbolToken(C.peek()); 520 if (Kind == MIToken::Error) 521 return None; 522 auto Range = C; 523 C.advance(Length); 524 Token.reset(Kind, Range.upto(C)); 525 return C; 526 } 527 528 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 529 if (!isNewlineChar(C.peek())) 530 return None; 531 auto Range = C; 532 C.advance(); 533 Token.reset(MIToken::Newline, Range.upto(C)); 534 return C; 535 } 536 537 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, 538 ErrorCallbackType ErrorCallback) { 539 if (C.peek() != '`') 540 return None; 541 auto Range = C; 542 C.advance(); 543 auto StrRange = C; 544 while (C.peek() != '`') { 545 if (C.isEOF() || isNewlineChar(C.peek())) { 546 ErrorCallback( 547 C.location(), 548 "end of machine instruction reached before the closing '`'"); 549 Token.reset(MIToken::Error, Range.remaining()); 550 return C; 551 } 552 C.advance(); 553 } 554 StringRef Value = StrRange.upto(C); 555 C.advance(); 556 Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 557 return C; 558 } 559 560 StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, 561 ErrorCallbackType ErrorCallback) { 562 auto C = skipComment(skipWhitespace(Cursor(Source))); 563 if (C.isEOF()) { 564 Token.reset(MIToken::Eof, C.remaining()); 565 return C.remaining(); 566 } 567 568 if (Cursor R = maybeLexIntegerType(C, Token)) 569 return R.remaining(); 570 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 571 return R.remaining(); 572 if (Cursor R = maybeLexIdentifier(C, Token)) 573 return R.remaining(); 574 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 575 return R.remaining(); 576 if (Cursor R = maybeLexStackObject(C, Token)) 577 return R.remaining(); 578 if (Cursor R = maybeLexFixedStackObject(C, Token)) 579 return R.remaining(); 580 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 581 return R.remaining(); 582 if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) 583 return R.remaining(); 584 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 585 return R.remaining(); 586 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 587 return R.remaining(); 588 if (Cursor R = maybeLexRegister(C, Token)) 589 return R.remaining(); 590 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 591 return R.remaining(); 592 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 593 return R.remaining(); 594 if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) 595 return R.remaining(); 596 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 597 return R.remaining(); 598 if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) 599 return R.remaining(); 600 if (Cursor R = maybeLexSymbol(C, Token)) 601 return R.remaining(); 602 if (Cursor R = maybeLexNewline(C, Token)) 603 return R.remaining(); 604 if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 605 return R.remaining(); 606 607 Token.reset(MIToken::Error, C.remaining()); 608 ErrorCallback(C.location(), 609 Twine("unexpected character '") + Twine(C.peek()) + "'"); 610 return C.remaining(); 611 } 612