1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 /// This class provides a way to iterate and get characters from the source 25 /// string. 26 class Cursor { 27 const char *Ptr; 28 const char *End; 29 30 public: 31 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 32 33 explicit Cursor(StringRef Str) { 34 Ptr = Str.data(); 35 End = Ptr + Str.size(); 36 } 37 38 bool isEOF() const { return Ptr == End; } 39 40 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 41 42 void advance(unsigned I = 1) { Ptr += I; } 43 44 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 45 46 StringRef upto(Cursor C) const { 47 assert(C.Ptr >= Ptr && C.Ptr <= End); 48 return StringRef(Ptr, C.Ptr - Ptr); 49 } 50 51 StringRef::iterator location() const { return Ptr; } 52 53 operator bool() const { return Ptr != nullptr; } 54 }; 55 56 } // end anonymous namespace 57 58 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 59 this->Kind = Kind; 60 this->Range = Range; 61 return *this; 62 } 63 64 MIToken &MIToken::setStringValue(StringRef StrVal) { 65 StringValue = StrVal; 66 return *this; 67 } 68 69 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 70 StringValueStorage = std::move(StrVal); 71 StringValue = StringValueStorage; 72 return *this; 73 } 74 75 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 76 this->IntVal = std::move(IntVal); 77 return *this; 78 } 79 80 /// Skip the leading whitespace characters and return the updated cursor. 81 static Cursor skipWhitespace(Cursor C) { 82 while (isblank(C.peek())) 83 C.advance(); 84 return C; 85 } 86 87 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 88 89 /// Skip a line comment and return the updated cursor. 90 static Cursor skipComment(Cursor C) { 91 if (C.peek() != ';') 92 return C; 93 while (!isNewlineChar(C.peek()) && !C.isEOF()) 94 C.advance(); 95 return C; 96 } 97 98 /// Return true if the given character satisfies the following regular 99 /// expression: [-a-zA-Z$._0-9] 100 static bool isIdentifierChar(char C) { 101 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 102 C == '$'; 103 } 104 105 /// Unescapes the given string value. 106 /// 107 /// Expects the string value to be quoted. 108 static std::string unescapeQuotedString(StringRef Value) { 109 assert(Value.front() == '"' && Value.back() == '"'); 110 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 111 112 std::string Str; 113 Str.reserve(C.remaining().size()); 114 while (!C.isEOF()) { 115 char Char = C.peek(); 116 if (Char == '\\') { 117 if (C.peek(1) == '\\') { 118 // Two '\' become one 119 Str += '\\'; 120 C.advance(2); 121 continue; 122 } 123 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 124 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 125 C.advance(3); 126 continue; 127 } 128 } 129 Str += Char; 130 C.advance(); 131 } 132 return Str; 133 } 134 135 /// Lex a string constant using the following regular expression: \"[^\"]*\" 136 static Cursor lexStringConstant( 137 Cursor C, 138 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 139 assert(C.peek() == '"'); 140 for (C.advance(); C.peek() != '"'; C.advance()) { 141 if (C.isEOF() || isNewlineChar(C.peek())) { 142 ErrorCallback( 143 C.location(), 144 "end of machine instruction reached before the closing '\"'"); 145 return None; 146 } 147 } 148 C.advance(); 149 return C; 150 } 151 152 static Cursor lexName( 153 Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, 154 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 155 auto Range = C; 156 C.advance(PrefixLength); 157 if (C.peek() == '"') { 158 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 159 StringRef String = Range.upto(R); 160 Token.reset(Type, String) 161 .setOwnedStringValue( 162 unescapeQuotedString(String.drop_front(PrefixLength))); 163 return R; 164 } 165 Token.reset(MIToken::Error, Range.remaining()); 166 return Range; 167 } 168 while (isIdentifierChar(C.peek())) 169 C.advance(); 170 Token.reset(Type, Range.upto(C)) 171 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 172 return C; 173 } 174 175 static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { 176 if (C.peek() != 'i' || !isdigit(C.peek(1))) 177 return None; 178 auto Range = C; 179 C.advance(); // Skip 'i' 180 while (isdigit(C.peek())) 181 C.advance(); 182 Token.reset(MIToken::IntegerType, Range.upto(C)); 183 return C; 184 } 185 186 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 187 return StringSwitch<MIToken::TokenKind>(Identifier) 188 .Case("_", MIToken::underscore) 189 .Case("implicit", MIToken::kw_implicit) 190 .Case("implicit-def", MIToken::kw_implicit_define) 191 .Case("dead", MIToken::kw_dead) 192 .Case("killed", MIToken::kw_killed) 193 .Case("undef", MIToken::kw_undef) 194 .Case("internal", MIToken::kw_internal) 195 .Case("early-clobber", MIToken::kw_early_clobber) 196 .Case("debug-use", MIToken::kw_debug_use) 197 .Case("frame-setup", MIToken::kw_frame_setup) 198 .Case("debug-location", MIToken::kw_debug_location) 199 .Case(".cfi_same_value", MIToken::kw_cfi_same_value) 200 .Case(".cfi_offset", MIToken::kw_cfi_offset) 201 .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) 202 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 203 .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) 204 .Case("blockaddress", MIToken::kw_blockaddress) 205 .Case("target-index", MIToken::kw_target_index) 206 .Case("half", MIToken::kw_half) 207 .Case("float", MIToken::kw_float) 208 .Case("double", MIToken::kw_double) 209 .Case("x86_fp80", MIToken::kw_x86_fp80) 210 .Case("fp128", MIToken::kw_fp128) 211 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 212 .Case("target-flags", MIToken::kw_target_flags) 213 .Case("volatile", MIToken::kw_volatile) 214 .Case("non-temporal", MIToken::kw_non_temporal) 215 .Case("invariant", MIToken::kw_invariant) 216 .Case("align", MIToken::kw_align) 217 .Case("stack", MIToken::kw_stack) 218 .Case("got", MIToken::kw_got) 219 .Case("jump-table", MIToken::kw_jump_table) 220 .Case("constant-pool", MIToken::kw_constant_pool) 221 .Case("liveout", MIToken::kw_liveout) 222 .Case("address-taken", MIToken::kw_address_taken) 223 .Case("landing-pad", MIToken::kw_landing_pad) 224 .Case("liveins", MIToken::kw_liveins) 225 .Case("successors", MIToken::kw_successors) 226 .Default(MIToken::Identifier); 227 } 228 229 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 230 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 231 return None; 232 auto Range = C; 233 while (isIdentifierChar(C.peek())) 234 C.advance(); 235 auto Identifier = Range.upto(C); 236 Token.reset(getIdentifierKind(Identifier), Identifier) 237 .setStringValue(Identifier); 238 return C; 239 } 240 241 static Cursor maybeLexMachineBasicBlock( 242 Cursor C, MIToken &Token, 243 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 244 bool IsReference = C.remaining().startswith("%bb."); 245 if (!IsReference && !C.remaining().startswith("bb.")) 246 return None; 247 auto Range = C; 248 unsigned PrefixLength = IsReference ? 4 : 3; 249 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 250 if (!isdigit(C.peek())) { 251 Token.reset(MIToken::Error, C.remaining()); 252 ErrorCallback(C.location(), "expected a number after '%bb.'"); 253 return C; 254 } 255 auto NumberRange = C; 256 while (isdigit(C.peek())) 257 C.advance(); 258 StringRef Number = NumberRange.upto(C); 259 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 260 if (C.peek() == '.') { 261 C.advance(); // Skip '.' 262 ++StringOffset; 263 while (isIdentifierChar(C.peek())) 264 C.advance(); 265 } 266 Token.reset(IsReference ? MIToken::MachineBasicBlock 267 : MIToken::MachineBasicBlockLabel, 268 Range.upto(C)) 269 .setIntegerValue(APSInt(Number)) 270 .setStringValue(Range.upto(C).drop_front(StringOffset)); 271 return C; 272 } 273 274 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 275 MIToken::TokenKind Kind) { 276 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 277 return None; 278 auto Range = C; 279 C.advance(Rule.size()); 280 auto NumberRange = C; 281 while (isdigit(C.peek())) 282 C.advance(); 283 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 284 return C; 285 } 286 287 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 288 MIToken::TokenKind Kind) { 289 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 290 return None; 291 auto Range = C; 292 C.advance(Rule.size()); 293 auto NumberRange = C; 294 while (isdigit(C.peek())) 295 C.advance(); 296 StringRef Number = NumberRange.upto(C); 297 unsigned StringOffset = Rule.size() + Number.size(); 298 if (C.peek() == '.') { 299 C.advance(); 300 ++StringOffset; 301 while (isIdentifierChar(C.peek())) 302 C.advance(); 303 } 304 Token.reset(Kind, Range.upto(C)) 305 .setIntegerValue(APSInt(Number)) 306 .setStringValue(Range.upto(C).drop_front(StringOffset)); 307 return C; 308 } 309 310 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 311 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 312 } 313 314 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 315 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 316 } 317 318 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 319 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 320 } 321 322 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 323 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 324 } 325 326 static Cursor maybeLexIRBlock( 327 Cursor C, MIToken &Token, 328 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 329 const StringRef Rule = "%ir-block."; 330 if (!C.remaining().startswith(Rule)) 331 return None; 332 if (isdigit(C.peek(Rule.size()))) 333 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 334 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 335 } 336 337 static Cursor maybeLexIRValue( 338 Cursor C, MIToken &Token, 339 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 340 const StringRef Rule = "%ir."; 341 if (!C.remaining().startswith(Rule)) 342 return None; 343 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 344 } 345 346 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 347 auto Range = C; 348 C.advance(); // Skip '%' 349 auto NumberRange = C; 350 while (isdigit(C.peek())) 351 C.advance(); 352 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 353 .setIntegerValue(APSInt(NumberRange.upto(C))); 354 return C; 355 } 356 357 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 358 if (C.peek() != '%') 359 return None; 360 if (isdigit(C.peek(1))) 361 return lexVirtualRegister(C, Token); 362 auto Range = C; 363 C.advance(); // Skip '%' 364 while (isIdentifierChar(C.peek())) 365 C.advance(); 366 Token.reset(MIToken::NamedRegister, Range.upto(C)) 367 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 368 return C; 369 } 370 371 static Cursor maybeLexGlobalValue( 372 Cursor C, MIToken &Token, 373 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 374 if (C.peek() != '@') 375 return None; 376 if (!isdigit(C.peek(1))) 377 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 378 ErrorCallback); 379 auto Range = C; 380 C.advance(1); // Skip the '@' 381 auto NumberRange = C; 382 while (isdigit(C.peek())) 383 C.advance(); 384 Token.reset(MIToken::GlobalValue, Range.upto(C)) 385 .setIntegerValue(APSInt(NumberRange.upto(C))); 386 return C; 387 } 388 389 static Cursor maybeLexExternalSymbol( 390 Cursor C, MIToken &Token, 391 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 392 if (C.peek() != '$') 393 return None; 394 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 395 ErrorCallback); 396 } 397 398 static bool isValidHexFloatingPointPrefix(char C) { 399 return C == 'H' || C == 'K' || C == 'L' || C == 'M'; 400 } 401 402 static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { 403 if (C.peek() != '0' || C.peek(1) != 'x') 404 return None; 405 Cursor Range = C; 406 C.advance(2); // Skip '0x' 407 if (isValidHexFloatingPointPrefix(C.peek())) 408 C.advance(); 409 while (isxdigit(C.peek())) 410 C.advance(); 411 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 412 return C; 413 } 414 415 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 416 C.advance(); 417 // Skip over [0-9]*([eE][-+]?[0-9]+)? 418 while (isdigit(C.peek())) 419 C.advance(); 420 if ((C.peek() == 'e' || C.peek() == 'E') && 421 (isdigit(C.peek(1)) || 422 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 423 C.advance(2); 424 while (isdigit(C.peek())) 425 C.advance(); 426 } 427 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 428 return C; 429 } 430 431 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 432 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 433 return None; 434 auto Range = C; 435 C.advance(); 436 while (isdigit(C.peek())) 437 C.advance(); 438 if (C.peek() == '.') 439 return lexFloatingPointLiteral(Range, C, Token); 440 StringRef StrVal = Range.upto(C); 441 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 442 return C; 443 } 444 445 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 446 return StringSwitch<MIToken::TokenKind>(Identifier) 447 .Case("!tbaa", MIToken::md_tbaa) 448 .Case("!alias.scope", MIToken::md_alias_scope) 449 .Case("!noalias", MIToken::md_noalias) 450 .Case("!range", MIToken::md_range) 451 .Default(MIToken::Error); 452 } 453 454 static Cursor maybeLexExlaim( 455 Cursor C, MIToken &Token, 456 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 457 if (C.peek() != '!') 458 return None; 459 auto Range = C; 460 C.advance(1); 461 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 462 Token.reset(MIToken::exclaim, Range.upto(C)); 463 return C; 464 } 465 while (isIdentifierChar(C.peek())) 466 C.advance(); 467 StringRef StrVal = Range.upto(C); 468 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 469 if (Token.isError()) 470 ErrorCallback(Token.location(), 471 "use of unknown metadata keyword '" + StrVal + "'"); 472 return C; 473 } 474 475 static MIToken::TokenKind symbolToken(char C) { 476 switch (C) { 477 case ',': 478 return MIToken::comma; 479 case '=': 480 return MIToken::equal; 481 case ':': 482 return MIToken::colon; 483 case '(': 484 return MIToken::lparen; 485 case ')': 486 return MIToken::rparen; 487 case '{': 488 return MIToken::lbrace; 489 case '}': 490 return MIToken::rbrace; 491 case '+': 492 return MIToken::plus; 493 case '-': 494 return MIToken::minus; 495 default: 496 return MIToken::Error; 497 } 498 } 499 500 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 501 MIToken::TokenKind Kind; 502 unsigned Length = 1; 503 if (C.peek() == ':' && C.peek(1) == ':') { 504 Kind = MIToken::coloncolon; 505 Length = 2; 506 } else 507 Kind = symbolToken(C.peek()); 508 if (Kind == MIToken::Error) 509 return None; 510 auto Range = C; 511 C.advance(Length); 512 Token.reset(Kind, Range.upto(C)); 513 return C; 514 } 515 516 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 517 if (!isNewlineChar(C.peek())) 518 return None; 519 auto Range = C; 520 C.advance(); 521 Token.reset(MIToken::Newline, Range.upto(C)); 522 return C; 523 } 524 525 StringRef llvm::lexMIToken( 526 StringRef Source, MIToken &Token, 527 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 528 auto C = skipComment(skipWhitespace(Cursor(Source))); 529 if (C.isEOF()) { 530 Token.reset(MIToken::Eof, C.remaining()); 531 return C.remaining(); 532 } 533 534 if (Cursor R = maybeLexIntegerType(C, Token)) 535 return R.remaining(); 536 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 537 return R.remaining(); 538 if (Cursor R = maybeLexIdentifier(C, Token)) 539 return R.remaining(); 540 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 541 return R.remaining(); 542 if (Cursor R = maybeLexStackObject(C, Token)) 543 return R.remaining(); 544 if (Cursor R = maybeLexFixedStackObject(C, Token)) 545 return R.remaining(); 546 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 547 return R.remaining(); 548 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 549 return R.remaining(); 550 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 551 return R.remaining(); 552 if (Cursor R = maybeLexRegister(C, Token)) 553 return R.remaining(); 554 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 555 return R.remaining(); 556 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 557 return R.remaining(); 558 if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) 559 return R.remaining(); 560 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 561 return R.remaining(); 562 if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) 563 return R.remaining(); 564 if (Cursor R = maybeLexSymbol(C, Token)) 565 return R.remaining(); 566 if (Cursor R = maybeLexNewline(C, Token)) 567 return R.remaining(); 568 569 Token.reset(MIToken::Error, C.remaining()); 570 ErrorCallback(C.location(), 571 Twine("unexpected character '") + Twine(C.peek()) + "'"); 572 return C.remaining(); 573 } 574