1 //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implement the Lexer for TableGen. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "TGLexer.h" 14 #include "llvm/ADT/StringSwitch.h" 15 #include "llvm/ADT/Twine.h" 16 #include "llvm/Config/config.h" // for strtoull()/strtoll() define 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/Support/SourceMgr.h" 20 #include "llvm/TableGen/Error.h" 21 #include <algorithm> 22 #include <cctype> 23 #include <cerrno> 24 #include <cstdint> 25 #include <cstdio> 26 #include <cstdlib> 27 #include <cstring> 28 29 using namespace llvm; 30 31 namespace { 32 // A list of supported preprocessing directives with their 33 // internal token kinds and names. 34 struct { 35 tgtok::TokKind Kind; 36 const char *Word; 37 } PreprocessorDirs[] = { 38 { tgtok::Ifdef, "ifdef" }, 39 { tgtok::Else, "else" }, 40 { tgtok::Endif, "endif" }, 41 { tgtok::Define, "define" } 42 }; 43 } // end anonymous namespace 44 45 TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { 46 CurBuffer = SrcMgr.getMainFileID(); 47 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 48 CurPtr = CurBuf.begin(); 49 TokStart = nullptr; 50 51 // Pretend that we enter the "top-level" include file. 52 PrepIncludeStack.push_back( 53 make_unique<std::vector<PreprocessorControlDesc>>()); 54 55 // Put all macros defined in the command line into the DefinedMacros set. 56 std::for_each(Macros.begin(), Macros.end(), 57 [this](const std::string &MacroName) { 58 DefinedMacros.insert(MacroName); 59 }); 60 } 61 62 SMLoc TGLexer::getLoc() const { 63 return SMLoc::getFromPointer(TokStart); 64 } 65 66 /// ReturnError - Set the error to the specified string at the specified 67 /// location. This is defined to always return tgtok::Error. 68 tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) { 69 PrintError(Loc, Msg); 70 return tgtok::Error; 71 } 72 73 tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { 74 return ReturnError(SMLoc::getFromPointer(Loc), Msg); 75 } 76 77 bool TGLexer::processEOF() { 78 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); 79 if (ParentIncludeLoc != SMLoc()) { 80 // If prepExitInclude() detects a problem with the preprocessing 81 // control stack, it will return false. Pretend that we reached 82 // the final EOF and stop lexing more tokens by returning false 83 // to LexToken(). 84 if (!prepExitInclude(false)) 85 return false; 86 87 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); 88 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 89 CurPtr = ParentIncludeLoc.getPointer(); 90 // Make sure TokStart points into the parent file's buffer. 91 // LexToken() assigns to it before calling getNextChar(), 92 // so it is pointing into the included file now. 93 TokStart = CurPtr; 94 return true; 95 } 96 97 // Pretend that we exit the "top-level" include file. 98 // Note that in case of an error (e.g. control stack imbalance) 99 // the routine will issue a fatal error. 100 prepExitInclude(true); 101 return false; 102 } 103 104 int TGLexer::getNextChar() { 105 char CurChar = *CurPtr++; 106 switch (CurChar) { 107 default: 108 return (unsigned char)CurChar; 109 case 0: { 110 // A nul character in the stream is either the end of the current buffer or 111 // a random nul in the file. Disambiguate that here. 112 if (CurPtr-1 != CurBuf.end()) 113 return 0; // Just whitespace. 114 115 // Otherwise, return end of file. 116 --CurPtr; // Another call to lex will return EOF again. 117 return EOF; 118 } 119 case '\n': 120 case '\r': 121 // Handle the newline character by ignoring it and incrementing the line 122 // count. However, be careful about 'dos style' files with \n\r in them. 123 // Only treat a \n\r or \r\n as a single line. 124 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 125 *CurPtr != CurChar) 126 ++CurPtr; // Eat the two char newline sequence. 127 return '\n'; 128 } 129 } 130 131 int TGLexer::peekNextChar(int Index) const { 132 return *(CurPtr + Index); 133 } 134 135 tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { 136 TokStart = CurPtr; 137 // This always consumes at least one character. 138 int CurChar = getNextChar(); 139 140 switch (CurChar) { 141 default: 142 // Handle letters: [a-zA-Z_] 143 if (isalpha(CurChar) || CurChar == '_') 144 return LexIdentifier(); 145 146 // Unknown character, emit an error. 147 return ReturnError(TokStart, "Unexpected character"); 148 case EOF: 149 // Lex next token, if we just left an include file. 150 // Note that leaving an include file means that the next 151 // symbol is located at the end of 'include "..."' 152 // construct, so LexToken() is called with default 153 // false parameter. 154 if (processEOF()) 155 return LexToken(); 156 157 // Return EOF denoting the end of lexing. 158 return tgtok::Eof; 159 160 case ':': return tgtok::colon; 161 case ';': return tgtok::semi; 162 case '.': return tgtok::period; 163 case ',': return tgtok::comma; 164 case '<': return tgtok::less; 165 case '>': return tgtok::greater; 166 case ']': return tgtok::r_square; 167 case '{': return tgtok::l_brace; 168 case '}': return tgtok::r_brace; 169 case '(': return tgtok::l_paren; 170 case ')': return tgtok::r_paren; 171 case '=': return tgtok::equal; 172 case '?': return tgtok::question; 173 case '#': 174 if (FileOrLineStart) { 175 tgtok::TokKind Kind = prepIsDirective(); 176 if (Kind != tgtok::Error) 177 return lexPreprocessor(Kind); 178 } 179 180 return tgtok::paste; 181 182 case '\r': 183 PrintFatalError("getNextChar() must never return '\r'"); 184 return tgtok::Error; 185 186 case 0: 187 case ' ': 188 case '\t': 189 // Ignore whitespace. 190 return LexToken(FileOrLineStart); 191 case '\n': 192 // Ignore whitespace, and identify the new line. 193 return LexToken(true); 194 case '/': 195 // If this is the start of a // comment, skip until the end of the line or 196 // the end of the buffer. 197 if (*CurPtr == '/') 198 SkipBCPLComment(); 199 else if (*CurPtr == '*') { 200 if (SkipCComment()) 201 return tgtok::Error; 202 } else // Otherwise, this is an error. 203 return ReturnError(TokStart, "Unexpected character"); 204 return LexToken(FileOrLineStart); 205 case '-': case '+': 206 case '0': case '1': case '2': case '3': case '4': case '5': case '6': 207 case '7': case '8': case '9': { 208 int NextChar = 0; 209 if (isdigit(CurChar)) { 210 // Allow identifiers to start with a number if it is followed by 211 // an identifier. This can happen with paste operations like 212 // foo#8i. 213 int i = 0; 214 do { 215 NextChar = peekNextChar(i++); 216 } while (isdigit(NextChar)); 217 218 if (NextChar == 'x' || NextChar == 'b') { 219 // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most 220 // likely a number. 221 int NextNextChar = peekNextChar(i); 222 switch (NextNextChar) { 223 default: 224 break; 225 case '0': case '1': 226 if (NextChar == 'b') 227 return LexNumber(); 228 LLVM_FALLTHROUGH; 229 case '2': case '3': case '4': case '5': 230 case '6': case '7': case '8': case '9': 231 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 232 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 233 if (NextChar == 'x') 234 return LexNumber(); 235 break; 236 } 237 } 238 } 239 240 if (isalpha(NextChar) || NextChar == '_') 241 return LexIdentifier(); 242 243 return LexNumber(); 244 } 245 case '"': return LexString(); 246 case '$': return LexVarName(); 247 case '[': return LexBracket(); 248 case '!': return LexExclaim(); 249 } 250 } 251 252 /// LexString - Lex "[^"]*" 253 tgtok::TokKind TGLexer::LexString() { 254 const char *StrStart = CurPtr; 255 256 CurStrVal = ""; 257 258 while (*CurPtr != '"') { 259 // If we hit the end of the buffer, report an error. 260 if (*CurPtr == 0 && CurPtr == CurBuf.end()) 261 return ReturnError(StrStart, "End of file in string literal"); 262 263 if (*CurPtr == '\n' || *CurPtr == '\r') 264 return ReturnError(StrStart, "End of line in string literal"); 265 266 if (*CurPtr != '\\') { 267 CurStrVal += *CurPtr++; 268 continue; 269 } 270 271 ++CurPtr; 272 273 switch (*CurPtr) { 274 case '\\': case '\'': case '"': 275 // These turn into their literal character. 276 CurStrVal += *CurPtr++; 277 break; 278 case 't': 279 CurStrVal += '\t'; 280 ++CurPtr; 281 break; 282 case 'n': 283 CurStrVal += '\n'; 284 ++CurPtr; 285 break; 286 287 case '\n': 288 case '\r': 289 return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); 290 291 // If we hit the end of the buffer, report an error. 292 case '\0': 293 if (CurPtr == CurBuf.end()) 294 return ReturnError(StrStart, "End of file in string literal"); 295 LLVM_FALLTHROUGH; 296 default: 297 return ReturnError(CurPtr, "invalid escape in string literal"); 298 } 299 } 300 301 ++CurPtr; 302 return tgtok::StrVal; 303 } 304 305 tgtok::TokKind TGLexer::LexVarName() { 306 if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') 307 return ReturnError(TokStart, "Invalid variable name"); 308 309 // Otherwise, we're ok, consume the rest of the characters. 310 const char *VarNameStart = CurPtr++; 311 312 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 313 ++CurPtr; 314 315 CurStrVal.assign(VarNameStart, CurPtr); 316 return tgtok::VarName; 317 } 318 319 tgtok::TokKind TGLexer::LexIdentifier() { 320 // The first letter is [a-zA-Z_]. 321 const char *IdentStart = TokStart; 322 323 // Match the rest of the identifier regex: [0-9a-zA-Z_]* 324 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 325 ++CurPtr; 326 327 // Check to see if this identifier is a keyword. 328 StringRef Str(IdentStart, CurPtr-IdentStart); 329 330 if (Str == "include") { 331 if (LexInclude()) return tgtok::Error; 332 return Lex(); 333 } 334 335 tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) 336 .Case("int", tgtok::Int) 337 .Case("bit", tgtok::Bit) 338 .Case("bits", tgtok::Bits) 339 .Case("string", tgtok::String) 340 .Case("list", tgtok::List) 341 .Case("code", tgtok::Code) 342 .Case("dag", tgtok::Dag) 343 .Case("class", tgtok::Class) 344 .Case("def", tgtok::Def) 345 .Case("foreach", tgtok::Foreach) 346 .Case("defm", tgtok::Defm) 347 .Case("defset", tgtok::Defset) 348 .Case("multiclass", tgtok::MultiClass) 349 .Case("field", tgtok::Field) 350 .Case("let", tgtok::Let) 351 .Case("in", tgtok::In) 352 .Default(tgtok::Id); 353 354 if (Kind == tgtok::Id) 355 CurStrVal.assign(Str.begin(), Str.end()); 356 return Kind; 357 } 358 359 /// LexInclude - We just read the "include" token. Get the string token that 360 /// comes next and enter the include. 361 bool TGLexer::LexInclude() { 362 // The token after the include must be a string. 363 tgtok::TokKind Tok = LexToken(); 364 if (Tok == tgtok::Error) return true; 365 if (Tok != tgtok::StrVal) { 366 PrintError(getLoc(), "Expected filename after include"); 367 return true; 368 } 369 370 // Get the string. 371 std::string Filename = CurStrVal; 372 std::string IncludedFile; 373 374 CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), 375 IncludedFile); 376 if (!CurBuffer) { 377 PrintError(getLoc(), "Could not find include file '" + Filename + "'"); 378 return true; 379 } 380 381 DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile); 382 if (Found != Dependencies.end()) { 383 PrintError(getLoc(), 384 "File '" + IncludedFile + "' has already been included."); 385 SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note, 386 "previously included here"); 387 return true; 388 } 389 Dependencies.insert(std::make_pair(IncludedFile, getLoc())); 390 // Save the line number and lex buffer of the includer. 391 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 392 CurPtr = CurBuf.begin(); 393 394 PrepIncludeStack.push_back( 395 make_unique<std::vector<PreprocessorControlDesc>>()); 396 return false; 397 } 398 399 void TGLexer::SkipBCPLComment() { 400 ++CurPtr; // skip the second slash. 401 while (true) { 402 switch (*CurPtr) { 403 case '\n': 404 case '\r': 405 return; // Newline is end of comment. 406 case 0: 407 // If this is the end of the buffer, end the comment. 408 if (CurPtr == CurBuf.end()) 409 return; 410 break; 411 } 412 // Otherwise, skip the character. 413 ++CurPtr; 414 } 415 } 416 417 /// SkipCComment - This skips C-style /**/ comments. The only difference from C 418 /// is that we allow nesting. 419 bool TGLexer::SkipCComment() { 420 ++CurPtr; // skip the star. 421 unsigned CommentDepth = 1; 422 423 while (true) { 424 int CurChar = getNextChar(); 425 switch (CurChar) { 426 case EOF: 427 PrintError(TokStart, "Unterminated comment!"); 428 return true; 429 case '*': 430 // End of the comment? 431 if (CurPtr[0] != '/') break; 432 433 ++CurPtr; // End the */. 434 if (--CommentDepth == 0) 435 return false; 436 break; 437 case '/': 438 // Start of a nested comment? 439 if (CurPtr[0] != '*') break; 440 ++CurPtr; 441 ++CommentDepth; 442 break; 443 } 444 } 445 } 446 447 /// LexNumber - Lex: 448 /// [-+]?[0-9]+ 449 /// 0x[0-9a-fA-F]+ 450 /// 0b[01]+ 451 tgtok::TokKind TGLexer::LexNumber() { 452 if (CurPtr[-1] == '0') { 453 if (CurPtr[0] == 'x') { 454 ++CurPtr; 455 const char *NumStart = CurPtr; 456 while (isxdigit(CurPtr[0])) 457 ++CurPtr; 458 459 // Requires at least one hex digit. 460 if (CurPtr == NumStart) 461 return ReturnError(TokStart, "Invalid hexadecimal number"); 462 463 errno = 0; 464 CurIntVal = strtoll(NumStart, nullptr, 16); 465 if (errno == EINVAL) 466 return ReturnError(TokStart, "Invalid hexadecimal number"); 467 if (errno == ERANGE) { 468 errno = 0; 469 CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16); 470 if (errno == EINVAL) 471 return ReturnError(TokStart, "Invalid hexadecimal number"); 472 if (errno == ERANGE) 473 return ReturnError(TokStart, "Hexadecimal number out of range"); 474 } 475 return tgtok::IntVal; 476 } else if (CurPtr[0] == 'b') { 477 ++CurPtr; 478 const char *NumStart = CurPtr; 479 while (CurPtr[0] == '0' || CurPtr[0] == '1') 480 ++CurPtr; 481 482 // Requires at least one binary digit. 483 if (CurPtr == NumStart) 484 return ReturnError(CurPtr-2, "Invalid binary number"); 485 CurIntVal = strtoll(NumStart, nullptr, 2); 486 return tgtok::BinaryIntVal; 487 } 488 } 489 490 // Check for a sign without a digit. 491 if (!isdigit(CurPtr[0])) { 492 if (CurPtr[-1] == '-') 493 return tgtok::minus; 494 else if (CurPtr[-1] == '+') 495 return tgtok::plus; 496 } 497 498 while (isdigit(CurPtr[0])) 499 ++CurPtr; 500 CurIntVal = strtoll(TokStart, nullptr, 10); 501 return tgtok::IntVal; 502 } 503 504 /// LexBracket - We just read '['. If this is a code block, return it, 505 /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' 506 tgtok::TokKind TGLexer::LexBracket() { 507 if (CurPtr[0] != '{') 508 return tgtok::l_square; 509 ++CurPtr; 510 const char *CodeStart = CurPtr; 511 while (true) { 512 int Char = getNextChar(); 513 if (Char == EOF) break; 514 515 if (Char != '}') continue; 516 517 Char = getNextChar(); 518 if (Char == EOF) break; 519 if (Char == ']') { 520 CurStrVal.assign(CodeStart, CurPtr-2); 521 return tgtok::CodeFragment; 522 } 523 } 524 525 return ReturnError(CodeStart-2, "Unterminated Code Block"); 526 } 527 528 /// LexExclaim - Lex '!' and '![a-zA-Z]+'. 529 tgtok::TokKind TGLexer::LexExclaim() { 530 if (!isalpha(*CurPtr)) 531 return ReturnError(CurPtr - 1, "Invalid \"!operator\""); 532 533 const char *Start = CurPtr++; 534 while (isalpha(*CurPtr)) 535 ++CurPtr; 536 537 // Check to see which operator this is. 538 tgtok::TokKind Kind = 539 StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) 540 .Case("eq", tgtok::XEq) 541 .Case("ne", tgtok::XNe) 542 .Case("le", tgtok::XLe) 543 .Case("lt", tgtok::XLt) 544 .Case("ge", tgtok::XGe) 545 .Case("gt", tgtok::XGt) 546 .Case("if", tgtok::XIf) 547 .Case("cond", tgtok::XCond) 548 .Case("isa", tgtok::XIsA) 549 .Case("head", tgtok::XHead) 550 .Case("tail", tgtok::XTail) 551 .Case("size", tgtok::XSize) 552 .Case("con", tgtok::XConcat) 553 .Case("dag", tgtok::XDag) 554 .Case("add", tgtok::XADD) 555 .Case("mul", tgtok::XMUL) 556 .Case("and", tgtok::XAND) 557 .Case("or", tgtok::XOR) 558 .Case("shl", tgtok::XSHL) 559 .Case("sra", tgtok::XSRA) 560 .Case("srl", tgtok::XSRL) 561 .Case("cast", tgtok::XCast) 562 .Case("empty", tgtok::XEmpty) 563 .Case("subst", tgtok::XSubst) 564 .Case("foldl", tgtok::XFoldl) 565 .Case("foreach", tgtok::XForEach) 566 .Case("listconcat", tgtok::XListConcat) 567 .Case("strconcat", tgtok::XStrConcat) 568 .Default(tgtok::Error); 569 570 return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); 571 } 572 573 bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { 574 // Report an error, if preprocessor control stack for the current 575 // file is not empty. 576 if (!PrepIncludeStack.back()->empty()) { 577 prepReportPreprocessorStackError(); 578 579 return false; 580 } 581 582 // Pop the preprocessing controls from the include stack. 583 if (PrepIncludeStack.empty()) { 584 PrintFatalError("Preprocessor include stack is empty"); 585 } 586 587 PrepIncludeStack.pop_back(); 588 589 if (IncludeStackMustBeEmpty) { 590 if (!PrepIncludeStack.empty()) 591 PrintFatalError("Preprocessor include stack is not empty"); 592 } else { 593 if (PrepIncludeStack.empty()) 594 PrintFatalError("Preprocessor include stack is empty"); 595 } 596 597 return true; 598 } 599 600 tgtok::TokKind TGLexer::prepIsDirective() const { 601 for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) { 602 int NextChar = *CurPtr; 603 bool Match = true; 604 unsigned I = 0; 605 for (; I < strlen(PreprocessorDirs[ID].Word); ++I) { 606 if (NextChar != PreprocessorDirs[ID].Word[I]) { 607 Match = false; 608 break; 609 } 610 611 NextChar = peekNextChar(I + 1); 612 } 613 614 // Check for whitespace after the directive. If there is no whitespace, 615 // then we do not recognize it as a preprocessing directive. 616 if (Match) { 617 tgtok::TokKind Kind = PreprocessorDirs[ID].Kind; 618 619 // New line and EOF may follow only #else/#endif. It will be reported 620 // as an error for #ifdef/#define after the call to prepLexMacroName(). 621 if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || 622 NextChar == '\n' || 623 // It looks like TableGen does not support '\r' as the actual 624 // carriage return, e.g. getNextChar() treats a single '\r' 625 // as '\n'. So we do the same here. 626 NextChar == '\r') 627 return Kind; 628 629 // Allow comments after some directives, e.g.: 630 // #else// OR #else/**/ 631 // #endif// OR #endif/**/ 632 // 633 // Note that we do allow comments after #ifdef/#define here, e.g. 634 // #ifdef/**/ AND #ifdef// 635 // #define/**/ AND #define// 636 // 637 // These cases will be reported as incorrect after calling 638 // prepLexMacroName(). We could have supported C-style comments 639 // after #ifdef/#define, but this would complicate the code 640 // for little benefit. 641 if (NextChar == '/') { 642 NextChar = peekNextChar(I + 1); 643 644 if (NextChar == '*' || NextChar == '/') 645 return Kind; 646 647 // Pretend that we do not recognize the directive. 648 } 649 } 650 } 651 652 return tgtok::Error; 653 } 654 655 bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { 656 TokStart = CurPtr; 657 658 for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) 659 if (PreprocessorDirs[ID].Kind == Kind) { 660 // Advance CurPtr to the end of the preprocessing word. 661 CurPtr += strlen(PreprocessorDirs[ID].Word); 662 return true; 663 } 664 665 PrintFatalError("Unsupported preprocessing token in " 666 "prepEatPreprocessorDirective()"); 667 return false; 668 } 669 670 tgtok::TokKind TGLexer::lexPreprocessor( 671 tgtok::TokKind Kind, bool ReturnNextLiveToken) { 672 673 // We must be looking at a preprocessing directive. Eat it! 674 if (!prepEatPreprocessorDirective(Kind)) 675 PrintFatalError("lexPreprocessor() called for unknown " 676 "preprocessor directive"); 677 678 if (Kind == tgtok::Ifdef) { 679 StringRef MacroName = prepLexMacroName(); 680 if (MacroName.empty()) 681 return ReturnError(TokStart, "Expected macro name after #ifdef"); 682 683 bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; 684 685 // Regardless of whether we are processing tokens or not, 686 // we put the #ifdef control on stack. 687 PrepIncludeStack.back()->push_back( 688 {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); 689 690 if (!prepSkipDirectiveEnd()) 691 return ReturnError(CurPtr, 692 "Only comments are supported after #ifdef NAME"); 693 694 // If we were not processing tokens before this #ifdef, 695 // then just return back to the lines skipping code. 696 if (!ReturnNextLiveToken) 697 return Kind; 698 699 // If we were processing tokens before this #ifdef, 700 // and the macro is defined, then just return the next token. 701 if (MacroIsDefined) 702 return LexToken(); 703 704 // We were processing tokens before this #ifdef, and the macro 705 // is not defined, so we have to start skipping the lines. 706 // If the skipping is successful, it will return the token following 707 // either #else or #endif corresponding to this #ifdef. 708 if (prepSkipRegion(ReturnNextLiveToken)) 709 return LexToken(); 710 711 return tgtok::Error; 712 } else if (Kind == tgtok::Else) { 713 // Check if this #else is correct before calling prepSkipDirectiveEnd(), 714 // which will move CurPtr away from the beginning of #else. 715 if (PrepIncludeStack.back()->empty()) 716 return ReturnError(TokStart, "#else without #ifdef"); 717 718 PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back(); 719 720 if (IfdefEntry.Kind != tgtok::Ifdef) { 721 PrintError(TokStart, "double #else"); 722 return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); 723 } 724 725 // Replace the corresponding #ifdef's control with its negation 726 // on the control stack. 727 PrepIncludeStack.back()->pop_back(); 728 PrepIncludeStack.back()->push_back( 729 {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); 730 731 if (!prepSkipDirectiveEnd()) 732 return ReturnError(CurPtr, "Only comments are supported after #else"); 733 734 // If we were processing tokens before this #else, 735 // we have to start skipping lines until the matching #endif. 736 if (ReturnNextLiveToken) { 737 if (prepSkipRegion(ReturnNextLiveToken)) 738 return LexToken(); 739 740 return tgtok::Error; 741 } 742 743 // Return to the lines skipping code. 744 return Kind; 745 } else if (Kind == tgtok::Endif) { 746 // Check if this #endif is correct before calling prepSkipDirectiveEnd(), 747 // which will move CurPtr away from the beginning of #endif. 748 if (PrepIncludeStack.back()->empty()) 749 return ReturnError(TokStart, "#endif without #ifdef"); 750 751 auto &IfdefOrElseEntry = PrepIncludeStack.back()->back(); 752 753 if (IfdefOrElseEntry.Kind != tgtok::Ifdef && 754 IfdefOrElseEntry.Kind != tgtok::Else) { 755 PrintFatalError("Invalid preprocessor control on the stack"); 756 return tgtok::Error; 757 } 758 759 if (!prepSkipDirectiveEnd()) 760 return ReturnError(CurPtr, "Only comments are supported after #endif"); 761 762 PrepIncludeStack.back()->pop_back(); 763 764 // If we were processing tokens before this #endif, then 765 // we should continue it. 766 if (ReturnNextLiveToken) { 767 return LexToken(); 768 } 769 770 // Return to the lines skipping code. 771 return Kind; 772 } else if (Kind == tgtok::Define) { 773 StringRef MacroName = prepLexMacroName(); 774 if (MacroName.empty()) 775 return ReturnError(TokStart, "Expected macro name after #define"); 776 777 if (!DefinedMacros.insert(MacroName).second) 778 PrintWarning(getLoc(), 779 "Duplicate definition of macro: " + Twine(MacroName)); 780 781 if (!prepSkipDirectiveEnd()) 782 return ReturnError(CurPtr, 783 "Only comments are supported after #define NAME"); 784 785 if (!ReturnNextLiveToken) { 786 PrintFatalError("#define must be ignored during the lines skipping"); 787 return tgtok::Error; 788 } 789 790 return LexToken(); 791 } 792 793 PrintFatalError("Preprocessing directive is not supported"); 794 return tgtok::Error; 795 } 796 797 bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { 798 if (!MustNeverBeFalse) 799 PrintFatalError("Invalid recursion."); 800 801 do { 802 // Skip all symbols to the line end. 803 prepSkipToLineEnd(); 804 805 // Find the first non-whitespace symbol in the next line(s). 806 if (!prepSkipLineBegin()) 807 return false; 808 809 // If the first non-blank/comment symbol on the line is '#', 810 // it may be a start of preprocessing directive. 811 // 812 // If it is not '#' just go to the next line. 813 if (*CurPtr == '#') 814 ++CurPtr; 815 else 816 continue; 817 818 tgtok::TokKind Kind = prepIsDirective(); 819 820 // If we did not find a preprocessing directive or it is #define, 821 // then just skip to the next line. We do not have to do anything 822 // for #define in the line-skipping mode. 823 if (Kind == tgtok::Error || Kind == tgtok::Define) 824 continue; 825 826 tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false); 827 828 // If lexPreprocessor() encountered an error during lexing this 829 // preprocessor idiom, then return false to the calling lexPreprocessor(). 830 // This will force tgtok::Error to be returned to the tokens processing. 831 if (ProcessedKind == tgtok::Error) 832 return false; 833 834 if (Kind != ProcessedKind) 835 PrintFatalError("prepIsDirective() and lexPreprocessor() " 836 "returned different token kinds"); 837 838 // If this preprocessing directive enables tokens processing, 839 // then return to the lexPreprocessor() and get to the next token. 840 // We can move from line-skipping mode to processing tokens only 841 // due to #else or #endif. 842 if (prepIsProcessingEnabled()) { 843 if (Kind != tgtok::Else && Kind != tgtok::Endif) { 844 PrintFatalError("Tokens processing was enabled by an unexpected " 845 "preprocessing directive"); 846 return false; 847 } 848 849 return true; 850 } 851 } while (CurPtr != CurBuf.end()); 852 853 // We have reached the end of the file, but never left the lines-skipping 854 // mode. This means there is no matching #endif. 855 prepReportPreprocessorStackError(); 856 return false; 857 } 858 859 StringRef TGLexer::prepLexMacroName() { 860 // Skip whitespaces between the preprocessing directive and the macro name. 861 while (*CurPtr == ' ' || *CurPtr == '\t') 862 ++CurPtr; 863 864 TokStart = CurPtr; 865 // Macro names start with [a-zA-Z_]. 866 if (*CurPtr != '_' && !isalpha(*CurPtr)) 867 return ""; 868 869 // Match the rest of the identifier regex: [0-9a-zA-Z_]* 870 while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 871 ++CurPtr; 872 873 return StringRef(TokStart, CurPtr - TokStart); 874 } 875 876 bool TGLexer::prepSkipLineBegin() { 877 while (CurPtr != CurBuf.end()) { 878 switch (*CurPtr) { 879 case ' ': 880 case '\t': 881 case '\n': 882 case '\r': 883 break; 884 885 case '/': { 886 int NextChar = peekNextChar(1); 887 if (NextChar == '*') { 888 // Skip C-style comment. 889 // Note that we do not care about skipping the C++-style comments. 890 // If the line contains "//", it may not contain any processable 891 // preprocessing directive. Just return CurPtr pointing to 892 // the first '/' in this case. We also do not care about 893 // incorrect symbols after the first '/' - we are in lines-skipping 894 // mode, so incorrect code is allowed to some extent. 895 896 // Set TokStart to the beginning of the comment to enable proper 897 // diagnostic printing in case of error in SkipCComment(). 898 TokStart = CurPtr; 899 900 // CurPtr must point to '*' before call to SkipCComment(). 901 ++CurPtr; 902 if (SkipCComment()) 903 return false; 904 } else { 905 // CurPtr points to the non-whitespace '/'. 906 return true; 907 } 908 909 // We must not increment CurPtr after the comment was lexed. 910 continue; 911 } 912 913 default: 914 return true; 915 } 916 917 ++CurPtr; 918 } 919 920 // We have reached the end of the file. Return to the lines skipping 921 // code, and allow it to handle the EOF as needed. 922 return true; 923 } 924 925 bool TGLexer::prepSkipDirectiveEnd() { 926 while (CurPtr != CurBuf.end()) { 927 switch (*CurPtr) { 928 case ' ': 929 case '\t': 930 break; 931 932 case '\n': 933 case '\r': 934 return true; 935 936 case '/': { 937 int NextChar = peekNextChar(1); 938 if (NextChar == '/') { 939 // Skip C++-style comment. 940 // We may just return true now, but let's skip to the line/buffer end 941 // to simplify the method specification. 942 ++CurPtr; 943 SkipBCPLComment(); 944 } else if (NextChar == '*') { 945 // When we are skipping C-style comment at the end of a preprocessing 946 // directive, we can skip several lines. If any meaningful TD token 947 // follows the end of the C-style comment on the same line, it will 948 // be considered as an invalid usage of TD token. 949 // For example, we want to forbid usages like this one: 950 // #define MACRO class Class {} 951 // But with C-style comments we also disallow the following: 952 // #define MACRO /* This macro is used 953 // to ... */ class Class {} 954 // One can argue that this should be allowed, but it does not seem 955 // to be worth of the complication. Moreover, this matches 956 // the C preprocessor behavior. 957 958 // Set TokStart to the beginning of the comment to enable proper 959 // diagnostic printer in case of error in SkipCComment(). 960 TokStart = CurPtr; 961 ++CurPtr; 962 if (SkipCComment()) 963 return false; 964 } else { 965 TokStart = CurPtr; 966 PrintError(CurPtr, "Unexpected character"); 967 return false; 968 } 969 970 // We must not increment CurPtr after the comment was lexed. 971 continue; 972 } 973 974 default: 975 // Do not allow any non-whitespaces after the directive. 976 TokStart = CurPtr; 977 return false; 978 } 979 980 ++CurPtr; 981 } 982 983 return true; 984 } 985 986 void TGLexer::prepSkipToLineEnd() { 987 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) 988 ++CurPtr; 989 } 990 991 bool TGLexer::prepIsProcessingEnabled() { 992 for (auto I = PrepIncludeStack.back()->rbegin(), 993 E = PrepIncludeStack.back()->rend(); 994 I != E; ++I) { 995 if (!I->IsDefined) 996 return false; 997 } 998 999 return true; 1000 } 1001 1002 void TGLexer::prepReportPreprocessorStackError() { 1003 if (PrepIncludeStack.back()->empty()) 1004 PrintFatalError("prepReportPreprocessorStackError() called with " 1005 "empty control stack"); 1006 1007 auto &PrepControl = PrepIncludeStack.back()->back(); 1008 PrintError(CurBuf.end(), "Reached EOF without matching #endif"); 1009 PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); 1010 1011 TokStart = CurPtr; 1012 } 1013