184c287e3SPeter Collingbourne //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
284c287e3SPeter Collingbourne //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
684c287e3SPeter Collingbourne //
784c287e3SPeter Collingbourne //===----------------------------------------------------------------------===//
884c287e3SPeter Collingbourne //
984c287e3SPeter Collingbourne // Implement the Lexer for TableGen.
1084c287e3SPeter Collingbourne //
1184c287e3SPeter Collingbourne //===----------------------------------------------------------------------===//
1284c287e3SPeter Collingbourne 
1384c287e3SPeter Collingbourne #include "TGLexer.h"
140caaf402SSimon Pilgrim #include "llvm/ADT/ArrayRef.h"
1584c287e3SPeter Collingbourne #include "llvm/ADT/StringSwitch.h"
1684c287e3SPeter Collingbourne #include "llvm/ADT/Twine.h"
17ed0881b2SChandler Carruth #include "llvm/Config/config.h" // for strtoull()/strtoll() define
1833d7b762SEugene Zelenko #include "llvm/Support/Compiler.h"
19ed0881b2SChandler Carruth #include "llvm/Support/MemoryBuffer.h"
20ed0881b2SChandler Carruth #include "llvm/Support/SourceMgr.h"
21ed0881b2SChandler Carruth #include "llvm/TableGen/Error.h"
22f7d079e9SVyacheslav Zakharin #include <algorithm>
2384c287e3SPeter Collingbourne #include <cctype>
24ed0881b2SChandler Carruth #include <cerrno>
2533d7b762SEugene Zelenko #include <cstdint>
2684c287e3SPeter Collingbourne #include <cstdio>
2784c287e3SPeter Collingbourne #include <cstdlib>
2884c287e3SPeter Collingbourne #include <cstring>
29345b7430SDylan Noblesmith 
3084c287e3SPeter Collingbourne using namespace llvm;
3184c287e3SPeter Collingbourne 
32f7d079e9SVyacheslav Zakharin namespace {
33f7d079e9SVyacheslav Zakharin // A list of supported preprocessing directives with their
34f7d079e9SVyacheslav Zakharin // internal token kinds and names.
35f7d079e9SVyacheslav Zakharin struct {
36f7d079e9SVyacheslav Zakharin   tgtok::TokKind Kind;
37f7d079e9SVyacheslav Zakharin   const char *Word;
38f7d079e9SVyacheslav Zakharin } PreprocessorDirs[] = {
39f7d079e9SVyacheslav Zakharin   { tgtok::Ifdef, "ifdef" },
40717b62a1STim Northover   { tgtok::Ifndef, "ifndef" },
41f7d079e9SVyacheslav Zakharin   { tgtok::Else, "else" },
42f7d079e9SVyacheslav Zakharin   { tgtok::Endif, "endif" },
43f7d079e9SVyacheslav Zakharin   { tgtok::Define, "define" }
44f7d079e9SVyacheslav Zakharin };
45f7d079e9SVyacheslav Zakharin } // end anonymous namespace
46f7d079e9SVyacheslav Zakharin 
TGLexer(SourceMgr & SM,ArrayRef<std::string> Macros)47f7d079e9SVyacheslav Zakharin TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
48a55b95b5SAlp Toker   CurBuffer = SrcMgr.getMainFileID();
49a3c65096SRafael Espindola   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
50a3c65096SRafael Espindola   CurPtr = CurBuf.begin();
51011817a0SCraig Topper   TokStart = nullptr;
52f7d079e9SVyacheslav Zakharin 
53f7d079e9SVyacheslav Zakharin   // Pretend that we enter the "top-level" include file.
54f7d079e9SVyacheslav Zakharin   PrepIncludeStack.push_back(
550eaee545SJonas Devlieghere       std::make_unique<std::vector<PreprocessorControlDesc>>());
56f7d079e9SVyacheslav Zakharin 
57f7d079e9SVyacheslav Zakharin   // Put all macros defined in the command line into the DefinedMacros set.
583b9707dbSKazu Hirata   for (const std::string &MacroName : Macros)
59f7d079e9SVyacheslav Zakharin     DefinedMacros.insert(MacroName);
6084c287e3SPeter Collingbourne }
6184c287e3SPeter Collingbourne 
getLoc() const6284c287e3SPeter Collingbourne SMLoc TGLexer::getLoc() const {
6384c287e3SPeter Collingbourne   return SMLoc::getFromPointer(TokStart);
6484c287e3SPeter Collingbourne }
6584c287e3SPeter Collingbourne 
6684c287e3SPeter Collingbourne /// ReturnError - Set the error to the specified string at the specified
6784c287e3SPeter Collingbourne /// location.  This is defined to always return tgtok::Error.
ReturnError(SMLoc Loc,const Twine & Msg)68f7d079e9SVyacheslav Zakharin tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
6984c287e3SPeter Collingbourne   PrintError(Loc, Msg);
7084c287e3SPeter Collingbourne   return tgtok::Error;
7184c287e3SPeter Collingbourne }
7284c287e3SPeter Collingbourne 
ReturnError(const char * Loc,const Twine & Msg)73f7d079e9SVyacheslav Zakharin tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
74f7d079e9SVyacheslav Zakharin   return ReturnError(SMLoc::getFromPointer(Loc), Msg);
75f7d079e9SVyacheslav Zakharin }
76f7d079e9SVyacheslav Zakharin 
processEOF()77f7d079e9SVyacheslav Zakharin bool TGLexer::processEOF() {
78f7d079e9SVyacheslav Zakharin   SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
79f7d079e9SVyacheslav Zakharin   if (ParentIncludeLoc != SMLoc()) {
80f7d079e9SVyacheslav Zakharin     // If prepExitInclude() detects a problem with the preprocessing
81f7d079e9SVyacheslav Zakharin     // control stack, it will return false.  Pretend that we reached
82f7d079e9SVyacheslav Zakharin     // the final EOF and stop lexing more tokens by returning false
83f7d079e9SVyacheslav Zakharin     // to LexToken().
84f7d079e9SVyacheslav Zakharin     if (!prepExitInclude(false))
85f7d079e9SVyacheslav Zakharin       return false;
86f7d079e9SVyacheslav Zakharin 
87f7d079e9SVyacheslav Zakharin     CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
88f7d079e9SVyacheslav Zakharin     CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
89f7d079e9SVyacheslav Zakharin     CurPtr = ParentIncludeLoc.getPointer();
90f7d079e9SVyacheslav Zakharin     // Make sure TokStart points into the parent file's buffer.
91f7d079e9SVyacheslav Zakharin     // LexToken() assigns to it before calling getNextChar(),
92f7d079e9SVyacheslav Zakharin     // so it is pointing into the included file now.
93f7d079e9SVyacheslav Zakharin     TokStart = CurPtr;
94f7d079e9SVyacheslav Zakharin     return true;
95f7d079e9SVyacheslav Zakharin   }
96f7d079e9SVyacheslav Zakharin 
97f7d079e9SVyacheslav Zakharin   // Pretend that we exit the "top-level" include file.
98f7d079e9SVyacheslav Zakharin   // Note that in case of an error (e.g. control stack imbalance)
99f7d079e9SVyacheslav Zakharin   // the routine will issue a fatal error.
100f7d079e9SVyacheslav Zakharin   prepExitInclude(true);
101f7d079e9SVyacheslav Zakharin   return false;
102f7d079e9SVyacheslav Zakharin }
103f7d079e9SVyacheslav Zakharin 
getNextChar()10484c287e3SPeter Collingbourne int TGLexer::getNextChar() {
10584c287e3SPeter Collingbourne   char CurChar = *CurPtr++;
10684c287e3SPeter Collingbourne   switch (CurChar) {
10784c287e3SPeter Collingbourne   default:
10884c287e3SPeter Collingbourne     return (unsigned char)CurChar;
10946402eb1SPaul C. Anagnostopoulos 
110fe9101c3SPaul C. Anagnostopoulos   case 0: {
111fe9101c3SPaul C. Anagnostopoulos     // A NUL character in the stream is either the end of the current buffer or
112fe9101c3SPaul C. Anagnostopoulos     // a spurious NUL in the file.  Disambiguate that here.
113fe9101c3SPaul C. Anagnostopoulos     if (CurPtr - 1 == CurBuf.end()) {
114fe9101c3SPaul C. Anagnostopoulos       --CurPtr; // Arrange for another call to return EOF again.
11584c287e3SPeter Collingbourne       return EOF;
11684c287e3SPeter Collingbourne     }
117fe9101c3SPaul C. Anagnostopoulos     PrintError(getLoc(),
118fe9101c3SPaul C. Anagnostopoulos                "NUL character is invalid in source; treated as space");
119fe9101c3SPaul C. Anagnostopoulos     return ' ';
120fe9101c3SPaul C. Anagnostopoulos   }
121fe9101c3SPaul C. Anagnostopoulos 
12284c287e3SPeter Collingbourne   case '\n':
12384c287e3SPeter Collingbourne   case '\r':
12484c287e3SPeter Collingbourne     // Handle the newline character by ignoring it and incrementing the line
12584c287e3SPeter Collingbourne     // count.  However, be careful about 'dos style' files with \n\r in them.
12684c287e3SPeter Collingbourne     // Only treat a \n\r or \r\n as a single line.
12784c287e3SPeter Collingbourne     if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
12884c287e3SPeter Collingbourne         *CurPtr != CurChar)
12984c287e3SPeter Collingbourne       ++CurPtr;  // Eat the two char newline sequence.
13084c287e3SPeter Collingbourne     return '\n';
13184c287e3SPeter Collingbourne   }
13284c287e3SPeter Collingbourne }
13384c287e3SPeter Collingbourne 
peekNextChar(int Index) const134f7d079e9SVyacheslav Zakharin int TGLexer::peekNextChar(int Index) const {
1359ba42085SDavid Greene   return *(CurPtr + Index);
1369ba42085SDavid Greene }
1379ba42085SDavid Greene 
LexToken(bool FileOrLineStart)138f7d079e9SVyacheslav Zakharin tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
13984c287e3SPeter Collingbourne   TokStart = CurPtr;
14084c287e3SPeter Collingbourne   // This always consumes at least one character.
14184c287e3SPeter Collingbourne   int CurChar = getNextChar();
14284c287e3SPeter Collingbourne 
14384c287e3SPeter Collingbourne   switch (CurChar) {
14484c287e3SPeter Collingbourne   default:
1458e85b480SDavid Greene     // Handle letters: [a-zA-Z_]
1468e85b480SDavid Greene     if (isalpha(CurChar) || CurChar == '_')
14784c287e3SPeter Collingbourne       return LexIdentifier();
14884c287e3SPeter Collingbourne 
14984c287e3SPeter Collingbourne     // Unknown character, emit an error.
15084c287e3SPeter Collingbourne     return ReturnError(TokStart, "Unexpected character");
151f7d079e9SVyacheslav Zakharin   case EOF:
152f7d079e9SVyacheslav Zakharin     // Lex next token, if we just left an include file.
153f7d079e9SVyacheslav Zakharin     // Note that leaving an include file means that the next
154ae2cb4f4SPaul C. Anagnostopoulos     // symbol is located at the end of the 'include "..."'
155f7d079e9SVyacheslav Zakharin     // construct, so LexToken() is called with default
156f7d079e9SVyacheslav Zakharin     // false parameter.
157f7d079e9SVyacheslav Zakharin     if (processEOF())
158f7d079e9SVyacheslav Zakharin       return LexToken();
159f7d079e9SVyacheslav Zakharin 
160f7d079e9SVyacheslav Zakharin     // Return EOF denoting the end of lexing.
161f7d079e9SVyacheslav Zakharin     return tgtok::Eof;
162f7d079e9SVyacheslav Zakharin 
16384c287e3SPeter Collingbourne   case ':': return tgtok::colon;
16484c287e3SPeter Collingbourne   case ';': return tgtok::semi;
16584c287e3SPeter Collingbourne   case ',': return tgtok::comma;
16684c287e3SPeter Collingbourne   case '<': return tgtok::less;
16784c287e3SPeter Collingbourne   case '>': return tgtok::greater;
16884c287e3SPeter Collingbourne   case ']': return tgtok::r_square;
16984c287e3SPeter Collingbourne   case '{': return tgtok::l_brace;
17084c287e3SPeter Collingbourne   case '}': return tgtok::r_brace;
17184c287e3SPeter Collingbourne   case '(': return tgtok::l_paren;
17284c287e3SPeter Collingbourne   case ')': return tgtok::r_paren;
17384c287e3SPeter Collingbourne   case '=': return tgtok::equal;
17484c287e3SPeter Collingbourne   case '?': return tgtok::question;
175f7d079e9SVyacheslav Zakharin   case '#':
176f7d079e9SVyacheslav Zakharin     if (FileOrLineStart) {
177f7d079e9SVyacheslav Zakharin       tgtok::TokKind Kind = prepIsDirective();
178f7d079e9SVyacheslav Zakharin       if (Kind != tgtok::Error)
179f7d079e9SVyacheslav Zakharin         return lexPreprocessor(Kind);
180f7d079e9SVyacheslav Zakharin     }
181f7d079e9SVyacheslav Zakharin 
182f7d079e9SVyacheslav Zakharin     return tgtok::paste;
183f7d079e9SVyacheslav Zakharin 
184196e6f9fSPaul C. Anagnostopoulos   // The period is a separate case so we can recognize the "..."
185196e6f9fSPaul C. Anagnostopoulos   // range punctuator.
186196e6f9fSPaul C. Anagnostopoulos   case '.':
187196e6f9fSPaul C. Anagnostopoulos     if (peekNextChar(0) == '.') {
188196e6f9fSPaul C. Anagnostopoulos       ++CurPtr; // Eat second dot.
189196e6f9fSPaul C. Anagnostopoulos       if (peekNextChar(0) == '.') {
190196e6f9fSPaul C. Anagnostopoulos         ++CurPtr; // Eat third dot.
191196e6f9fSPaul C. Anagnostopoulos         return tgtok::dotdotdot;
192196e6f9fSPaul C. Anagnostopoulos       }
193196e6f9fSPaul C. Anagnostopoulos       return ReturnError(TokStart, "Invalid '..' punctuation");
194196e6f9fSPaul C. Anagnostopoulos     }
195196e6f9fSPaul C. Anagnostopoulos     return tgtok::dot;
196196e6f9fSPaul C. Anagnostopoulos 
197f7d079e9SVyacheslav Zakharin   case '\r':
198f7d079e9SVyacheslav Zakharin     PrintFatalError("getNextChar() must never return '\r'");
199f7d079e9SVyacheslav Zakharin     return tgtok::Error;
20084c287e3SPeter Collingbourne 
20184c287e3SPeter Collingbourne   case ' ':
20284c287e3SPeter Collingbourne   case '\t':
2036a5d5ac4SVyacheslav Zakharin     // Ignore whitespace.
204f7d079e9SVyacheslav Zakharin     return LexToken(FileOrLineStart);
205f7d079e9SVyacheslav Zakharin   case '\n':
206f7d079e9SVyacheslav Zakharin     // Ignore whitespace, and identify the new line.
207f7d079e9SVyacheslav Zakharin     return LexToken(true);
20884c287e3SPeter Collingbourne   case '/':
20984c287e3SPeter Collingbourne     // If this is the start of a // comment, skip until the end of the line or
21084c287e3SPeter Collingbourne     // the end of the buffer.
21184c287e3SPeter Collingbourne     if (*CurPtr == '/')
21284c287e3SPeter Collingbourne       SkipBCPLComment();
21384c287e3SPeter Collingbourne     else if (*CurPtr == '*') {
21484c287e3SPeter Collingbourne       if (SkipCComment())
21584c287e3SPeter Collingbourne         return tgtok::Error;
21684c287e3SPeter Collingbourne     } else // Otherwise, this is an error.
21784c287e3SPeter Collingbourne       return ReturnError(TokStart, "Unexpected character");
218f7d079e9SVyacheslav Zakharin     return LexToken(FileOrLineStart);
21984c287e3SPeter Collingbourne   case '-': case '+':
22084c287e3SPeter Collingbourne   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
2215c9fa026SDavid Greene   case '7': case '8': case '9': {
2225c9fa026SDavid Greene     int NextChar = 0;
2235c9fa026SDavid Greene     if (isdigit(CurChar)) {
2245c9fa026SDavid Greene       // Allow identifiers to start with a number if it is followed by
2255c9fa026SDavid Greene       // an identifier.  This can happen with paste operations like
2265c9fa026SDavid Greene       // foo#8i.
2275c9fa026SDavid Greene       int i = 0;
2285c9fa026SDavid Greene       do {
2295c9fa026SDavid Greene         NextChar = peekNextChar(i++);
2305c9fa026SDavid Greene       } while (isdigit(NextChar));
2315c9fa026SDavid Greene 
2325c9fa026SDavid Greene       if (NextChar == 'x' || NextChar == 'b') {
2335c9fa026SDavid Greene         // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
2345c9fa026SDavid Greene         // likely a number.
2355c9fa026SDavid Greene         int NextNextChar = peekNextChar(i);
2365c9fa026SDavid Greene         switch (NextNextChar) {
2375c9fa026SDavid Greene         default:
2385c9fa026SDavid Greene           break;
2395c9fa026SDavid Greene         case '0': case '1':
2405c9fa026SDavid Greene           if (NextChar == 'b')
24184c287e3SPeter Collingbourne             return LexNumber();
242b03fd12cSJustin Bogner           LLVM_FALLTHROUGH;
2435c9fa026SDavid Greene         case '2': case '3': case '4': case '5':
2445c9fa026SDavid Greene         case '6': case '7': case '8': case '9':
2455c9fa026SDavid Greene         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2465c9fa026SDavid Greene         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2475c9fa026SDavid Greene           if (NextChar == 'x')
2485c9fa026SDavid Greene             return LexNumber();
2495c9fa026SDavid Greene           break;
2505c9fa026SDavid Greene         }
2515c9fa026SDavid Greene       }
2525c9fa026SDavid Greene     }
2535c9fa026SDavid Greene 
2545c9fa026SDavid Greene     if (isalpha(NextChar) || NextChar == '_')
2555c9fa026SDavid Greene       return LexIdentifier();
2565c9fa026SDavid Greene 
2575c9fa026SDavid Greene     return LexNumber();
2585c9fa026SDavid Greene   }
25984c287e3SPeter Collingbourne   case '"': return LexString();
26084c287e3SPeter Collingbourne   case '$': return LexVarName();
26184c287e3SPeter Collingbourne   case '[': return LexBracket();
26284c287e3SPeter Collingbourne   case '!': return LexExclaim();
26384c287e3SPeter Collingbourne   }
26484c287e3SPeter Collingbourne }
26584c287e3SPeter Collingbourne 
26684c287e3SPeter Collingbourne /// LexString - Lex "[^"]*"
LexString()26784c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexString() {
26884c287e3SPeter Collingbourne   const char *StrStart = CurPtr;
26984c287e3SPeter Collingbourne 
27084c287e3SPeter Collingbourne   CurStrVal = "";
27184c287e3SPeter Collingbourne 
27284c287e3SPeter Collingbourne   while (*CurPtr != '"') {
27384c287e3SPeter Collingbourne     // If we hit the end of the buffer, report an error.
274a3c65096SRafael Espindola     if (*CurPtr == 0 && CurPtr == CurBuf.end())
27584c287e3SPeter Collingbourne       return ReturnError(StrStart, "End of file in string literal");
27684c287e3SPeter Collingbourne 
27784c287e3SPeter Collingbourne     if (*CurPtr == '\n' || *CurPtr == '\r')
27884c287e3SPeter Collingbourne       return ReturnError(StrStart, "End of line in string literal");
27984c287e3SPeter Collingbourne 
28084c287e3SPeter Collingbourne     if (*CurPtr != '\\') {
28184c287e3SPeter Collingbourne       CurStrVal += *CurPtr++;
28284c287e3SPeter Collingbourne       continue;
28384c287e3SPeter Collingbourne     }
28484c287e3SPeter Collingbourne 
28584c287e3SPeter Collingbourne     ++CurPtr;
28684c287e3SPeter Collingbourne 
28784c287e3SPeter Collingbourne     switch (*CurPtr) {
28884c287e3SPeter Collingbourne     case '\\': case '\'': case '"':
28984c287e3SPeter Collingbourne       // These turn into their literal character.
29084c287e3SPeter Collingbourne       CurStrVal += *CurPtr++;
29184c287e3SPeter Collingbourne       break;
29284c287e3SPeter Collingbourne     case 't':
29384c287e3SPeter Collingbourne       CurStrVal += '\t';
29484c287e3SPeter Collingbourne       ++CurPtr;
29584c287e3SPeter Collingbourne       break;
29684c287e3SPeter Collingbourne     case 'n':
29784c287e3SPeter Collingbourne       CurStrVal += '\n';
29884c287e3SPeter Collingbourne       ++CurPtr;
29984c287e3SPeter Collingbourne       break;
30084c287e3SPeter Collingbourne 
30184c287e3SPeter Collingbourne     case '\n':
30284c287e3SPeter Collingbourne     case '\r':
30384c287e3SPeter Collingbourne       return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
30484c287e3SPeter Collingbourne 
30584c287e3SPeter Collingbourne     // If we hit the end of the buffer, report an error.
30684c287e3SPeter Collingbourne     case '\0':
307a3c65096SRafael Espindola       if (CurPtr == CurBuf.end())
30884c287e3SPeter Collingbourne         return ReturnError(StrStart, "End of file in string literal");
309cd1d5aafSJustin Bogner       LLVM_FALLTHROUGH;
31084c287e3SPeter Collingbourne     default:
31184c287e3SPeter Collingbourne       return ReturnError(CurPtr, "invalid escape in string literal");
31284c287e3SPeter Collingbourne     }
31384c287e3SPeter Collingbourne   }
31484c287e3SPeter Collingbourne 
31584c287e3SPeter Collingbourne   ++CurPtr;
31684c287e3SPeter Collingbourne   return tgtok::StrVal;
31784c287e3SPeter Collingbourne }
31884c287e3SPeter Collingbourne 
LexVarName()31984c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexVarName() {
32084c287e3SPeter Collingbourne   if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
32184c287e3SPeter Collingbourne     return ReturnError(TokStart, "Invalid variable name");
32284c287e3SPeter Collingbourne 
32384c287e3SPeter Collingbourne   // Otherwise, we're ok, consume the rest of the characters.
32484c287e3SPeter Collingbourne   const char *VarNameStart = CurPtr++;
32584c287e3SPeter Collingbourne 
32684c287e3SPeter Collingbourne   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
32784c287e3SPeter Collingbourne     ++CurPtr;
32884c287e3SPeter Collingbourne 
32984c287e3SPeter Collingbourne   CurStrVal.assign(VarNameStart, CurPtr);
33084c287e3SPeter Collingbourne   return tgtok::VarName;
33184c287e3SPeter Collingbourne }
33284c287e3SPeter Collingbourne 
LexIdentifier()33384c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexIdentifier() {
334f7d079e9SVyacheslav Zakharin   // The first letter is [a-zA-Z_].
33584c287e3SPeter Collingbourne   const char *IdentStart = TokStart;
33684c287e3SPeter Collingbourne 
337f7d079e9SVyacheslav Zakharin   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
3388e85b480SDavid Greene   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
33984c287e3SPeter Collingbourne     ++CurPtr;
34084c287e3SPeter Collingbourne 
341ae2cb4f4SPaul C. Anagnostopoulos   // Check to see if this identifier is a reserved keyword.
342a54985eeSBenjamin Kramer   StringRef Str(IdentStart, CurPtr-IdentStart);
34384c287e3SPeter Collingbourne 
344f9389a36SBenjamin Kramer   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
345f9389a36SBenjamin Kramer     .Case("int", tgtok::Int)
346f9389a36SBenjamin Kramer     .Case("bit", tgtok::Bit)
347f9389a36SBenjamin Kramer     .Case("bits", tgtok::Bits)
348f9389a36SBenjamin Kramer     .Case("string", tgtok::String)
349f9389a36SBenjamin Kramer     .Case("list", tgtok::List)
350f9389a36SBenjamin Kramer     .Case("code", tgtok::Code)
351f9389a36SBenjamin Kramer     .Case("dag", tgtok::Dag)
352f9389a36SBenjamin Kramer     .Case("class", tgtok::Class)
353f9389a36SBenjamin Kramer     .Case("def", tgtok::Def)
354ae2cb4f4SPaul C. Anagnostopoulos     .Case("true", tgtok::TrueVal)
355ae2cb4f4SPaul C. Anagnostopoulos     .Case("false", tgtok::FalseVal)
356fb927af2SDavid Greene     .Case("foreach", tgtok::Foreach)
357f9389a36SBenjamin Kramer     .Case("defm", tgtok::Defm)
358fcd6525aSNicolai Haehnle     .Case("defset", tgtok::Defset)
359f9389a36SBenjamin Kramer     .Case("multiclass", tgtok::MultiClass)
360f9389a36SBenjamin Kramer     .Case("field", tgtok::Field)
361f9389a36SBenjamin Kramer     .Case("let", tgtok::Let)
362f9389a36SBenjamin Kramer     .Case("in", tgtok::In)
3633388b0f5SSimon Tatham     .Case("defvar", tgtok::Defvar)
364ae2cb4f4SPaul C. Anagnostopoulos     .Case("include", tgtok::Include)
365ddbc0b1eSSimon Tatham     .Case("if", tgtok::If)
366ddbc0b1eSSimon Tatham     .Case("then", tgtok::Then)
367ddbc0b1eSSimon Tatham     .Case("else", tgtok::ElseKW)
3686e2b6351SPaul C. Anagnostopoulos     .Case("assert", tgtok::Assert)
369f9389a36SBenjamin Kramer     .Default(tgtok::Id);
370f9389a36SBenjamin Kramer 
371ae2cb4f4SPaul C. Anagnostopoulos   // A couple of tokens require special processing.
372ae2cb4f4SPaul C. Anagnostopoulos   switch (Kind) {
373ae2cb4f4SPaul C. Anagnostopoulos     case tgtok::Include:
374ae2cb4f4SPaul C. Anagnostopoulos       if (LexInclude()) return tgtok::Error;
375ae2cb4f4SPaul C. Anagnostopoulos       return Lex();
376ae2cb4f4SPaul C. Anagnostopoulos     case tgtok::Id:
377a54985eeSBenjamin Kramer       CurStrVal.assign(Str.begin(), Str.end());
378ae2cb4f4SPaul C. Anagnostopoulos       break;
379ae2cb4f4SPaul C. Anagnostopoulos     default:
380ae2cb4f4SPaul C. Anagnostopoulos       break;
381ae2cb4f4SPaul C. Anagnostopoulos   }
382ae2cb4f4SPaul C. Anagnostopoulos 
383f9389a36SBenjamin Kramer   return Kind;
38484c287e3SPeter Collingbourne }
38584c287e3SPeter Collingbourne 
38684c287e3SPeter Collingbourne /// LexInclude - We just read the "include" token.  Get the string token that
38784c287e3SPeter Collingbourne /// comes next and enter the include.
LexInclude()38884c287e3SPeter Collingbourne bool TGLexer::LexInclude() {
38984c287e3SPeter Collingbourne   // The token after the include must be a string.
39084c287e3SPeter Collingbourne   tgtok::TokKind Tok = LexToken();
39184c287e3SPeter Collingbourne   if (Tok == tgtok::Error) return true;
39284c287e3SPeter Collingbourne   if (Tok != tgtok::StrVal) {
39384c287e3SPeter Collingbourne     PrintError(getLoc(), "Expected filename after include");
39484c287e3SPeter Collingbourne     return true;
39584c287e3SPeter Collingbourne   }
39684c287e3SPeter Collingbourne 
39784c287e3SPeter Collingbourne   // Get the string.
39884c287e3SPeter Collingbourne   std::string Filename = CurStrVal;
39984c287e3SPeter Collingbourne   std::string IncludedFile;
40084c287e3SPeter Collingbourne 
40184c287e3SPeter Collingbourne   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
40284c287e3SPeter Collingbourne                                     IncludedFile);
403a55b95b5SAlp Toker   if (!CurBuffer) {
40484c287e3SPeter Collingbourne     PrintError(getLoc(), "Could not find include file '" + Filename + "'");
40584c287e3SPeter Collingbourne     return true;
40684c287e3SPeter Collingbourne   }
40784c287e3SPeter Collingbourne 
408ee9b49eeSRiver Riddle   Dependencies.insert(IncludedFile);
40984c287e3SPeter Collingbourne   // Save the line number and lex buffer of the includer.
410a3c65096SRafael Espindola   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
411a3c65096SRafael Espindola   CurPtr = CurBuf.begin();
412f7d079e9SVyacheslav Zakharin 
413f7d079e9SVyacheslav Zakharin   PrepIncludeStack.push_back(
4140eaee545SJonas Devlieghere       std::make_unique<std::vector<PreprocessorControlDesc>>());
41584c287e3SPeter Collingbourne   return false;
41684c287e3SPeter Collingbourne }
41784c287e3SPeter Collingbourne 
418fe9101c3SPaul C. Anagnostopoulos /// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
419fe9101c3SPaul C. Anagnostopoulos /// Or we may end up at the end of the buffer.
SkipBCPLComment()42084c287e3SPeter Collingbourne void TGLexer::SkipBCPLComment() {
42184c287e3SPeter Collingbourne   ++CurPtr;  // skip the second slash.
422fe9101c3SPaul C. Anagnostopoulos   auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
423fe9101c3SPaul C. Anagnostopoulos   CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
42484c287e3SPeter Collingbourne }
42584c287e3SPeter Collingbourne 
42684c287e3SPeter Collingbourne /// SkipCComment - This skips C-style /**/ comments.  The only difference from C
42784c287e3SPeter Collingbourne /// is that we allow nesting.
SkipCComment()42884c287e3SPeter Collingbourne bool TGLexer::SkipCComment() {
42984c287e3SPeter Collingbourne   ++CurPtr;  // skip the star.
43084c287e3SPeter Collingbourne   unsigned CommentDepth = 1;
43184c287e3SPeter Collingbourne 
43233d7b762SEugene Zelenko   while (true) {
43384c287e3SPeter Collingbourne     int CurChar = getNextChar();
43484c287e3SPeter Collingbourne     switch (CurChar) {
43584c287e3SPeter Collingbourne     case EOF:
43684c287e3SPeter Collingbourne       PrintError(TokStart, "Unterminated comment!");
43784c287e3SPeter Collingbourne       return true;
43884c287e3SPeter Collingbourne     case '*':
43984c287e3SPeter Collingbourne       // End of the comment?
44084c287e3SPeter Collingbourne       if (CurPtr[0] != '/') break;
44184c287e3SPeter Collingbourne 
44284c287e3SPeter Collingbourne       ++CurPtr;   // End the */.
44384c287e3SPeter Collingbourne       if (--CommentDepth == 0)
44484c287e3SPeter Collingbourne         return false;
44584c287e3SPeter Collingbourne       break;
44684c287e3SPeter Collingbourne     case '/':
44784c287e3SPeter Collingbourne       // Start of a nested comment?
44884c287e3SPeter Collingbourne       if (CurPtr[0] != '*') break;
44984c287e3SPeter Collingbourne       ++CurPtr;
45084c287e3SPeter Collingbourne       ++CommentDepth;
45184c287e3SPeter Collingbourne       break;
45284c287e3SPeter Collingbourne     }
45384c287e3SPeter Collingbourne   }
45484c287e3SPeter Collingbourne }
45584c287e3SPeter Collingbourne 
45684c287e3SPeter Collingbourne /// LexNumber - Lex:
45784c287e3SPeter Collingbourne ///    [-+]?[0-9]+
45884c287e3SPeter Collingbourne ///    0x[0-9a-fA-F]+
45984c287e3SPeter Collingbourne ///    0b[01]+
LexNumber()46084c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexNumber() {
46184c287e3SPeter Collingbourne   if (CurPtr[-1] == '0') {
46284c287e3SPeter Collingbourne     if (CurPtr[0] == 'x') {
46384c287e3SPeter Collingbourne       ++CurPtr;
46484c287e3SPeter Collingbourne       const char *NumStart = CurPtr;
46584c287e3SPeter Collingbourne       while (isxdigit(CurPtr[0]))
46684c287e3SPeter Collingbourne         ++CurPtr;
46784c287e3SPeter Collingbourne 
46884c287e3SPeter Collingbourne       // Requires at least one hex digit.
46984c287e3SPeter Collingbourne       if (CurPtr == NumStart)
47084c287e3SPeter Collingbourne         return ReturnError(TokStart, "Invalid hexadecimal number");
47184c287e3SPeter Collingbourne 
47284c287e3SPeter Collingbourne       errno = 0;
473011817a0SCraig Topper       CurIntVal = strtoll(NumStart, nullptr, 16);
47484c287e3SPeter Collingbourne       if (errno == EINVAL)
47584c287e3SPeter Collingbourne         return ReturnError(TokStart, "Invalid hexadecimal number");
47684c287e3SPeter Collingbourne       if (errno == ERANGE) {
47784c287e3SPeter Collingbourne         errno = 0;
478011817a0SCraig Topper         CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
47984c287e3SPeter Collingbourne         if (errno == EINVAL)
48084c287e3SPeter Collingbourne           return ReturnError(TokStart, "Invalid hexadecimal number");
48184c287e3SPeter Collingbourne         if (errno == ERANGE)
48284c287e3SPeter Collingbourne           return ReturnError(TokStart, "Hexadecimal number out of range");
48384c287e3SPeter Collingbourne       }
48484c287e3SPeter Collingbourne       return tgtok::IntVal;
48584c287e3SPeter Collingbourne     } else if (CurPtr[0] == 'b') {
48684c287e3SPeter Collingbourne       ++CurPtr;
48784c287e3SPeter Collingbourne       const char *NumStart = CurPtr;
48884c287e3SPeter Collingbourne       while (CurPtr[0] == '0' || CurPtr[0] == '1')
48984c287e3SPeter Collingbourne         ++CurPtr;
49084c287e3SPeter Collingbourne 
49184c287e3SPeter Collingbourne       // Requires at least one binary digit.
49284c287e3SPeter Collingbourne       if (CurPtr == NumStart)
49384c287e3SPeter Collingbourne         return ReturnError(CurPtr-2, "Invalid binary number");
494011817a0SCraig Topper       CurIntVal = strtoll(NumStart, nullptr, 2);
4952597764aSPete Cooper       return tgtok::BinaryIntVal;
49684c287e3SPeter Collingbourne     }
49784c287e3SPeter Collingbourne   }
49884c287e3SPeter Collingbourne 
49984c287e3SPeter Collingbourne   // Check for a sign without a digit.
50084c287e3SPeter Collingbourne   if (!isdigit(CurPtr[0])) {
50184c287e3SPeter Collingbourne     if (CurPtr[-1] == '-')
50284c287e3SPeter Collingbourne       return tgtok::minus;
50384c287e3SPeter Collingbourne     else if (CurPtr[-1] == '+')
50484c287e3SPeter Collingbourne       return tgtok::plus;
50584c287e3SPeter Collingbourne   }
50684c287e3SPeter Collingbourne 
50784c287e3SPeter Collingbourne   while (isdigit(CurPtr[0]))
50884c287e3SPeter Collingbourne     ++CurPtr;
509011817a0SCraig Topper   CurIntVal = strtoll(TokStart, nullptr, 10);
51084c287e3SPeter Collingbourne   return tgtok::IntVal;
51184c287e3SPeter Collingbourne }
51284c287e3SPeter Collingbourne 
51384c287e3SPeter Collingbourne /// LexBracket - We just read '['.  If this is a code block, return it,
51484c287e3SPeter Collingbourne /// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
LexBracket()51584c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexBracket() {
51684c287e3SPeter Collingbourne   if (CurPtr[0] != '{')
51784c287e3SPeter Collingbourne     return tgtok::l_square;
51884c287e3SPeter Collingbourne   ++CurPtr;
51984c287e3SPeter Collingbourne   const char *CodeStart = CurPtr;
52033d7b762SEugene Zelenko   while (true) {
52184c287e3SPeter Collingbourne     int Char = getNextChar();
52284c287e3SPeter Collingbourne     if (Char == EOF) break;
52384c287e3SPeter Collingbourne 
52484c287e3SPeter Collingbourne     if (Char != '}') continue;
52584c287e3SPeter Collingbourne 
52684c287e3SPeter Collingbourne     Char = getNextChar();
52784c287e3SPeter Collingbourne     if (Char == EOF) break;
52884c287e3SPeter Collingbourne     if (Char == ']') {
52984c287e3SPeter Collingbourne       CurStrVal.assign(CodeStart, CurPtr-2);
53084c287e3SPeter Collingbourne       return tgtok::CodeFragment;
53184c287e3SPeter Collingbourne     }
53284c287e3SPeter Collingbourne   }
53384c287e3SPeter Collingbourne 
534415fab6fSPaul C. Anagnostopoulos   return ReturnError(CodeStart - 2, "Unterminated code block");
53584c287e3SPeter Collingbourne }
53684c287e3SPeter Collingbourne 
53784c287e3SPeter Collingbourne /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
LexExclaim()53884c287e3SPeter Collingbourne tgtok::TokKind TGLexer::LexExclaim() {
53984c287e3SPeter Collingbourne   if (!isalpha(*CurPtr))
54084c287e3SPeter Collingbourne     return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
54184c287e3SPeter Collingbourne 
54284c287e3SPeter Collingbourne   const char *Start = CurPtr++;
54384c287e3SPeter Collingbourne   while (isalpha(*CurPtr))
54484c287e3SPeter Collingbourne     ++CurPtr;
54584c287e3SPeter Collingbourne 
54684c287e3SPeter Collingbourne   // Check to see which operator this is.
54784c287e3SPeter Collingbourne   tgtok::TokKind Kind =
54884c287e3SPeter Collingbourne     StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
54984c287e3SPeter Collingbourne     .Case("eq", tgtok::XEq)
550aa9ca691SNicolai Haehnle     .Case("ne", tgtok::XNe)
551aa9ca691SNicolai Haehnle     .Case("le", tgtok::XLe)
552aa9ca691SNicolai Haehnle     .Case("lt", tgtok::XLt)
553aa9ca691SNicolai Haehnle     .Case("ge", tgtok::XGe)
554aa9ca691SNicolai Haehnle     .Case("gt", tgtok::XGt)
55584c287e3SPeter Collingbourne     .Case("if", tgtok::XIf)
556a3e3d852SJaved Absar     .Case("cond", tgtok::XCond)
557b5376059SNicolai Haehnle     .Case("isa", tgtok::XIsA)
55884c287e3SPeter Collingbourne     .Case("head", tgtok::XHead)
55984c287e3SPeter Collingbourne     .Case("tail", tgtok::XTail)
5600243aaf4SNicolai Haehnle     .Case("size", tgtok::XSize)
56184c287e3SPeter Collingbourne     .Case("con", tgtok::XConcat)
5626c118656SNicolai Haehnle     .Case("dag", tgtok::XDag)
563c7d4dc13SHal Finkel     .Case("add", tgtok::XADD)
5649d72065cSPaul C. Anagnostopoulos     .Case("sub", tgtok::XSUB)
565a8967569SNicola Zaghen     .Case("mul", tgtok::XMUL)
5664767bb2cSPaul C. Anagnostopoulos     .Case("not", tgtok::XNOT)
5676b41a990SJoerg Sonnenberger     .Case("and", tgtok::XAND)
5681c8d9338SMatt Arsenault     .Case("or", tgtok::XOR)
5694767bb2cSPaul C. Anagnostopoulos     .Case("xor", tgtok::XXOR)
57084c287e3SPeter Collingbourne     .Case("shl", tgtok::XSHL)
57184c287e3SPeter Collingbourne     .Case("sra", tgtok::XSRA)
57284c287e3SPeter Collingbourne     .Case("srl", tgtok::XSRL)
57384c287e3SPeter Collingbourne     .Case("cast", tgtok::XCast)
57484c287e3SPeter Collingbourne     .Case("empty", tgtok::XEmpty)
57584c287e3SPeter Collingbourne     .Case("subst", tgtok::XSubst)
576d34f6843SNicolai Haehnle     .Case("foldl", tgtok::XFoldl)
57784c287e3SPeter Collingbourne     .Case("foreach", tgtok::XForEach)
57891d2e5c8SPaul C. Anagnostopoulos     .Case("filter", tgtok::XFilter)
579314e80e5SDaniel Sanders     .Case("listconcat", tgtok::XListConcat)
5805d9f656bSRoman Lebedev     .Case("listsplat", tgtok::XListSplat)
58184c287e3SPeter Collingbourne     .Case("strconcat", tgtok::XStrConcat)
582d56cd429SPaul C. Anagnostopoulos     .Case("interleave", tgtok::XInterleave)
583e122a71aSPaul C. Anagnostopoulos     .Case("substr", tgtok::XSubstr)
584952c6dddSPaul C. Anagnostopoulos     .Case("find", tgtok::XFind)
585876af264SPaul C. Anagnostopoulos     .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
586876af264SPaul C. Anagnostopoulos     .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
587*63448488Swangpc     .Case("exists", tgtok::XExists)
58884c287e3SPeter Collingbourne     .Default(tgtok::Error);
58984c287e3SPeter Collingbourne 
59084c287e3SPeter Collingbourne   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
59184c287e3SPeter Collingbourne }
592f7d079e9SVyacheslav Zakharin 
prepExitInclude(bool IncludeStackMustBeEmpty)593f7d079e9SVyacheslav Zakharin bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
594f7d079e9SVyacheslav Zakharin   // Report an error, if preprocessor control stack for the current
595f7d079e9SVyacheslav Zakharin   // file is not empty.
596f7d079e9SVyacheslav Zakharin   if (!PrepIncludeStack.back()->empty()) {
597f7d079e9SVyacheslav Zakharin     prepReportPreprocessorStackError();
598f7d079e9SVyacheslav Zakharin 
599f7d079e9SVyacheslav Zakharin     return false;
600f7d079e9SVyacheslav Zakharin   }
601f7d079e9SVyacheslav Zakharin 
602f7d079e9SVyacheslav Zakharin   // Pop the preprocessing controls from the include stack.
603f7d079e9SVyacheslav Zakharin   if (PrepIncludeStack.empty()) {
604f7d079e9SVyacheslav Zakharin     PrintFatalError("Preprocessor include stack is empty");
605f7d079e9SVyacheslav Zakharin   }
606f7d079e9SVyacheslav Zakharin 
607f7d079e9SVyacheslav Zakharin   PrepIncludeStack.pop_back();
608f7d079e9SVyacheslav Zakharin 
609f7d079e9SVyacheslav Zakharin   if (IncludeStackMustBeEmpty) {
610f7d079e9SVyacheslav Zakharin     if (!PrepIncludeStack.empty())
611f7d079e9SVyacheslav Zakharin       PrintFatalError("Preprocessor include stack is not empty");
612f7d079e9SVyacheslav Zakharin   } else {
613f7d079e9SVyacheslav Zakharin     if (PrepIncludeStack.empty())
614f7d079e9SVyacheslav Zakharin       PrintFatalError("Preprocessor include stack is empty");
615f7d079e9SVyacheslav Zakharin   }
616f7d079e9SVyacheslav Zakharin 
617f7d079e9SVyacheslav Zakharin   return true;
618f7d079e9SVyacheslav Zakharin }
619f7d079e9SVyacheslav Zakharin 
prepIsDirective() const620f7d079e9SVyacheslav Zakharin tgtok::TokKind TGLexer::prepIsDirective() const {
621dd9a6411SKazu Hirata   for (const auto &PD : PreprocessorDirs) {
622f7d079e9SVyacheslav Zakharin     int NextChar = *CurPtr;
623f7d079e9SVyacheslav Zakharin     bool Match = true;
624f7d079e9SVyacheslav Zakharin     unsigned I = 0;
625dd9a6411SKazu Hirata     for (; I < strlen(PD.Word); ++I) {
626dd9a6411SKazu Hirata       if (NextChar != PD.Word[I]) {
627f7d079e9SVyacheslav Zakharin         Match = false;
628f7d079e9SVyacheslav Zakharin         break;
629f7d079e9SVyacheslav Zakharin       }
630f7d079e9SVyacheslav Zakharin 
631f7d079e9SVyacheslav Zakharin       NextChar = peekNextChar(I + 1);
632f7d079e9SVyacheslav Zakharin     }
633f7d079e9SVyacheslav Zakharin 
634f7d079e9SVyacheslav Zakharin     // Check for whitespace after the directive.  If there is no whitespace,
635f7d079e9SVyacheslav Zakharin     // then we do not recognize it as a preprocessing directive.
636f7d079e9SVyacheslav Zakharin     if (Match) {
637dd9a6411SKazu Hirata       tgtok::TokKind Kind = PD.Kind;
638f7d079e9SVyacheslav Zakharin 
639f7d079e9SVyacheslav Zakharin       // New line and EOF may follow only #else/#endif.  It will be reported
640f7d079e9SVyacheslav Zakharin       // as an error for #ifdef/#define after the call to prepLexMacroName().
641f7d079e9SVyacheslav Zakharin       if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
642f7d079e9SVyacheslav Zakharin           NextChar == '\n' ||
643f7d079e9SVyacheslav Zakharin           // It looks like TableGen does not support '\r' as the actual
644f7d079e9SVyacheslav Zakharin           // carriage return, e.g. getNextChar() treats a single '\r'
645f7d079e9SVyacheslav Zakharin           // as '\n'.  So we do the same here.
646f7d079e9SVyacheslav Zakharin           NextChar == '\r')
647f7d079e9SVyacheslav Zakharin         return Kind;
648f7d079e9SVyacheslav Zakharin 
649f7d079e9SVyacheslav Zakharin       // Allow comments after some directives, e.g.:
650f7d079e9SVyacheslav Zakharin       //     #else// OR #else/**/
651f7d079e9SVyacheslav Zakharin       //     #endif// OR #endif/**/
652f7d079e9SVyacheslav Zakharin       //
653f7d079e9SVyacheslav Zakharin       // Note that we do allow comments after #ifdef/#define here, e.g.
654f7d079e9SVyacheslav Zakharin       //     #ifdef/**/ AND #ifdef//
655f7d079e9SVyacheslav Zakharin       //     #define/**/ AND #define//
656f7d079e9SVyacheslav Zakharin       //
657f7d079e9SVyacheslav Zakharin       // These cases will be reported as incorrect after calling
658f7d079e9SVyacheslav Zakharin       // prepLexMacroName().  We could have supported C-style comments
659f7d079e9SVyacheslav Zakharin       // after #ifdef/#define, but this would complicate the code
660f7d079e9SVyacheslav Zakharin       // for little benefit.
661f7d079e9SVyacheslav Zakharin       if (NextChar == '/') {
662f7d079e9SVyacheslav Zakharin         NextChar = peekNextChar(I + 1);
663f7d079e9SVyacheslav Zakharin 
664f7d079e9SVyacheslav Zakharin         if (NextChar == '*' || NextChar == '/')
665f7d079e9SVyacheslav Zakharin           return Kind;
666f7d079e9SVyacheslav Zakharin 
667f7d079e9SVyacheslav Zakharin         // Pretend that we do not recognize the directive.
668f7d079e9SVyacheslav Zakharin       }
669f7d079e9SVyacheslav Zakharin     }
670f7d079e9SVyacheslav Zakharin   }
671f7d079e9SVyacheslav Zakharin 
672f7d079e9SVyacheslav Zakharin   return tgtok::Error;
673f7d079e9SVyacheslav Zakharin }
674f7d079e9SVyacheslav Zakharin 
prepEatPreprocessorDirective(tgtok::TokKind Kind)675f7d079e9SVyacheslav Zakharin bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
676f7d079e9SVyacheslav Zakharin   TokStart = CurPtr;
677f7d079e9SVyacheslav Zakharin 
678dd9a6411SKazu Hirata   for (const auto &PD : PreprocessorDirs)
679dd9a6411SKazu Hirata     if (PD.Kind == Kind) {
680f7d079e9SVyacheslav Zakharin       // Advance CurPtr to the end of the preprocessing word.
681dd9a6411SKazu Hirata       CurPtr += strlen(PD.Word);
682f7d079e9SVyacheslav Zakharin       return true;
683f7d079e9SVyacheslav Zakharin     }
684f7d079e9SVyacheslav Zakharin 
685f7d079e9SVyacheslav Zakharin   PrintFatalError("Unsupported preprocessing token in "
686f7d079e9SVyacheslav Zakharin                   "prepEatPreprocessorDirective()");
687f7d079e9SVyacheslav Zakharin   return false;
688f7d079e9SVyacheslav Zakharin }
689f7d079e9SVyacheslav Zakharin 
lexPreprocessor(tgtok::TokKind Kind,bool ReturnNextLiveToken)690f7d079e9SVyacheslav Zakharin tgtok::TokKind TGLexer::lexPreprocessor(
691f7d079e9SVyacheslav Zakharin     tgtok::TokKind Kind, bool ReturnNextLiveToken) {
692f7d079e9SVyacheslav Zakharin 
693f7d079e9SVyacheslav Zakharin   // We must be looking at a preprocessing directive.  Eat it!
694f7d079e9SVyacheslav Zakharin   if (!prepEatPreprocessorDirective(Kind))
695f7d079e9SVyacheslav Zakharin     PrintFatalError("lexPreprocessor() called for unknown "
696f7d079e9SVyacheslav Zakharin                     "preprocessor directive");
697f7d079e9SVyacheslav Zakharin 
698717b62a1STim Northover   if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
699f7d079e9SVyacheslav Zakharin     StringRef MacroName = prepLexMacroName();
700717b62a1STim Northover     StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
701f7d079e9SVyacheslav Zakharin     if (MacroName.empty())
702717b62a1STim Northover       return ReturnError(TokStart, "Expected macro name after " + IfTokName);
703f7d079e9SVyacheslav Zakharin 
704f7d079e9SVyacheslav Zakharin     bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
705f7d079e9SVyacheslav Zakharin 
706717b62a1STim Northover     // Canonicalize ifndef to ifdef equivalent
707717b62a1STim Northover     if (Kind == tgtok::Ifndef) {
708717b62a1STim Northover       MacroIsDefined = !MacroIsDefined;
709717b62a1STim Northover       Kind = tgtok::Ifdef;
710717b62a1STim Northover     }
711717b62a1STim Northover 
712f7d079e9SVyacheslav Zakharin     // Regardless of whether we are processing tokens or not,
713f7d079e9SVyacheslav Zakharin     // we put the #ifdef control on stack.
714f7d079e9SVyacheslav Zakharin     PrepIncludeStack.back()->push_back(
715f7d079e9SVyacheslav Zakharin         {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
716f7d079e9SVyacheslav Zakharin 
717f7d079e9SVyacheslav Zakharin     if (!prepSkipDirectiveEnd())
718717b62a1STim Northover       return ReturnError(CurPtr, "Only comments are supported after " +
719717b62a1STim Northover                                      IfTokName + " NAME");
720f7d079e9SVyacheslav Zakharin 
721f7d079e9SVyacheslav Zakharin     // If we were not processing tokens before this #ifdef,
722f7d079e9SVyacheslav Zakharin     // then just return back to the lines skipping code.
723f7d079e9SVyacheslav Zakharin     if (!ReturnNextLiveToken)
724f7d079e9SVyacheslav Zakharin       return Kind;
725f7d079e9SVyacheslav Zakharin 
726f7d079e9SVyacheslav Zakharin     // If we were processing tokens before this #ifdef,
727f7d079e9SVyacheslav Zakharin     // and the macro is defined, then just return the next token.
728f7d079e9SVyacheslav Zakharin     if (MacroIsDefined)
729f7d079e9SVyacheslav Zakharin       return LexToken();
730f7d079e9SVyacheslav Zakharin 
731f7d079e9SVyacheslav Zakharin     // We were processing tokens before this #ifdef, and the macro
732f7d079e9SVyacheslav Zakharin     // is not defined, so we have to start skipping the lines.
733f7d079e9SVyacheslav Zakharin     // If the skipping is successful, it will return the token following
734f7d079e9SVyacheslav Zakharin     // either #else or #endif corresponding to this #ifdef.
735f7d079e9SVyacheslav Zakharin     if (prepSkipRegion(ReturnNextLiveToken))
736f7d079e9SVyacheslav Zakharin       return LexToken();
737f7d079e9SVyacheslav Zakharin 
738f7d079e9SVyacheslav Zakharin     return tgtok::Error;
739f7d079e9SVyacheslav Zakharin   } else if (Kind == tgtok::Else) {
740f7d079e9SVyacheslav Zakharin     // Check if this #else is correct before calling prepSkipDirectiveEnd(),
741f7d079e9SVyacheslav Zakharin     // which will move CurPtr away from the beginning of #else.
742f7d079e9SVyacheslav Zakharin     if (PrepIncludeStack.back()->empty())
743717b62a1STim Northover       return ReturnError(TokStart, "#else without #ifdef or #ifndef");
744f7d079e9SVyacheslav Zakharin 
745f7d079e9SVyacheslav Zakharin     PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
746f7d079e9SVyacheslav Zakharin 
747f7d079e9SVyacheslav Zakharin     if (IfdefEntry.Kind != tgtok::Ifdef) {
748f7d079e9SVyacheslav Zakharin       PrintError(TokStart, "double #else");
749f7d079e9SVyacheslav Zakharin       return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
750f7d079e9SVyacheslav Zakharin     }
751f7d079e9SVyacheslav Zakharin 
752f7d079e9SVyacheslav Zakharin     // Replace the corresponding #ifdef's control with its negation
753f7d079e9SVyacheslav Zakharin     // on the control stack.
754f7d079e9SVyacheslav Zakharin     PrepIncludeStack.back()->pop_back();
755f7d079e9SVyacheslav Zakharin     PrepIncludeStack.back()->push_back(
756f7d079e9SVyacheslav Zakharin         {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
757f7d079e9SVyacheslav Zakharin 
758f7d079e9SVyacheslav Zakharin     if (!prepSkipDirectiveEnd())
759f7d079e9SVyacheslav Zakharin       return ReturnError(CurPtr, "Only comments are supported after #else");
760f7d079e9SVyacheslav Zakharin 
761f7d079e9SVyacheslav Zakharin     // If we were processing tokens before this #else,
762f7d079e9SVyacheslav Zakharin     // we have to start skipping lines until the matching #endif.
763f7d079e9SVyacheslav Zakharin     if (ReturnNextLiveToken) {
764f7d079e9SVyacheslav Zakharin       if (prepSkipRegion(ReturnNextLiveToken))
765f7d079e9SVyacheslav Zakharin         return LexToken();
766f7d079e9SVyacheslav Zakharin 
767f7d079e9SVyacheslav Zakharin       return tgtok::Error;
768f7d079e9SVyacheslav Zakharin     }
769f7d079e9SVyacheslav Zakharin 
770f7d079e9SVyacheslav Zakharin     // Return to the lines skipping code.
771f7d079e9SVyacheslav Zakharin     return Kind;
772f7d079e9SVyacheslav Zakharin   } else if (Kind == tgtok::Endif) {
773f7d079e9SVyacheslav Zakharin     // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
774f7d079e9SVyacheslav Zakharin     // which will move CurPtr away from the beginning of #endif.
775f7d079e9SVyacheslav Zakharin     if (PrepIncludeStack.back()->empty())
776f7d079e9SVyacheslav Zakharin       return ReturnError(TokStart, "#endif without #ifdef");
777f7d079e9SVyacheslav Zakharin 
778f7d079e9SVyacheslav Zakharin     auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
779f7d079e9SVyacheslav Zakharin 
780f7d079e9SVyacheslav Zakharin     if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
781f7d079e9SVyacheslav Zakharin         IfdefOrElseEntry.Kind != tgtok::Else) {
782f7d079e9SVyacheslav Zakharin       PrintFatalError("Invalid preprocessor control on the stack");
783f7d079e9SVyacheslav Zakharin       return tgtok::Error;
784f7d079e9SVyacheslav Zakharin     }
785f7d079e9SVyacheslav Zakharin 
786f7d079e9SVyacheslav Zakharin     if (!prepSkipDirectiveEnd())
787f7d079e9SVyacheslav Zakharin       return ReturnError(CurPtr, "Only comments are supported after #endif");
788f7d079e9SVyacheslav Zakharin 
789f7d079e9SVyacheslav Zakharin     PrepIncludeStack.back()->pop_back();
790f7d079e9SVyacheslav Zakharin 
791f7d079e9SVyacheslav Zakharin     // If we were processing tokens before this #endif, then
792f7d079e9SVyacheslav Zakharin     // we should continue it.
793f7d079e9SVyacheslav Zakharin     if (ReturnNextLiveToken) {
794f7d079e9SVyacheslav Zakharin       return LexToken();
795f7d079e9SVyacheslav Zakharin     }
796f7d079e9SVyacheslav Zakharin 
797f7d079e9SVyacheslav Zakharin     // Return to the lines skipping code.
798f7d079e9SVyacheslav Zakharin     return Kind;
799f7d079e9SVyacheslav Zakharin   } else if (Kind == tgtok::Define) {
800f7d079e9SVyacheslav Zakharin     StringRef MacroName = prepLexMacroName();
801f7d079e9SVyacheslav Zakharin     if (MacroName.empty())
802f7d079e9SVyacheslav Zakharin       return ReturnError(TokStart, "Expected macro name after #define");
803f7d079e9SVyacheslav Zakharin 
804f7d079e9SVyacheslav Zakharin     if (!DefinedMacros.insert(MacroName).second)
805f7d079e9SVyacheslav Zakharin       PrintWarning(getLoc(),
806f7d079e9SVyacheslav Zakharin                    "Duplicate definition of macro: " + Twine(MacroName));
807f7d079e9SVyacheslav Zakharin 
808f7d079e9SVyacheslav Zakharin     if (!prepSkipDirectiveEnd())
809f7d079e9SVyacheslav Zakharin       return ReturnError(CurPtr,
810f7d079e9SVyacheslav Zakharin                          "Only comments are supported after #define NAME");
811f7d079e9SVyacheslav Zakharin 
812f7d079e9SVyacheslav Zakharin     if (!ReturnNextLiveToken) {
813f7d079e9SVyacheslav Zakharin       PrintFatalError("#define must be ignored during the lines skipping");
814f7d079e9SVyacheslav Zakharin       return tgtok::Error;
815f7d079e9SVyacheslav Zakharin     }
816f7d079e9SVyacheslav Zakharin 
817f7d079e9SVyacheslav Zakharin     return LexToken();
818f7d079e9SVyacheslav Zakharin   }
819f7d079e9SVyacheslav Zakharin 
820f7d079e9SVyacheslav Zakharin   PrintFatalError("Preprocessing directive is not supported");
821f7d079e9SVyacheslav Zakharin   return tgtok::Error;
822f7d079e9SVyacheslav Zakharin }
823f7d079e9SVyacheslav Zakharin 
prepSkipRegion(bool MustNeverBeFalse)824f7d079e9SVyacheslav Zakharin bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
825f7d079e9SVyacheslav Zakharin   if (!MustNeverBeFalse)
826f7d079e9SVyacheslav Zakharin     PrintFatalError("Invalid recursion.");
827f7d079e9SVyacheslav Zakharin 
828f7d079e9SVyacheslav Zakharin   do {
829f7d079e9SVyacheslav Zakharin     // Skip all symbols to the line end.
830f7d079e9SVyacheslav Zakharin     prepSkipToLineEnd();
831f7d079e9SVyacheslav Zakharin 
832f7d079e9SVyacheslav Zakharin     // Find the first non-whitespace symbol in the next line(s).
833f7d079e9SVyacheslav Zakharin     if (!prepSkipLineBegin())
834f7d079e9SVyacheslav Zakharin       return false;
835f7d079e9SVyacheslav Zakharin 
836f7d079e9SVyacheslav Zakharin     // If the first non-blank/comment symbol on the line is '#',
837f7d079e9SVyacheslav Zakharin     // it may be a start of preprocessing directive.
838f7d079e9SVyacheslav Zakharin     //
839f7d079e9SVyacheslav Zakharin     // If it is not '#' just go to the next line.
840f7d079e9SVyacheslav Zakharin     if (*CurPtr == '#')
841f7d079e9SVyacheslav Zakharin       ++CurPtr;
842f7d079e9SVyacheslav Zakharin     else
843f7d079e9SVyacheslav Zakharin       continue;
844f7d079e9SVyacheslav Zakharin 
845f7d079e9SVyacheslav Zakharin     tgtok::TokKind Kind = prepIsDirective();
846f7d079e9SVyacheslav Zakharin 
847f7d079e9SVyacheslav Zakharin     // If we did not find a preprocessing directive or it is #define,
848f7d079e9SVyacheslav Zakharin     // then just skip to the next line.  We do not have to do anything
849f7d079e9SVyacheslav Zakharin     // for #define in the line-skipping mode.
850f7d079e9SVyacheslav Zakharin     if (Kind == tgtok::Error || Kind == tgtok::Define)
851f7d079e9SVyacheslav Zakharin       continue;
852f7d079e9SVyacheslav Zakharin 
853f7d079e9SVyacheslav Zakharin     tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
854f7d079e9SVyacheslav Zakharin 
855f7d079e9SVyacheslav Zakharin     // If lexPreprocessor() encountered an error during lexing this
856f7d079e9SVyacheslav Zakharin     // preprocessor idiom, then return false to the calling lexPreprocessor().
857f7d079e9SVyacheslav Zakharin     // This will force tgtok::Error to be returned to the tokens processing.
858f7d079e9SVyacheslav Zakharin     if (ProcessedKind == tgtok::Error)
859f7d079e9SVyacheslav Zakharin       return false;
860f7d079e9SVyacheslav Zakharin 
861f7d079e9SVyacheslav Zakharin     if (Kind != ProcessedKind)
862f7d079e9SVyacheslav Zakharin       PrintFatalError("prepIsDirective() and lexPreprocessor() "
863f7d079e9SVyacheslav Zakharin                       "returned different token kinds");
864f7d079e9SVyacheslav Zakharin 
865f7d079e9SVyacheslav Zakharin     // If this preprocessing directive enables tokens processing,
866f7d079e9SVyacheslav Zakharin     // then return to the lexPreprocessor() and get to the next token.
867f7d079e9SVyacheslav Zakharin     // We can move from line-skipping mode to processing tokens only
868f7d079e9SVyacheslav Zakharin     // due to #else or #endif.
869f7d079e9SVyacheslav Zakharin     if (prepIsProcessingEnabled()) {
870f7d079e9SVyacheslav Zakharin       if (Kind != tgtok::Else && Kind != tgtok::Endif) {
871f7d079e9SVyacheslav Zakharin         PrintFatalError("Tokens processing was enabled by an unexpected "
872f7d079e9SVyacheslav Zakharin                         "preprocessing directive");
873f7d079e9SVyacheslav Zakharin         return false;
874f7d079e9SVyacheslav Zakharin       }
875f7d079e9SVyacheslav Zakharin 
876f7d079e9SVyacheslav Zakharin       return true;
877f7d079e9SVyacheslav Zakharin     }
878f7d079e9SVyacheslav Zakharin   } while (CurPtr != CurBuf.end());
879f7d079e9SVyacheslav Zakharin 
880f7d079e9SVyacheslav Zakharin   // We have reached the end of the file, but never left the lines-skipping
881f7d079e9SVyacheslav Zakharin   // mode.  This means there is no matching #endif.
882f7d079e9SVyacheslav Zakharin   prepReportPreprocessorStackError();
883f7d079e9SVyacheslav Zakharin   return false;
884f7d079e9SVyacheslav Zakharin }
885f7d079e9SVyacheslav Zakharin 
prepLexMacroName()886f7d079e9SVyacheslav Zakharin StringRef TGLexer::prepLexMacroName() {
887f7d079e9SVyacheslav Zakharin   // Skip whitespaces between the preprocessing directive and the macro name.
888f7d079e9SVyacheslav Zakharin   while (*CurPtr == ' ' || *CurPtr == '\t')
889f7d079e9SVyacheslav Zakharin     ++CurPtr;
890f7d079e9SVyacheslav Zakharin 
891f7d079e9SVyacheslav Zakharin   TokStart = CurPtr;
892f7d079e9SVyacheslav Zakharin   // Macro names start with [a-zA-Z_].
893f7d079e9SVyacheslav Zakharin   if (*CurPtr != '_' && !isalpha(*CurPtr))
894f7d079e9SVyacheslav Zakharin     return "";
895f7d079e9SVyacheslav Zakharin 
896f7d079e9SVyacheslav Zakharin   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
897f7d079e9SVyacheslav Zakharin   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
898f7d079e9SVyacheslav Zakharin     ++CurPtr;
899f7d079e9SVyacheslav Zakharin 
900f7d079e9SVyacheslav Zakharin   return StringRef(TokStart, CurPtr - TokStart);
901f7d079e9SVyacheslav Zakharin }
902f7d079e9SVyacheslav Zakharin 
prepSkipLineBegin()903f7d079e9SVyacheslav Zakharin bool TGLexer::prepSkipLineBegin() {
904f7d079e9SVyacheslav Zakharin   while (CurPtr != CurBuf.end()) {
905f7d079e9SVyacheslav Zakharin     switch (*CurPtr) {
906f7d079e9SVyacheslav Zakharin     case ' ':
907f7d079e9SVyacheslav Zakharin     case '\t':
908f7d079e9SVyacheslav Zakharin     case '\n':
909f7d079e9SVyacheslav Zakharin     case '\r':
910f7d079e9SVyacheslav Zakharin       break;
911f7d079e9SVyacheslav Zakharin 
912f7d079e9SVyacheslav Zakharin     case '/': {
913f7d079e9SVyacheslav Zakharin       int NextChar = peekNextChar(1);
914f7d079e9SVyacheslav Zakharin       if (NextChar == '*') {
915f7d079e9SVyacheslav Zakharin         // Skip C-style comment.
916f7d079e9SVyacheslav Zakharin         // Note that we do not care about skipping the C++-style comments.
917f7d079e9SVyacheslav Zakharin         // If the line contains "//", it may not contain any processable
918f7d079e9SVyacheslav Zakharin         // preprocessing directive.  Just return CurPtr pointing to
919f7d079e9SVyacheslav Zakharin         // the first '/' in this case.  We also do not care about
920f7d079e9SVyacheslav Zakharin         // incorrect symbols after the first '/' - we are in lines-skipping
921f7d079e9SVyacheslav Zakharin         // mode, so incorrect code is allowed to some extent.
922f7d079e9SVyacheslav Zakharin 
923f7d079e9SVyacheslav Zakharin         // Set TokStart to the beginning of the comment to enable proper
924f7d079e9SVyacheslav Zakharin         // diagnostic printing in case of error in SkipCComment().
925f7d079e9SVyacheslav Zakharin         TokStart = CurPtr;
926f7d079e9SVyacheslav Zakharin 
927f7d079e9SVyacheslav Zakharin         // CurPtr must point to '*' before call to SkipCComment().
928f7d079e9SVyacheslav Zakharin         ++CurPtr;
929f7d079e9SVyacheslav Zakharin         if (SkipCComment())
930f7d079e9SVyacheslav Zakharin           return false;
931f7d079e9SVyacheslav Zakharin       } else {
932f7d079e9SVyacheslav Zakharin         // CurPtr points to the non-whitespace '/'.
933f7d079e9SVyacheslav Zakharin         return true;
934f7d079e9SVyacheslav Zakharin       }
935f7d079e9SVyacheslav Zakharin 
936f7d079e9SVyacheslav Zakharin       // We must not increment CurPtr after the comment was lexed.
937f7d079e9SVyacheslav Zakharin       continue;
938f7d079e9SVyacheslav Zakharin     }
939f7d079e9SVyacheslav Zakharin 
940f7d079e9SVyacheslav Zakharin     default:
941f7d079e9SVyacheslav Zakharin       return true;
942f7d079e9SVyacheslav Zakharin     }
943f7d079e9SVyacheslav Zakharin 
944f7d079e9SVyacheslav Zakharin     ++CurPtr;
945f7d079e9SVyacheslav Zakharin   }
946f7d079e9SVyacheslav Zakharin 
947f7d079e9SVyacheslav Zakharin   // We have reached the end of the file.  Return to the lines skipping
948f7d079e9SVyacheslav Zakharin   // code, and allow it to handle the EOF as needed.
949f7d079e9SVyacheslav Zakharin   return true;
950f7d079e9SVyacheslav Zakharin }
951f7d079e9SVyacheslav Zakharin 
prepSkipDirectiveEnd()952f7d079e9SVyacheslav Zakharin bool TGLexer::prepSkipDirectiveEnd() {
953f7d079e9SVyacheslav Zakharin   while (CurPtr != CurBuf.end()) {
954f7d079e9SVyacheslav Zakharin     switch (*CurPtr) {
955f7d079e9SVyacheslav Zakharin     case ' ':
956f7d079e9SVyacheslav Zakharin     case '\t':
957f7d079e9SVyacheslav Zakharin       break;
958f7d079e9SVyacheslav Zakharin 
959f7d079e9SVyacheslav Zakharin     case '\n':
960f7d079e9SVyacheslav Zakharin     case '\r':
961f7d079e9SVyacheslav Zakharin       return true;
962f7d079e9SVyacheslav Zakharin 
963f7d079e9SVyacheslav Zakharin     case '/': {
964f7d079e9SVyacheslav Zakharin       int NextChar = peekNextChar(1);
965f7d079e9SVyacheslav Zakharin       if (NextChar == '/') {
966f7d079e9SVyacheslav Zakharin         // Skip C++-style comment.
967f7d079e9SVyacheslav Zakharin         // We may just return true now, but let's skip to the line/buffer end
968f7d079e9SVyacheslav Zakharin         // to simplify the method specification.
969f7d079e9SVyacheslav Zakharin         ++CurPtr;
970f7d079e9SVyacheslav Zakharin         SkipBCPLComment();
971f7d079e9SVyacheslav Zakharin       } else if (NextChar == '*') {
972f7d079e9SVyacheslav Zakharin         // When we are skipping C-style comment at the end of a preprocessing
973f7d079e9SVyacheslav Zakharin         // directive, we can skip several lines.  If any meaningful TD token
974f7d079e9SVyacheslav Zakharin         // follows the end of the C-style comment on the same line, it will
975f7d079e9SVyacheslav Zakharin         // be considered as an invalid usage of TD token.
976f7d079e9SVyacheslav Zakharin         // For example, we want to forbid usages like this one:
977f7d079e9SVyacheslav Zakharin         //     #define MACRO class Class {}
978f7d079e9SVyacheslav Zakharin         // But with C-style comments we also disallow the following:
979f7d079e9SVyacheslav Zakharin         //     #define MACRO /* This macro is used
980f7d079e9SVyacheslav Zakharin         //                      to ... */ class Class {}
981f7d079e9SVyacheslav Zakharin         // One can argue that this should be allowed, but it does not seem
982f7d079e9SVyacheslav Zakharin         // to be worth of the complication.  Moreover, this matches
983f7d079e9SVyacheslav Zakharin         // the C preprocessor behavior.
984f7d079e9SVyacheslav Zakharin 
985f7d079e9SVyacheslav Zakharin         // Set TokStart to the beginning of the comment to enable proper
986f7d079e9SVyacheslav Zakharin         // diagnostic printer in case of error in SkipCComment().
987f7d079e9SVyacheslav Zakharin         TokStart = CurPtr;
988f7d079e9SVyacheslav Zakharin         ++CurPtr;
989f7d079e9SVyacheslav Zakharin         if (SkipCComment())
990f7d079e9SVyacheslav Zakharin           return false;
991f7d079e9SVyacheslav Zakharin       } else {
992f7d079e9SVyacheslav Zakharin         TokStart = CurPtr;
993f7d079e9SVyacheslav Zakharin         PrintError(CurPtr, "Unexpected character");
994f7d079e9SVyacheslav Zakharin         return false;
995f7d079e9SVyacheslav Zakharin       }
996f7d079e9SVyacheslav Zakharin 
997f7d079e9SVyacheslav Zakharin       // We must not increment CurPtr after the comment was lexed.
998f7d079e9SVyacheslav Zakharin       continue;
999f7d079e9SVyacheslav Zakharin     }
1000f7d079e9SVyacheslav Zakharin 
1001f7d079e9SVyacheslav Zakharin     default:
1002f7d079e9SVyacheslav Zakharin       // Do not allow any non-whitespaces after the directive.
1003f7d079e9SVyacheslav Zakharin       TokStart = CurPtr;
1004f7d079e9SVyacheslav Zakharin       return false;
1005f7d079e9SVyacheslav Zakharin     }
1006f7d079e9SVyacheslav Zakharin 
1007f7d079e9SVyacheslav Zakharin     ++CurPtr;
1008f7d079e9SVyacheslav Zakharin   }
1009f7d079e9SVyacheslav Zakharin 
1010f7d079e9SVyacheslav Zakharin   return true;
1011f7d079e9SVyacheslav Zakharin }
1012f7d079e9SVyacheslav Zakharin 
prepSkipToLineEnd()1013f7d079e9SVyacheslav Zakharin void TGLexer::prepSkipToLineEnd() {
1014f7d079e9SVyacheslav Zakharin   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end())
1015f7d079e9SVyacheslav Zakharin     ++CurPtr;
1016f7d079e9SVyacheslav Zakharin }
1017f7d079e9SVyacheslav Zakharin 
prepIsProcessingEnabled()1018f7d079e9SVyacheslav Zakharin bool TGLexer::prepIsProcessingEnabled() {
1019fd7d4064SKazu Hirata   for (const PreprocessorControlDesc &I :
1020fd7d4064SKazu Hirata        llvm::reverse(*PrepIncludeStack.back()))
1021fd7d4064SKazu Hirata     if (!I.IsDefined)
1022f7d079e9SVyacheslav Zakharin       return false;
1023f7d079e9SVyacheslav Zakharin 
1024f7d079e9SVyacheslav Zakharin   return true;
1025f7d079e9SVyacheslav Zakharin }
1026f7d079e9SVyacheslav Zakharin 
prepReportPreprocessorStackError()1027f7d079e9SVyacheslav Zakharin void TGLexer::prepReportPreprocessorStackError() {
1028f7d079e9SVyacheslav Zakharin   if (PrepIncludeStack.back()->empty())
1029f7d079e9SVyacheslav Zakharin     PrintFatalError("prepReportPreprocessorStackError() called with "
1030f7d079e9SVyacheslav Zakharin                     "empty control stack");
1031f7d079e9SVyacheslav Zakharin 
1032f7d079e9SVyacheslav Zakharin   auto &PrepControl = PrepIncludeStack.back()->back();
1033f7d079e9SVyacheslav Zakharin   PrintError(CurBuf.end(), "Reached EOF without matching #endif");
1034f7d079e9SVyacheslav Zakharin   PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
1035f7d079e9SVyacheslav Zakharin 
1036f7d079e9SVyacheslav Zakharin   TokStart = CurPtr;
1037f7d079e9SVyacheslav Zakharin }
1038