1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_
11 
12 #include "mlir/Support/LLVM.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/SMLoc.h"
15 
16 namespace llvm {
17 class SourceMgr;
18 } // namespace llvm
19 
20 namespace mlir {
21 struct LogicalResult;
22 
23 namespace pdll {
24 class CodeCompleteContext;
25 
26 namespace ast {
27 class DiagnosticEngine;
28 } // namespace ast
29 
30 //===----------------------------------------------------------------------===//
31 // Token
32 //===----------------------------------------------------------------------===//
33 
34 class Token {
35 public:
36   enum Kind {
37     /// Markers.
38     eof,
39     error,
40     /// Token signifying a code completion location.
41     code_complete,
42     /// Token signifying a code completion location within a string.
43     code_complete_string,
44 
45     /// Keywords.
46     KW_BEGIN,
47     /// Dependent keywords, i.e. those that are treated as keywords depending on
48     /// the current parser context.
49     KW_DEPENDENT_BEGIN,
50     kw_attr,
51     kw_op,
52     kw_type,
53     KW_DEPENDENT_END,
54 
55     /// General keywords.
56     kw_Attr,
57     kw_erase,
58     kw_let,
59     kw_Constraint,
60     kw_Op,
61     kw_OpName,
62     kw_Pattern,
63     kw_replace,
64     kw_return,
65     kw_rewrite,
66     kw_Rewrite,
67     kw_Type,
68     kw_TypeRange,
69     kw_Value,
70     kw_ValueRange,
71     kw_with,
72     KW_END,
73 
74     /// Punctuation.
75     arrow,
76     colon,
77     comma,
78     dot,
79     equal,
80     equal_arrow,
81     semicolon,
82     /// Paired punctuation.
83     less,
84     greater,
85     l_brace,
86     r_brace,
87     l_paren,
88     r_paren,
89     l_square,
90     r_square,
91     underscore,
92 
93     /// Tokens.
94     directive,
95     identifier,
96     integer,
97     string_block,
98     string
99   };
Token(Kind kind,StringRef spelling)100   Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
101 
102   /// Given a token containing a string literal, return its value, including
103   /// removing the quote characters and unescaping the contents of the string.
104   std::string getStringValue() const;
105 
106   /// Returns true if the current token is a string literal.
isString()107   bool isString() const { return isAny(Token::string, Token::string_block); }
108 
109   /// Returns true if the current token is a keyword.
isKeyword()110   bool isKeyword() const {
111     return kind > Token::KW_BEGIN && kind < Token::KW_END;
112   }
113 
114   /// Returns true if the current token is a keyword in a dependent context, and
115   /// in any other situation (e.g. variable names) may be treated as an
116   /// identifier.
isDependentKeyword()117   bool isDependentKeyword() const {
118     return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
119   }
120 
121   /// Return the bytes that make up this token.
getSpelling()122   StringRef getSpelling() const { return spelling; }
123 
124   /// Return the kind of this token.
getKind()125   Kind getKind() const { return kind; }
126 
127   /// Return true if this token is one of the specified kinds.
isAny(Kind k1,Kind k2)128   bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
129   template <typename... T>
isAny(Kind k1,Kind k2,Kind k3,T...others)130   bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
131     return is(k1) || isAny(k2, k3, others...);
132   }
133 
134   /// Return if the token does not have the given kind.
isNot(Kind k)135   bool isNot(Kind k) const { return k != kind; }
136   template <typename... T>
isNot(Kind k1,Kind k2,T...others)137   bool isNot(Kind k1, Kind k2, T... others) const {
138     return !isAny(k1, k2, others...);
139   }
140 
141   /// Return if the token has the given kind.
is(Kind k)142   bool is(Kind k) const { return kind == k; }
143 
144   /// Return a location for the start of this token.
getStartLoc()145   SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); }
146   /// Return a location at the end of this token.
getEndLoc()147   SMLoc getEndLoc() const {
148     return SMLoc::getFromPointer(spelling.data() + spelling.size());
149   }
150   /// Return a location for the range of this token.
getLoc()151   SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
152 
153 private:
154   /// Discriminator that indicates the kind of token this is.
155   Kind kind;
156 
157   /// A reference to the entire token contents; this is always a pointer into
158   /// a memory buffer owned by the source manager.
159   StringRef spelling;
160 };
161 
162 //===----------------------------------------------------------------------===//
163 // Lexer
164 //===----------------------------------------------------------------------===//
165 
166 class Lexer {
167 public:
168   Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
169         CodeCompleteContext *codeCompleteContext);
170   ~Lexer();
171 
172   /// Return a reference to the source manager used by the lexer.
getSourceMgr()173   llvm::SourceMgr &getSourceMgr() { return srcMgr; }
174 
175   /// Return a reference to the diagnostic engine used by the lexer.
getDiagEngine()176   ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
177 
178   /// Push an include of the given file. This will cause the lexer to start
179   /// processing the provided file. Returns failure if the file could not be
180   /// opened, success otherwise.
181   LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
182 
183   /// Lex the next token and return it.
184   Token lexToken();
185 
186   /// Change the position of the lexer cursor. The next token we lex will start
187   /// at the designated point in the input.
resetPointer(const char * newPointer)188   void resetPointer(const char *newPointer) { curPtr = newPointer; }
189 
190   /// Emit an error to the lexer with the given location and message.
191   Token emitError(SMRange loc, const Twine &msg);
192   Token emitError(const char *loc, const Twine &msg);
193   Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
194                          const Twine &note);
195 
196 private:
formToken(Token::Kind kind,const char * tokStart)197   Token formToken(Token::Kind kind, const char *tokStart) {
198     return Token(kind, StringRef(tokStart, curPtr - tokStart));
199   }
200 
201   /// Return the next character in the stream.
202   int getNextChar();
203 
204   /// Lex methods.
205   void lexComment();
206   Token lexDirective(const char *tokStart);
207   Token lexIdentifier(const char *tokStart);
208   Token lexNumber(const char *tokStart);
209   Token lexString(const char *tokStart, bool isStringBlock);
210 
211   llvm::SourceMgr &srcMgr;
212   int curBufferID;
213   StringRef curBuffer;
214   const char *curPtr;
215 
216   /// The engine used to emit diagnostics during lexing/parsing.
217   ast::DiagnosticEngine &diagEngine;
218 
219   /// A flag indicating if we added a default diagnostic handler to the provided
220   /// diagEngine.
221   bool addedHandlerToDiagEngine;
222 
223   /// The optional code completion point within the input file.
224   const char *codeCompletionLocation;
225 };
226 } // namespace pdll
227 } // namespace mlir
228 
229 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_
230