1 //===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef MLIR_LIB_PARSER_TOKEN_H
10 #define MLIR_LIB_PARSER_TOKEN_H
11 
12 #include "mlir/Support/LLVM.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/SMLoc.h"
16 
17 namespace mlir {
18 
19 /// This represents a token in the MLIR syntax.
20 class Token {
21 public:
22   enum Kind {
23 #define TOK_MARKER(NAME) NAME,
24 #define TOK_IDENTIFIER(NAME) NAME,
25 #define TOK_LITERAL(NAME) NAME,
26 #define TOK_PUNCTUATION(NAME, SPELLING) NAME,
27 #define TOK_KEYWORD(SPELLING) kw_##SPELLING,
28 #include "TokenKinds.def"
29   };
30 
Token(Kind kind,StringRef spelling)31   Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
32 
33   // Return the bytes that make up this token.
getSpelling()34   StringRef getSpelling() const { return spelling; }
35 
36   // Token classification.
getKind()37   Kind getKind() const { return kind; }
is(Kind k)38   bool is(Kind k) const { return kind == k; }
39 
isAny(Kind k1,Kind k2)40   bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
41 
42   /// Return true if this token is one of the specified kinds.
43   template <typename... T>
isAny(Kind k1,Kind k2,Kind k3,T...others)44   bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
45     if (is(k1))
46       return true;
47     return isAny(k2, k3, others...);
48   }
49 
isNot(Kind k)50   bool isNot(Kind k) const { return kind != k; }
51 
52   /// Return true if this token isn't one of the specified kinds.
53   template <typename... T>
isNot(Kind k1,Kind k2,T...others)54   bool isNot(Kind k1, Kind k2, T... others) const {
55     return !isAny(k1, k2, others...);
56   }
57 
58   /// Return true if this is one of the keyword token kinds (e.g. kw_if).
59   bool isKeyword() const;
60 
61   /// Returns true if the current token represents a code completion.
isCodeCompletion()62   bool isCodeCompletion() const { return is(code_complete); }
63 
64   /// Returns true if the current token represents a code completion for the
65   /// "normal" token type.
66   bool isCodeCompletionFor(Kind kind) const;
67 
68   /// Returns true if the current token is the given type, or represents a code
69   /// completion for that type.
isOrIsCodeCompletionFor(Kind kind)70   bool isOrIsCodeCompletionFor(Kind kind) const {
71     return is(kind) || isCodeCompletionFor(kind);
72   }
73 
74   // Helpers to decode specific sorts of tokens.
75 
76   /// For an integer token, return its value as an unsigned.  If it doesn't fit,
77   /// return None.
78   Optional<unsigned> getUnsignedIntegerValue() const;
79 
80   /// For an integer token, return its value as an uint64_t.  If it doesn't fit,
81   /// return None.
82   static Optional<uint64_t> getUInt64IntegerValue(StringRef spelling);
getUInt64IntegerValue()83   Optional<uint64_t> getUInt64IntegerValue() const {
84     return getUInt64IntegerValue(getSpelling());
85   }
86 
87   /// For a floatliteral token, return its value as a double. Returns None in
88   /// the case of underflow or overflow.
89   Optional<double> getFloatingPointValue() const;
90 
91   /// For an inttype token, return its bitwidth.
92   Optional<unsigned> getIntTypeBitwidth() const;
93 
94   /// For an inttype token, return its signedness semantics: llvm::None means no
95   /// signedness semantics; true means signed integer type; false means unsigned
96   /// integer type.
97   Optional<bool> getIntTypeSignedness() const;
98 
99   /// Given a hash_identifier token like #123, try to parse the number out of
100   /// the identifier, returning None if it is a named identifier like #x or
101   /// if the integer doesn't fit.
102   Optional<unsigned> getHashIdentifierNumber() const;
103 
104   /// Given a token containing a string literal, return its value, including
105   /// removing the quote characters and unescaping the contents of the string.
106   std::string getStringValue() const;
107 
108   /// Given a token containing a hex string literal, return its value or None if
109   /// the token does not contain a valid hex string. A hex string literal is a
110   /// string starting with `0x` and only containing hex digits.
111   Optional<std::string> getHexStringValue() const;
112 
113   /// Given a token containing a symbol reference, return the unescaped string
114   /// value.
115   std::string getSymbolReference() const;
116 
117   // Location processing.
118   SMLoc getLoc() const;
119   SMLoc getEndLoc() const;
120   SMRange getLocRange() const;
121 
122   /// Given a punctuation or keyword token kind, return the spelling of the
123   /// token as a string.  Warning: This will abort on markers, identifiers and
124   /// literal tokens since they have no fixed spelling.
125   static StringRef getTokenSpelling(Kind kind);
126 
127 private:
128   /// Discriminator that indicates the sort of token this is.
129   Kind kind;
130 
131   /// A reference to the entire token contents; this is always a pointer into
132   /// a memory buffer owned by the source manager.
133   StringRef spelling;
134 };
135 
136 } // namespace mlir
137 
138 #endif // MLIR_LIB_PARSER_TOKEN_H
139