1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Token class for the MLIR textual form.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Token.h"
14 #include "llvm/ADT/StringExtras.h"
15 
16 using namespace mlir;
17 
getLoc() const18 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
19 
getEndLoc() const20 SMLoc Token::getEndLoc() const {
21   return SMLoc::getFromPointer(spelling.data() + spelling.size());
22 }
23 
getLocRange() const24 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
25 
26 /// For an integer token, return its value as an unsigned.  If it doesn't fit,
27 /// return None.
getUnsignedIntegerValue() const28 Optional<unsigned> Token::getUnsignedIntegerValue() const {
29   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
30 
31   unsigned result = 0;
32   if (spelling.getAsInteger(isHex ? 0 : 10, result))
33     return None;
34   return result;
35 }
36 
37 /// For an integer token, return its value as a uint64_t.  If it doesn't fit,
38 /// return None.
getUInt64IntegerValue(StringRef spelling)39 Optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
40   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
41 
42   uint64_t result = 0;
43   if (spelling.getAsInteger(isHex ? 0 : 10, result))
44     return None;
45   return result;
46 }
47 
48 /// For a floatliteral, return its value as a double. Return None if the value
49 /// underflows or overflows.
getFloatingPointValue() const50 Optional<double> Token::getFloatingPointValue() const {
51   double result = 0;
52   if (spelling.getAsDouble(result))
53     return None;
54   return result;
55 }
56 
57 /// For an inttype token, return its bitwidth.
getIntTypeBitwidth() const58 Optional<unsigned> Token::getIntTypeBitwidth() const {
59   assert(getKind() == inttype);
60   unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
61   unsigned result = 0;
62   if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
63     return None;
64   return result;
65 }
66 
getIntTypeSignedness() const67 Optional<bool> Token::getIntTypeSignedness() const {
68   assert(getKind() == inttype);
69   if (spelling[0] == 'i')
70     return llvm::None;
71   if (spelling[0] == 's')
72     return true;
73   assert(spelling[0] == 'u');
74   return false;
75 }
76 
77 /// Given a token containing a string literal, return its value, including
78 /// removing the quote characters and unescaping the contents of the string. The
79 /// lexer has already verified that this token is valid.
getStringValue() const80 std::string Token::getStringValue() const {
81   assert(getKind() == string || getKind() == code_complete ||
82          (getKind() == at_identifier && getSpelling()[1] == '"'));
83   // Start by dropping the quotes.
84   StringRef bytes = getSpelling().drop_front();
85   if (getKind() != Token::code_complete) {
86     bytes = bytes.drop_back();
87     if (getKind() == at_identifier)
88       bytes = bytes.drop_front();
89   }
90 
91   std::string result;
92   result.reserve(bytes.size());
93   for (unsigned i = 0, e = bytes.size(); i != e;) {
94     auto c = bytes[i++];
95     if (c != '\\') {
96       result.push_back(c);
97       continue;
98     }
99 
100     assert(i + 1 <= e && "invalid string should be caught by lexer");
101     auto c1 = bytes[i++];
102     switch (c1) {
103     case '"':
104     case '\\':
105       result.push_back(c1);
106       continue;
107     case 'n':
108       result.push_back('\n');
109       continue;
110     case 't':
111       result.push_back('\t');
112       continue;
113     default:
114       break;
115     }
116 
117     assert(i + 1 <= e && "invalid string should be caught by lexer");
118     auto c2 = bytes[i++];
119 
120     assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
121     result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
122   }
123 
124   return result;
125 }
126 
127 /// Given a token containing a hex string literal, return its value or None if
128 /// the token does not contain a valid hex string.
getHexStringValue() const129 Optional<std::string> Token::getHexStringValue() const {
130   assert(getKind() == string);
131 
132   // Get the internal string data, without the quotes.
133   StringRef bytes = getSpelling().drop_front().drop_back();
134 
135   // Try to extract the binary data from the hex string. We expect the hex
136   // string to start with `0x` and have an even number of hex nibbles (nibbles
137   // should come in pairs).
138   std::string hex;
139   if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
140       !llvm::tryGetFromHex(bytes, hex))
141     return llvm::None;
142   return hex;
143 }
144 
145 /// Given a token containing a symbol reference, return the unescaped string
146 /// value.
getSymbolReference() const147 std::string Token::getSymbolReference() const {
148   assert(is(Token::at_identifier) && "expected valid @-identifier");
149   StringRef nameStr = getSpelling().drop_front();
150 
151   // Check to see if the reference is a string literal, or a bare identifier.
152   if (nameStr.front() == '"')
153     return getStringValue();
154   return std::string(nameStr);
155 }
156 
157 /// Given a hash_identifier token like #123, try to parse the number out of
158 /// the identifier, returning None if it is a named identifier like #x or
159 /// if the integer doesn't fit.
getHashIdentifierNumber() const160 Optional<unsigned> Token::getHashIdentifierNumber() const {
161   assert(getKind() == hash_identifier);
162   unsigned result = 0;
163   if (spelling.drop_front().getAsInteger(10, result))
164     return None;
165   return result;
166 }
167 
168 /// Given a punctuation or keyword token kind, return the spelling of the
169 /// token as a string.  Warning: This will abort on markers, identifiers and
170 /// literal tokens since they have no fixed spelling.
getTokenSpelling(Kind kind)171 StringRef Token::getTokenSpelling(Kind kind) {
172   switch (kind) {
173   default:
174     llvm_unreachable("This token kind has no fixed spelling");
175 #define TOK_PUNCTUATION(NAME, SPELLING)                                        \
176   case NAME:                                                                   \
177     return SPELLING;
178 #define TOK_KEYWORD(SPELLING)                                                  \
179   case kw_##SPELLING:                                                          \
180     return #SPELLING;
181 #include "TokenKinds.def"
182   }
183 }
184 
185 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
isKeyword() const186 bool Token::isKeyword() const {
187   switch (kind) {
188   default:
189     return false;
190 #define TOK_KEYWORD(SPELLING)                                                  \
191   case kw_##SPELLING:                                                          \
192     return true;
193 #include "TokenKinds.def"
194   }
195 }
196 
isCodeCompletionFor(Kind kind) const197 bool Token::isCodeCompletionFor(Kind kind) const {
198   if (!isCodeCompletion() || spelling.empty())
199     return false;
200   switch (kind) {
201   case Kind::string:
202     return spelling[0] == '"';
203   case Kind::hash_identifier:
204     return spelling[0] == '#';
205   case Kind::percent_identifier:
206     return spelling[0] == '%';
207   case Kind::caret_identifier:
208     return spelling[0] == '^';
209   case Kind::exclamation_identifier:
210     return spelling[0] == '!';
211   default:
212     return false;
213   }
214 }
215