1*c60b897dSRiver Riddle //===- Token.cpp - MLIR Token Implementation ------------------------------===//
2*c60b897dSRiver Riddle //
3*c60b897dSRiver Riddle // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*c60b897dSRiver Riddle // See https://llvm.org/LICENSE.txt for license information.
5*c60b897dSRiver Riddle // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*c60b897dSRiver Riddle //
7*c60b897dSRiver Riddle //===----------------------------------------------------------------------===//
8*c60b897dSRiver Riddle //
9*c60b897dSRiver Riddle // This file implements the Token class for the MLIR textual form.
10*c60b897dSRiver Riddle //
11*c60b897dSRiver Riddle //===----------------------------------------------------------------------===//
12*c60b897dSRiver Riddle 
13*c60b897dSRiver Riddle #include "Token.h"
14*c60b897dSRiver Riddle #include "llvm/ADT/StringExtras.h"
15*c60b897dSRiver Riddle 
16*c60b897dSRiver Riddle using namespace mlir;
17*c60b897dSRiver Riddle 
getLoc() const18*c60b897dSRiver Riddle SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
19*c60b897dSRiver Riddle 
getEndLoc() const20*c60b897dSRiver Riddle SMLoc Token::getEndLoc() const {
21*c60b897dSRiver Riddle   return SMLoc::getFromPointer(spelling.data() + spelling.size());
22*c60b897dSRiver Riddle }
23*c60b897dSRiver Riddle 
getLocRange() const24*c60b897dSRiver Riddle SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
25*c60b897dSRiver Riddle 
26*c60b897dSRiver Riddle /// For an integer token, return its value as an unsigned.  If it doesn't fit,
27*c60b897dSRiver Riddle /// return None.
getUnsignedIntegerValue() const28*c60b897dSRiver Riddle Optional<unsigned> Token::getUnsignedIntegerValue() const {
29*c60b897dSRiver Riddle   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
30*c60b897dSRiver Riddle 
31*c60b897dSRiver Riddle   unsigned result = 0;
32*c60b897dSRiver Riddle   if (spelling.getAsInteger(isHex ? 0 : 10, result))
33*c60b897dSRiver Riddle     return None;
34*c60b897dSRiver Riddle   return result;
35*c60b897dSRiver Riddle }
36*c60b897dSRiver Riddle 
37*c60b897dSRiver Riddle /// For an integer token, return its value as a uint64_t.  If it doesn't fit,
38*c60b897dSRiver Riddle /// return None.
getUInt64IntegerValue(StringRef spelling)39*c60b897dSRiver Riddle Optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
40*c60b897dSRiver Riddle   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
41*c60b897dSRiver Riddle 
42*c60b897dSRiver Riddle   uint64_t result = 0;
43*c60b897dSRiver Riddle   if (spelling.getAsInteger(isHex ? 0 : 10, result))
44*c60b897dSRiver Riddle     return None;
45*c60b897dSRiver Riddle   return result;
46*c60b897dSRiver Riddle }
47*c60b897dSRiver Riddle 
48*c60b897dSRiver Riddle /// For a floatliteral, return its value as a double. Return None if the value
49*c60b897dSRiver Riddle /// underflows or overflows.
getFloatingPointValue() const50*c60b897dSRiver Riddle Optional<double> Token::getFloatingPointValue() const {
51*c60b897dSRiver Riddle   double result = 0;
52*c60b897dSRiver Riddle   if (spelling.getAsDouble(result))
53*c60b897dSRiver Riddle     return None;
54*c60b897dSRiver Riddle   return result;
55*c60b897dSRiver Riddle }
56*c60b897dSRiver Riddle 
57*c60b897dSRiver Riddle /// For an inttype token, return its bitwidth.
getIntTypeBitwidth() const58*c60b897dSRiver Riddle Optional<unsigned> Token::getIntTypeBitwidth() const {
59*c60b897dSRiver Riddle   assert(getKind() == inttype);
60*c60b897dSRiver Riddle   unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
61*c60b897dSRiver Riddle   unsigned result = 0;
62*c60b897dSRiver Riddle   if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
63*c60b897dSRiver Riddle     return None;
64*c60b897dSRiver Riddle   return result;
65*c60b897dSRiver Riddle }
66*c60b897dSRiver Riddle 
getIntTypeSignedness() const67*c60b897dSRiver Riddle Optional<bool> Token::getIntTypeSignedness() const {
68*c60b897dSRiver Riddle   assert(getKind() == inttype);
69*c60b897dSRiver Riddle   if (spelling[0] == 'i')
70*c60b897dSRiver Riddle     return llvm::None;
71*c60b897dSRiver Riddle   if (spelling[0] == 's')
72*c60b897dSRiver Riddle     return true;
73*c60b897dSRiver Riddle   assert(spelling[0] == 'u');
74*c60b897dSRiver Riddle   return false;
75*c60b897dSRiver Riddle }
76*c60b897dSRiver Riddle 
77*c60b897dSRiver Riddle /// Given a token containing a string literal, return its value, including
78*c60b897dSRiver Riddle /// removing the quote characters and unescaping the contents of the string. The
79*c60b897dSRiver Riddle /// lexer has already verified that this token is valid.
getStringValue() const80*c60b897dSRiver Riddle std::string Token::getStringValue() const {
81*c60b897dSRiver Riddle   assert(getKind() == string || getKind() == code_complete ||
82*c60b897dSRiver Riddle          (getKind() == at_identifier && getSpelling()[1] == '"'));
83*c60b897dSRiver Riddle   // Start by dropping the quotes.
84*c60b897dSRiver Riddle   StringRef bytes = getSpelling().drop_front();
85*c60b897dSRiver Riddle   if (getKind() != Token::code_complete) {
86*c60b897dSRiver Riddle     bytes = bytes.drop_back();
87*c60b897dSRiver Riddle     if (getKind() == at_identifier)
88*c60b897dSRiver Riddle       bytes = bytes.drop_front();
89*c60b897dSRiver Riddle   }
90*c60b897dSRiver Riddle 
91*c60b897dSRiver Riddle   std::string result;
92*c60b897dSRiver Riddle   result.reserve(bytes.size());
93*c60b897dSRiver Riddle   for (unsigned i = 0, e = bytes.size(); i != e;) {
94*c60b897dSRiver Riddle     auto c = bytes[i++];
95*c60b897dSRiver Riddle     if (c != '\\') {
96*c60b897dSRiver Riddle       result.push_back(c);
97*c60b897dSRiver Riddle       continue;
98*c60b897dSRiver Riddle     }
99*c60b897dSRiver Riddle 
100*c60b897dSRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
101*c60b897dSRiver Riddle     auto c1 = bytes[i++];
102*c60b897dSRiver Riddle     switch (c1) {
103*c60b897dSRiver Riddle     case '"':
104*c60b897dSRiver Riddle     case '\\':
105*c60b897dSRiver Riddle       result.push_back(c1);
106*c60b897dSRiver Riddle       continue;
107*c60b897dSRiver Riddle     case 'n':
108*c60b897dSRiver Riddle       result.push_back('\n');
109*c60b897dSRiver Riddle       continue;
110*c60b897dSRiver Riddle     case 't':
111*c60b897dSRiver Riddle       result.push_back('\t');
112*c60b897dSRiver Riddle       continue;
113*c60b897dSRiver Riddle     default:
114*c60b897dSRiver Riddle       break;
115*c60b897dSRiver Riddle     }
116*c60b897dSRiver Riddle 
117*c60b897dSRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
118*c60b897dSRiver Riddle     auto c2 = bytes[i++];
119*c60b897dSRiver Riddle 
120*c60b897dSRiver Riddle     assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
121*c60b897dSRiver Riddle     result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
122*c60b897dSRiver Riddle   }
123*c60b897dSRiver Riddle 
124*c60b897dSRiver Riddle   return result;
125*c60b897dSRiver Riddle }
126*c60b897dSRiver Riddle 
127*c60b897dSRiver Riddle /// Given a token containing a hex string literal, return its value or None if
128*c60b897dSRiver Riddle /// the token does not contain a valid hex string.
getHexStringValue() const129*c60b897dSRiver Riddle Optional<std::string> Token::getHexStringValue() const {
130*c60b897dSRiver Riddle   assert(getKind() == string);
131*c60b897dSRiver Riddle 
132*c60b897dSRiver Riddle   // Get the internal string data, without the quotes.
133*c60b897dSRiver Riddle   StringRef bytes = getSpelling().drop_front().drop_back();
134*c60b897dSRiver Riddle 
135*c60b897dSRiver Riddle   // Try to extract the binary data from the hex string. We expect the hex
136*c60b897dSRiver Riddle   // string to start with `0x` and have an even number of hex nibbles (nibbles
137*c60b897dSRiver Riddle   // should come in pairs).
138*c60b897dSRiver Riddle   std::string hex;
139*c60b897dSRiver Riddle   if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
140*c60b897dSRiver Riddle       !llvm::tryGetFromHex(bytes, hex))
141*c60b897dSRiver Riddle     return llvm::None;
142*c60b897dSRiver Riddle   return hex;
143*c60b897dSRiver Riddle }
144*c60b897dSRiver Riddle 
145*c60b897dSRiver Riddle /// Given a token containing a symbol reference, return the unescaped string
146*c60b897dSRiver Riddle /// value.
getSymbolReference() const147*c60b897dSRiver Riddle std::string Token::getSymbolReference() const {
148*c60b897dSRiver Riddle   assert(is(Token::at_identifier) && "expected valid @-identifier");
149*c60b897dSRiver Riddle   StringRef nameStr = getSpelling().drop_front();
150*c60b897dSRiver Riddle 
151*c60b897dSRiver Riddle   // Check to see if the reference is a string literal, or a bare identifier.
152*c60b897dSRiver Riddle   if (nameStr.front() == '"')
153*c60b897dSRiver Riddle     return getStringValue();
154*c60b897dSRiver Riddle   return std::string(nameStr);
155*c60b897dSRiver Riddle }
156*c60b897dSRiver Riddle 
157*c60b897dSRiver Riddle /// Given a hash_identifier token like #123, try to parse the number out of
158*c60b897dSRiver Riddle /// the identifier, returning None if it is a named identifier like #x or
159*c60b897dSRiver Riddle /// if the integer doesn't fit.
getHashIdentifierNumber() const160*c60b897dSRiver Riddle Optional<unsigned> Token::getHashIdentifierNumber() const {
161*c60b897dSRiver Riddle   assert(getKind() == hash_identifier);
162*c60b897dSRiver Riddle   unsigned result = 0;
163*c60b897dSRiver Riddle   if (spelling.drop_front().getAsInteger(10, result))
164*c60b897dSRiver Riddle     return None;
165*c60b897dSRiver Riddle   return result;
166*c60b897dSRiver Riddle }
167*c60b897dSRiver Riddle 
168*c60b897dSRiver Riddle /// Given a punctuation or keyword token kind, return the spelling of the
169*c60b897dSRiver Riddle /// token as a string.  Warning: This will abort on markers, identifiers and
170*c60b897dSRiver Riddle /// literal tokens since they have no fixed spelling.
getTokenSpelling(Kind kind)171*c60b897dSRiver Riddle StringRef Token::getTokenSpelling(Kind kind) {
172*c60b897dSRiver Riddle   switch (kind) {
173*c60b897dSRiver Riddle   default:
174*c60b897dSRiver Riddle     llvm_unreachable("This token kind has no fixed spelling");
175*c60b897dSRiver Riddle #define TOK_PUNCTUATION(NAME, SPELLING)                                        \
176*c60b897dSRiver Riddle   case NAME:                                                                   \
177*c60b897dSRiver Riddle     return SPELLING;
178*c60b897dSRiver Riddle #define TOK_KEYWORD(SPELLING)                                                  \
179*c60b897dSRiver Riddle   case kw_##SPELLING:                                                          \
180*c60b897dSRiver Riddle     return #SPELLING;
181*c60b897dSRiver Riddle #include "TokenKinds.def"
182*c60b897dSRiver Riddle   }
183*c60b897dSRiver Riddle }
184*c60b897dSRiver Riddle 
185*c60b897dSRiver Riddle /// Return true if this is one of the keyword token kinds (e.g. kw_if).
isKeyword() const186*c60b897dSRiver Riddle bool Token::isKeyword() const {
187*c60b897dSRiver Riddle   switch (kind) {
188*c60b897dSRiver Riddle   default:
189*c60b897dSRiver Riddle     return false;
190*c60b897dSRiver Riddle #define TOK_KEYWORD(SPELLING)                                                  \
191*c60b897dSRiver Riddle   case kw_##SPELLING:                                                          \
192*c60b897dSRiver Riddle     return true;
193*c60b897dSRiver Riddle #include "TokenKinds.def"
194*c60b897dSRiver Riddle   }
195*c60b897dSRiver Riddle }
196*c60b897dSRiver Riddle 
isCodeCompletionFor(Kind kind) const197*c60b897dSRiver Riddle bool Token::isCodeCompletionFor(Kind kind) const {
198*c60b897dSRiver Riddle   if (!isCodeCompletion() || spelling.empty())
199*c60b897dSRiver Riddle     return false;
200*c60b897dSRiver Riddle   switch (kind) {
201*c60b897dSRiver Riddle   case Kind::string:
202*c60b897dSRiver Riddle     return spelling[0] == '"';
203*c60b897dSRiver Riddle   case Kind::hash_identifier:
204*c60b897dSRiver Riddle     return spelling[0] == '#';
205*c60b897dSRiver Riddle   case Kind::percent_identifier:
206*c60b897dSRiver Riddle     return spelling[0] == '%';
207*c60b897dSRiver Riddle   case Kind::caret_identifier:
208*c60b897dSRiver Riddle     return spelling[0] == '^';
209*c60b897dSRiver Riddle   case Kind::exclamation_identifier:
210*c60b897dSRiver Riddle     return spelling[0] == '!';
211*c60b897dSRiver Riddle   default:
212*c60b897dSRiver Riddle     return false;
213*c60b897dSRiver Riddle   }
214*c60b897dSRiver Riddle }
215