1 //===- Token.cpp - MLIR Token Implementation ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the Token class for the MLIR textual form. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "Token.h" 14 #include "llvm/ADT/StringExtras.h" 15 16 using namespace mlir; 17 18 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); } 19 20 SMLoc Token::getEndLoc() const { 21 return SMLoc::getFromPointer(spelling.data() + spelling.size()); 22 } 23 24 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); } 25 26 /// For an integer token, return its value as an unsigned. If it doesn't fit, 27 /// return None. 28 Optional<unsigned> Token::getUnsignedIntegerValue() const { 29 bool isHex = spelling.size() > 1 && spelling[1] == 'x'; 30 31 unsigned result = 0; 32 if (spelling.getAsInteger(isHex ? 0 : 10, result)) 33 return None; 34 return result; 35 } 36 37 /// For an integer token, return its value as a uint64_t. If it doesn't fit, 38 /// return None. 39 Optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) { 40 bool isHex = spelling.size() > 1 && spelling[1] == 'x'; 41 42 uint64_t result = 0; 43 if (spelling.getAsInteger(isHex ? 0 : 10, result)) 44 return None; 45 return result; 46 } 47 48 /// For a floatliteral, return its value as a double. Return None if the value 49 /// underflows or overflows. 50 Optional<double> Token::getFloatingPointValue() const { 51 double result = 0; 52 if (spelling.getAsDouble(result)) 53 return None; 54 return result; 55 } 56 57 /// For an inttype token, return its bitwidth. 58 Optional<unsigned> Token::getIntTypeBitwidth() const { 59 assert(getKind() == inttype); 60 unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2); 61 unsigned result = 0; 62 if (spelling.drop_front(bitwidthStart).getAsInteger(10, result)) 63 return None; 64 return result; 65 } 66 67 Optional<bool> Token::getIntTypeSignedness() const { 68 assert(getKind() == inttype); 69 if (spelling[0] == 'i') 70 return llvm::None; 71 if (spelling[0] == 's') 72 return true; 73 assert(spelling[0] == 'u'); 74 return false; 75 } 76 77 /// Given a token containing a string literal, return its value, including 78 /// removing the quote characters and unescaping the contents of the string. The 79 /// lexer has already verified that this token is valid. 80 std::string Token::getStringValue() const { 81 assert(getKind() == string || getKind() == code_complete || 82 (getKind() == at_identifier && getSpelling()[1] == '"')); 83 // Start by dropping the quotes. 84 StringRef bytes = getSpelling().drop_front(); 85 if (getKind() != Token::code_complete) { 86 bytes = bytes.drop_back(); 87 if (getKind() == at_identifier) 88 bytes = bytes.drop_front(); 89 } 90 91 std::string result; 92 result.reserve(bytes.size()); 93 for (unsigned i = 0, e = bytes.size(); i != e;) { 94 auto c = bytes[i++]; 95 if (c != '\\') { 96 result.push_back(c); 97 continue; 98 } 99 100 assert(i + 1 <= e && "invalid string should be caught by lexer"); 101 auto c1 = bytes[i++]; 102 switch (c1) { 103 case '"': 104 case '\\': 105 result.push_back(c1); 106 continue; 107 case 'n': 108 result.push_back('\n'); 109 continue; 110 case 't': 111 result.push_back('\t'); 112 continue; 113 default: 114 break; 115 } 116 117 assert(i + 1 <= e && "invalid string should be caught by lexer"); 118 auto c2 = bytes[i++]; 119 120 assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape"); 121 result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2)); 122 } 123 124 return result; 125 } 126 127 /// Given a token containing a hex string literal, return its value or None if 128 /// the token does not contain a valid hex string. 129 Optional<std::string> Token::getHexStringValue() const { 130 assert(getKind() == string); 131 132 // Get the internal string data, without the quotes. 133 StringRef bytes = getSpelling().drop_front().drop_back(); 134 135 // Try to extract the binary data from the hex string. We expect the hex 136 // string to start with `0x` and have an even number of hex nibbles (nibbles 137 // should come in pairs). 138 std::string hex; 139 if (!bytes.consume_front("0x") || (bytes.size() & 1) || 140 !llvm::tryGetFromHex(bytes, hex)) 141 return llvm::None; 142 return hex; 143 } 144 145 /// Given a token containing a symbol reference, return the unescaped string 146 /// value. 147 std::string Token::getSymbolReference() const { 148 assert(is(Token::at_identifier) && "expected valid @-identifier"); 149 StringRef nameStr = getSpelling().drop_front(); 150 151 // Check to see if the reference is a string literal, or a bare identifier. 152 if (nameStr.front() == '"') 153 return getStringValue(); 154 return std::string(nameStr); 155 } 156 157 /// Given a hash_identifier token like #123, try to parse the number out of 158 /// the identifier, returning None if it is a named identifier like #x or 159 /// if the integer doesn't fit. 160 Optional<unsigned> Token::getHashIdentifierNumber() const { 161 assert(getKind() == hash_identifier); 162 unsigned result = 0; 163 if (spelling.drop_front().getAsInteger(10, result)) 164 return None; 165 return result; 166 } 167 168 /// Given a punctuation or keyword token kind, return the spelling of the 169 /// token as a string. Warning: This will abort on markers, identifiers and 170 /// literal tokens since they have no fixed spelling. 171 StringRef Token::getTokenSpelling(Kind kind) { 172 switch (kind) { 173 default: 174 llvm_unreachable("This token kind has no fixed spelling"); 175 #define TOK_PUNCTUATION(NAME, SPELLING) \ 176 case NAME: \ 177 return SPELLING; 178 #define TOK_KEYWORD(SPELLING) \ 179 case kw_##SPELLING: \ 180 return #SPELLING; 181 #include "TokenKinds.def" 182 } 183 } 184 185 /// Return true if this is one of the keyword token kinds (e.g. kw_if). 186 bool Token::isKeyword() const { 187 switch (kind) { 188 default: 189 return false; 190 #define TOK_KEYWORD(SPELLING) \ 191 case kw_##SPELLING: \ 192 return true; 193 #include "TokenKinds.def" 194 } 195 } 196 197 bool Token::isCodeCompletionFor(Kind kind) const { 198 if (!isCodeCompletion() || spelling.empty()) 199 return false; 200 switch (kind) { 201 case Kind::string: 202 return spelling[0] == '"'; 203 case Kind::hash_identifier: 204 return spelling[0] == '#'; 205 case Kind::percent_identifier: 206 return spelling[0] == '%'; 207 case Kind::caret_identifier: 208 return spelling[0] == '^'; 209 case Kind::exclamation_identifier: 210 return spelling[0] == '!'; 211 default: 212 return false; 213 } 214 } 215