1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "token-sequence.h" 10 #include "flang/Parser/characters.h" 11 #include "llvm/Support/raw_ostream.h" 12 13 namespace Fortran::parser { 14 15 TokenSequence &TokenSequence::operator=(TokenSequence &&that) { 16 clear(); 17 swap(that); 18 return *this; 19 } 20 21 void TokenSequence::clear() { 22 start_.clear(); 23 nextStart_ = 0; 24 char_.clear(); 25 provenances_.clear(); 26 } 27 28 void TokenSequence::pop_back() { 29 std::size_t bytes{nextStart_ - start_.back()}; 30 nextStart_ = start_.back(); 31 start_.pop_back(); 32 char_.resize(nextStart_); 33 provenances_.RemoveLastBytes(bytes); 34 } 35 36 void TokenSequence::shrink_to_fit() { 37 start_.shrink_to_fit(); 38 char_.shrink_to_fit(); 39 provenances_.shrink_to_fit(); 40 } 41 42 void TokenSequence::swap(TokenSequence &that) { 43 start_.swap(that.start_); 44 std::swap(nextStart_, that.nextStart_); 45 char_.swap(that.char_); 46 provenances_.swap(that.provenances_); 47 } 48 49 std::size_t TokenSequence::SkipBlanks(std::size_t at) const { 50 std::size_t tokens{start_.size()}; 51 for (; at < tokens; ++at) { 52 if (!TokenAt(at).IsBlank()) { 53 return at; 54 } 55 } 56 return tokens; // even if at > tokens 57 } 58 59 // C-style /*comments*/ are removed from preprocessing directive 60 // token sequences by the prescanner, but not C++ or Fortran 61 // free-form line-ending comments (//... and !...) because 62 // ignoring them is directive-specific. 63 bool TokenSequence::IsAnythingLeft(std::size_t at) const { 64 std::size_t tokens{start_.size()}; 65 for (; at < tokens; ++at) { 66 auto tok{TokenAt(at)}; 67 const char *end{tok.end()}; 68 for (const char *p{tok.begin()}; p < end; ++p) { 69 switch (*p) { 70 case '/': 71 return p + 1 >= end || p[1] != '/'; 72 case '!': 73 return false; 74 case ' ': 75 break; 76 default: 77 return true; 78 } 79 } 80 } 81 return false; 82 } 83 84 void TokenSequence::RemoveLastToken() { 85 CHECK(!start_.empty()); 86 CHECK(nextStart_ > start_.back()); 87 std::size_t bytes{nextStart_ - start_.back()}; 88 nextStart_ = start_.back(); 89 start_.pop_back(); 90 char_.erase(char_.begin() + nextStart_, char_.end()); 91 provenances_.RemoveLastBytes(bytes); 92 } 93 94 void TokenSequence::Put(const TokenSequence &that) { 95 if (nextStart_ < char_.size()) { 96 start_.push_back(nextStart_); 97 } 98 int offset = char_.size(); 99 for (int st : that.start_) { 100 start_.push_back(st + offset); 101 } 102 char_.insert(char_.end(), that.char_.begin(), that.char_.end()); 103 nextStart_ = char_.size(); 104 provenances_.Put(that.provenances_); 105 } 106 107 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) { 108 std::size_t offset{0}; 109 std::size_t tokens{that.SizeInTokens()}; 110 for (std::size_t j{0}; j < tokens; ++j) { 111 CharBlock tok{that.TokenAt(j)}; 112 Put(tok, range.OffsetMember(offset)); 113 offset += tok.size(); 114 } 115 CHECK(offset == range.size()); 116 } 117 118 void TokenSequence::Put( 119 const TokenSequence &that, std::size_t at, std::size_t tokens) { 120 ProvenanceRange provenance; 121 std::size_t offset{0}; 122 for (; tokens-- > 0; ++at) { 123 CharBlock tok{that.TokenAt(at)}; 124 std::size_t tokBytes{tok.size()}; 125 for (std::size_t j{0}; j < tokBytes; ++j) { 126 if (offset == provenance.size()) { 127 provenance = that.provenances_.Map(that.start_[at] + j); 128 offset = 0; 129 } 130 PutNextTokenChar(tok[j], provenance.OffsetMember(offset++)); 131 } 132 CloseToken(); 133 } 134 } 135 136 void TokenSequence::Put( 137 const char *s, std::size_t bytes, Provenance provenance) { 138 for (std::size_t j{0}; j < bytes; ++j) { 139 PutNextTokenChar(s[j], provenance + j); 140 } 141 CloseToken(); 142 } 143 144 void TokenSequence::Put(const CharBlock &t, Provenance provenance) { 145 Put(&t[0], t.size(), provenance); 146 } 147 148 void TokenSequence::Put(const std::string &s, Provenance provenance) { 149 Put(s.data(), s.size(), provenance); 150 } 151 152 void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) { 153 Put(ss.str(), provenance); 154 } 155 156 TokenSequence &TokenSequence::ToLowerCase() { 157 std::size_t tokens{start_.size()}; 158 std::size_t chars{char_.size()}; 159 std::size_t atToken{0}; 160 for (std::size_t j{0}; j < chars;) { 161 std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars}; 162 char *p{&char_[j]}, *limit{&char_[nextStart]}; 163 j = nextStart; 164 if (IsDecimalDigit(*p)) { 165 while (p < limit && IsDecimalDigit(*p)) { 166 ++p; 167 } 168 if (p >= limit) { 169 } else if (*p == 'h' || *p == 'H') { 170 // Hollerith 171 *p = 'h'; 172 } else if (*p == '_') { 173 // kind-prefixed character literal (e.g., 1_"ABC") 174 } else { 175 // exponent 176 for (; p < limit; ++p) { 177 *p = ToLowerCaseLetter(*p); 178 } 179 } 180 } else if (limit[-1] == '\'' || limit[-1] == '"') { 181 if (*p == limit[-1]) { 182 // Character literal without prefix 183 } else if (p[1] == limit[-1]) { 184 // BOZX-prefixed constant 185 for (; p < limit; ++p) { 186 *p = ToLowerCaseLetter(*p); 187 } 188 } else { 189 // Literal with kind-param prefix name (e.g., K_"ABC"). 190 for (; *p != limit[-1]; ++p) { 191 *p = ToLowerCaseLetter(*p); 192 } 193 } 194 } else { 195 for (; p < limit; ++p) { 196 *p = ToLowerCaseLetter(*p); 197 } 198 } 199 } 200 return *this; 201 } 202 203 bool TokenSequence::HasBlanks(std::size_t firstChar) const { 204 std::size_t tokens{SizeInTokens()}; 205 for (std::size_t j{0}; j < tokens; ++j) { 206 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) { 207 return true; 208 } 209 } 210 return false; 211 } 212 213 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const { 214 std::size_t tokens{SizeInTokens()}; 215 bool lastWasBlank{false}; 216 for (std::size_t j{0}; j < tokens; ++j) { 217 bool isBlank{TokenAt(j).IsBlank()}; 218 if (isBlank && lastWasBlank && start_[j] >= firstChar) { 219 return true; 220 } 221 lastWasBlank = isBlank; 222 } 223 return false; 224 } 225 226 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) { 227 std::size_t tokens{SizeInTokens()}; 228 TokenSequence result; 229 for (std::size_t j{0}; j < tokens; ++j) { 230 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) { 231 result.Put(*this, j); 232 } 233 } 234 swap(result); 235 return *this; 236 } 237 238 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) { 239 std::size_t tokens{SizeInTokens()}; 240 TokenSequence result; 241 bool lastWasBlank{false}; 242 for (std::size_t j{0}; j < tokens; ++j) { 243 bool isBlank{TokenAt(j).IsBlank()}; 244 if (!isBlank || !lastWasBlank || start_[j] < firstChar) { 245 result.Put(*this, j); 246 } 247 lastWasBlank = isBlank; 248 } 249 swap(result); 250 return *this; 251 } 252 253 TokenSequence &TokenSequence::ClipComment(bool skipFirst) { 254 std::size_t tokens{SizeInTokens()}; 255 for (std::size_t j{0}; j < tokens; ++j) { 256 if (TokenAt(j).FirstNonBlank() == '!') { 257 if (skipFirst) { 258 skipFirst = false; 259 } else { 260 TokenSequence result; 261 if (j > 0) { 262 result.Put(*this, 0, j - 1); 263 } 264 swap(result); 265 return *this; 266 } 267 } 268 } 269 return *this; 270 } 271 272 void TokenSequence::Emit(CookedSource &cooked) const { 273 cooked.Put(&char_[0], char_.size()); 274 cooked.PutProvenanceMappings(provenances_); 275 } 276 277 void TokenSequence::Dump(llvm::raw_ostream &o) const { 278 o << "TokenSequence has " << char_.size() << " chars; nextStart_ " 279 << nextStart_ << '\n'; 280 for (std::size_t j{0}; j < start_.size(); ++j) { 281 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString() 282 << "'\n"; 283 } 284 } 285 286 Provenance TokenSequence::GetTokenProvenance( 287 std::size_t token, std::size_t offset) const { 288 ProvenanceRange range{provenances_.Map(start_[token] + offset)}; 289 return range.start(); 290 } 291 292 ProvenanceRange TokenSequence::GetTokenProvenanceRange( 293 std::size_t token, std::size_t offset) const { 294 ProvenanceRange range{provenances_.Map(start_[token] + offset)}; 295 return range.Prefix(TokenBytes(token) - offset); 296 } 297 298 ProvenanceRange TokenSequence::GetIntervalProvenanceRange( 299 std::size_t token, std::size_t tokens) const { 300 if (tokens == 0) { 301 return {}; 302 } 303 ProvenanceRange range{provenances_.Map(start_[token])}; 304 while (--tokens > 0 && 305 range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) { 306 } 307 return range; 308 } 309 310 ProvenanceRange TokenSequence::GetProvenanceRange() const { 311 return GetIntervalProvenanceRange(0, start_.size()); 312 } 313 } // namespace Fortran::parser 314