//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "token-sequence.h"
#include "flang/Parser/characters.h"
#include "llvm/Support/raw_ostream.h"

namespace Fortran::parser {

TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}

void TokenSequence::clear() {
  start_.clear();
  nextStart_ = 0;
  char_.clear();
  provenances_.clear();
}

void TokenSequence::pop_back() {
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.resize(nextStart_);
  provenances_.RemoveLastBytes(bytes);
}

void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}

void TokenSequence::swap(TokenSequence &that) {
  start_.swap(that.start_);
  std::swap(nextStart_, that.nextStart_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}

std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    if (!TokenAt(at).IsBlank()) {
      return at;
    }
  }
  return tokens; // even if at > tokens
}

void TokenSequence::RemoveLastToken() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.erase(char_.begin() + nextStart_, char_.end());
  provenances_.RemoveLastBytes(bytes);
}

void TokenSequence::Put(const TokenSequence &that) {
  if (nextStart_ < char_.size()) {
    start_.push_back(nextStart_);
  }
  int offset = char_.size();
  for (int st : that.start_) {
    start_.push_back(st + offset);
  }
  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
  nextStart_ = char_.size();
  provenances_.Put(that.provenances_);
}

void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
  std::size_t offset{0};
  std::size_t tokens{that.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{that.TokenAt(j)};
    Put(tok, range.OffsetMember(offset));
    offset += tok.size();
  }
  CHECK(offset == range.size());
}

void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}

void TokenSequence::Put(
    const char *s, std::size_t bytes, Provenance provenance) {
  for (std::size_t j{0}; j < bytes; ++j) {
    PutNextTokenChar(s[j], provenance + j);
  }
  CloseToken();
}

void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
  Put(&t[0], t.size(), provenance);
}

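// Convenience overloads: append the contents of a std::string or of a
// raw_string_ostream buffer as a single token.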
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}

void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}

TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]}, *limit{&char_[nextStart]};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}

bool TokenSequence::HasBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
      return true;
    }
  }
  return false;
}

bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (isBlank && lastWasBlank && start_[j] >= firstChar) {
      return true;
    }
    lastWasBlank = isBlank;
  }
  return false;
}

TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
      result.Put(*this, j);
    }
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
      result.Put(*this, j);
    }
    lastWasBlank = isBlank;
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}

void TokenSequence::Emit(CookedSource &cooked) const {
  cooked.Put(&char_[0], char_.size());
  cooked.PutProvenanceMappings(provenances_);
}

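// Debugging aid: prints the sequence's character count and each token's
// starting offset and text.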
void TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
    << nextStart_ << '\n';
  for (std::size_t j{0}; j < start_.size(); ++j) {
    o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
      << "'\n";
  }
}

Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.start();
}

ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}

ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}

ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
} // namespace Fortran::parser