1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "llvm/Support/raw_ostream.h"
12 
13 namespace Fortran::parser {
14 
// Move assignment via clear-and-swap: *this takes over the contents of
// "that", and "that" is left holding this sequence's cleared state.
// A self-move leaves the sequence empty, which is a valid state.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
20 
21 void TokenSequence::clear() {
22   start_.clear();
23   nextStart_ = 0;
24   char_.clear();
25   provenances_.clear();
26 }
27 
28 void TokenSequence::pop_back() {
29   std::size_t bytes{nextStart_ - start_.back()};
30   nextStart_ = start_.back();
31   start_.pop_back();
32   char_.resize(nextStart_);
33   provenances_.RemoveLastBytes(bytes);
34 }
35 
36 void TokenSequence::shrink_to_fit() {
37   start_.shrink_to_fit();
38   char_.shrink_to_fit();
39   provenances_.shrink_to_fit();
40 }
41 
42 void TokenSequence::swap(TokenSequence &that) {
43   start_.swap(that.start_);
44   std::swap(nextStart_, that.nextStart_);
45   char_.swap(that.char_);
46   provenances_.swap(that.provenances_);
47 }
48 
49 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
50   std::size_t tokens{start_.size()};
51   for (; at < tokens; ++at) {
52     if (!TokenAt(at).IsBlank()) {
53       return at;
54     }
55   }
56   return tokens; // even if at > tokens
57 }
58 
59 void TokenSequence::RemoveLastToken() {
60   CHECK(!start_.empty());
61   CHECK(nextStart_ > start_.back());
62   std::size_t bytes{nextStart_ - start_.back()};
63   nextStart_ = start_.back();
64   start_.pop_back();
65   char_.erase(char_.begin() + nextStart_, char_.end());
66   provenances_.RemoveLastBytes(bytes);
67 }
68 
69 void TokenSequence::Put(const TokenSequence &that) {
70   if (nextStart_ < char_.size()) {
71     start_.push_back(nextStart_);
72   }
73   int offset = char_.size();
74   for (int st : that.start_) {
75     start_.push_back(st + offset);
76   }
77   char_.insert(char_.end(), that.char_.begin(), that.char_.end());
78   nextStart_ = char_.size();
79   provenances_.Put(that.provenances_);
80 }
81 
82 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
83   std::size_t offset{0};
84   std::size_t tokens{that.SizeInTokens()};
85   for (std::size_t j{0}; j < tokens; ++j) {
86     CharBlock tok{that.TokenAt(j)};
87     Put(tok, range.OffsetMember(offset));
88     offset += tok.size();
89   }
90   CHECK(offset == range.size());
91 }
92 
// Appends "tokens" tokens from "that" starting at token index "at",
// re-deriving provenance byte by byte.  "provenance"/"offset" cache the
// current contiguous provenance run so that provenances_.Map() is only
// called when the cached run is exhausted, not once per byte.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // Cached run used up: map this source byte to a fresh range.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
110 
111 void TokenSequence::Put(
112     const char *s, std::size_t bytes, Provenance provenance) {
113   for (std::size_t j{0}; j < bytes; ++j) {
114     PutNextTokenChar(s[j], provenance + j);
115   }
116   CloseToken();
117 }
118 
119 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
120   Put(&t[0], t.size(), provenance);
121 }
122 
123 void TokenSequence::Put(const std::string &s, Provenance provenance) {
124   Put(s.data(), s.size(), provenance);
125 }
126 
127 void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
128   Put(ss.str(), provenance);
129 }
130 
131 TokenSequence &TokenSequence::ToLowerCase() {
132   std::size_t tokens{start_.size()};
133   std::size_t chars{char_.size()};
134   std::size_t atToken{0};
135   for (std::size_t j{0}; j < chars;) {
136     std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
137     char *p{&char_[j]}, *limit{&char_[nextStart]};
138     j = nextStart;
139     if (IsDecimalDigit(*p)) {
140       while (p < limit && IsDecimalDigit(*p)) {
141         ++p;
142       }
143       if (p >= limit) {
144       } else if (*p == 'h' || *p == 'H') {
145         // Hollerith
146         *p = 'h';
147       } else if (*p == '_') {
148         // kind-prefixed character literal (e.g., 1_"ABC")
149       } else {
150         // exponent
151         for (; p < limit; ++p) {
152           *p = ToLowerCaseLetter(*p);
153         }
154       }
155     } else if (limit[-1] == '\'' || limit[-1] == '"') {
156       if (*p == limit[-1]) {
157         // Character literal without prefix
158       } else if (p[1] == limit[-1]) {
159         // BOZX-prefixed constant
160         for (; p < limit; ++p) {
161           *p = ToLowerCaseLetter(*p);
162         }
163       } else {
164         // Literal with kind-param prefix name (e.g., K_"ABC").
165         for (; *p != limit[-1]; ++p) {
166           *p = ToLowerCaseLetter(*p);
167         }
168       }
169     } else {
170       for (; p < limit; ++p) {
171         *p = ToLowerCaseLetter(*p);
172       }
173     }
174   }
175   return *this;
176 }
177 
178 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
179   std::size_t tokens{SizeInTokens()};
180   for (std::size_t j{0}; j < tokens; ++j) {
181     if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
182       return true;
183     }
184   }
185   return false;
186 }
187 
188 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
189   std::size_t tokens{SizeInTokens()};
190   bool lastWasBlank{false};
191   for (std::size_t j{0}; j < tokens; ++j) {
192     bool isBlank{TokenAt(j).IsBlank()};
193     if (isBlank && lastWasBlank && start_[j] >= firstChar) {
194       return true;
195     }
196     lastWasBlank = isBlank;
197   }
198   return false;
199 }
200 
201 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
202   std::size_t tokens{SizeInTokens()};
203   TokenSequence result;
204   for (std::size_t j{0}; j < tokens; ++j) {
205     if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
206       result.Put(*this, j);
207     }
208   }
209   swap(result);
210   return *this;
211 }
212 
213 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
214   std::size_t tokens{SizeInTokens()};
215   TokenSequence result;
216   bool lastWasBlank{false};
217   for (std::size_t j{0}; j < tokens; ++j) {
218     bool isBlank{TokenAt(j).IsBlank()};
219     if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
220       result.Put(*this, j);
221     }
222     lastWasBlank = isBlank;
223   }
224   swap(result);
225   return *this;
226 }
227 
// Truncates the sequence at the first token whose first non-blank
// character is '!' (a comment).  When skipFirst is true, the first such
// token is tolerated and only a subsequent one triggers clipping.
// Returns *this for chaining.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): this copies j-1 tokens, so the token
          // immediately preceding the '!' is dropped as well --
          // presumably the blank separating it from the comment;
          // confirm that keeping j tokens was not intended.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
246 
247 void TokenSequence::Emit(CookedSource &cooked) const {
248   cooked.Put(&char_[0], char_.size());
249   cooked.PutProvenanceMappings(provenances_);
250 }
251 
252 void TokenSequence::Dump(llvm::raw_ostream &o) const {
253   o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
254     << nextStart_ << '\n';
255   for (std::size_t j{0}; j < start_.size(); ++j) {
256     o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
257       << "'\n";
258   }
259 }
260 
261 Provenance TokenSequence::GetTokenProvenance(
262     std::size_t token, std::size_t offset) const {
263   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
264   return range.start();
265 }
266 
267 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
268     std::size_t token, std::size_t offset) const {
269   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
270   return range.Prefix(TokenBytes(token) - offset);
271 }
272 
// Provenance range for "tokens" tokens starting at index "token".
// Successive tokens' ranges are annexed while each directly follows the
// accumulated range; the loop stops at the first non-adjacent token, so
// the result appears to cover only the contiguous prefix.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  // Note: ++token in the condition is sequenced after --tokens; the
  // body is intentionally empty.
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
284 
285 ProvenanceRange TokenSequence::GetProvenanceRange() const {
286   return GetIntervalProvenanceRange(0, start_.size());
287 }
288 } // namespace Fortran::parser
289