1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "llvm/Support/raw_ostream.h"
12 
13 namespace Fortran::parser {
14 
// Move assignment: discard this sequence's current contents, then take
// over 'that''s storage via swap.  After the call, 'that' holds the empty
// state produced by clear().
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
20 
21 void TokenSequence::clear() {
22   start_.clear();
23   nextStart_ = 0;
24   char_.clear();
25   provenances_.clear();
26 }
27 
28 void TokenSequence::pop_back() {
29   std::size_t bytes{nextStart_ - start_.back()};
30   nextStart_ = start_.back();
31   start_.pop_back();
32   char_.resize(nextStart_);
33   provenances_.RemoveLastBytes(bytes);
34 }
35 
36 void TokenSequence::shrink_to_fit() {
37   start_.shrink_to_fit();
38   char_.shrink_to_fit();
39   provenances_.shrink_to_fit();
40 }
41 
42 void TokenSequence::swap(TokenSequence &that) {
43   start_.swap(that.start_);
44   std::swap(nextStart_, that.nextStart_);
45   char_.swap(that.char_);
46   provenances_.swap(that.provenances_);
47 }
48 
49 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
50   std::size_t tokens{start_.size()};
51   for (; at < tokens; ++at) {
52     if (!TokenAt(at).IsBlank()) {
53       return at;
54     }
55   }
56   return tokens; // even if at > tokens
57 }
58 
59 // C-style /*comments*/ are removed from preprocessing directive
60 // token sequences by the prescanner, but not C++ or Fortran
61 // free-form line-ending comments (//...  and !...) because
62 // ignoring them is directive-specific.
63 bool TokenSequence::IsAnythingLeft(std::size_t at) const {
64   std::size_t tokens{start_.size()};
65   for (; at < tokens; ++at) {
66     auto tok{TokenAt(at)};
67     const char *end{tok.end()};
68     for (const char *p{tok.begin()}; p < end; ++p) {
69       switch (*p) {
70       case '/':
71         return p + 1 >= end || p[1] != '/';
72       case '!':
73         return false;
74       case ' ':
75         break;
76       default:
77         return true;
78       }
79     }
80   }
81   return false;
82 }
83 
84 void TokenSequence::RemoveLastToken() {
85   CHECK(!start_.empty());
86   CHECK(nextStart_ > start_.back());
87   std::size_t bytes{nextStart_ - start_.back()};
88   nextStart_ = start_.back();
89   start_.pop_back();
90   char_.erase(char_.begin() + nextStart_, char_.end());
91   provenances_.RemoveLastBytes(bytes);
92 }
93 
94 void TokenSequence::Put(const TokenSequence &that) {
95   if (nextStart_ < char_.size()) {
96     start_.push_back(nextStart_);
97   }
98   int offset = char_.size();
99   for (int st : that.start_) {
100     start_.push_back(st + offset);
101   }
102   char_.insert(char_.end(), that.char_.begin(), that.char_.end());
103   nextStart_ = char_.size();
104   provenances_.Put(that.provenances_);
105 }
106 
107 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
108   std::size_t offset{0};
109   std::size_t tokens{that.SizeInTokens()};
110   for (std::size_t j{0}; j < tokens; ++j) {
111     CharBlock tok{that.TokenAt(j)};
112     Put(tok, range.OffsetMember(offset));
113     offset += tok.size();
114   }
115   CHECK(offset == range.size());
116 }
117 
// Appends 'tokens' tokens from 'that', starting at token index 'at',
// copying each token's characters together with their original
// provenance.  Provenance is looked up lazily: a mapped range is consumed
// byte by byte and re-mapped only once it is exhausted.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // On the first byte (both zero) and whenever the current range is
      // used up, map the source position to a fresh provenance range.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
135 
136 void TokenSequence::Put(
137     const char *s, std::size_t bytes, Provenance provenance) {
138   for (std::size_t j{0}; j < bytes; ++j) {
139     PutNextTokenChar(s[j], provenance + j);
140   }
141   CloseToken();
142 }
143 
144 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
145   Put(&t[0], t.size(), provenance);
146 }
147 
148 void TokenSequence::Put(const std::string &s, Provenance provenance) {
149   Put(s.data(), s.size(), provenance);
150 }
151 
// Appends the text accumulated in a raw_string_ostream as one token.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance); // str() yields the stream's underlying string
}
155 
// Lowercases letters in place across all tokens, taking care not to alter
// the bodies of character literals.  Tokens that begin with a decimal
// digit get special treatment for Hollerith (nnnH...) and kind-prefixed
// literals (e.g. 1_"ABC"); tokens ending in a quote are inspected to
// decide how much of any prefix may be lowercased.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // 'nextStart' is the first character position after the current token.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    // NOTE(review): when this is the last token, &char_[nextStart] forms a
    // one-past-the-end pointer via operator[], which is formally out of
    // range for std::vector indexing — confirm intent / consider data().
    char *p{&char_[j]}, *limit{&char_[nextStart]};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      // Skip the leading digit string.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // Token is all digits: nothing to lowercase.
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lowercase only the prefix, stopping at the opening quote.
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lowercase every character.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
202 
203 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
204   std::size_t tokens{SizeInTokens()};
205   for (std::size_t j{0}; j < tokens; ++j) {
206     if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
207       return true;
208     }
209   }
210   return false;
211 }
212 
213 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
214   std::size_t tokens{SizeInTokens()};
215   bool lastWasBlank{false};
216   for (std::size_t j{0}; j < tokens; ++j) {
217     bool isBlank{TokenAt(j).IsBlank()};
218     if (isBlank && lastWasBlank && start_[j] >= firstChar) {
219       return true;
220     }
221     lastWasBlank = isBlank;
222   }
223   return false;
224 }
225 
226 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
227   std::size_t tokens{SizeInTokens()};
228   TokenSequence result;
229   for (std::size_t j{0}; j < tokens; ++j) {
230     if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
231       result.Put(*this, j);
232     }
233   }
234   swap(result);
235   return *this;
236 }
237 
238 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
239   std::size_t tokens{SizeInTokens()};
240   TokenSequence result;
241   bool lastWasBlank{false};
242   for (std::size_t j{0}; j < tokens; ++j) {
243     bool isBlank{TokenAt(j).IsBlank()};
244     if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
245       result.Put(*this, j);
246     }
247     lastWasBlank = isBlank;
248   }
249   swap(result);
250   return *this;
251 }
252 
// Truncates the sequence at the first token whose first non-blank
// character is '!' (a Fortran line comment).  When 'skipFirst' is true,
// the first such token is spared and the truncation applies to the next
// one found.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): this copies j-1 tokens (indices 0..j-2), which
          // drops the token immediately preceding the comment.  If the
          // intent is to keep everything before the '!', the count should
          // arguably be j — confirm against callers before changing.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
271 
272 void TokenSequence::Emit(CookedSource &cooked) const {
273   cooked.Put(&char_[0], char_.size());
274   cooked.PutProvenanceMappings(provenances_);
275 }
276 
277 void TokenSequence::Dump(llvm::raw_ostream &o) const {
278   o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
279     << nextStart_ << '\n';
280   for (std::size_t j{0}; j < start_.size(); ++j) {
281     o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
282       << "'\n";
283   }
284 }
285 
286 Provenance TokenSequence::GetTokenProvenance(
287     std::size_t token, std::size_t offset) const {
288   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
289   return range.start();
290 }
291 
292 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
293     std::size_t token, std::size_t offset) const {
294   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
295   return range.Prefix(TokenBytes(token) - offset);
296 }
297 
298 ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
299     std::size_t token, std::size_t tokens) const {
300   if (tokens == 0) {
301     return {};
302   }
303   ProvenanceRange range{provenances_.Map(start_[token])};
304   while (--tokens > 0 &&
305       range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
306   }
307   return range;
308 }
309 
// Returns the provenance range spanning the whole sequence (as far as the
// tokens' provenances are contiguous from the first token).
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
313 } // namespace Fortran::parser
314