1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 // Implements out-of-line member functions of template class FormatControl
10
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13
14 #include "format.h"
15 #include "io-stmt.h"
16 #include "flang/Common/format.h"
17 #include "flang/Decimal/decimal.h"
18 #include "flang/Runtime/main.h"
19 #include <algorithm>
20 #include <limits>
21
22 namespace Fortran::runtime::io {
23
24 template <typename CONTEXT>
FormatControl(const Terminator & terminator,const CharType * format,std::size_t formatLength,int maxHeight)25 FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
26 const CharType *format, std::size_t formatLength, int maxHeight)
27 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
28 formatLength_{static_cast<int>(formatLength)} {
29 RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
30 RUNTIME_CHECK(
31 terminator, formatLength == static_cast<std::size_t>(formatLength_));
32 stack_[0].start = offset_;
33 stack_[0].remaining = Iteration::unlimited; // 13.4(8)
34 }
35
36 template <typename CONTEXT>
GetIntField(IoErrorHandler & handler,CharType firstCh)37 int FormatControl<CONTEXT>::GetIntField(
38 IoErrorHandler &handler, CharType firstCh) {
39 CharType ch{firstCh ? firstCh : PeekNext()};
40 if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
41 handler.SignalError(IostatErrorInFormat,
42 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
43 return 0;
44 }
45 int result{0};
46 bool negate{ch == '-'};
47 if (negate || ch == '+') {
48 if (firstCh) {
49 firstCh = '\0';
50 } else {
51 ++offset_;
52 }
53 ch = PeekNext();
54 }
55 while (ch >= '0' && ch <= '9') {
56 if (result >
57 std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) {
58 handler.SignalError(
59 IostatErrorInFormat, "FORMAT integer field out of range");
60 return result;
61 }
62 result = 10 * result + ch - '0';
63 if (firstCh) {
64 firstCh = '\0';
65 } else {
66 ++offset_;
67 }
68 ch = PeekNext();
69 }
70 if (negate && (result *= -1) > 0) {
71 handler.SignalError(
72 IostatErrorInFormat, "FORMAT integer field out of range");
73 }
74 return result;
75 }
76
77 template <typename CONTEXT>
HandleControl(CONTEXT & context,char ch,char next,int n)78 static void HandleControl(CONTEXT &context, char ch, char next, int n) {
79 MutableModes &modes{context.mutableModes()};
80 switch (ch) {
81 case 'B':
82 if (next == 'Z') {
83 modes.editingFlags |= blankZero;
84 return;
85 }
86 if (next == 'N') {
87 modes.editingFlags &= ~blankZero;
88 return;
89 }
90 break;
91 case 'D':
92 if (next == 'C') {
93 modes.editingFlags |= decimalComma;
94 return;
95 }
96 if (next == 'P') {
97 modes.editingFlags &= ~decimalComma;
98 return;
99 }
100 break;
101 case 'P':
102 if (!next) {
103 modes.scale = n; // kP - decimal scaling by 10**k
104 return;
105 }
106 break;
107 case 'R':
108 switch (next) {
109 case 'N':
110 modes.round = decimal::RoundNearest;
111 return;
112 case 'Z':
113 modes.round = decimal::RoundToZero;
114 return;
115 case 'U':
116 modes.round = decimal::RoundUp;
117 return;
118 case 'D':
119 modes.round = decimal::RoundDown;
120 return;
121 case 'C':
122 modes.round = decimal::RoundCompatible;
123 return;
124 case 'P':
125 modes.round = executionEnvironment.defaultOutputRoundingMode;
126 return;
127 default:
128 break;
129 }
130 break;
131 case 'X':
132 if (!next) {
133 context.HandleRelativePosition(n);
134 return;
135 }
136 break;
137 case 'S':
138 if (next == 'P') {
139 modes.editingFlags |= signPlus;
140 return;
141 }
142 if (!next || next == 'S') {
143 modes.editingFlags &= ~signPlus;
144 return;
145 }
146 break;
147 case 'T': {
148 if (!next) { // Tn
149 context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based
150 return;
151 }
152 if (next == 'L' || next == 'R') { // TLn & TRn
153 context.HandleRelativePosition(next == 'L' ? -n : n);
154 return;
155 }
156 } break;
157 default:
158 break;
159 }
160 if (next) {
161 context.SignalError(IostatErrorInFormat,
162 "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
163 } else {
164 context.SignalError(
165 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
166 }
167 }
168
169 // Locates the next data edit descriptor in the format.
170 // Handles all repetition counts and control edit descriptors.
171 // Generally assumes that the format string has survived the common
172 // format validator gauntlet.
173 template <typename CONTEXT>
CueUpNextDataEdit(Context & context,bool stop)174 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
175 int unlimitedLoopCheck{-1};
176 // Do repetitions remain on an unparenthesized data edit?
177 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
178 offset_ = stack_[height_ - 1].start;
179 int repeat{stack_[height_ - 1].remaining};
180 --height_;
181 if (repeat > 0) {
182 return repeat;
183 }
184 }
185 while (true) {
186 std::optional<int> repeat;
187 bool unlimited{false};
188 auto maybeReversionPoint{offset_};
189 CharType ch{GetNextChar(context)};
190 while (ch == ',' || ch == ':') {
191 // Skip commas, and don't complain if they're missing; the format
192 // validator does that.
193 if (stop && ch == ':') {
194 return 0;
195 }
196 ch = GetNextChar(context);
197 }
198 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
199 repeat = GetIntField(context, ch);
200 ch = GetNextChar(context);
201 } else if (ch == '*') {
202 unlimited = true;
203 ch = GetNextChar(context);
204 if (ch != '(') {
205 ReportBadFormat(context,
206 "Invalid FORMAT: '*' may appear only before '('",
207 maybeReversionPoint);
208 return 0;
209 }
210 if (height_ != 1) {
211 ReportBadFormat(context,
212 "Invalid FORMAT: '*' must be nested in exactly one set of "
213 "parentheses",
214 maybeReversionPoint);
215 return 0;
216 }
217 }
218 ch = Capitalize(ch);
219 if (ch == '(') {
220 if (height_ >= maxHeight_) {
221 ReportBadFormat(context,
222 "FORMAT stack overflow: too many nested parentheses",
223 maybeReversionPoint);
224 return 0;
225 }
226 stack_[height_].start = offset_ - 1; // the '('
227 RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
228 if (unlimited || height_ == 0) {
229 stack_[height_].remaining = Iteration::unlimited;
230 unlimitedLoopCheck = offset_ - 1;
231 } else if (repeat) {
232 if (*repeat <= 0) {
233 *repeat = 1; // error recovery
234 }
235 stack_[height_].remaining = *repeat - 1;
236 } else {
237 stack_[height_].remaining = 0;
238 }
239 if (height_ == 1) {
240 // Subtle point (F'2018 13.4 para 9): tha last parenthesized group
241 // at height 1 becomes the restart point after control reaches the
242 // end of the format, including its repeat count.
243 stack_[0].start = maybeReversionPoint;
244 }
245 ++height_;
246 } else if (height_ == 0) {
247 ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
248 return 0;
249 } else if (ch == ')') {
250 if (height_ == 1) {
251 if (stop) {
252 return 0; // end of FORMAT and no data items remain
253 }
254 context.AdvanceRecord(); // implied / before rightmost )
255 }
256 auto restart{stack_[height_ - 1].start};
257 if (format_[restart] == '(') {
258 ++restart;
259 }
260 if (stack_[height_ - 1].remaining == Iteration::unlimited) {
261 if (height_ > 1 && GetNextChar(context) != ')') {
262 ReportBadFormat(context,
263 "Unlimited repetition in FORMAT may not be followed by more "
264 "items",
265 restart);
266 return 0;
267 }
268 if (offset_ == unlimitedLoopCheck) {
269 ReportBadFormat(context,
270 "Unlimited repetition in FORMAT lacks data edit descriptors",
271 restart);
272 return 0;
273 }
274 offset_ = restart;
275 } else if (stack_[height_ - 1].remaining-- > 0) {
276 offset_ = restart;
277 } else {
278 --height_;
279 }
280 } else if (ch == '\'' || ch == '"') {
281 // Quoted 'character literal'
282 CharType quote{ch};
283 auto start{offset_};
284 while (offset_ < formatLength_ && format_[offset_] != quote) {
285 ++offset_;
286 }
287 if (offset_ >= formatLength_) {
288 ReportBadFormat(context,
289 "FORMAT missing closing quote on character literal",
290 maybeReversionPoint);
291 return 0;
292 }
293 ++offset_;
294 std::size_t chars{
295 static_cast<std::size_t>(&format_[offset_] - &format_[start])};
296 if (offset_ < formatLength_ && format_[offset_] == quote) {
297 // subtle: handle doubled quote character in a literal by including
298 // the first in the output, then treating the second as the start
299 // of another character literal.
300 } else {
301 --chars;
302 }
303 context.Emit(format_ + start, chars);
304 } else if (ch == 'H') {
305 // 9HHOLLERITH
306 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
307 ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
308 maybeReversionPoint);
309 return 0;
310 }
311 context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
312 offset_ += *repeat;
313 } else if (ch >= 'A' && ch <= 'Z') {
314 int start{offset_ - 1};
315 CharType next{'\0'};
316 if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
317 CharType peek{Capitalize(PeekNext())};
318 if (peek >= 'A' && peek <= 'Z') {
319 if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
320 ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
321 // Assume a two-letter edit descriptor
322 next = peek;
323 ++offset_;
324 } else {
325 // extension: assume a comma between 'ch' and 'peek'
326 }
327 }
328 }
329 if ((!next &&
330 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
331 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
332 ch == 'L')) ||
333 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
334 (ch == 'D' && next == 'T')) {
335 // Data edit descriptor found
336 offset_ = start;
337 return repeat && *repeat > 0 ? *repeat : 1;
338 } else {
339 // Control edit descriptor
340 if (ch == 'T') { // Tn, TLn, TRn
341 repeat = GetIntField(context);
342 }
343 HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
344 repeat ? *repeat : 1);
345 }
346 } else if (ch == '/') {
347 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
348 } else if (ch == '$' || ch == '\\') {
349 context.mutableModes().nonAdvancing = true;
350 } else if (ch == '\t' || ch == '\v') {
351 // Tabs (extension)
352 // TODO: any other raw characters?
353 context.Emit(format_ + offset_ - 1, 1);
354 } else {
355 ReportBadFormat(
356 context, "Invalid character in FORMAT", maybeReversionPoint);
357 return 0;
358 }
359 }
360 }
361
362 // Returns the next data edit descriptor
363 template <typename CONTEXT>
GetNextDataEdit(Context & context,int maxRepeat)364 DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
365 Context &context, int maxRepeat) {
366 int repeat{CueUpNextDataEdit(context)};
367 auto start{offset_};
368 DataEdit edit;
369 edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
370 if (edit.descriptor == 'E') {
371 if (auto next{static_cast<char>(Capitalize(PeekNext()))};
372 next == 'N' || next == 'S' || next == 'X') {
373 edit.variation = next;
374 ++offset_;
375 }
376 } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
377 // DT['iotype'][(v_list)] user-defined derived type I/O
378 edit.descriptor = DataEdit::DefinedDerivedType;
379 ++offset_;
380 if (auto quote{static_cast<char>(PeekNext())};
381 quote == '\'' || quote == '"') {
382 // Capture the quoted 'iotype'
383 bool ok{false}, tooLong{false};
384 for (++offset_; offset_ < formatLength_;) {
385 auto ch{static_cast<char>(format_[offset_++])};
386 if (ch == quote &&
387 (offset_ == formatLength_ ||
388 static_cast<char>(format_[offset_]) != quote)) {
389 ok = true;
390 break; // that was terminating quote
391 } else if (edit.ioTypeChars >= edit.maxIoTypeChars) {
392 tooLong = true;
393 } else {
394 edit.ioType[edit.ioTypeChars++] = ch;
395 if (ch == quote) {
396 ++offset_;
397 }
398 }
399 }
400 if (!ok) {
401 ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
402 } else if (tooLong) {
403 ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
404 }
405 }
406 if (PeekNext() == '(') {
407 // Capture the v_list arguments
408 bool ok{false}, tooLong{false};
409 for (++offset_; offset_ < formatLength_;) {
410 int n{GetIntField(context)};
411 if (edit.vListEntries >= edit.maxVListEntries) {
412 tooLong = true;
413 } else {
414 edit.vList[edit.vListEntries++] = n;
415 }
416 auto ch{static_cast<char>(GetNextChar(context))};
417 if (ch != ',') {
418 ok = ch == ')';
419 break;
420 }
421 }
422 if (!ok) {
423 ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
424 } else if (tooLong) {
425 ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
426 }
427 }
428 }
429 if (edit.descriptor == 'A') { // width is optional for A[w]
430 auto ch{PeekNext()};
431 if (ch >= '0' && ch <= '9') {
432 edit.width = GetIntField(context);
433 }
434 } else if (edit.descriptor != DataEdit::DefinedDerivedType) {
435 edit.width = GetIntField(context);
436 }
437 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
438 if (edit.width.value_or(-1) == 0) {
439 ReportBadFormat(context, "Input field width is zero", start);
440 }
441 }
442 if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') {
443 ++offset_;
444 edit.digits = GetIntField(context);
445 CharType ch{PeekNext()};
446 if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
447 ++offset_;
448 edit.expoDigits = GetIntField(context);
449 }
450 }
451 edit.modes = context.mutableModes();
452 // Handle repeated nonparenthesized edit descriptors
453 edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
454 if (repeat > maxRepeat) {
455 stack_[height_].start = start; // after repeat count
456 stack_[height_].remaining = repeat - edit.repeat;
457 ++height_;
458 }
459 return edit;
460 }
461
462 template <typename CONTEXT>
Finish(Context & context)463 void FormatControl<CONTEXT>::Finish(Context &context) {
464 CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
465 }
466 } // namespace Fortran::runtime::io
467 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
468