1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implements out-of-line member functions of template class FormatControl
10 
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13 
14 #include "format.h"
15 #include "io-stmt.h"
16 #include "flang/Common/format.h"
17 #include "flang/Decimal/decimal.h"
18 #include "flang/Runtime/main.h"
19 #include <algorithm>
20 #include <limits>
21 
22 namespace Fortran::runtime::io {
23 
// Constructs the control state used to walk one FORMAT string.
//   terminator   - handed to RUNTIME_CHECK to report a failed sanity check
//   format       - pointer to the FORMAT text; the pointer itself is stored
//   formatLength - number of CharType elements addressable through format
//   maxHeight    - limit on the height of the parenthesis nesting stack
template <typename CONTEXT>
FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
    const CharType *format, std::size_t formatLength, int maxHeight)
    : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
      formatLength_{static_cast<int>(formatLength)} {
  // The initializers above narrow maxHeight to std::uint8_t and formatLength
  // to int; verify that the stored values still equal the arguments.
  RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
  RUNTIME_CHECK(
      terminator, formatLength == static_cast<std::size_t>(formatLength_));
  // Seed the bottom stack entry: it restarts at the current offset and
  // repeats without limit.
  stack_[0].start = offset_;
  stack_[0].remaining = Iteration::unlimited; // 13.4(8)
}
35 
36 template <typename CONTEXT>
GetIntField(IoErrorHandler & handler,CharType firstCh)37 int FormatControl<CONTEXT>::GetIntField(
38     IoErrorHandler &handler, CharType firstCh) {
39   CharType ch{firstCh ? firstCh : PeekNext()};
40   if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
41     handler.SignalError(IostatErrorInFormat,
42         "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
43     return 0;
44   }
45   int result{0};
46   bool negate{ch == '-'};
47   if (negate || ch == '+') {
48     if (firstCh) {
49       firstCh = '\0';
50     } else {
51       ++offset_;
52     }
53     ch = PeekNext();
54   }
55   while (ch >= '0' && ch <= '9') {
56     if (result >
57         std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) {
58       handler.SignalError(
59           IostatErrorInFormat, "FORMAT integer field out of range");
60       return result;
61     }
62     result = 10 * result + ch - '0';
63     if (firstCh) {
64       firstCh = '\0';
65     } else {
66       ++offset_;
67     }
68     ch = PeekNext();
69   }
70   if (negate && (result *= -1) > 0) {
71     handler.SignalError(
72         IostatErrorInFormat, "FORMAT integer field out of range");
73   }
74   return result;
75 }
76 
77 template <typename CONTEXT>
HandleControl(CONTEXT & context,char ch,char next,int n)78 static void HandleControl(CONTEXT &context, char ch, char next, int n) {
79   MutableModes &modes{context.mutableModes()};
80   switch (ch) {
81   case 'B':
82     if (next == 'Z') {
83       modes.editingFlags |= blankZero;
84       return;
85     }
86     if (next == 'N') {
87       modes.editingFlags &= ~blankZero;
88       return;
89     }
90     break;
91   case 'D':
92     if (next == 'C') {
93       modes.editingFlags |= decimalComma;
94       return;
95     }
96     if (next == 'P') {
97       modes.editingFlags &= ~decimalComma;
98       return;
99     }
100     break;
101   case 'P':
102     if (!next) {
103       modes.scale = n; // kP - decimal scaling by 10**k
104       return;
105     }
106     break;
107   case 'R':
108     switch (next) {
109     case 'N':
110       modes.round = decimal::RoundNearest;
111       return;
112     case 'Z':
113       modes.round = decimal::RoundToZero;
114       return;
115     case 'U':
116       modes.round = decimal::RoundUp;
117       return;
118     case 'D':
119       modes.round = decimal::RoundDown;
120       return;
121     case 'C':
122       modes.round = decimal::RoundCompatible;
123       return;
124     case 'P':
125       modes.round = executionEnvironment.defaultOutputRoundingMode;
126       return;
127     default:
128       break;
129     }
130     break;
131   case 'X':
132     if (!next) {
133       context.HandleRelativePosition(n);
134       return;
135     }
136     break;
137   case 'S':
138     if (next == 'P') {
139       modes.editingFlags |= signPlus;
140       return;
141     }
142     if (!next || next == 'S') {
143       modes.editingFlags &= ~signPlus;
144       return;
145     }
146     break;
147   case 'T': {
148     if (!next) { // Tn
149       context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based
150       return;
151     }
152     if (next == 'L' || next == 'R') { // TLn & TRn
153       context.HandleRelativePosition(next == 'L' ? -n : n);
154       return;
155     }
156   } break;
157   default:
158     break;
159   }
160   if (next) {
161     context.SignalError(IostatErrorInFormat,
162         "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
163   } else {
164     context.SignalError(
165         IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
166   }
167 }
168 
169 // Locates the next data edit descriptor in the format.
170 // Handles all repetition counts and control edit descriptors.
171 // Generally assumes that the format string has survived the common
172 // format validator gauntlet.
173 template <typename CONTEXT>
CueUpNextDataEdit(Context & context,bool stop)174 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
175   int unlimitedLoopCheck{-1};
176   // Do repetitions remain on an unparenthesized data edit?
177   while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
178     offset_ = stack_[height_ - 1].start;
179     int repeat{stack_[height_ - 1].remaining};
180     --height_;
181     if (repeat > 0) {
182       return repeat;
183     }
184   }
185   while (true) {
186     std::optional<int> repeat;
187     bool unlimited{false};
188     auto maybeReversionPoint{offset_};
189     CharType ch{GetNextChar(context)};
190     while (ch == ',' || ch == ':') {
191       // Skip commas, and don't complain if they're missing; the format
192       // validator does that.
193       if (stop && ch == ':') {
194         return 0;
195       }
196       ch = GetNextChar(context);
197     }
198     if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
199       repeat = GetIntField(context, ch);
200       ch = GetNextChar(context);
201     } else if (ch == '*') {
202       unlimited = true;
203       ch = GetNextChar(context);
204       if (ch != '(') {
205         ReportBadFormat(context,
206             "Invalid FORMAT: '*' may appear only before '('",
207             maybeReversionPoint);
208         return 0;
209       }
210       if (height_ != 1) {
211         ReportBadFormat(context,
212             "Invalid FORMAT: '*' must be nested in exactly one set of "
213             "parentheses",
214             maybeReversionPoint);
215         return 0;
216       }
217     }
218     ch = Capitalize(ch);
219     if (ch == '(') {
220       if (height_ >= maxHeight_) {
221         ReportBadFormat(context,
222             "FORMAT stack overflow: too many nested parentheses",
223             maybeReversionPoint);
224         return 0;
225       }
226       stack_[height_].start = offset_ - 1; // the '('
227       RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
228       if (unlimited || height_ == 0) {
229         stack_[height_].remaining = Iteration::unlimited;
230         unlimitedLoopCheck = offset_ - 1;
231       } else if (repeat) {
232         if (*repeat <= 0) {
233           *repeat = 1; // error recovery
234         }
235         stack_[height_].remaining = *repeat - 1;
236       } else {
237         stack_[height_].remaining = 0;
238       }
239       if (height_ == 1) {
240         // Subtle point (F'2018 13.4 para 9): tha last parenthesized group
241         // at height 1 becomes the restart point after control reaches the
242         // end of the format, including its repeat count.
243         stack_[0].start = maybeReversionPoint;
244       }
245       ++height_;
246     } else if (height_ == 0) {
247       ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
248       return 0;
249     } else if (ch == ')') {
250       if (height_ == 1) {
251         if (stop) {
252           return 0; // end of FORMAT and no data items remain
253         }
254         context.AdvanceRecord(); // implied / before rightmost )
255       }
256       auto restart{stack_[height_ - 1].start};
257       if (format_[restart] == '(') {
258         ++restart;
259       }
260       if (stack_[height_ - 1].remaining == Iteration::unlimited) {
261         if (height_ > 1 && GetNextChar(context) != ')') {
262           ReportBadFormat(context,
263               "Unlimited repetition in FORMAT may not be followed by more "
264               "items",
265               restart);
266           return 0;
267         }
268         if (offset_ == unlimitedLoopCheck) {
269           ReportBadFormat(context,
270               "Unlimited repetition in FORMAT lacks data edit descriptors",
271               restart);
272           return 0;
273         }
274         offset_ = restart;
275       } else if (stack_[height_ - 1].remaining-- > 0) {
276         offset_ = restart;
277       } else {
278         --height_;
279       }
280     } else if (ch == '\'' || ch == '"') {
281       // Quoted 'character literal'
282       CharType quote{ch};
283       auto start{offset_};
284       while (offset_ < formatLength_ && format_[offset_] != quote) {
285         ++offset_;
286       }
287       if (offset_ >= formatLength_) {
288         ReportBadFormat(context,
289             "FORMAT missing closing quote on character literal",
290             maybeReversionPoint);
291         return 0;
292       }
293       ++offset_;
294       std::size_t chars{
295           static_cast<std::size_t>(&format_[offset_] - &format_[start])};
296       if (offset_ < formatLength_ && format_[offset_] == quote) {
297         // subtle: handle doubled quote character in a literal by including
298         // the first in the output, then treating the second as the start
299         // of another character literal.
300       } else {
301         --chars;
302       }
303       context.Emit(format_ + start, chars);
304     } else if (ch == 'H') {
305       // 9HHOLLERITH
306       if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
307         ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
308             maybeReversionPoint);
309         return 0;
310       }
311       context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
312       offset_ += *repeat;
313     } else if (ch >= 'A' && ch <= 'Z') {
314       int start{offset_ - 1};
315       CharType next{'\0'};
316       if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
317         CharType peek{Capitalize(PeekNext())};
318         if (peek >= 'A' && peek <= 'Z') {
319           if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
320               ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
321             // Assume a two-letter edit descriptor
322             next = peek;
323             ++offset_;
324           } else {
325             // extension: assume a comma between 'ch' and 'peek'
326           }
327         }
328       }
329       if ((!next &&
330               (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
331                   ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
332                   ch == 'L')) ||
333           (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
334           (ch == 'D' && next == 'T')) {
335         // Data edit descriptor found
336         offset_ = start;
337         return repeat && *repeat > 0 ? *repeat : 1;
338       } else {
339         // Control edit descriptor
340         if (ch == 'T') { // Tn, TLn, TRn
341           repeat = GetIntField(context);
342         }
343         HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
344             repeat ? *repeat : 1);
345       }
346     } else if (ch == '/') {
347       context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
348     } else if (ch == '$' || ch == '\\') {
349       context.mutableModes().nonAdvancing = true;
350     } else if (ch == '\t' || ch == '\v') {
351       // Tabs (extension)
352       // TODO: any other raw characters?
353       context.Emit(format_ + offset_ - 1, 1);
354     } else {
355       ReportBadFormat(
356           context, "Invalid character in FORMAT", maybeReversionPoint);
357       return 0;
358     }
359   }
360 }
361 
362 // Returns the next data edit descriptor
363 template <typename CONTEXT>
GetNextDataEdit(Context & context,int maxRepeat)364 DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
365     Context &context, int maxRepeat) {
366   int repeat{CueUpNextDataEdit(context)};
367   auto start{offset_};
368   DataEdit edit;
369   edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
370   if (edit.descriptor == 'E') {
371     if (auto next{static_cast<char>(Capitalize(PeekNext()))};
372         next == 'N' || next == 'S' || next == 'X') {
373       edit.variation = next;
374       ++offset_;
375     }
376   } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
377     // DT['iotype'][(v_list)] user-defined derived type I/O
378     edit.descriptor = DataEdit::DefinedDerivedType;
379     ++offset_;
380     if (auto quote{static_cast<char>(PeekNext())};
381         quote == '\'' || quote == '"') {
382       // Capture the quoted 'iotype'
383       bool ok{false}, tooLong{false};
384       for (++offset_; offset_ < formatLength_;) {
385         auto ch{static_cast<char>(format_[offset_++])};
386         if (ch == quote &&
387             (offset_ == formatLength_ ||
388                 static_cast<char>(format_[offset_]) != quote)) {
389           ok = true;
390           break; // that was terminating quote
391         } else if (edit.ioTypeChars >= edit.maxIoTypeChars) {
392           tooLong = true;
393         } else {
394           edit.ioType[edit.ioTypeChars++] = ch;
395           if (ch == quote) {
396             ++offset_;
397           }
398         }
399       }
400       if (!ok) {
401         ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
402       } else if (tooLong) {
403         ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
404       }
405     }
406     if (PeekNext() == '(') {
407       // Capture the v_list arguments
408       bool ok{false}, tooLong{false};
409       for (++offset_; offset_ < formatLength_;) {
410         int n{GetIntField(context)};
411         if (edit.vListEntries >= edit.maxVListEntries) {
412           tooLong = true;
413         } else {
414           edit.vList[edit.vListEntries++] = n;
415         }
416         auto ch{static_cast<char>(GetNextChar(context))};
417         if (ch != ',') {
418           ok = ch == ')';
419           break;
420         }
421       }
422       if (!ok) {
423         ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
424       } else if (tooLong) {
425         ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
426       }
427     }
428   }
429   if (edit.descriptor == 'A') { // width is optional for A[w]
430     auto ch{PeekNext()};
431     if (ch >= '0' && ch <= '9') {
432       edit.width = GetIntField(context);
433     }
434   } else if (edit.descriptor != DataEdit::DefinedDerivedType) {
435     edit.width = GetIntField(context);
436   }
437   if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
438     if (edit.width.value_or(-1) == 0) {
439       ReportBadFormat(context, "Input field width is zero", start);
440     }
441   }
442   if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') {
443     ++offset_;
444     edit.digits = GetIntField(context);
445     CharType ch{PeekNext()};
446     if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
447       ++offset_;
448       edit.expoDigits = GetIntField(context);
449     }
450   }
451   edit.modes = context.mutableModes();
452   // Handle repeated nonparenthesized edit descriptors
453   edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
454   if (repeat > maxRepeat) {
455     stack_[height_].start = start; // after repeat count
456     stack_[height_].remaining = repeat - edit.repeat;
457     ++height_;
458   }
459   return edit;
460 }
461 
462 template <typename CONTEXT>
Finish(Context & context)463 void FormatControl<CONTEXT>::Finish(Context &context) {
464   CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
465 }
466 } // namespace Fortran::runtime::io
467 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
468