1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implements out-of-line member functions of template class FormatControl
10 
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13 
14 #include "format.h"
15 #include "io-stmt.h"
16 #include "flang/Common/format.h"
17 #include "flang/Decimal/decimal.h"
18 #include "flang/Runtime/main.h"
19 #include <algorithm>
20 #include <limits>
21 
22 namespace Fortran::runtime::io {
23 
24 template <typename CONTEXT>
25 FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
26     const CharType *format, std::size_t formatLength, int maxHeight)
27     : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
28       formatLength_{static_cast<int>(formatLength)} {
29   RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
30   RUNTIME_CHECK(
31       terminator, formatLength == static_cast<std::size_t>(formatLength_));
32   stack_[0].start = offset_;
33   stack_[0].remaining = Iteration::unlimited; // 13.4(8)
34 }
35 
36 template <typename CONTEXT>
37 int FormatControl<CONTEXT>::GetIntField(
38     IoErrorHandler &handler, CharType firstCh) {
39   CharType ch{firstCh ? firstCh : PeekNext()};
40   if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
41     handler.SignalError(IostatErrorInFormat,
42         "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
43     return 0;
44   }
45   int result{0};
46   bool negate{ch == '-'};
47   if (negate || ch == '+') {
48     if (firstCh) {
49       firstCh = '\0';
50     } else {
51       ++offset_;
52     }
53     ch = PeekNext();
54   }
55   while (ch >= '0' && ch <= '9') {
56     if (result >
57         std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) {
58       handler.SignalError(
59           IostatErrorInFormat, "FORMAT integer field out of range");
60       return result;
61     }
62     result = 10 * result + ch - '0';
63     if (firstCh) {
64       firstCh = '\0';
65     } else {
66       ++offset_;
67     }
68     ch = PeekNext();
69   }
70   if (negate && (result *= -1) > 0) {
71     handler.SignalError(
72         IostatErrorInFormat, "FORMAT integer field out of range");
73   }
74   return result;
75 }
76 
77 template <typename CONTEXT>
78 static void HandleControl(CONTEXT &context, char ch, char next, int n) {
79   MutableModes &modes{context.mutableModes()};
80   switch (ch) {
81   case 'B':
82     if (next == 'Z') {
83       modes.editingFlags |= blankZero;
84       return;
85     }
86     if (next == 'N') {
87       modes.editingFlags &= ~blankZero;
88       return;
89     }
90     break;
91   case 'D':
92     if (next == 'C') {
93       modes.editingFlags |= decimalComma;
94       return;
95     }
96     if (next == 'P') {
97       modes.editingFlags &= ~decimalComma;
98       return;
99     }
100     break;
101   case 'P':
102     if (!next) {
103       modes.scale = n; // kP - decimal scaling by 10**k
104       return;
105     }
106     break;
107   case 'R':
108     switch (next) {
109     case 'N':
110       modes.round = decimal::RoundNearest;
111       return;
112     case 'Z':
113       modes.round = decimal::RoundToZero;
114       return;
115     case 'U':
116       modes.round = decimal::RoundUp;
117       return;
118     case 'D':
119       modes.round = decimal::RoundDown;
120       return;
121     case 'C':
122       modes.round = decimal::RoundCompatible;
123       return;
124     case 'P':
125       modes.round = executionEnvironment.defaultOutputRoundingMode;
126       return;
127     default:
128       break;
129     }
130     break;
131   case 'X':
132     if (!next) {
133       context.HandleRelativePosition(n);
134       return;
135     }
136     break;
137   case 'S':
138     if (next == 'P') {
139       modes.editingFlags |= signPlus;
140       return;
141     }
142     if (!next || next == 'S') {
143       modes.editingFlags &= ~signPlus;
144       return;
145     }
146     break;
147   case 'T': {
148     if (!next) { // Tn
149       context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based
150       return;
151     }
152     if (next == 'L' || next == 'R') { // TLn & TRn
153       context.HandleRelativePosition(next == 'L' ? -n : n);
154       return;
155     }
156   } break;
157   default:
158     break;
159   }
160   if (next) {
161     context.SignalError(IostatErrorInFormat,
162         "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
163   } else {
164     context.SignalError(
165         IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
166   }
167 }
168 
169 // Locates the next data edit descriptor in the format.
170 // Handles all repetition counts and control edit descriptors.
171 // Generally assumes that the format string has survived the common
172 // format validator gauntlet.
173 template <typename CONTEXT>
174 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
175   int unlimitedLoopCheck{-1};
176   // Do repetitions remain on an unparenthesized data edit?
177   while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
178     offset_ = stack_[height_ - 1].start;
179     int repeat{stack_[height_ - 1].remaining};
180     --height_;
181     if (repeat > 0) {
182       return repeat;
183     }
184   }
185   while (true) {
186     std::optional<int> repeat;
187     bool unlimited{false};
188     auto maybeReversionPoint{offset_};
189     CharType ch{GetNextChar(context)};
190     while (ch == ',' || ch == ':') {
191       // Skip commas, and don't complain if they're missing; the format
192       // validator does that.
193       if (stop && ch == ':') {
194         return 0;
195       }
196       ch = GetNextChar(context);
197     }
198     if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
199       repeat = GetIntField(context, ch);
200       ch = GetNextChar(context);
201     } else if (ch == '*') {
202       unlimited = true;
203       ch = GetNextChar(context);
204       if (ch != '(') {
205         ReportBadFormat(context,
206             "Invalid FORMAT: '*' may appear only before '('",
207             maybeReversionPoint);
208         return 0;
209       }
210     }
211     ch = Capitalize(ch);
212     if (ch == '(') {
213       if (height_ >= maxHeight_) {
214         ReportBadFormat(context,
215             "FORMAT stack overflow: too many nested parentheses",
216             maybeReversionPoint);
217         return 0;
218       }
219       stack_[height_].start = offset_ - 1; // the '('
220       RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
221       if (unlimited || height_ == 0) {
222         stack_[height_].remaining = Iteration::unlimited;
223         unlimitedLoopCheck = offset_ - 1;
224       } else if (repeat) {
225         if (*repeat <= 0) {
226           *repeat = 1; // error recovery
227         }
228         stack_[height_].remaining = *repeat - 1;
229       } else {
230         stack_[height_].remaining = 0;
231       }
232       if (height_ == 1) {
233         // Subtle point (F'2018 13.4 para 9): tha last parenthesized group
234         // at height 1 becomes the restart point after control reaches the
235         // end of the format, including its repeat count.
236         stack_[0].start = maybeReversionPoint;
237       }
238       ++height_;
239     } else if (height_ == 0) {
240       ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
241       return 0;
242     } else if (ch == ')') {
243       if (height_ == 1) {
244         if (stop) {
245           return 0; // end of FORMAT and no data items remain
246         }
247         context.AdvanceRecord(); // implied / before rightmost )
248       }
249       auto restart{stack_[height_ - 1].start};
250       if (format_[restart] == '(') {
251         ++restart;
252       }
253       if (stack_[height_ - 1].remaining == Iteration::unlimited) {
254         offset_ = restart;
255         if (offset_ == unlimitedLoopCheck) {
256           ReportBadFormat(context,
257               "Unlimited repetition in FORMAT lacks data edit descriptors",
258               restart);
259         }
260       } else if (stack_[height_ - 1].remaining-- > 0) {
261         offset_ = restart;
262       } else {
263         --height_;
264       }
265     } else if (ch == '\'' || ch == '"') {
266       // Quoted 'character literal'
267       CharType quote{ch};
268       auto start{offset_};
269       while (offset_ < formatLength_ && format_[offset_] != quote) {
270         ++offset_;
271       }
272       if (offset_ >= formatLength_) {
273         ReportBadFormat(context,
274             "FORMAT missing closing quote on character literal",
275             maybeReversionPoint);
276         return 0;
277       }
278       ++offset_;
279       std::size_t chars{
280           static_cast<std::size_t>(&format_[offset_] - &format_[start])};
281       if (PeekNext() == quote) {
282         // subtle: handle doubled quote character in a literal by including
283         // the first in the output, then treating the second as the start
284         // of another character literal.
285       } else {
286         --chars;
287       }
288       context.Emit(format_ + start, chars);
289     } else if (ch == 'H') {
290       // 9HHOLLERITH
291       if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
292         ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
293             maybeReversionPoint);
294         return 0;
295       }
296       context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
297       offset_ += *repeat;
298     } else if (ch >= 'A' && ch <= 'Z') {
299       int start{offset_ - 1};
300       CharType next{'\0'};
301       if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
302         CharType peek{Capitalize(PeekNext())};
303         if (peek >= 'A' && peek <= 'Z') {
304           if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
305               ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
306             // Assume a two-letter edit descriptor
307             next = peek;
308             ++offset_;
309           } else {
310             // extension: assume a comma between 'ch' and 'peek'
311           }
312         }
313       }
314       if ((!next &&
315               (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
316                   ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
317                   ch == 'L')) ||
318           (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
319           (ch == 'D' && next == 'T')) {
320         // Data edit descriptor found
321         offset_ = start;
322         return repeat && *repeat > 0 ? *repeat : 1;
323       } else {
324         // Control edit descriptor
325         if (ch == 'T') { // Tn, TLn, TRn
326           repeat = GetIntField(context);
327         }
328         HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
329             repeat ? *repeat : 1);
330       }
331     } else if (ch == '/') {
332       context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
333     } else if (ch == '$' || ch == '\\') {
334       context.mutableModes().nonAdvancing = true;
335     } else if (ch == '\t' || ch == '\v') {
336       // Tabs (extension)
337       // TODO: any other raw characters?
338       context.Emit(format_ + offset_ - 1, 1);
339     } else {
340       ReportBadFormat(
341           context, "Invalid character in FORMAT", maybeReversionPoint);
342       return 0;
343     }
344   }
345 }
346 
347 // Returns the next data edit descriptor
348 template <typename CONTEXT>
349 DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
350     Context &context, int maxRepeat) {
351   int repeat{CueUpNextDataEdit(context)};
352   auto start{offset_};
353   DataEdit edit;
354   edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
355   if (edit.descriptor == 'E') {
356     if (auto next{static_cast<char>(Capitalize(PeekNext()))};
357         next == 'N' || next == 'S' || next == 'X') {
358       edit.variation = next;
359       ++offset_;
360     }
361   } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
362     // DT['iotype'][(v_list)] user-defined derived type I/O
363     edit.descriptor = DataEdit::DefinedDerivedType;
364     ++offset_;
365     if (auto quote{static_cast<char>(PeekNext())};
366         quote == '\'' || quote == '"') {
367       // Capture the quoted 'iotype'
368       bool ok{false}, tooLong{false};
369       for (++offset_; offset_ < formatLength_;) {
370         auto ch{static_cast<char>(format_[offset_++])};
371         if (ch == quote &&
372             (offset_ == formatLength_ ||
373                 static_cast<char>(format_[offset_]) != quote)) {
374           ok = true;
375           break; // that was terminating quote
376         } else if (edit.ioTypeChars >= edit.maxIoTypeChars) {
377           tooLong = true;
378         } else {
379           edit.ioType[edit.ioTypeChars++] = ch;
380           if (ch == quote) {
381             ++offset_;
382           }
383         }
384       }
385       if (!ok) {
386         ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
387       } else if (tooLong) {
388         ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
389       }
390     }
391     if (PeekNext() == '(') {
392       // Capture the v_list arguments
393       bool ok{false}, tooLong{false};
394       for (++offset_; offset_ < formatLength_;) {
395         int n{GetIntField(context)};
396         if (edit.vListEntries >= edit.maxVListEntries) {
397           tooLong = true;
398         } else {
399           edit.vList[edit.vListEntries++] = n;
400         }
401         auto ch{static_cast<char>(GetNextChar(context))};
402         if (ch != ',') {
403           ok = ch == ')';
404           break;
405         }
406       }
407       if (!ok) {
408         ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
409       } else if (tooLong) {
410         ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
411       }
412     }
413   }
414   if (edit.descriptor == 'A') { // width is optional for A[w]
415     auto ch{PeekNext()};
416     if (ch >= '0' && ch <= '9') {
417       edit.width = GetIntField(context);
418     }
419   } else if (edit.descriptor != DataEdit::DefinedDerivedType) {
420     edit.width = GetIntField(context);
421   }
422   if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') {
423     ++offset_;
424     edit.digits = GetIntField(context);
425     CharType ch{PeekNext()};
426     if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
427       ++offset_;
428       edit.expoDigits = GetIntField(context);
429     }
430   }
431   edit.modes = context.mutableModes();
432   // Handle repeated nonparenthesized edit descriptors
433   edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
434   if (repeat > maxRepeat) {
435     stack_[height_].start = start; // after repeat count
436     stack_[height_].remaining = repeat - edit.repeat;
437     ++height_;
438   }
439   return edit;
440 }
441 
442 template <typename CONTEXT>
443 void FormatControl<CONTEXT>::Finish(Context &context) {
444   CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
445 }
446 } // namespace Fortran::runtime::io
447 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
448