1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // Implements out-of-line member functions of template class FormatControl 10 11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 13 14 #include "format.h" 15 #include "io-stmt.h" 16 #include "flang/Common/format.h" 17 #include "flang/Decimal/decimal.h" 18 #include "flang/Runtime/main.h" 19 #include <algorithm> 20 #include <limits> 21 22 namespace Fortran::runtime::io { 23 24 template <typename CONTEXT> 25 FormatControl<CONTEXT>::FormatControl(const Terminator &terminator, 26 const CharType *format, std::size_t formatLength, int maxHeight) 27 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format}, 28 formatLength_{static_cast<int>(formatLength)} { 29 RUNTIME_CHECK(terminator, maxHeight == maxHeight_); 30 RUNTIME_CHECK( 31 terminator, formatLength == static_cast<std::size_t>(formatLength_)); 32 stack_[0].start = offset_; 33 stack_[0].remaining = Iteration::unlimited; // 13.4(8) 34 } 35 36 template <typename CONTEXT> 37 int FormatControl<CONTEXT>::GetIntField( 38 IoErrorHandler &handler, CharType firstCh) { 39 CharType ch{firstCh ? firstCh : PeekNext()}; 40 if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) { 41 handler.SignalError(IostatErrorInFormat, 42 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch)); 43 return 0; 44 } 45 int result{0}; 46 bool negate{ch == '-'}; 47 if (negate || ch == '+') { 48 if (firstCh) { 49 firstCh = '\0'; 50 } else { 51 ++offset_; 52 } 53 ch = PeekNext(); 54 } 55 while (ch >= '0' && ch <= '9') { 56 if (result > 57 std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) { 58 handler.SignalError( 59 IostatErrorInFormat, "FORMAT integer field out of range"); 60 return result; 61 } 62 result = 10 * result + ch - '0'; 63 if (firstCh) { 64 firstCh = '\0'; 65 } else { 66 ++offset_; 67 } 68 ch = PeekNext(); 69 } 70 if (negate && (result *= -1) > 0) { 71 handler.SignalError( 72 IostatErrorInFormat, "FORMAT integer field out of range"); 73 } 74 return result; 75 } 76 77 template <typename CONTEXT> 78 static void HandleControl(CONTEXT &context, char ch, char next, int n) { 79 MutableModes &modes{context.mutableModes()}; 80 switch (ch) { 81 case 'B': 82 if (next == 'Z') { 83 modes.editingFlags |= blankZero; 84 return; 85 } 86 if (next == 'N') { 87 modes.editingFlags &= ~blankZero; 88 return; 89 } 90 break; 91 case 'D': 92 if (next == 'C') { 93 modes.editingFlags |= decimalComma; 94 return; 95 } 96 if (next == 'P') { 97 modes.editingFlags &= ~decimalComma; 98 return; 99 } 100 break; 101 case 'P': 102 if (!next) { 103 modes.scale = n; // kP - decimal scaling by 10**k 104 return; 105 } 106 break; 107 case 'R': 108 switch (next) { 109 case 'N': 110 modes.round = decimal::RoundNearest; 111 return; 112 case 'Z': 113 modes.round = decimal::RoundToZero; 114 return; 115 case 'U': 116 modes.round = decimal::RoundUp; 117 return; 118 case 'D': 119 modes.round = decimal::RoundDown; 120 return; 121 case 'C': 122 modes.round = decimal::RoundCompatible; 123 return; 124 case 'P': 125 modes.round = executionEnvironment.defaultOutputRoundingMode; 126 return; 127 default: 128 break; 129 } 130 break; 131 case 'X': 132 if (!next) { 133 context.HandleRelativePosition(n); 134 return; 135 } 136 break; 137 case 'S': 138 if (next == 'P') { 139 modes.editingFlags |= signPlus; 140 return; 141 } 142 if (!next || next == 'S') { 143 modes.editingFlags &= ~signPlus; 144 return; 145 } 146 break; 147 case 'T': { 148 if (!next) { // Tn 149 context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based 150 return; 151 } 152 if (next == 'L' || next == 'R') { // TLn & TRn 153 context.HandleRelativePosition(next == 'L' ? -n : n); 154 return; 155 } 156 } break; 157 default: 158 break; 159 } 160 if (next) { 161 context.SignalError(IostatErrorInFormat, 162 "Unknown '%c%c' edit descriptor in FORMAT", ch, next); 163 } else { 164 context.SignalError( 165 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch); 166 } 167 } 168 169 // Locates the next data edit descriptor in the format. 170 // Handles all repetition counts and control edit descriptors. 171 // Generally assumes that the format string has survived the common 172 // format validator gauntlet. 173 template <typename CONTEXT> 174 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) { 175 int unlimitedLoopCheck{-1}; 176 // Do repetitions remain on an unparenthesized data edit? 177 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') { 178 offset_ = stack_[height_ - 1].start; 179 int repeat{stack_[height_ - 1].remaining}; 180 --height_; 181 if (repeat > 0) { 182 return repeat; 183 } 184 } 185 while (true) { 186 std::optional<int> repeat; 187 bool unlimited{false}; 188 auto maybeReversionPoint{offset_}; 189 CharType ch{GetNextChar(context)}; 190 while (ch == ',' || ch == ':') { 191 // Skip commas, and don't complain if they're missing; the format 192 // validator does that. 193 if (stop && ch == ':') { 194 return 0; 195 } 196 ch = GetNextChar(context); 197 } 198 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) { 199 repeat = GetIntField(context, ch); 200 ch = GetNextChar(context); 201 } else if (ch == '*') { 202 unlimited = true; 203 ch = GetNextChar(context); 204 if (ch != '(') { 205 ReportBadFormat(context, 206 "Invalid FORMAT: '*' may appear only before '('", 207 maybeReversionPoint); 208 return 0; 209 } 210 if (height_ != 1) { 211 ReportBadFormat(context, 212 "Invalid FORMAT: '*' must be nested in exactly one set of " 213 "parentheses", 214 maybeReversionPoint); 215 return 0; 216 } 217 } 218 ch = Capitalize(ch); 219 if (ch == '(') { 220 if (height_ >= maxHeight_) { 221 ReportBadFormat(context, 222 "FORMAT stack overflow: too many nested parentheses", 223 maybeReversionPoint); 224 return 0; 225 } 226 stack_[height_].start = offset_ - 1; // the '(' 227 RUNTIME_CHECK(context, format_[stack_[height_].start] == '('); 228 if (unlimited || height_ == 0) { 229 stack_[height_].remaining = Iteration::unlimited; 230 unlimitedLoopCheck = offset_ - 1; 231 } else if (repeat) { 232 if (*repeat <= 0) { 233 *repeat = 1; // error recovery 234 } 235 stack_[height_].remaining = *repeat - 1; 236 } else { 237 stack_[height_].remaining = 0; 238 } 239 if (height_ == 1) { 240 // Subtle point (F'2018 13.4 para 9): tha last parenthesized group 241 // at height 1 becomes the restart point after control reaches the 242 // end of the format, including its repeat count. 243 stack_[0].start = maybeReversionPoint; 244 } 245 ++height_; 246 } else if (height_ == 0) { 247 ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint); 248 return 0; 249 } else if (ch == ')') { 250 if (height_ == 1) { 251 if (stop) { 252 return 0; // end of FORMAT and no data items remain 253 } 254 context.AdvanceRecord(); // implied / before rightmost ) 255 } 256 auto restart{stack_[height_ - 1].start}; 257 if (format_[restart] == '(') { 258 ++restart; 259 } 260 if (stack_[height_ - 1].remaining == Iteration::unlimited) { 261 if (height_ > 1 && GetNextChar(context) != ')') { 262 ReportBadFormat(context, 263 "Unlimited repetition in FORMAT may not be followed by more " 264 "items", 265 restart); 266 return 0; 267 } 268 if (offset_ == unlimitedLoopCheck) { 269 ReportBadFormat(context, 270 "Unlimited repetition in FORMAT lacks data edit descriptors", 271 restart); 272 return 0; 273 } 274 offset_ = restart; 275 } else if (stack_[height_ - 1].remaining-- > 0) { 276 offset_ = restart; 277 } else { 278 --height_; 279 } 280 } else if (ch == '\'' || ch == '"') { 281 // Quoted 'character literal' 282 CharType quote{ch}; 283 auto start{offset_}; 284 while (offset_ < formatLength_ && format_[offset_] != quote) { 285 ++offset_; 286 } 287 if (offset_ >= formatLength_) { 288 ReportBadFormat(context, 289 "FORMAT missing closing quote on character literal", 290 maybeReversionPoint); 291 return 0; 292 } 293 ++offset_; 294 std::size_t chars{ 295 static_cast<std::size_t>(&format_[offset_] - &format_[start])}; 296 if (PeekNext() == quote) { 297 // subtle: handle doubled quote character in a literal by including 298 // the first in the output, then treating the second as the start 299 // of another character literal. 300 } else { 301 --chars; 302 } 303 context.Emit(format_ + start, chars); 304 } else if (ch == 'H') { 305 // 9HHOLLERITH 306 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) { 307 ReportBadFormat(context, "Invalid width on Hollerith in FORMAT", 308 maybeReversionPoint); 309 return 0; 310 } 311 context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat)); 312 offset_ += *repeat; 313 } else if (ch >= 'A' && ch <= 'Z') { 314 int start{offset_ - 1}; 315 CharType next{'\0'}; 316 if (ch != 'P') { // 1PE5.2 - comma not required (C1302) 317 CharType peek{Capitalize(PeekNext())}; 318 if (peek >= 'A' && peek <= 'Z') { 319 if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' || 320 ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') { 321 // Assume a two-letter edit descriptor 322 next = peek; 323 ++offset_; 324 } else { 325 // extension: assume a comma between 'ch' and 'peek' 326 } 327 } 328 } 329 if ((!next && 330 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' || 331 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' || 332 ch == 'L')) || 333 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) || 334 (ch == 'D' && next == 'T')) { 335 // Data edit descriptor found 336 offset_ = start; 337 return repeat && *repeat > 0 ? *repeat : 1; 338 } else { 339 // Control edit descriptor 340 if (ch == 'T') { // Tn, TLn, TRn 341 repeat = GetIntField(context); 342 } 343 HandleControl(context, static_cast<char>(ch), static_cast<char>(next), 344 repeat ? *repeat : 1); 345 } 346 } else if (ch == '/') { 347 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1); 348 } else if (ch == '$' || ch == '\\') { 349 context.mutableModes().nonAdvancing = true; 350 } else if (ch == '\t' || ch == '\v') { 351 // Tabs (extension) 352 // TODO: any other raw characters? 353 context.Emit(format_ + offset_ - 1, 1); 354 } else { 355 ReportBadFormat( 356 context, "Invalid character in FORMAT", maybeReversionPoint); 357 return 0; 358 } 359 } 360 } 361 362 // Returns the next data edit descriptor 363 template <typename CONTEXT> 364 DataEdit FormatControl<CONTEXT>::GetNextDataEdit( 365 Context &context, int maxRepeat) { 366 int repeat{CueUpNextDataEdit(context)}; 367 auto start{offset_}; 368 DataEdit edit; 369 edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context))); 370 if (edit.descriptor == 'E') { 371 if (auto next{static_cast<char>(Capitalize(PeekNext()))}; 372 next == 'N' || next == 'S' || next == 'X') { 373 edit.variation = next; 374 ++offset_; 375 } 376 } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') { 377 // DT['iotype'][(v_list)] user-defined derived type I/O 378 edit.descriptor = DataEdit::DefinedDerivedType; 379 ++offset_; 380 if (auto quote{static_cast<char>(PeekNext())}; 381 quote == '\'' || quote == '"') { 382 // Capture the quoted 'iotype' 383 bool ok{false}, tooLong{false}; 384 for (++offset_; offset_ < formatLength_;) { 385 auto ch{static_cast<char>(format_[offset_++])}; 386 if (ch == quote && 387 (offset_ == formatLength_ || 388 static_cast<char>(format_[offset_]) != quote)) { 389 ok = true; 390 break; // that was terminating quote 391 } else if (edit.ioTypeChars >= edit.maxIoTypeChars) { 392 tooLong = true; 393 } else { 394 edit.ioType[edit.ioTypeChars++] = ch; 395 if (ch == quote) { 396 ++offset_; 397 } 398 } 399 } 400 if (!ok) { 401 ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start); 402 } else if (tooLong) { 403 ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start); 404 } 405 } 406 if (PeekNext() == '(') { 407 // Capture the v_list arguments 408 bool ok{false}, tooLong{false}; 409 for (++offset_; offset_ < formatLength_;) { 410 int n{GetIntField(context)}; 411 if (edit.vListEntries >= edit.maxVListEntries) { 412 tooLong = true; 413 } else { 414 edit.vList[edit.vListEntries++] = n; 415 } 416 auto ch{static_cast<char>(GetNextChar(context))}; 417 if (ch != ',') { 418 ok = ch == ')'; 419 break; 420 } 421 } 422 if (!ok) { 423 ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start); 424 } else if (tooLong) { 425 ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start); 426 } 427 } 428 } 429 if (edit.descriptor == 'A') { // width is optional for A[w] 430 auto ch{PeekNext()}; 431 if (ch >= '0' && ch <= '9') { 432 edit.width = GetIntField(context); 433 } 434 } else if (edit.descriptor != DataEdit::DefinedDerivedType) { 435 edit.width = GetIntField(context); 436 } 437 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) { 438 if (edit.width.value_or(-1) == 0) { 439 ReportBadFormat(context, "Input field width is zero", start); 440 } 441 } 442 if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') { 443 ++offset_; 444 edit.digits = GetIntField(context); 445 CharType ch{PeekNext()}; 446 if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') { 447 ++offset_; 448 edit.expoDigits = GetIntField(context); 449 } 450 } 451 edit.modes = context.mutableModes(); 452 // Handle repeated nonparenthesized edit descriptors 453 edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0 454 if (repeat > maxRepeat) { 455 stack_[height_].start = start; // after repeat count 456 stack_[height_].remaining = repeat - edit.repeat; 457 ++height_; 458 } 459 return edit; 460 } 461 462 template <typename CONTEXT> 463 void FormatControl<CONTEXT>::Finish(Context &context) { 464 CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */); 465 } 466 } // namespace Fortran::runtime::io 467 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 468