1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "preprocessor.h" 11 #include "token-sequence.h" 12 #include "flang/Common/idioms.h" 13 #include "flang/Parser/characters.h" 14 #include "flang/Parser/message.h" 15 #include "flang/Parser/source.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 allSources_{preprocessor_.allSources()}, features_{lfc}, 32 encoding_{allSources_.encoding()} {} 33 34 Prescanner::Prescanner(const Prescanner &that) 35 : messages_{that.messages_}, cooked_{that.cooked_}, 36 preprocessor_{that.preprocessor_}, allSources_{that.allSources_}, 37 features_{that.features_}, inFixedForm_{that.inFixedForm_}, 38 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 39 encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 40 1}, 41 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 42 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 43 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 44 45 static inline constexpr bool IsFixedFormCommentChar(char ch) { 46 return ch == '!' 
|| ch == '*' || ch == 'C' || ch == 'c'; 47 } 48 49 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 50 char *p{dir.GetMutableCharData()}; 51 char *limit{p + dir.SizeInChars()}; 52 for (; p < limit; ++p) { 53 if (*p != ' ') { 54 CHECK(IsFixedFormCommentChar(*p)); 55 *p = '!'; 56 return; 57 } 58 } 59 DIE("compiler directive all blank"); 60 } 61 62 void Prescanner::Prescan(ProvenanceRange range) { 63 startProvenance_ = range.start(); 64 start_ = allSources_.GetSource(range); 65 CHECK(start_); 66 limit_ = start_ + range.size(); 67 nextLine_ = start_; 68 const bool beganInFixedForm{inFixedForm_}; 69 if (prescannerNesting_ > maxPrescannerNesting) { 70 Say(GetProvenance(start_), 71 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 72 return; 73 } 74 while (!IsAtEnd()) { 75 Statement(); 76 } 77 if (inFixedForm_ != beganInFixedForm) { 78 std::string dir{"!dir$ "}; 79 if (beganInFixedForm) { 80 dir += "fixed"; 81 } else { 82 dir += "free"; 83 } 84 dir += '\n'; 85 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; 86 tokens.Emit(cooked_); 87 } 88 } 89 90 void Prescanner::Statement() { 91 TokenSequence tokens; 92 LineClassification line{ClassifyLine(nextLine_)}; 93 switch (line.kind) { 94 case LineClassification::Kind::Comment: 95 nextLine_ += line.payloadOffset; // advance to '!' 
or newline 96 NextLine(); 97 return; 98 case LineClassification::Kind::IncludeLine: 99 FortranInclude(nextLine_ + line.payloadOffset); 100 NextLine(); 101 return; 102 case LineClassification::Kind::ConditionalCompilationDirective: 103 case LineClassification::Kind::IncludeDirective: 104 case LineClassification::Kind::DefinitionDirective: 105 case LineClassification::Kind::PreprocessorDirective: 106 preprocessor_.Directive(TokenizePreprocessorDirective(), this); 107 return; 108 case LineClassification::Kind::CompilerDirective: 109 directiveSentinel_ = line.sentinel; 110 CHECK(InCompilerDirective()); 111 BeginStatementAndAdvance(); 112 if (inFixedForm_) { 113 CHECK(IsFixedFormCommentChar(*at_)); 114 } else { 115 while (*at_ == ' ' || *at_ == '\t') { 116 ++at_, ++column_; 117 } 118 CHECK(*at_ == '!'); 119 } 120 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 121 // OpenMP conditional compilation line. Remove the sentinel and then 122 // treat the line as if it were normal source. 123 at_ += 2, column_ += 2; 124 if (inFixedForm_) { 125 LabelField(tokens); 126 } else { 127 SkipSpaces(); 128 } 129 } else { 130 // Compiler directive. Emit normalized sentinel. 
131 EmitChar(tokens, '!'); 132 ++at_, ++column_; 133 for (const char *sp{directiveSentinel_}; *sp != '\0'; 134 ++sp, ++at_, ++column_) { 135 EmitChar(tokens, *sp); 136 } 137 if (*at_ == ' ') { 138 EmitChar(tokens, ' '); 139 ++at_, ++column_; 140 } 141 tokens.CloseToken(); 142 } 143 break; 144 case LineClassification::Kind::Source: 145 BeginStatementAndAdvance(); 146 if (inFixedForm_) { 147 LabelField(tokens); 148 } else if (skipLeadingAmpersand_) { 149 skipLeadingAmpersand_ = false; 150 const char *p{SkipWhiteSpace(at_)}; 151 if (p < limit_ && *p == '&') { 152 column_ += ++p - at_; 153 at_ = p; 154 } 155 } else { 156 SkipSpaces(); 157 } 158 break; 159 } 160 161 while (NextToken(tokens)) { 162 } 163 164 Provenance newlineProvenance{GetCurrentProvenance()}; 165 if (std::optional<TokenSequence> preprocessed{ 166 preprocessor_.MacroReplacement(tokens, *this)}) { 167 // Reprocess the preprocessed line. Append a newline temporarily. 168 preprocessed->PutNextTokenChar('\n', newlineProvenance); 169 preprocessed->CloseToken(); 170 const char *ppd{preprocessed->ToCharBlock().begin()}; 171 LineClassification ppl{ClassifyLine(ppd)}; 172 preprocessed->RemoveLastToken(); // remove the newline 173 switch (ppl.kind) { 174 case LineClassification::Kind::Comment: 175 break; 176 case LineClassification::Kind::IncludeLine: 177 FortranInclude(ppd + ppl.payloadOffset); 178 break; 179 case LineClassification::Kind::ConditionalCompilationDirective: 180 case LineClassification::Kind::IncludeDirective: 181 case LineClassification::Kind::DefinitionDirective: 182 case LineClassification::Kind::PreprocessorDirective: 183 Say(preprocessed->GetProvenanceRange(), 184 "Preprocessed line resembles a preprocessor directive"_en_US); 185 preprocessed->ToLowerCase().CheckBadFortranCharacters(messages_).Emit( 186 cooked_); 187 break; 188 case LineClassification::Kind::CompilerDirective: 189 if (preprocessed->HasRedundantBlanks()) { 190 preprocessed->RemoveRedundantBlanks(); 191 } 192 
NormalizeCompilerDirectiveCommentMarker(*preprocessed); 193 preprocessed->ToLowerCase(); 194 SourceFormChange(preprocessed->ToString()); 195 preprocessed->ClipComment(true /* skip first ! */) 196 .CheckBadFortranCharacters(messages_) 197 .Emit(cooked_); 198 break; 199 case LineClassification::Kind::Source: 200 if (inFixedForm_) { 201 if (preprocessed->HasBlanks(/*after column*/ 6)) { 202 preprocessed->RemoveBlanks(/*after column*/ 6); 203 } 204 } else { 205 if (preprocessed->HasRedundantBlanks()) { 206 preprocessed->RemoveRedundantBlanks(); 207 } 208 } 209 preprocessed->ToLowerCase() 210 .ClipComment() 211 .CheckBadFortranCharacters(messages_) 212 .Emit(cooked_); 213 break; 214 } 215 } else { 216 tokens.ToLowerCase(); 217 if (line.kind == LineClassification::Kind::CompilerDirective) { 218 SourceFormChange(tokens.ToString()); 219 } 220 tokens.CheckBadFortranCharacters(messages_).Emit(cooked_); 221 } 222 if (omitNewline_) { 223 omitNewline_ = false; 224 } else { 225 cooked_.Put('\n', newlineProvenance); 226 } 227 directiveSentinel_ = nullptr; 228 } 229 230 TokenSequence Prescanner::TokenizePreprocessorDirective() { 231 CHECK(!IsAtEnd() && !inPreprocessorDirective_); 232 inPreprocessorDirective_ = true; 233 BeginStatementAndAdvance(); 234 TokenSequence tokens; 235 while (NextToken(tokens)) { 236 } 237 inPreprocessorDirective_ = false; 238 return tokens; 239 } 240 241 void Prescanner::NextLine() { 242 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 243 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 244 if (!v) { 245 nextLine_ = limit_; 246 } else { 247 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 248 nextLine_ = nl + 1; 249 } 250 } 251 252 void Prescanner::LabelField(TokenSequence &token) { 253 const char *bad{nullptr}; 254 int outCol{1}; 255 for (; *at_ != '\n' && column_ <= 6; ++at_) { 256 if (*at_ == '\t') { 257 ++at_; 258 column_ = 7; 259 break; 260 } 261 if (*at_ != ' ' && 262 !(*at_ == '0' && column_ == 6)) { // '0' 
in column 6 becomes space 263 EmitChar(token, *at_); 264 ++outCol; 265 if (!bad && !IsDecimalDigit(*at_)) { 266 bad = at_; 267 } 268 } 269 ++column_; 270 } 271 if (outCol == 1) { // empty label field 272 // Emit a space so that, if the line is rescanned after preprocessing, 273 // a leading 'C' or 'D' won't be left-justified and then accidentally 274 // misinterpreted as a comment card. 275 EmitChar(token, ' '); 276 ++outCol; 277 } else { 278 if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { 279 Say(GetProvenance(bad), 280 "Character in fixed-form label field must be a digit"_en_US); 281 } 282 } 283 token.CloseToken(); 284 SkipToNextSignificantCharacter(); 285 if (IsDecimalDigit(*at_)) { 286 Say(GetProvenance(at_), 287 "Label digit is not in fixed-form label field"_en_US); 288 } 289 } 290 291 void Prescanner::SkipToEndOfLine() { 292 while (*at_ != '\n') { 293 ++at_, ++column_; 294 } 295 } 296 297 bool Prescanner::MustSkipToEndOfLine() const { 298 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 299 return true; // skip over ignored columns in right margin (73:80) 300 } else if (*at_ == '!' && !inCharLiteral_) { 301 return true; // inline comment goes to end of source line 302 } else { 303 return false; 304 } 305 } 306 307 void Prescanner::NextChar() { 308 CHECK(*at_ != '\n'); 309 ++at_, ++column_; 310 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 311 // UTF-8 byte order mark - treat this file as UTF-8 312 at_ += 3; 313 encoding_ = Encoding::UTF_8; 314 } 315 SkipToNextSignificantCharacter(); 316 } 317 318 // Skip everything that should be ignored until the next significant 319 // character is reached; handles C-style comments in preprocessing 320 // directives, Fortran ! comments, stuff after the right margin in 321 // fixed form, and all forms of line continuation. 
void Prescanner::SkipToNextSignificantCharacter() {
  if (inPreprocessorDirective_) {
    SkipCComments();
  } else {
    bool mightNeedSpace{false};
    if (MustSkipToEndOfLine()) {
      SkipToEndOfLine();
    } else {
      // Only a line that ends right here (no skipped comment or margin)
      // passes "true" to the first continuation attempt.
      mightNeedSpace = *at_ == '\n';
    }
    for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
      if (MustSkipToEndOfLine()) {
        SkipToEndOfLine();
      }
    }
    if (*at_ == '\t') {
      tabInCurrentLine_ = true;
    }
  }
}

// Skips over any sequence of C-style comments at the current position and,
// inside a preprocessor directive, over backslash-newline line splices.
void Prescanner::SkipCComments() {
  while (true) {
    if (IsCComment(at_)) {
      if (const char *after{SkipCComment(at_)}) {
        column_ += after - at_;
        // May have skipped over one or more newlines; relocate the start of
        // the next line.
        nextLine_ = at_ = after;
        NextLine();
      } else {
        // Don't emit any messages about unclosed C-style comments, because
        // the sequence /* can appear legally in a FORMAT statement. There's
        // no ambiguity, since the sequence */ cannot appear legally.
        break;
      }
    } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
        at_[1] == '\n' && !IsAtEnd()) {
      BeginSourceLineAndAdvance();
    } else {
      break;
    }
  }
}

// Advances over blanks and tabs using NextChar(), so that comments and
// continuations are also handled, and cancels any pending inserted space.
void Prescanner::SkipSpaces() {
  while (*at_ == ' ' || *at_ == '\t') {
    NextChar();
  }
  insertASpace_ = false;
}

// Returns a pointer to the first character at or after p that is neither
// a blank nor a tab.  Does not touch any prescanner state.
const char *Prescanner::SkipWhiteSpace(const char *p) {
  while (*p == ' ' || *p == '\t') {
    ++p;
  }
  return p;
}

// Like SkipWhiteSpace(), but also steps over complete C-style comments.
// Stops at an unterminated comment.
const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
  while (true) {
    if (*p == ' ' || *p == '\t') {
      ++p;
    } else if (IsCComment(p)) {
      if (const char *after{SkipCComment(p)}) {
        p = after;
      } else {
        break;
      }
    } else {
      break;
    }
  }
  return p;
}

// Given p at the two-character opener of a C-style comment, returns a
// pointer just past the closing "*/", or nullptr if limit_ is reached
// first.
const char *Prescanner::SkipCComment(const char *p) const {
  // star/slash hold the two most recently consumed characters.
  char star{' '}, slash{' '};
  p += 2;
  while (star != '*' || slash != '/') {
    if (p >= limit_) {
      return nullptr; // signifies an unterminated comment
    }
    star = slash;
    slash = *p++;
  }
  return p;
}

// Scans the next token of the current statement and appends it to tokens.
// Returns false, without adding a token, once the end of the statement
// (a newline that is not continued) has been reached.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource()) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US);
      }
      SkipCComments();
    }
    if (*at_ == ' ' || *at_ == '\t') {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n') {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive. This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  if (insertASpace_) {
    // A continuation requested a space; it becomes the first character of
    // whatever token follows.
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // Digit string: may turn out to be a Hollerith count, a real literal,
    // a hexadecimal constant in a directive, or a kind prefix of a
    // character literal.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      if (n < maxHollerith) { // stop accumulating once n is out of range
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      // "0x..." is recognized only within a preprocessor directive.
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (IsLetter(*at_)) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    do {
    } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
    if ((*at_ == '\'' || *at_ == '"') &&
        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
      QuotedCharacterLiteral(tokens, start);
    }
    preventHollerith_ = false;
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentStatement_;
    }
  } else {
    char ch{*at_};
    // Track parenthesis/bracket depth; it is consulted by
    // IsImplicitContinuation().
    if (ch == '(' || ch == '[') {
      ++delimiterNesting_;
    } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
      --delimiterNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentStatement_ = true;
    }
  }
  tokens.CloseToken();
  return true;
}

// If the current character is an exponent letter (E or D, either case),
// emits it in lower case along with an optional sign, the exponent digits,
// and an optional "_kind" suffix, and returns true.  Otherwise emits
// nothing and returns false.
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
  char ed{ToLowerCaseLetter(*at_)};
  if (ed != 'e' && ed != 'd') {
    return false;
  }
  EmitCharAndAdvance(tokens, ed);
  if (*at_ == '+' || *at_ == '-') {
    EmitCharAndAdvance(tokens, *at_);
  }
  while (IsDecimalDigit(*at_)) {
    EmitCharAndAdvance(tokens, *at_);
  }
  if (*at_ == '_') {
    while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
    }
  }
  return true;
}

// Scans a quoted character literal whose opening quote is at at_ into the
// current token.  "start" is where the enclosing token began and is used
// only for the provenance of the "Incomplete character literal" message.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false};
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later). There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break;
      }
      inCharLiteral_ = true;
    }
  }
  inCharLiteral_ = false;
}

// Scans a Hollerith literal of "count" characters; at_ must be at the 'H'
// or 'h' that follows the count.  "start" is where the token began and is
// used only for message provenance.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
    } else if (*at_ == '\n') {
      Say(GetProvenanceRange(start, at_),
          "Possible truncated Hollerith literal"_en_US);
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the last byte of the character just consumed.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}

// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
// Returns true after emitting one padding blank; returns false when no
// padding applies or no continuation line follows.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
  while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
    if (column_ < fixedFormColumnLimit_) {
      tokens.PutNextTokenChar(' ', spaceProvenance_);
      ++column_;
      return true;
    }
    if (!FixedFormContinuation(false /*no need to insert space*/) ||
        tabInCurrentLine_) {
      return false;
    }
    CHECK(column_ == 7);
    --at_; // point to column 6 of continuation line
    column_ = 6;
  }
  return false;
}

// True when the fixed-form line beginning at "start" is a comment line:
// it has a comment character, '%', or (unless OldDebugLines is enabled)
// 'D'/'d' in column 1; or it is blank through the column limit; or its
// first significant character is '!' anywhere but column 6; or it is empty.
bool Prescanner::IsFixedFormCommentLine(const char *start) const {
  const char *p{start};
  if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
      ((*p == 'D' || *p == 'd') &&
          !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
    return true;
  }
  bool anyTabs{false};
  while (true) {
    if (*p == ' ') {
      ++p;
    } else if (*p == '\t') {
      anyTabs = true;
      ++p;
    } else if (*p == '0' && !anyTabs && p == start + 5) {
      ++p; // 0 in column 6 must be treated as a space
    } else {
      break;
    }
  }
  if (!anyTabs && p >= start + fixedFormColumnLimit_) {
    return true;
  }
  if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
    return true;
  }
  return *p == '\n';
}

// If the free-form line at p holds only white space and C-style comments
// before a '!' or the newline, returns a pointer to that '!' or newline;
// otherwise returns nullptr.
const char *Prescanner::IsFreeFormComment(const char *p) const {
  p = SkipWhiteSpaceAndCComments(p);
  if (*p == '!' || *p == '\n') {
    return p;
  } else {
    return nullptr;
  }
}

// Recognizes a Fortran INCLUDE line: optional white space, the keyword
// "include" in any letter case, optional white space, and a quote mark.
// On success returns the offset of the opening quote from "start".
std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
  const char *p{SkipWhiteSpace(start)};
  for (char ch : "include"s) {
    if (ToLowerCaseLetter(*p++) != ch) {
      return std::nullopt;
    }
  }
  p = SkipWhiteSpace(p);
  if (*p == '"' || *p == '\'') {
    return {p - start};
  }
  return std::nullopt;
}

// Handles a Fortran INCLUDE line.  "firstQuote" points at or before the
// opening quote of the path.  The path is extracted (doubled quotes stand
// for one quote character), the file is opened relative to the directory
// of the current source file, and its contents are prescanned by a copy
// of this prescanner.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break;
      }
      ++p;
    }
    path += *p;
  }
  if (*p != quote) {
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    Say(GetProvenanceRange(garbage, p),
        "excess characters after path name"_en_US);
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  std::optional<std::string> prependPath;
  if (const SourceFile *currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, error.str());
  } else if (included->bytes() > 0) {
    // NOTE(review): Statement() also calls this function with a pointer
    // into a preprocessed token buffer; in that case p and nextLine_ point
    // into different buffers and this length looks suspect -- confirm.
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources_.AddIncludedFile(*included, includeLineRange)};
    Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange);
  }
}

// If the line at "start" is a preprocessor directive line, returns a
// pointer to the first non-blank character after its '#'; otherwise
// returns nullptr.  In fixed form, a '#' preceded by exactly five blanks
// (i.e., in column 6) is not taken to begin a directive.
const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
  const char *p{start};
  for (; *p == ' '; ++p) {
  }
  if (*p == '#') {
    if (inFixedForm_ && p == start + 5) {
      return nullptr;
    }
  } else {
    p = SkipWhiteSpace(p);
    if (*p != '#') {
      return nullptr;
    }
  }
  return SkipWhiteSpace(p + 1);
}

// True when the line at nextLine_ is a preprocessor directive line.
bool Prescanner::IsNextLinePreprocessorDirective() const {
  return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
}

// Called while looking for a continuation line.  Returns true after
// consuming a line (a comment, or a preprocessor directive that is allowed
// to appear between continued lines) so that the caller should try again
// with the following line; returns false when the search should stop.
bool Prescanner::SkipCommentLine(bool afterAmpersand) {
  if (IsAtEnd()) {
    if (afterAmpersand && prescannerNesting_ > 0) {
      // A continuation marker at the end of the last line in an
      // include file inhibits the newline for that line.
      SkipToEndOfLine();
      omitNewline_ = true;
    }
    return false;
  }
  auto lineClass{ClassifyLine(nextLine_)};
  if (lineClass.kind == LineClassification::Kind::Comment) {
    NextLine();
    return true;
  } else if (inPreprocessorDirective_) {
    return false;
  } else if (lineClass.kind ==
          LineClassification::Kind::ConditionalCompilationDirective ||
      lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
    // Allow conditional compilation directives (e.g., #ifdef) to affect
    // continuation lines.
    // Allow other preprocessor directives, too, except #include
    // (when it does not follow '&'), #define, and #undef (because
    // they cannot be allowed to affect preceding text on a
    // continued line).
    preprocessor_.Directive(TokenizePreprocessorDirective(), this);
    return true;
  } else if (afterAmpersand &&
      (lineClass.kind == LineClassification::Kind::IncludeDirective ||
          lineClass.kind == LineClassification::Kind::IncludeLine)) {
    // An include follows a line that ended with '&': end the current line
    // without a newline and arrange for a leading '&' to be skipped on the
    // next source line.
    SkipToEndOfLine();
    omitNewline_ = true;
    skipLeadingAmpersand_ = true;
    return false;
  } else {
    return false;
  }
}

// If the line at nextLine_ is a fixed-form continuation of the current
// statement or compiler directive, returns a pointer to the position in
// that line at which scanning should resume; otherwise returns nullptr.
const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
  if (IsAtEnd()) {
    return nullptr;
  }
  tabInCurrentLine_ = false;
  char col1{*nextLine_};
  if (InCompilerDirective()) {
    // Must be a continued compiler directive.
    if (!IsFixedFormCommentChar(col1)) {
      return nullptr;
    }
    // Columns 2-5 must repeat the current directive's sentinel (compared
    // case-insensitively), padded with blanks.
    int j{1};
    for (; j < 5; ++j) {
      char ch{directiveSentinel_[j - 1]};
      if (ch == '\0') {
        break;
      }
      if (ch != ToLowerCaseLetter(nextLine_[j])) {
        return nullptr;
      }
    }
    for (; j < 5; ++j) {
      if (nextLine_[j] != ' ') {
        return nullptr;
      }
    }
    char col6{nextLine_[5]};
    if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
      if (nextLine_[6] != ' ' && mightNeedSpace) {
        insertASpace_ = true;
      }
      return nextLine_ + 6;
    }
    return nullptr;
  } else {
    // Normal case: not in a compiler directive.
    if (col1 == '&' &&
        features_.IsEnabled(
            LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
      // Extension: '&' as continuation marker
      if (features_.ShouldWarn(
              LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
        Say(GetProvenance(nextLine_), "nonstandard usage"_en_US);
      }
      return nextLine_ + 1;
    }
    if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
      tabInCurrentLine_ = true;
      return nextLine_ + 2; // VAX extension
    }
    if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' &&
        nextLine_[3] == ' ' && nextLine_[4] == ' ') {
      // Five leading blanks and a continuation character in column 6.
      char col6{nextLine_[5]};
      if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
        return nextLine_ + 6;
      }
    }
    if (IsImplicitContinuation()) {
      return nextLine_;
    }
  }
  return nullptr; // not a continuation line
}

// If the line at nextLine_ is a free-form continuation of the current
// statement or compiler directive, returns a pointer to the position in
// that line at which scanning should resume; otherwise returns nullptr.
// "ampersand" tells whether the current line ended with '&'.
const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
  const char *p{nextLine_};
  if (p >= limit_) {
    return nullptr;
  }
  p = SkipWhiteSpace(p);
  if (InCompilerDirective()) {
    // The continuation line must repeat '!' and the directive's sentinel
    // (compared case-insensitively).
    if (*p++ != '!') {
      return nullptr;
    }
    for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
      if (*s != ToLowerCaseLetter(*p)) {
        return nullptr;
      }
    }
    p = SkipWhiteSpace(p);
    if (*p == '&') {
      if (!ampersand) {
        insertASpace_ = true;
      }
      return p + 1;
    } else if (ampersand) {
      return p;
    } else {
      return nullptr;
    }
  } else {
    if (*p == '&') {
      return p + 1;
    } else if (*p == '!' || *p == '\n' || *p == '#') {
      return nullptr;
    } else if (ampersand || IsImplicitContinuation()) {
      // No leading '&': resume one character early when the line has
      // leading white space, else request an inserted space.
      if (p > nextLine_) {
        --p;
      } else {
        insertASpace_ = true;
      }
      return p;
    } else {
      return nullptr;
    }
  }
}

// Attempts to continue the current fixed-form line onto a following line,
// skipping over intervening comment lines.  On success, scanning resumes
// at column 7 of the continuation line and true is returned.
bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
  // N.B. We accept '&' as a continuation indicator in fixed form, too,
  // but not in a character literal.
  if (*at_ == '&' && inCharLiteral_) {
    return false;
  }
  do {
    if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
      BeginSourceLine(cont);
      column_ = 7;
      NextLine();
      return true;
    }
  } while (SkipCommentLine(false /* not after ampersand */));
  return false;
}

// Attempts to continue the current free-form line onto a following line,
// skipping over intervening comment lines.  Returns true when scanning
// has been moved onto a continuation line.
bool Prescanner::FreeFormContinuation() {
  const char *p{at_};
  bool ampersand{*p == '&'};
  if (ampersand) {
    p = SkipWhiteSpace(p + 1);
  }
  if (*p != '\n') {
    // Something other than white space lies between here and the newline.
    if (inCharLiteral_) {
      return false;
    } else if (*p != '!' &&
        features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
      Say(GetProvenance(p), "missing ! before comment after &"_en_US);
    }
  }
  do {
    if (const char *cont{FreeFormContinuationLine(ampersand)}) {
      BeginSourceLine(cont);
      NextLine();
      return true;
    }
  } while (SkipCommentLine(ampersand));
  return false;
}

// Implicit line continuation allows a preprocessor macro call with
// arguments to span multiple lines.
991 bool Prescanner::IsImplicitContinuation() const { 992 return !inPreprocessorDirective_ && !inCharLiteral_ && 993 delimiterNesting_ > 0 && !IsAtEnd() && 994 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 995 } 996 997 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 998 if (*at_ == '\n' || *at_ == '&') { 999 if (inFixedForm_) { 1000 return FixedFormContinuation(mightNeedFixedFormSpace); 1001 } else { 1002 return FreeFormContinuation(); 1003 } 1004 } else { 1005 return false; 1006 } 1007 } 1008 1009 std::optional<Prescanner::LineClassification> 1010 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1011 const char *p{start}; 1012 char col1{*p++}; 1013 if (!IsFixedFormCommentChar(col1)) { 1014 return std::nullopt; 1015 } 1016 char sentinel[5], *sp{sentinel}; 1017 int column{2}; 1018 for (; column < 6; ++column, ++p) { 1019 if (*p != ' ') { 1020 if (*p == '\n' || *p == '\t') { 1021 break; 1022 } 1023 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1024 // OpenMP conditional compilation line: leave the label alone 1025 break; 1026 } 1027 *sp++ = ToLowerCaseLetter(*p); 1028 } 1029 } 1030 if (column == 6) { 1031 if (*p == ' ' || *p == '\t' || *p == '0') { 1032 ++p; 1033 } else { 1034 // This is a Continuation line, not an initial directive line. 
1035 return std::nullopt; 1036 } 1037 } 1038 if (sp == sentinel) { 1039 return std::nullopt; 1040 } 1041 *sp = '\0'; 1042 if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) { 1043 std::size_t payloadOffset = p - start; 1044 return {LineClassification{ 1045 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1046 } 1047 return std::nullopt; 1048 } 1049 1050 std::optional<Prescanner::LineClassification> 1051 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1052 char sentinel[8]; 1053 const char *p{SkipWhiteSpace(start)}; 1054 if (*p++ != '!') { 1055 return std::nullopt; 1056 } 1057 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { 1058 if (*p == '\n') { 1059 break; 1060 } 1061 if (*p == ' ' || *p == '\t' || *p == '&') { 1062 if (j == 0) { 1063 break; 1064 } 1065 sentinel[j] = '\0'; 1066 p = SkipWhiteSpace(p + 1); 1067 if (*p == '!') { 1068 break; 1069 } 1070 if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) { 1071 std::size_t offset = p - start; 1072 return {LineClassification{ 1073 LineClassification::Kind::CompilerDirective, offset, sp}}; 1074 } 1075 break; 1076 } 1077 sentinel[j] = ToLowerCaseLetter(*p); 1078 } 1079 return std::nullopt; 1080 } 1081 1082 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1083 std::uint64_t packed{0}; 1084 for (char ch : dir) { 1085 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1086 } 1087 compilerDirectiveBloomFilter_.set(packed % prime1); 1088 compilerDirectiveBloomFilter_.set(packed % prime2); 1089 compilerDirectiveSentinels_.insert(dir); 1090 return *this; 1091 } 1092 1093 const char *Prescanner::IsCompilerDirectiveSentinel( 1094 const char *sentinel) const { 1095 std::uint64_t packed{0}; 1096 std::size_t n{0}; 1097 for (; sentinel[n] != '\0'; ++n) { 1098 packed = (packed << 8) | (sentinel[n] & 0xff); 1099 } 1100 if (n == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1101 !compilerDirectiveBloomFilter_.test(packed 
% prime2)) { 1102 return nullptr; 1103 } 1104 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, n))}; 1105 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1106 } 1107 1108 constexpr bool IsDirective(const char *match, const char *dir) { 1109 for (; *match; ++match) { 1110 if (*match != ToLowerCaseLetter(*dir++)) { 1111 return false; 1112 } 1113 } 1114 return true; 1115 } 1116 1117 Prescanner::LineClassification Prescanner::ClassifyLine( 1118 const char *start) const { 1119 if (inFixedForm_) { 1120 if (std::optional<LineClassification> lc{ 1121 IsFixedFormCompilerDirectiveLine(start)}) { 1122 return std::move(*lc); 1123 } 1124 if (IsFixedFormCommentLine(start)) { 1125 return {LineClassification::Kind::Comment}; 1126 } 1127 } else { 1128 if (std::optional<LineClassification> lc{ 1129 IsFreeFormCompilerDirectiveLine(start)}) { 1130 return std::move(*lc); 1131 } 1132 if (const char *bang{IsFreeFormComment(start)}) { 1133 return {LineClassification::Kind::Comment, 1134 static_cast<std::size_t>(bang - start)}; 1135 } 1136 } 1137 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1138 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1139 } 1140 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1141 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1142 IsDirective("else", dir) || IsDirective("endif", dir)) { 1143 return {LineClassification::Kind::ConditionalCompilationDirective}; 1144 } else if (IsDirective("include", dir)) { 1145 return {LineClassification::Kind::IncludeDirective}; 1146 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1147 return {LineClassification::Kind::DefinitionDirective}; 1148 } else { 1149 return {LineClassification::Kind::PreprocessorDirective}; 1150 } 1151 } 1152 return {LineClassification::Kind::Source}; 1153 } 1154 1155 void Prescanner::SourceFormChange(std::string &&dir) { 1156 if (dir == "!dir$ free") { 1157 inFixedForm_ = 
false; 1158 } else if (dir == "!dir$ fixed") { 1159 inFixedForm_ = true; 1160 } 1161 } 1162 } // namespace Fortran::parser 1163