1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "preprocessor.h" 11 #include "token-sequence.h" 12 #include "flang/Common/idioms.h" 13 #include "flang/Parser/characters.h" 14 #include "flang/Parser/message.h" 15 #include "flang/Parser/source.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 features_{lfc}, encoding_{cooked.allSources().encoding()} {} 32 33 Prescanner::Prescanner(const Prescanner &that) 34 : messages_{that.messages_}, cooked_{that.cooked_}, 35 preprocessor_{that.preprocessor_}, features_{that.features_}, 36 inFixedForm_{that.inFixedForm_}, 37 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 38 encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 39 1}, 40 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 41 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 42 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 43 44 static inline constexpr bool IsFixedFormCommentChar(char ch) { 45 return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; 46 } 47 48 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 49 char *p{dir.GetMutableCharData()}; 50 char *limit{p + dir.SizeInChars()}; 51 for (; p < limit; ++p) { 52 if (*p != ' ') { 53 CHECK(IsFixedFormCommentChar(*p)); 54 *p = '!'; 55 return; 56 } 57 } 58 DIE("compiler directive all blank"); 59 } 60 61 void Prescanner::Prescan(ProvenanceRange range) { 62 AllSources &allSources{cooked_.allSources()}; 63 startProvenance_ = range.start(); 64 std::size_t offset{0}; 65 const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)}; 66 CHECK(source); 67 start_ = source->content().data() + offset; 68 limit_ = start_ + range.size(); 69 nextLine_ = start_; 70 const bool beganInFixedForm{inFixedForm_}; 71 if (prescannerNesting_ > maxPrescannerNesting) { 72 Say(GetProvenance(start_), 73 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 74 return; 75 } 76 while (nextLine_ < limit_) { 77 Statement(); 78 } 79 if (inFixedForm_ != beganInFixedForm) { 80 std::string dir{"!dir$ "}; 81 if (beganInFixedForm) { 82 dir += "fixed"; 83 } else { 84 dir += "free"; 85 } 86 dir += '\n'; 87 TokenSequence tokens{dir, allSources.AddCompilerInsertion(dir).start()}; 88 tokens.Emit(cooked_); 89 } 90 } 91 92 void Prescanner::Statement() { 93 TokenSequence tokens; 94 LineClassification line{ClassifyLine(nextLine_)}; 95 switch (line.kind) { 96 case LineClassification::Kind::Comment: 97 nextLine_ += line.payloadOffset; // advance to '!' or newline 98 NextLine(); 99 return; 100 case LineClassification::Kind::IncludeLine: 101 FortranInclude(nextLine_ + line.payloadOffset); 102 NextLine(); 103 return; 104 case LineClassification::Kind::ConditionalCompilationDirective: 105 case LineClassification::Kind::IncludeDirective: 106 case LineClassification::Kind::DefinitionDirective: 107 case LineClassification::Kind::PreprocessorDirective: 108 preprocessor_.Directive(TokenizePreprocessorDirective(), this); 109 return; 110 case LineClassification::Kind::CompilerDirective: 111 directiveSentinel_ = line.sentinel; 112 CHECK(InCompilerDirective()); 113 BeginSourceLineAndAdvance(); 114 if (inFixedForm_) { 115 CHECK(IsFixedFormCommentChar(*at_)); 116 } else { 117 while (*at_ == ' ' || *at_ == '\t') { 118 ++at_, ++column_; 119 } 120 CHECK(*at_ == '!'); 121 } 122 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 123 // OpenMP conditional compilation line. Remove the sentinel and then 124 // treat the line as if it were normal source. 125 at_ += 2, column_ += 2; 126 if (inFixedForm_) { 127 LabelField(tokens); 128 } else { 129 SkipSpaces(); 130 } 131 } else { 132 // Compiler directive. Emit normalized sentinel. 133 EmitChar(tokens, '!'); 134 ++at_, ++column_; 135 for (const char *sp{directiveSentinel_}; *sp != '\0'; 136 ++sp, ++at_, ++column_) { 137 EmitChar(tokens, *sp); 138 } 139 if (*at_ == ' ') { 140 EmitChar(tokens, ' '); 141 ++at_, ++column_; 142 } 143 tokens.CloseToken(); 144 } 145 break; 146 case LineClassification::Kind::Source: 147 BeginSourceLineAndAdvance(); 148 if (inFixedForm_) { 149 LabelField(tokens); 150 } else if (skipLeadingAmpersand_) { 151 skipLeadingAmpersand_ = false; 152 const char *p{SkipWhiteSpace(at_)}; 153 if (p < limit_ && *p == '&') { 154 column_ += ++p - at_; 155 at_ = p; 156 } 157 } else { 158 SkipSpaces(); 159 } 160 break; 161 } 162 163 while (NextToken(tokens)) { 164 } 165 166 Provenance newlineProvenance{GetCurrentProvenance()}; 167 if (std::optional<TokenSequence> preprocessed{ 168 preprocessor_.MacroReplacement(tokens, *this)}) { 169 // Reprocess the preprocessed line. Append a newline temporarily. 170 preprocessed->PutNextTokenChar('\n', newlineProvenance); 171 preprocessed->CloseToken(); 172 const char *ppd{preprocessed->ToCharBlock().begin()}; 173 LineClassification ppl{ClassifyLine(ppd)}; 174 preprocessed->RemoveLastToken(); // remove the newline 175 switch (ppl.kind) { 176 case LineClassification::Kind::Comment: 177 break; 178 case LineClassification::Kind::IncludeLine: 179 FortranInclude(ppd + ppl.payloadOffset); 180 break; 181 case LineClassification::Kind::ConditionalCompilationDirective: 182 case LineClassification::Kind::IncludeDirective: 183 case LineClassification::Kind::DefinitionDirective: 184 case LineClassification::Kind::PreprocessorDirective: 185 Say(preprocessed->GetProvenanceRange(), 186 "Preprocessed line resembles a preprocessor directive"_en_US); 187 preprocessed->ToLowerCase().Emit(cooked_); 188 break; 189 case LineClassification::Kind::CompilerDirective: 190 if (preprocessed->HasRedundantBlanks()) { 191 preprocessed->RemoveRedundantBlanks(); 192 } 193 NormalizeCompilerDirectiveCommentMarker(*preprocessed); 194 preprocessed->ToLowerCase(); 195 SourceFormChange(preprocessed->ToString()); 196 preprocessed->ClipComment(true /* skip first ! */).Emit(cooked_); 197 break; 198 case LineClassification::Kind::Source: 199 if (inFixedForm_) { 200 if (preprocessed->HasBlanks(/*after column*/ 6)) { 201 preprocessed->RemoveBlanks(/*after column*/ 6); 202 } 203 } else { 204 if (preprocessed->HasRedundantBlanks()) { 205 preprocessed->RemoveRedundantBlanks(); 206 } 207 } 208 preprocessed->ToLowerCase().ClipComment().Emit(cooked_); 209 break; 210 } 211 } else { 212 tokens.ToLowerCase(); 213 if (line.kind == LineClassification::Kind::CompilerDirective) { 214 SourceFormChange(tokens.ToString()); 215 } 216 tokens.Emit(cooked_); 217 } 218 if (omitNewline_) { 219 omitNewline_ = false; 220 } else { 221 cooked_.Put('\n', newlineProvenance); 222 } 223 directiveSentinel_ = nullptr; 224 } 225 226 TokenSequence Prescanner::TokenizePreprocessorDirective() { 227 CHECK(nextLine_ < limit_ && !inPreprocessorDirective_); 228 inPreprocessorDirective_ = true; 229 BeginSourceLineAndAdvance(); 230 TokenSequence tokens; 231 while (NextToken(tokens)) { 232 } 233 inPreprocessorDirective_ = false; 234 return tokens; 235 } 236 237 void Prescanner::NextLine() { 238 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 239 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 240 if (!v) { 241 nextLine_ = limit_; 242 } else { 243 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 244 nextLine_ = nl + 1; 245 } 246 } 247 248 void Prescanner::LabelField(TokenSequence &token, int outCol) { 249 for (; *at_ != '\n' && column_ <= 6; ++at_) { 250 if (*at_ == '\t') { 251 ++at_; 252 column_ = 7; 253 break; 254 } 255 if (*at_ != ' ' && 256 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space 257 EmitChar(token, *at_); 258 ++outCol; 259 } 260 ++column_; 261 } 262 if (outCol > 1) { 263 token.CloseToken(); 264 } 265 if (outCol < 7) { 266 if (outCol == 1) { 267 token.Put(" ", 6, sixSpaceProvenance_.start()); 268 } else { 269 for (; outCol < 7; ++outCol) { 270 token.PutNextTokenChar(' ', spaceProvenance_); 271 } 272 token.CloseToken(); 273 } 274 } 275 SkipToNextSignificantCharacter(); 276 } 277 278 void Prescanner::SkipToEndOfLine() { 279 while (*at_ != '\n') { 280 ++at_, ++column_; 281 } 282 } 283 284 bool Prescanner::MustSkipToEndOfLine() const { 285 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 286 return true; // skip over ignored columns in right margin (73:80) 287 } else if (*at_ == '!' && !inCharLiteral_) { 288 return true; // inline comment goes to end of source line 289 } else { 290 return false; 291 } 292 } 293 294 void Prescanner::NextChar() { 295 CHECK(*at_ != '\n'); 296 ++at_, ++column_; 297 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 298 // UTF-8 byte order mark - treat this file as UTF-8 299 at_ += 3; 300 encoding_ = Encoding::UTF_8; 301 } 302 SkipToNextSignificantCharacter(); 303 } 304 305 // Skip everything that should be ignored until the next significant 306 // character is reached; handles C-style comments in preprocessing 307 // directives, Fortran ! comments, stuff after the right margin in 308 // fixed form, and all forms of line continuation. 309 void Prescanner::SkipToNextSignificantCharacter() { 310 if (inPreprocessorDirective_) { 311 SkipCComments(); 312 } else { 313 bool mightNeedSpace{false}; 314 if (MustSkipToEndOfLine()) { 315 SkipToEndOfLine(); 316 } else { 317 mightNeedSpace = *at_ == '\n'; 318 } 319 for (; Continuation(mightNeedSpace); mightNeedSpace = false) { 320 if (MustSkipToEndOfLine()) { 321 SkipToEndOfLine(); 322 } 323 } 324 if (*at_ == '\t') { 325 tabInCurrentLine_ = true; 326 } 327 } 328 } 329 330 void Prescanner::SkipCComments() { 331 while (true) { 332 if (IsCComment(at_)) { 333 if (const char *after{SkipCComment(at_)}) { 334 column_ += after - at_; 335 // May have skipped over one or more newlines; relocate the start of 336 // the next line. 337 nextLine_ = at_ = after; 338 NextLine(); 339 } else { 340 // Don't emit any messages about unclosed C-style comments, because 341 // the sequence /* can appear legally in a FORMAT statement. There's 342 // no ambiguity, since the sequence */ cannot appear legally. 343 break; 344 } 345 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && 346 at_[1] == '\n' && nextLine_ < limit_) { 347 BeginSourceLineAndAdvance(); 348 } else { 349 break; 350 } 351 } 352 } 353 354 void Prescanner::SkipSpaces() { 355 while (*at_ == ' ' || *at_ == '\t') { 356 NextChar(); 357 } 358 insertASpace_ = false; 359 } 360 361 const char *Prescanner::SkipWhiteSpace(const char *p) { 362 while (*p == ' ' || *p == '\t') { 363 ++p; 364 } 365 return p; 366 } 367 368 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { 369 while (true) { 370 if (*p == ' ' || *p == '\t') { 371 ++p; 372 } else if (IsCComment(p)) { 373 if (const char *after{SkipCComment(p)}) { 374 p = after; 375 } else { 376 break; 377 } 378 } else { 379 break; 380 } 381 } 382 return p; 383 } 384 385 const char *Prescanner::SkipCComment(const char *p) const { 386 char star{' '}, slash{' '}; 387 p += 2; 388 while (star != '*' || slash != '/') { 389 if (p >= limit_) { 390 return nullptr; // signifies an unterminated comment 391 } 392 star = slash; 393 slash = *p++; 394 } 395 return p; 396 } 397 398 bool Prescanner::NextToken(TokenSequence &tokens) { 399 CHECK(at_ >= start_ && at_ < limit_); 400 if (InFixedFormSource()) { 401 SkipSpaces(); 402 } else { 403 if (*at_ == '/' && IsCComment(at_)) { 404 // Recognize and skip over classic C style /*comments*/ when 405 // outside a character literal. 406 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { 407 Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US); 408 } 409 SkipCComments(); 410 } 411 if (*at_ == ' ' || *at_ == '\t') { 412 // Compress free-form white space into a single space character. 413 const auto theSpace{at_}; 414 char previous{at_ <= start_ ? ' ' : at_[-1]}; 415 NextChar(); 416 SkipSpaces(); 417 if (*at_ == '\n') { 418 // Discard white space at the end of a line. 419 } else if (!inPreprocessorDirective_ && 420 (previous == '(' || *at_ == '(' || *at_ == ')')) { 421 // Discard white space before/after '(' and before ')', unless in a 422 // preprocessor directive. This helps yield space-free contiguous 423 // names for generic interfaces like OPERATOR( + ) and 424 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). 425 // This has the effect of silently ignoring the illegal spaces in 426 // the array constructor ( /1,2/ ) but that seems benign; it's 427 // hard to avoid that while still removing spaces from OPERATOR( / ) 428 // and OPERATOR( // ). 429 } else { 430 // Preserve the squashed white space as a single space character. 431 tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); 432 tokens.CloseToken(); 433 return true; 434 } 435 } 436 } 437 if (insertASpace_) { 438 tokens.PutNextTokenChar(' ', spaceProvenance_); 439 insertASpace_ = false; 440 } 441 if (*at_ == '\n') { 442 return false; 443 } 444 const char *start{at_}; 445 if (*at_ == '\'' || *at_ == '"') { 446 QuotedCharacterLiteral(tokens, start); 447 preventHollerith_ = false; 448 } else if (IsDecimalDigit(*at_)) { 449 int n{0}, digits{0}; 450 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; 451 do { 452 if (n < maxHollerith) { 453 n = 10 * n + DecimalDigitValue(*at_); 454 } 455 EmitCharAndAdvance(tokens, *at_); 456 ++digits; 457 if (InFixedFormSource()) { 458 SkipSpaces(); 459 } 460 } while (IsDecimalDigit(*at_)); 461 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && 462 !preventHollerith_) { 463 Hollerith(tokens, n, start); 464 } else if (*at_ == '.') { 465 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 466 } 467 ExponentAndKind(tokens); 468 } else if (ExponentAndKind(tokens)) { 469 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && 470 inPreprocessorDirective_) { 471 do { 472 EmitCharAndAdvance(tokens, *at_); 473 } while (IsHexadecimalDigit(*at_)); 474 } else if (IsLetter(*at_)) { 475 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that 476 // we don't misrecognize I9HOLLERITH as an identifier in the next case. 477 EmitCharAndAdvance(tokens, *at_); 478 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { 479 EmitCharAndAdvance(tokens, *at_); 480 QuotedCharacterLiteral(tokens, start); 481 } 482 preventHollerith_ = false; 483 } else if (*at_ == '.') { 484 char nch{EmitCharAndAdvance(tokens, '.')}; 485 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { 486 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 487 } 488 ExponentAndKind(tokens); 489 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { 490 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis 491 } 492 preventHollerith_ = false; 493 } else if (IsLegalInIdentifier(*at_)) { 494 do { 495 } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))); 496 if (*at_ == '\'' || *at_ == '"') { 497 QuotedCharacterLiteral(tokens, start); 498 preventHollerith_ = false; 499 } else { 500 // Subtle: Don't misrecognize labeled DO statement label as Hollerith 501 // when the loop control variable starts with 'H'. 502 preventHollerith_ = true; 503 } 504 } else if (*at_ == '*') { 505 if (EmitCharAndAdvance(tokens, '*') == '*') { 506 EmitCharAndAdvance(tokens, '*'); 507 } else { 508 // Subtle ambiguity: 509 // CHARACTER*2H declares H because *2 is a kind specifier 510 // DATAC/N*2H / is repeated Hollerith 511 preventHollerith_ = !slashInCurrentLine_; 512 } 513 } else { 514 char ch{*at_}; 515 if (ch == '(' || ch == '[') { 516 ++delimiterNesting_; 517 } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) { 518 --delimiterNesting_; 519 } 520 char nch{EmitCharAndAdvance(tokens, ch)}; 521 preventHollerith_ = false; 522 if ((nch == '=' && 523 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || 524 (ch == nch && 525 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || 526 ch == '|' || ch == '<' || ch == '>')) || 527 (ch == '=' && nch == '>')) { 528 // token comprises two characters 529 EmitCharAndAdvance(tokens, nch); 530 } else if (ch == '/') { 531 slashInCurrentLine_ = true; 532 } 533 } 534 tokens.CloseToken(); 535 return true; 536 } 537 538 bool Prescanner::ExponentAndKind(TokenSequence &tokens) { 539 char ed{ToLowerCaseLetter(*at_)}; 540 if (ed != 'e' && ed != 'd') { 541 return false; 542 } 543 EmitCharAndAdvance(tokens, ed); 544 if (*at_ == '+' || *at_ == '-') { 545 EmitCharAndAdvance(tokens, *at_); 546 } 547 while (IsDecimalDigit(*at_)) { 548 EmitCharAndAdvance(tokens, *at_); 549 } 550 if (*at_ == '_') { 551 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { 552 } 553 } 554 return true; 555 } 556 557 void Prescanner::QuotedCharacterLiteral( 558 TokenSequence &tokens, const char *start) { 559 char quote{*at_}; 560 const char *end{at_ + 1}; 561 inCharLiteral_ = true; 562 const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; 563 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; 564 bool isEscaped{false}; 565 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; 566 while (true) { 567 if (*at_ == '\\') { 568 if (escapesEnabled) { 569 isEscaped = !isEscaped; 570 } else { 571 // The parser always processes escape sequences, so don't confuse it 572 // when escapes are disabled. 573 insert('\\'); 574 } 575 } else { 576 isEscaped = false; 577 } 578 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, 579 Encoding::LATIN_1); 580 while (PadOutCharacterLiteral(tokens)) { 581 } 582 if (*at_ == '\n') { 583 if (!inPreprocessorDirective_) { 584 Say(GetProvenanceRange(start, end), 585 "Incomplete character literal"_err_en_US); 586 } 587 break; 588 } 589 end = at_ + 1; 590 NextChar(); 591 if (*at_ == quote && !isEscaped) { 592 // A doubled unescaped quote mark becomes a single instance of that 593 // quote character in the literal (later). There can be spaces between 594 // the quotes in fixed form source. 595 EmitChar(tokens, quote); 596 inCharLiteral_ = false; // for cases like print *, '...'!comment 597 NextChar(); 598 if (InFixedFormSource()) { 599 SkipSpaces(); 600 } 601 if (*at_ != quote) { 602 break; 603 } 604 inCharLiteral_ = true; 605 } 606 } 607 inCharLiteral_ = false; 608 } 609 610 void Prescanner::Hollerith( 611 TokenSequence &tokens, int count, const char *start) { 612 inCharLiteral_ = true; 613 CHECK(*at_ == 'h' || *at_ == 'H'); 614 EmitChar(tokens, 'H'); 615 while (count-- > 0) { 616 if (PadOutCharacterLiteral(tokens)) { 617 } else if (*at_ == '\n') { 618 Say(GetProvenanceRange(start, at_), 619 "Possible truncated Hollerith literal"_en_US); 620 break; 621 } else { 622 NextChar(); 623 // Each multi-byte character encoding counts as a single character. 624 // No escape sequences are recognized. 625 // Hollerith is always emitted to the cooked character 626 // stream in UTF-8. 627 DecodedCharacter decoded{DecodeCharacter( 628 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; 629 if (decoded.bytes > 0) { 630 EncodedCharacter utf8{ 631 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; 632 for (int j{0}; j < utf8.bytes; ++j) { 633 EmitChar(tokens, utf8.buffer[j]); 634 } 635 at_ += decoded.bytes - 1; 636 } else { 637 Say(GetProvenanceRange(start, at_), 638 "Bad character in Hollerith literal"_err_en_US); 639 break; 640 } 641 } 642 } 643 if (*at_ != '\n') { 644 NextChar(); 645 } 646 inCharLiteral_ = false; 647 } 648 649 // In fixed form, source card images must be processed as if they were at 650 // least 72 columns wide, at least in character literal contexts. 651 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { 652 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { 653 if (column_ < fixedFormColumnLimit_) { 654 tokens.PutNextTokenChar(' ', spaceProvenance_); 655 ++column_; 656 return true; 657 } 658 if (!FixedFormContinuation(false /*no need to insert space*/) || 659 tabInCurrentLine_) { 660 return false; 661 } 662 CHECK(column_ == 7); 663 --at_; // point to column 6 of continuation line 664 column_ = 6; 665 } 666 return false; 667 } 668 669 bool Prescanner::IsFixedFormCommentLine(const char *start) const { 670 const char *p{start}; 671 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. 672 ((*p == 'D' || *p == 'd') && 673 !features_.IsEnabled(LanguageFeature::OldDebugLines))) { 674 return true; 675 } 676 bool anyTabs{false}; 677 while (true) { 678 if (*p == ' ') { 679 ++p; 680 } else if (*p == '\t') { 681 anyTabs = true; 682 ++p; 683 } else if (*p == '0' && !anyTabs && p == start + 5) { 684 ++p; // 0 in column 6 must treated as a space 685 } else { 686 break; 687 } 688 } 689 if (!anyTabs && p >= start + fixedFormColumnLimit_) { 690 return true; 691 } 692 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { 693 return true; 694 } 695 return *p == '\n'; 696 } 697 698 const char *Prescanner::IsFreeFormComment(const char *p) const { 699 p = SkipWhiteSpaceAndCComments(p); 700 if (*p == '!' || *p == '\n') { 701 return p; 702 } else { 703 return nullptr; 704 } 705 } 706 707 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { 708 const char *p{SkipWhiteSpace(start)}; 709 for (char ch : "include"s) { 710 if (ToLowerCaseLetter(*p++) != ch) { 711 return std::nullopt; 712 } 713 } 714 p = SkipWhiteSpace(p); 715 if (*p == '"' || *p == '\'') { 716 return {p - start}; 717 } 718 return std::nullopt; 719 } 720 721 void Prescanner::FortranInclude(const char *firstQuote) { 722 const char *p{firstQuote}; 723 while (*p != '"' && *p != '\'') { 724 ++p; 725 } 726 char quote{*p}; 727 std::string path; 728 for (++p; *p != '\n'; ++p) { 729 if (*p == quote) { 730 if (p[1] != quote) { 731 break; 732 } 733 ++p; 734 } 735 path += *p; 736 } 737 if (*p != quote) { 738 Say(GetProvenanceRange(firstQuote, p), 739 "malformed path name string"_err_en_US); 740 return; 741 } 742 p = SkipWhiteSpace(p + 1); 743 if (*p != '\n' && *p != '!') { 744 const char *garbage{p}; 745 for (; *p != '\n' && *p != '!'; ++p) { 746 } 747 Say(GetProvenanceRange(garbage, p), 748 "excess characters after path name"_en_US); 749 } 750 std::string buf; 751 llvm::raw_string_ostream error{buf}; 752 Provenance provenance{GetProvenance(nextLine_)}; 753 AllSources &allSources{cooked_.allSources()}; 754 const SourceFile *currentFile{allSources.GetSourceFile(provenance)}; 755 if (currentFile) { 756 allSources.PushSearchPathDirectory(DirectoryName(currentFile->path())); 757 } 758 const SourceFile *included{allSources.Open(path, error)}; 759 if (currentFile) { 760 allSources.PopSearchPathDirectory(); 761 } 762 if (!included) { 763 Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); 764 } else if (included->bytes() > 0) { 765 ProvenanceRange includeLineRange{ 766 provenance, static_cast<std::size_t>(p - nextLine_)}; 767 ProvenanceRange fileRange{ 768 allSources.AddIncludedFile(*included, includeLineRange)}; 769 Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); 770 } 771 } 772 773 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 774 const char *p{start}; 775 for (; *p == ' '; ++p) { 776 } 777 if (*p == '#') { 778 if (inFixedForm_ && p == start + 5) { 779 return nullptr; 780 } 781 } else { 782 p = SkipWhiteSpace(p); 783 if (*p != '#') { 784 return nullptr; 785 } 786 } 787 return SkipWhiteSpace(p + 1); 788 } 789 790 bool Prescanner::IsNextLinePreprocessorDirective() const { 791 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 792 } 793 794 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 795 if (nextLine_ >= limit_) { 796 if (afterAmpersand && prescannerNesting_ > 0) { 797 // A continuation marker at the end of the last line in an 798 // include file inhibits the newline for that line. 799 SkipToEndOfLine(); 800 omitNewline_ = true; 801 } 802 return false; 803 } 804 auto lineClass{ClassifyLine(nextLine_)}; 805 if (lineClass.kind == LineClassification::Kind::Comment) { 806 NextLine(); 807 return true; 808 } else if (inPreprocessorDirective_) { 809 return false; 810 } else if (lineClass.kind == 811 LineClassification::Kind::ConditionalCompilationDirective || 812 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 813 // Allow conditional compilation directives (e.g., #ifdef) to affect 814 // continuation lines. 815 // Allow other preprocessor directives, too, except #include 816 // (when it does not follow '&'), #define, and #undef (because 817 // they cannot be allowed to affect preceding text on a 818 // continued line). 819 preprocessor_.Directive(TokenizePreprocessorDirective(), this); 820 return true; 821 } else if (afterAmpersand && 822 (lineClass.kind == LineClassification::Kind::IncludeDirective || 823 lineClass.kind == LineClassification::Kind::IncludeLine)) { 824 SkipToEndOfLine(); 825 omitNewline_ = true; 826 skipLeadingAmpersand_ = true; 827 return false; 828 } else { 829 return false; 830 } 831 } 832 833 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 834 if (nextLine_ >= limit_) { 835 return nullptr; 836 } 837 tabInCurrentLine_ = false; 838 char col1{*nextLine_}; 839 if (InCompilerDirective()) { 840 // Must be a continued compiler directive. 841 if (!IsFixedFormCommentChar(col1)) { 842 return nullptr; 843 } 844 int j{1}; 845 for (; j < 5; ++j) { 846 char ch{directiveSentinel_[j - 1]}; 847 if (ch == '\0') { 848 break; 849 } 850 if (ch != ToLowerCaseLetter(nextLine_[j])) { 851 return nullptr; 852 } 853 } 854 for (; j < 5; ++j) { 855 if (nextLine_[j] != ' ') { 856 return nullptr; 857 } 858 } 859 char col6{nextLine_[5]}; 860 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 861 if (nextLine_[6] != ' ' && mightNeedSpace) { 862 insertASpace_ = true; 863 } 864 return nextLine_ + 6; 865 } 866 return nullptr; 867 } else { 868 // Normal case: not in a compiler directive. 869 if (col1 == '&' && 870 features_.IsEnabled( 871 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 872 // Extension: '&' as continuation marker 873 if (features_.ShouldWarn( 874 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 875 Say(GetProvenance(nextLine_), "nonstandard usage"_en_US); 876 } 877 return nextLine_ + 1; 878 } 879 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 880 tabInCurrentLine_ = true; 881 return nextLine_ + 2; // VAX extension 882 } 883 if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' && 884 nextLine_[3] == ' ' && nextLine_[4] == ' ') { 885 char col6{nextLine_[5]}; 886 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 887 return nextLine_ + 6; 888 } 889 } 890 if (delimiterNesting_ > 0) { 891 if (!IsFixedFormCommentChar(col1)) { 892 return nextLine_; 893 } 894 } 895 } 896 return nullptr; // not a continuation line 897 } 898 899 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 900 const char *p{nextLine_}; 901 if (p >= limit_) { 902 return nullptr; 903 } 904 p = SkipWhiteSpace(p); 905 if (InCompilerDirective()) { 906 if (*p++ != '!') { 907 return nullptr; 908 } 909 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 910 if (*s != ToLowerCaseLetter(*p)) { 911 return nullptr; 912 } 913 } 914 p = SkipWhiteSpace(p); 915 if (*p == '&') { 916 if (!ampersand) { 917 insertASpace_ = true; 918 } 919 return p + 1; 920 } else if (ampersand) { 921 return p; 922 } else { 923 return nullptr; 924 } 925 } else { 926 if (*p == '&') { 927 return p + 1; 928 } else if (*p == '!' || *p == '\n' || *p == '#') { 929 return nullptr; 930 } else if (ampersand || delimiterNesting_ > 0) { 931 if (p > nextLine_) { 932 --p; 933 } else { 934 insertASpace_ = true; 935 } 936 return p; 937 } else { 938 return nullptr; 939 } 940 } 941 } 942 943 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 944 // N.B. We accept '&' as a continuation indicator in fixed form, too, 945 // but not in a character literal. 946 if (*at_ == '&' && inCharLiteral_) { 947 return false; 948 } 949 do { 950 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 951 BeginSourceLine(cont); 952 column_ = 7; 953 NextLine(); 954 return true; 955 } 956 } while (SkipCommentLine(false /* not after ampersand */)); 957 return false; 958 } 959 960 bool Prescanner::FreeFormContinuation() { 961 const char *p{at_}; 962 bool ampersand{*p == '&'}; 963 if (ampersand) { 964 p = SkipWhiteSpace(p + 1); 965 } 966 if (*p != '\n') { 967 if (inCharLiteral_) { 968 return false; 969 } else if (*p != '!' && 970 features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 971 Say(GetProvenance(p), "missing ! before comment after &"_en_US); 972 } 973 } 974 do { 975 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 976 BeginSourceLine(cont); 977 NextLine(); 978 return true; 979 } 980 } while (SkipCommentLine(ampersand)); 981 return false; 982 } 983 984 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 985 if (*at_ == '\n' || *at_ == '&') { 986 if (inFixedForm_) { 987 return FixedFormContinuation(mightNeedFixedFormSpace); 988 } else { 989 return FreeFormContinuation(); 990 } 991 } else { 992 return false; 993 } 994 } 995 996 std::optional<Prescanner::LineClassification> 997 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 998 const char *p{start}; 999 char col1{*p++}; 1000 if (!IsFixedFormCommentChar(col1)) { 1001 return std::nullopt; 1002 } 1003 char sentinel[5], *sp{sentinel}; 1004 int column{2}; 1005 for (; column < 6; ++column, ++p) { 1006 if (*p != ' ') { 1007 if (*p == '\n' || *p == '\t') { 1008 break; 1009 } 1010 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1011 // OpenMP conditional compilation line: leave the label alone 1012 break; 1013 } 1014 *sp++ = ToLowerCaseLetter(*p); 1015 } 1016 } 1017 if (column == 6) { 1018 if (*p == ' ' || *p == '\t' || *p == '0') { 1019 ++p; 1020 } else { 1021 // This is a Continuation line, not an initial directive line. 1022 return std::nullopt; 1023 } 1024 } 1025 if (sp == sentinel) { 1026 return std::nullopt; 1027 } 1028 *sp = '\0'; 1029 if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) { 1030 std::size_t payloadOffset = p - start; 1031 return {LineClassification{ 1032 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1033 } 1034 return std::nullopt; 1035 } 1036 1037 std::optional<Prescanner::LineClassification> 1038 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1039 char sentinel[8]; 1040 const char *p{SkipWhiteSpace(start)}; 1041 if (*p++ != '!') { 1042 return std::nullopt; 1043 } 1044 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { 1045 if (*p == '\n') { 1046 break; 1047 } 1048 if (*p == ' ' || *p == '\t' || *p == '&') { 1049 if (j == 0) { 1050 break; 1051 } 1052 sentinel[j] = '\0'; 1053 p = SkipWhiteSpace(p + 1); 1054 if (*p == '!') { 1055 break; 1056 } 1057 if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) { 1058 std::size_t offset = p - start; 1059 return {LineClassification{ 1060 LineClassification::Kind::CompilerDirective, offset, sp}}; 1061 } 1062 break; 1063 } 1064 sentinel[j] = ToLowerCaseLetter(*p); 1065 } 1066 return std::nullopt; 1067 } 1068 1069 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1070 std::uint64_t packed{0}; 1071 for (char ch : dir) { 1072 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1073 } 1074 compilerDirectiveBloomFilter_.set(packed % prime1); 1075 compilerDirectiveBloomFilter_.set(packed % prime2); 1076 compilerDirectiveSentinels_.insert(dir); 1077 return *this; 1078 } 1079 1080 const char *Prescanner::IsCompilerDirectiveSentinel( 1081 const char *sentinel) const { 1082 std::uint64_t packed{0}; 1083 std::size_t n{0}; 1084 for (; sentinel[n] != '\0'; ++n) { 1085 packed = (packed << 8) | (sentinel[n] & 0xff); 1086 } 1087 if (n == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1088 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1089 return nullptr; 1090 } 1091 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, n))}; 1092 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1093 } 1094 1095 Prescanner::LineClassification Prescanner::ClassifyLine( 1096 const char *start) const { 1097 if (inFixedForm_) { 1098 if (std::optional<LineClassification> lc{ 1099 IsFixedFormCompilerDirectiveLine(start)}) { 1100 return std::move(*lc); 1101 } 1102 if (IsFixedFormCommentLine(start)) { 1103 return {LineClassification::Kind::Comment}; 1104 } 1105 } else { 1106 if (std::optional<LineClassification> lc{ 1107 IsFreeFormCompilerDirectiveLine(start)}) { 1108 return std::move(*lc); 1109 } 1110 if (const char *bang{IsFreeFormComment(start)}) { 1111 return {LineClassification::Kind::Comment, 1112 static_cast<std::size_t>(bang - start)}; 1113 } 1114 } 1115 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1116 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1117 } 1118 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1119 if (std::memcmp(dir, "if", 2) == 0 || std::memcmp(dir, "elif", 4) == 0 || 1120 std::memcmp(dir, "else", 4) == 0 || std::memcmp(dir, "endif", 5) == 0) { 1121 return {LineClassification::Kind::ConditionalCompilationDirective}; 1122 } else if (std::memcmp(dir, "include", 7) == 0) { 1123 return {LineClassification::Kind::IncludeDirective}; 1124 } else if (std::memcmp(dir, "define", 6) == 0 || 1125 std::memcmp(dir, "undef", 5) == 0) { 1126 return {LineClassification::Kind::DefinitionDirective}; 1127 } else { 1128 return {LineClassification::Kind::PreprocessorDirective}; 1129 } 1130 } 1131 return {LineClassification::Kind::Source}; 1132 } 1133 1134 void Prescanner::SourceFormChange(std::string &&dir) { 1135 if (dir == "!dir$ free") { 1136 inFixedForm_ = false; 1137 } else if (dir == "!dir$ fixed") { 1138 inFixedForm_ = true; 1139 } 1140 } 1141 } // namespace Fortran::parser 1142