1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "preprocessor.h" 11 #include "token-sequence.h" 12 #include "flang/Common/idioms.h" 13 #include "flang/Parser/characters.h" 14 #include "flang/Parser/message.h" 15 #include "flang/Parser/source.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 features_{lfc}, encoding_{cooked.allSources().encoding()} {} 32 33 Prescanner::Prescanner(const Prescanner &that) 34 : messages_{that.messages_}, cooked_{that.cooked_}, 35 preprocessor_{that.preprocessor_}, features_{that.features_}, 36 inFixedForm_{that.inFixedForm_}, 37 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 38 encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 39 1}, 40 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 41 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 42 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 43 44 static inline constexpr bool IsFixedFormCommentChar(char ch) { 45 return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; 46 } 47 48 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 49 char *p{dir.GetMutableCharData()}; 50 char *limit{p + dir.SizeInChars()}; 51 for (; p < limit; ++p) { 52 if (*p != ' ') { 53 CHECK(IsFixedFormCommentChar(*p)); 54 *p = '!'; 55 return; 56 } 57 } 58 DIE("compiler directive all blank"); 59 } 60 61 void Prescanner::Prescan(ProvenanceRange range) { 62 AllSources &allSources{cooked_.allSources()}; 63 startProvenance_ = range.start(); 64 std::size_t offset{0}; 65 const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)}; 66 CHECK(source); 67 start_ = source->content().data() + offset; 68 limit_ = start_ + range.size(); 69 nextLine_ = start_; 70 const bool beganInFixedForm{inFixedForm_}; 71 if (prescannerNesting_ > maxPrescannerNesting) { 72 Say(GetProvenance(start_), 73 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 74 return; 75 } 76 while (nextLine_ < limit_) { 77 Statement(); 78 } 79 if (inFixedForm_ != beganInFixedForm) { 80 std::string dir{"!dir$ "}; 81 if (beganInFixedForm) { 82 dir += "fixed"; 83 } else { 84 dir += "free"; 85 } 86 dir += '\n'; 87 TokenSequence tokens{dir, allSources.AddCompilerInsertion(dir).start()}; 88 tokens.Emit(cooked_); 89 } 90 } 91 92 void Prescanner::Statement() { 93 TokenSequence tokens; 94 LineClassification line{ClassifyLine(nextLine_)}; 95 switch (line.kind) { 96 case LineClassification::Kind::Comment: 97 nextLine_ += line.payloadOffset; // advance to '!' or newline 98 NextLine(); 99 return; 100 case LineClassification::Kind::IncludeLine: 101 FortranInclude(nextLine_ + line.payloadOffset); 102 NextLine(); 103 return; 104 case LineClassification::Kind::ConditionalCompilationDirective: 105 case LineClassification::Kind::IncludeDirective: 106 case LineClassification::Kind::DefinitionDirective: 107 case LineClassification::Kind::PreprocessorDirective: 108 preprocessor_.Directive(TokenizePreprocessorDirective(), this); 109 return; 110 case LineClassification::Kind::CompilerDirective: 111 directiveSentinel_ = line.sentinel; 112 CHECK(InCompilerDirective()); 113 BeginSourceLineAndAdvance(); 114 if (inFixedForm_) { 115 CHECK(IsFixedFormCommentChar(*at_)); 116 } else { 117 while (*at_ == ' ' || *at_ == '\t') { 118 ++at_, ++column_; 119 } 120 CHECK(*at_ == '!'); 121 } 122 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 123 // OpenMP conditional compilation line. Remove the sentinel and then 124 // treat the line as if it were normal source. 125 at_ += 2, column_ += 2; 126 if (inFixedForm_) { 127 LabelField(tokens); 128 } else { 129 SkipSpaces(); 130 } 131 } else { 132 // Compiler directive. Emit normalized sentinel. 133 EmitChar(tokens, '!'); 134 ++at_, ++column_; 135 for (const char *sp{directiveSentinel_}; *sp != '\0'; 136 ++sp, ++at_, ++column_) { 137 EmitChar(tokens, *sp); 138 } 139 if (*at_ == ' ') { 140 EmitChar(tokens, ' '); 141 ++at_, ++column_; 142 } 143 tokens.CloseToken(); 144 } 145 break; 146 case LineClassification::Kind::Source: 147 BeginSourceLineAndAdvance(); 148 if (inFixedForm_) { 149 LabelField(tokens); 150 } else if (skipLeadingAmpersand_) { 151 skipLeadingAmpersand_ = false; 152 const char *p{SkipWhiteSpace(at_)}; 153 if (p < limit_ && *p == '&') { 154 column_ += ++p - at_; 155 at_ = p; 156 } 157 } else { 158 SkipSpaces(); 159 } 160 break; 161 } 162 163 while (NextToken(tokens)) { 164 } 165 166 Provenance newlineProvenance{GetCurrentProvenance()}; 167 if (std::optional<TokenSequence> preprocessed{ 168 preprocessor_.MacroReplacement(tokens, *this)}) { 169 // Reprocess the preprocessed line. Append a newline temporarily. 170 preprocessed->PutNextTokenChar('\n', newlineProvenance); 171 preprocessed->CloseToken(); 172 const char *ppd{preprocessed->ToCharBlock().begin()}; 173 LineClassification ppl{ClassifyLine(ppd)}; 174 preprocessed->RemoveLastToken(); // remove the newline 175 switch (ppl.kind) { 176 case LineClassification::Kind::Comment: 177 break; 178 case LineClassification::Kind::IncludeLine: 179 FortranInclude(ppd + ppl.payloadOffset); 180 break; 181 case LineClassification::Kind::ConditionalCompilationDirective: 182 case LineClassification::Kind::IncludeDirective: 183 case LineClassification::Kind::DefinitionDirective: 184 case LineClassification::Kind::PreprocessorDirective: 185 Say(preprocessed->GetProvenanceRange(), 186 "Preprocessed line resembles a preprocessor directive"_en_US); 187 preprocessed->ToLowerCase().Emit(cooked_); 188 break; 189 case LineClassification::Kind::CompilerDirective: 190 if (preprocessed->HasRedundantBlanks()) { 191 preprocessed->RemoveRedundantBlanks(); 192 } 193 NormalizeCompilerDirectiveCommentMarker(*preprocessed); 194 preprocessed->ToLowerCase(); 195 SourceFormChange(preprocessed->ToString()); 196 preprocessed->ClipComment(true /* skip first ! */).Emit(cooked_); 197 break; 198 case LineClassification::Kind::Source: 199 if (inFixedForm_) { 200 if (preprocessed->HasBlanks(/*after column*/ 6)) { 201 preprocessed->RemoveBlanks(/*after column*/ 6); 202 } 203 } else { 204 if (preprocessed->HasRedundantBlanks()) { 205 preprocessed->RemoveRedundantBlanks(); 206 } 207 } 208 preprocessed->ToLowerCase().ClipComment().Emit(cooked_); 209 break; 210 } 211 } else { 212 tokens.ToLowerCase(); 213 if (line.kind == LineClassification::Kind::CompilerDirective) { 214 SourceFormChange(tokens.ToString()); 215 } 216 tokens.Emit(cooked_); 217 } 218 if (omitNewline_) { 219 omitNewline_ = false; 220 } else { 221 cooked_.Put('\n', newlineProvenance); 222 } 223 directiveSentinel_ = nullptr; 224 } 225 226 TokenSequence Prescanner::TokenizePreprocessorDirective() { 227 CHECK(nextLine_ < limit_ && !inPreprocessorDirective_); 228 inPreprocessorDirective_ = true; 229 BeginSourceLineAndAdvance(); 230 TokenSequence tokens; 231 while (NextToken(tokens)) { 232 } 233 inPreprocessorDirective_ = false; 234 return tokens; 235 } 236 237 void Prescanner::NextLine() { 238 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 239 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 240 if (!v) { 241 nextLine_ = limit_; 242 } else { 243 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 244 nextLine_ = nl + 1; 245 } 246 } 247 248 void Prescanner::LabelField(TokenSequence &token, int outCol) { 249 const char *bad{nullptr}; 250 for (; *at_ != '\n' && column_ <= 6; ++at_) { 251 if (*at_ == '\t') { 252 ++at_; 253 column_ = 7; 254 break; 255 } 256 if (*at_ != ' ' && 257 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space 258 EmitChar(token, *at_); 259 if (!bad && !IsDecimalDigit(*at_)) { 260 bad = at_; 261 } 262 ++outCol; 263 } 264 ++column_; 265 } 266 if (outCol > 1) { 267 if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { 268 Say(GetProvenance(bad), 269 "Character in fixed-form label field must be a digit"_en_US); 270 } 271 token.CloseToken(); 272 } 273 SkipToNextSignificantCharacter(); 274 if (IsDecimalDigit(*at_)) { 275 Say(GetProvenance(at_), 276 "Label digit is not in fixed-form label field"_en_US); 277 } 278 } 279 280 void Prescanner::SkipToEndOfLine() { 281 while (*at_ != '\n') { 282 ++at_, ++column_; 283 } 284 } 285 286 bool Prescanner::MustSkipToEndOfLine() const { 287 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 288 return true; // skip over ignored columns in right margin (73:80) 289 } else if (*at_ == '!' && !inCharLiteral_) { 290 return true; // inline comment goes to end of source line 291 } else { 292 return false; 293 } 294 } 295 296 void Prescanner::NextChar() { 297 CHECK(*at_ != '\n'); 298 ++at_, ++column_; 299 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 300 // UTF-8 byte order mark - treat this file as UTF-8 301 at_ += 3; 302 encoding_ = Encoding::UTF_8; 303 } 304 SkipToNextSignificantCharacter(); 305 } 306 307 // Skip everything that should be ignored until the next significant 308 // character is reached; handles C-style comments in preprocessing 309 // directives, Fortran ! comments, stuff after the right margin in 310 // fixed form, and all forms of line continuation. 311 void Prescanner::SkipToNextSignificantCharacter() { 312 if (inPreprocessorDirective_) { 313 SkipCComments(); 314 } else { 315 bool mightNeedSpace{false}; 316 if (MustSkipToEndOfLine()) { 317 SkipToEndOfLine(); 318 } else { 319 mightNeedSpace = *at_ == '\n'; 320 } 321 for (; Continuation(mightNeedSpace); mightNeedSpace = false) { 322 if (MustSkipToEndOfLine()) { 323 SkipToEndOfLine(); 324 } 325 } 326 if (*at_ == '\t') { 327 tabInCurrentLine_ = true; 328 } 329 } 330 } 331 332 void Prescanner::SkipCComments() { 333 while (true) { 334 if (IsCComment(at_)) { 335 if (const char *after{SkipCComment(at_)}) { 336 column_ += after - at_; 337 // May have skipped over one or more newlines; relocate the start of 338 // the next line. 339 nextLine_ = at_ = after; 340 NextLine(); 341 } else { 342 // Don't emit any messages about unclosed C-style comments, because 343 // the sequence /* can appear legally in a FORMAT statement. There's 344 // no ambiguity, since the sequence */ cannot appear legally. 345 break; 346 } 347 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && 348 at_[1] == '\n' && nextLine_ < limit_) { 349 BeginSourceLineAndAdvance(); 350 } else { 351 break; 352 } 353 } 354 } 355 356 void Prescanner::SkipSpaces() { 357 while (*at_ == ' ' || *at_ == '\t') { 358 NextChar(); 359 } 360 insertASpace_ = false; 361 } 362 363 const char *Prescanner::SkipWhiteSpace(const char *p) { 364 while (*p == ' ' || *p == '\t') { 365 ++p; 366 } 367 return p; 368 } 369 370 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { 371 while (true) { 372 if (*p == ' ' || *p == '\t') { 373 ++p; 374 } else if (IsCComment(p)) { 375 if (const char *after{SkipCComment(p)}) { 376 p = after; 377 } else { 378 break; 379 } 380 } else { 381 break; 382 } 383 } 384 return p; 385 } 386 387 const char *Prescanner::SkipCComment(const char *p) const { 388 char star{' '}, slash{' '}; 389 p += 2; 390 while (star != '*' || slash != '/') { 391 if (p >= limit_) { 392 return nullptr; // signifies an unterminated comment 393 } 394 star = slash; 395 slash = *p++; 396 } 397 return p; 398 } 399 400 bool Prescanner::NextToken(TokenSequence &tokens) { 401 CHECK(at_ >= start_ && at_ < limit_); 402 if (InFixedFormSource()) { 403 SkipSpaces(); 404 } else { 405 if (*at_ == '/' && IsCComment(at_)) { 406 // Recognize and skip over classic C style /*comments*/ when 407 // outside a character literal. 408 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { 409 Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US); 410 } 411 SkipCComments(); 412 } 413 if (*at_ == ' ' || *at_ == '\t') { 414 // Compress free-form white space into a single space character. 415 const auto theSpace{at_}; 416 char previous{at_ <= start_ ? ' ' : at_[-1]}; 417 NextChar(); 418 SkipSpaces(); 419 if (*at_ == '\n') { 420 // Discard white space at the end of a line. 421 } else if (!inPreprocessorDirective_ && 422 (previous == '(' || *at_ == '(' || *at_ == ')')) { 423 // Discard white space before/after '(' and before ')', unless in a 424 // preprocessor directive. This helps yield space-free contiguous 425 // names for generic interfaces like OPERATOR( + ) and 426 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). 427 // This has the effect of silently ignoring the illegal spaces in 428 // the array constructor ( /1,2/ ) but that seems benign; it's 429 // hard to avoid that while still removing spaces from OPERATOR( / ) 430 // and OPERATOR( // ). 431 } else { 432 // Preserve the squashed white space as a single space character. 433 tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); 434 tokens.CloseToken(); 435 return true; 436 } 437 } 438 } 439 if (insertASpace_) { 440 tokens.PutNextTokenChar(' ', spaceProvenance_); 441 insertASpace_ = false; 442 } 443 if (*at_ == '\n') { 444 return false; 445 } 446 const char *start{at_}; 447 if (*at_ == '\'' || *at_ == '"') { 448 QuotedCharacterLiteral(tokens, start); 449 preventHollerith_ = false; 450 } else if (IsDecimalDigit(*at_)) { 451 int n{0}, digits{0}; 452 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; 453 do { 454 if (n < maxHollerith) { 455 n = 10 * n + DecimalDigitValue(*at_); 456 } 457 EmitCharAndAdvance(tokens, *at_); 458 ++digits; 459 if (InFixedFormSource()) { 460 SkipSpaces(); 461 } 462 } while (IsDecimalDigit(*at_)); 463 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && 464 !preventHollerith_) { 465 Hollerith(tokens, n, start); 466 } else if (*at_ == '.') { 467 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 468 } 469 ExponentAndKind(tokens); 470 } else if (ExponentAndKind(tokens)) { 471 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && 472 inPreprocessorDirective_) { 473 do { 474 EmitCharAndAdvance(tokens, *at_); 475 } while (IsHexadecimalDigit(*at_)); 476 } else if (IsLetter(*at_)) { 477 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that 478 // we don't misrecognize I9HOLLERITH as an identifier in the next case. 479 EmitCharAndAdvance(tokens, *at_); 480 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { 481 EmitCharAndAdvance(tokens, *at_); 482 QuotedCharacterLiteral(tokens, start); 483 } 484 preventHollerith_ = false; 485 } else if (*at_ == '.') { 486 char nch{EmitCharAndAdvance(tokens, '.')}; 487 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { 488 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 489 } 490 ExponentAndKind(tokens); 491 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { 492 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis 493 } 494 preventHollerith_ = false; 495 } else if (IsLegalInIdentifier(*at_)) { 496 do { 497 } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))); 498 if (*at_ == '\'' || *at_ == '"') { 499 QuotedCharacterLiteral(tokens, start); 500 preventHollerith_ = false; 501 } else { 502 // Subtle: Don't misrecognize labeled DO statement label as Hollerith 503 // when the loop control variable starts with 'H'. 504 preventHollerith_ = true; 505 } 506 } else if (*at_ == '*') { 507 if (EmitCharAndAdvance(tokens, '*') == '*') { 508 EmitCharAndAdvance(tokens, '*'); 509 } else { 510 // Subtle ambiguity: 511 // CHARACTER*2H declares H because *2 is a kind specifier 512 // DATAC/N*2H / is repeated Hollerith 513 preventHollerith_ = !slashInCurrentLine_; 514 } 515 } else { 516 char ch{*at_}; 517 if (ch == '(' || ch == '[') { 518 ++delimiterNesting_; 519 } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) { 520 --delimiterNesting_; 521 } 522 char nch{EmitCharAndAdvance(tokens, ch)}; 523 preventHollerith_ = false; 524 if ((nch == '=' && 525 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || 526 (ch == nch && 527 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || 528 ch == '|' || ch == '<' || ch == '>')) || 529 (ch == '=' && nch == '>')) { 530 // token comprises two characters 531 EmitCharAndAdvance(tokens, nch); 532 } else if (ch == '/') { 533 slashInCurrentLine_ = true; 534 } 535 } 536 tokens.CloseToken(); 537 return true; 538 } 539 540 bool Prescanner::ExponentAndKind(TokenSequence &tokens) { 541 char ed{ToLowerCaseLetter(*at_)}; 542 if (ed != 'e' && ed != 'd') { 543 return false; 544 } 545 EmitCharAndAdvance(tokens, ed); 546 if (*at_ == '+' || *at_ == '-') { 547 EmitCharAndAdvance(tokens, *at_); 548 } 549 while (IsDecimalDigit(*at_)) { 550 EmitCharAndAdvance(tokens, *at_); 551 } 552 if (*at_ == '_') { 553 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { 554 } 555 } 556 return true; 557 } 558 559 void Prescanner::QuotedCharacterLiteral( 560 TokenSequence &tokens, const char *start) { 561 char quote{*at_}; 562 const char *end{at_ + 1}; 563 inCharLiteral_ = true; 564 const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; 565 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; 566 bool isEscaped{false}; 567 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; 568 while (true) { 569 if (*at_ == '\\') { 570 if (escapesEnabled) { 571 isEscaped = !isEscaped; 572 } else { 573 // The parser always processes escape sequences, so don't confuse it 574 // when escapes are disabled. 575 insert('\\'); 576 } 577 } else { 578 isEscaped = false; 579 } 580 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, 581 Encoding::LATIN_1); 582 while (PadOutCharacterLiteral(tokens)) { 583 } 584 if (*at_ == '\n') { 585 if (!inPreprocessorDirective_) { 586 Say(GetProvenanceRange(start, end), 587 "Incomplete character literal"_err_en_US); 588 } 589 break; 590 } 591 end = at_ + 1; 592 NextChar(); 593 if (*at_ == quote && !isEscaped) { 594 // A doubled unescaped quote mark becomes a single instance of that 595 // quote character in the literal (later). There can be spaces between 596 // the quotes in fixed form source. 597 EmitChar(tokens, quote); 598 inCharLiteral_ = false; // for cases like print *, '...'!comment 599 NextChar(); 600 if (InFixedFormSource()) { 601 SkipSpaces(); 602 } 603 if (*at_ != quote) { 604 break; 605 } 606 inCharLiteral_ = true; 607 } 608 } 609 inCharLiteral_ = false; 610 } 611 612 void Prescanner::Hollerith( 613 TokenSequence &tokens, int count, const char *start) { 614 inCharLiteral_ = true; 615 CHECK(*at_ == 'h' || *at_ == 'H'); 616 EmitChar(tokens, 'H'); 617 while (count-- > 0) { 618 if (PadOutCharacterLiteral(tokens)) { 619 } else if (*at_ == '\n') { 620 Say(GetProvenanceRange(start, at_), 621 "Possible truncated Hollerith literal"_en_US); 622 break; 623 } else { 624 NextChar(); 625 // Each multi-byte character encoding counts as a single character. 626 // No escape sequences are recognized. 627 // Hollerith is always emitted to the cooked character 628 // stream in UTF-8. 629 DecodedCharacter decoded{DecodeCharacter( 630 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; 631 if (decoded.bytes > 0) { 632 EncodedCharacter utf8{ 633 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; 634 for (int j{0}; j < utf8.bytes; ++j) { 635 EmitChar(tokens, utf8.buffer[j]); 636 } 637 at_ += decoded.bytes - 1; 638 } else { 639 Say(GetProvenanceRange(start, at_), 640 "Bad character in Hollerith literal"_err_en_US); 641 break; 642 } 643 } 644 } 645 if (*at_ != '\n') { 646 NextChar(); 647 } 648 inCharLiteral_ = false; 649 } 650 651 // In fixed form, source card images must be processed as if they were at 652 // least 72 columns wide, at least in character literal contexts. 653 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { 654 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { 655 if (column_ < fixedFormColumnLimit_) { 656 tokens.PutNextTokenChar(' ', spaceProvenance_); 657 ++column_; 658 return true; 659 } 660 if (!FixedFormContinuation(false /*no need to insert space*/) || 661 tabInCurrentLine_) { 662 return false; 663 } 664 CHECK(column_ == 7); 665 --at_; // point to column 6 of continuation line 666 column_ = 6; 667 } 668 return false; 669 } 670 671 bool Prescanner::IsFixedFormCommentLine(const char *start) const { 672 const char *p{start}; 673 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. 674 ((*p == 'D' || *p == 'd') && 675 !features_.IsEnabled(LanguageFeature::OldDebugLines))) { 676 return true; 677 } 678 bool anyTabs{false}; 679 while (true) { 680 if (*p == ' ') { 681 ++p; 682 } else if (*p == '\t') { 683 anyTabs = true; 684 ++p; 685 } else if (*p == '0' && !anyTabs && p == start + 5) { 686 ++p; // 0 in column 6 must treated as a space 687 } else { 688 break; 689 } 690 } 691 if (!anyTabs && p >= start + fixedFormColumnLimit_) { 692 return true; 693 } 694 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { 695 return true; 696 } 697 return *p == '\n'; 698 } 699 700 const char *Prescanner::IsFreeFormComment(const char *p) const { 701 p = SkipWhiteSpaceAndCComments(p); 702 if (*p == '!' || *p == '\n') { 703 return p; 704 } else { 705 return nullptr; 706 } 707 } 708 709 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { 710 const char *p{SkipWhiteSpace(start)}; 711 for (char ch : "include"s) { 712 if (ToLowerCaseLetter(*p++) != ch) { 713 return std::nullopt; 714 } 715 } 716 p = SkipWhiteSpace(p); 717 if (*p == '"' || *p == '\'') { 718 return {p - start}; 719 } 720 return std::nullopt; 721 } 722 723 void Prescanner::FortranInclude(const char *firstQuote) { 724 const char *p{firstQuote}; 725 while (*p != '"' && *p != '\'') { 726 ++p; 727 } 728 char quote{*p}; 729 std::string path; 730 for (++p; *p != '\n'; ++p) { 731 if (*p == quote) { 732 if (p[1] != quote) { 733 break; 734 } 735 ++p; 736 } 737 path += *p; 738 } 739 if (*p != quote) { 740 Say(GetProvenanceRange(firstQuote, p), 741 "malformed path name string"_err_en_US); 742 return; 743 } 744 p = SkipWhiteSpace(p + 1); 745 if (*p != '\n' && *p != '!') { 746 const char *garbage{p}; 747 for (; *p != '\n' && *p != '!'; ++p) { 748 } 749 Say(GetProvenanceRange(garbage, p), 750 "excess characters after path name"_en_US); 751 } 752 std::string buf; 753 llvm::raw_string_ostream error{buf}; 754 Provenance provenance{GetProvenance(nextLine_)}; 755 AllSources &allSources{cooked_.allSources()}; 756 const SourceFile *currentFile{allSources.GetSourceFile(provenance)}; 757 if (currentFile) { 758 allSources.PushSearchPathDirectory(DirectoryName(currentFile->path())); 759 } 760 const SourceFile *included{allSources.Open(path, error)}; 761 if (currentFile) { 762 allSources.PopSearchPathDirectory(); 763 } 764 if (!included) { 765 Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); 766 } else if (included->bytes() > 0) { 767 ProvenanceRange includeLineRange{ 768 provenance, static_cast<std::size_t>(p - nextLine_)}; 769 ProvenanceRange fileRange{ 770 allSources.AddIncludedFile(*included, includeLineRange)}; 771 Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); 772 } 773 } 774 775 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 776 const char *p{start}; 777 for (; *p == ' '; ++p) { 778 } 779 if (*p == '#') { 780 if (inFixedForm_ && p == start + 5) { 781 return nullptr; 782 } 783 } else { 784 p = SkipWhiteSpace(p); 785 if (*p != '#') { 786 return nullptr; 787 } 788 } 789 return SkipWhiteSpace(p + 1); 790 } 791 792 bool Prescanner::IsNextLinePreprocessorDirective() const { 793 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 794 } 795 796 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 797 if (nextLine_ >= limit_) { 798 if (afterAmpersand && prescannerNesting_ > 0) { 799 // A continuation marker at the end of the last line in an 800 // include file inhibits the newline for that line. 801 SkipToEndOfLine(); 802 omitNewline_ = true; 803 } 804 return false; 805 } 806 auto lineClass{ClassifyLine(nextLine_)}; 807 if (lineClass.kind == LineClassification::Kind::Comment) { 808 NextLine(); 809 return true; 810 } else if (inPreprocessorDirective_) { 811 return false; 812 } else if (lineClass.kind == 813 LineClassification::Kind::ConditionalCompilationDirective || 814 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 815 // Allow conditional compilation directives (e.g., #ifdef) to affect 816 // continuation lines. 817 // Allow other preprocessor directives, too, except #include 818 // (when it does not follow '&'), #define, and #undef (because 819 // they cannot be allowed to affect preceding text on a 820 // continued line). 821 preprocessor_.Directive(TokenizePreprocessorDirective(), this); 822 return true; 823 } else if (afterAmpersand && 824 (lineClass.kind == LineClassification::Kind::IncludeDirective || 825 lineClass.kind == LineClassification::Kind::IncludeLine)) { 826 SkipToEndOfLine(); 827 omitNewline_ = true; 828 skipLeadingAmpersand_ = true; 829 return false; 830 } else { 831 return false; 832 } 833 } 834 835 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 836 if (nextLine_ >= limit_) { 837 return nullptr; 838 } 839 tabInCurrentLine_ = false; 840 char col1{*nextLine_}; 841 if (InCompilerDirective()) { 842 // Must be a continued compiler directive. 843 if (!IsFixedFormCommentChar(col1)) { 844 return nullptr; 845 } 846 int j{1}; 847 for (; j < 5; ++j) { 848 char ch{directiveSentinel_[j - 1]}; 849 if (ch == '\0') { 850 break; 851 } 852 if (ch != ToLowerCaseLetter(nextLine_[j])) { 853 return nullptr; 854 } 855 } 856 for (; j < 5; ++j) { 857 if (nextLine_[j] != ' ') { 858 return nullptr; 859 } 860 } 861 char col6{nextLine_[5]}; 862 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 863 if (nextLine_[6] != ' ' && mightNeedSpace) { 864 insertASpace_ = true; 865 } 866 return nextLine_ + 6; 867 } 868 return nullptr; 869 } else { 870 // Normal case: not in a compiler directive. 871 if (col1 == '&' && 872 features_.IsEnabled( 873 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 874 // Extension: '&' as continuation marker 875 if (features_.ShouldWarn( 876 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 877 Say(GetProvenance(nextLine_), "nonstandard usage"_en_US); 878 } 879 return nextLine_ + 1; 880 } 881 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 882 tabInCurrentLine_ = true; 883 return nextLine_ + 2; // VAX extension 884 } 885 if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' && 886 nextLine_[3] == ' ' && nextLine_[4] == ' ') { 887 char col6{nextLine_[5]}; 888 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 889 return nextLine_ + 6; 890 } 891 } 892 if (IsImplicitContinuation()) { 893 return nextLine_; 894 } 895 } 896 return nullptr; // not a continuation line 897 } 898 899 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 900 const char *p{nextLine_}; 901 if (p >= limit_) { 902 return nullptr; 903 } 904 p = SkipWhiteSpace(p); 905 if (InCompilerDirective()) { 906 if (*p++ != '!') { 907 return nullptr; 908 } 909 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 910 if (*s != ToLowerCaseLetter(*p)) { 911 return nullptr; 912 } 913 } 914 p = SkipWhiteSpace(p); 915 if (*p == '&') { 916 if (!ampersand) { 917 insertASpace_ = true; 918 } 919 return p + 1; 920 } else if (ampersand) { 921 return p; 922 } else { 923 return nullptr; 924 } 925 } else { 926 if (*p == '&') { 927 return p + 1; 928 } else if (*p == '!' || *p == '\n' || *p == '#') { 929 return nullptr; 930 } else if (ampersand || IsImplicitContinuation()) { 931 if (p > nextLine_) { 932 --p; 933 } else { 934 insertASpace_ = true; 935 } 936 return p; 937 } else { 938 return nullptr; 939 } 940 } 941 } 942 943 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 944 // N.B. We accept '&' as a continuation indicator in fixed form, too, 945 // but not in a character literal. 946 if (*at_ == '&' && inCharLiteral_) { 947 return false; 948 } 949 do { 950 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 951 BeginSourceLine(cont); 952 column_ = 7; 953 NextLine(); 954 return true; 955 } 956 } while (SkipCommentLine(false /* not after ampersand */)); 957 return false; 958 } 959 960 bool Prescanner::FreeFormContinuation() { 961 const char *p{at_}; 962 bool ampersand{*p == '&'}; 963 if (ampersand) { 964 p = SkipWhiteSpace(p + 1); 965 } 966 if (*p != '\n') { 967 if (inCharLiteral_) { 968 return false; 969 } else if (*p != '!' && 970 features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 971 Say(GetProvenance(p), "missing ! before comment after &"_en_US); 972 } 973 } 974 do { 975 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 976 BeginSourceLine(cont); 977 NextLine(); 978 return true; 979 } 980 } while (SkipCommentLine(ampersand)); 981 return false; 982 } 983 984 // Implicit line continuation allows a preprocessor macro call with 985 // arguments to span multiple lines. 986 bool Prescanner::IsImplicitContinuation() const { 987 return !inPreprocessorDirective_ && !inCharLiteral_ && 988 delimiterNesting_ > 0 && nextLine_ < limit_ && 989 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 990 } 991 992 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 993 if (*at_ == '\n' || *at_ == '&') { 994 if (inFixedForm_) { 995 return FixedFormContinuation(mightNeedFixedFormSpace); 996 } else { 997 return FreeFormContinuation(); 998 } 999 } else { 1000 return false; 1001 } 1002 } 1003 1004 std::optional<Prescanner::LineClassification> 1005 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1006 const char *p{start}; 1007 char col1{*p++}; 1008 if (!IsFixedFormCommentChar(col1)) { 1009 return std::nullopt; 1010 } 1011 char sentinel[5], *sp{sentinel}; 1012 int column{2}; 1013 for (; column < 6; ++column, ++p) { 1014 if (*p != ' ') { 1015 if (*p == '\n' || *p == '\t') { 1016 break; 1017 } 1018 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1019 // OpenMP conditional compilation line: leave the label alone 1020 break; 1021 } 1022 *sp++ = ToLowerCaseLetter(*p); 1023 } 1024 } 1025 if (column == 6) { 1026 if (*p == ' ' || *p == '\t' || *p == '0') { 1027 ++p; 1028 } else { 1029 // This is a Continuation line, not an initial directive line. 1030 return std::nullopt; 1031 } 1032 } 1033 if (sp == sentinel) { 1034 return std::nullopt; 1035 } 1036 *sp = '\0'; 1037 if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) { 1038 std::size_t payloadOffset = p - start; 1039 return {LineClassification{ 1040 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1041 } 1042 return std::nullopt; 1043 } 1044 1045 std::optional<Prescanner::LineClassification> 1046 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1047 char sentinel[8]; 1048 const char *p{SkipWhiteSpace(start)}; 1049 if (*p++ != '!') { 1050 return std::nullopt; 1051 } 1052 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { 1053 if (*p == '\n') { 1054 break; 1055 } 1056 if (*p == ' ' || *p == '\t' || *p == '&') { 1057 if (j == 0) { 1058 break; 1059 } 1060 sentinel[j] = '\0'; 1061 p = SkipWhiteSpace(p + 1); 1062 if (*p == '!') { 1063 break; 1064 } 1065 if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) { 1066 std::size_t offset = p - start; 1067 return {LineClassification{ 1068 LineClassification::Kind::CompilerDirective, offset, sp}}; 1069 } 1070 break; 1071 } 1072 sentinel[j] = ToLowerCaseLetter(*p); 1073 } 1074 return std::nullopt; 1075 } 1076 1077 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1078 std::uint64_t packed{0}; 1079 for (char ch : dir) { 1080 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1081 } 1082 compilerDirectiveBloomFilter_.set(packed % prime1); 1083 compilerDirectiveBloomFilter_.set(packed % prime2); 1084 compilerDirectiveSentinels_.insert(dir); 1085 return *this; 1086 } 1087 1088 const char *Prescanner::IsCompilerDirectiveSentinel( 1089 const char *sentinel) const { 1090 std::uint64_t packed{0}; 1091 std::size_t n{0}; 1092 for (; sentinel[n] != '\0'; ++n) { 1093 packed = (packed << 8) | (sentinel[n] & 0xff); 1094 } 1095 if (n == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1096 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1097 return nullptr; 1098 } 1099 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, n))}; 1100 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1101 } 1102 1103 constexpr bool IsDirective(const char *match, const char *dir) { 1104 for (; *match; ++match) { 1105 if (*match != ToLowerCaseLetter(*dir++)) { 1106 return false; 1107 } 1108 } 1109 return true; 1110 } 1111 1112 Prescanner::LineClassification Prescanner::ClassifyLine( 1113 const char *start) const { 1114 if (inFixedForm_) { 1115 if (std::optional<LineClassification> lc{ 1116 IsFixedFormCompilerDirectiveLine(start)}) { 1117 return std::move(*lc); 1118 } 1119 if (IsFixedFormCommentLine(start)) { 1120 return {LineClassification::Kind::Comment}; 1121 } 1122 } else { 1123 if (std::optional<LineClassification> lc{ 1124 IsFreeFormCompilerDirectiveLine(start)}) { 1125 return std::move(*lc); 1126 } 1127 if (const char *bang{IsFreeFormComment(start)}) { 1128 return {LineClassification::Kind::Comment, 1129 static_cast<std::size_t>(bang - start)}; 1130 } 1131 } 1132 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1133 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1134 } 1135 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1136 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1137 IsDirective("else", dir) || IsDirective("endif", dir)) { 1138 return {LineClassification::Kind::ConditionalCompilationDirective}; 1139 } else if (IsDirective("include", dir)) { 1140 return {LineClassification::Kind::IncludeDirective}; 1141 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1142 return {LineClassification::Kind::DefinitionDirective}; 1143 } else { 1144 return {LineClassification::Kind::PreprocessorDirective}; 1145 } 1146 } 1147 return {LineClassification::Kind::Source}; 1148 } 1149 1150 void Prescanner::SourceFormChange(std::string &&dir) { 1151 if (dir == "!dir$ free") { 1152 inFixedForm_ = false; 1153 } else if (dir == "!dir$ fixed") { 1154 inFixedForm_ = true; 1155 } 1156 } 1157 } // namespace Fortran::parser 1158