1 //===-- lib/Parser/prescan.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "prescan.h"
10 #include "preprocessor.h"
11 #include "token-sequence.h"
12 #include "flang/Common/idioms.h"
13 #include "flang/Parser/characters.h"
14 #include "flang/Parser/message.h"
15 #include "flang/Parser/source.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include <cstddef>
18 #include <cstring>
19 #include <utility>
20 #include <vector>
21 
22 namespace Fortran::parser {
23 
24 using common::LanguageFeature;
25 
// Upper bound on a prescanner's nesting depth (prescannerNesting_).
// Prescan() reports an error and returns without scanning when the depth
// exceeds this value.
static constexpr int maxPrescannerNesting{100};
27 
// Constructs a prescanner that reports to `messages`, emits to `cooked`,
// and uses `preprocessor` and the language feature settings in `lfc`.
// The initial source encoding is the one reported by the AllSources
// instance reached through `cooked`.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      features_{lfc}, encoding_{cooked.allSources().encoding()} {}
32 
// Copy constructor, used by FortranInclude() to create the prescanner for
// an included file.  The copy is initialized from the same messages,
// cooked source, preprocessor, and feature settings as `that`; it takes
// over the current source form, column limit, encoding, pending
// skipLeadingAmpersand_ flag, and the compiler directive sentinel tables.
// Its nesting depth is one greater than that of `that`, which is what
// Prescan() checks against maxPrescannerNesting.
Prescanner::Prescanner(const Prescanner &that)
    : messages_{that.messages_}, cooked_{that.cooked_},
      preprocessor_{that.preprocessor_}, features_{that.features_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ +
                                     1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
43 
// Predicate: is `ch` one of the characters that mark a fixed-form comment
// line ('!', '*', or the letter C in either case)?
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
47 
48 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
49   char *p{dir.GetMutableCharData()};
50   char *limit{p + dir.SizeInChars()};
51   for (; p < limit; ++p) {
52     if (*p != ' ') {
53       CHECK(IsFixedFormCommentChar(*p));
54       *p = '!';
55       return;
56     }
57   }
58   DIE("compiler directive all blank");
59 }
60 
61 void Prescanner::Prescan(ProvenanceRange range) {
62   AllSources &allSources{cooked_.allSources()};
63   startProvenance_ = range.start();
64   std::size_t offset{0};
65   const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)};
66   CHECK(source);
67   start_ = source->content().data() + offset;
68   limit_ = start_ + range.size();
69   nextLine_ = start_;
70   const bool beganInFixedForm{inFixedForm_};
71   if (prescannerNesting_ > maxPrescannerNesting) {
72     Say(GetProvenance(start_),
73         "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
74     return;
75   }
76   while (nextLine_ < limit_) {
77     Statement();
78   }
79   if (inFixedForm_ != beganInFixedForm) {
80     std::string dir{"!dir$ "};
81     if (beganInFixedForm) {
82       dir += "fixed";
83     } else {
84       dir += "free";
85     }
86     dir += '\n';
87     TokenSequence tokens{dir, allSources.AddCompilerInsertion(dir).start()};
88     tokens.Emit(cooked_);
89   }
90 }
91 
// Prescans one statement that begins at nextLine_.
//  - Comment lines are skipped.
//  - Fortran INCLUDE lines are handed to FortranInclude().
//  - Preprocessor directives are tokenized and passed to the preprocessor.
//  - Compiler directives and ordinary source lines are tokenized with
//    NextToken(), subjected to macro replacement, and emitted to cooked_.
// When macro replacement changes the line, the result is classified again
// and handled according to its new kind.  A newline is appended to the
// cooked output unless omitNewline_ was set while scanning.
void Prescanner::Statement() {
  TokenSequence tokens;
  LineClassification line{ClassifyLine(nextLine_)};
  switch (line.kind) {
  case LineClassification::Kind::Comment:
    nextLine_ += line.payloadOffset; // advance to '!' or newline
    NextLine();
    return;
  case LineClassification::Kind::IncludeLine:
    FortranInclude(nextLine_ + line.payloadOffset);
    NextLine();
    return;
  case LineClassification::Kind::ConditionalCompilationDirective:
  case LineClassification::Kind::IncludeDirective:
  case LineClassification::Kind::DefinitionDirective:
  case LineClassification::Kind::PreprocessorDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), this);
    return;
  case LineClassification::Kind::CompilerDirective:
    directiveSentinel_ = line.sentinel;
    CHECK(InCompilerDirective());
    BeginSourceLineAndAdvance();
    // Position at_ on the comment character that introduces the directive.
    if (inFixedForm_) {
      CHECK(IsFixedFormCommentChar(*at_));
    } else {
      while (*at_ == ' ' || *at_ == '\t') {
        ++at_, ++column_;
      }
      CHECK(*at_ == '!');
    }
    if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
      // OpenMP conditional compilation line.  Remove the sentinel and then
      // treat the line as if it were normal source.
      at_ += 2, column_ += 2;
      if (inFixedForm_) {
        LabelField(tokens);
      } else {
        SkipSpaces();
      }
    } else {
      // Compiler directive.  Emit normalized sentinel.
      EmitChar(tokens, '!');
      ++at_, ++column_;
      // Emit the sentinel as classified, advancing past the source
      // characters that spelled it.
      for (const char *sp{directiveSentinel_}; *sp != '\0';
           ++sp, ++at_, ++column_) {
        EmitChar(tokens, *sp);
      }
      if (*at_ == ' ') {
        EmitChar(tokens, ' ');
        ++at_, ++column_;
      }
      tokens.CloseToken();
    }
    break;
  case LineClassification::Kind::Source:
    BeginSourceLineAndAdvance();
    if (inFixedForm_) {
      LabelField(tokens);
    } else if (skipLeadingAmpersand_) {
      // Set by SkipCommentLine() when an INCLUDE followed a line ending in
      // '&': a leading '&' on this line is skipped.
      skipLeadingAmpersand_ = false;
      const char *p{SkipWhiteSpace(at_)};
      if (p < limit_ && *p == '&') {
        column_ += ++p - at_;
        at_ = p;
      }
    } else {
      SkipSpaces();
    }
    break;
  }

  // Tokenize the rest of the statement.
  while (NextToken(tokens)) {
  }

  Provenance newlineProvenance{GetCurrentProvenance()};
  if (std::optional<TokenSequence> preprocessed{
          preprocessor_.MacroReplacement(tokens, *this)}) {
    // Reprocess the preprocessed line.  Append a newline temporarily.
    preprocessed->PutNextTokenChar('\n', newlineProvenance);
    preprocessed->CloseToken();
    const char *ppd{preprocessed->ToCharBlock().begin()};
    LineClassification ppl{ClassifyLine(ppd)};
    preprocessed->RemoveLastToken(); // remove the newline
    switch (ppl.kind) {
    case LineClassification::Kind::Comment:
      break;
    case LineClassification::Kind::IncludeLine:
      FortranInclude(ppd + ppl.payloadOffset);
      break;
    case LineClassification::Kind::ConditionalCompilationDirective:
    case LineClassification::Kind::IncludeDirective:
    case LineClassification::Kind::DefinitionDirective:
    case LineClassification::Kind::PreprocessorDirective:
      // The result is not run through the preprocessor as a directive; it
      // is emitted as-is after a message.
      Say(preprocessed->GetProvenanceRange(),
          "Preprocessed line resembles a preprocessor directive"_en_US);
      preprocessed->ToLowerCase().Emit(cooked_);
      break;
    case LineClassification::Kind::CompilerDirective:
      if (preprocessed->HasRedundantBlanks()) {
        preprocessed->RemoveRedundantBlanks();
      }
      NormalizeCompilerDirectiveCommentMarker(*preprocessed);
      preprocessed->ToLowerCase();
      SourceFormChange(preprocessed->ToString());
      preprocessed->ClipComment(true /* skip first ! */).Emit(cooked_);
      break;
    case LineClassification::Kind::Source:
      if (inFixedForm_) {
        if (preprocessed->HasBlanks(/*after column*/ 6)) {
          preprocessed->RemoveBlanks(/*after column*/ 6);
        }
      } else {
        if (preprocessed->HasRedundantBlanks()) {
          preprocessed->RemoveRedundantBlanks();
        }
      }
      preprocessed->ToLowerCase().ClipComment().Emit(cooked_);
      break;
    }
  } else {
    // No macro replacement took place; emit the tokens as scanned.
    tokens.ToLowerCase();
    if (line.kind == LineClassification::Kind::CompilerDirective) {
      SourceFormChange(tokens.ToString());
    }
    tokens.Emit(cooked_);
  }
  if (omitNewline_) {
    omitNewline_ = false;
  } else {
    cooked_.Put('\n', newlineProvenance);
  }
  directiveSentinel_ = nullptr;
}
225 
226 TokenSequence Prescanner::TokenizePreprocessorDirective() {
227   CHECK(nextLine_ < limit_ && !inPreprocessorDirective_);
228   inPreprocessorDirective_ = true;
229   BeginSourceLineAndAdvance();
230   TokenSequence tokens;
231   while (NextToken(tokens)) {
232   }
233   inPreprocessorDirective_ = false;
234   return tokens;
235 }
236 
237 void Prescanner::NextLine() {
238   void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
239   void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
240   if (!v) {
241     nextLine_ = limit_;
242   } else {
243     const char *nl{const_cast<const char *>(static_cast<char *>(v))};
244     nextLine_ = nl + 1;
245   }
246 }
247 
// Scans the label field (columns 1 through 6) of a fixed-form source line.
// Non-blank characters are emitted into `token`, except that a '0' in
// column 6 is dropped.  A tab ends the field early and sets the column to
// 7.  `outCol` is incremented once per emitted character; the token is
// closed only when it ends up greater than 1.  A message is issued for a
// non-digit in the field unless the open token is the name of a defined
// macro, and for a digit that immediately follows the field.
void Prescanner::LabelField(TokenSequence &token, int outCol) {
  const char *bad{nullptr}; // first non-digit character emitted, if any
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    if (*at_ != ' ' &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      if (!bad && !IsDecimalDigit(*at_)) {
        bad = at_;
      }
      ++outCol;
    }
    ++column_;
  }
  if (outCol > 1) {
    if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
      Say(GetProvenance(bad),
          "Character in fixed-form label field must be a digit"_en_US);
    }
    token.CloseToken();
  }
  SkipToNextSignificantCharacter();
  if (IsDecimalDigit(*at_)) {
    Say(GetProvenance(at_),
        "Label digit is not in fixed-form label field"_en_US);
  }
}
279 
280 void Prescanner::SkipToEndOfLine() {
281   while (*at_ != '\n') {
282     ++at_, ++column_;
283   }
284 }
285 
286 bool Prescanner::MustSkipToEndOfLine() const {
287   if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
288     return true; // skip over ignored columns in right margin (73:80)
289   } else if (*at_ == '!' && !inCharLiteral_) {
290     return true; // inline comment goes to end of source line
291   } else {
292     return false;
293   }
294 }
295 
296 void Prescanner::NextChar() {
297   CHECK(*at_ != '\n');
298   ++at_, ++column_;
299   while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
300     // UTF-8 byte order mark - treat this file as UTF-8
301     at_ += 3;
302     encoding_ = Encoding::UTF_8;
303   }
304   SkipToNextSignificantCharacter();
305 }
306 
307 // Skip everything that should be ignored until the next significant
308 // character is reached; handles C-style comments in preprocessing
309 // directives, Fortran ! comments, stuff after the right margin in
310 // fixed form, and all forms of line continuation.
311 void Prescanner::SkipToNextSignificantCharacter() {
312   if (inPreprocessorDirective_) {
313     SkipCComments();
314   } else {
315     bool mightNeedSpace{false};
316     if (MustSkipToEndOfLine()) {
317       SkipToEndOfLine();
318     } else {
319       mightNeedSpace = *at_ == '\n';
320     }
321     for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
322       if (MustSkipToEndOfLine()) {
323         SkipToEndOfLine();
324       }
325     }
326     if (*at_ == '\t') {
327       tabInCurrentLine_ = true;
328     }
329   }
330 }
331 
// Repeatedly skips C-style /* comments */ that begin at at_.  Within a
// preprocessor directive it also steps over a backslash-newline line
// continuation by beginning the next source line.  Stops at the first
// position where neither applies, or at an unterminated comment.
void Prescanner::SkipCComments() {
  while (true) {
    if (IsCComment(at_)) {
      if (const char *after{SkipCComment(at_)}) {
        column_ += after - at_;
        // May have skipped over one or more newlines; relocate the start of
        // the next line.
        nextLine_ = at_ = after;
        NextLine();
      } else {
        // Don't emit any messages about unclosed C-style comments, because
        // the sequence /* can appear legally in a FORMAT statement.  There's
        // no ambiguity, since the sequence */ cannot appear legally.
        break;
      }
    } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
        at_[1] == '\n' && nextLine_ < limit_) {
      // Backslash-newline: continue the directive on the next line.
      BeginSourceLineAndAdvance();
    } else {
      break;
    }
  }
}
355 
356 void Prescanner::SkipSpaces() {
357   while (*at_ == ' ' || *at_ == '\t') {
358     NextChar();
359   }
360   insertASpace_ = false;
361 }
362 
363 const char *Prescanner::SkipWhiteSpace(const char *p) {
364   while (*p == ' ' || *p == '\t') {
365     ++p;
366   }
367   return p;
368 }
369 
370 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
371   while (true) {
372     if (*p == ' ' || *p == '\t') {
373       ++p;
374     } else if (IsCComment(p)) {
375       if (const char *after{SkipCComment(p)}) {
376         p = after;
377       } else {
378         break;
379       }
380     } else {
381       break;
382     }
383   }
384   return p;
385 }
386 
387 const char *Prescanner::SkipCComment(const char *p) const {
388   char star{' '}, slash{' '};
389   p += 2;
390   while (star != '*' || slash != '/') {
391     if (p >= limit_) {
392       return nullptr; // signifies an unterminated comment
393     }
394     star = slash;
395     slash = *p++;
396   }
397   return p;
398 }
399 
// Scans one token starting at at_ and appends it to `tokens`.
// Returns false when the end of the line (a newline) has been reached and
// no token was produced; returns true otherwise.  In fixed-form source,
// blanks are skipped; in free form and in preprocessor directives, a run
// of white space is squashed into a single space token except where noted
// below.  The token kinds recognized here are character literals, numeric
// literals (including Hollerith and kind suffixes), names, and operators
// of one or two characters.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource()) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(GetProvenance(at_), "nonstandard usage: C-style comment"_en_US);
      }
      SkipCComments();
    }
    if (*at_ == ' ' || *at_ == '\t') {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n') {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive.  This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  // A continuation may have requested a space in front of the next token.
  if (insertASpace_) {
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // A digit string; `n` accumulates its value (capped near maxHollerith)
    // for use as a possible Hollerith count.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      if (n < maxHollerith) {
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      // "0x..." hexadecimal constant in a preprocessor directive.
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (IsLetter(*at_)) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) {
      // Digit string used as a kind prefix of a character literal.
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    do {
    } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
    if (*at_ == '\'' || *at_ == '"') {
      // A name immediately followed by a quote is scanned together with
      // the literal as one token.
      QuotedCharacterLiteral(tokens, start);
      preventHollerith_ = false;
    } else {
      // Subtle: Don't misrecognize labeled DO statement label as Hollerith
      // when the loop control variable starts with 'H'.
      preventHollerith_ = true;
    }
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentLine_;
    }
  } else {
    // Any other character: track (), [] nesting and look for operators
    // that comprise two characters.
    char ch{*at_};
    if (ch == '(' || ch == '[') {
      ++delimiterNesting_;
    } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
      --delimiterNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentLine_ = true;
    }
  }
  tokens.CloseToken();
  return true;
}
539 
540 bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
541   char ed{ToLowerCaseLetter(*at_)};
542   if (ed != 'e' && ed != 'd') {
543     return false;
544   }
545   EmitCharAndAdvance(tokens, ed);
546   if (*at_ == '+' || *at_ == '-') {
547     EmitCharAndAdvance(tokens, *at_);
548   }
549   while (IsDecimalDigit(*at_)) {
550     EmitCharAndAdvance(tokens, *at_);
551   }
552   if (*at_ == '_') {
553     while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
554     }
555   }
556   return true;
557 }
558 
// Scans a quoted character literal whose opening quote mark is at at_ and
// emits it into `tokens`.  `start` is the beginning of the enclosing token
// and is used only for the provenance of the "Incomplete character
// literal" error, which is issued when a newline is reached before the
// closing quote (except within a preprocessor directive).
// inCharLiteral_ is set while inside the literal and cleared on return.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false}; // true after an odd number of backslashes
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later).  There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break; // that was the closing quote
      }
      inCharLiteral_ = true; // doubled quote: the literal continues
    }
  }
  inCharLiteral_ = false;
}
611 
// Scans the body of a Hollerith literal.  On entry at_ is on the 'h' or
// 'H' that follows the count, and `count` is the number of characters
// that the literal should contain.  An 'H' is emitted, followed by each
// character re-encoded as UTF-8.  `start` is the beginning of the token
// and is used only for message provenance.  inCharLiteral_ is set for the
// duration of the scan.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
      // A padding blank was emitted and counts as one character.
    } else if (*at_ == '\n') {
      Say(GetProvenanceRange(start, at_),
          "Possible truncated Hollerith literal"_en_US);
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the last byte of the character just consumed.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}
650 
// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
// When the character after at_ is a newline on a fixed-form line without
// tabs, this function either emits one padding blank (returning true, so
// that callers can call it repeatedly) while the column is still below the
// column limit, or else tries to move on to a continuation line.  It
// returns false when no blank was emitted.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
  while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
    if (column_ < fixedFormColumnLimit_) {
      tokens.PutNextTokenChar(' ', spaceProvenance_);
      ++column_;
      return true;
    }
    if (!FixedFormContinuation(false /*no need to insert space*/) ||
        tabInCurrentLine_) {
      return false;
    }
    CHECK(column_ == 7);
    --at_; // point to column 6 of continuation line
    column_ = 6;
  }
  return false;
}
670 
671 bool Prescanner::IsFixedFormCommentLine(const char *start) const {
672   const char *p{start};
673   if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
674       ((*p == 'D' || *p == 'd') &&
675           !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
676     return true;
677   }
678   bool anyTabs{false};
679   while (true) {
680     if (*p == ' ') {
681       ++p;
682     } else if (*p == '\t') {
683       anyTabs = true;
684       ++p;
685     } else if (*p == '0' && !anyTabs && p == start + 5) {
686       ++p; // 0 in column 6 must treated as a space
687     } else {
688       break;
689     }
690   }
691   if (!anyTabs && p >= start + fixedFormColumnLimit_) {
692     return true;
693   }
694   if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
695     return true;
696   }
697   return *p == '\n';
698 }
699 
700 const char *Prescanner::IsFreeFormComment(const char *p) const {
701   p = SkipWhiteSpaceAndCComments(p);
702   if (*p == '!' || *p == '\n') {
703     return p;
704   } else {
705     return nullptr;
706   }
707 }
708 
709 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
710   const char *p{SkipWhiteSpace(start)};
711   for (char ch : "include"s) {
712     if (ToLowerCaseLetter(*p++) != ch) {
713       return std::nullopt;
714     }
715   }
716   p = SkipWhiteSpace(p);
717   if (*p == '"' || *p == '\'') {
718     return {p - start};
719   }
720   return std::nullopt;
721 }
722 
// Processes a Fortran INCLUDE line.  `firstQuote` points at or before the
// quote mark that opens the path name.  The path is extracted (a doubled
// quote mark stands for one quote character), the file is opened with the
// directory of the current source file temporarily pushed onto the search
// path, and a nested copy of this prescanner prescans its contents.
// Errors are reported for a malformed path or a file that cannot be
// opened; text after the path other than a '!' comment draws a message.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break; // closing quote
      }
      ++p; // doubled quote: keep one
    }
    path += *p;
  }
  if (*p != quote) {
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    Say(GetProvenanceRange(garbage, p),
        "excess characters after path name"_en_US);
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  AllSources &allSources{cooked_.allSources()};
  const SourceFile *currentFile{allSources.GetSourceFile(provenance)};
  // Search the including file's own directory while opening the file.
  if (currentFile) {
    allSources.PushSearchPathDirectory(DirectoryName(currentFile->path()));
  }
  const SourceFile *included{allSources.Open(path, error)};
  if (currentFile) {
    allSources.PopSearchPathDirectory();
  }
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, error.str());
  } else if (included->bytes() > 0) {
    // Empty files are silently ignored.
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources.AddIncludedFile(*included, includeLineRange)};
    Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange);
  }
}
774 
775 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
776   const char *p{start};
777   for (; *p == ' '; ++p) {
778   }
779   if (*p == '#') {
780     if (inFixedForm_ && p == start + 5) {
781       return nullptr;
782     }
783   } else {
784     p = SkipWhiteSpace(p);
785     if (*p != '#') {
786       return nullptr;
787     }
788   }
789   return SkipWhiteSpace(p + 1);
790 }
791 
792 bool Prescanner::IsNextLinePreprocessorDirective() const {
793   return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
794 }
795 
796 bool Prescanner::SkipCommentLine(bool afterAmpersand) {
797   if (nextLine_ >= limit_) {
798     if (afterAmpersand && prescannerNesting_ > 0) {
799       // A continuation marker at the end of the last line in an
800       // include file inhibits the newline for that line.
801       SkipToEndOfLine();
802       omitNewline_ = true;
803     }
804     return false;
805   }
806   auto lineClass{ClassifyLine(nextLine_)};
807   if (lineClass.kind == LineClassification::Kind::Comment) {
808     NextLine();
809     return true;
810   } else if (inPreprocessorDirective_) {
811     return false;
812   } else if (lineClass.kind ==
813           LineClassification::Kind::ConditionalCompilationDirective ||
814       lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
815     // Allow conditional compilation directives (e.g., #ifdef) to affect
816     // continuation lines.
817     // Allow other preprocessor directives, too, except #include
818     // (when it does not follow '&'), #define, and #undef (because
819     // they cannot be allowed to affect preceding text on a
820     // continued line).
821     preprocessor_.Directive(TokenizePreprocessorDirective(), this);
822     return true;
823   } else if (afterAmpersand &&
824       (lineClass.kind == LineClassification::Kind::IncludeDirective ||
825           lineClass.kind == LineClassification::Kind::IncludeLine)) {
826     SkipToEndOfLine();
827     omitNewline_ = true;
828     skipLeadingAmpersand_ = true;
829     return false;
830   } else {
831     return false;
832   }
833 }
834 
// Determines whether the line at nextLine_ is a fixed-form continuation
// line.  If so, returns a pointer to the position in that line at which
// scanning should resume; otherwise returns nullptr.  Within a compiler
// directive, the line must repeat the directive's sentinel after a comment
// character in column 1.  May set insertASpace_ (when `mightNeedSpace` is
// true) and tabInCurrentLine_.
const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
  if (nextLine_ >= limit_) {
    return nullptr;
  }
  tabInCurrentLine_ = false;
  char col1{*nextLine_};
  if (InCompilerDirective()) {
    // Must be a continued compiler directive.
    if (!IsFixedFormCommentChar(col1)) {
      return nullptr;
    }
    // Columns 2 through 5 must match the sentinel, ignoring case ...
    int j{1};
    for (; j < 5; ++j) {
      char ch{directiveSentinel_[j - 1]};
      if (ch == '\0') {
        break;
      }
      if (ch != ToLowerCaseLetter(nextLine_[j])) {
        return nullptr;
      }
    }
    // ... and be blank after the end of a short sentinel.
    for (; j < 5; ++j) {
      if (nextLine_[j] != ' ') {
        return nullptr;
      }
    }
    // Column 6 must hold a continuation marker: anything other than a
    // newline, tab, blank, or '0'.
    char col6{nextLine_[5]};
    if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
      if (nextLine_[6] != ' ' && mightNeedSpace) {
        insertASpace_ = true;
      }
      return nextLine_ + 6;
    }
    return nullptr;
  } else {
    // Normal case: not in a compiler directive.
    if (col1 == '&' &&
        features_.IsEnabled(
            LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
      // Extension: '&' as continuation marker
      if (features_.ShouldWarn(
              LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
        Say(GetProvenance(nextLine_), "nonstandard usage"_en_US);
      }
      return nextLine_ + 1;
    }
    if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
      tabInCurrentLine_ = true;
      return nextLine_ + 2; // VAX extension
    }
    // Standard form: blanks in columns 1 through 5 and a continuation
    // marker in column 6.
    if (col1 == ' ' && nextLine_[1] == ' ' && nextLine_[2] == ' ' &&
        nextLine_[3] == ' ' && nextLine_[4] == ' ') {
      char col6{nextLine_[5]};
      if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
        return nextLine_ + 6;
      }
    }
    if (IsImplicitContinuation()) {
      return nextLine_;
    }
  }
  return nullptr; // not a continuation line
}
898 
// Determines whether the line at nextLine_ continues the current free-form
// line.  `ampersand` tells whether the current line ended with '&'.
// Returns the position in the next line at which scanning should resume,
// or nullptr when the next line is not a continuation.  Within a compiler
// directive, the next line must begin with '!' and the same sentinel.
// May set insertASpace_.
const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
  const char *p{nextLine_};
  if (p >= limit_) {
    return nullptr;
  }
  p = SkipWhiteSpace(p);
  if (InCompilerDirective()) {
    if (*p++ != '!') {
      return nullptr;
    }
    // The sentinel must match, ignoring case.
    for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
      if (*s != ToLowerCaseLetter(*p)) {
        return nullptr;
      }
    }
    p = SkipWhiteSpace(p);
    if (*p == '&') {
      if (!ampersand) {
        insertASpace_ = true;
      }
      return p + 1;
    } else if (ampersand) {
      return p;
    } else {
      return nullptr;
    }
  } else {
    if (*p == '&') {
      return p + 1; // resume just after the leading '&'
    } else if (*p == '!' || *p == '\n' || *p == '#') {
      return nullptr;
    } else if (ampersand || IsImplicitContinuation()) {
      // No leading '&': resume one character before the first non-blank
      // character when there is leading white space, or else request that
      // a space be inserted.
      if (p > nextLine_) {
        --p;
      } else {
        insertASpace_ = true;
      }
      return p;
    } else {
      return nullptr;
    }
  }
}
942 
943 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
944   // N.B. We accept '&' as a continuation indicator in fixed form, too,
945   // but not in a character literal.
946   if (*at_ == '&' && inCharLiteral_) {
947     return false;
948   }
949   do {
950     if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
951       BeginSourceLine(cont);
952       column_ = 7;
953       NextLine();
954       return true;
955     }
956   } while (SkipCommentLine(false /* not after ampersand */));
957   return false;
958 }
959 
960 bool Prescanner::FreeFormContinuation() {
961   const char *p{at_};
962   bool ampersand{*p == '&'};
963   if (ampersand) {
964     p = SkipWhiteSpace(p + 1);
965   }
966   if (*p != '\n') {
967     if (inCharLiteral_) {
968       return false;
969     } else if (*p != '!' &&
970         features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
971       Say(GetProvenance(p), "missing ! before comment after &"_en_US);
972     }
973   }
974   do {
975     if (const char *cont{FreeFormContinuationLine(ampersand)}) {
976       BeginSourceLine(cont);
977       NextLine();
978       return true;
979     }
980   } while (SkipCommentLine(ampersand));
981   return false;
982 }
983 
984 // Implicit line continuation allows a preprocessor macro call with
985 // arguments to span multiple lines.
986 bool Prescanner::IsImplicitContinuation() const {
987   return !inPreprocessorDirective_ && !inCharLiteral_ &&
988       delimiterNesting_ > 0 && nextLine_ < limit_ &&
989       ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
990 }
991 
992 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
993   if (*at_ == '\n' || *at_ == '&') {
994     if (inFixedForm_) {
995       return FixedFormContinuation(mightNeedFixedFormSpace);
996     } else {
997       return FreeFormContinuation();
998     }
999   } else {
1000     return false;
1001   }
1002 }
1003 
1004 std::optional<Prescanner::LineClassification>
1005 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
1006   const char *p{start};
1007   char col1{*p++};
1008   if (!IsFixedFormCommentChar(col1)) {
1009     return std::nullopt;
1010   }
1011   char sentinel[5], *sp{sentinel};
1012   int column{2};
1013   for (; column < 6; ++column, ++p) {
1014     if (*p != ' ') {
1015       if (*p == '\n' || *p == '\t') {
1016         break;
1017       }
1018       if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
1019         // OpenMP conditional compilation line: leave the label alone
1020         break;
1021       }
1022       *sp++ = ToLowerCaseLetter(*p);
1023     }
1024   }
1025   if (column == 6) {
1026     if (*p == ' ' || *p == '\t' || *p == '0') {
1027       ++p;
1028     } else {
1029       // This is a Continuation line, not an initial directive line.
1030       return std::nullopt;
1031     }
1032   }
1033   if (sp == sentinel) {
1034     return std::nullopt;
1035   }
1036   *sp = '\0';
1037   if (const char *ss{IsCompilerDirectiveSentinel(sentinel)}) {
1038     std::size_t payloadOffset = p - start;
1039     return {LineClassification{
1040         LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
1041   }
1042   return std::nullopt;
1043 }
1044 
1045 std::optional<Prescanner::LineClassification>
1046 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
1047   char sentinel[8];
1048   const char *p{SkipWhiteSpace(start)};
1049   if (*p++ != '!') {
1050     return std::nullopt;
1051   }
1052   for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) {
1053     if (*p == '\n') {
1054       break;
1055     }
1056     if (*p == ' ' || *p == '\t' || *p == '&') {
1057       if (j == 0) {
1058         break;
1059       }
1060       sentinel[j] = '\0';
1061       p = SkipWhiteSpace(p + 1);
1062       if (*p == '!') {
1063         break;
1064       }
1065       if (const char *sp{IsCompilerDirectiveSentinel(sentinel)}) {
1066         std::size_t offset = p - start;
1067         return {LineClassification{
1068             LineClassification::Kind::CompilerDirective, offset, sp}};
1069       }
1070       break;
1071     }
1072     sentinel[j] = ToLowerCaseLetter(*p);
1073   }
1074   return std::nullopt;
1075 }
1076 
1077 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
1078   std::uint64_t packed{0};
1079   for (char ch : dir) {
1080     packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
1081   }
1082   compilerDirectiveBloomFilter_.set(packed % prime1);
1083   compilerDirectiveBloomFilter_.set(packed % prime2);
1084   compilerDirectiveSentinels_.insert(dir);
1085   return *this;
1086 }
1087 
1088 const char *Prescanner::IsCompilerDirectiveSentinel(
1089     const char *sentinel) const {
1090   std::uint64_t packed{0};
1091   std::size_t n{0};
1092   for (; sentinel[n] != '\0'; ++n) {
1093     packed = (packed << 8) | (sentinel[n] & 0xff);
1094   }
1095   if (n == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) ||
1096       !compilerDirectiveBloomFilter_.test(packed % prime2)) {
1097     return nullptr;
1098   }
1099   const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, n))};
1100   return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
1101 }
1102 
1103 constexpr bool IsDirective(const char *match, const char *dir) {
1104   for (; *match; ++match) {
1105     if (*match != ToLowerCaseLetter(*dir++)) {
1106       return false;
1107     }
1108   }
1109   return true;
1110 }
1111 
1112 Prescanner::LineClassification Prescanner::ClassifyLine(
1113     const char *start) const {
1114   if (inFixedForm_) {
1115     if (std::optional<LineClassification> lc{
1116             IsFixedFormCompilerDirectiveLine(start)}) {
1117       return std::move(*lc);
1118     }
1119     if (IsFixedFormCommentLine(start)) {
1120       return {LineClassification::Kind::Comment};
1121     }
1122   } else {
1123     if (std::optional<LineClassification> lc{
1124             IsFreeFormCompilerDirectiveLine(start)}) {
1125       return std::move(*lc);
1126     }
1127     if (const char *bang{IsFreeFormComment(start)}) {
1128       return {LineClassification::Kind::Comment,
1129           static_cast<std::size_t>(bang - start)};
1130     }
1131   }
1132   if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
1133     return {LineClassification::Kind::IncludeLine, *quoteOffset};
1134   }
1135   if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1136     if (IsDirective("if", dir) || IsDirective("elif", dir) ||
1137         IsDirective("else", dir) || IsDirective("endif", dir)) {
1138       return {LineClassification::Kind::ConditionalCompilationDirective};
1139     } else if (IsDirective("include", dir)) {
1140       return {LineClassification::Kind::IncludeDirective};
1141     } else if (IsDirective("define", dir) || IsDirective("undef", dir)) {
1142       return {LineClassification::Kind::DefinitionDirective};
1143     } else {
1144       return {LineClassification::Kind::PreprocessorDirective};
1145     }
1146   }
1147   return {LineClassification::Kind::Source};
1148 }
1149 
1150 void Prescanner::SourceFormChange(std::string &&dir) {
1151   if (dir == "!dir$ free") {
1152     inFixedForm_ = false;
1153   } else if (dir == "!dir$ fixed") {
1154     inFixedForm_ = true;
1155   }
1156 }
1157 } // namespace Fortran::parser
1158