1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
11 
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser.  This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
18 
19 #include "token-sequence.h"
20 #include "flang/Common/Fortran-features.h"
21 #include "flang/Parser/characters.h"
22 #include "flang/Parser/message.h"
23 #include "flang/Parser/provenance.h"
24 #include <bitset>
25 #include <optional>
26 #include <string>
27 #include <unordered_set>
28 
29 namespace Fortran::parser {
30 
// Forward declarations.  This header only names Preprocessor through
// references, so its full definition is not needed here.
// NOTE(review): Messages is presumably already defined by
// "flang/Parser/message.h", which is included above -- confirm whether this
// forward declaration is still needed.
class Messages;
class Preprocessor;
33 
34 class Prescanner {
35 public:
36   Prescanner(Messages &, CookedSource &, Preprocessor &,
37       common::LanguageFeatureControl);
38   Prescanner(const Prescanner &);
39 
allSources()40   const AllSources &allSources() const { return allSources_; }
allSources()41   AllSources &allSources() { return allSources_; }
messages()42   const Messages &messages() const { return messages_; }
messages()43   Messages &messages() { return messages_; }
preprocessor()44   const Preprocessor &preprocessor() const { return preprocessor_; }
preprocessor()45   Preprocessor &preprocessor() { return preprocessor_; }
46 
set_fixedForm(bool yes)47   Prescanner &set_fixedForm(bool yes) {
48     inFixedForm_ = yes;
49     return *this;
50   }
set_encoding(Encoding code)51   Prescanner &set_encoding(Encoding code) {
52     encoding_ = code;
53     return *this;
54   }
set_fixedFormColumnLimit(int limit)55   Prescanner &set_fixedFormColumnLimit(int limit) {
56     fixedFormColumnLimit_ = limit;
57     return *this;
58   }
59 
60   Prescanner &AddCompilerDirectiveSentinel(const std::string &);
61 
62   void Prescan(ProvenanceRange);
63   void Statement();
64   void NextLine();
65 
66   // Callbacks for use by Preprocessor.
IsAtEnd()67   bool IsAtEnd() const { return nextLine_ >= limit_; }
68   bool IsNextLinePreprocessorDirective() const;
69   TokenSequence TokenizePreprocessorDirective();
GetCurrentProvenance()70   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
71 
Say(A &&...a)72   template <typename... A> Message &Say(A &&...a) {
73     return messages_.Say(std::forward<A>(a)...);
74   }
75 
76 private:
77   struct LineClassification {
78     enum class Kind {
79       Comment,
80       ConditionalCompilationDirective,
81       IncludeDirective, // #include
82       DefinitionDirective, // #define & #undef
83       PreprocessorDirective,
84       IncludeLine, // Fortran INCLUDE
85       CompilerDirective,
86       Source
87     };
88     LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
89         : kind{k}, payloadOffset{po}, sentinel{s} {}
90     LineClassification(LineClassification &&) = default;
91     Kind kind;
92     std::size_t payloadOffset; // byte offset of content
93     const char *sentinel; // if it's a compiler directive
94   };
95 
BeginSourceLine(const char * at)96   void BeginSourceLine(const char *at) {
97     at_ = at;
98     column_ = 1;
99     tabInCurrentLine_ = false;
100   }
101 
BeginSourceLineAndAdvance()102   void BeginSourceLineAndAdvance() {
103     BeginSourceLine(nextLine_);
104     NextLine();
105   }
106 
BeginStatementAndAdvance()107   void BeginStatementAndAdvance() {
108     BeginSourceLineAndAdvance();
109     slashInCurrentStatement_ = false;
110     preventHollerith_ = false;
111     delimiterNesting_ = 0;
112   }
113 
GetProvenance(const char * sourceChar)114   Provenance GetProvenance(const char *sourceChar) const {
115     return startProvenance_ + (sourceChar - start_);
116   }
117 
GetProvenanceRange(const char * first,const char * afterLast)118   ProvenanceRange GetProvenanceRange(
119       const char *first, const char *afterLast) const {
120     std::size_t bytes = afterLast - first;
121     return {startProvenance_ + (first - start_), bytes};
122   }
123 
EmitChar(TokenSequence & tokens,char ch)124   void EmitChar(TokenSequence &tokens, char ch) {
125     tokens.PutNextTokenChar(ch, GetCurrentProvenance());
126   }
127 
EmitInsertedChar(TokenSequence & tokens,char ch)128   void EmitInsertedChar(TokenSequence &tokens, char ch) {
129     Provenance provenance{allSources_.CompilerInsertionProvenance(ch)};
130     tokens.PutNextTokenChar(ch, provenance);
131   }
132 
EmitCharAndAdvance(TokenSequence & tokens,char ch)133   char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
134     EmitChar(tokens, ch);
135     NextChar();
136     return *at_;
137   }
138 
InCompilerDirective()139   bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
InFixedFormSource()140   bool InFixedFormSource() const {
141     return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
142   }
143 
IsCComment(const char * p)144   bool IsCComment(const char *p) const {
145     return p[0] == '/' && p[1] == '*' &&
146         (inPreprocessorDirective_ ||
147             (!inCharLiteral_ &&
148                 features_.IsEnabled(
149                     common::LanguageFeature::ClassicCComments)));
150   }
151 
152   void LabelField(TokenSequence &);
153   void EnforceStupidEndStatementRules(const TokenSequence &);
154   void SkipToEndOfLine();
155   bool MustSkipToEndOfLine() const;
156   void NextChar();
157   void SkipToNextSignificantCharacter();
158   void SkipCComments();
159   void SkipSpaces();
160   static const char *SkipWhiteSpace(const char *);
161   const char *SkipWhiteSpaceAndCComments(const char *) const;
162   const char *SkipCComment(const char *) const;
163   bool NextToken(TokenSequence &);
164   bool ExponentAndKind(TokenSequence &);
165   void QuotedCharacterLiteral(TokenSequence &, const char *start);
166   void Hollerith(TokenSequence &, int count, const char *start);
167   bool PadOutCharacterLiteral(TokenSequence &);
168   bool SkipCommentLine(bool afterAmpersand);
169   bool IsFixedFormCommentLine(const char *) const;
170   const char *IsFreeFormComment(const char *) const;
171   std::optional<std::size_t> IsIncludeLine(const char *) const;
172   void FortranInclude(const char *quote);
173   const char *IsPreprocessorDirectiveLine(const char *) const;
174   const char *FixedFormContinuationLine(bool mightNeedSpace);
175   const char *FreeFormContinuationLine(bool ampersand);
176   bool IsImplicitContinuation() const;
177   bool FixedFormContinuation(bool mightNeedSpace);
178   bool FreeFormContinuation();
179   bool Continuation(bool mightNeedFixedFormSpace);
180   std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
181       const char *) const;
182   std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
183       const char *) const;
184   const char *IsCompilerDirectiveSentinel(const char *) const;
185   LineClassification ClassifyLine(const char *) const;
186   void SourceFormChange(std::string &&);
187 
188   Messages &messages_;
189   CookedSource &cooked_;
190   Preprocessor &preprocessor_;
191   AllSources &allSources_;
192   common::LanguageFeatureControl features_;
193   bool inFixedForm_{false};
194   int fixedFormColumnLimit_{72};
195   Encoding encoding_{Encoding::UTF_8};
196   int delimiterNesting_{0};
197   int prescannerNesting_{0};
198 
199   Provenance startProvenance_;
200   const char *start_{nullptr}; // beginning of current source file content
201   const char *limit_{nullptr}; // first address after end of current source
202   const char *nextLine_{nullptr}; // next line to process; <= limit_
203   const char *directiveSentinel_{nullptr}; // current compiler directive
204 
205   // This data members are state for processing the source line containing
206   // "at_", which goes to up to the newline character before "nextLine_".
207   const char *at_{nullptr}; // next character to process; < nextLine_
208   int column_{1}; // card image column position of next character
209   bool tabInCurrentLine_{false};
210   bool slashInCurrentStatement_{false};
211   bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
212   bool inCharLiteral_{false};
213   bool inPreprocessorDirective_{false};
214 
215   // In some edge cases of compiler directive continuation lines, it
216   // is necessary to treat the line break as a space character by
217   // setting this flag, which is cleared by EmitChar().
218   bool insertASpace_{false};
219 
220   // When a free form continuation marker (&) appears at the end of a line
221   // before a INCLUDE or #include, we delete it and omit the newline, so
222   // that the first line of the included file is truly a continuation of
223   // the line before.  Also used when the & appears at the end of the last
224   // line in an include file.
225   bool omitNewline_{false};
226   bool skipLeadingAmpersand_{false};
227 
228   const Provenance spaceProvenance_{
229       allSources_.CompilerInsertionProvenance(' ')};
230   const Provenance backslashProvenance_{
231       allSources_.CompilerInsertionProvenance('\\')};
232 
233   // To avoid probing the set of active compiler directive sentinel strings
234   // on every comment line, they're checked first with a cheap Bloom filter.
235   static const int prime1{1019}, prime2{1021};
236   std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
237   std::unordered_set<std::string> compilerDirectiveSentinels_;
238 };
239 } // namespace Fortran::parser
240 #endif // FORTRAN_PARSER_PRESCAN_H_
241