1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PRESCAN_H_ 10 #define FORTRAN_PARSER_PRESCAN_H_ 11 12 // Defines a fast Fortran source prescanning phase that implements some 13 // character-level features of the language that can be inefficient to 14 // support directly in a backtracking parser. This phase handles Fortran 15 // line continuation, comment removal, card image margins, padding out 16 // fixed form character literals on truncated card images, file 17 // inclusion, and driving the Fortran source preprocessor. 18 19 #include "token-sequence.h" 20 #include "flang/Common/Fortran-features.h" 21 #include "flang/Parser/characters.h" 22 #include "flang/Parser/message.h" 23 #include "flang/Parser/provenance.h" 24 #include <bitset> 25 #include <optional> 26 #include <string> 27 #include <unordered_set> 28 29 namespace Fortran::parser { 30 31 class Messages; 32 class Preprocessor; 33 34 class Prescanner { 35 public: 36 Prescanner(Messages &, CookedSource &, Preprocessor &, 37 common::LanguageFeatureControl); 38 Prescanner(const Prescanner &); 39 allSources()40 const AllSources &allSources() const { return allSources_; } allSources()41 AllSources &allSources() { return allSources_; } messages()42 const Messages &messages() const { return messages_; } messages()43 Messages &messages() { return messages_; } preprocessor()44 const Preprocessor &preprocessor() const { return preprocessor_; } preprocessor()45 Preprocessor &preprocessor() { return preprocessor_; } 46 set_fixedForm(bool yes)47 Prescanner &set_fixedForm(bool yes) { 48 inFixedForm_ = yes; 49 return *this; 50 } set_encoding(Encoding code)51 Prescanner &set_encoding(Encoding code) { 52 encoding_ = code; 53 return *this; 54 } set_fixedFormColumnLimit(int limit)55 Prescanner &set_fixedFormColumnLimit(int limit) { 56 fixedFormColumnLimit_ = limit; 57 return *this; 58 } 59 60 Prescanner &AddCompilerDirectiveSentinel(const std::string &); 61 62 void Prescan(ProvenanceRange); 63 void Statement(); 64 void NextLine(); 65 66 // Callbacks for use by Preprocessor. IsAtEnd()67 bool IsAtEnd() const { return nextLine_ >= limit_; } 68 bool IsNextLinePreprocessorDirective() const; 69 TokenSequence TokenizePreprocessorDirective(); GetCurrentProvenance()70 Provenance GetCurrentProvenance() const { return GetProvenance(at_); } 71 Say(A &&...a)72 template <typename... A> Message &Say(A &&...a) { 73 return messages_.Say(std::forward<A>(a)...); 74 } 75 76 private: 77 struct LineClassification { 78 enum class Kind { 79 Comment, 80 ConditionalCompilationDirective, 81 IncludeDirective, // #include 82 DefinitionDirective, // #define & #undef 83 PreprocessorDirective, 84 IncludeLine, // Fortran INCLUDE 85 CompilerDirective, 86 Source 87 }; 88 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) 89 : kind{k}, payloadOffset{po}, sentinel{s} {} 90 LineClassification(LineClassification &&) = default; 91 Kind kind; 92 std::size_t payloadOffset; // byte offset of content 93 const char *sentinel; // if it's a compiler directive 94 }; 95 BeginSourceLine(const char * at)96 void BeginSourceLine(const char *at) { 97 at_ = at; 98 column_ = 1; 99 tabInCurrentLine_ = false; 100 } 101 BeginSourceLineAndAdvance()102 void BeginSourceLineAndAdvance() { 103 BeginSourceLine(nextLine_); 104 NextLine(); 105 } 106 BeginStatementAndAdvance()107 void BeginStatementAndAdvance() { 108 BeginSourceLineAndAdvance(); 109 slashInCurrentStatement_ = false; 110 preventHollerith_ = false; 111 delimiterNesting_ = 0; 112 } 113 GetProvenance(const char * sourceChar)114 Provenance GetProvenance(const char *sourceChar) const { 115 return startProvenance_ + (sourceChar - start_); 116 } 117 GetProvenanceRange(const char * first,const char * afterLast)118 ProvenanceRange GetProvenanceRange( 119 const char *first, const char *afterLast) const { 120 std::size_t bytes = afterLast - first; 121 return {startProvenance_ + (first - start_), bytes}; 122 } 123 EmitChar(TokenSequence & tokens,char ch)124 void EmitChar(TokenSequence &tokens, char ch) { 125 tokens.PutNextTokenChar(ch, GetCurrentProvenance()); 126 } 127 EmitInsertedChar(TokenSequence & tokens,char ch)128 void EmitInsertedChar(TokenSequence &tokens, char ch) { 129 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; 130 tokens.PutNextTokenChar(ch, provenance); 131 } 132 EmitCharAndAdvance(TokenSequence & tokens,char ch)133 char EmitCharAndAdvance(TokenSequence &tokens, char ch) { 134 EmitChar(tokens, ch); 135 NextChar(); 136 return *at_; 137 } 138 InCompilerDirective()139 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } InFixedFormSource()140 bool InFixedFormSource() const { 141 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); 142 } 143 IsCComment(const char * p)144 bool IsCComment(const char *p) const { 145 return p[0] == '/' && p[1] == '*' && 146 (inPreprocessorDirective_ || 147 (!inCharLiteral_ && 148 features_.IsEnabled( 149 common::LanguageFeature::ClassicCComments))); 150 } 151 152 void LabelField(TokenSequence &); 153 void EnforceStupidEndStatementRules(const TokenSequence &); 154 void SkipToEndOfLine(); 155 bool MustSkipToEndOfLine() const; 156 void NextChar(); 157 void SkipToNextSignificantCharacter(); 158 void SkipCComments(); 159 void SkipSpaces(); 160 static const char *SkipWhiteSpace(const char *); 161 const char *SkipWhiteSpaceAndCComments(const char *) const; 162 const char *SkipCComment(const char *) const; 163 bool NextToken(TokenSequence &); 164 bool ExponentAndKind(TokenSequence &); 165 void QuotedCharacterLiteral(TokenSequence &, const char *start); 166 void Hollerith(TokenSequence &, int count, const char *start); 167 bool PadOutCharacterLiteral(TokenSequence &); 168 bool SkipCommentLine(bool afterAmpersand); 169 bool IsFixedFormCommentLine(const char *) const; 170 const char *IsFreeFormComment(const char *) const; 171 std::optional<std::size_t> IsIncludeLine(const char *) const; 172 void FortranInclude(const char *quote); 173 const char *IsPreprocessorDirectiveLine(const char *) const; 174 const char *FixedFormContinuationLine(bool mightNeedSpace); 175 const char *FreeFormContinuationLine(bool ampersand); 176 bool IsImplicitContinuation() const; 177 bool FixedFormContinuation(bool mightNeedSpace); 178 bool FreeFormContinuation(); 179 bool Continuation(bool mightNeedFixedFormSpace); 180 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( 181 const char *) const; 182 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( 183 const char *) const; 184 const char *IsCompilerDirectiveSentinel(const char *) const; 185 LineClassification ClassifyLine(const char *) const; 186 void SourceFormChange(std::string &&); 187 188 Messages &messages_; 189 CookedSource &cooked_; 190 Preprocessor &preprocessor_; 191 AllSources &allSources_; 192 common::LanguageFeatureControl features_; 193 bool inFixedForm_{false}; 194 int fixedFormColumnLimit_{72}; 195 Encoding encoding_{Encoding::UTF_8}; 196 int delimiterNesting_{0}; 197 int prescannerNesting_{0}; 198 199 Provenance startProvenance_; 200 const char *start_{nullptr}; // beginning of current source file content 201 const char *limit_{nullptr}; // first address after end of current source 202 const char *nextLine_{nullptr}; // next line to process; <= limit_ 203 const char *directiveSentinel_{nullptr}; // current compiler directive 204 205 // This data members are state for processing the source line containing 206 // "at_", which goes to up to the newline character before "nextLine_". 207 const char *at_{nullptr}; // next character to process; < nextLine_ 208 int column_{1}; // card image column position of next character 209 bool tabInCurrentLine_{false}; 210 bool slashInCurrentStatement_{false}; 211 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith 212 bool inCharLiteral_{false}; 213 bool inPreprocessorDirective_{false}; 214 215 // In some edge cases of compiler directive continuation lines, it 216 // is necessary to treat the line break as a space character by 217 // setting this flag, which is cleared by EmitChar(). 218 bool insertASpace_{false}; 219 220 // When a free form continuation marker (&) appears at the end of a line 221 // before a INCLUDE or #include, we delete it and omit the newline, so 222 // that the first line of the included file is truly a continuation of 223 // the line before. Also used when the & appears at the end of the last 224 // line in an include file. 225 bool omitNewline_{false}; 226 bool skipLeadingAmpersand_{false}; 227 228 const Provenance spaceProvenance_{ 229 allSources_.CompilerInsertionProvenance(' ')}; 230 const Provenance backslashProvenance_{ 231 allSources_.CompilerInsertionProvenance('\\')}; 232 233 // To avoid probing the set of active compiler directive sentinel strings 234 // on every comment line, they're checked first with a cheap Bloom filter. 235 static const int prime1{1019}, prime2{1021}; 236 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes 237 std::unordered_set<std::string> compilerDirectiveSentinels_; 238 }; 239 } // namespace Fortran::parser 240 #endif // FORTRAN_PARSER_PRESCAN_H_ 241