1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
Definition(const TokenSequence & repl,std::size_t firstToken,std::size_t tokens)27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
Definition(const std::vector<std::string> & argNames,const TokenSequence & repl,std::size_t firstToken,std::size_t tokens,bool isVariadic)31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
Definition(const std::string & predefined,AllSources & sources)38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
set_isDisabled(bool disable)43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
IsLegalIdentifierStart(const CharBlock & cpl)49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
// Builds the stored form of a macro's replacement text.
//   argNames    formal argument names (must be distinct; enforced by CHECK)
//   token       token sequence that contains the replacement text
//   firstToken  index in `token` of the first replacement token
//   tokens      number of replacement tokens to copy
// Each identifier token that matches a formal argument name is replaced by a
// two-character placeholder: '~' followed by 'A' for the first argument, 'B'
// for the second, and so on.  All other tokens are copied unchanged.
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    const std::size_t at{firstToken + j};
    CharBlock tok{token.TokenAt(at)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // The placeholder takes the provenance of the token it replaces,
        // which lives at index `at` in `token` (not at the loop index `j`).
        result.Put(it->second, token.GetTokenProvenance(at));
        continue;
      }
    }
    result.Put(token, at, 1);
  }
  return result;
}
75 
// Produces a single token holding a double-quoted character literal built
// from the characters of every token in `tokens`.  Each '"' and each '\\'
// in the input is emitted twice.  The enclosing quotation marks carry the
// compiler-insertion provenance obtained from `allSources`; every other
// character keeps the provenance of the input character it came from.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  const std::size_t tokenCount{tokens.SizeInTokens()};
  for (std::size_t at{0}; at != tokenCount; ++at) {
    const CharBlock &token{tokens.TokenAt(at)};
    const std::size_t length{token.size()};
    for (std::size_t offset{0}; offset != length; ++offset) {
      const char ch{token[offset]};
      Provenance from{tokens.GetTokenProvenance(at, offset)};
      const bool mustDouble{ch == '"' || ch == '\\'};
      if (mustDouble) {
        result.PutNextTokenChar(ch, from);
      }
      result.PutNextTokenChar(ch, from);
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}
97 
IsTokenPasting(CharBlock opr)98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
AnyTokenPasting(const TokenSequence & text)102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
TokenPasting(TokenSequence && text)112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
// Expands one call of this function-like macro.
//   args        the actual arguments of the call; entries at index
//               argumentCount_ and beyond are the variadic arguments
//   prescanner  supplies the preprocessor (for macro replacement of
//               arguments) and the AllSources used for inserted characters
// Walks the stored replacement text and
//   - substitutes actual arguments for the "~A", "~B", ... placeholders
//     created by Tokenize(), stringifying an argument that follows '#';
//   - expands __VA_ARGS__ to the comma-separated variadic arguments;
//   - keeps or drops the body of __VA_OPT__( ... ) depending on whether any
//     variadic arguments were supplied;
//   - copies every other token unchanged.
// The result is finally passed through TokenPasting() to apply "##".
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
  TokenSequence result;
  // True while discarding the body of a __VA_OPT__( ... ) that must vanish.
  bool skipping{false};
  // Parenthesis depth inside a __VA_OPT__( ... ) body; 0 when outside one.
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      // Only track parentheses so that the matching ')' ends the skip.
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') { // argument substitution
      // Decode the placeholder: 'A' is argument 0, 'B' is argument 1, ...
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue; // no such actual argument: the placeholder is dropped
      }
      // Find the nearest preceding non-blank token of the replacement text.
      std::size_t prev{j};
      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
        --prev;
      }
      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
          replacement_.TokenAt(prev - 1)[0] ==
              '#') { // stringify argument without macro replacement
        // The '#' (and any blanks after it) were already copied to the
        // result; remove them before emitting the string literal.
        std::size_t resultSize{result.SizeInTokens()};
        while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) {
          result.pop_back();
          --resultSize;
        }
        CHECK(resultSize > 0 &&
            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
        result.pop_back();
        result.Put(Stringify(args[index], prescanner.allSources()));
      } else {
        const TokenSequence *arg{&args[index]};
        // Holds the macro-replaced argument, if replacement changed it.
        std::optional<TokenSequence> replaced;
        // Don't replace macros in the actual argument if it is preceded or
        // followed by the token-pasting operator ## in the replacement text.
        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
          auto next{replacement_.SkipBlanks(j + 1)};
          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
            // Apply macro replacement to the actual argument
            replaced =
                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
            if (replaced) {
              arg = &*replaced;
            }
          }
        }
        result.Put(DEREF(arg));
      }
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      // Emit the variadic arguments separated by compiler-inserted commas.
      Provenance commaProvenance{
          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
              ',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      // Start of __VA_OPT__( ... ): its body is skipped when the call has
      // no variadic arguments and copied otherwise.
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j; // step over the '('
    } else {
      // Ordinary token.  While inside a kept __VA_OPT__ body, track nested
      // parentheses so the ')' that closes __VA_OPT__ itself is not copied.
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return TokenPasting(std::move(result));
}
229 
// Formats the local-time representation of `now` with the strftime()
// conversion string `format` and returns the result.
// Returns an empty string when the time cannot be converted to local time,
// or when the formatted text (plus its terminating NUL) does not fit in the
// 16-byte work buffer (strftime() reports that case by returning 0).
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime() yields a null pointer on failure; passing that on to
  // std::strftime() would be undefined behavior.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  char buffer[16];
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return {buffer, length};
}
235 
Preprocessor(AllSources & allSources)236 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
237 
DefineStandardMacros()238 void Preprocessor::DefineStandardMacros() {
239   // Capture current local date & time once now to avoid having the values
240   // of __DATE__ or __TIME__ change during compilation.
241   std::time_t now;
242   std::time(&now);
243   Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
244   Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
245   // The values of these predefined macros depend on their invocation sites.
246   Define("__FILE__"s, "__FILE__"s);
247   Define("__LINE__"s, "__LINE__"s);
248 }
249 
Define(std::string macro,std::string value)250 void Preprocessor::Define(std::string macro, std::string value) {
251   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
252 }
253 
Undefine(std::string macro)254 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
255 
// Performs macro replacement on `input`.
// Returns std::nullopt when nothing would change: either no macros are
// defined at all, or no token of `input` is the name of a defined macro.
// Otherwise returns a new token sequence in which
//   - uses of object-like macros are replaced by their (rescanned)
//     replacement text, with __FILE__ and __LINE__ computed from the
//     prescanner's current provenance;
//   - well-formed calls of function-like macros are replaced by the result
//     of Definition::Apply(), rescanned for further macros;
//   - everything else is copied unchanged.
// A definition is marked disabled while its own replacement is rescanned,
// so a macro name appearing in its own expansion is left alone.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  if (definitions_.empty()) {
    return std::nullopt;
  }
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  // Tokens before the first defined name are copied verbatim.
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j); // an identifier that is not a macro name
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      result.Put(input, j); // currently being expanded: don't recurse
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ are stored as their own names (see
        // DefineStandardMacros) and get their values at the point of use.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
        // Other predefined macros fall through to ordinary replacement.
      }
      // Object-like macro: rescan its replacement text with the macro
      // itself disabled, then apply any "##" operators.
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        // Record the expansion as a macro call so that the provenance of
        // the result maps back to both the definition and the use.
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      result.Put(input, j); // a function-like macro name that is not called
      continue;
    }
    // Locate the arguments: argStart[n] is the index of the first token of
    // argument n.  Commas split arguments only outside nested parentheses.
    // On exit from the loop, k indexes the closing ')' or equals `tokens`.
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    // Leave the name unreplaced when the call is unterminated or has the
    // wrong number of arguments (extra ones are fine for variadic macros).
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    // Extract each argument's tokens; an argument ends just before the next
    // argument's separating comma, or before the closing ')' for the last.
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    // Expand the call and rescan the result with this macro disabled.
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
389 
// Convenience wrapper around MacroReplacement(): returns the macro-replaced
// form of `tokens`, or a copy of `tokens` itself when MacroReplacement()
// reports that there was nothing to replace.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  std::optional<TokenSequence> replaced{MacroReplacement(tokens, prescanner)};
  if (!replaced) {
    return tokens;
  }
  return std::move(*replaced);
}
397 
Directive(const TokenSequence & dir,Prescanner & prescanner)398 void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
399   std::size_t tokens{dir.SizeInTokens()};
400   std::size_t j{dir.SkipBlanks(0)};
401   if (j == tokens) {
402     return;
403   }
404   if (dir.TokenAt(j).ToString() != "#") {
405     prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
406     return;
407   }
408   j = dir.SkipBlanks(j + 1);
409   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
410     --tokens;
411   }
412   if (j == tokens) {
413     return;
414   }
415   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
416     return; // treat like #line, ignore it
417   }
418   std::size_t dirOffset{j};
419   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
420   j = dir.SkipBlanks(j + 1);
421   CharBlock nameToken;
422   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
423     nameToken = dir.TokenAt(j);
424   }
425   if (dirName == "line") {
426     // #line is ignored
427   } else if (dirName == "define") {
428     if (nameToken.empty()) {
429       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
430           "#define: missing or invalid name"_err_en_US);
431       return;
432     }
433     nameToken = SaveTokenAsName(nameToken);
434     definitions_.erase(nameToken);
435     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
436         dir.TokenAt(j)[0] == '(') {
437       j = dir.SkipBlanks(j + 1);
438       std::vector<std::string> argName;
439       bool isVariadic{false};
440       if (dir.TokenAt(j).ToString() != ")") {
441         while (true) {
442           std::string an{dir.TokenAt(j).ToString()};
443           if (an == "...") {
444             isVariadic = true;
445           } else {
446             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
447               prescanner.Say(dir.GetTokenProvenanceRange(j),
448                   "#define: missing or invalid argument name"_err_en_US);
449               return;
450             }
451             argName.push_back(an);
452           }
453           j = dir.SkipBlanks(j + 1);
454           if (j == tokens) {
455             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
456                 "#define: malformed argument list"_err_en_US);
457             return;
458           }
459           std::string punc{dir.TokenAt(j).ToString()};
460           if (punc == ")") {
461             break;
462           }
463           if (isVariadic || punc != ",") {
464             prescanner.Say(dir.GetTokenProvenanceRange(j),
465                 "#define: malformed argument list"_err_en_US);
466             return;
467           }
468           j = dir.SkipBlanks(j + 1);
469           if (j == tokens) {
470             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
471                 "#define: malformed argument list"_err_en_US);
472             return;
473           }
474         }
475         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
476             argName.size()) {
477           prescanner.Say(dir.GetTokenProvenance(dirOffset),
478               "#define: argument names are not distinct"_err_en_US);
479           return;
480         }
481       }
482       j = dir.SkipBlanks(j + 1);
483       definitions_.emplace(std::make_pair(
484           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
485     } else {
486       j = dir.SkipBlanks(j + 1);
487       definitions_.emplace(
488           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
489     }
490   } else if (dirName == "undef") {
491     if (nameToken.empty()) {
492       prescanner.Say(
493           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
494           "# missing or invalid name"_err_en_US);
495     } else {
496       if (dir.IsAnythingLeft(++j)) {
497         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
498             "#undef: excess tokens at end of directive"_port_en_US);
499       } else {
500         definitions_.erase(nameToken);
501       }
502     }
503   } else if (dirName == "ifdef" || dirName == "ifndef") {
504     bool doThen{false};
505     if (nameToken.empty()) {
506       prescanner.Say(
507           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
508           "#%s: missing name"_err_en_US, dirName);
509     } else {
510       if (dir.IsAnythingLeft(++j)) {
511         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
512             "#%s: excess tokens at end of directive"_port_en_US, dirName);
513       }
514       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
515     }
516     if (doThen) {
517       ifStack_.push(CanDeadElseAppear::Yes);
518     } else {
519       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
520           dir.GetTokenProvenance(dirOffset));
521     }
522   } else if (dirName == "if") {
523     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
524       ifStack_.push(CanDeadElseAppear::Yes);
525     } else {
526       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
527           dir.GetTokenProvenanceRange(dirOffset));
528     }
529   } else if (dirName == "else") {
530     if (dir.IsAnythingLeft(j)) {
531       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
532           "#else: excess tokens at end of directive"_port_en_US);
533     } else if (ifStack_.empty()) {
534       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
535           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
536     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
537       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
538           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
539     } else {
540       ifStack_.pop();
541       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
542           dir.GetTokenProvenanceRange(dirOffset));
543     }
544   } else if (dirName == "elif") {
545     if (ifStack_.empty()) {
546       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
547           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
548     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
549       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
550           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
551     } else {
552       ifStack_.pop();
553       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
554           dir.GetTokenProvenanceRange(dirOffset));
555     }
556   } else if (dirName == "endif") {
557     if (dir.IsAnythingLeft(j)) {
558       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
559           "#endif: excess tokens at end of directive"_port_en_US);
560     } else if (ifStack_.empty()) {
561       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
562           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
563     } else {
564       ifStack_.pop();
565     }
566   } else if (dirName == "error") {
567     prescanner.Say(
568         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
569         "%s"_err_en_US, dir.ToString());
570   } else if (dirName == "warning") {
571     prescanner.Say(
572         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
573         "%s"_warn_en_US, dir.ToString());
574   } else if (dirName == "comment" || dirName == "note") {
575     prescanner.Say(
576         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
577         "%s"_en_US, dir.ToString());
578   } else if (dirName == "include") {
579     if (j == tokens) {
580       prescanner.Say(
581           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
582           "#include: missing name of file to include"_err_en_US);
583       return;
584     }
585     std::string include;
586     std::optional<std::string> prependPath;
587     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
588       std::size_t k{j + 1};
589       if (k >= tokens) {
590         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
591             "#include: file name missing"_err_en_US);
592         return;
593       }
594       while (k < tokens && dir.TokenAt(k) != ">") {
595         ++k;
596       }
597       if (k >= tokens) {
598         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
599             "#include: expected '>' at end of included file"_port_en_US);
600       }
601       TokenSequence braced{dir, j + 1, k - j - 1};
602       include = ReplaceMacros(braced, prescanner).ToString();
603       j = k;
604     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
605         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
606       include = include.substr(1, include.size() - 2);
607       // #include "foo" starts search in directory of file containing
608       // the directive
609       auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
610       if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
611         prependPath = DirectoryName(currentFile->path());
612       }
613     } else {
614       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
615           "#include: expected name of file to include"_err_en_US);
616       return;
617     }
618     if (include.empty()) {
619       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
620           "#include: empty include file name"_err_en_US);
621       return;
622     }
623     j = dir.SkipBlanks(j + 1);
624     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
625       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
626           "#include: extra stuff ignored after file name"_port_en_US);
627     }
628     std::string buf;
629     llvm::raw_string_ostream error{buf};
630     const SourceFile *included{
631         allSources_.Open(include, error, std::move(prependPath))};
632     if (!included) {
633       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
634           "#include: %s"_err_en_US, error.str());
635     } else if (included->bytes() > 0) {
636       ProvenanceRange fileRange{
637           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
638       Prescanner{prescanner}
639           .set_encoding(included->encoding())
640           .Prescan(fileRange);
641     }
642   } else {
643     prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
644         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
645   }
646 }
647 
// Appends a copy of the text of `t` to names_ and returns a CharBlock that
// refers to that stored copy rather than to the characters `t` refers to.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  const auto &saved{names_.back()};
  return {saved.data(), saved.size()};
}
652 
IsNameDefined(const CharBlock & token)653 bool Preprocessor::IsNameDefined(const CharBlock &token) {
654   return definitions_.find(token) != definitions_.end();
655 }
656 
GetDirectiveName(const TokenSequence & line,std::size_t * rest)657 static std::string GetDirectiveName(
658     const TokenSequence &line, std::size_t *rest) {
659   std::size_t tokens{line.SizeInTokens()};
660   std::size_t j{line.SkipBlanks(0)};
661   if (j == tokens || line.TokenAt(j).ToString() != "#") {
662     *rest = tokens;
663     return "";
664   }
665   j = line.SkipBlanks(j + 1);
666   if (j == tokens) {
667     *rest = tokens;
668     return "";
669   }
670   *rest = line.SkipBlanks(j + 1);
671   return ToLowerCaseLetters(line.TokenAt(j).ToString());
672 }
673 
SkipDisabledConditionalCode(const std::string & dirName,IsElseActive isElseActive,Prescanner & prescanner,ProvenanceRange provenanceRange)674 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
675     IsElseActive isElseActive, Prescanner &prescanner,
676     ProvenanceRange provenanceRange) {
677   int nesting{0};
678   while (!prescanner.IsAtEnd()) {
679     if (!prescanner.IsNextLinePreprocessorDirective()) {
680       prescanner.NextLine();
681       continue;
682     }
683     TokenSequence line{prescanner.TokenizePreprocessorDirective()};
684     std::size_t rest{0};
685     std::string dn{GetDirectiveName(line, &rest)};
686     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
687       ++nesting;
688     } else if (dn == "endif") {
689       if (nesting-- == 0) {
690         return;
691       }
692     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
693       if (dn == "else") {
694         ifStack_.push(CanDeadElseAppear::No);
695         return;
696       }
697       if (dn == "elif" &&
698           IsIfPredicateTrue(
699               line, rest, line.SizeInTokens() - rest, prescanner)) {
700         ifStack_.push(CanDeadElseAppear::Yes);
701         return;
702       }
703     }
704   }
705   prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
706 }
707 
708 // Precedence level codes used here to accommodate mixed Fortran and C:
709 // 15: parentheses and constants, logical !, bitwise ~
710 // 14: unary + and -
711 // 13: **
712 // 12: *, /, % (modulus)
713 // 11: + and -
714 // 10: << and >>
715 //  9: bitwise &
716 //  8: bitwise ^
717 //  7: bitwise |
718 //  6: relations (.EQ., ==, &c.)
719 //  5: .NOT.
720 //  4: .AND., &&
721 //  3: .OR., ||
722 //  2: .EQV. and .NEQV. / .XOR.
723 //  1: ? :
724 //  0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)725 static std::int64_t ExpressionValue(const TokenSequence &token,
726     int minimumPrecedence, std::size_t *atToken,
727     std::optional<Message> *error) {
728   enum Operator {
729     PARENS,
730     CONST,
731     NOTZERO, // !
732     COMPLEMENT, // ~
733     UPLUS,
734     UMINUS,
735     POWER,
736     TIMES,
737     DIVIDE,
738     MODULUS,
739     ADD,
740     SUBTRACT,
741     LEFTSHIFT,
742     RIGHTSHIFT,
743     BITAND,
744     BITXOR,
745     BITOR,
746     LT,
747     LE,
748     EQ,
749     NE,
750     GE,
751     GT,
752     NOT,
753     AND,
754     OR,
755     EQV,
756     NEQV,
757     SELECT,
758     COMMA
759   };
760   static const int precedence[]{
761       15, 15, 15, 15, // (), 6, !, ~
762       14, 14, // unary +, -
763       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
764       9, 8, 7, // &, ^, |
765       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
766       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
767       1, 0 // ?: and ,
768   };
769   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
770       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
771 
772   static std::map<std::string, enum Operator> opNameMap;
773   if (opNameMap.empty()) {
774     opNameMap["("] = PARENS;
775     opNameMap["!"] = NOTZERO;
776     opNameMap["~"] = COMPLEMENT;
777     opNameMap["**"] = POWER;
778     opNameMap["*"] = TIMES;
779     opNameMap["/"] = DIVIDE;
780     opNameMap["%"] = MODULUS;
781     opNameMap["+"] = ADD;
782     opNameMap["-"] = SUBTRACT;
783     opNameMap["<<"] = LEFTSHIFT;
784     opNameMap[">>"] = RIGHTSHIFT;
785     opNameMap["&"] = BITAND;
786     opNameMap["^"] = BITXOR;
787     opNameMap["|"] = BITOR;
788     opNameMap[".lt."] = opNameMap["<"] = LT;
789     opNameMap[".le."] = opNameMap["<="] = LE;
790     opNameMap[".eq."] = opNameMap["=="] = EQ;
791     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
792     opNameMap[".ge."] = opNameMap[">="] = GE;
793     opNameMap[".gt."] = opNameMap[">"] = GT;
794     opNameMap[".not."] = NOT;
795     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
796     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
797     opNameMap[".eqv."] = EQV;
798     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
799     opNameMap["?"] = SELECT;
800     opNameMap[","] = COMMA;
801   }
802 
803   std::size_t tokens{token.SizeInTokens()};
804   CHECK(tokens > 0);
805   if (*atToken >= tokens) {
806     *error =
807         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
808     return 0;
809   }
810 
811   // Parse and evaluate a primary or a unary operator and its operand.
812   std::size_t opAt{*atToken};
813   std::string t{token.TokenAt(opAt).ToString()};
814   enum Operator op;
815   std::int64_t left{0};
816   if (t == "(") {
817     op = PARENS;
818   } else if (IsDecimalDigit(t[0])) {
819     op = CONST;
820     std::size_t consumed{0};
821     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
822     if (consumed < t.size()) {
823       *error = Message{token.GetTokenProvenanceRange(opAt),
824           "Uninterpretable numeric constant '%s'"_err_en_US, t};
825       return 0;
826     }
827   } else if (IsLegalIdentifierStart(t[0])) {
828     // undefined macro name -> zero
829     // TODO: BOZ constants?
830     op = CONST;
831   } else if (t == "+") {
832     op = UPLUS;
833   } else if (t == "-") {
834     op = UMINUS;
835   } else if (t == "." && *atToken + 2 < tokens &&
836       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
837       token.TokenAt(*atToken + 2).ToString() == ".") {
838     op = NOT;
839     *atToken += 2;
840   } else {
841     auto it{opNameMap.find(t)};
842     if (it != opNameMap.end()) {
843       op = it->second;
844     } else {
845       *error = Message{token.GetTokenProvenanceRange(opAt),
846           "operand expected in expression"_err_en_US};
847       return 0;
848     }
849   }
850   if (precedence[op] < minimumPrecedence) {
851     *error = Message{token.GetTokenProvenanceRange(opAt),
852         "operator precedence error"_err_en_US};
853     return 0;
854   }
855   ++*atToken;
856   if (op != CONST) {
857     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
858     if (*error) {
859       return 0;
860     }
861     switch (op) {
862     case PARENS:
863       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
864         ++*atToken;
865         break;
866       }
867       if (*atToken >= tokens) {
868         *error = Message{token.GetProvenanceRange(),
869             "')' missing from expression"_err_en_US};
870       } else {
871         *error = Message{
872             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
873       }
874       return 0;
875     case NOTZERO:
876       left = !left;
877       break;
878     case COMPLEMENT:
879       left = ~left;
880       break;
881     case UPLUS:
882       break;
883     case UMINUS:
884       left = -left;
885       break;
886     case NOT:
887       left = -!left;
888       break;
889     default:
890       CRASH_NO_CASE;
891     }
892   }
893 
894   // Parse and evaluate binary operators and their second operands, if present.
895   while (*atToken < tokens) {
896     int advance{1};
897     t = token.TokenAt(*atToken).ToString();
898     if (t == "." && *atToken + 2 < tokens &&
899         token.TokenAt(*atToken + 2).ToString() == ".") {
900       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
901       advance = 3;
902     }
903     auto it{opNameMap.find(t)};
904     if (it == opNameMap.end()) {
905       break;
906     }
907     op = it->second;
908     if (op < POWER || precedence[op] < minimumPrecedence) {
909       break;
910     }
911     opAt = *atToken;
912     *atToken += advance;
913 
914     std::int64_t right{
915         ExpressionValue(token, operandPrecedence[op], atToken, error)};
916     if (*error) {
917       return 0;
918     }
919 
920     switch (op) {
921     case POWER:
922       if (left == 0) {
923         if (right < 0) {
924           *error = Message{token.GetTokenProvenanceRange(opAt),
925               "0 ** negative power"_err_en_US};
926         }
927       } else if (left != 1 && right != 1) {
928         if (right <= 0) {
929           left = !right;
930         } else {
931           std::int64_t power{1};
932           for (; right > 0; --right) {
933             if ((power * left) / left != power) {
934               *error = Message{token.GetTokenProvenanceRange(opAt),
935                   "overflow in exponentation"_err_en_US};
936               left = 1;
937             }
938             power *= left;
939           }
940           left = power;
941         }
942       }
943       break;
944     case TIMES:
945       if (left != 0 && right != 0 && ((left * right) / left) != right) {
946         *error = Message{token.GetTokenProvenanceRange(opAt),
947             "overflow in multiplication"_err_en_US};
948       }
949       left = left * right;
950       break;
951     case DIVIDE:
952       if (right == 0) {
953         *error = Message{
954             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
955         left = 0;
956       } else {
957         left = left / right;
958       }
959       break;
960     case MODULUS:
961       if (right == 0) {
962         *error = Message{
963             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
964         left = 0;
965       } else {
966         left = left % right;
967       }
968       break;
969     case ADD:
970       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
971         *error = Message{token.GetTokenProvenanceRange(opAt),
972             "overflow in addition"_err_en_US};
973       }
974       left = left + right;
975       break;
976     case SUBTRACT:
977       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
978         *error = Message{token.GetTokenProvenanceRange(opAt),
979             "overflow in subtraction"_err_en_US};
980       }
981       left = left - right;
982       break;
983     case LEFTSHIFT:
984       if (right < 0 || right > 64) {
985         *error = Message{token.GetTokenProvenanceRange(opAt),
986             "bad left shift count"_err_en_US};
987       }
988       left = right >= 64 ? 0 : left << right;
989       break;
990     case RIGHTSHIFT:
991       if (right < 0 || right > 64) {
992         *error = Message{token.GetTokenProvenanceRange(opAt),
993             "bad right shift count"_err_en_US};
994       }
995       left = right >= 64 ? 0 : left >> right;
996       break;
997     case BITAND:
998     case AND:
999       left = left & right;
1000       break;
1001     case BITXOR:
1002       left = left ^ right;
1003       break;
1004     case BITOR:
1005     case OR:
1006       left = left | right;
1007       break;
1008     case LT:
1009       left = -(left < right);
1010       break;
1011     case LE:
1012       left = -(left <= right);
1013       break;
1014     case EQ:
1015       left = -(left == right);
1016       break;
1017     case NE:
1018       left = -(left != right);
1019       break;
1020     case GE:
1021       left = -(left >= right);
1022       break;
1023     case GT:
1024       left = -(left > right);
1025       break;
1026     case EQV:
1027       left = -(!left == !right);
1028       break;
1029     case NEQV:
1030       left = -(!left != !right);
1031       break;
1032     case SELECT:
1033       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1034         *error = Message{token.GetTokenProvenanceRange(opAt),
1035             "':' required in selection expression"_err_en_US};
1036         return 0;
1037       } else {
1038         ++*atToken;
1039         std::int64_t third{
1040             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1041         left = left != 0 ? right : third;
1042       }
1043       break;
1044     case COMMA:
1045       left = right;
1046       break;
1047     default:
1048       CRASH_NO_CASE;
1049     }
1050   }
1051   return left;
1052 }
1053 
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner & prescanner)1054 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1055     std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1056   TokenSequence expr1{expr, first, exprTokens};
1057   if (expr1.HasBlanks()) {
1058     expr1.RemoveBlanks();
1059   }
1060   TokenSequence expr2;
1061   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1062     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1063       CharBlock name;
1064       if (j + 3 < expr1.SizeInTokens() &&
1065           expr1.TokenAt(j + 1).ToString() == "(" &&
1066           expr1.TokenAt(j + 3).ToString() == ")") {
1067         name = expr1.TokenAt(j + 2);
1068         j += 3;
1069       } else if (j + 1 < expr1.SizeInTokens() &&
1070           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1071         name = expr1.TokenAt(++j);
1072       }
1073       if (!name.empty()) {
1074         char truth{IsNameDefined(name) ? '1' : '0'};
1075         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1076         continue;
1077       }
1078     }
1079     expr2.Put(expr1, j);
1080   }
1081   TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1082   if (expr3.HasBlanks()) {
1083     expr3.RemoveBlanks();
1084   }
1085   if (expr3.empty()) {
1086     prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1087     return false;
1088   }
1089   std::size_t atToken{0};
1090   std::optional<Message> error;
1091   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1092   if (error) {
1093     prescanner.Say(std::move(*error));
1094   } else if (atToken < expr3.SizeInTokens() &&
1095       expr3.TokenAt(atToken).ToString() != "!") {
1096     prescanner.Say(expr3.GetIntervalProvenanceRange(
1097                        atToken, expr3.SizeInTokens() - atToken),
1098         atToken == 0 ? "could not parse any expression"_err_en_US
1099                      : "excess characters after expression"_err_en_US);
1100   }
1101   return result;
1102 }
1103 } // namespace Fortran::parser
1104