1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static TokenSequence Stringify(
77     const TokenSequence &tokens, AllSources &allSources) {
78   TokenSequence result;
79   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
80   result.PutNextTokenChar('"', quoteProvenance);
81   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
82     const CharBlock &token{tokens.TokenAt(j)};
83     std::size_t bytes{token.size()};
84     for (std::size_t k{0}; k < bytes; ++k) {
85       char ch{token[k]};
86       Provenance from{tokens.GetTokenProvenance(j, k)};
87       if (ch == '"' || ch == '\\') {
88         result.PutNextTokenChar(ch, from);
89       }
90       result.PutNextTokenChar(ch, from);
91     }
92   }
93   result.PutNextTokenChar('"', quoteProvenance);
94   result.CloseToken();
95   return result;
96 }
97 
98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
140 TokenSequence Definition::Apply(
141     const std::vector<TokenSequence> &args, Prescanner &prescanner) {
142   TokenSequence result;
143   bool skipping{false};
144   int parenthesesNesting{0};
145   std::size_t tokens{replacement_.SizeInTokens()};
146   for (std::size_t j{0}; j < tokens; ++j) {
147     CharBlock token{replacement_.TokenAt(j)};
148     std::size_t bytes{token.size()};
149     if (skipping) {
150       if (bytes == 1) {
151         if (token[0] == '(') {
152           ++parenthesesNesting;
153         } else if (token[0] == ')') {
154           skipping = --parenthesesNesting > 0;
155         }
156       }
157       continue;
158     }
159     if (bytes == 2 && token[0] == '~') { // argument substitution
160       std::size_t index = token[1] - 'A';
161       if (index >= args.size()) {
162         continue;
163       }
164       std::size_t prev{j};
165       while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
166         --prev;
167       }
168       if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
169           replacement_.TokenAt(prev - 1)[0] ==
170               '#') { // stringify argument without macro replacement
171         std::size_t resultSize{result.SizeInTokens()};
172         while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
173           result.pop_back();
174         }
175         CHECK(resultSize > 0 &&
176             result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
177         result.pop_back();
178         result.Put(Stringify(args[index], prescanner.allSources()));
179       } else {
180         const TokenSequence *arg{&args[index]};
181         std::optional<TokenSequence> replaced;
182         // Don't replace macros in the actual argument if it is preceded or
183         // followed by the token-pasting operator ## in the replacement text.
184         if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
185           auto next{replacement_.SkipBlanks(j + 1)};
186           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
187             // Apply macro replacement to the actual argument
188             replaced =
189                 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
190             if (replaced) {
191               arg = &*replaced;
192             }
193           }
194         }
195         result.Put(DEREF(arg));
196       }
197     } else if (bytes == 11 && isVariadic_ &&
198         token.ToString() == "__VA_ARGS__") {
199       Provenance commaProvenance{
200           prescanner.preprocessor().allSources().CompilerInsertionProvenance(
201               ',')};
202       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
203         if (k > argumentCount_) {
204           result.Put(","s, commaProvenance);
205         }
206         result.Put(args[k]);
207       }
208     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
209         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
210         parenthesesNesting == 0) {
211       parenthesesNesting = 1;
212       skipping = args.size() == argumentCount_;
213       ++j;
214     } else {
215       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
216         ++parenthesesNesting;
217       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
218         if (--parenthesesNesting == 0) {
219           skipping = false;
220           continue;
221         }
222       }
223       result.Put(replacement_, j);
224     }
225   }
226   return TokenPasting(std::move(result));
227 }
228 
// Formats the local-time representation of `now` with the strftime() pattern
// `format`.  The result is limited to 15 characters; when the formatted text
// does not fit, strftime() reports zero and an empty string is returned.
static std::string FormatTime(const std::time_t &now, const char *format) {
  char text[16];
  const std::tm *local{std::localtime(&now)};
  const std::size_t length{std::strftime(text, sizeof text, format, local)};
  return std::string(text, length);
}
234 
235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
236 
237 void Preprocessor::DefineStandardMacros() {
238   // Capture current local date & time once now to avoid having the values
239   // of __DATE__ or __TIME__ change during compilation.
240   std::time_t now;
241   std::time(&now);
242   Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
243   Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
244   // The values of these predefined macros depend on their invocation sites.
245   Define("__FILE__"s, "__FILE__"s);
246   Define("__LINE__"s, "__LINE__"s);
247 }
248 
249 void Preprocessor::Define(std::string macro, std::string value) {
250   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
251 }
252 
253 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
254 
255 std::optional<TokenSequence> Preprocessor::MacroReplacement(
256     const TokenSequence &input, Prescanner &prescanner) {
257   // Do quick scan for any use of a defined name.
258   if (definitions_.empty()) {
259     return std::nullopt;
260   }
261   std::size_t tokens{input.SizeInTokens()};
262   std::size_t j;
263   for (j = 0; j < tokens; ++j) {
264     CharBlock token{input.TokenAt(j)};
265     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
266         IsNameDefined(token)) {
267       break;
268     }
269   }
270   if (j == tokens) {
271     return std::nullopt; // input contains nothing that would be replaced
272   }
273   TokenSequence result{input, 0, j};
274   for (; j < tokens; ++j) {
275     const CharBlock &token{input.TokenAt(j)};
276     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
277       result.Put(input, j);
278       continue;
279     }
280     auto it{definitions_.find(token)};
281     if (it == definitions_.end()) {
282       result.Put(input, j);
283       continue;
284     }
285     Definition &def{it->second};
286     if (def.isDisabled()) {
287       result.Put(input, j);
288       continue;
289     }
290     if (!def.isFunctionLike()) {
291       if (def.isPredefined()) {
292         std::string name{def.replacement().TokenAt(0).ToString()};
293         std::string repl;
294         if (name == "__FILE__") {
295           repl = "\""s +
296               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
297         } else if (name == "__LINE__") {
298           std::string buf;
299           llvm::raw_string_ostream ss{buf};
300           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
301           repl = ss.str();
302         }
303         if (!repl.empty()) {
304           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
305           ProvenanceRange call{allSources_.AddMacroCall(
306               insert, input.GetTokenProvenanceRange(j), repl)};
307           result.Put(repl, call.start());
308           continue;
309         }
310       }
311       def.set_isDisabled(true);
312       TokenSequence replaced{
313           TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
314       def.set_isDisabled(false);
315       if (!replaced.empty()) {
316         ProvenanceRange from{def.replacement().GetProvenanceRange()};
317         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
318         ProvenanceRange newRange{
319             allSources_.AddMacroCall(from, use, replaced.ToString())};
320         result.Put(replaced, newRange);
321       }
322       continue;
323     }
324     // Possible function-like macro call.  Skip spaces and newlines to see
325     // whether '(' is next.
326     std::size_t k{j};
327     bool leftParen{false};
328     while (++k < tokens) {
329       const CharBlock &lookAhead{input.TokenAt(k)};
330       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
331         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
332         break;
333       }
334     }
335     if (!leftParen) {
336       result.Put(input, j);
337       continue;
338     }
339     std::vector<std::size_t> argStart{++k};
340     for (int nesting{0}; k < tokens; ++k) {
341       CharBlock token{input.TokenAt(k)};
342       if (token.size() == 1) {
343         char ch{token[0]};
344         if (ch == '(') {
345           ++nesting;
346         } else if (ch == ')') {
347           if (nesting == 0) {
348             break;
349           }
350           --nesting;
351         } else if (ch == ',' && nesting == 0) {
352           argStart.push_back(k + 1);
353         }
354       }
355     }
356     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
357       // Subtle: () is zero arguments, not one empty argument,
358       // unless one argument was expected.
359       argStart.clear();
360     }
361     if (k >= tokens || argStart.size() < def.argumentCount() ||
362         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
363       result.Put(input, j);
364       continue;
365     }
366     std::vector<TokenSequence> args;
367     for (std::size_t n{0}; n < argStart.size(); ++n) {
368       std::size_t at{argStart[n]};
369       std::size_t count{
370           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
371       args.emplace_back(TokenSequence(input, at, count));
372     }
373     def.set_isDisabled(true);
374     TokenSequence replaced{
375         ReplaceMacros(def.Apply(args, prescanner), prescanner)};
376     def.set_isDisabled(false);
377     if (!replaced.empty()) {
378       ProvenanceRange from{def.replacement().GetProvenanceRange()};
379       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
380       ProvenanceRange newRange{
381           allSources_.AddMacroCall(from, use, replaced.ToString())};
382       result.Put(replaced, newRange);
383     }
384     j = k; // advance to the terminal ')'
385   }
386   return result;
387 }
388 
389 TokenSequence Preprocessor::ReplaceMacros(
390     const TokenSequence &tokens, Prescanner &prescanner) {
391   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
392     return std::move(*repl);
393   }
394   return tokens;
395 }
396 
397 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
398   std::size_t tokens{dir.SizeInTokens()};
399   std::size_t j{dir.SkipBlanks(0)};
400   if (j == tokens) {
401     return;
402   }
403   CHECK(prescanner); // TODO: change to reference
404   if (dir.TokenAt(j).ToString() != "#") {
405     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
406     return;
407   }
408   j = dir.SkipBlanks(j + 1);
409   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
410     --tokens;
411   }
412   if (j == tokens) {
413     return;
414   }
415   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
416     return; // treat like #line, ignore it
417   }
418   std::size_t dirOffset{j};
419   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
420   j = dir.SkipBlanks(j + 1);
421   CharBlock nameToken;
422   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
423     nameToken = dir.TokenAt(j);
424   }
425   if (dirName == "line") {
426     // #line is ignored
427   } else if (dirName == "define") {
428     if (nameToken.empty()) {
429       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
430           "#define: missing or invalid name"_err_en_US);
431       return;
432     }
433     nameToken = SaveTokenAsName(nameToken);
434     definitions_.erase(nameToken);
435     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
436         dir.TokenAt(j)[0] == '(') {
437       j = dir.SkipBlanks(j + 1);
438       std::vector<std::string> argName;
439       bool isVariadic{false};
440       if (dir.TokenAt(j).ToString() != ")") {
441         while (true) {
442           std::string an{dir.TokenAt(j).ToString()};
443           if (an == "...") {
444             isVariadic = true;
445           } else {
446             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
447               prescanner->Say(dir.GetTokenProvenanceRange(j),
448                   "#define: missing or invalid argument name"_err_en_US);
449               return;
450             }
451             argName.push_back(an);
452           }
453           j = dir.SkipBlanks(j + 1);
454           if (j == tokens) {
455             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
456                 "#define: malformed argument list"_err_en_US);
457             return;
458           }
459           std::string punc{dir.TokenAt(j).ToString()};
460           if (punc == ")") {
461             break;
462           }
463           if (isVariadic || punc != ",") {
464             prescanner->Say(dir.GetTokenProvenanceRange(j),
465                 "#define: malformed argument list"_err_en_US);
466             return;
467           }
468           j = dir.SkipBlanks(j + 1);
469           if (j == tokens) {
470             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
471                 "#define: malformed argument list"_err_en_US);
472             return;
473           }
474         }
475         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
476             argName.size()) {
477           prescanner->Say(dir.GetTokenProvenance(dirOffset),
478               "#define: argument names are not distinct"_err_en_US);
479           return;
480         }
481       }
482       j = dir.SkipBlanks(j + 1);
483       definitions_.emplace(std::make_pair(
484           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
485     } else {
486       j = dir.SkipBlanks(j + 1);
487       definitions_.emplace(
488           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
489     }
490   } else if (dirName == "undef") {
491     if (nameToken.empty()) {
492       prescanner->Say(
493           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
494           "# missing or invalid name"_err_en_US);
495     } else {
496       if (dir.IsAnythingLeft(++j)) {
497         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
498             "#undef: excess tokens at end of directive"_en_US);
499       } else {
500         definitions_.erase(nameToken);
501       }
502     }
503   } else if (dirName == "ifdef" || dirName == "ifndef") {
504     bool doThen{false};
505     if (nameToken.empty()) {
506       prescanner->Say(
507           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
508           "#%s: missing name"_err_en_US, dirName);
509     } else {
510       if (dir.IsAnythingLeft(++j)) {
511         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
512             "#%s: excess tokens at end of directive"_en_US, dirName);
513       }
514       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
515     }
516     if (doThen) {
517       ifStack_.push(CanDeadElseAppear::Yes);
518     } else {
519       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
520           dir.GetTokenProvenance(dirOffset));
521     }
522   } else if (dirName == "if") {
523     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
524       ifStack_.push(CanDeadElseAppear::Yes);
525     } else {
526       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
527           dir.GetTokenProvenanceRange(dirOffset));
528     }
529   } else if (dirName == "else") {
530     if (dir.IsAnythingLeft(j)) {
531       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
532           "#else: excess tokens at end of directive"_en_US);
533     } else if (ifStack_.empty()) {
534       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
535           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
536     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
537       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
538           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
539     } else {
540       ifStack_.pop();
541       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
542           dir.GetTokenProvenanceRange(dirOffset));
543     }
544   } else if (dirName == "elif") {
545     if (ifStack_.empty()) {
546       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
547           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
548     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
549       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
550           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
551     } else {
552       ifStack_.pop();
553       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
554           dir.GetTokenProvenanceRange(dirOffset));
555     }
556   } else if (dirName == "endif") {
557     if (dir.IsAnythingLeft(j)) {
558       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
559           "#endif: excess tokens at end of directive"_en_US);
560     } else if (ifStack_.empty()) {
561       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
562           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
563     } else {
564       ifStack_.pop();
565     }
566   } else if (dirName == "error") {
567     prescanner->Say(
568         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
569         "%s"_err_en_US, dir.ToString());
570   } else if (dirName == "warning" || dirName == "comment" ||
571       dirName == "note") {
572     prescanner->Say(
573         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
574         "%s"_en_US, dir.ToString());
575   } else if (dirName == "include") {
576     if (j == tokens) {
577       prescanner->Say(
578           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
579           "#include: missing name of file to include"_err_en_US);
580       return;
581     }
582     std::string include;
583     std::optional<std::string> prependPath;
584     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
585       std::size_t k{j + 1};
586       if (k >= tokens) {
587         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
588             "#include: file name missing"_err_en_US);
589         return;
590       }
591       while (k < tokens && dir.TokenAt(k) != ">") {
592         ++k;
593       }
594       if (k >= tokens) {
595         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
596             "#include: expected '>' at end of included file"_en_US);
597       }
598       TokenSequence braced{dir, j + 1, k - j - 1};
599       include = ReplaceMacros(braced, *prescanner).ToString();
600       j = k;
601     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
602         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
603       include = include.substr(1, include.size() - 2);
604       // #include "foo" starts search in directory of file containing
605       // the directive
606       auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
607       if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
608         prependPath = DirectoryName(currentFile->path());
609       }
610     } else {
611       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
612           "#include: expected name of file to include"_err_en_US);
613       return;
614     }
615     if (include.empty()) {
616       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
617           "#include: empty include file name"_err_en_US);
618       return;
619     }
620     j = dir.SkipBlanks(j + 1);
621     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
622       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
623           "#include: extra stuff ignored after file name"_en_US);
624     }
625     std::string buf;
626     llvm::raw_string_ostream error{buf};
627     const SourceFile *included{
628         allSources_.Open(include, error, std::move(prependPath))};
629     if (!included) {
630       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
631           "#include: %s"_err_en_US, error.str());
632     } else if (included->bytes() > 0) {
633       ProvenanceRange fileRange{
634           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
635       Prescanner{*prescanner}
636           .set_encoding(included->encoding())
637           .Prescan(fileRange);
638     }
639   } else {
640     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
641         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
642   }
643 }
644 
645 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
646   names_.push_back(t.ToString());
647   return {names_.back().data(), names_.back().size()};
648 }
649 
650 bool Preprocessor::IsNameDefined(const CharBlock &token) {
651   return definitions_.find(token) != definitions_.end();
652 }
653 
654 static std::string GetDirectiveName(
655     const TokenSequence &line, std::size_t *rest) {
656   std::size_t tokens{line.SizeInTokens()};
657   std::size_t j{line.SkipBlanks(0)};
658   if (j == tokens || line.TokenAt(j).ToString() != "#") {
659     *rest = tokens;
660     return "";
661   }
662   j = line.SkipBlanks(j + 1);
663   if (j == tokens) {
664     *rest = tokens;
665     return "";
666   }
667   *rest = line.SkipBlanks(j + 1);
668   return ToLowerCaseLetters(line.TokenAt(j).ToString());
669 }
670 
671 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
672     IsElseActive isElseActive, Prescanner *prescanner,
673     ProvenanceRange provenanceRange) {
674   int nesting{0};
675   while (!prescanner->IsAtEnd()) {
676     if (!prescanner->IsNextLinePreprocessorDirective()) {
677       prescanner->NextLine();
678       continue;
679     }
680     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
681     std::size_t rest{0};
682     std::string dn{GetDirectiveName(line, &rest)};
683     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
684       ++nesting;
685     } else if (dn == "endif") {
686       if (nesting-- == 0) {
687         return;
688       }
689     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
690       if (dn == "else") {
691         ifStack_.push(CanDeadElseAppear::No);
692         return;
693       }
694       if (dn == "elif" &&
695           IsIfPredicateTrue(
696               line, rest, line.SizeInTokens() - rest, prescanner)) {
697         ifStack_.push(CanDeadElseAppear::Yes);
698         return;
699       }
700     }
701   }
702   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
703 }
704 
705 // Precedence level codes used here to accommodate mixed Fortran and C:
706 // 15: parentheses and constants, logical !, bitwise ~
707 // 14: unary + and -
708 // 13: **
709 // 12: *, /, % (modulus)
710 // 11: + and -
711 // 10: << and >>
712 //  9: bitwise &
713 //  8: bitwise ^
714 //  7: bitwise |
715 //  6: relations (.EQ., ==, &c.)
716 //  5: .NOT.
717 //  4: .AND., &&
718 //  3: .OR., ||
719 //  2: .EQV. and .NEQV. / .XOR.
720 //  1: ? :
721 //  0: ,
722 static std::int64_t ExpressionValue(const TokenSequence &token,
723     int minimumPrecedence, std::size_t *atToken,
724     std::optional<Message> *error) {
725   enum Operator {
726     PARENS,
727     CONST,
728     NOTZERO, // !
729     COMPLEMENT, // ~
730     UPLUS,
731     UMINUS,
732     POWER,
733     TIMES,
734     DIVIDE,
735     MODULUS,
736     ADD,
737     SUBTRACT,
738     LEFTSHIFT,
739     RIGHTSHIFT,
740     BITAND,
741     BITXOR,
742     BITOR,
743     LT,
744     LE,
745     EQ,
746     NE,
747     GE,
748     GT,
749     NOT,
750     AND,
751     OR,
752     EQV,
753     NEQV,
754     SELECT,
755     COMMA
756   };
757   static const int precedence[]{
758       15, 15, 15, 15, // (), 6, !, ~
759       14, 14, // unary +, -
760       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
761       9, 8, 7, // &, ^, |
762       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
763       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
764       1, 0 // ?: and ,
765   };
766   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
767       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
768 
769   static std::map<std::string, enum Operator> opNameMap;
770   if (opNameMap.empty()) {
771     opNameMap["("] = PARENS;
772     opNameMap["!"] = NOTZERO;
773     opNameMap["~"] = COMPLEMENT;
774     opNameMap["**"] = POWER;
775     opNameMap["*"] = TIMES;
776     opNameMap["/"] = DIVIDE;
777     opNameMap["%"] = MODULUS;
778     opNameMap["+"] = ADD;
779     opNameMap["-"] = SUBTRACT;
780     opNameMap["<<"] = LEFTSHIFT;
781     opNameMap[">>"] = RIGHTSHIFT;
782     opNameMap["&"] = BITAND;
783     opNameMap["^"] = BITXOR;
784     opNameMap["|"] = BITOR;
785     opNameMap[".lt."] = opNameMap["<"] = LT;
786     opNameMap[".le."] = opNameMap["<="] = LE;
787     opNameMap[".eq."] = opNameMap["=="] = EQ;
788     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
789     opNameMap[".ge."] = opNameMap[">="] = GE;
790     opNameMap[".gt."] = opNameMap[">"] = GT;
791     opNameMap[".not."] = NOT;
792     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
793     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
794     opNameMap[".eqv."] = EQV;
795     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
796     opNameMap["?"] = SELECT;
797     opNameMap[","] = COMMA;
798   }
799 
800   std::size_t tokens{token.SizeInTokens()};
801   CHECK(tokens > 0);
802   if (*atToken >= tokens) {
803     *error =
804         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
805     return 0;
806   }
807 
808   // Parse and evaluate a primary or a unary operator and its operand.
809   std::size_t opAt{*atToken};
810   std::string t{token.TokenAt(opAt).ToString()};
811   enum Operator op;
812   std::int64_t left{0};
813   if (t == "(") {
814     op = PARENS;
815   } else if (IsDecimalDigit(t[0])) {
816     op = CONST;
817     std::size_t consumed{0};
818     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
819     if (consumed < t.size()) {
820       *error = Message{token.GetTokenProvenanceRange(opAt),
821           "Uninterpretable numeric constant '%s'"_err_en_US, t};
822       return 0;
823     }
824   } else if (IsLegalIdentifierStart(t[0])) {
825     // undefined macro name -> zero
826     // TODO: BOZ constants?
827     op = CONST;
828   } else if (t == "+") {
829     op = UPLUS;
830   } else if (t == "-") {
831     op = UMINUS;
832   } else if (t == "." && *atToken + 2 < tokens &&
833       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
834       token.TokenAt(*atToken + 2).ToString() == ".") {
835     op = NOT;
836     *atToken += 2;
837   } else {
838     auto it{opNameMap.find(t)};
839     if (it != opNameMap.end()) {
840       op = it->second;
841     } else {
842       *error = Message{token.GetTokenProvenanceRange(opAt),
843           "operand expected in expression"_err_en_US};
844       return 0;
845     }
846   }
847   if (precedence[op] < minimumPrecedence) {
848     *error = Message{token.GetTokenProvenanceRange(opAt),
849         "operator precedence error"_err_en_US};
850     return 0;
851   }
852   ++*atToken;
853   if (op != CONST) {
854     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
855     if (*error) {
856       return 0;
857     }
858     switch (op) {
859     case PARENS:
860       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
861         ++*atToken;
862         break;
863       }
864       if (*atToken >= tokens) {
865         *error = Message{token.GetProvenanceRange(),
866             "')' missing from expression"_err_en_US};
867       } else {
868         *error = Message{
869             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
870       }
871       return 0;
872     case NOTZERO:
873       left = !left;
874       break;
875     case COMPLEMENT:
876       left = ~left;
877       break;
878     case UPLUS:
879       break;
880     case UMINUS:
881       left = -left;
882       break;
883     case NOT:
884       left = -!left;
885       break;
886     default:
887       CRASH_NO_CASE;
888     }
889   }
890 
891   // Parse and evaluate binary operators and their second operands, if present.
892   while (*atToken < tokens) {
893     int advance{1};
894     t = token.TokenAt(*atToken).ToString();
895     if (t == "." && *atToken + 2 < tokens &&
896         token.TokenAt(*atToken + 2).ToString() == ".") {
897       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
898       advance = 3;
899     }
900     auto it{opNameMap.find(t)};
901     if (it == opNameMap.end()) {
902       break;
903     }
904     op = it->second;
905     if (op < POWER || precedence[op] < minimumPrecedence) {
906       break;
907     }
908     opAt = *atToken;
909     *atToken += advance;
910 
911     std::int64_t right{
912         ExpressionValue(token, operandPrecedence[op], atToken, error)};
913     if (*error) {
914       return 0;
915     }
916 
917     switch (op) {
918     case POWER:
919       if (left == 0) {
920         if (right < 0) {
921           *error = Message{token.GetTokenProvenanceRange(opAt),
922               "0 ** negative power"_err_en_US};
923         }
924       } else if (left != 1 && right != 1) {
925         if (right <= 0) {
926           left = !right;
927         } else {
928           std::int64_t power{1};
929           for (; right > 0; --right) {
930             if ((power * left) / left != power) {
931               *error = Message{token.GetTokenProvenanceRange(opAt),
932                   "overflow in exponentation"_err_en_US};
933               left = 1;
934             }
935             power *= left;
936           }
937           left = power;
938         }
939       }
940       break;
941     case TIMES:
942       if (left != 0 && right != 0 && ((left * right) / left) != right) {
943         *error = Message{token.GetTokenProvenanceRange(opAt),
944             "overflow in multiplication"_err_en_US};
945       }
946       left = left * right;
947       break;
948     case DIVIDE:
949       if (right == 0) {
950         *error = Message{
951             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
952         left = 0;
953       } else {
954         left = left / right;
955       }
956       break;
957     case MODULUS:
958       if (right == 0) {
959         *error = Message{
960             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
961         left = 0;
962       } else {
963         left = left % right;
964       }
965       break;
966     case ADD:
967       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
968         *error = Message{token.GetTokenProvenanceRange(opAt),
969             "overflow in addition"_err_en_US};
970       }
971       left = left + right;
972       break;
973     case SUBTRACT:
974       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
975         *error = Message{token.GetTokenProvenanceRange(opAt),
976             "overflow in subtraction"_err_en_US};
977       }
978       left = left - right;
979       break;
980     case LEFTSHIFT:
981       if (right < 0 || right > 64) {
982         *error = Message{token.GetTokenProvenanceRange(opAt),
983             "bad left shift count"_err_en_US};
984       }
985       left = right >= 64 ? 0 : left << right;
986       break;
987     case RIGHTSHIFT:
988       if (right < 0 || right > 64) {
989         *error = Message{token.GetTokenProvenanceRange(opAt),
990             "bad right shift count"_err_en_US};
991       }
992       left = right >= 64 ? 0 : left >> right;
993       break;
994     case BITAND:
995     case AND:
996       left = left & right;
997       break;
998     case BITXOR:
999       left = left ^ right;
1000       break;
1001     case BITOR:
1002     case OR:
1003       left = left | right;
1004       break;
1005     case LT:
1006       left = -(left < right);
1007       break;
1008     case LE:
1009       left = -(left <= right);
1010       break;
1011     case EQ:
1012       left = -(left == right);
1013       break;
1014     case NE:
1015       left = -(left != right);
1016       break;
1017     case GE:
1018       left = -(left >= right);
1019       break;
1020     case GT:
1021       left = -(left > right);
1022       break;
1023     case EQV:
1024       left = -(!left == !right);
1025       break;
1026     case NEQV:
1027       left = -(!left != !right);
1028       break;
1029     case SELECT:
1030       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1031         *error = Message{token.GetTokenProvenanceRange(opAt),
1032             "':' required in selection expression"_err_en_US};
1033         return 0;
1034       } else {
1035         ++*atToken;
1036         std::int64_t third{
1037             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1038         left = left != 0 ? right : third;
1039       }
1040       break;
1041     case COMMA:
1042       left = right;
1043       break;
1044     default:
1045       CRASH_NO_CASE;
1046     }
1047   }
1048   return left;
1049 }
1050 
1051 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1052     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
1053   TokenSequence expr1{expr, first, exprTokens};
1054   if (expr1.HasBlanks()) {
1055     expr1.RemoveBlanks();
1056   }
1057   TokenSequence expr2;
1058   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1059     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1060       CharBlock name;
1061       if (j + 3 < expr1.SizeInTokens() &&
1062           expr1.TokenAt(j + 1).ToString() == "(" &&
1063           expr1.TokenAt(j + 3).ToString() == ")") {
1064         name = expr1.TokenAt(j + 2);
1065         j += 3;
1066       } else if (j + 1 < expr1.SizeInTokens() &&
1067           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1068         name = expr1.TokenAt(++j);
1069       }
1070       if (!name.empty()) {
1071         char truth{IsNameDefined(name) ? '1' : '0'};
1072         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1073         continue;
1074       }
1075     }
1076     expr2.Put(expr1, j);
1077   }
1078   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1079   if (expr3.HasBlanks()) {
1080     expr3.RemoveBlanks();
1081   }
1082   if (expr3.empty()) {
1083     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1084     return false;
1085   }
1086   std::size_t atToken{0};
1087   std::optional<Message> error;
1088   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1089   if (error) {
1090     prescanner->Say(std::move(*error));
1091   } else if (atToken < expr3.SizeInTokens() &&
1092       expr3.TokenAt(atToken).ToString() != "!") {
1093     prescanner->Say(expr3.GetIntervalProvenanceRange(
1094                         atToken, expr3.SizeInTokens() - atToken),
1095         atToken == 0 ? "could not parse any expression"_err_en_US
1096                      : "excess characters after expression"_err_en_US);
1097   }
1098   return result;
1099 }
1100 } // namespace Fortran::parser
1101