1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static TokenSequence Stringify(
77     const TokenSequence &tokens, AllSources &allSources) {
78   TokenSequence result;
79   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
80   result.PutNextTokenChar('"', quoteProvenance);
81   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
82     const CharBlock &token{tokens.TokenAt(j)};
83     std::size_t bytes{token.size()};
84     for (std::size_t k{0}; k < bytes; ++k) {
85       char ch{token[k]};
86       Provenance from{tokens.GetTokenProvenance(j, k)};
87       if (ch == '"' || ch == '\\') {
88         result.PutNextTokenChar(ch, from);
89       }
90       result.PutNextTokenChar(ch, from);
91     }
92   }
93   result.PutNextTokenChar('"', quoteProvenance);
94   result.CloseToken();
95   return result;
96 }
97 
98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
140 TokenSequence Definition::Apply(
141     const std::vector<TokenSequence> &args, Prescanner &prescanner) {
142   TokenSequence result;
143   bool skipping{false};
144   int parenthesesNesting{0};
145   std::size_t tokens{replacement_.SizeInTokens()};
146   for (std::size_t j{0}; j < tokens; ++j) {
147     CharBlock token{replacement_.TokenAt(j)};
148     std::size_t bytes{token.size()};
149     if (skipping) {
150       if (bytes == 1) {
151         if (token[0] == '(') {
152           ++parenthesesNesting;
153         } else if (token[0] == ')') {
154           skipping = --parenthesesNesting > 0;
155         }
156       }
157       continue;
158     }
159     if (bytes == 2 && token[0] == '~') { // argument substitution
160       std::size_t index = token[1] - 'A';
161       if (index >= args.size()) {
162         continue;
163       }
164       std::size_t prev{j};
165       while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
166         --prev;
167       }
168       if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
169           replacement_.TokenAt(prev - 1)[0] ==
170               '#') { // stringify argument without macro replacement
171         std::size_t resultSize{result.SizeInTokens()};
172         while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) {
173           result.pop_back();
174           --resultSize;
175         }
176         CHECK(resultSize > 0 &&
177             result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
178         result.pop_back();
179         result.Put(Stringify(args[index], prescanner.allSources()));
180       } else {
181         const TokenSequence *arg{&args[index]};
182         std::optional<TokenSequence> replaced;
183         // Don't replace macros in the actual argument if it is preceded or
184         // followed by the token-pasting operator ## in the replacement text.
185         if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
186           auto next{replacement_.SkipBlanks(j + 1)};
187           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
188             // Apply macro replacement to the actual argument
189             replaced =
190                 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
191             if (replaced) {
192               arg = &*replaced;
193             }
194           }
195         }
196         result.Put(DEREF(arg));
197       }
198     } else if (bytes == 11 && isVariadic_ &&
199         token.ToString() == "__VA_ARGS__") {
200       Provenance commaProvenance{
201           prescanner.preprocessor().allSources().CompilerInsertionProvenance(
202               ',')};
203       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
204         if (k > argumentCount_) {
205           result.Put(","s, commaProvenance);
206         }
207         result.Put(args[k]);
208       }
209     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
210         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
211         parenthesesNesting == 0) {
212       parenthesesNesting = 1;
213       skipping = args.size() == argumentCount_;
214       ++j;
215     } else {
216       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
217         ++parenthesesNesting;
218       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
219         if (--parenthesesNesting == 0) {
220           skipping = false;
221           continue;
222         }
223       }
224       result.Put(replacement_, j);
225     }
226   }
227   return TokenPasting(std::move(result));
228 }
229 
// Formats the local-time representation of `now` according to the
// strftime() format string `format` and returns the resulting text.
// Returns an empty string when the time cannot be converted to local time
// or when the formatted text does not fit in the internal buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime() may yield a null pointer; passing that to strftime()
  // would be undefined behavior.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  // Sized generously: strftime() returns 0 (yielding an empty string) when
  // the output plus its terminating NUL does not fit.
  char buffer[64];
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return std::string(buffer, length);
}
235 
236 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
237 
238 void Preprocessor::DefineStandardMacros() {
239   // Capture current local date & time once now to avoid having the values
240   // of __DATE__ or __TIME__ change during compilation.
241   std::time_t now;
242   std::time(&now);
243   Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
244   Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
245   // The values of these predefined macros depend on their invocation sites.
246   Define("__FILE__"s, "__FILE__"s);
247   Define("__LINE__"s, "__LINE__"s);
248 }
249 
250 void Preprocessor::Define(std::string macro, std::string value) {
251   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
252 }
253 
254 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
255 
256 std::optional<TokenSequence> Preprocessor::MacroReplacement(
257     const TokenSequence &input, Prescanner &prescanner) {
258   // Do quick scan for any use of a defined name.
259   if (definitions_.empty()) {
260     return std::nullopt;
261   }
262   std::size_t tokens{input.SizeInTokens()};
263   std::size_t j;
264   for (j = 0; j < tokens; ++j) {
265     CharBlock token{input.TokenAt(j)};
266     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
267         IsNameDefined(token)) {
268       break;
269     }
270   }
271   if (j == tokens) {
272     return std::nullopt; // input contains nothing that would be replaced
273   }
274   TokenSequence result{input, 0, j};
275   for (; j < tokens; ++j) {
276     const CharBlock &token{input.TokenAt(j)};
277     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
278       result.Put(input, j);
279       continue;
280     }
281     auto it{definitions_.find(token)};
282     if (it == definitions_.end()) {
283       result.Put(input, j);
284       continue;
285     }
286     Definition &def{it->second};
287     if (def.isDisabled()) {
288       result.Put(input, j);
289       continue;
290     }
291     if (!def.isFunctionLike()) {
292       if (def.isPredefined()) {
293         std::string name{def.replacement().TokenAt(0).ToString()};
294         std::string repl;
295         if (name == "__FILE__") {
296           repl = "\""s +
297               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
298         } else if (name == "__LINE__") {
299           std::string buf;
300           llvm::raw_string_ostream ss{buf};
301           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
302           repl = ss.str();
303         }
304         if (!repl.empty()) {
305           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
306           ProvenanceRange call{allSources_.AddMacroCall(
307               insert, input.GetTokenProvenanceRange(j), repl)};
308           result.Put(repl, call.start());
309           continue;
310         }
311       }
312       def.set_isDisabled(true);
313       TokenSequence replaced{
314           TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
315       def.set_isDisabled(false);
316       if (!replaced.empty()) {
317         ProvenanceRange from{def.replacement().GetProvenanceRange()};
318         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
319         ProvenanceRange newRange{
320             allSources_.AddMacroCall(from, use, replaced.ToString())};
321         result.Put(replaced, newRange);
322       }
323       continue;
324     }
325     // Possible function-like macro call.  Skip spaces and newlines to see
326     // whether '(' is next.
327     std::size_t k{j};
328     bool leftParen{false};
329     while (++k < tokens) {
330       const CharBlock &lookAhead{input.TokenAt(k)};
331       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
332         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
333         break;
334       }
335     }
336     if (!leftParen) {
337       result.Put(input, j);
338       continue;
339     }
340     std::vector<std::size_t> argStart{++k};
341     for (int nesting{0}; k < tokens; ++k) {
342       CharBlock token{input.TokenAt(k)};
343       if (token.size() == 1) {
344         char ch{token[0]};
345         if (ch == '(') {
346           ++nesting;
347         } else if (ch == ')') {
348           if (nesting == 0) {
349             break;
350           }
351           --nesting;
352         } else if (ch == ',' && nesting == 0) {
353           argStart.push_back(k + 1);
354         }
355       }
356     }
357     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
358       // Subtle: () is zero arguments, not one empty argument,
359       // unless one argument was expected.
360       argStart.clear();
361     }
362     if (k >= tokens || argStart.size() < def.argumentCount() ||
363         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
364       result.Put(input, j);
365       continue;
366     }
367     std::vector<TokenSequence> args;
368     for (std::size_t n{0}; n < argStart.size(); ++n) {
369       std::size_t at{argStart[n]};
370       std::size_t count{
371           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
372       args.emplace_back(TokenSequence(input, at, count));
373     }
374     def.set_isDisabled(true);
375     TokenSequence replaced{
376         ReplaceMacros(def.Apply(args, prescanner), prescanner)};
377     def.set_isDisabled(false);
378     if (!replaced.empty()) {
379       ProvenanceRange from{def.replacement().GetProvenanceRange()};
380       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
381       ProvenanceRange newRange{
382           allSources_.AddMacroCall(from, use, replaced.ToString())};
383       result.Put(replaced, newRange);
384     }
385     j = k; // advance to the terminal ')'
386   }
387   return result;
388 }
389 
390 TokenSequence Preprocessor::ReplaceMacros(
391     const TokenSequence &tokens, Prescanner &prescanner) {
392   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
393     return std::move(*repl);
394   }
395   return tokens;
396 }
397 
398 void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
399   std::size_t tokens{dir.SizeInTokens()};
400   std::size_t j{dir.SkipBlanks(0)};
401   if (j == tokens) {
402     return;
403   }
404   if (dir.TokenAt(j).ToString() != "#") {
405     prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
406     return;
407   }
408   j = dir.SkipBlanks(j + 1);
409   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
410     --tokens;
411   }
412   if (j == tokens) {
413     return;
414   }
415   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
416     return; // treat like #line, ignore it
417   }
418   std::size_t dirOffset{j};
419   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
420   j = dir.SkipBlanks(j + 1);
421   CharBlock nameToken;
422   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
423     nameToken = dir.TokenAt(j);
424   }
425   if (dirName == "line") {
426     // #line is ignored
427   } else if (dirName == "define") {
428     if (nameToken.empty()) {
429       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
430           "#define: missing or invalid name"_err_en_US);
431       return;
432     }
433     nameToken = SaveTokenAsName(nameToken);
434     definitions_.erase(nameToken);
435     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
436         dir.TokenAt(j)[0] == '(') {
437       j = dir.SkipBlanks(j + 1);
438       std::vector<std::string> argName;
439       bool isVariadic{false};
440       if (dir.TokenAt(j).ToString() != ")") {
441         while (true) {
442           std::string an{dir.TokenAt(j).ToString()};
443           if (an == "...") {
444             isVariadic = true;
445           } else {
446             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
447               prescanner.Say(dir.GetTokenProvenanceRange(j),
448                   "#define: missing or invalid argument name"_err_en_US);
449               return;
450             }
451             argName.push_back(an);
452           }
453           j = dir.SkipBlanks(j + 1);
454           if (j == tokens) {
455             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
456                 "#define: malformed argument list"_err_en_US);
457             return;
458           }
459           std::string punc{dir.TokenAt(j).ToString()};
460           if (punc == ")") {
461             break;
462           }
463           if (isVariadic || punc != ",") {
464             prescanner.Say(dir.GetTokenProvenanceRange(j),
465                 "#define: malformed argument list"_err_en_US);
466             return;
467           }
468           j = dir.SkipBlanks(j + 1);
469           if (j == tokens) {
470             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
471                 "#define: malformed argument list"_err_en_US);
472             return;
473           }
474         }
475         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
476             argName.size()) {
477           prescanner.Say(dir.GetTokenProvenance(dirOffset),
478               "#define: argument names are not distinct"_err_en_US);
479           return;
480         }
481       }
482       j = dir.SkipBlanks(j + 1);
483       definitions_.emplace(std::make_pair(
484           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
485     } else {
486       j = dir.SkipBlanks(j + 1);
487       definitions_.emplace(
488           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
489     }
490   } else if (dirName == "undef") {
491     if (nameToken.empty()) {
492       prescanner.Say(
493           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
494           "# missing or invalid name"_err_en_US);
495     } else {
496       if (dir.IsAnythingLeft(++j)) {
497         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
498             "#undef: excess tokens at end of directive"_en_US);
499       } else {
500         definitions_.erase(nameToken);
501       }
502     }
503   } else if (dirName == "ifdef" || dirName == "ifndef") {
504     bool doThen{false};
505     if (nameToken.empty()) {
506       prescanner.Say(
507           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
508           "#%s: missing name"_err_en_US, dirName);
509     } else {
510       if (dir.IsAnythingLeft(++j)) {
511         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
512             "#%s: excess tokens at end of directive"_en_US, dirName);
513       }
514       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
515     }
516     if (doThen) {
517       ifStack_.push(CanDeadElseAppear::Yes);
518     } else {
519       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
520           dir.GetTokenProvenance(dirOffset));
521     }
522   } else if (dirName == "if") {
523     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
524       ifStack_.push(CanDeadElseAppear::Yes);
525     } else {
526       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
527           dir.GetTokenProvenanceRange(dirOffset));
528     }
529   } else if (dirName == "else") {
530     if (dir.IsAnythingLeft(j)) {
531       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
532           "#else: excess tokens at end of directive"_en_US);
533     } else if (ifStack_.empty()) {
534       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
535           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
536     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
537       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
538           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
539     } else {
540       ifStack_.pop();
541       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
542           dir.GetTokenProvenanceRange(dirOffset));
543     }
544   } else if (dirName == "elif") {
545     if (ifStack_.empty()) {
546       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
547           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
548     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
549       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
550           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
551     } else {
552       ifStack_.pop();
553       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
554           dir.GetTokenProvenanceRange(dirOffset));
555     }
556   } else if (dirName == "endif") {
557     if (dir.IsAnythingLeft(j)) {
558       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
559           "#endif: excess tokens at end of directive"_en_US);
560     } else if (ifStack_.empty()) {
561       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
562           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
563     } else {
564       ifStack_.pop();
565     }
566   } else if (dirName == "error") {
567     prescanner.Say(
568         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
569         "%s"_err_en_US, dir.ToString());
570   } else if (dirName == "warning" || dirName == "comment" ||
571       dirName == "note") {
572     prescanner.Say(
573         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
574         "%s"_en_US, dir.ToString());
575   } else if (dirName == "include") {
576     if (j == tokens) {
577       prescanner.Say(
578           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
579           "#include: missing name of file to include"_err_en_US);
580       return;
581     }
582     std::string include;
583     std::optional<std::string> prependPath;
584     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
585       std::size_t k{j + 1};
586       if (k >= tokens) {
587         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
588             "#include: file name missing"_err_en_US);
589         return;
590       }
591       while (k < tokens && dir.TokenAt(k) != ">") {
592         ++k;
593       }
594       if (k >= tokens) {
595         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
596             "#include: expected '>' at end of included file"_en_US);
597       }
598       TokenSequence braced{dir, j + 1, k - j - 1};
599       include = ReplaceMacros(braced, prescanner).ToString();
600       j = k;
601     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
602         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
603       include = include.substr(1, include.size() - 2);
604       // #include "foo" starts search in directory of file containing
605       // the directive
606       auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
607       if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
608         prependPath = DirectoryName(currentFile->path());
609       }
610     } else {
611       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
612           "#include: expected name of file to include"_err_en_US);
613       return;
614     }
615     if (include.empty()) {
616       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
617           "#include: empty include file name"_err_en_US);
618       return;
619     }
620     j = dir.SkipBlanks(j + 1);
621     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
622       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
623           "#include: extra stuff ignored after file name"_en_US);
624     }
625     std::string buf;
626     llvm::raw_string_ostream error{buf};
627     const SourceFile *included{
628         allSources_.Open(include, error, std::move(prependPath))};
629     if (!included) {
630       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
631           "#include: %s"_err_en_US, error.str());
632     } else if (included->bytes() > 0) {
633       ProvenanceRange fileRange{
634           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
635       Prescanner{prescanner}
636           .set_encoding(included->encoding())
637           .Prescan(fileRange);
638     }
639   } else {
640     prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
641         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
642   }
643 }
644 
645 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
646   names_.push_back(t.ToString());
647   return {names_.back().data(), names_.back().size()};
648 }
649 
650 bool Preprocessor::IsNameDefined(const CharBlock &token) {
651   return definitions_.find(token) != definitions_.end();
652 }
653 
654 static std::string GetDirectiveName(
655     const TokenSequence &line, std::size_t *rest) {
656   std::size_t tokens{line.SizeInTokens()};
657   std::size_t j{line.SkipBlanks(0)};
658   if (j == tokens || line.TokenAt(j).ToString() != "#") {
659     *rest = tokens;
660     return "";
661   }
662   j = line.SkipBlanks(j + 1);
663   if (j == tokens) {
664     *rest = tokens;
665     return "";
666   }
667   *rest = line.SkipBlanks(j + 1);
668   return ToLowerCaseLetters(line.TokenAt(j).ToString());
669 }
670 
671 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
672     IsElseActive isElseActive, Prescanner &prescanner,
673     ProvenanceRange provenanceRange) {
674   int nesting{0};
675   while (!prescanner.IsAtEnd()) {
676     if (!prescanner.IsNextLinePreprocessorDirective()) {
677       prescanner.NextLine();
678       continue;
679     }
680     TokenSequence line{prescanner.TokenizePreprocessorDirective()};
681     std::size_t rest{0};
682     std::string dn{GetDirectiveName(line, &rest)};
683     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
684       ++nesting;
685     } else if (dn == "endif") {
686       if (nesting-- == 0) {
687         return;
688       }
689     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
690       if (dn == "else") {
691         ifStack_.push(CanDeadElseAppear::No);
692         return;
693       }
694       if (dn == "elif" &&
695           IsIfPredicateTrue(
696               line, rest, line.SizeInTokens() - rest, prescanner)) {
697         ifStack_.push(CanDeadElseAppear::Yes);
698         return;
699       }
700     }
701   }
702   prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
703 }
704 
705 // Precedence level codes used here to accommodate mixed Fortran and C:
706 // 15: parentheses and constants, logical !, bitwise ~
707 // 14: unary + and -
708 // 13: **
709 // 12: *, /, % (modulus)
710 // 11: + and -
711 // 10: << and >>
712 //  9: bitwise &
713 //  8: bitwise ^
714 //  7: bitwise |
715 //  6: relations (.EQ., ==, &c.)
716 //  5: .NOT.
717 //  4: .AND., &&
718 //  3: .OR., ||
719 //  2: .EQV. and .NEQV. / .XOR.
720 //  1: ? :
721 //  0: ,
722 static std::int64_t ExpressionValue(const TokenSequence &token,
723     int minimumPrecedence, std::size_t *atToken,
724     std::optional<Message> *error) {
725   enum Operator {
726     PARENS,
727     CONST,
728     NOTZERO, // !
729     COMPLEMENT, // ~
730     UPLUS,
731     UMINUS,
732     POWER,
733     TIMES,
734     DIVIDE,
735     MODULUS,
736     ADD,
737     SUBTRACT,
738     LEFTSHIFT,
739     RIGHTSHIFT,
740     BITAND,
741     BITXOR,
742     BITOR,
743     LT,
744     LE,
745     EQ,
746     NE,
747     GE,
748     GT,
749     NOT,
750     AND,
751     OR,
752     EQV,
753     NEQV,
754     SELECT,
755     COMMA
756   };
757   static const int precedence[]{
758       15, 15, 15, 15, // (), 6, !, ~
759       14, 14, // unary +, -
760       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
761       9, 8, 7, // &, ^, |
762       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
763       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
764       1, 0 // ?: and ,
765   };
766   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
767       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
768 
769   static std::map<std::string, enum Operator> opNameMap;
770   if (opNameMap.empty()) {
771     opNameMap["("] = PARENS;
772     opNameMap["!"] = NOTZERO;
773     opNameMap["~"] = COMPLEMENT;
774     opNameMap["**"] = POWER;
775     opNameMap["*"] = TIMES;
776     opNameMap["/"] = DIVIDE;
777     opNameMap["%"] = MODULUS;
778     opNameMap["+"] = ADD;
779     opNameMap["-"] = SUBTRACT;
780     opNameMap["<<"] = LEFTSHIFT;
781     opNameMap[">>"] = RIGHTSHIFT;
782     opNameMap["&"] = BITAND;
783     opNameMap["^"] = BITXOR;
784     opNameMap["|"] = BITOR;
785     opNameMap[".lt."] = opNameMap["<"] = LT;
786     opNameMap[".le."] = opNameMap["<="] = LE;
787     opNameMap[".eq."] = opNameMap["=="] = EQ;
788     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
789     opNameMap[".ge."] = opNameMap[">="] = GE;
790     opNameMap[".gt."] = opNameMap[">"] = GT;
791     opNameMap[".not."] = NOT;
792     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
793     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
794     opNameMap[".eqv."] = EQV;
795     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
796     opNameMap["?"] = SELECT;
797     opNameMap[","] = COMMA;
798   }
799 
800   std::size_t tokens{token.SizeInTokens()};
801   CHECK(tokens > 0);
802   if (*atToken >= tokens) {
803     *error =
804         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
805     return 0;
806   }
807 
808   // Parse and evaluate a primary or a unary operator and its operand.
809   std::size_t opAt{*atToken};
810   std::string t{token.TokenAt(opAt).ToString()};
811   enum Operator op;
812   std::int64_t left{0};
813   if (t == "(") {
814     op = PARENS;
815   } else if (IsDecimalDigit(t[0])) {
816     op = CONST;
817     std::size_t consumed{0};
818     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
819     if (consumed < t.size()) {
820       *error = Message{token.GetTokenProvenanceRange(opAt),
821           "Uninterpretable numeric constant '%s'"_err_en_US, t};
822       return 0;
823     }
824   } else if (IsLegalIdentifierStart(t[0])) {
825     // undefined macro name -> zero
826     // TODO: BOZ constants?
827     op = CONST;
828   } else if (t == "+") {
829     op = UPLUS;
830   } else if (t == "-") {
831     op = UMINUS;
832   } else if (t == "." && *atToken + 2 < tokens &&
833       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
834       token.TokenAt(*atToken + 2).ToString() == ".") {
835     op = NOT;
836     *atToken += 2;
837   } else {
838     auto it{opNameMap.find(t)};
839     if (it != opNameMap.end()) {
840       op = it->second;
841     } else {
842       *error = Message{token.GetTokenProvenanceRange(opAt),
843           "operand expected in expression"_err_en_US};
844       return 0;
845     }
846   }
847   if (precedence[op] < minimumPrecedence) {
848     *error = Message{token.GetTokenProvenanceRange(opAt),
849         "operator precedence error"_err_en_US};
850     return 0;
851   }
852   ++*atToken;
853   if (op != CONST) {
854     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
855     if (*error) {
856       return 0;
857     }
858     switch (op) {
859     case PARENS:
860       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
861         ++*atToken;
862         break;
863       }
864       if (*atToken >= tokens) {
865         *error = Message{token.GetProvenanceRange(),
866             "')' missing from expression"_err_en_US};
867       } else {
868         *error = Message{
869             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
870       }
871       return 0;
872     case NOTZERO:
873       left = !left;
874       break;
875     case COMPLEMENT:
876       left = ~left;
877       break;
878     case UPLUS:
879       break;
880     case UMINUS:
881       left = -left;
882       break;
883     case NOT:
884       left = -!left;
885       break;
886     default:
887       CRASH_NO_CASE;
888     }
889   }
890 
891   // Parse and evaluate binary operators and their second operands, if present.
892   while (*atToken < tokens) {
893     int advance{1};
894     t = token.TokenAt(*atToken).ToString();
895     if (t == "." && *atToken + 2 < tokens &&
896         token.TokenAt(*atToken + 2).ToString() == ".") {
897       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
898       advance = 3;
899     }
900     auto it{opNameMap.find(t)};
901     if (it == opNameMap.end()) {
902       break;
903     }
904     op = it->second;
905     if (op < POWER || precedence[op] < minimumPrecedence) {
906       break;
907     }
908     opAt = *atToken;
909     *atToken += advance;
910 
911     std::int64_t right{
912         ExpressionValue(token, operandPrecedence[op], atToken, error)};
913     if (*error) {
914       return 0;
915     }
916 
917     switch (op) {
918     case POWER:
919       if (left == 0) {
920         if (right < 0) {
921           *error = Message{token.GetTokenProvenanceRange(opAt),
922               "0 ** negative power"_err_en_US};
923         }
924       } else if (left != 1 && right != 1) {
925         if (right <= 0) {
926           left = !right;
927         } else {
928           std::int64_t power{1};
929           for (; right > 0; --right) {
930             if ((power * left) / left != power) {
931               *error = Message{token.GetTokenProvenanceRange(opAt),
932                   "overflow in exponentation"_err_en_US};
933               left = 1;
934             }
935             power *= left;
936           }
937           left = power;
938         }
939       }
940       break;
941     case TIMES:
942       if (left != 0 && right != 0 && ((left * right) / left) != right) {
943         *error = Message{token.GetTokenProvenanceRange(opAt),
944             "overflow in multiplication"_err_en_US};
945       }
946       left = left * right;
947       break;
948     case DIVIDE:
949       if (right == 0) {
950         *error = Message{
951             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
952         left = 0;
953       } else {
954         left = left / right;
955       }
956       break;
957     case MODULUS:
958       if (right == 0) {
959         *error = Message{
960             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
961         left = 0;
962       } else {
963         left = left % right;
964       }
965       break;
966     case ADD:
967       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
968         *error = Message{token.GetTokenProvenanceRange(opAt),
969             "overflow in addition"_err_en_US};
970       }
971       left = left + right;
972       break;
973     case SUBTRACT:
974       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
975         *error = Message{token.GetTokenProvenanceRange(opAt),
976             "overflow in subtraction"_err_en_US};
977       }
978       left = left - right;
979       break;
980     case LEFTSHIFT:
981       if (right < 0 || right > 64) {
982         *error = Message{token.GetTokenProvenanceRange(opAt),
983             "bad left shift count"_err_en_US};
984       }
985       left = right >= 64 ? 0 : left << right;
986       break;
987     case RIGHTSHIFT:
988       if (right < 0 || right > 64) {
989         *error = Message{token.GetTokenProvenanceRange(opAt),
990             "bad right shift count"_err_en_US};
991       }
992       left = right >= 64 ? 0 : left >> right;
993       break;
994     case BITAND:
995     case AND:
996       left = left & right;
997       break;
998     case BITXOR:
999       left = left ^ right;
1000       break;
1001     case BITOR:
1002     case OR:
1003       left = left | right;
1004       break;
1005     case LT:
1006       left = -(left < right);
1007       break;
1008     case LE:
1009       left = -(left <= right);
1010       break;
1011     case EQ:
1012       left = -(left == right);
1013       break;
1014     case NE:
1015       left = -(left != right);
1016       break;
1017     case GE:
1018       left = -(left >= right);
1019       break;
1020     case GT:
1021       left = -(left > right);
1022       break;
1023     case EQV:
1024       left = -(!left == !right);
1025       break;
1026     case NEQV:
1027       left = -(!left != !right);
1028       break;
1029     case SELECT:
1030       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1031         *error = Message{token.GetTokenProvenanceRange(opAt),
1032             "':' required in selection expression"_err_en_US};
1033         return 0;
1034       } else {
1035         ++*atToken;
1036         std::int64_t third{
1037             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1038         left = left != 0 ? right : third;
1039       }
1040       break;
1041     case COMMA:
1042       left = right;
1043       break;
1044     default:
1045       CRASH_NO_CASE;
1046     }
1047   }
1048   return left;
1049 }
1050 
1051 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1052     std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1053   TokenSequence expr1{expr, first, exprTokens};
1054   if (expr1.HasBlanks()) {
1055     expr1.RemoveBlanks();
1056   }
1057   TokenSequence expr2;
1058   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1059     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1060       CharBlock name;
1061       if (j + 3 < expr1.SizeInTokens() &&
1062           expr1.TokenAt(j + 1).ToString() == "(" &&
1063           expr1.TokenAt(j + 3).ToString() == ")") {
1064         name = expr1.TokenAt(j + 2);
1065         j += 3;
1066       } else if (j + 1 < expr1.SizeInTokens() &&
1067           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1068         name = expr1.TokenAt(++j);
1069       }
1070       if (!name.empty()) {
1071         char truth{IsNameDefined(name) ? '1' : '0'};
1072         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1073         continue;
1074       }
1075     }
1076     expr2.Put(expr1, j);
1077   }
1078   TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1079   if (expr3.HasBlanks()) {
1080     expr3.RemoveBlanks();
1081   }
1082   if (expr3.empty()) {
1083     prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1084     return false;
1085   }
1086   std::size_t atToken{0};
1087   std::optional<Message> error;
1088   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1089   if (error) {
1090     prescanner.Say(std::move(*error));
1091   } else if (atToken < expr3.SizeInTokens() &&
1092       expr3.TokenAt(atToken).ToString() != "!") {
1093     prescanner.Say(expr3.GetIntervalProvenanceRange(
1094                        atToken, expr3.SizeInTokens() - atToken),
1095         atToken == 0 ? "could not parse any expression"_err_en_US
1096                      : "excess characters after expression"_err_en_US);
1097   }
1098   return result;
1099 }
1100 } // namespace Fortran::parser
1101