1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include <algorithm>
15 #include <cinttypes>
16 #include <cstddef>
17 #include <ctime>
18 #include <map>
19 #include <memory>
20 #include <optional>
21 #include <set>
22 #include <sstream>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29   : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34   : isFunctionLike_{true},
35     argumentCount_(argNames.size()), isVariadic_{isVariadic},
36     replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39   : isPredefined_{true}, replacement_{predefined,
40                              sources.AddCompilerInsertion(predefined).start()} {
41 }
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
77   for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
78     if (!tokens.TokenAt(j - 1).IsBlank()) {
79       return j;
80     }
81   }
82   return 0;
83 }
84 
85 static TokenSequence Stringify(
86     const TokenSequence &tokens, AllSources &allSources) {
87   TokenSequence result;
88   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
89   result.PutNextTokenChar('"', quoteProvenance);
90   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
91     const CharBlock &token{tokens.TokenAt(j)};
92     std::size_t bytes{token.size()};
93     for (std::size_t k{0}; k < bytes; ++k) {
94       char ch{token[k]};
95       Provenance from{tokens.GetTokenProvenance(j, k)};
96       if (ch == '"' || ch == '\\') {
97         result.PutNextTokenChar(ch, from);
98       }
99       result.PutNextTokenChar(ch, from);
100     }
101   }
102   result.PutNextTokenChar('"', quoteProvenance);
103   result.CloseToken();
104   return result;
105 }
106 
107 TokenSequence Definition::Apply(
108     const std::vector<TokenSequence> &args, AllSources &allSources) {
109   TokenSequence result;
110   bool pasting{false};
111   bool skipping{false};
112   int parenthesesNesting{0};
113   std::size_t tokens{replacement_.SizeInTokens()};
114   for (std::size_t j{0}; j < tokens; ++j) {
115     const CharBlock &token{replacement_.TokenAt(j)};
116     std::size_t bytes{token.size()};
117     if (skipping) {
118       if (bytes == 1) {
119         if (token[0] == '(') {
120           ++parenthesesNesting;
121         } else if (token[0] == ')') {
122           skipping = --parenthesesNesting > 0;
123         }
124       }
125       continue;
126     }
127     if (bytes == 2 && token[0] == '~') {
128       std::size_t index = token[1] - 'A';
129       if (index >= args.size()) {
130         continue;
131       }
132       std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
133       if (afterLastNonBlank > 0 &&
134           result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
135         // stringifying
136         while (result.SizeInTokens() >= afterLastNonBlank) {
137           result.pop_back();
138         }
139         result.Put(Stringify(args[index], allSources));
140       } else {
141         std::size_t argTokens{args[index].SizeInTokens()};
142         for (std::size_t k{0}; k < argTokens; ++k) {
143           if (!pasting || !args[index].TokenAt(k).IsBlank()) {
144             result.Put(args[index], k);
145             pasting = false;
146           }
147         }
148       }
149     } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
150       // Token pasting operator in body (not expanded argument); discard any
151       // immediately preceding white space, then reopen the last token.
152       while (!result.empty() &&
153           result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
154         result.pop_back();
155       }
156       if (!result.empty()) {
157         result.ReopenLastToken();
158         pasting = true;
159       }
160     } else if (pasting && token.IsBlank()) {
161       // Delete whitespace immediately following ## in the body.
162     } else if (bytes == 11 && isVariadic_ &&
163         token.ToString() == "__VA_ARGS__") {
164       Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
165       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
166         if (k > argumentCount_) {
167           result.Put(","s, commaProvenance);
168         }
169         result.Put(args[k]);
170       }
171     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
172         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
173         parenthesesNesting == 0) {
174       parenthesesNesting = 1;
175       skipping = args.size() == argumentCount_;
176       ++j;
177     } else {
178       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
179         ++parenthesesNesting;
180       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
181         if (--parenthesesNesting == 0) {
182           skipping = false;
183           continue;
184         }
185       }
186       result.Put(replacement_, j);
187     }
188   }
189   return result;
190 }
191 
// Formats the local-time representation of "now" according to the
// strftime() format string "format".  Returns an empty string when the time
// cannot be converted to local time or when the formatted result does not
// fit in the internal buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime() may return a null pointer, which must not be handed
  // to std::strftime().
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  // Roomy enough for longer formats and for locales with long month names;
  // std::strftime() returns 0 when the result would not fit.
  char buffer[64];
  return {buffer, std::strftime(buffer, sizeof buffer, format, local)};
}
197 
198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
199   // Capture current local date & time once now to avoid having the values
200   // of __DATE__ or __TIME__ change during compilation.
201   std::time_t now;
202   std::time(&now);
203   definitions_.emplace(SaveTokenAsName("__DATE__"s),  // e.g., "Jun 16 1904"
204       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
205   definitions_.emplace(SaveTokenAsName("__TIME__"s),  // e.g., "23:59:60"
206       Definition{FormatTime(now, "\"%T\""), allSources});
207   // The values of these predefined macros depend on their invocation sites.
208   definitions_.emplace(
209       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
210   definitions_.emplace(
211       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
212 }
213 
214 void Preprocessor::Define(std::string macro, std::string value) {
215   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
216 }
217 
218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
219 
220 std::optional<TokenSequence> Preprocessor::MacroReplacement(
221     const TokenSequence &input, const Prescanner &prescanner) {
222   // Do quick scan for any use of a defined name.
223   std::size_t tokens{input.SizeInTokens()};
224   std::size_t j;
225   for (j = 0; j < tokens; ++j) {
226     CharBlock token{input.TokenAt(j)};
227     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
228         IsNameDefined(token)) {
229       break;
230     }
231   }
232   if (j == tokens) {
233     return std::nullopt;  // input contains nothing that would be replaced
234   }
235   TokenSequence result{input, 0, j};
236   for (; j < tokens; ++j) {
237     const CharBlock &token{input.TokenAt(j)};
238     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
239       result.Put(input, j);
240       continue;
241     }
242     auto it{definitions_.find(token)};
243     if (it == definitions_.end()) {
244       result.Put(input, j);
245       continue;
246     }
247     Definition &def{it->second};
248     if (def.isDisabled()) {
249       result.Put(input, j);
250       continue;
251     }
252     if (!def.isFunctionLike()) {
253       if (def.isPredefined()) {
254         std::string name{def.replacement().TokenAt(0).ToString()};
255         std::string repl;
256         if (name == "__FILE__") {
257           repl = "\""s +
258               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
259         } else if (name == "__LINE__") {
260           std::stringstream ss;
261           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
262           repl = ss.str();
263         }
264         if (!repl.empty()) {
265           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
266           ProvenanceRange call{allSources_.AddMacroCall(
267               insert, input.GetTokenProvenanceRange(j), repl)};
268           result.Put(repl, call.start());
269           continue;
270         }
271       }
272       def.set_isDisabled(true);
273       TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
274       def.set_isDisabled(false);
275       if (!replaced.empty()) {
276         ProvenanceRange from{def.replacement().GetProvenanceRange()};
277         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
278         ProvenanceRange newRange{
279             allSources_.AddMacroCall(from, use, replaced.ToString())};
280         result.Put(replaced, newRange);
281       }
282       continue;
283     }
284     // Possible function-like macro call.  Skip spaces and newlines to see
285     // whether '(' is next.
286     std::size_t k{j};
287     bool leftParen{false};
288     while (++k < tokens) {
289       const CharBlock &lookAhead{input.TokenAt(k)};
290       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
291         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
292         break;
293       }
294     }
295     if (!leftParen) {
296       result.Put(input, j);
297       continue;
298     }
299     std::vector<std::size_t> argStart{++k};
300     for (int nesting{0}; k < tokens; ++k) {
301       CharBlock token{input.TokenAt(k)};
302       if (token.size() == 1) {
303         char ch{token[0]};
304         if (ch == '(') {
305           ++nesting;
306         } else if (ch == ')') {
307           if (nesting == 0) {
308             break;
309           }
310           --nesting;
311         } else if (ch == ',' && nesting == 0) {
312           argStart.push_back(k + 1);
313         }
314       }
315     }
316     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
317       // Subtle: () is zero arguments, not one empty argument,
318       // unless one argument was expected.
319       argStart.clear();
320     }
321     if (k >= tokens || argStart.size() < def.argumentCount() ||
322         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
323       result.Put(input, j);
324       continue;
325     }
326     std::vector<TokenSequence> args;
327     for (std::size_t n{0}; n < argStart.size(); ++n) {
328       std::size_t at{argStart[n]};
329       std::size_t count{
330           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
331       args.emplace_back(TokenSequence(input, at, count));
332     }
333     def.set_isDisabled(true);
334     TokenSequence replaced{
335         ReplaceMacros(def.Apply(args, allSources_), prescanner)};
336     def.set_isDisabled(false);
337     if (!replaced.empty()) {
338       ProvenanceRange from{def.replacement().GetProvenanceRange()};
339       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
340       ProvenanceRange newRange{
341           allSources_.AddMacroCall(from, use, replaced.ToString())};
342       result.Put(replaced, newRange);
343     }
344     j = k;  // advance to the terminal ')'
345   }
346   return result;
347 }
348 
349 TokenSequence Preprocessor::ReplaceMacros(
350     const TokenSequence &tokens, const Prescanner &prescanner) {
351   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
352     return std::move(*repl);
353   }
354   return tokens;
355 }
356 
357 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
358   std::size_t tokens{dir.SizeInTokens()};
359   std::size_t j{dir.SkipBlanks(0)};
360   if (j == tokens) {
361     return;
362   }
363   if (dir.TokenAt(j).ToString() != "#") {
364     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
365     return;
366   }
367   j = dir.SkipBlanks(j + 1);
368   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
369     --tokens;
370   }
371   if (j == tokens) {
372     return;
373   }
374   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
375     return;  // treat like #line, ignore it
376   }
377   std::size_t dirOffset{j};
378   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
379   j = dir.SkipBlanks(j + 1);
380   CharBlock nameToken;
381   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
382     nameToken = dir.TokenAt(j);
383   }
384   if (dirName == "line") {
385     // #line is ignored
386   } else if (dirName == "define") {
387     if (nameToken.empty()) {
388       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
389           "#define: missing or invalid name"_err_en_US);
390       return;
391     }
392     nameToken = SaveTokenAsName(nameToken);
393     definitions_.erase(nameToken);
394     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
395         dir.TokenAt(j)[0] == '(') {
396       j = dir.SkipBlanks(j + 1);
397       std::vector<std::string> argName;
398       bool isVariadic{false};
399       if (dir.TokenAt(j).ToString() != ")") {
400         while (true) {
401           std::string an{dir.TokenAt(j).ToString()};
402           if (an == "...") {
403             isVariadic = true;
404           } else {
405             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
406               prescanner->Say(dir.GetTokenProvenanceRange(j),
407                   "#define: missing or invalid argument name"_err_en_US);
408               return;
409             }
410             argName.push_back(an);
411           }
412           j = dir.SkipBlanks(j + 1);
413           if (j == tokens) {
414             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
415                 "#define: malformed argument list"_err_en_US);
416             return;
417           }
418           std::string punc{dir.TokenAt(j).ToString()};
419           if (punc == ")") {
420             break;
421           }
422           if (isVariadic || punc != ",") {
423             prescanner->Say(dir.GetTokenProvenanceRange(j),
424                 "#define: malformed argument list"_err_en_US);
425             return;
426           }
427           j = dir.SkipBlanks(j + 1);
428           if (j == tokens) {
429             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
430                 "#define: malformed argument list"_err_en_US);
431             return;
432           }
433         }
434         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
435             argName.size()) {
436           prescanner->Say(dir.GetTokenProvenance(dirOffset),
437               "#define: argument names are not distinct"_err_en_US);
438           return;
439         }
440       }
441       j = dir.SkipBlanks(j + 1);
442       definitions_.emplace(std::make_pair(
443           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
444     } else {
445       j = dir.SkipBlanks(j + 1);
446       definitions_.emplace(
447           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
448     }
449   } else if (dirName == "undef") {
450     if (nameToken.empty()) {
451       prescanner->Say(
452           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
453           "# missing or invalid name"_err_en_US);
454     } else {
455       j = dir.SkipBlanks(j + 1);
456       if (j != tokens) {
457         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
458             "#undef: excess tokens at end of directive"_err_en_US);
459       } else {
460         definitions_.erase(nameToken);
461       }
462     }
463   } else if (dirName == "ifdef" || dirName == "ifndef") {
464     bool doThen{false};
465     if (nameToken.empty()) {
466       prescanner->Say(
467           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
468           "#%s: missing name"_err_en_US, dirName);
469     } else {
470       j = dir.SkipBlanks(j + 1);
471       if (j != tokens) {
472         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
473             "#%s: excess tokens at end of directive"_en_US, dirName);
474       }
475       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
476     }
477     if (doThen) {
478       ifStack_.push(CanDeadElseAppear::Yes);
479     } else {
480       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
481           dir.GetTokenProvenance(dirOffset));
482     }
483   } else if (dirName == "if") {
484     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
485       ifStack_.push(CanDeadElseAppear::Yes);
486     } else {
487       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
488           dir.GetTokenProvenanceRange(dirOffset));
489     }
490   } else if (dirName == "else") {
491     if (j != tokens) {
492       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
493           "#else: excess tokens at end of directive"_err_en_US);
494     } else if (ifStack_.empty()) {
495       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
496           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
497     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
498       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
499           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
500     } else {
501       ifStack_.pop();
502       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
503           dir.GetTokenProvenanceRange(dirOffset));
504     }
505   } else if (dirName == "elif") {
506     if (ifStack_.empty()) {
507       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
508           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
509     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
510       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
511           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
512     } else {
513       ifStack_.pop();
514       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
515           dir.GetTokenProvenanceRange(dirOffset));
516     }
517   } else if (dirName == "endif") {
518     if (j != tokens) {
519       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
520           "#endif: excess tokens at end of directive"_err_en_US);
521     } else if (ifStack_.empty()) {
522       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
523           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
524     } else {
525       ifStack_.pop();
526     }
527   } else if (dirName == "error") {
528     prescanner->Say(
529         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
530         "%s"_err_en_US, dir.ToString());
531   } else if (dirName == "warning" || dirName == "comment" ||
532       dirName == "note") {
533     prescanner->Say(
534         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
535         "%s"_en_US, dir.ToString());
536   } else if (dirName == "include") {
537     if (j == tokens) {
538       prescanner->Say(
539           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
540           "#include: missing name of file to include"_err_en_US);
541       return;
542     }
543     std::string include;
544     if (dir.TokenAt(j).ToString() == "<") {
545       std::size_t k{j + 1};
546       if (k >= tokens) {
547         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
548             "#include: file name missing"_err_en_US);
549         return;
550       }
551       while (k < tokens && dir.TokenAt(k) != ">") {
552         ++k;
553       }
554       if (k >= tokens) {
555         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
556             "#include: expected '>' at end of included file"_en_US);
557       } else if (k + 1 < tokens) {
558         prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
559             "#include: extra stuff ignored after '>'"_en_US);
560       }
561       TokenSequence braced{dir, j + 1, k - j - 1};
562       include = ReplaceMacros(braced, *prescanner).ToString();
563     } else if (j + 1 == tokens &&
564         (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
565         include.substr(include.size() - 1, 1) == "\"") {
566       include = include.substr(1, include.size() - 2);
567     } else {
568       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
569           "#include: expected name of file to include"_err_en_US);
570       return;
571     }
572     if (include.empty()) {
573       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
574           "#include: empty include file name"_err_en_US);
575       return;
576     }
577     std::stringstream error;
578     const SourceFile *included{allSources_.Open(include, &error)};
579     if (!included) {
580       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
581           "#include: %s"_err_en_US, error.str());
582     } else if (included->bytes() > 0) {
583       ProvenanceRange fileRange{
584           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
585       Prescanner{*prescanner}
586           .set_encoding(included->encoding())
587           .Prescan(fileRange);
588     }
589   } else {
590     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
591         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
592   }
593 }
594 
595 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
596   names_.push_back(t.ToString());
597   return {names_.back().data(), names_.back().size()};
598 }
599 
600 bool Preprocessor::IsNameDefined(const CharBlock &token) {
601   return definitions_.find(token) != definitions_.end();
602 }
603 
604 static std::string GetDirectiveName(
605     const TokenSequence &line, std::size_t *rest) {
606   std::size_t tokens{line.SizeInTokens()};
607   std::size_t j{line.SkipBlanks(0)};
608   if (j == tokens || line.TokenAt(j).ToString() != "#") {
609     *rest = tokens;
610     return "";
611   }
612   j = line.SkipBlanks(j + 1);
613   if (j == tokens) {
614     *rest = tokens;
615     return "";
616   }
617   *rest = line.SkipBlanks(j + 1);
618   return ToLowerCaseLetters(line.TokenAt(j).ToString());
619 }
620 
621 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
622     IsElseActive isElseActive, Prescanner *prescanner,
623     ProvenanceRange provenanceRange) {
624   int nesting{0};
625   while (!prescanner->IsAtEnd()) {
626     if (!prescanner->IsNextLinePreprocessorDirective()) {
627       prescanner->NextLine();
628       continue;
629     }
630     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
631     std::size_t rest{0};
632     std::string dn{GetDirectiveName(line, &rest)};
633     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
634       ++nesting;
635     } else if (dn == "endif") {
636       if (nesting-- == 0) {
637         return;
638       }
639     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
640       if (dn == "else") {
641         ifStack_.push(CanDeadElseAppear::No);
642         return;
643       }
644       if (dn == "elif" &&
645           IsIfPredicateTrue(
646               line, rest, line.SizeInTokens() - rest, prescanner)) {
647         ifStack_.push(CanDeadElseAppear::Yes);
648         return;
649       }
650     }
651   }
652   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
653 }
654 
655 // Precedence level codes used here to accommodate mixed Fortran and C:
656 // 15: parentheses and constants, logical !, bitwise ~
657 // 14: unary + and -
658 // 13: **
659 // 12: *, /, % (modulus)
660 // 11: + and -
661 // 10: << and >>
662 //  9: bitwise &
663 //  8: bitwise ^
664 //  7: bitwise |
665 //  6: relations (.EQ., ==, &c.)
666 //  5: .NOT.
667 //  4: .AND., &&
668 //  3: .OR., ||
669 //  2: .EQV. and .NEQV. / .XOR.
670 //  1: ? :
671 //  0: ,
672 static std::int64_t ExpressionValue(const TokenSequence &token,
673     int minimumPrecedence, std::size_t *atToken,
674     std::optional<Message> *error) {
675   enum Operator {
676     PARENS,
677     CONST,
678     NOTZERO,  // !
679     COMPLEMENT,  // ~
680     UPLUS,
681     UMINUS,
682     POWER,
683     TIMES,
684     DIVIDE,
685     MODULUS,
686     ADD,
687     SUBTRACT,
688     LEFTSHIFT,
689     RIGHTSHIFT,
690     BITAND,
691     BITXOR,
692     BITOR,
693     LT,
694     LE,
695     EQ,
696     NE,
697     GE,
698     GT,
699     NOT,
700     AND,
701     OR,
702     EQV,
703     NEQV,
704     SELECT,
705     COMMA
706   };
707   static const int precedence[]{
708       15, 15, 15, 15,  // (), 6, !, ~
709       14, 14,  // unary +, -
710       13, 12, 12, 12, 11, 11, 10, 10,  // **, *, /, %, +, -, <<, >>
711       9, 8, 7,  // &, ^, |
712       6, 6, 6, 6, 6, 6,  // relations .LT. to .GT.
713       5, 4, 3, 2, 2,  // .NOT., .AND., .OR., .EQV., .NEQV.
714       1, 0  // ?: and ,
715   };
716   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
717       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
718 
719   static std::map<std::string, enum Operator> opNameMap;
720   if (opNameMap.empty()) {
721     opNameMap["("] = PARENS;
722     opNameMap["!"] = NOTZERO;
723     opNameMap["~"] = COMPLEMENT;
724     opNameMap["**"] = POWER;
725     opNameMap["*"] = TIMES;
726     opNameMap["/"] = DIVIDE;
727     opNameMap["%"] = MODULUS;
728     opNameMap["+"] = ADD;
729     opNameMap["-"] = SUBTRACT;
730     opNameMap["<<"] = LEFTSHIFT;
731     opNameMap[">>"] = RIGHTSHIFT;
732     opNameMap["&"] = BITAND;
733     opNameMap["^"] = BITXOR;
734     opNameMap["|"] = BITOR;
735     opNameMap[".lt."] = opNameMap["<"] = LT;
736     opNameMap[".le."] = opNameMap["<="] = LE;
737     opNameMap[".eq."] = opNameMap["=="] = EQ;
738     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
739     opNameMap[".ge."] = opNameMap[">="] = GE;
740     opNameMap[".gt."] = opNameMap[">"] = GT;
741     opNameMap[".not."] = NOT;
742     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
743     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
744     opNameMap[".eqv."] = EQV;
745     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
746     opNameMap["?"] = SELECT;
747     opNameMap[","] = COMMA;
748   }
749 
750   std::size_t tokens{token.SizeInTokens()};
751   CHECK(tokens > 0);
752   if (*atToken >= tokens) {
753     *error =
754         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
755     return 0;
756   }
757 
758   // Parse and evaluate a primary or a unary operator and its operand.
759   std::size_t opAt{*atToken};
760   std::string t{token.TokenAt(opAt).ToString()};
761   enum Operator op;
762   std::int64_t left{0};
763   if (t == "(") {
764     op = PARENS;
765   } else if (IsDecimalDigit(t[0])) {
766     op = CONST;
767     std::size_t consumed{0};
768     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
769     if (consumed < t.size()) {
770       *error = Message{token.GetTokenProvenanceRange(opAt),
771           "Uninterpretable numeric constant '%s'"_err_en_US, t};
772       return 0;
773     }
774   } else if (IsLegalIdentifierStart(t[0])) {
775     // undefined macro name -> zero
776     // TODO: BOZ constants?
777     op = CONST;
778   } else if (t == "+") {
779     op = UPLUS;
780   } else if (t == "-") {
781     op = UMINUS;
782   } else if (t == "." && *atToken + 2 < tokens &&
783       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
784       token.TokenAt(*atToken + 2).ToString() == ".") {
785     op = NOT;
786     *atToken += 2;
787   } else {
788     auto it{opNameMap.find(t)};
789     if (it != opNameMap.end()) {
790       op = it->second;
791     } else {
792       *error = Message{token.GetTokenProvenanceRange(opAt),
793           "operand expected in expression"_err_en_US};
794       return 0;
795     }
796   }
797   if (precedence[op] < minimumPrecedence) {
798     *error = Message{token.GetTokenProvenanceRange(opAt),
799         "operator precedence error"_err_en_US};
800     return 0;
801   }
802   ++*atToken;
803   if (op != CONST) {
804     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
805     if (*error) {
806       return 0;
807     }
808     switch (op) {
809     case PARENS:
810       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
811         ++*atToken;
812         break;
813       }
814       if (*atToken >= tokens) {
815         *error = Message{token.GetProvenanceRange(),
816             "')' missing from expression"_err_en_US};
817       } else {
818         *error = Message{
819             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
820       }
821       return 0;
822     case NOTZERO: left = !left; break;
823     case COMPLEMENT: left = ~left; break;
824     case UPLUS: break;
825     case UMINUS: left = -left; break;
826     case NOT: left = -!left; break;
827     default: CRASH_NO_CASE;
828     }
829   }
830 
831   // Parse and evaluate binary operators and their second operands, if present.
832   while (*atToken < tokens) {
833     int advance{1};
834     t = token.TokenAt(*atToken).ToString();
835     if (t == "." && *atToken + 2 < tokens &&
836         token.TokenAt(*atToken + 2).ToString() == ".") {
837       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
838       advance = 3;
839     }
840     auto it{opNameMap.find(t)};
841     if (it == opNameMap.end()) {
842       break;
843     }
844     op = it->second;
845     if (op < POWER || precedence[op] < minimumPrecedence) {
846       break;
847     }
848     opAt = *atToken;
849     *atToken += advance;
850 
851     std::int64_t right{
852         ExpressionValue(token, operandPrecedence[op], atToken, error)};
853     if (*error) {
854       return 0;
855     }
856 
857     switch (op) {
858     case POWER:
859       if (left == 0) {
860         if (right < 0) {
861           *error = Message{token.GetTokenProvenanceRange(opAt),
862               "0 ** negative power"_err_en_US};
863         }
864       } else if (left != 1 && right != 1) {
865         if (right <= 0) {
866           left = !right;
867         } else {
868           std::int64_t power{1};
869           for (; right > 0; --right) {
870             if ((power * left) / left != power) {
871               *error = Message{token.GetTokenProvenanceRange(opAt),
872                   "overflow in exponentation"_err_en_US};
873               left = 1;
874             }
875             power *= left;
876           }
877           left = power;
878         }
879       }
880       break;
881     case TIMES:
882       if (left != 0 && right != 0 && ((left * right) / left) != right) {
883         *error = Message{token.GetTokenProvenanceRange(opAt),
884             "overflow in multiplication"_err_en_US};
885       }
886       left = left * right;
887       break;
888     case DIVIDE:
889       if (right == 0) {
890         *error = Message{
891             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
892         left = 0;
893       } else {
894         left = left / right;
895       }
896       break;
897     case MODULUS:
898       if (right == 0) {
899         *error = Message{
900             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
901         left = 0;
902       } else {
903         left = left % right;
904       }
905       break;
906     case ADD:
907       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
908         *error = Message{token.GetTokenProvenanceRange(opAt),
909             "overflow in addition"_err_en_US};
910       }
911       left = left + right;
912       break;
913     case SUBTRACT:
914       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
915         *error = Message{token.GetTokenProvenanceRange(opAt),
916             "overflow in subtraction"_err_en_US};
917       }
918       left = left - right;
919       break;
920     case LEFTSHIFT:
921       if (right < 0 || right > 64) {
922         *error = Message{token.GetTokenProvenanceRange(opAt),
923             "bad left shift count"_err_en_US};
924       }
925       left = right >= 64 ? 0 : left << right;
926       break;
927     case RIGHTSHIFT:
928       if (right < 0 || right > 64) {
929         *error = Message{token.GetTokenProvenanceRange(opAt),
930             "bad right shift count"_err_en_US};
931       }
932       left = right >= 64 ? 0 : left >> right;
933       break;
934     case BITAND:
935     case AND: left = left & right; break;
936     case BITXOR: left = left ^ right; break;
937     case BITOR:
938     case OR: left = left | right; break;
939     case LT: left = -(left < right); break;
940     case LE: left = -(left <= right); break;
941     case EQ: left = -(left == right); break;
942     case NE: left = -(left != right); break;
943     case GE: left = -(left >= right); break;
944     case GT: left = -(left > right); break;
945     case EQV: left = -(!left == !right); break;
946     case NEQV: left = -(!left != !right); break;
947     case SELECT:
948       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
949         *error = Message{token.GetTokenProvenanceRange(opAt),
950             "':' required in selection expression"_err_en_US};
951         return 0;
952       } else {
953         ++*atToken;
954         std::int64_t third{
955             ExpressionValue(token, operandPrecedence[op], atToken, error)};
956         left = left != 0 ? right : third;
957       }
958       break;
959     case COMMA: left = right; break;
960     default: CRASH_NO_CASE;
961     }
962   }
963   return left;
964 }
965 
966 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
967     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
968   TokenSequence expr1{expr, first, exprTokens};
969   if (expr1.HasBlanks()) {
970     expr1.RemoveBlanks();
971   }
972   TokenSequence expr2;
973   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
974     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
975       CharBlock name;
976       if (j + 3 < expr1.SizeInTokens() &&
977           expr1.TokenAt(j + 1).ToString() == "(" &&
978           expr1.TokenAt(j + 3).ToString() == ")") {
979         name = expr1.TokenAt(j + 2);
980         j += 3;
981       } else if (j + 1 < expr1.SizeInTokens() &&
982           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
983         name = expr1.TokenAt(++j);
984       }
985       if (!name.empty()) {
986         char truth{IsNameDefined(name) ? '1' : '0'};
987         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
988         continue;
989       }
990     }
991     expr2.Put(expr1, j);
992   }
993   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
994   if (expr3.HasBlanks()) {
995     expr3.RemoveBlanks();
996   }
997   if (expr3.empty()) {
998     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
999     return false;
1000   }
1001   std::size_t atToken{0};
1002   std::optional<Message> error;
1003   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1004   if (error) {
1005     prescanner->Say(std::move(*error));
1006   } else if (atToken < expr3.SizeInTokens() &&
1007       expr3.TokenAt(atToken).ToString() != "!") {
1008     prescanner->Say(expr3.GetIntervalProvenanceRange(
1009                         atToken, expr3.SizeInTokens() - atToken),
1010         atToken == 0 ? "could not parse any expression"_err_en_US
1011                      : "excess characters after expression"_err_en_US);
1012   }
1013   return result;
1014 }
1015 }
1016