1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29   : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34   : isFunctionLike_{true},
35     argumentCount_(argNames.size()), isVariadic_{isVariadic},
36     replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39   : isPredefined_{true}, replacement_{predefined,
40                              sources.AddCompilerInsertion(predefined).start()} {
41 }
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
77   for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
78     if (!tokens.TokenAt(j - 1).IsBlank()) {
79       return j;
80     }
81   }
82   return 0;
83 }
84 
85 static TokenSequence Stringify(
86     const TokenSequence &tokens, AllSources &allSources) {
87   TokenSequence result;
88   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
89   result.PutNextTokenChar('"', quoteProvenance);
90   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
91     const CharBlock &token{tokens.TokenAt(j)};
92     std::size_t bytes{token.size()};
93     for (std::size_t k{0}; k < bytes; ++k) {
94       char ch{token[k]};
95       Provenance from{tokens.GetTokenProvenance(j, k)};
96       if (ch == '"' || ch == '\\') {
97         result.PutNextTokenChar(ch, from);
98       }
99       result.PutNextTokenChar(ch, from);
100     }
101   }
102   result.PutNextTokenChar('"', quoteProvenance);
103   result.CloseToken();
104   return result;
105 }
106 
107 TokenSequence Definition::Apply(
108     const std::vector<TokenSequence> &args, AllSources &allSources) {
109   TokenSequence result;
110   bool pasting{false};
111   bool skipping{false};
112   int parenthesesNesting{0};
113   std::size_t tokens{replacement_.SizeInTokens()};
114   for (std::size_t j{0}; j < tokens; ++j) {
115     const CharBlock &token{replacement_.TokenAt(j)};
116     std::size_t bytes{token.size()};
117     if (skipping) {
118       if (bytes == 1) {
119         if (token[0] == '(') {
120           ++parenthesesNesting;
121         } else if (token[0] == ')') {
122           skipping = --parenthesesNesting > 0;
123         }
124       }
125       continue;
126     }
127     if (bytes == 2 && token[0] == '~') {
128       std::size_t index = token[1] - 'A';
129       if (index >= args.size()) {
130         continue;
131       }
132       std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
133       if (afterLastNonBlank > 0 &&
134           result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
135         // stringifying
136         while (result.SizeInTokens() >= afterLastNonBlank) {
137           result.pop_back();
138         }
139         result.Put(Stringify(args[index], allSources));
140       } else {
141         std::size_t argTokens{args[index].SizeInTokens()};
142         for (std::size_t k{0}; k < argTokens; ++k) {
143           if (!pasting || !args[index].TokenAt(k).IsBlank()) {
144             result.Put(args[index], k);
145             pasting = false;
146           }
147         }
148       }
149     } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
150       // Token pasting operator in body (not expanded argument); discard any
151       // immediately preceding white space, then reopen the last token.
152       while (!result.empty() &&
153           result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
154         result.pop_back();
155       }
156       if (!result.empty()) {
157         result.ReopenLastToken();
158         pasting = true;
159       }
160     } else if (pasting && token.IsBlank()) {
161       // Delete whitespace immediately following ## in the body.
162     } else if (bytes == 11 && isVariadic_ &&
163         token.ToString() == "__VA_ARGS__") {
164       Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
165       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
166         if (k > argumentCount_) {
167           result.Put(","s, commaProvenance);
168         }
169         result.Put(args[k]);
170       }
171     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
172         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
173         parenthesesNesting == 0) {
174       parenthesesNesting = 1;
175       skipping = args.size() == argumentCount_;
176       ++j;
177     } else {
178       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
179         ++parenthesesNesting;
180       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
181         if (--parenthesesNesting == 0) {
182           skipping = false;
183           continue;
184         }
185       }
186       result.Put(replacement_, j);
187     }
188   }
189   return result;
190 }
191 
// Formats the local-time form of `now` according to the strftime() pattern
// `format`.  Returns an empty string when `now` cannot be converted to local
// time, or when the formatted text and its terminator do not fit in the
// 16-byte buffer (strftime() reports that case by returning 0).
static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    // localtime() yields a null pointer for times it cannot represent, and
    // strftime() must not be handed a null std::tm.
    return {};
  }
  return {buffer, std::strftime(buffer, sizeof buffer, format, local)};
}
197 
198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
199   // Capture current local date & time once now to avoid having the values
200   // of __DATE__ or __TIME__ change during compilation.
201   std::time_t now;
202   std::time(&now);
203   definitions_.emplace(SaveTokenAsName("__DATE__"s),  // e.g., "Jun 16 1904"
204       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
205   definitions_.emplace(SaveTokenAsName("__TIME__"s),  // e.g., "23:59:60"
206       Definition{FormatTime(now, "\"%T\""), allSources});
207   // The values of these predefined macros depend on their invocation sites.
208   definitions_.emplace(
209       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
210   definitions_.emplace(
211       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
212 }
213 
214 void Preprocessor::Define(std::string macro, std::string value) {
215   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
216 }
217 
218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
219 
220 std::optional<TokenSequence> Preprocessor::MacroReplacement(
221     const TokenSequence &input, const Prescanner &prescanner) {
222   // Do quick scan for any use of a defined name.
223   std::size_t tokens{input.SizeInTokens()};
224   std::size_t j;
225   for (j = 0; j < tokens; ++j) {
226     CharBlock token{input.TokenAt(j)};
227     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
228         IsNameDefined(token)) {
229       break;
230     }
231   }
232   if (j == tokens) {
233     return std::nullopt;  // input contains nothing that would be replaced
234   }
235   TokenSequence result{input, 0, j};
236   for (; j < tokens; ++j) {
237     const CharBlock &token{input.TokenAt(j)};
238     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
239       result.Put(input, j);
240       continue;
241     }
242     auto it{definitions_.find(token)};
243     if (it == definitions_.end()) {
244       result.Put(input, j);
245       continue;
246     }
247     Definition &def{it->second};
248     if (def.isDisabled()) {
249       result.Put(input, j);
250       continue;
251     }
252     if (!def.isFunctionLike()) {
253       if (def.isPredefined()) {
254         std::string name{def.replacement().TokenAt(0).ToString()};
255         std::string repl;
256         if (name == "__FILE__") {
257           repl = "\""s +
258               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
259         } else if (name == "__LINE__") {
260           std::string buf;
261           llvm::raw_string_ostream ss{buf};
262           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
263           repl = ss.str();
264         }
265         if (!repl.empty()) {
266           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
267           ProvenanceRange call{allSources_.AddMacroCall(
268               insert, input.GetTokenProvenanceRange(j), repl)};
269           result.Put(repl, call.start());
270           continue;
271         }
272       }
273       def.set_isDisabled(true);
274       TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
275       def.set_isDisabled(false);
276       if (!replaced.empty()) {
277         ProvenanceRange from{def.replacement().GetProvenanceRange()};
278         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
279         ProvenanceRange newRange{
280             allSources_.AddMacroCall(from, use, replaced.ToString())};
281         result.Put(replaced, newRange);
282       }
283       continue;
284     }
285     // Possible function-like macro call.  Skip spaces and newlines to see
286     // whether '(' is next.
287     std::size_t k{j};
288     bool leftParen{false};
289     while (++k < tokens) {
290       const CharBlock &lookAhead{input.TokenAt(k)};
291       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
292         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
293         break;
294       }
295     }
296     if (!leftParen) {
297       result.Put(input, j);
298       continue;
299     }
300     std::vector<std::size_t> argStart{++k};
301     for (int nesting{0}; k < tokens; ++k) {
302       CharBlock token{input.TokenAt(k)};
303       if (token.size() == 1) {
304         char ch{token[0]};
305         if (ch == '(') {
306           ++nesting;
307         } else if (ch == ')') {
308           if (nesting == 0) {
309             break;
310           }
311           --nesting;
312         } else if (ch == ',' && nesting == 0) {
313           argStart.push_back(k + 1);
314         }
315       }
316     }
317     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
318       // Subtle: () is zero arguments, not one empty argument,
319       // unless one argument was expected.
320       argStart.clear();
321     }
322     if (k >= tokens || argStart.size() < def.argumentCount() ||
323         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
324       result.Put(input, j);
325       continue;
326     }
327     std::vector<TokenSequence> args;
328     for (std::size_t n{0}; n < argStart.size(); ++n) {
329       std::size_t at{argStart[n]};
330       std::size_t count{
331           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
332       args.emplace_back(TokenSequence(input, at, count));
333     }
334     def.set_isDisabled(true);
335     TokenSequence replaced{
336         ReplaceMacros(def.Apply(args, allSources_), prescanner)};
337     def.set_isDisabled(false);
338     if (!replaced.empty()) {
339       ProvenanceRange from{def.replacement().GetProvenanceRange()};
340       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
341       ProvenanceRange newRange{
342           allSources_.AddMacroCall(from, use, replaced.ToString())};
343       result.Put(replaced, newRange);
344     }
345     j = k;  // advance to the terminal ')'
346   }
347   return result;
348 }
349 
350 TokenSequence Preprocessor::ReplaceMacros(
351     const TokenSequence &tokens, const Prescanner &prescanner) {
352   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
353     return std::move(*repl);
354   }
355   return tokens;
356 }
357 
358 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
359   std::size_t tokens{dir.SizeInTokens()};
360   std::size_t j{dir.SkipBlanks(0)};
361   if (j == tokens) {
362     return;
363   }
364   if (dir.TokenAt(j).ToString() != "#") {
365     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
366     return;
367   }
368   j = dir.SkipBlanks(j + 1);
369   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
370     --tokens;
371   }
372   if (j == tokens) {
373     return;
374   }
375   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
376     return;  // treat like #line, ignore it
377   }
378   std::size_t dirOffset{j};
379   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
380   j = dir.SkipBlanks(j + 1);
381   CharBlock nameToken;
382   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
383     nameToken = dir.TokenAt(j);
384   }
385   if (dirName == "line") {
386     // #line is ignored
387   } else if (dirName == "define") {
388     if (nameToken.empty()) {
389       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
390           "#define: missing or invalid name"_err_en_US);
391       return;
392     }
393     nameToken = SaveTokenAsName(nameToken);
394     definitions_.erase(nameToken);
395     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
396         dir.TokenAt(j)[0] == '(') {
397       j = dir.SkipBlanks(j + 1);
398       std::vector<std::string> argName;
399       bool isVariadic{false};
400       if (dir.TokenAt(j).ToString() != ")") {
401         while (true) {
402           std::string an{dir.TokenAt(j).ToString()};
403           if (an == "...") {
404             isVariadic = true;
405           } else {
406             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
407               prescanner->Say(dir.GetTokenProvenanceRange(j),
408                   "#define: missing or invalid argument name"_err_en_US);
409               return;
410             }
411             argName.push_back(an);
412           }
413           j = dir.SkipBlanks(j + 1);
414           if (j == tokens) {
415             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
416                 "#define: malformed argument list"_err_en_US);
417             return;
418           }
419           std::string punc{dir.TokenAt(j).ToString()};
420           if (punc == ")") {
421             break;
422           }
423           if (isVariadic || punc != ",") {
424             prescanner->Say(dir.GetTokenProvenanceRange(j),
425                 "#define: malformed argument list"_err_en_US);
426             return;
427           }
428           j = dir.SkipBlanks(j + 1);
429           if (j == tokens) {
430             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
431                 "#define: malformed argument list"_err_en_US);
432             return;
433           }
434         }
435         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
436             argName.size()) {
437           prescanner->Say(dir.GetTokenProvenance(dirOffset),
438               "#define: argument names are not distinct"_err_en_US);
439           return;
440         }
441       }
442       j = dir.SkipBlanks(j + 1);
443       definitions_.emplace(std::make_pair(
444           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
445     } else {
446       j = dir.SkipBlanks(j + 1);
447       definitions_.emplace(
448           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
449     }
450   } else if (dirName == "undef") {
451     if (nameToken.empty()) {
452       prescanner->Say(
453           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
454           "# missing or invalid name"_err_en_US);
455     } else {
456       j = dir.SkipBlanks(j + 1);
457       if (j != tokens) {
458         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
459             "#undef: excess tokens at end of directive"_err_en_US);
460       } else {
461         definitions_.erase(nameToken);
462       }
463     }
464   } else if (dirName == "ifdef" || dirName == "ifndef") {
465     bool doThen{false};
466     if (nameToken.empty()) {
467       prescanner->Say(
468           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
469           "#%s: missing name"_err_en_US, dirName);
470     } else {
471       j = dir.SkipBlanks(j + 1);
472       if (j != tokens) {
473         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
474             "#%s: excess tokens at end of directive"_en_US, dirName);
475       }
476       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
477     }
478     if (doThen) {
479       ifStack_.push(CanDeadElseAppear::Yes);
480     } else {
481       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
482           dir.GetTokenProvenance(dirOffset));
483     }
484   } else if (dirName == "if") {
485     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
486       ifStack_.push(CanDeadElseAppear::Yes);
487     } else {
488       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
489           dir.GetTokenProvenanceRange(dirOffset));
490     }
491   } else if (dirName == "else") {
492     if (j != tokens) {
493       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
494           "#else: excess tokens at end of directive"_err_en_US);
495     } else if (ifStack_.empty()) {
496       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
497           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
498     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
499       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
500           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
501     } else {
502       ifStack_.pop();
503       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
504           dir.GetTokenProvenanceRange(dirOffset));
505     }
506   } else if (dirName == "elif") {
507     if (ifStack_.empty()) {
508       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
509           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
510     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
511       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
512           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
513     } else {
514       ifStack_.pop();
515       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
516           dir.GetTokenProvenanceRange(dirOffset));
517     }
518   } else if (dirName == "endif") {
519     if (j != tokens) {
520       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
521           "#endif: excess tokens at end of directive"_err_en_US);
522     } else if (ifStack_.empty()) {
523       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
524           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
525     } else {
526       ifStack_.pop();
527     }
528   } else if (dirName == "error") {
529     prescanner->Say(
530         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
531         "%s"_err_en_US, dir.ToString());
532   } else if (dirName == "warning" || dirName == "comment" ||
533       dirName == "note") {
534     prescanner->Say(
535         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
536         "%s"_en_US, dir.ToString());
537   } else if (dirName == "include") {
538     if (j == tokens) {
539       prescanner->Say(
540           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
541           "#include: missing name of file to include"_err_en_US);
542       return;
543     }
544     std::string include;
545     if (dir.TokenAt(j).ToString() == "<") {
546       std::size_t k{j + 1};
547       if (k >= tokens) {
548         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
549             "#include: file name missing"_err_en_US);
550         return;
551       }
552       while (k < tokens && dir.TokenAt(k) != ">") {
553         ++k;
554       }
555       if (k >= tokens) {
556         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
557             "#include: expected '>' at end of included file"_en_US);
558       } else if (k + 1 < tokens) {
559         prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
560             "#include: extra stuff ignored after '>'"_en_US);
561       }
562       TokenSequence braced{dir, j + 1, k - j - 1};
563       include = ReplaceMacros(braced, *prescanner).ToString();
564     } else if (j + 1 == tokens &&
565         (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
566         include.substr(include.size() - 1, 1) == "\"") {
567       include = include.substr(1, include.size() - 2);
568     } else {
569       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
570           "#include: expected name of file to include"_err_en_US);
571       return;
572     }
573     if (include.empty()) {
574       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
575           "#include: empty include file name"_err_en_US);
576       return;
577     }
578     std::string buf;
579     llvm::raw_string_ostream error{buf};
580     const SourceFile *included{allSources_.Open(include, error)};
581     if (!included) {
582       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
583           "#include: %s"_err_en_US, error.str());
584     } else if (included->bytes() > 0) {
585       ProvenanceRange fileRange{
586           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
587       Prescanner{*prescanner}
588           .set_encoding(included->encoding())
589           .Prescan(fileRange);
590     }
591   } else {
592     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
593         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
594   }
595 }
596 
597 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
598   names_.push_back(t.ToString());
599   return {names_.back().data(), names_.back().size()};
600 }
601 
602 bool Preprocessor::IsNameDefined(const CharBlock &token) {
603   return definitions_.find(token) != definitions_.end();
604 }
605 
606 static std::string GetDirectiveName(
607     const TokenSequence &line, std::size_t *rest) {
608   std::size_t tokens{line.SizeInTokens()};
609   std::size_t j{line.SkipBlanks(0)};
610   if (j == tokens || line.TokenAt(j).ToString() != "#") {
611     *rest = tokens;
612     return "";
613   }
614   j = line.SkipBlanks(j + 1);
615   if (j == tokens) {
616     *rest = tokens;
617     return "";
618   }
619   *rest = line.SkipBlanks(j + 1);
620   return ToLowerCaseLetters(line.TokenAt(j).ToString());
621 }
622 
623 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
624     IsElseActive isElseActive, Prescanner *prescanner,
625     ProvenanceRange provenanceRange) {
626   int nesting{0};
627   while (!prescanner->IsAtEnd()) {
628     if (!prescanner->IsNextLinePreprocessorDirective()) {
629       prescanner->NextLine();
630       continue;
631     }
632     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
633     std::size_t rest{0};
634     std::string dn{GetDirectiveName(line, &rest)};
635     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
636       ++nesting;
637     } else if (dn == "endif") {
638       if (nesting-- == 0) {
639         return;
640       }
641     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
642       if (dn == "else") {
643         ifStack_.push(CanDeadElseAppear::No);
644         return;
645       }
646       if (dn == "elif" &&
647           IsIfPredicateTrue(
648               line, rest, line.SizeInTokens() - rest, prescanner)) {
649         ifStack_.push(CanDeadElseAppear::Yes);
650         return;
651       }
652     }
653   }
654   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
655 }
656 
657 // Precedence level codes used here to accommodate mixed Fortran and C:
658 // 15: parentheses and constants, logical !, bitwise ~
659 // 14: unary + and -
660 // 13: **
661 // 12: *, /, % (modulus)
662 // 11: + and -
663 // 10: << and >>
664 //  9: bitwise &
665 //  8: bitwise ^
666 //  7: bitwise |
667 //  6: relations (.EQ., ==, &c.)
668 //  5: .NOT.
669 //  4: .AND., &&
670 //  3: .OR., ||
671 //  2: .EQV. and .NEQV. / .XOR.
672 //  1: ? :
673 //  0: ,
674 static std::int64_t ExpressionValue(const TokenSequence &token,
675     int minimumPrecedence, std::size_t *atToken,
676     std::optional<Message> *error) {
677   enum Operator {
678     PARENS,
679     CONST,
680     NOTZERO,  // !
681     COMPLEMENT,  // ~
682     UPLUS,
683     UMINUS,
684     POWER,
685     TIMES,
686     DIVIDE,
687     MODULUS,
688     ADD,
689     SUBTRACT,
690     LEFTSHIFT,
691     RIGHTSHIFT,
692     BITAND,
693     BITXOR,
694     BITOR,
695     LT,
696     LE,
697     EQ,
698     NE,
699     GE,
700     GT,
701     NOT,
702     AND,
703     OR,
704     EQV,
705     NEQV,
706     SELECT,
707     COMMA
708   };
709   static const int precedence[]{
710       15, 15, 15, 15,  // (), 6, !, ~
711       14, 14,  // unary +, -
712       13, 12, 12, 12, 11, 11, 10, 10,  // **, *, /, %, +, -, <<, >>
713       9, 8, 7,  // &, ^, |
714       6, 6, 6, 6, 6, 6,  // relations .LT. to .GT.
715       5, 4, 3, 2, 2,  // .NOT., .AND., .OR., .EQV., .NEQV.
716       1, 0  // ?: and ,
717   };
718   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
719       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
720 
721   static std::map<std::string, enum Operator> opNameMap;
722   if (opNameMap.empty()) {
723     opNameMap["("] = PARENS;
724     opNameMap["!"] = NOTZERO;
725     opNameMap["~"] = COMPLEMENT;
726     opNameMap["**"] = POWER;
727     opNameMap["*"] = TIMES;
728     opNameMap["/"] = DIVIDE;
729     opNameMap["%"] = MODULUS;
730     opNameMap["+"] = ADD;
731     opNameMap["-"] = SUBTRACT;
732     opNameMap["<<"] = LEFTSHIFT;
733     opNameMap[">>"] = RIGHTSHIFT;
734     opNameMap["&"] = BITAND;
735     opNameMap["^"] = BITXOR;
736     opNameMap["|"] = BITOR;
737     opNameMap[".lt."] = opNameMap["<"] = LT;
738     opNameMap[".le."] = opNameMap["<="] = LE;
739     opNameMap[".eq."] = opNameMap["=="] = EQ;
740     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
741     opNameMap[".ge."] = opNameMap[">="] = GE;
742     opNameMap[".gt."] = opNameMap[">"] = GT;
743     opNameMap[".not."] = NOT;
744     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
745     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
746     opNameMap[".eqv."] = EQV;
747     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
748     opNameMap["?"] = SELECT;
749     opNameMap[","] = COMMA;
750   }
751 
752   std::size_t tokens{token.SizeInTokens()};
753   CHECK(tokens > 0);
754   if (*atToken >= tokens) {
755     *error =
756         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
757     return 0;
758   }
759 
760   // Parse and evaluate a primary or a unary operator and its operand.
761   std::size_t opAt{*atToken};
762   std::string t{token.TokenAt(opAt).ToString()};
763   enum Operator op;
764   std::int64_t left{0};
765   if (t == "(") {
766     op = PARENS;
767   } else if (IsDecimalDigit(t[0])) {
768     op = CONST;
769     std::size_t consumed{0};
770     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
771     if (consumed < t.size()) {
772       *error = Message{token.GetTokenProvenanceRange(opAt),
773           "Uninterpretable numeric constant '%s'"_err_en_US, t};
774       return 0;
775     }
776   } else if (IsLegalIdentifierStart(t[0])) {
777     // undefined macro name -> zero
778     // TODO: BOZ constants?
779     op = CONST;
780   } else if (t == "+") {
781     op = UPLUS;
782   } else if (t == "-") {
783     op = UMINUS;
784   } else if (t == "." && *atToken + 2 < tokens &&
785       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
786       token.TokenAt(*atToken + 2).ToString() == ".") {
787     op = NOT;
788     *atToken += 2;
789   } else {
790     auto it{opNameMap.find(t)};
791     if (it != opNameMap.end()) {
792       op = it->second;
793     } else {
794       *error = Message{token.GetTokenProvenanceRange(opAt),
795           "operand expected in expression"_err_en_US};
796       return 0;
797     }
798   }
799   if (precedence[op] < minimumPrecedence) {
800     *error = Message{token.GetTokenProvenanceRange(opAt),
801         "operator precedence error"_err_en_US};
802     return 0;
803   }
804   ++*atToken;
805   if (op != CONST) {
806     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
807     if (*error) {
808       return 0;
809     }
810     switch (op) {
811     case PARENS:
812       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
813         ++*atToken;
814         break;
815       }
816       if (*atToken >= tokens) {
817         *error = Message{token.GetProvenanceRange(),
818             "')' missing from expression"_err_en_US};
819       } else {
820         *error = Message{
821             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
822       }
823       return 0;
824     case NOTZERO: left = !left; break;
825     case COMPLEMENT: left = ~left; break;
826     case UPLUS: break;
827     case UMINUS: left = -left; break;
828     case NOT: left = -!left; break;
829     default: CRASH_NO_CASE;
830     }
831   }
832 
833   // Parse and evaluate binary operators and their second operands, if present.
834   while (*atToken < tokens) {
835     int advance{1};
836     t = token.TokenAt(*atToken).ToString();
837     if (t == "." && *atToken + 2 < tokens &&
838         token.TokenAt(*atToken + 2).ToString() == ".") {
839       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
840       advance = 3;
841     }
842     auto it{opNameMap.find(t)};
843     if (it == opNameMap.end()) {
844       break;
845     }
846     op = it->second;
847     if (op < POWER || precedence[op] < minimumPrecedence) {
848       break;
849     }
850     opAt = *atToken;
851     *atToken += advance;
852 
853     std::int64_t right{
854         ExpressionValue(token, operandPrecedence[op], atToken, error)};
855     if (*error) {
856       return 0;
857     }
858 
859     switch (op) {
860     case POWER:
861       if (left == 0) {
862         if (right < 0) {
863           *error = Message{token.GetTokenProvenanceRange(opAt),
864               "0 ** negative power"_err_en_US};
865         }
866       } else if (left != 1 && right != 1) {
867         if (right <= 0) {
868           left = !right;
869         } else {
870           std::int64_t power{1};
871           for (; right > 0; --right) {
872             if ((power * left) / left != power) {
873               *error = Message{token.GetTokenProvenanceRange(opAt),
874                   "overflow in exponentation"_err_en_US};
875               left = 1;
876             }
877             power *= left;
878           }
879           left = power;
880         }
881       }
882       break;
883     case TIMES:
884       if (left != 0 && right != 0 && ((left * right) / left) != right) {
885         *error = Message{token.GetTokenProvenanceRange(opAt),
886             "overflow in multiplication"_err_en_US};
887       }
888       left = left * right;
889       break;
890     case DIVIDE:
891       if (right == 0) {
892         *error = Message{
893             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
894         left = 0;
895       } else {
896         left = left / right;
897       }
898       break;
899     case MODULUS:
900       if (right == 0) {
901         *error = Message{
902             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
903         left = 0;
904       } else {
905         left = left % right;
906       }
907       break;
908     case ADD:
909       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
910         *error = Message{token.GetTokenProvenanceRange(opAt),
911             "overflow in addition"_err_en_US};
912       }
913       left = left + right;
914       break;
915     case SUBTRACT:
916       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
917         *error = Message{token.GetTokenProvenanceRange(opAt),
918             "overflow in subtraction"_err_en_US};
919       }
920       left = left - right;
921       break;
922     case LEFTSHIFT:
923       if (right < 0 || right > 64) {
924         *error = Message{token.GetTokenProvenanceRange(opAt),
925             "bad left shift count"_err_en_US};
926       }
927       left = right >= 64 ? 0 : left << right;
928       break;
929     case RIGHTSHIFT:
930       if (right < 0 || right > 64) {
931         *error = Message{token.GetTokenProvenanceRange(opAt),
932             "bad right shift count"_err_en_US};
933       }
934       left = right >= 64 ? 0 : left >> right;
935       break;
936     case BITAND:
937     case AND: left = left & right; break;
938     case BITXOR: left = left ^ right; break;
939     case BITOR:
940     case OR: left = left | right; break;
941     case LT: left = -(left < right); break;
942     case LE: left = -(left <= right); break;
943     case EQ: left = -(left == right); break;
944     case NE: left = -(left != right); break;
945     case GE: left = -(left >= right); break;
946     case GT: left = -(left > right); break;
947     case EQV: left = -(!left == !right); break;
948     case NEQV: left = -(!left != !right); break;
949     case SELECT:
950       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
951         *error = Message{token.GetTokenProvenanceRange(opAt),
952             "':' required in selection expression"_err_en_US};
953         return 0;
954       } else {
955         ++*atToken;
956         std::int64_t third{
957             ExpressionValue(token, operandPrecedence[op], atToken, error)};
958         left = left != 0 ? right : third;
959       }
960       break;
961     case COMMA: left = right; break;
962     default: CRASH_NO_CASE;
963     }
964   }
965   return left;
966 }
967 
968 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
969     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
970   TokenSequence expr1{expr, first, exprTokens};
971   if (expr1.HasBlanks()) {
972     expr1.RemoveBlanks();
973   }
974   TokenSequence expr2;
975   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
976     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
977       CharBlock name;
978       if (j + 3 < expr1.SizeInTokens() &&
979           expr1.TokenAt(j + 1).ToString() == "(" &&
980           expr1.TokenAt(j + 3).ToString() == ")") {
981         name = expr1.TokenAt(j + 2);
982         j += 3;
983       } else if (j + 1 < expr1.SizeInTokens() &&
984           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
985         name = expr1.TokenAt(++j);
986       }
987       if (!name.empty()) {
988         char truth{IsNameDefined(name) ? '1' : '0'};
989         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
990         continue;
991       }
992     }
993     expr2.Put(expr1, j);
994   }
995   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
996   if (expr3.HasBlanks()) {
997     expr3.RemoveBlanks();
998   }
999   if (expr3.empty()) {
1000     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1001     return false;
1002   }
1003   std::size_t atToken{0};
1004   std::optional<Message> error;
1005   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1006   if (error) {
1007     prescanner->Say(std::move(*error));
1008   } else if (atToken < expr3.SizeInTokens() &&
1009       expr3.TokenAt(atToken).ToString() != "!") {
1010     prescanner->Say(expr3.GetIntervalProvenanceRange(
1011                         atToken, expr3.SizeInTokens() - atToken),
1012         atToken == 0 ? "could not parse any expression"_err_en_US
1013                      : "excess characters after expression"_err_en_US);
1014   }
1015   return result;
1016 }
1017 }
1018