1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
// Builds a definition whose replacement text is the run of `tokens` tokens
// of `repl` that begins at index `firstToken`.  Tokenize() is given an empty
// argument-name list, so no token is rewritten into an argument placeholder.
Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
    : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
// Builds a function-like definition.  `argNames` are the dummy argument
// names; their count is recorded and each occurrence in the replacement
// text (the `tokens` tokens of `repl` starting at `firstToken`) is rewritten
// by Tokenize() into a placeholder.  `isVariadic` is stored as given.
Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
    : isFunctionLike_{true},
      argumentCount_(argNames.size()), isVariadic_{isVariadic},
      replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
// Builds a definition flagged as predefined.  The replacement is constructed
// from the string `predefined`, with its provenance starting at a compiler
// insertion of that same string registered in `sources`.
Definition::Definition(const std::string &predefined, AllSources &sources)
    : isPredefined_{true},
      replacement_{
          predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
77   for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
78     if (!tokens.TokenAt(j - 1).IsBlank()) {
79       return j;
80     }
81   }
82   return 0;
83 }
84 
85 static TokenSequence Stringify(
86     const TokenSequence &tokens, AllSources &allSources) {
87   TokenSequence result;
88   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
89   result.PutNextTokenChar('"', quoteProvenance);
90   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
91     const CharBlock &token{tokens.TokenAt(j)};
92     std::size_t bytes{token.size()};
93     for (std::size_t k{0}; k < bytes; ++k) {
94       char ch{token[k]};
95       Provenance from{tokens.GetTokenProvenance(j, k)};
96       if (ch == '"' || ch == '\\') {
97         result.PutNextTokenChar(ch, from);
98       }
99       result.PutNextTokenChar(ch, from);
100     }
101   }
102   result.PutNextTokenChar('"', quoteProvenance);
103   result.CloseToken();
104   return result;
105 }
106 
// Expands the replacement text of this definition with the actual arguments
// in `args` and returns the resulting token sequence.  The replacement text
// is scanned once, token by token:
//  - a "~X" placeholder (see Tokenize) is replaced by argument X, or by its
//    stringified form when the last non-blank token emitted so far is "#";
//  - "##" removes preceding blanks and reopens the last emitted token;
//  - __VA_ARGS__ (variadic definitions only) emits the arguments beyond
//    argumentCount_, separated by inserted commas;
//  - __VA_OPT__( ... ) (variadic definitions only) keeps its parenthesized
//    contents only when such extra arguments exist;
//  - every other token is copied unchanged.
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, AllSources &allSources) {
  TokenSequence result;
  // Set by "##"; cleared once an argument token has been emitted.
  bool pasting{false};
  // True while discarding the contents of a __VA_OPT__(...) group.
  bool skipping{false};
  // Depth of parentheses inside the current __VA_OPT__ group (0 = outside).
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    const CharBlock &token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      // Only track parentheses so as to find the end of the skipped group.
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') {
      // Argument placeholder: the second character encodes the index.
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue;
      }
      std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
      if (afterLastNonBlank > 0 &&
          result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
        // stringifying
        // Drop the "#" and any blanks after it before emitting the string.
        while (result.SizeInTokens() >= afterLastNonBlank) {
          result.pop_back();
        }
        result.Put(Stringify(args[index], allSources));
      } else {
        // Copy the argument; while pasting, its leading blanks are dropped.
        std::size_t argTokens{args[index].SizeInTokens()};
        for (std::size_t k{0}; k < argTokens; ++k) {
          if (!pasting || !args[index].TokenAt(k).IsBlank()) {
            result.Put(args[index], k);
            pasting = false;
          }
        }
      }
    } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
      // Token pasting operator in body (not expanded argument); discard any
      // immediately preceding white space, then reopen the last token.
      while (!result.empty() &&
          result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
        result.pop_back();
      }
      if (!result.empty()) {
        result.ReopenLastToken();
        pasting = true;
      }
    } else if (pasting && token.IsBlank()) {
      // Delete whitespace immediately following ## in the body.
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      // Emit all arguments past the named ones, with commas between them.
      Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      // Start of a __VA_OPT__( group: skip its contents when there are no
      // arguments beyond the named ones.  The ++j consumes the "(".
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j;
    } else {
      // Ordinary token.  Inside a kept __VA_OPT__ group, track parentheses
      // so that the group's own closing ")" is dropped rather than copied.
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return result;
}
191 
// Formats the local time corresponding to `now` according to the strftime
// conversion specification `format`.  Returns an empty string when the local
// time cannot be determined or when the formatted text does not fit in the
// internal buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {}; // std::localtime() failed; strftime() must not see a null tm
  }
  // Leave room for month abbreviations and formats longer than the two
  // used for __DATE__ and __TIME__.
  char buffer[64];
  return {buffer, std::strftime(buffer, sizeof buffer, format, local)};
}
197 
198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
199   // Capture current local date & time once now to avoid having the values
200   // of __DATE__ or __TIME__ change during compilation.
201   std::time_t now;
202   std::time(&now);
203   definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
204       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
205   definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
206       Definition{FormatTime(now, "\"%T\""), allSources});
207   // The values of these predefined macros depend on their invocation sites.
208   definitions_.emplace(
209       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
210   definitions_.emplace(
211       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
212 }
213 
214 void Preprocessor::Define(std::string macro, std::string value) {
215   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
216 }
217 
218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
219 
// Performs macro replacement on `input`.  Returns std::nullopt when no token
// of `input` is the name of a defined macro; otherwise returns a new token
// sequence in which object-like macros, function-like macro calls with a
// suitable argument list, and the predefined __FILE__ / __LINE__ macros have
// been expanded.  Replacement text is itself rescanned via ReplaceMacros()
// with the macro being expanded temporarily disabled, which prevents a macro
// from being expanded within its own expansion.  `prescanner` supplies the
// current provenance used to compute __FILE__ and __LINE__.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, const Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  // Everything before the first defined name is copied through verbatim.
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    // Tokens that cannot be macro names, are not defined, or name a macro
    // that is currently disabled are copied unchanged.
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ are recognized by the first token of their
        // replacement and evaluated at the prescanner's current provenance.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      // Object-like macro: rescan its replacement with the macro disabled.
      def.set_isDisabled(true);
      TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      // A function-like macro name without an argument list is left alone.
      result.Put(input, j);
      continue;
    }
    // argStart[n] is the index of the first token of actual argument n.
    // After this loop, k is the index of the closing ')' (or == tokens if
    // the argument list is not terminated).
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    // An unterminated list or a wrong number of arguments means that this
    // is not treated as a call; the name is copied unchanged.
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    // Slice out each actual argument; an argument ends just before the next
    // separating comma, or just before the closing ')' for the last one.
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    // Substitute the arguments, then rescan with the macro disabled.
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, allSources_), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
349 
350 TokenSequence Preprocessor::ReplaceMacros(
351     const TokenSequence &tokens, const Prescanner &prescanner) {
352   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
353     return std::move(*repl);
354   }
355   return tokens;
356 }
357 
358 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
359   std::size_t tokens{dir.SizeInTokens()};
360   std::size_t j{dir.SkipBlanks(0)};
361   if (j == tokens) {
362     return;
363   }
364   if (dir.TokenAt(j).ToString() != "#") {
365     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
366     return;
367   }
368   j = dir.SkipBlanks(j + 1);
369   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
370     --tokens;
371   }
372   if (j == tokens) {
373     return;
374   }
375   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
376     return; // treat like #line, ignore it
377   }
378   std::size_t dirOffset{j};
379   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
380   j = dir.SkipBlanks(j + 1);
381   CharBlock nameToken;
382   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
383     nameToken = dir.TokenAt(j);
384   }
385   if (dirName == "line") {
386     // #line is ignored
387   } else if (dirName == "define") {
388     if (nameToken.empty()) {
389       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
390           "#define: missing or invalid name"_err_en_US);
391       return;
392     }
393     nameToken = SaveTokenAsName(nameToken);
394     definitions_.erase(nameToken);
395     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
396         dir.TokenAt(j)[0] == '(') {
397       j = dir.SkipBlanks(j + 1);
398       std::vector<std::string> argName;
399       bool isVariadic{false};
400       if (dir.TokenAt(j).ToString() != ")") {
401         while (true) {
402           std::string an{dir.TokenAt(j).ToString()};
403           if (an == "...") {
404             isVariadic = true;
405           } else {
406             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
407               prescanner->Say(dir.GetTokenProvenanceRange(j),
408                   "#define: missing or invalid argument name"_err_en_US);
409               return;
410             }
411             argName.push_back(an);
412           }
413           j = dir.SkipBlanks(j + 1);
414           if (j == tokens) {
415             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
416                 "#define: malformed argument list"_err_en_US);
417             return;
418           }
419           std::string punc{dir.TokenAt(j).ToString()};
420           if (punc == ")") {
421             break;
422           }
423           if (isVariadic || punc != ",") {
424             prescanner->Say(dir.GetTokenProvenanceRange(j),
425                 "#define: malformed argument list"_err_en_US);
426             return;
427           }
428           j = dir.SkipBlanks(j + 1);
429           if (j == tokens) {
430             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
431                 "#define: malformed argument list"_err_en_US);
432             return;
433           }
434         }
435         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
436             argName.size()) {
437           prescanner->Say(dir.GetTokenProvenance(dirOffset),
438               "#define: argument names are not distinct"_err_en_US);
439           return;
440         }
441       }
442       j = dir.SkipBlanks(j + 1);
443       definitions_.emplace(std::make_pair(
444           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
445     } else {
446       j = dir.SkipBlanks(j + 1);
447       definitions_.emplace(
448           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
449     }
450   } else if (dirName == "undef") {
451     if (nameToken.empty()) {
452       prescanner->Say(
453           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
454           "# missing or invalid name"_err_en_US);
455     } else {
456       j = dir.SkipBlanks(j + 1);
457       if (j != tokens) {
458         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
459             "#undef: excess tokens at end of directive"_err_en_US);
460       } else {
461         definitions_.erase(nameToken);
462       }
463     }
464   } else if (dirName == "ifdef" || dirName == "ifndef") {
465     bool doThen{false};
466     if (nameToken.empty()) {
467       prescanner->Say(
468           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
469           "#%s: missing name"_err_en_US, dirName);
470     } else {
471       j = dir.SkipBlanks(j + 1);
472       if (j != tokens) {
473         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
474             "#%s: excess tokens at end of directive"_en_US, dirName);
475       }
476       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
477     }
478     if (doThen) {
479       ifStack_.push(CanDeadElseAppear::Yes);
480     } else {
481       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
482           dir.GetTokenProvenance(dirOffset));
483     }
484   } else if (dirName == "if") {
485     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
486       ifStack_.push(CanDeadElseAppear::Yes);
487     } else {
488       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
489           dir.GetTokenProvenanceRange(dirOffset));
490     }
491   } else if (dirName == "else") {
492     if (j != tokens) {
493       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
494           "#else: excess tokens at end of directive"_err_en_US);
495     } else if (ifStack_.empty()) {
496       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
497           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
498     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
499       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
500           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
501     } else {
502       ifStack_.pop();
503       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
504           dir.GetTokenProvenanceRange(dirOffset));
505     }
506   } else if (dirName == "elif") {
507     if (ifStack_.empty()) {
508       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
509           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
510     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
511       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
512           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
513     } else {
514       ifStack_.pop();
515       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
516           dir.GetTokenProvenanceRange(dirOffset));
517     }
518   } else if (dirName == "endif") {
519     if (j != tokens) {
520       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
521           "#endif: excess tokens at end of directive"_err_en_US);
522     } else if (ifStack_.empty()) {
523       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
524           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
525     } else {
526       ifStack_.pop();
527     }
528   } else if (dirName == "error") {
529     prescanner->Say(
530         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
531         "%s"_err_en_US, dir.ToString());
532   } else if (dirName == "warning" || dirName == "comment" ||
533       dirName == "note") {
534     prescanner->Say(
535         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
536         "%s"_en_US, dir.ToString());
537   } else if (dirName == "include") {
538     if (j == tokens) {
539       prescanner->Say(
540           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
541           "#include: missing name of file to include"_err_en_US);
542       return;
543     }
544     std::string include;
545     if (dir.TokenAt(j).ToString() == "<") {
546       std::size_t k{j + 1};
547       if (k >= tokens) {
548         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
549             "#include: file name missing"_err_en_US);
550         return;
551       }
552       while (k < tokens && dir.TokenAt(k) != ">") {
553         ++k;
554       }
555       if (k >= tokens) {
556         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
557             "#include: expected '>' at end of included file"_en_US);
558       } else if (k + 1 < tokens) {
559         prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
560             "#include: extra stuff ignored after '>'"_en_US);
561       }
562       TokenSequence braced{dir, j + 1, k - j - 1};
563       include = ReplaceMacros(braced, *prescanner).ToString();
564     } else if (j + 1 == tokens &&
565         (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
566         include.substr(include.size() - 1, 1) == "\"") {
567       include = include.substr(1, include.size() - 2);
568     } else {
569       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
570           "#include: expected name of file to include"_err_en_US);
571       return;
572     }
573     if (include.empty()) {
574       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
575           "#include: empty include file name"_err_en_US);
576       return;
577     }
578     std::string buf;
579     llvm::raw_string_ostream error{buf};
580     const SourceFile *included{allSources_.Open(include, error)};
581     if (!included) {
582       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
583           "#include: %s"_err_en_US, error.str());
584     } else if (included->bytes() > 0) {
585       ProvenanceRange fileRange{
586           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
587       Prescanner{*prescanner}
588           .set_encoding(included->encoding())
589           .Prescan(fileRange);
590     }
591   } else {
592     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
593         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
594   }
595 }
596 
597 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
598   names_.push_back(t.ToString());
599   return {names_.back().data(), names_.back().size()};
600 }
601 
602 bool Preprocessor::IsNameDefined(const CharBlock &token) {
603   return definitions_.find(token) != definitions_.end();
604 }
605 
606 static std::string GetDirectiveName(
607     const TokenSequence &line, std::size_t *rest) {
608   std::size_t tokens{line.SizeInTokens()};
609   std::size_t j{line.SkipBlanks(0)};
610   if (j == tokens || line.TokenAt(j).ToString() != "#") {
611     *rest = tokens;
612     return "";
613   }
614   j = line.SkipBlanks(j + 1);
615   if (j == tokens) {
616     *rest = tokens;
617     return "";
618   }
619   *rest = line.SkipBlanks(j + 1);
620   return ToLowerCaseLetters(line.TokenAt(j).ToString());
621 }
622 
623 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
624     IsElseActive isElseActive, Prescanner *prescanner,
625     ProvenanceRange provenanceRange) {
626   int nesting{0};
627   while (!prescanner->IsAtEnd()) {
628     if (!prescanner->IsNextLinePreprocessorDirective()) {
629       prescanner->NextLine();
630       continue;
631     }
632     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
633     std::size_t rest{0};
634     std::string dn{GetDirectiveName(line, &rest)};
635     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
636       ++nesting;
637     } else if (dn == "endif") {
638       if (nesting-- == 0) {
639         return;
640       }
641     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
642       if (dn == "else") {
643         ifStack_.push(CanDeadElseAppear::No);
644         return;
645       }
646       if (dn == "elif" &&
647           IsIfPredicateTrue(
648               line, rest, line.SizeInTokens() - rest, prescanner)) {
649         ifStack_.push(CanDeadElseAppear::Yes);
650         return;
651       }
652     }
653   }
654   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
655 }
656 
657 // Precedence level codes used here to accommodate mixed Fortran and C:
658 // 15: parentheses and constants, logical !, bitwise ~
659 // 14: unary + and -
660 // 13: **
661 // 12: *, /, % (modulus)
662 // 11: + and -
663 // 10: << and >>
664 //  9: bitwise &
665 //  8: bitwise ^
666 //  7: bitwise |
667 //  6: relations (.EQ., ==, &c.)
668 //  5: .NOT.
669 //  4: .AND., &&
670 //  3: .OR., ||
671 //  2: .EQV. and .NEQV. / .XOR.
672 //  1: ? :
673 //  0: ,
674 static std::int64_t ExpressionValue(const TokenSequence &token,
675     int minimumPrecedence, std::size_t *atToken,
676     std::optional<Message> *error) {
677   enum Operator {
678     PARENS,
679     CONST,
680     NOTZERO, // !
681     COMPLEMENT, // ~
682     UPLUS,
683     UMINUS,
684     POWER,
685     TIMES,
686     DIVIDE,
687     MODULUS,
688     ADD,
689     SUBTRACT,
690     LEFTSHIFT,
691     RIGHTSHIFT,
692     BITAND,
693     BITXOR,
694     BITOR,
695     LT,
696     LE,
697     EQ,
698     NE,
699     GE,
700     GT,
701     NOT,
702     AND,
703     OR,
704     EQV,
705     NEQV,
706     SELECT,
707     COMMA
708   };
709   static const int precedence[]{
710       15, 15, 15, 15, // (), 6, !, ~
711       14, 14, // unary +, -
712       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
713       9, 8, 7, // &, ^, |
714       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
715       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
716       1, 0 // ?: and ,
717   };
718   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
719       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
720 
721   static std::map<std::string, enum Operator> opNameMap;
722   if (opNameMap.empty()) {
723     opNameMap["("] = PARENS;
724     opNameMap["!"] = NOTZERO;
725     opNameMap["~"] = COMPLEMENT;
726     opNameMap["**"] = POWER;
727     opNameMap["*"] = TIMES;
728     opNameMap["/"] = DIVIDE;
729     opNameMap["%"] = MODULUS;
730     opNameMap["+"] = ADD;
731     opNameMap["-"] = SUBTRACT;
732     opNameMap["<<"] = LEFTSHIFT;
733     opNameMap[">>"] = RIGHTSHIFT;
734     opNameMap["&"] = BITAND;
735     opNameMap["^"] = BITXOR;
736     opNameMap["|"] = BITOR;
737     opNameMap[".lt."] = opNameMap["<"] = LT;
738     opNameMap[".le."] = opNameMap["<="] = LE;
739     opNameMap[".eq."] = opNameMap["=="] = EQ;
740     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
741     opNameMap[".ge."] = opNameMap[">="] = GE;
742     opNameMap[".gt."] = opNameMap[">"] = GT;
743     opNameMap[".not."] = NOT;
744     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
745     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
746     opNameMap[".eqv."] = EQV;
747     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
748     opNameMap["?"] = SELECT;
749     opNameMap[","] = COMMA;
750   }
751 
752   std::size_t tokens{token.SizeInTokens()};
753   CHECK(tokens > 0);
754   if (*atToken >= tokens) {
755     *error =
756         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
757     return 0;
758   }
759 
760   // Parse and evaluate a primary or a unary operator and its operand.
761   std::size_t opAt{*atToken};
762   std::string t{token.TokenAt(opAt).ToString()};
763   enum Operator op;
764   std::int64_t left{0};
765   if (t == "(") {
766     op = PARENS;
767   } else if (IsDecimalDigit(t[0])) {
768     op = CONST;
769     std::size_t consumed{0};
770     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
771     if (consumed < t.size()) {
772       *error = Message{token.GetTokenProvenanceRange(opAt),
773           "Uninterpretable numeric constant '%s'"_err_en_US, t};
774       return 0;
775     }
776   } else if (IsLegalIdentifierStart(t[0])) {
777     // undefined macro name -> zero
778     // TODO: BOZ constants?
779     op = CONST;
780   } else if (t == "+") {
781     op = UPLUS;
782   } else if (t == "-") {
783     op = UMINUS;
784   } else if (t == "." && *atToken + 2 < tokens &&
785       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
786       token.TokenAt(*atToken + 2).ToString() == ".") {
787     op = NOT;
788     *atToken += 2;
789   } else {
790     auto it{opNameMap.find(t)};
791     if (it != opNameMap.end()) {
792       op = it->second;
793     } else {
794       *error = Message{token.GetTokenProvenanceRange(opAt),
795           "operand expected in expression"_err_en_US};
796       return 0;
797     }
798   }
799   if (precedence[op] < minimumPrecedence) {
800     *error = Message{token.GetTokenProvenanceRange(opAt),
801         "operator precedence error"_err_en_US};
802     return 0;
803   }
804   ++*atToken;
805   if (op != CONST) {
806     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
807     if (*error) {
808       return 0;
809     }
810     switch (op) {
811     case PARENS:
812       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
813         ++*atToken;
814         break;
815       }
816       if (*atToken >= tokens) {
817         *error = Message{token.GetProvenanceRange(),
818             "')' missing from expression"_err_en_US};
819       } else {
820         *error = Message{
821             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
822       }
823       return 0;
824     case NOTZERO:
825       left = !left;
826       break;
827     case COMPLEMENT:
828       left = ~left;
829       break;
830     case UPLUS:
831       break;
832     case UMINUS:
833       left = -left;
834       break;
835     case NOT:
836       left = -!left;
837       break;
838     default:
839       CRASH_NO_CASE;
840     }
841   }
842 
843   // Parse and evaluate binary operators and their second operands, if present.
844   while (*atToken < tokens) {
845     int advance{1};
846     t = token.TokenAt(*atToken).ToString();
847     if (t == "." && *atToken + 2 < tokens &&
848         token.TokenAt(*atToken + 2).ToString() == ".") {
849       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
850       advance = 3;
851     }
852     auto it{opNameMap.find(t)};
853     if (it == opNameMap.end()) {
854       break;
855     }
856     op = it->second;
857     if (op < POWER || precedence[op] < minimumPrecedence) {
858       break;
859     }
860     opAt = *atToken;
861     *atToken += advance;
862 
863     std::int64_t right{
864         ExpressionValue(token, operandPrecedence[op], atToken, error)};
865     if (*error) {
866       return 0;
867     }
868 
869     switch (op) {
870     case POWER:
871       if (left == 0) {
872         if (right < 0) {
873           *error = Message{token.GetTokenProvenanceRange(opAt),
874               "0 ** negative power"_err_en_US};
875         }
876       } else if (left != 1 && right != 1) {
877         if (right <= 0) {
878           left = !right;
879         } else {
880           std::int64_t power{1};
881           for (; right > 0; --right) {
882             if ((power * left) / left != power) {
883               *error = Message{token.GetTokenProvenanceRange(opAt),
884                   "overflow in exponentation"_err_en_US};
885               left = 1;
886             }
887             power *= left;
888           }
889           left = power;
890         }
891       }
892       break;
893     case TIMES:
894       if (left != 0 && right != 0 && ((left * right) / left) != right) {
895         *error = Message{token.GetTokenProvenanceRange(opAt),
896             "overflow in multiplication"_err_en_US};
897       }
898       left = left * right;
899       break;
900     case DIVIDE:
901       if (right == 0) {
902         *error = Message{
903             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
904         left = 0;
905       } else {
906         left = left / right;
907       }
908       break;
909     case MODULUS:
910       if (right == 0) {
911         *error = Message{
912             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
913         left = 0;
914       } else {
915         left = left % right;
916       }
917       break;
918     case ADD:
919       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
920         *error = Message{token.GetTokenProvenanceRange(opAt),
921             "overflow in addition"_err_en_US};
922       }
923       left = left + right;
924       break;
925     case SUBTRACT:
926       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
927         *error = Message{token.GetTokenProvenanceRange(opAt),
928             "overflow in subtraction"_err_en_US};
929       }
930       left = left - right;
931       break;
932     case LEFTSHIFT:
933       if (right < 0 || right > 64) {
934         *error = Message{token.GetTokenProvenanceRange(opAt),
935             "bad left shift count"_err_en_US};
936       }
937       left = right >= 64 ? 0 : left << right;
938       break;
939     case RIGHTSHIFT:
940       if (right < 0 || right > 64) {
941         *error = Message{token.GetTokenProvenanceRange(opAt),
942             "bad right shift count"_err_en_US};
943       }
944       left = right >= 64 ? 0 : left >> right;
945       break;
946     case BITAND:
947     case AND:
948       left = left & right;
949       break;
950     case BITXOR:
951       left = left ^ right;
952       break;
953     case BITOR:
954     case OR:
955       left = left | right;
956       break;
957     case LT:
958       left = -(left < right);
959       break;
960     case LE:
961       left = -(left <= right);
962       break;
963     case EQ:
964       left = -(left == right);
965       break;
966     case NE:
967       left = -(left != right);
968       break;
969     case GE:
970       left = -(left >= right);
971       break;
972     case GT:
973       left = -(left > right);
974       break;
975     case EQV:
976       left = -(!left == !right);
977       break;
978     case NEQV:
979       left = -(!left != !right);
980       break;
981     case SELECT:
982       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
983         *error = Message{token.GetTokenProvenanceRange(opAt),
984             "':' required in selection expression"_err_en_US};
985         return 0;
986       } else {
987         ++*atToken;
988         std::int64_t third{
989             ExpressionValue(token, operandPrecedence[op], atToken, error)};
990         left = left != 0 ? right : third;
991       }
992       break;
993     case COMMA:
994       left = right;
995       break;
996     default:
997       CRASH_NO_CASE;
998     }
999   }
1000   return left;
1001 }
1002 
1003 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1004     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
1005   TokenSequence expr1{expr, first, exprTokens};
1006   if (expr1.HasBlanks()) {
1007     expr1.RemoveBlanks();
1008   }
1009   TokenSequence expr2;
1010   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1011     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1012       CharBlock name;
1013       if (j + 3 < expr1.SizeInTokens() &&
1014           expr1.TokenAt(j + 1).ToString() == "(" &&
1015           expr1.TokenAt(j + 3).ToString() == ")") {
1016         name = expr1.TokenAt(j + 2);
1017         j += 3;
1018       } else if (j + 1 < expr1.SizeInTokens() &&
1019           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1020         name = expr1.TokenAt(++j);
1021       }
1022       if (!name.empty()) {
1023         char truth{IsNameDefined(name) ? '1' : '0'};
1024         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1025         continue;
1026       }
1027     }
1028     expr2.Put(expr1, j);
1029   }
1030   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1031   if (expr3.HasBlanks()) {
1032     expr3.RemoveBlanks();
1033   }
1034   if (expr3.empty()) {
1035     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1036     return false;
1037   }
1038   std::size_t atToken{0};
1039   std::optional<Message> error;
1040   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1041   if (error) {
1042     prescanner->Say(std::move(*error));
1043   } else if (atToken < expr3.SizeInTokens() &&
1044       expr3.TokenAt(atToken).ToString() != "!") {
1045     prescanner->Say(expr3.GetIntervalProvenanceRange(
1046                         atToken, expr3.SizeInTokens() - atToken),
1047         atToken == 0 ? "could not parse any expression"_err_en_US
1048                      : "excess characters after expression"_err_en_US);
1049   }
1050   return result;
1051 }
1052 } // namespace Fortran::parser
1053