1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
77   for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
78     if (!tokens.TokenAt(j - 1).IsBlank()) {
79       return j;
80     }
81   }
82   return 0;
83 }
84 
85 static TokenSequence Stringify(
86     const TokenSequence &tokens, AllSources &allSources) {
87   TokenSequence result;
88   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
89   result.PutNextTokenChar('"', quoteProvenance);
90   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
91     const CharBlock &token{tokens.TokenAt(j)};
92     std::size_t bytes{token.size()};
93     for (std::size_t k{0}; k < bytes; ++k) {
94       char ch{token[k]};
95       Provenance from{tokens.GetTokenProvenance(j, k)};
96       if (ch == '"' || ch == '\\') {
97         result.PutNextTokenChar(ch, from);
98       }
99       result.PutNextTokenChar(ch, from);
100     }
101   }
102   result.PutNextTokenChar('"', quoteProvenance);
103   result.CloseToken();
104   return result;
105 }
106 
107 TokenSequence Definition::Apply(
108     const std::vector<TokenSequence> &args, AllSources &allSources) {
109   TokenSequence result;
110   bool pasting{false};
111   bool skipping{false};
112   int parenthesesNesting{0};
113   std::size_t tokens{replacement_.SizeInTokens()};
114   for (std::size_t j{0}; j < tokens; ++j) {
115     const CharBlock &token{replacement_.TokenAt(j)};
116     std::size_t bytes{token.size()};
117     if (skipping) {
118       if (bytes == 1) {
119         if (token[0] == '(') {
120           ++parenthesesNesting;
121         } else if (token[0] == ')') {
122           skipping = --parenthesesNesting > 0;
123         }
124       }
125       continue;
126     }
127     if (bytes == 2 && token[0] == '~') {
128       std::size_t index = token[1] - 'A';
129       if (index >= args.size()) {
130         continue;
131       }
132       std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
133       if (afterLastNonBlank > 0 &&
134           result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
135         // stringifying
136         while (result.SizeInTokens() >= afterLastNonBlank) {
137           result.pop_back();
138         }
139         result.Put(Stringify(args[index], allSources));
140       } else {
141         std::size_t argTokens{args[index].SizeInTokens()};
142         for (std::size_t k{0}; k < argTokens; ++k) {
143           if (!pasting || !args[index].TokenAt(k).IsBlank()) {
144             result.Put(args[index], k);
145             pasting = false;
146           }
147         }
148       }
149     } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
150       // Token pasting operator in body (not expanded argument); discard any
151       // immediately preceding white space, then reopen the last token.
152       while (!result.empty() &&
153           result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
154         result.pop_back();
155       }
156       if (!result.empty()) {
157         result.ReopenLastToken();
158         pasting = true;
159       }
160     } else if (pasting && token.IsBlank()) {
161       // Delete whitespace immediately following ## in the body.
162     } else if (bytes == 11 && isVariadic_ &&
163         token.ToString() == "__VA_ARGS__") {
164       Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
165       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
166         if (k > argumentCount_) {
167           result.Put(","s, commaProvenance);
168         }
169         result.Put(args[k]);
170       }
171     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
172         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
173         parenthesesNesting == 0) {
174       parenthesesNesting = 1;
175       skipping = args.size() == argumentCount_;
176       ++j;
177     } else {
178       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
179         ++parenthesesNesting;
180       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
181         if (--parenthesesNesting == 0) {
182           skipping = false;
183           continue;
184         }
185       }
186       result.Put(replacement_, j);
187     }
188   }
189   return result;
190 }
191 
// Formats the local-time representation of `now` according to the
// strftime() format string `format` and returns the result.
// An empty string is returned when the time cannot be converted to local
// time or when the formatted text does not fit in the internal buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime() may return a null pointer on failure, and passing
  // that on to std::strftime() would be undefined behavior.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  // Leave headroom beyond the ~14 bytes needed for "\"Jun 16 1904\"" so
  // that longer results are not silently turned into an empty string.
  char buffer[64];
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return {buffer, length};
}
197 
198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
199   // Capture current local date & time once now to avoid having the values
200   // of __DATE__ or __TIME__ change during compilation.
201   std::time_t now;
202   std::time(&now);
203   definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
204       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
205   definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
206       Definition{FormatTime(now, "\"%T\""), allSources});
207   // The values of these predefined macros depend on their invocation sites.
208   definitions_.emplace(
209       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
210   definitions_.emplace(
211       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
212 }
213 
214 void Preprocessor::Define(std::string macro, std::string value) {
215   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
216 }
217 
218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
219 
220 std::optional<TokenSequence> Preprocessor::MacroReplacement(
221     const TokenSequence &input, const Prescanner &prescanner) {
222   // Do quick scan for any use of a defined name.
223   std::size_t tokens{input.SizeInTokens()};
224   std::size_t j;
225   for (j = 0; j < tokens; ++j) {
226     CharBlock token{input.TokenAt(j)};
227     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
228         IsNameDefined(token)) {
229       break;
230     }
231   }
232   if (j == tokens) {
233     return std::nullopt; // input contains nothing that would be replaced
234   }
235   TokenSequence result{input, 0, j};
236   for (; j < tokens; ++j) {
237     const CharBlock &token{input.TokenAt(j)};
238     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
239       result.Put(input, j);
240       continue;
241     }
242     auto it{definitions_.find(token)};
243     if (it == definitions_.end()) {
244       result.Put(input, j);
245       continue;
246     }
247     Definition &def{it->second};
248     if (def.isDisabled()) {
249       result.Put(input, j);
250       continue;
251     }
252     if (!def.isFunctionLike()) {
253       if (def.isPredefined()) {
254         std::string name{def.replacement().TokenAt(0).ToString()};
255         std::string repl;
256         if (name == "__FILE__") {
257           repl = "\""s +
258               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
259         } else if (name == "__LINE__") {
260           std::string buf;
261           llvm::raw_string_ostream ss{buf};
262           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
263           repl = ss.str();
264         }
265         if (!repl.empty()) {
266           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
267           ProvenanceRange call{allSources_.AddMacroCall(
268               insert, input.GetTokenProvenanceRange(j), repl)};
269           result.Put(repl, call.start());
270           continue;
271         }
272       }
273       def.set_isDisabled(true);
274       TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
275       def.set_isDisabled(false);
276       if (!replaced.empty()) {
277         ProvenanceRange from{def.replacement().GetProvenanceRange()};
278         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
279         ProvenanceRange newRange{
280             allSources_.AddMacroCall(from, use, replaced.ToString())};
281         result.Put(replaced, newRange);
282       }
283       continue;
284     }
285     // Possible function-like macro call.  Skip spaces and newlines to see
286     // whether '(' is next.
287     std::size_t k{j};
288     bool leftParen{false};
289     while (++k < tokens) {
290       const CharBlock &lookAhead{input.TokenAt(k)};
291       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
292         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
293         break;
294       }
295     }
296     if (!leftParen) {
297       result.Put(input, j);
298       continue;
299     }
300     std::vector<std::size_t> argStart{++k};
301     for (int nesting{0}; k < tokens; ++k) {
302       CharBlock token{input.TokenAt(k)};
303       if (token.size() == 1) {
304         char ch{token[0]};
305         if (ch == '(') {
306           ++nesting;
307         } else if (ch == ')') {
308           if (nesting == 0) {
309             break;
310           }
311           --nesting;
312         } else if (ch == ',' && nesting == 0) {
313           argStart.push_back(k + 1);
314         }
315       }
316     }
317     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
318       // Subtle: () is zero arguments, not one empty argument,
319       // unless one argument was expected.
320       argStart.clear();
321     }
322     if (k >= tokens || argStart.size() < def.argumentCount() ||
323         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
324       result.Put(input, j);
325       continue;
326     }
327     std::vector<TokenSequence> args;
328     for (std::size_t n{0}; n < argStart.size(); ++n) {
329       std::size_t at{argStart[n]};
330       std::size_t count{
331           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
332       args.emplace_back(TokenSequence(input, at, count));
333     }
334     def.set_isDisabled(true);
335     TokenSequence replaced{
336         ReplaceMacros(def.Apply(args, allSources_), prescanner)};
337     def.set_isDisabled(false);
338     if (!replaced.empty()) {
339       ProvenanceRange from{def.replacement().GetProvenanceRange()};
340       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
341       ProvenanceRange newRange{
342           allSources_.AddMacroCall(from, use, replaced.ToString())};
343       result.Put(replaced, newRange);
344     }
345     j = k; // advance to the terminal ')'
346   }
347   return result;
348 }
349 
350 TokenSequence Preprocessor::ReplaceMacros(
351     const TokenSequence &tokens, const Prescanner &prescanner) {
352   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
353     return std::move(*repl);
354   }
355   return tokens;
356 }
357 
358 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
359   std::size_t tokens{dir.SizeInTokens()};
360   std::size_t j{dir.SkipBlanks(0)};
361   if (j == tokens) {
362     return;
363   }
364   if (dir.TokenAt(j).ToString() != "#") {
365     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
366     return;
367   }
368   j = dir.SkipBlanks(j + 1);
369   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
370     --tokens;
371   }
372   if (j == tokens) {
373     return;
374   }
375   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
376     return; // treat like #line, ignore it
377   }
378   std::size_t dirOffset{j};
379   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
380   j = dir.SkipBlanks(j + 1);
381   CharBlock nameToken;
382   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
383     nameToken = dir.TokenAt(j);
384   }
385   if (dirName == "line") {
386     // #line is ignored
387   } else if (dirName == "define") {
388     if (nameToken.empty()) {
389       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
390           "#define: missing or invalid name"_err_en_US);
391       return;
392     }
393     nameToken = SaveTokenAsName(nameToken);
394     definitions_.erase(nameToken);
395     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
396         dir.TokenAt(j)[0] == '(') {
397       j = dir.SkipBlanks(j + 1);
398       std::vector<std::string> argName;
399       bool isVariadic{false};
400       if (dir.TokenAt(j).ToString() != ")") {
401         while (true) {
402           std::string an{dir.TokenAt(j).ToString()};
403           if (an == "...") {
404             isVariadic = true;
405           } else {
406             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
407               prescanner->Say(dir.GetTokenProvenanceRange(j),
408                   "#define: missing or invalid argument name"_err_en_US);
409               return;
410             }
411             argName.push_back(an);
412           }
413           j = dir.SkipBlanks(j + 1);
414           if (j == tokens) {
415             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
416                 "#define: malformed argument list"_err_en_US);
417             return;
418           }
419           std::string punc{dir.TokenAt(j).ToString()};
420           if (punc == ")") {
421             break;
422           }
423           if (isVariadic || punc != ",") {
424             prescanner->Say(dir.GetTokenProvenanceRange(j),
425                 "#define: malformed argument list"_err_en_US);
426             return;
427           }
428           j = dir.SkipBlanks(j + 1);
429           if (j == tokens) {
430             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
431                 "#define: malformed argument list"_err_en_US);
432             return;
433           }
434         }
435         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
436             argName.size()) {
437           prescanner->Say(dir.GetTokenProvenance(dirOffset),
438               "#define: argument names are not distinct"_err_en_US);
439           return;
440         }
441       }
442       j = dir.SkipBlanks(j + 1);
443       definitions_.emplace(std::make_pair(
444           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
445     } else {
446       j = dir.SkipBlanks(j + 1);
447       definitions_.emplace(
448           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
449     }
450   } else if (dirName == "undef") {
451     if (nameToken.empty()) {
452       prescanner->Say(
453           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
454           "# missing or invalid name"_err_en_US);
455     } else {
456       if (dir.IsAnythingLeft(++j)) {
457         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
458             "#undef: excess tokens at end of directive"_en_US);
459       } else {
460         definitions_.erase(nameToken);
461       }
462     }
463   } else if (dirName == "ifdef" || dirName == "ifndef") {
464     bool doThen{false};
465     if (nameToken.empty()) {
466       prescanner->Say(
467           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
468           "#%s: missing name"_err_en_US, dirName);
469     } else {
470       if (dir.IsAnythingLeft(++j)) {
471         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
472             "#%s: excess tokens at end of directive"_en_US, dirName);
473       }
474       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
475     }
476     if (doThen) {
477       ifStack_.push(CanDeadElseAppear::Yes);
478     } else {
479       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
480           dir.GetTokenProvenance(dirOffset));
481     }
482   } else if (dirName == "if") {
483     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
484       ifStack_.push(CanDeadElseAppear::Yes);
485     } else {
486       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
487           dir.GetTokenProvenanceRange(dirOffset));
488     }
489   } else if (dirName == "else") {
490     if (dir.IsAnythingLeft(j)) {
491       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
492           "#else: excess tokens at end of directive"_en_US);
493     } else if (ifStack_.empty()) {
494       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
495           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
496     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
497       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
498           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
499     } else {
500       ifStack_.pop();
501       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
502           dir.GetTokenProvenanceRange(dirOffset));
503     }
504   } else if (dirName == "elif") {
505     if (ifStack_.empty()) {
506       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
507           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
508     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
509       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
510           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
511     } else {
512       ifStack_.pop();
513       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
514           dir.GetTokenProvenanceRange(dirOffset));
515     }
516   } else if (dirName == "endif") {
517     if (dir.IsAnythingLeft(j)) {
518       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
519           "#endif: excess tokens at end of directive"_en_US);
520     } else if (ifStack_.empty()) {
521       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
522           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
523     } else {
524       ifStack_.pop();
525     }
526   } else if (dirName == "error") {
527     prescanner->Say(
528         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
529         "%s"_err_en_US, dir.ToString());
530   } else if (dirName == "warning" || dirName == "comment" ||
531       dirName == "note") {
532     prescanner->Say(
533         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
534         "%s"_en_US, dir.ToString());
535   } else if (dirName == "include") {
536     if (j == tokens) {
537       prescanner->Say(
538           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
539           "#include: missing name of file to include"_err_en_US);
540       return;
541     }
542     std::string include;
543     if (dir.TokenAt(j).ToString() == "<") {
544       std::size_t k{j + 1};
545       if (k >= tokens) {
546         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
547             "#include: file name missing"_err_en_US);
548         return;
549       }
550       while (k < tokens && dir.TokenAt(k) != ">") {
551         ++k;
552       }
553       if (k >= tokens) {
554         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
555             "#include: expected '>' at end of included file"_en_US);
556       } else if (k + 1 < tokens) {
557         prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
558             "#include: extra stuff ignored after '>'"_en_US);
559       }
560       TokenSequence braced{dir, j + 1, k - j - 1};
561       include = ReplaceMacros(braced, *prescanner).ToString();
562     } else if (j + 1 == tokens &&
563         (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
564         include.substr(include.size() - 1, 1) == "\"") {
565       include = include.substr(1, include.size() - 2);
566     } else {
567       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
568           "#include: expected name of file to include"_err_en_US);
569       return;
570     }
571     if (include.empty()) {
572       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
573           "#include: empty include file name"_err_en_US);
574       return;
575     }
576     std::string buf;
577     llvm::raw_string_ostream error{buf};
578     const SourceFile *included{allSources_.Open(include, error)};
579     if (!included) {
580       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
581           "#include: %s"_err_en_US, error.str());
582     } else if (included->bytes() > 0) {
583       ProvenanceRange fileRange{
584           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
585       Prescanner{*prescanner}
586           .set_encoding(included->encoding())
587           .Prescan(fileRange);
588     }
589   } else {
590     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
591         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
592   }
593 }
594 
595 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
596   names_.push_back(t.ToString());
597   return {names_.back().data(), names_.back().size()};
598 }
599 
600 bool Preprocessor::IsNameDefined(const CharBlock &token) {
601   return definitions_.find(token) != definitions_.end();
602 }
603 
604 static std::string GetDirectiveName(
605     const TokenSequence &line, std::size_t *rest) {
606   std::size_t tokens{line.SizeInTokens()};
607   std::size_t j{line.SkipBlanks(0)};
608   if (j == tokens || line.TokenAt(j).ToString() != "#") {
609     *rest = tokens;
610     return "";
611   }
612   j = line.SkipBlanks(j + 1);
613   if (j == tokens) {
614     *rest = tokens;
615     return "";
616   }
617   *rest = line.SkipBlanks(j + 1);
618   return ToLowerCaseLetters(line.TokenAt(j).ToString());
619 }
620 
621 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
622     IsElseActive isElseActive, Prescanner *prescanner,
623     ProvenanceRange provenanceRange) {
624   int nesting{0};
625   while (!prescanner->IsAtEnd()) {
626     if (!prescanner->IsNextLinePreprocessorDirective()) {
627       prescanner->NextLine();
628       continue;
629     }
630     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
631     std::size_t rest{0};
632     std::string dn{GetDirectiveName(line, &rest)};
633     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
634       ++nesting;
635     } else if (dn == "endif") {
636       if (nesting-- == 0) {
637         return;
638       }
639     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
640       if (dn == "else") {
641         ifStack_.push(CanDeadElseAppear::No);
642         return;
643       }
644       if (dn == "elif" &&
645           IsIfPredicateTrue(
646               line, rest, line.SizeInTokens() - rest, prescanner)) {
647         ifStack_.push(CanDeadElseAppear::Yes);
648         return;
649       }
650     }
651   }
652   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
653 }
654 
655 // Precedence level codes used here to accommodate mixed Fortran and C:
656 // 15: parentheses and constants, logical !, bitwise ~
657 // 14: unary + and -
658 // 13: **
659 // 12: *, /, % (modulus)
660 // 11: + and -
661 // 10: << and >>
662 //  9: bitwise &
663 //  8: bitwise ^
664 //  7: bitwise |
665 //  6: relations (.EQ., ==, &c.)
666 //  5: .NOT.
667 //  4: .AND., &&
668 //  3: .OR., ||
669 //  2: .EQV. and .NEQV. / .XOR.
670 //  1: ? :
671 //  0: ,
672 static std::int64_t ExpressionValue(const TokenSequence &token,
673     int minimumPrecedence, std::size_t *atToken,
674     std::optional<Message> *error) {
675   enum Operator {
676     PARENS,
677     CONST,
678     NOTZERO, // !
679     COMPLEMENT, // ~
680     UPLUS,
681     UMINUS,
682     POWER,
683     TIMES,
684     DIVIDE,
685     MODULUS,
686     ADD,
687     SUBTRACT,
688     LEFTSHIFT,
689     RIGHTSHIFT,
690     BITAND,
691     BITXOR,
692     BITOR,
693     LT,
694     LE,
695     EQ,
696     NE,
697     GE,
698     GT,
699     NOT,
700     AND,
701     OR,
702     EQV,
703     NEQV,
704     SELECT,
705     COMMA
706   };
707   static const int precedence[]{
708       15, 15, 15, 15, // (), 6, !, ~
709       14, 14, // unary +, -
710       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
711       9, 8, 7, // &, ^, |
712       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
713       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
714       1, 0 // ?: and ,
715   };
716   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
717       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
718 
719   static std::map<std::string, enum Operator> opNameMap;
720   if (opNameMap.empty()) {
721     opNameMap["("] = PARENS;
722     opNameMap["!"] = NOTZERO;
723     opNameMap["~"] = COMPLEMENT;
724     opNameMap["**"] = POWER;
725     opNameMap["*"] = TIMES;
726     opNameMap["/"] = DIVIDE;
727     opNameMap["%"] = MODULUS;
728     opNameMap["+"] = ADD;
729     opNameMap["-"] = SUBTRACT;
730     opNameMap["<<"] = LEFTSHIFT;
731     opNameMap[">>"] = RIGHTSHIFT;
732     opNameMap["&"] = BITAND;
733     opNameMap["^"] = BITXOR;
734     opNameMap["|"] = BITOR;
735     opNameMap[".lt."] = opNameMap["<"] = LT;
736     opNameMap[".le."] = opNameMap["<="] = LE;
737     opNameMap[".eq."] = opNameMap["=="] = EQ;
738     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
739     opNameMap[".ge."] = opNameMap[">="] = GE;
740     opNameMap[".gt."] = opNameMap[">"] = GT;
741     opNameMap[".not."] = NOT;
742     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
743     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
744     opNameMap[".eqv."] = EQV;
745     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
746     opNameMap["?"] = SELECT;
747     opNameMap[","] = COMMA;
748   }
749 
750   std::size_t tokens{token.SizeInTokens()};
751   CHECK(tokens > 0);
752   if (*atToken >= tokens) {
753     *error =
754         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
755     return 0;
756   }
757 
758   // Parse and evaluate a primary or a unary operator and its operand.
759   std::size_t opAt{*atToken};
760   std::string t{token.TokenAt(opAt).ToString()};
761   enum Operator op;
762   std::int64_t left{0};
763   if (t == "(") {
764     op = PARENS;
765   } else if (IsDecimalDigit(t[0])) {
766     op = CONST;
767     std::size_t consumed{0};
768     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
769     if (consumed < t.size()) {
770       *error = Message{token.GetTokenProvenanceRange(opAt),
771           "Uninterpretable numeric constant '%s'"_err_en_US, t};
772       return 0;
773     }
774   } else if (IsLegalIdentifierStart(t[0])) {
775     // undefined macro name -> zero
776     // TODO: BOZ constants?
777     op = CONST;
778   } else if (t == "+") {
779     op = UPLUS;
780   } else if (t == "-") {
781     op = UMINUS;
782   } else if (t == "." && *atToken + 2 < tokens &&
783       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
784       token.TokenAt(*atToken + 2).ToString() == ".") {
785     op = NOT;
786     *atToken += 2;
787   } else {
788     auto it{opNameMap.find(t)};
789     if (it != opNameMap.end()) {
790       op = it->second;
791     } else {
792       *error = Message{token.GetTokenProvenanceRange(opAt),
793           "operand expected in expression"_err_en_US};
794       return 0;
795     }
796   }
797   if (precedence[op] < minimumPrecedence) {
798     *error = Message{token.GetTokenProvenanceRange(opAt),
799         "operator precedence error"_err_en_US};
800     return 0;
801   }
802   ++*atToken;
803   if (op != CONST) {
804     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
805     if (*error) {
806       return 0;
807     }
808     switch (op) {
809     case PARENS:
810       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
811         ++*atToken;
812         break;
813       }
814       if (*atToken >= tokens) {
815         *error = Message{token.GetProvenanceRange(),
816             "')' missing from expression"_err_en_US};
817       } else {
818         *error = Message{
819             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
820       }
821       return 0;
822     case NOTZERO:
823       left = !left;
824       break;
825     case COMPLEMENT:
826       left = ~left;
827       break;
828     case UPLUS:
829       break;
830     case UMINUS:
831       left = -left;
832       break;
833     case NOT:
834       left = -!left;
835       break;
836     default:
837       CRASH_NO_CASE;
838     }
839   }
840 
841   // Parse and evaluate binary operators and their second operands, if present.
842   while (*atToken < tokens) {
843     int advance{1};
844     t = token.TokenAt(*atToken).ToString();
845     if (t == "." && *atToken + 2 < tokens &&
846         token.TokenAt(*atToken + 2).ToString() == ".") {
847       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
848       advance = 3;
849     }
850     auto it{opNameMap.find(t)};
851     if (it == opNameMap.end()) {
852       break;
853     }
854     op = it->second;
855     if (op < POWER || precedence[op] < minimumPrecedence) {
856       break;
857     }
858     opAt = *atToken;
859     *atToken += advance;
860 
861     std::int64_t right{
862         ExpressionValue(token, operandPrecedence[op], atToken, error)};
863     if (*error) {
864       return 0;
865     }
866 
867     switch (op) {
868     case POWER:
869       if (left == 0) {
870         if (right < 0) {
871           *error = Message{token.GetTokenProvenanceRange(opAt),
872               "0 ** negative power"_err_en_US};
873         }
874       } else if (left != 1 && right != 1) {
875         if (right <= 0) {
876           left = !right;
877         } else {
878           std::int64_t power{1};
879           for (; right > 0; --right) {
880             if ((power * left) / left != power) {
881               *error = Message{token.GetTokenProvenanceRange(opAt),
882                   "overflow in exponentation"_err_en_US};
883               left = 1;
884             }
885             power *= left;
886           }
887           left = power;
888         }
889       }
890       break;
891     case TIMES:
892       if (left != 0 && right != 0 && ((left * right) / left) != right) {
893         *error = Message{token.GetTokenProvenanceRange(opAt),
894             "overflow in multiplication"_err_en_US};
895       }
896       left = left * right;
897       break;
898     case DIVIDE:
899       if (right == 0) {
900         *error = Message{
901             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
902         left = 0;
903       } else {
904         left = left / right;
905       }
906       break;
907     case MODULUS:
908       if (right == 0) {
909         *error = Message{
910             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
911         left = 0;
912       } else {
913         left = left % right;
914       }
915       break;
916     case ADD:
917       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
918         *error = Message{token.GetTokenProvenanceRange(opAt),
919             "overflow in addition"_err_en_US};
920       }
921       left = left + right;
922       break;
923     case SUBTRACT:
924       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
925         *error = Message{token.GetTokenProvenanceRange(opAt),
926             "overflow in subtraction"_err_en_US};
927       }
928       left = left - right;
929       break;
930     case LEFTSHIFT:
931       if (right < 0 || right > 64) {
932         *error = Message{token.GetTokenProvenanceRange(opAt),
933             "bad left shift count"_err_en_US};
934       }
935       left = right >= 64 ? 0 : left << right;
936       break;
937     case RIGHTSHIFT:
938       if (right < 0 || right > 64) {
939         *error = Message{token.GetTokenProvenanceRange(opAt),
940             "bad right shift count"_err_en_US};
941       }
942       left = right >= 64 ? 0 : left >> right;
943       break;
944     case BITAND:
945     case AND:
946       left = left & right;
947       break;
948     case BITXOR:
949       left = left ^ right;
950       break;
951     case BITOR:
952     case OR:
953       left = left | right;
954       break;
955     case LT:
956       left = -(left < right);
957       break;
958     case LE:
959       left = -(left <= right);
960       break;
961     case EQ:
962       left = -(left == right);
963       break;
964     case NE:
965       left = -(left != right);
966       break;
967     case GE:
968       left = -(left >= right);
969       break;
970     case GT:
971       left = -(left > right);
972       break;
973     case EQV:
974       left = -(!left == !right);
975       break;
976     case NEQV:
977       left = -(!left != !right);
978       break;
979     case SELECT:
980       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
981         *error = Message{token.GetTokenProvenanceRange(opAt),
982             "':' required in selection expression"_err_en_US};
983         return 0;
984       } else {
985         ++*atToken;
986         std::int64_t third{
987             ExpressionValue(token, operandPrecedence[op], atToken, error)};
988         left = left != 0 ? right : third;
989       }
990       break;
991     case COMMA:
992       left = right;
993       break;
994     default:
995       CRASH_NO_CASE;
996     }
997   }
998   return left;
999 }
1000 
1001 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1002     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
1003   TokenSequence expr1{expr, first, exprTokens};
1004   if (expr1.HasBlanks()) {
1005     expr1.RemoveBlanks();
1006   }
1007   TokenSequence expr2;
1008   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1009     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1010       CharBlock name;
1011       if (j + 3 < expr1.SizeInTokens() &&
1012           expr1.TokenAt(j + 1).ToString() == "(" &&
1013           expr1.TokenAt(j + 3).ToString() == ")") {
1014         name = expr1.TokenAt(j + 2);
1015         j += 3;
1016       } else if (j + 1 < expr1.SizeInTokens() &&
1017           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1018         name = expr1.TokenAt(++j);
1019       }
1020       if (!name.empty()) {
1021         char truth{IsNameDefined(name) ? '1' : '0'};
1022         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1023         continue;
1024       }
1025     }
1026     expr2.Put(expr1, j);
1027   }
1028   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1029   if (expr3.HasBlanks()) {
1030     expr3.RemoveBlanks();
1031   }
1032   if (expr3.empty()) {
1033     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1034     return false;
1035   }
1036   std::size_t atToken{0};
1037   std::optional<Message> error;
1038   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1039   if (error) {
1040     prescanner->Say(std::move(*error));
1041   } else if (atToken < expr3.SizeInTokens() &&
1042       expr3.TokenAt(atToken).ToString() != "!") {
1043     prescanner->Say(expr3.GetIntervalProvenanceRange(
1044                         atToken, expr3.SizeInTokens() - atToken),
1045         atToken == 0 ? "could not parse any expression"_err_en_US
1046                      : "excess characters after expression"_err_en_US);
1047   }
1048   return result;
1049 }
1050 } // namespace Fortran::parser
1051