1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
54     const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
55   std::map<std::string, std::string> args;
56   char argIndex{'A'};
57   for (const std::string &arg : argNames) {
58     CHECK(args.find(arg) == args.end());
59     args[arg] = "~"s + argIndex++;
60   }
61   TokenSequence result;
62   for (std::size_t j{0}; j < tokens; ++j) {
63     CharBlock tok{token.TokenAt(firstToken + j)};
64     if (IsLegalIdentifierStart(tok)) {
65       auto it{args.find(tok.ToString())};
66       if (it != args.end()) {
67         result.Put(it->second, token.GetTokenProvenance(j));
68         continue;
69       }
70     }
71     result.Put(token, firstToken + j, 1);
72   }
73   return result;
74 }
75 
76 static TokenSequence Stringify(
77     const TokenSequence &tokens, AllSources &allSources) {
78   TokenSequence result;
79   Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
80   result.PutNextTokenChar('"', quoteProvenance);
81   for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
82     const CharBlock &token{tokens.TokenAt(j)};
83     std::size_t bytes{token.size()};
84     for (std::size_t k{0}; k < bytes; ++k) {
85       char ch{token[k]};
86       Provenance from{tokens.GetTokenProvenance(j, k)};
87       if (ch == '"' || ch == '\\') {
88         result.PutNextTokenChar(ch, from);
89       }
90       result.PutNextTokenChar(ch, from);
91     }
92   }
93   result.PutNextTokenChar('"', quoteProvenance);
94   result.CloseToken();
95   return result;
96 }
97 
98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
140 TokenSequence Definition::Apply(
141     const std::vector<TokenSequence> &args, Prescanner &prescanner) {
142   TokenSequence result;
143   bool skipping{false};
144   int parenthesesNesting{0};
145   std::size_t tokens{replacement_.SizeInTokens()};
146   for (std::size_t j{0}; j < tokens; ++j) {
147     CharBlock token{replacement_.TokenAt(j)};
148     std::size_t bytes{token.size()};
149     if (skipping) {
150       if (bytes == 1) {
151         if (token[0] == '(') {
152           ++parenthesesNesting;
153         } else if (token[0] == ')') {
154           skipping = --parenthesesNesting > 0;
155         }
156       }
157       continue;
158     }
159     if (bytes == 2 && token[0] == '~') { // argument substitution
160       std::size_t index = token[1] - 'A';
161       if (index >= args.size()) {
162         continue;
163       }
164       std::size_t prev{j};
165       while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
166         --prev;
167       }
168       if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
169           replacement_.TokenAt(prev - 1)[0] ==
170               '#') { // stringify argument without macro replacement
171         std::size_t resultSize{result.SizeInTokens()};
172         while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
173           result.pop_back();
174         }
175         CHECK(resultSize > 0 &&
176             result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
177         result.pop_back();
178         result.Put(Stringify(args[index], prescanner.allSources()));
179       } else {
180         const TokenSequence *arg{&args[index]};
181         std::optional<TokenSequence> replaced;
182         // Don't replace macros in the actual argument if it is preceded or
183         // followed by the token-pasting operator ## in the replacement text.
184         if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
185           auto next{replacement_.SkipBlanks(j + 1)};
186           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
187             // Apply macro replacement to the actual argument
188             replaced =
189                 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
190             if (replaced) {
191               arg = &*replaced;
192             }
193           }
194         }
195         result.Put(DEREF(arg));
196       }
197     } else if (bytes == 11 && isVariadic_ &&
198         token.ToString() == "__VA_ARGS__") {
199       Provenance commaProvenance{
200           prescanner.preprocessor().allSources().CompilerInsertionProvenance(
201               ',')};
202       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
203         if (k > argumentCount_) {
204           result.Put(","s, commaProvenance);
205         }
206         result.Put(args[k]);
207       }
208     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
209         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
210         parenthesesNesting == 0) {
211       parenthesesNesting = 1;
212       skipping = args.size() == argumentCount_;
213       ++j;
214     } else {
215       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
216         ++parenthesesNesting;
217       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
218         if (--parenthesesNesting == 0) {
219           skipping = false;
220           continue;
221         }
222       }
223       result.Put(replacement_, j);
224     }
225   }
226   return TokenPasting(std::move(result));
227 }
228 
// Formats the local-time representation of `now` with the std::strftime
// conversion specification `format` and returns the resulting text.
// An empty string is returned when `now` cannot be converted to local time
// or when the formatted text (with its terminating NUL) does not fit in the
// 16-byte buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime yields a null pointer on failure; that pointer must not
  // be handed to std::strftime.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  char buffer[16];
  // std::strftime returns the number of characters written, or zero when
  // the result did not fit.
  return {buffer, std::strftime(buffer, sizeof buffer, format, local)};
}
234 
235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
236   // Capture current local date & time once now to avoid having the values
237   // of __DATE__ or __TIME__ change during compilation.
238   std::time_t now;
239   std::time(&now);
240   definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
241       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
242   definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
243       Definition{FormatTime(now, "\"%T\""), allSources});
244   // The values of these predefined macros depend on their invocation sites.
245   definitions_.emplace(
246       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
247   definitions_.emplace(
248       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
249 }
250 
251 void Preprocessor::Define(std::string macro, std::string value) {
252   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
253 }
254 
255 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
256 
257 std::optional<TokenSequence> Preprocessor::MacroReplacement(
258     const TokenSequence &input, Prescanner &prescanner) {
259   // Do quick scan for any use of a defined name.
260   std::size_t tokens{input.SizeInTokens()};
261   std::size_t j;
262   for (j = 0; j < tokens; ++j) {
263     CharBlock token{input.TokenAt(j)};
264     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
265         IsNameDefined(token)) {
266       break;
267     }
268   }
269   if (j == tokens) {
270     return std::nullopt; // input contains nothing that would be replaced
271   }
272   TokenSequence result{input, 0, j};
273   for (; j < tokens; ++j) {
274     const CharBlock &token{input.TokenAt(j)};
275     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
276       result.Put(input, j);
277       continue;
278     }
279     auto it{definitions_.find(token)};
280     if (it == definitions_.end()) {
281       result.Put(input, j);
282       continue;
283     }
284     Definition &def{it->second};
285     if (def.isDisabled()) {
286       result.Put(input, j);
287       continue;
288     }
289     if (!def.isFunctionLike()) {
290       if (def.isPredefined()) {
291         std::string name{def.replacement().TokenAt(0).ToString()};
292         std::string repl;
293         if (name == "__FILE__") {
294           repl = "\""s +
295               allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
296         } else if (name == "__LINE__") {
297           std::string buf;
298           llvm::raw_string_ostream ss{buf};
299           ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
300           repl = ss.str();
301         }
302         if (!repl.empty()) {
303           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
304           ProvenanceRange call{allSources_.AddMacroCall(
305               insert, input.GetTokenProvenanceRange(j), repl)};
306           result.Put(repl, call.start());
307           continue;
308         }
309       }
310       def.set_isDisabled(true);
311       TokenSequence replaced{
312           TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
313       def.set_isDisabled(false);
314       if (!replaced.empty()) {
315         ProvenanceRange from{def.replacement().GetProvenanceRange()};
316         ProvenanceRange use{input.GetTokenProvenanceRange(j)};
317         ProvenanceRange newRange{
318             allSources_.AddMacroCall(from, use, replaced.ToString())};
319         result.Put(replaced, newRange);
320       }
321       continue;
322     }
323     // Possible function-like macro call.  Skip spaces and newlines to see
324     // whether '(' is next.
325     std::size_t k{j};
326     bool leftParen{false};
327     while (++k < tokens) {
328       const CharBlock &lookAhead{input.TokenAt(k)};
329       if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
330         leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
331         break;
332       }
333     }
334     if (!leftParen) {
335       result.Put(input, j);
336       continue;
337     }
338     std::vector<std::size_t> argStart{++k};
339     for (int nesting{0}; k < tokens; ++k) {
340       CharBlock token{input.TokenAt(k)};
341       if (token.size() == 1) {
342         char ch{token[0]};
343         if (ch == '(') {
344           ++nesting;
345         } else if (ch == ')') {
346           if (nesting == 0) {
347             break;
348           }
349           --nesting;
350         } else if (ch == ',' && nesting == 0) {
351           argStart.push_back(k + 1);
352         }
353       }
354     }
355     if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
356       // Subtle: () is zero arguments, not one empty argument,
357       // unless one argument was expected.
358       argStart.clear();
359     }
360     if (k >= tokens || argStart.size() < def.argumentCount() ||
361         (argStart.size() > def.argumentCount() && !def.isVariadic())) {
362       result.Put(input, j);
363       continue;
364     }
365     std::vector<TokenSequence> args;
366     for (std::size_t n{0}; n < argStart.size(); ++n) {
367       std::size_t at{argStart[n]};
368       std::size_t count{
369           (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
370       args.emplace_back(TokenSequence(input, at, count));
371     }
372     def.set_isDisabled(true);
373     TokenSequence replaced{
374         ReplaceMacros(def.Apply(args, prescanner), prescanner)};
375     def.set_isDisabled(false);
376     if (!replaced.empty()) {
377       ProvenanceRange from{def.replacement().GetProvenanceRange()};
378       ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
379       ProvenanceRange newRange{
380           allSources_.AddMacroCall(from, use, replaced.ToString())};
381       result.Put(replaced, newRange);
382     }
383     j = k; // advance to the terminal ')'
384   }
385   return result;
386 }
387 
388 TokenSequence Preprocessor::ReplaceMacros(
389     const TokenSequence &tokens, Prescanner &prescanner) {
390   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
391     return std::move(*repl);
392   }
393   return tokens;
394 }
395 
396 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
397   std::size_t tokens{dir.SizeInTokens()};
398   std::size_t j{dir.SkipBlanks(0)};
399   if (j == tokens) {
400     return;
401   }
402   CHECK(prescanner); // TODO: change to reference
403   if (dir.TokenAt(j).ToString() != "#") {
404     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
405     return;
406   }
407   j = dir.SkipBlanks(j + 1);
408   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
409     --tokens;
410   }
411   if (j == tokens) {
412     return;
413   }
414   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
415     return; // treat like #line, ignore it
416   }
417   std::size_t dirOffset{j};
418   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
419   j = dir.SkipBlanks(j + 1);
420   CharBlock nameToken;
421   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
422     nameToken = dir.TokenAt(j);
423   }
424   if (dirName == "line") {
425     // #line is ignored
426   } else if (dirName == "define") {
427     if (nameToken.empty()) {
428       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
429           "#define: missing or invalid name"_err_en_US);
430       return;
431     }
432     nameToken = SaveTokenAsName(nameToken);
433     definitions_.erase(nameToken);
434     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
435         dir.TokenAt(j)[0] == '(') {
436       j = dir.SkipBlanks(j + 1);
437       std::vector<std::string> argName;
438       bool isVariadic{false};
439       if (dir.TokenAt(j).ToString() != ")") {
440         while (true) {
441           std::string an{dir.TokenAt(j).ToString()};
442           if (an == "...") {
443             isVariadic = true;
444           } else {
445             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
446               prescanner->Say(dir.GetTokenProvenanceRange(j),
447                   "#define: missing or invalid argument name"_err_en_US);
448               return;
449             }
450             argName.push_back(an);
451           }
452           j = dir.SkipBlanks(j + 1);
453           if (j == tokens) {
454             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
455                 "#define: malformed argument list"_err_en_US);
456             return;
457           }
458           std::string punc{dir.TokenAt(j).ToString()};
459           if (punc == ")") {
460             break;
461           }
462           if (isVariadic || punc != ",") {
463             prescanner->Say(dir.GetTokenProvenanceRange(j),
464                 "#define: malformed argument list"_err_en_US);
465             return;
466           }
467           j = dir.SkipBlanks(j + 1);
468           if (j == tokens) {
469             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
470                 "#define: malformed argument list"_err_en_US);
471             return;
472           }
473         }
474         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
475             argName.size()) {
476           prescanner->Say(dir.GetTokenProvenance(dirOffset),
477               "#define: argument names are not distinct"_err_en_US);
478           return;
479         }
480       }
481       j = dir.SkipBlanks(j + 1);
482       definitions_.emplace(std::make_pair(
483           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
484     } else {
485       j = dir.SkipBlanks(j + 1);
486       definitions_.emplace(
487           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
488     }
489   } else if (dirName == "undef") {
490     if (nameToken.empty()) {
491       prescanner->Say(
492           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
493           "# missing or invalid name"_err_en_US);
494     } else {
495       if (dir.IsAnythingLeft(++j)) {
496         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
497             "#undef: excess tokens at end of directive"_en_US);
498       } else {
499         definitions_.erase(nameToken);
500       }
501     }
502   } else if (dirName == "ifdef" || dirName == "ifndef") {
503     bool doThen{false};
504     if (nameToken.empty()) {
505       prescanner->Say(
506           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
507           "#%s: missing name"_err_en_US, dirName);
508     } else {
509       if (dir.IsAnythingLeft(++j)) {
510         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
511             "#%s: excess tokens at end of directive"_en_US, dirName);
512       }
513       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
514     }
515     if (doThen) {
516       ifStack_.push(CanDeadElseAppear::Yes);
517     } else {
518       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
519           dir.GetTokenProvenance(dirOffset));
520     }
521   } else if (dirName == "if") {
522     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
523       ifStack_.push(CanDeadElseAppear::Yes);
524     } else {
525       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
526           dir.GetTokenProvenanceRange(dirOffset));
527     }
528   } else if (dirName == "else") {
529     if (dir.IsAnythingLeft(j)) {
530       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
531           "#else: excess tokens at end of directive"_en_US);
532     } else if (ifStack_.empty()) {
533       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
534           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
535     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
536       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
537           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
538     } else {
539       ifStack_.pop();
540       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
541           dir.GetTokenProvenanceRange(dirOffset));
542     }
543   } else if (dirName == "elif") {
544     if (ifStack_.empty()) {
545       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
546           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
547     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
548       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
549           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
550     } else {
551       ifStack_.pop();
552       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
553           dir.GetTokenProvenanceRange(dirOffset));
554     }
555   } else if (dirName == "endif") {
556     if (dir.IsAnythingLeft(j)) {
557       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
558           "#endif: excess tokens at end of directive"_en_US);
559     } else if (ifStack_.empty()) {
560       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
561           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
562     } else {
563       ifStack_.pop();
564     }
565   } else if (dirName == "error") {
566     prescanner->Say(
567         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
568         "%s"_err_en_US, dir.ToString());
569   } else if (dirName == "warning" || dirName == "comment" ||
570       dirName == "note") {
571     prescanner->Say(
572         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
573         "%s"_en_US, dir.ToString());
574   } else if (dirName == "include") {
575     if (j == tokens) {
576       prescanner->Say(
577           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
578           "#include: missing name of file to include"_err_en_US);
579       return;
580     }
581     std::string include;
582     std::optional<std::string> prependPath;
583     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
584       std::size_t k{j + 1};
585       if (k >= tokens) {
586         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
587             "#include: file name missing"_err_en_US);
588         return;
589       }
590       while (k < tokens && dir.TokenAt(k) != ">") {
591         ++k;
592       }
593       if (k >= tokens) {
594         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
595             "#include: expected '>' at end of included file"_en_US);
596       }
597       TokenSequence braced{dir, j + 1, k - j - 1};
598       include = ReplaceMacros(braced, *prescanner).ToString();
599       j = k;
600     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
601         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
602       include = include.substr(1, include.size() - 2);
603       // #include "foo" starts search in directory of file containing
604       // the directive
605       auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
606       if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
607         prependPath = DirectoryName(currentFile->path());
608       }
609     } else {
610       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
611           "#include: expected name of file to include"_err_en_US);
612       return;
613     }
614     if (include.empty()) {
615       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
616           "#include: empty include file name"_err_en_US);
617       return;
618     }
619     j = dir.SkipBlanks(j + 1);
620     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
621       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
622           "#include: extra stuff ignored after file name"_en_US);
623     }
624     std::string buf;
625     llvm::raw_string_ostream error{buf};
626     const SourceFile *included{allSources_.Open(include, error, prependPath)};
627     if (!included) {
628       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
629           "#include: %s"_err_en_US, error.str());
630     } else if (included->bytes() > 0) {
631       ProvenanceRange fileRange{
632           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
633       Prescanner{*prescanner}
634           .set_encoding(included->encoding())
635           .Prescan(fileRange);
636     }
637   } else {
638     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
639         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
640   }
641 }
642 
643 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
644   names_.push_back(t.ToString());
645   return {names_.back().data(), names_.back().size()};
646 }
647 
648 bool Preprocessor::IsNameDefined(const CharBlock &token) {
649   return definitions_.find(token) != definitions_.end();
650 }
651 
652 static std::string GetDirectiveName(
653     const TokenSequence &line, std::size_t *rest) {
654   std::size_t tokens{line.SizeInTokens()};
655   std::size_t j{line.SkipBlanks(0)};
656   if (j == tokens || line.TokenAt(j).ToString() != "#") {
657     *rest = tokens;
658     return "";
659   }
660   j = line.SkipBlanks(j + 1);
661   if (j == tokens) {
662     *rest = tokens;
663     return "";
664   }
665   *rest = line.SkipBlanks(j + 1);
666   return ToLowerCaseLetters(line.TokenAt(j).ToString());
667 }
668 
669 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
670     IsElseActive isElseActive, Prescanner *prescanner,
671     ProvenanceRange provenanceRange) {
672   int nesting{0};
673   while (!prescanner->IsAtEnd()) {
674     if (!prescanner->IsNextLinePreprocessorDirective()) {
675       prescanner->NextLine();
676       continue;
677     }
678     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
679     std::size_t rest{0};
680     std::string dn{GetDirectiveName(line, &rest)};
681     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
682       ++nesting;
683     } else if (dn == "endif") {
684       if (nesting-- == 0) {
685         return;
686       }
687     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
688       if (dn == "else") {
689         ifStack_.push(CanDeadElseAppear::No);
690         return;
691       }
692       if (dn == "elif" &&
693           IsIfPredicateTrue(
694               line, rest, line.SizeInTokens() - rest, prescanner)) {
695         ifStack_.push(CanDeadElseAppear::Yes);
696         return;
697       }
698     }
699   }
700   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
701 }
702 
703 // Precedence level codes used here to accommodate mixed Fortran and C:
704 // 15: parentheses and constants, logical !, bitwise ~
705 // 14: unary + and -
706 // 13: **
707 // 12: *, /, % (modulus)
708 // 11: + and -
709 // 10: << and >>
710 //  9: bitwise &
711 //  8: bitwise ^
712 //  7: bitwise |
713 //  6: relations (.EQ., ==, &c.)
714 //  5: .NOT.
715 //  4: .AND., &&
716 //  3: .OR., ||
717 //  2: .EQV. and .NEQV. / .XOR.
718 //  1: ? :
719 //  0: ,
720 static std::int64_t ExpressionValue(const TokenSequence &token,
721     int minimumPrecedence, std::size_t *atToken,
722     std::optional<Message> *error) {
723   enum Operator {
724     PARENS,
725     CONST,
726     NOTZERO, // !
727     COMPLEMENT, // ~
728     UPLUS,
729     UMINUS,
730     POWER,
731     TIMES,
732     DIVIDE,
733     MODULUS,
734     ADD,
735     SUBTRACT,
736     LEFTSHIFT,
737     RIGHTSHIFT,
738     BITAND,
739     BITXOR,
740     BITOR,
741     LT,
742     LE,
743     EQ,
744     NE,
745     GE,
746     GT,
747     NOT,
748     AND,
749     OR,
750     EQV,
751     NEQV,
752     SELECT,
753     COMMA
754   };
755   static const int precedence[]{
756       15, 15, 15, 15, // (), 6, !, ~
757       14, 14, // unary +, -
758       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
759       9, 8, 7, // &, ^, |
760       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
761       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
762       1, 0 // ?: and ,
763   };
764   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
765       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
766 
767   static std::map<std::string, enum Operator> opNameMap;
768   if (opNameMap.empty()) {
769     opNameMap["("] = PARENS;
770     opNameMap["!"] = NOTZERO;
771     opNameMap["~"] = COMPLEMENT;
772     opNameMap["**"] = POWER;
773     opNameMap["*"] = TIMES;
774     opNameMap["/"] = DIVIDE;
775     opNameMap["%"] = MODULUS;
776     opNameMap["+"] = ADD;
777     opNameMap["-"] = SUBTRACT;
778     opNameMap["<<"] = LEFTSHIFT;
779     opNameMap[">>"] = RIGHTSHIFT;
780     opNameMap["&"] = BITAND;
781     opNameMap["^"] = BITXOR;
782     opNameMap["|"] = BITOR;
783     opNameMap[".lt."] = opNameMap["<"] = LT;
784     opNameMap[".le."] = opNameMap["<="] = LE;
785     opNameMap[".eq."] = opNameMap["=="] = EQ;
786     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
787     opNameMap[".ge."] = opNameMap[">="] = GE;
788     opNameMap[".gt."] = opNameMap[">"] = GT;
789     opNameMap[".not."] = NOT;
790     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
791     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
792     opNameMap[".eqv."] = EQV;
793     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
794     opNameMap["?"] = SELECT;
795     opNameMap[","] = COMMA;
796   }
797 
798   std::size_t tokens{token.SizeInTokens()};
799   CHECK(tokens > 0);
800   if (*atToken >= tokens) {
801     *error =
802         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
803     return 0;
804   }
805 
806   // Parse and evaluate a primary or a unary operator and its operand.
807   std::size_t opAt{*atToken};
808   std::string t{token.TokenAt(opAt).ToString()};
809   enum Operator op;
810   std::int64_t left{0};
811   if (t == "(") {
812     op = PARENS;
813   } else if (IsDecimalDigit(t[0])) {
814     op = CONST;
815     std::size_t consumed{0};
816     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
817     if (consumed < t.size()) {
818       *error = Message{token.GetTokenProvenanceRange(opAt),
819           "Uninterpretable numeric constant '%s'"_err_en_US, t};
820       return 0;
821     }
822   } else if (IsLegalIdentifierStart(t[0])) {
823     // undefined macro name -> zero
824     // TODO: BOZ constants?
825     op = CONST;
826   } else if (t == "+") {
827     op = UPLUS;
828   } else if (t == "-") {
829     op = UMINUS;
830   } else if (t == "." && *atToken + 2 < tokens &&
831       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
832       token.TokenAt(*atToken + 2).ToString() == ".") {
833     op = NOT;
834     *atToken += 2;
835   } else {
836     auto it{opNameMap.find(t)};
837     if (it != opNameMap.end()) {
838       op = it->second;
839     } else {
840       *error = Message{token.GetTokenProvenanceRange(opAt),
841           "operand expected in expression"_err_en_US};
842       return 0;
843     }
844   }
845   if (precedence[op] < minimumPrecedence) {
846     *error = Message{token.GetTokenProvenanceRange(opAt),
847         "operator precedence error"_err_en_US};
848     return 0;
849   }
850   ++*atToken;
851   if (op != CONST) {
852     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
853     if (*error) {
854       return 0;
855     }
856     switch (op) {
857     case PARENS:
858       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
859         ++*atToken;
860         break;
861       }
862       if (*atToken >= tokens) {
863         *error = Message{token.GetProvenanceRange(),
864             "')' missing from expression"_err_en_US};
865       } else {
866         *error = Message{
867             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
868       }
869       return 0;
870     case NOTZERO:
871       left = !left;
872       break;
873     case COMPLEMENT:
874       left = ~left;
875       break;
876     case UPLUS:
877       break;
878     case UMINUS:
879       left = -left;
880       break;
881     case NOT:
882       left = -!left;
883       break;
884     default:
885       CRASH_NO_CASE;
886     }
887   }
888 
889   // Parse and evaluate binary operators and their second operands, if present.
890   while (*atToken < tokens) {
891     int advance{1};
892     t = token.TokenAt(*atToken).ToString();
893     if (t == "." && *atToken + 2 < tokens &&
894         token.TokenAt(*atToken + 2).ToString() == ".") {
895       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
896       advance = 3;
897     }
898     auto it{opNameMap.find(t)};
899     if (it == opNameMap.end()) {
900       break;
901     }
902     op = it->second;
903     if (op < POWER || precedence[op] < minimumPrecedence) {
904       break;
905     }
906     opAt = *atToken;
907     *atToken += advance;
908 
909     std::int64_t right{
910         ExpressionValue(token, operandPrecedence[op], atToken, error)};
911     if (*error) {
912       return 0;
913     }
914 
915     switch (op) {
916     case POWER:
917       if (left == 0) {
918         if (right < 0) {
919           *error = Message{token.GetTokenProvenanceRange(opAt),
920               "0 ** negative power"_err_en_US};
921         }
922       } else if (left != 1 && right != 1) {
923         if (right <= 0) {
924           left = !right;
925         } else {
926           std::int64_t power{1};
927           for (; right > 0; --right) {
928             if ((power * left) / left != power) {
929               *error = Message{token.GetTokenProvenanceRange(opAt),
930                   "overflow in exponentation"_err_en_US};
931               left = 1;
932             }
933             power *= left;
934           }
935           left = power;
936         }
937       }
938       break;
939     case TIMES:
940       if (left != 0 && right != 0 && ((left * right) / left) != right) {
941         *error = Message{token.GetTokenProvenanceRange(opAt),
942             "overflow in multiplication"_err_en_US};
943       }
944       left = left * right;
945       break;
946     case DIVIDE:
947       if (right == 0) {
948         *error = Message{
949             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
950         left = 0;
951       } else {
952         left = left / right;
953       }
954       break;
955     case MODULUS:
956       if (right == 0) {
957         *error = Message{
958             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
959         left = 0;
960       } else {
961         left = left % right;
962       }
963       break;
964     case ADD:
965       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
966         *error = Message{token.GetTokenProvenanceRange(opAt),
967             "overflow in addition"_err_en_US};
968       }
969       left = left + right;
970       break;
971     case SUBTRACT:
972       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
973         *error = Message{token.GetTokenProvenanceRange(opAt),
974             "overflow in subtraction"_err_en_US};
975       }
976       left = left - right;
977       break;
978     case LEFTSHIFT:
979       if (right < 0 || right > 64) {
980         *error = Message{token.GetTokenProvenanceRange(opAt),
981             "bad left shift count"_err_en_US};
982       }
983       left = right >= 64 ? 0 : left << right;
984       break;
985     case RIGHTSHIFT:
986       if (right < 0 || right > 64) {
987         *error = Message{token.GetTokenProvenanceRange(opAt),
988             "bad right shift count"_err_en_US};
989       }
990       left = right >= 64 ? 0 : left >> right;
991       break;
992     case BITAND:
993     case AND:
994       left = left & right;
995       break;
996     case BITXOR:
997       left = left ^ right;
998       break;
999     case BITOR:
1000     case OR:
1001       left = left | right;
1002       break;
1003     case LT:
1004       left = -(left < right);
1005       break;
1006     case LE:
1007       left = -(left <= right);
1008       break;
1009     case EQ:
1010       left = -(left == right);
1011       break;
1012     case NE:
1013       left = -(left != right);
1014       break;
1015     case GE:
1016       left = -(left >= right);
1017       break;
1018     case GT:
1019       left = -(left > right);
1020       break;
1021     case EQV:
1022       left = -(!left == !right);
1023       break;
1024     case NEQV:
1025       left = -(!left != !right);
1026       break;
1027     case SELECT:
1028       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1029         *error = Message{token.GetTokenProvenanceRange(opAt),
1030             "':' required in selection expression"_err_en_US};
1031         return 0;
1032       } else {
1033         ++*atToken;
1034         std::int64_t third{
1035             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1036         left = left != 0 ? right : third;
1037       }
1038       break;
1039     case COMMA:
1040       left = right;
1041       break;
1042     default:
1043       CRASH_NO_CASE;
1044     }
1045   }
1046   return left;
1047 }
1048 
1049 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1050     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
1051   TokenSequence expr1{expr, first, exprTokens};
1052   if (expr1.HasBlanks()) {
1053     expr1.RemoveBlanks();
1054   }
1055   TokenSequence expr2;
1056   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1057     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1058       CharBlock name;
1059       if (j + 3 < expr1.SizeInTokens() &&
1060           expr1.TokenAt(j + 1).ToString() == "(" &&
1061           expr1.TokenAt(j + 3).ToString() == ")") {
1062         name = expr1.TokenAt(j + 2);
1063         j += 3;
1064       } else if (j + 1 < expr1.SizeInTokens() &&
1065           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1066         name = expr1.TokenAt(++j);
1067       }
1068       if (!name.empty()) {
1069         char truth{IsNameDefined(name) ? '1' : '0'};
1070         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1071         continue;
1072       }
1073     }
1074     expr2.Put(expr1, j);
1075   }
1076   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1077   if (expr3.HasBlanks()) {
1078     expr3.RemoveBlanks();
1079   }
1080   if (expr3.empty()) {
1081     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1082     return false;
1083   }
1084   std::size_t atToken{0};
1085   std::optional<Message> error;
1086   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1087   if (error) {
1088     prescanner->Say(std::move(*error));
1089   } else if (atToken < expr3.SizeInTokens() &&
1090       expr3.TokenAt(atToken).ToString() != "!") {
1091     prescanner->Say(expr3.GetIntervalProvenanceRange(
1092                         atToken, expr3.SizeInTokens() - atToken),
1093         atToken == 0 ? "could not parse any expression"_err_en_US
1094                      : "excess characters after expression"_err_en_US);
1095   }
1096   return result;
1097 }
1098 } // namespace Fortran::parser
1099