1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24
25 namespace Fortran::parser {
26
Definition(const TokenSequence & repl,std::size_t firstToken,std::size_t tokens)27 Definition::Definition(
28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30
Definition(const std::vector<std::string> & argNames,const TokenSequence & repl,std::size_t firstToken,std::size_t tokens,bool isVariadic)31 Definition::Definition(const std::vector<std::string> &argNames,
32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33 bool isVariadic)
34 : isFunctionLike_{true},
35 argumentCount_(argNames.size()), isVariadic_{isVariadic},
36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37
Definition(const std::string & predefined,AllSources & sources)38 Definition::Definition(const std::string &predefined, AllSources &sources)
39 : isPredefined_{true},
40 replacement_{
41 predefined, sources.AddCompilerInsertion(predefined).start()} {}
42
set_isDisabled(bool disable)43 bool Definition::set_isDisabled(bool disable) {
44 bool was{isDisabled_};
45 isDisabled_ = disable;
46 return was;
47 }
48
IsLegalIdentifierStart(const CharBlock & cpl)49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52
// Copies `tokens` tokens of `token`, starting at index `firstToken`, into a
// new sequence that serves as a macro's replacement text.  Every identifier
// that matches one of `argNames` is replaced by a two-character placeholder:
// '~' followed by a letter ('A' for the first argument, 'B' for the second,
// and so on).  Apply() later recognizes those placeholders.
// The argument names must be distinct (enforced with CHECK).
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    // `j` counts from the start of the replacement text; `at` is the
    // corresponding index into the whole `token` sequence.
    const std::size_t at{firstToken + j};
    CharBlock tok{token.TokenAt(at)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // The placeholder inherits the provenance of the argument name that
        // it replaces, so the lookup must use the absolute index.
        result.Put(it->second, token.GetTokenProvenance(at));
        continue;
      }
    }
    result.Put(token, at, 1);
  }
  return result;
}
75
// Produces a single-token sequence holding the text of all of `tokens`
// wrapped in double quotes.  Every '"' and '\\' character of the input is
// emitted twice.  The enclosing quotes carry a compiler-insertion provenance;
// every other character keeps the provenance of the character it came from.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t tokenIndex{0}; tokenIndex < tokens.SizeInTokens();
       ++tokenIndex) {
    const CharBlock &token{tokens.TokenAt(tokenIndex)};
    const std::size_t length{token.size()};
    for (std::size_t offset{0}; offset < length; ++offset) {
      const char ch{token[offset]};
      Provenance from{tokens.GetTokenProvenance(tokenIndex, offset)};
      const bool needsDoubling{ch == '"' || ch == '\\'};
      for (int copies{needsDoubling ? 2 : 1}; copies > 0; --copies) {
        result.PutNextTokenChar(ch, from);
      }
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}
97
IsTokenPasting(CharBlock opr)98 constexpr bool IsTokenPasting(CharBlock opr) {
99 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101
AnyTokenPasting(const TokenSequence & text)102 static bool AnyTokenPasting(const TokenSequence &text) {
103 std::size_t tokens{text.SizeInTokens()};
104 for (std::size_t j{0}; j < tokens; ++j) {
105 if (IsTokenPasting(text.TokenAt(j))) {
106 return true;
107 }
108 }
109 return false;
110 }
111
TokenPasting(TokenSequence && text)112 static TokenSequence TokenPasting(TokenSequence &&text) {
113 if (!AnyTokenPasting(text)) {
114 return std::move(text);
115 }
116 TokenSequence result;
117 std::size_t tokens{text.SizeInTokens()};
118 bool pasting{false};
119 for (std::size_t j{0}; j < tokens; ++j) {
120 if (IsTokenPasting(text.TokenAt(j))) {
121 if (!pasting) {
122 while (!result.empty() &&
123 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124 result.pop_back();
125 }
126 if (!result.empty()) {
127 result.ReopenLastToken();
128 pasting = true;
129 }
130 }
131 } else if (pasting && text.TokenAt(j).IsBlank()) {
132 } else {
133 result.Put(text, j, 1);
134 pasting = false;
135 }
136 }
137 return result;
138 }
139
// Expands this function-like macro for one invocation and returns the
// resulting token sequence.
//   args       - the actual arguments, one token sequence each; arguments at
//                index argumentCount_ and beyond are the variadic ones
//   prescanner - provides access to the preprocessor (for macro replacement
//                within arguments) and to the source manager
// The replacement text is walked token by token:
//   * "~X" placeholders (created by Tokenize()) are substituted with the
//     corresponding actual argument, stringified if preceded by '#';
//   * __VA_ARGS__ expands to the comma-separated variadic arguments;
//   * __VA_OPT__( ... ) contributes its contents only when variadic
//     arguments were supplied;
//   * everything else is copied through.
// Token pasting (##) is applied to the whole result at the end.
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
  TokenSequence result;
  // True while the contents of a __VA_OPT__(...) are being discarded.
  bool skipping{false};
  // Parenthesis depth within a __VA_OPT__(...); zero when outside of one.
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      // Only track parentheses so that the matching ')' ends the skip.
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') { // argument substitution
      // The second character encodes the argument index ('A' is the first).
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue;
      }
      // Find the nearest non-blank token before the placeholder.
      std::size_t prev{j};
      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
        --prev;
      }
      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
          replacement_.TokenAt(prev - 1)[0] ==
              '#') { // stringify argument without macro replacement
        // The '#' (and any blanks after it) were already copied to the
        // result; remove them before emitting the string.
        std::size_t resultSize{result.SizeInTokens()};
        while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) {
          result.pop_back();
          --resultSize;
        }
        CHECK(resultSize > 0 &&
            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
        result.pop_back();
        result.Put(Stringify(args[index], prescanner.allSources()));
      } else {
        const TokenSequence *arg{&args[index]};
        std::optional<TokenSequence> replaced;
        // Don't replace macros in the actual argument if it is preceded or
        // followed by the token-pasting operator ## in the replacement text.
        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
          auto next{replacement_.SkipBlanks(j + 1)};
          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
            // Apply macro replacement to the actual argument
            replaced =
                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
            if (replaced) {
              arg = &*replaced;
            }
          }
        }
        result.Put(DEREF(arg));
      }
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      // Emit the variadic arguments, re-inserting the separating commas.
      Provenance commaProvenance{
          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
              ',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      // Start of __VA_OPT__( ... ): its contents are skipped when no
      // variadic arguments were passed.  The ++j steps over the '('.
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j;
    } else {
      // Ordinary token.  While inside a kept __VA_OPT__(...), track
      // parentheses so that its closing ')' is dropped rather than copied.
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return TokenPasting(std::move(result));
}
229
// Formats the local-time representation of `now` according to the strftime
// format string `format`.
// Returns the formatted text, or an empty string when the time cannot be
// converted to local time or the result (with its terminating NUL) does not
// fit in the 16-byte buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime() yields a null pointer on failure; handing that to
  // std::strftime() would be undefined behavior.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  char buffer[16];
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return std::string(buffer, length);
}
235
Preprocessor(AllSources & allSources)236 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
237
DefineStandardMacros()238 void Preprocessor::DefineStandardMacros() {
239 // Capture current local date & time once now to avoid having the values
240 // of __DATE__ or __TIME__ change during compilation.
241 std::time_t now;
242 std::time(&now);
243 Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
244 Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
245 // The values of these predefined macros depend on their invocation sites.
246 Define("__FILE__"s, "__FILE__"s);
247 Define("__LINE__"s, "__LINE__"s);
248 }
249
Define(std::string macro,std::string value)250 void Preprocessor::Define(std::string macro, std::string value) {
251 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
252 }
253
Undefine(std::string macro)254 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
255
// Performs macro replacement on `input`.
// Returns std::nullopt when nothing in `input` names a defined macro (so the
// caller can keep using `input` as is); otherwise returns a new token
// sequence with the macro uses expanded.
//   input      - the tokens to scan
//   prescanner - supplies the current provenance for __FILE__/__LINE__ and is
//                passed on to nested replacement
// A definition is marked disabled while its own expansion is being rescanned
// so that it is not expanded recursively.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  if (definitions_.empty()) {
    return std::nullopt;
  }
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  // Everything before the first defined name is copied unchanged.
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      // This macro is currently being expanded; leave the name alone.
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ are computed from the current provenance.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      // Object-like macro: rescan its replacement text with the macro
      // itself disabled, then apply token pasting.
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      // A function-like macro name without '(' is not an invocation.
      result.Put(input, j);
      continue;
    }
    // Record where each actual argument begins; arguments are separated by
    // commas that are not nested within parentheses.  On exit from the loop
    // k indexes the closing ')' (or equals `tokens` if there is none).
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      // Unterminated call or wrong number of arguments: don't expand.
      result.Put(input, j);
      continue;
    }
    // Slice the actual arguments out of the input.
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    // Expand the call, then rescan the expansion with the macro disabled.
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
389
// Convenience wrapper around MacroReplacement() that always yields a token
// sequence: the expanded tokens when something was replaced, or a copy of
// `tokens` when nothing was.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  std::optional<TokenSequence> replaced{MacroReplacement(tokens, prescanner)};
  if (!replaced) {
    return tokens;
  }
  return std::move(*replaced);
}
397
Directive(const TokenSequence & dir,Prescanner & prescanner)398 void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
399 std::size_t tokens{dir.SizeInTokens()};
400 std::size_t j{dir.SkipBlanks(0)};
401 if (j == tokens) {
402 return;
403 }
404 if (dir.TokenAt(j).ToString() != "#") {
405 prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
406 return;
407 }
408 j = dir.SkipBlanks(j + 1);
409 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
410 --tokens;
411 }
412 if (j == tokens) {
413 return;
414 }
415 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
416 return; // treat like #line, ignore it
417 }
418 std::size_t dirOffset{j};
419 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
420 j = dir.SkipBlanks(j + 1);
421 CharBlock nameToken;
422 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
423 nameToken = dir.TokenAt(j);
424 }
425 if (dirName == "line") {
426 // #line is ignored
427 } else if (dirName == "define") {
428 if (nameToken.empty()) {
429 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
430 "#define: missing or invalid name"_err_en_US);
431 return;
432 }
433 nameToken = SaveTokenAsName(nameToken);
434 definitions_.erase(nameToken);
435 if (++j < tokens && dir.TokenAt(j).size() == 1 &&
436 dir.TokenAt(j)[0] == '(') {
437 j = dir.SkipBlanks(j + 1);
438 std::vector<std::string> argName;
439 bool isVariadic{false};
440 if (dir.TokenAt(j).ToString() != ")") {
441 while (true) {
442 std::string an{dir.TokenAt(j).ToString()};
443 if (an == "...") {
444 isVariadic = true;
445 } else {
446 if (an.empty() || !IsLegalIdentifierStart(an[0])) {
447 prescanner.Say(dir.GetTokenProvenanceRange(j),
448 "#define: missing or invalid argument name"_err_en_US);
449 return;
450 }
451 argName.push_back(an);
452 }
453 j = dir.SkipBlanks(j + 1);
454 if (j == tokens) {
455 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
456 "#define: malformed argument list"_err_en_US);
457 return;
458 }
459 std::string punc{dir.TokenAt(j).ToString()};
460 if (punc == ")") {
461 break;
462 }
463 if (isVariadic || punc != ",") {
464 prescanner.Say(dir.GetTokenProvenanceRange(j),
465 "#define: malformed argument list"_err_en_US);
466 return;
467 }
468 j = dir.SkipBlanks(j + 1);
469 if (j == tokens) {
470 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
471 "#define: malformed argument list"_err_en_US);
472 return;
473 }
474 }
475 if (std::set<std::string>(argName.begin(), argName.end()).size() !=
476 argName.size()) {
477 prescanner.Say(dir.GetTokenProvenance(dirOffset),
478 "#define: argument names are not distinct"_err_en_US);
479 return;
480 }
481 }
482 j = dir.SkipBlanks(j + 1);
483 definitions_.emplace(std::make_pair(
484 nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
485 } else {
486 j = dir.SkipBlanks(j + 1);
487 definitions_.emplace(
488 std::make_pair(nameToken, Definition{dir, j, tokens - j}));
489 }
490 } else if (dirName == "undef") {
491 if (nameToken.empty()) {
492 prescanner.Say(
493 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
494 "# missing or invalid name"_err_en_US);
495 } else {
496 if (dir.IsAnythingLeft(++j)) {
497 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
498 "#undef: excess tokens at end of directive"_port_en_US);
499 } else {
500 definitions_.erase(nameToken);
501 }
502 }
503 } else if (dirName == "ifdef" || dirName == "ifndef") {
504 bool doThen{false};
505 if (nameToken.empty()) {
506 prescanner.Say(
507 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
508 "#%s: missing name"_err_en_US, dirName);
509 } else {
510 if (dir.IsAnythingLeft(++j)) {
511 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
512 "#%s: excess tokens at end of directive"_port_en_US, dirName);
513 }
514 doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
515 }
516 if (doThen) {
517 ifStack_.push(CanDeadElseAppear::Yes);
518 } else {
519 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
520 dir.GetTokenProvenance(dirOffset));
521 }
522 } else if (dirName == "if") {
523 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
524 ifStack_.push(CanDeadElseAppear::Yes);
525 } else {
526 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
527 dir.GetTokenProvenanceRange(dirOffset));
528 }
529 } else if (dirName == "else") {
530 if (dir.IsAnythingLeft(j)) {
531 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
532 "#else: excess tokens at end of directive"_port_en_US);
533 } else if (ifStack_.empty()) {
534 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
535 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
536 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
537 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
538 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
539 } else {
540 ifStack_.pop();
541 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
542 dir.GetTokenProvenanceRange(dirOffset));
543 }
544 } else if (dirName == "elif") {
545 if (ifStack_.empty()) {
546 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
547 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
548 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
549 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
550 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
551 } else {
552 ifStack_.pop();
553 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
554 dir.GetTokenProvenanceRange(dirOffset));
555 }
556 } else if (dirName == "endif") {
557 if (dir.IsAnythingLeft(j)) {
558 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
559 "#endif: excess tokens at end of directive"_port_en_US);
560 } else if (ifStack_.empty()) {
561 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
562 "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
563 } else {
564 ifStack_.pop();
565 }
566 } else if (dirName == "error") {
567 prescanner.Say(
568 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
569 "%s"_err_en_US, dir.ToString());
570 } else if (dirName == "warning") {
571 prescanner.Say(
572 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
573 "%s"_warn_en_US, dir.ToString());
574 } else if (dirName == "comment" || dirName == "note") {
575 prescanner.Say(
576 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
577 "%s"_en_US, dir.ToString());
578 } else if (dirName == "include") {
579 if (j == tokens) {
580 prescanner.Say(
581 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
582 "#include: missing name of file to include"_err_en_US);
583 return;
584 }
585 std::string include;
586 std::optional<std::string> prependPath;
587 if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
588 std::size_t k{j + 1};
589 if (k >= tokens) {
590 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
591 "#include: file name missing"_err_en_US);
592 return;
593 }
594 while (k < tokens && dir.TokenAt(k) != ">") {
595 ++k;
596 }
597 if (k >= tokens) {
598 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
599 "#include: expected '>' at end of included file"_port_en_US);
600 }
601 TokenSequence braced{dir, j + 1, k - j - 1};
602 include = ReplaceMacros(braced, prescanner).ToString();
603 j = k;
604 } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
605 include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
606 include = include.substr(1, include.size() - 2);
607 // #include "foo" starts search in directory of file containing
608 // the directive
609 auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
610 if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
611 prependPath = DirectoryName(currentFile->path());
612 }
613 } else {
614 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
615 "#include: expected name of file to include"_err_en_US);
616 return;
617 }
618 if (include.empty()) {
619 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
620 "#include: empty include file name"_err_en_US);
621 return;
622 }
623 j = dir.SkipBlanks(j + 1);
624 if (j < tokens && dir.TokenAt(j).ToString() != "!") {
625 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
626 "#include: extra stuff ignored after file name"_port_en_US);
627 }
628 std::string buf;
629 llvm::raw_string_ostream error{buf};
630 const SourceFile *included{
631 allSources_.Open(include, error, std::move(prependPath))};
632 if (!included) {
633 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
634 "#include: %s"_err_en_US, error.str());
635 } else if (included->bytes() > 0) {
636 ProvenanceRange fileRange{
637 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
638 Prescanner{prescanner}
639 .set_encoding(included->encoding())
640 .Prescan(fileRange);
641 }
642 } else {
643 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
644 "#%s: unknown or unimplemented directive"_err_en_US, dirName);
645 }
646 }
647
// Copies the text of `t` into the preprocessor's own `names_` storage and
// returns a character block that refers to the stored copy.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  const std::string &saved{names_.back()};
  return CharBlock{saved.data(), saved.size()};
}
652
IsNameDefined(const CharBlock & token)653 bool Preprocessor::IsNameDefined(const CharBlock &token) {
654 return definitions_.find(token) != definitions_.end();
655 }
656
GetDirectiveName(const TokenSequence & line,std::size_t * rest)657 static std::string GetDirectiveName(
658 const TokenSequence &line, std::size_t *rest) {
659 std::size_t tokens{line.SizeInTokens()};
660 std::size_t j{line.SkipBlanks(0)};
661 if (j == tokens || line.TokenAt(j).ToString() != "#") {
662 *rest = tokens;
663 return "";
664 }
665 j = line.SkipBlanks(j + 1);
666 if (j == tokens) {
667 *rest = tokens;
668 return "";
669 }
670 *rest = line.SkipBlanks(j + 1);
671 return ToLowerCaseLetters(line.TokenAt(j).ToString());
672 }
673
SkipDisabledConditionalCode(const std::string & dirName,IsElseActive isElseActive,Prescanner & prescanner,ProvenanceRange provenanceRange)674 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
675 IsElseActive isElseActive, Prescanner &prescanner,
676 ProvenanceRange provenanceRange) {
677 int nesting{0};
678 while (!prescanner.IsAtEnd()) {
679 if (!prescanner.IsNextLinePreprocessorDirective()) {
680 prescanner.NextLine();
681 continue;
682 }
683 TokenSequence line{prescanner.TokenizePreprocessorDirective()};
684 std::size_t rest{0};
685 std::string dn{GetDirectiveName(line, &rest)};
686 if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
687 ++nesting;
688 } else if (dn == "endif") {
689 if (nesting-- == 0) {
690 return;
691 }
692 } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
693 if (dn == "else") {
694 ifStack_.push(CanDeadElseAppear::No);
695 return;
696 }
697 if (dn == "elif" &&
698 IsIfPredicateTrue(
699 line, rest, line.SizeInTokens() - rest, prescanner)) {
700 ifStack_.push(CanDeadElseAppear::Yes);
701 return;
702 }
703 }
704 }
705 prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
706 }
707
708 // Precedence level codes used here to accommodate mixed Fortran and C:
709 // 15: parentheses and constants, logical !, bitwise ~
710 // 14: unary + and -
711 // 13: **
712 // 12: *, /, % (modulus)
713 // 11: + and -
714 // 10: << and >>
715 // 9: bitwise &
716 // 8: bitwise ^
717 // 7: bitwise |
718 // 6: relations (.EQ., ==, &c.)
719 // 5: .NOT.
720 // 4: .AND., &&
721 // 3: .OR., ||
722 // 2: .EQV. and .NEQV. / .XOR.
723 // 1: ? :
724 // 0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)725 static std::int64_t ExpressionValue(const TokenSequence &token,
726 int minimumPrecedence, std::size_t *atToken,
727 std::optional<Message> *error) {
728 enum Operator {
729 PARENS,
730 CONST,
731 NOTZERO, // !
732 COMPLEMENT, // ~
733 UPLUS,
734 UMINUS,
735 POWER,
736 TIMES,
737 DIVIDE,
738 MODULUS,
739 ADD,
740 SUBTRACT,
741 LEFTSHIFT,
742 RIGHTSHIFT,
743 BITAND,
744 BITXOR,
745 BITOR,
746 LT,
747 LE,
748 EQ,
749 NE,
750 GE,
751 GT,
752 NOT,
753 AND,
754 OR,
755 EQV,
756 NEQV,
757 SELECT,
758 COMMA
759 };
760 static const int precedence[]{
761 15, 15, 15, 15, // (), 6, !, ~
762 14, 14, // unary +, -
763 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
764 9, 8, 7, // &, ^, |
765 6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
766 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
767 1, 0 // ?: and ,
768 };
769 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
770 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
771
772 static std::map<std::string, enum Operator> opNameMap;
773 if (opNameMap.empty()) {
774 opNameMap["("] = PARENS;
775 opNameMap["!"] = NOTZERO;
776 opNameMap["~"] = COMPLEMENT;
777 opNameMap["**"] = POWER;
778 opNameMap["*"] = TIMES;
779 opNameMap["/"] = DIVIDE;
780 opNameMap["%"] = MODULUS;
781 opNameMap["+"] = ADD;
782 opNameMap["-"] = SUBTRACT;
783 opNameMap["<<"] = LEFTSHIFT;
784 opNameMap[">>"] = RIGHTSHIFT;
785 opNameMap["&"] = BITAND;
786 opNameMap["^"] = BITXOR;
787 opNameMap["|"] = BITOR;
788 opNameMap[".lt."] = opNameMap["<"] = LT;
789 opNameMap[".le."] = opNameMap["<="] = LE;
790 opNameMap[".eq."] = opNameMap["=="] = EQ;
791 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
792 opNameMap[".ge."] = opNameMap[">="] = GE;
793 opNameMap[".gt."] = opNameMap[">"] = GT;
794 opNameMap[".not."] = NOT;
795 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
796 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
797 opNameMap[".eqv."] = EQV;
798 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
799 opNameMap["?"] = SELECT;
800 opNameMap[","] = COMMA;
801 }
802
803 std::size_t tokens{token.SizeInTokens()};
804 CHECK(tokens > 0);
805 if (*atToken >= tokens) {
806 *error =
807 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
808 return 0;
809 }
810
811 // Parse and evaluate a primary or a unary operator and its operand.
812 std::size_t opAt{*atToken};
813 std::string t{token.TokenAt(opAt).ToString()};
814 enum Operator op;
815 std::int64_t left{0};
816 if (t == "(") {
817 op = PARENS;
818 } else if (IsDecimalDigit(t[0])) {
819 op = CONST;
820 std::size_t consumed{0};
821 left = std::stoll(t, &consumed, 0 /*base to be detected*/);
822 if (consumed < t.size()) {
823 *error = Message{token.GetTokenProvenanceRange(opAt),
824 "Uninterpretable numeric constant '%s'"_err_en_US, t};
825 return 0;
826 }
827 } else if (IsLegalIdentifierStart(t[0])) {
828 // undefined macro name -> zero
829 // TODO: BOZ constants?
830 op = CONST;
831 } else if (t == "+") {
832 op = UPLUS;
833 } else if (t == "-") {
834 op = UMINUS;
835 } else if (t == "." && *atToken + 2 < tokens &&
836 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
837 token.TokenAt(*atToken + 2).ToString() == ".") {
838 op = NOT;
839 *atToken += 2;
840 } else {
841 auto it{opNameMap.find(t)};
842 if (it != opNameMap.end()) {
843 op = it->second;
844 } else {
845 *error = Message{token.GetTokenProvenanceRange(opAt),
846 "operand expected in expression"_err_en_US};
847 return 0;
848 }
849 }
850 if (precedence[op] < minimumPrecedence) {
851 *error = Message{token.GetTokenProvenanceRange(opAt),
852 "operator precedence error"_err_en_US};
853 return 0;
854 }
855 ++*atToken;
856 if (op != CONST) {
857 left = ExpressionValue(token, operandPrecedence[op], atToken, error);
858 if (*error) {
859 return 0;
860 }
861 switch (op) {
862 case PARENS:
863 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
864 ++*atToken;
865 break;
866 }
867 if (*atToken >= tokens) {
868 *error = Message{token.GetProvenanceRange(),
869 "')' missing from expression"_err_en_US};
870 } else {
871 *error = Message{
872 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
873 }
874 return 0;
875 case NOTZERO:
876 left = !left;
877 break;
878 case COMPLEMENT:
879 left = ~left;
880 break;
881 case UPLUS:
882 break;
883 case UMINUS:
884 left = -left;
885 break;
886 case NOT:
887 left = -!left;
888 break;
889 default:
890 CRASH_NO_CASE;
891 }
892 }
893
894 // Parse and evaluate binary operators and their second operands, if present.
895 while (*atToken < tokens) {
896 int advance{1};
897 t = token.TokenAt(*atToken).ToString();
898 if (t == "." && *atToken + 2 < tokens &&
899 token.TokenAt(*atToken + 2).ToString() == ".") {
900 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
901 advance = 3;
902 }
903 auto it{opNameMap.find(t)};
904 if (it == opNameMap.end()) {
905 break;
906 }
907 op = it->second;
908 if (op < POWER || precedence[op] < minimumPrecedence) {
909 break;
910 }
911 opAt = *atToken;
912 *atToken += advance;
913
914 std::int64_t right{
915 ExpressionValue(token, operandPrecedence[op], atToken, error)};
916 if (*error) {
917 return 0;
918 }
919
920 switch (op) {
921 case POWER:
922 if (left == 0) {
923 if (right < 0) {
924 *error = Message{token.GetTokenProvenanceRange(opAt),
925 "0 ** negative power"_err_en_US};
926 }
927 } else if (left != 1 && right != 1) {
928 if (right <= 0) {
929 left = !right;
930 } else {
931 std::int64_t power{1};
932 for (; right > 0; --right) {
933 if ((power * left) / left != power) {
934 *error = Message{token.GetTokenProvenanceRange(opAt),
935 "overflow in exponentation"_err_en_US};
936 left = 1;
937 }
938 power *= left;
939 }
940 left = power;
941 }
942 }
943 break;
944 case TIMES:
945 if (left != 0 && right != 0 && ((left * right) / left) != right) {
946 *error = Message{token.GetTokenProvenanceRange(opAt),
947 "overflow in multiplication"_err_en_US};
948 }
949 left = left * right;
950 break;
951 case DIVIDE:
952 if (right == 0) {
953 *error = Message{
954 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
955 left = 0;
956 } else {
957 left = left / right;
958 }
959 break;
960 case MODULUS:
961 if (right == 0) {
962 *error = Message{
963 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
964 left = 0;
965 } else {
966 left = left % right;
967 }
968 break;
969 case ADD:
970 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
971 *error = Message{token.GetTokenProvenanceRange(opAt),
972 "overflow in addition"_err_en_US};
973 }
974 left = left + right;
975 break;
976 case SUBTRACT:
977 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
978 *error = Message{token.GetTokenProvenanceRange(opAt),
979 "overflow in subtraction"_err_en_US};
980 }
981 left = left - right;
982 break;
983 case LEFTSHIFT:
984 if (right < 0 || right > 64) {
985 *error = Message{token.GetTokenProvenanceRange(opAt),
986 "bad left shift count"_err_en_US};
987 }
988 left = right >= 64 ? 0 : left << right;
989 break;
990 case RIGHTSHIFT:
991 if (right < 0 || right > 64) {
992 *error = Message{token.GetTokenProvenanceRange(opAt),
993 "bad right shift count"_err_en_US};
994 }
995 left = right >= 64 ? 0 : left >> right;
996 break;
997 case BITAND:
998 case AND:
999 left = left & right;
1000 break;
1001 case BITXOR:
1002 left = left ^ right;
1003 break;
1004 case BITOR:
1005 case OR:
1006 left = left | right;
1007 break;
1008 case LT:
1009 left = -(left < right);
1010 break;
1011 case LE:
1012 left = -(left <= right);
1013 break;
1014 case EQ:
1015 left = -(left == right);
1016 break;
1017 case NE:
1018 left = -(left != right);
1019 break;
1020 case GE:
1021 left = -(left >= right);
1022 break;
1023 case GT:
1024 left = -(left > right);
1025 break;
1026 case EQV:
1027 left = -(!left == !right);
1028 break;
1029 case NEQV:
1030 left = -(!left != !right);
1031 break;
1032 case SELECT:
1033 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1034 *error = Message{token.GetTokenProvenanceRange(opAt),
1035 "':' required in selection expression"_err_en_US};
1036 return 0;
1037 } else {
1038 ++*atToken;
1039 std::int64_t third{
1040 ExpressionValue(token, operandPrecedence[op], atToken, error)};
1041 left = left != 0 ? right : third;
1042 }
1043 break;
1044 case COMMA:
1045 left = right;
1046 break;
1047 default:
1048 CRASH_NO_CASE;
1049 }
1050 }
1051 return left;
1052 }
1053
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner & prescanner)1054 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1055 std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1056 TokenSequence expr1{expr, first, exprTokens};
1057 if (expr1.HasBlanks()) {
1058 expr1.RemoveBlanks();
1059 }
1060 TokenSequence expr2;
1061 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1062 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1063 CharBlock name;
1064 if (j + 3 < expr1.SizeInTokens() &&
1065 expr1.TokenAt(j + 1).ToString() == "(" &&
1066 expr1.TokenAt(j + 3).ToString() == ")") {
1067 name = expr1.TokenAt(j + 2);
1068 j += 3;
1069 } else if (j + 1 < expr1.SizeInTokens() &&
1070 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1071 name = expr1.TokenAt(++j);
1072 }
1073 if (!name.empty()) {
1074 char truth{IsNameDefined(name) ? '1' : '0'};
1075 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1076 continue;
1077 }
1078 }
1079 expr2.Put(expr1, j);
1080 }
1081 TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1082 if (expr3.HasBlanks()) {
1083 expr3.RemoveBlanks();
1084 }
1085 if (expr3.empty()) {
1086 prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1087 return false;
1088 }
1089 std::size_t atToken{0};
1090 std::optional<Message> error;
1091 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1092 if (error) {
1093 prescanner.Say(std::move(*error));
1094 } else if (atToken < expr3.SizeInTokens() &&
1095 expr3.TokenAt(atToken).ToString() != "!") {
1096 prescanner.Say(expr3.GetIntervalProvenanceRange(
1097 atToken, expr3.SizeInTokens() - atToken),
1098 atToken == 0 ? "could not parse any expression"_err_en_US
1099 : "excess characters after expression"_err_en_US);
1100 }
1101 return result;
1102 }
1103 } // namespace Fortran::parser
1104