//===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "preprocessor.h"
#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cinttypes>
#include <cstddef>
#include <ctime>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <utility>

namespace Fortran::parser {

Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
    : replacement_{Tokenize({}, repl, firstToken, tokens)} {}

Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
    : isFunctionLike_{true},
      argumentCount_(argNames.size()), isVariadic_{isVariadic},
      replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}

Definition::Definition(const std::string &predefined, AllSources &sources)
    : isPredefined_{true},
      replacement_{
          predefined, sources.AddCompilerInsertion(predefined).start()} {}

bool Definition::set_isDisabled(bool disable) {
  bool was{isDisabled_};
  isDisabled_ = disable;
  return was;
}

static bool IsLegalIdentifierStart(const CharBlock &cpl) {
  return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}

TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{token.TokenAt(firstToken + j)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        result.Put(it->second, token.GetTokenProvenance(j));
        continue;
      }
    }
    result.Put(token, firstToken + j, 1);
  }
  return result;
}

static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
  for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
    if (!tokens.TokenAt(j - 1).IsBlank()) {
      return j;
    }
  }
  return 0;
}

static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
    const CharBlock &token{tokens.TokenAt(j)};
    std::size_t bytes{token.size()};
    for (std::size_t k{0}; k < bytes; ++k) {
      char ch{token[k]};
      Provenance from{tokens.GetTokenProvenance(j, k)};
      if (ch == '"' || ch == '\\') {
        result.PutNextTokenChar(ch, from);
      }
      result.PutNextTokenChar(ch, from);
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}
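
// Expository note: Tokenize() above rewrites each macro parameter name into a
// compact placeholder "~A", "~B", ... so that, for example, the body of
//   #define SUM(a, b) a + b
// is stored as "~A + ~B".  Apply() below substitutes the actual argument token
// sequences for those placeholders, handling '#' stringification (via
// Stringify(), which doubles any embedded '"' or '\' characters) and '##'
// token pasting along the way.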
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, AllSources &allSources) {
  TokenSequence result;
  bool pasting{false};
  bool skipping{false};
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    const CharBlock &token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') {
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue;
      }
      std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
      if (afterLastNonBlank > 0 &&
          result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
        // stringifying
        while (result.SizeInTokens() >= afterLastNonBlank) {
          result.pop_back();
        }
        result.Put(Stringify(args[index], allSources));
      } else {
        std::size_t argTokens{args[index].SizeInTokens()};
        for (std::size_t k{0}; k < argTokens; ++k) {
          if (!pasting || !args[index].TokenAt(k).IsBlank()) {
            result.Put(args[index], k);
            pasting = false;
          }
        }
      }
    } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
      // Token pasting operator in body (not expanded argument); discard any
      // immediately preceding white space, then reopen the last token.
      while (!result.empty() &&
          result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
        result.pop_back();
      }
      if (!result.empty()) {
        result.ReopenLastToken();
        pasting = true;
      }
    } else if (pasting && token.IsBlank()) {
      // Delete whitespace immediately following ## in the body.
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j;
    } else {
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return result;
}

static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  return {buffer,
      std::strftime(buffer, sizeof buffer, format, std::localtime(&now))};
}

Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
  // Capture current local date & time once now to avoid having the values
  // of __DATE__ or __TIME__ change during compilation.
  std::time_t now;
  std::time(&now);
  definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
      Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
  definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
      Definition{FormatTime(now, "\"%T\""), allSources});
  // The values of these predefined macros depend on their invocation sites.
  definitions_.emplace(
      SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
  definitions_.emplace(
      SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
}
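
// Expository note: Define()/Undefine() let a caller seed or remove macro
// definitions programmatically (e.g., a driver implementing -D/-U style
// options would presumably route them through here); such definitions land in
// the same definitions_ table that MacroReplacement() consults.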
void Preprocessor::Define(std::string macro, std::string value) {
  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
}

void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }

std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, const Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      def.set_isDisabled(true);
      TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call. Skip spaces and newlines to see
    // whether '(' is next.
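    // Expository example: for a call like M(a, (b, c), d) the scan below
    // collects three arguments -- "a", "(b, c)", and "d" -- since commas
    // nested inside parentheses do not begin a new argument (see the
    // 'nesting' counter), and a bare "()" counts as zero arguments unless
    // exactly one argument was expected.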
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      result.Put(input, j);
      continue;
    }
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, allSources_), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}

TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, const Prescanner &prescanner) {
  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
    return std::move(*repl);
  }
  return tokens;
}
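
// Expository note: Directive() expects 'dir' to hold one tokenized
// preprocessing directive line, e.g.
//   #define RANK(x) (x + 1)
//   #  ifdef DEBUG
// The '#' may be separated from the directive name by blanks; lines whose
// first token after '#' is a digit or a quoted string are treated like #line
// and ignored, and unrecognized directive names are diagnosed at the bottom
// of the function.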
void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  std::size_t j{dir.SkipBlanks(0)};
  if (j == tokens) {
    return;
  }
  if (dir.TokenAt(j).ToString() != "#") {
    prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
    return;
  }
  j = dir.SkipBlanks(j + 1);
  while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
    --tokens;
  }
  if (j == tokens) {
    return;
  }
  if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
    return; // treat like #line, ignore it
  }
  std::size_t dirOffset{j};
  std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
  j = dir.SkipBlanks(j + 1);
  CharBlock nameToken;
  if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
    nameToken = dir.TokenAt(j);
  }
  if (dirName == "line") {
    // #line is ignored
  } else if (dirName == "define") {
    if (nameToken.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#define: missing or invalid name"_err_en_US);
      return;
    }
    nameToken = SaveTokenAsName(nameToken);
    definitions_.erase(nameToken);
    if (++j < tokens && dir.TokenAt(j).size() == 1 &&
        dir.TokenAt(j)[0] == '(') {
      j = dir.SkipBlanks(j + 1);
      std::vector<std::string> argName;
      bool isVariadic{false};
      if (dir.TokenAt(j).ToString() != ")") {
        while (true) {
          std::string an{dir.TokenAt(j).ToString()};
          if (an == "...") {
            isVariadic = true;
          } else {
            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
              prescanner->Say(dir.GetTokenProvenanceRange(j),
                  "#define: missing or invalid argument name"_err_en_US);
              return;
            }
            argName.push_back(an);
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          std::string punc{dir.TokenAt(j).ToString()};
          if (punc == ")") {
            break;
          }
          if (isVariadic || punc != ",") {
            prescanner->Say(dir.GetTokenProvenanceRange(j),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
        }
        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
            argName.size()) {
          prescanner->Say(dir.GetTokenProvenance(dirOffset),
              "#define: argument names are not distinct"_err_en_US);
          return;
        }
      }
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(std::make_pair(
          nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
    } else {
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(
          std::make_pair(nameToken, Definition{dir, j, tokens - j}));
    }
  } else if (dirName == "undef") {
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "# missing or invalid name"_err_en_US);
    } else {
      j = dir.SkipBlanks(j + 1);
      if (j != tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#undef: excess tokens at end of directive"_err_en_US);
      } else {
        definitions_.erase(nameToken);
      }
    }
  } else if (dirName == "ifdef" || dirName == "ifndef") {
    bool doThen{false};
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#%s: missing name"_err_en_US, dirName);
    } else {
      j = dir.SkipBlanks(j + 1);
      if (j != tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#%s: excess tokens at end of directive"_en_US, dirName);
      }
      doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
    }
    if (doThen) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenance(dirOffset));
    }
  } else if (dirName == "if") {
    if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "else") {
    if (j != tokens) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#else: excess tokens at end of directive"_err_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "elif") {
    if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "endif") {
    if (j != tokens) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#endif: excess tokens at end of directive"_err_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
    }
  } else if (dirName == "error") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_err_en_US, dir.ToString());
  } else if (dirName == "warning" || dirName == "comment" ||
      dirName == "note") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_en_US, dir.ToString());
  } else if (dirName == "include") {
    if (j == tokens) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#include: missing name of file to include"_err_en_US);
      return;
    }
    std::string include;
    if (dir.TokenAt(j).ToString() == "<") {
      std::size_t k{j + 1};
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: file name missing"_err_en_US);
        return;
      }
      while (k < tokens && dir.TokenAt(k) != ">") {
        ++k;
      }
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: expected '>' at end of included file"_en_US);
      } else if (k + 1 < tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
            "#include: extra stuff ignored after '>'"_en_US);
      }
      TokenSequence braced{dir, j + 1, k - j - 1};
      include = ReplaceMacros(braced, *prescanner).ToString();
    } else if (j + 1 == tokens &&
        (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
        include.substr(include.size() - 1, 1) == "\"") {
      include = include.substr(1, include.size() - 2);
    } else {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#include: expected name of file to include"_err_en_US);
      return;
    }
    if (include.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: empty include file name"_err_en_US);
      return;
    }
    std::string buf;
    llvm::raw_string_ostream error{buf};
    const SourceFile *included{allSources_.Open(include, error)};
    if (!included) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: %s"_err_en_US, error.str());
    } else if (included->bytes() > 0) {
      ProvenanceRange fileRange{
          allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
      Prescanner{*prescanner}
          .set_encoding(included->encoding())
          .Prescan(fileRange);
    }
  } else {
    prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
        "#%s: unknown or unimplemented directive"_err_en_US, dirName);
  }
}
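
// Expository note: SaveTokenAsName() copies the spelling of a macro name into
// storage owned by the Preprocessor (names_, declared in preprocessor.h) so
// that the CharBlock used as a key in definitions_ stays valid after the
// directive's own token storage is gone.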
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  return {names_.back().data(), names_.back().size()};
}

bool Preprocessor::IsNameDefined(const CharBlock &token) {
  return definitions_.find(token) != definitions_.end();
}

static std::string GetDirectiveName(
    const TokenSequence &line, std::size_t *rest) {
  std::size_t tokens{line.SizeInTokens()};
  std::size_t j{line.SkipBlanks(0)};
  if (j == tokens || line.TokenAt(j).ToString() != "#") {
    *rest = tokens;
    return "";
  }
  j = line.SkipBlanks(j + 1);
  if (j == tokens) {
    *rest = tokens;
    return "";
  }
  *rest = line.SkipBlanks(j + 1);
  return ToLowerCaseLetters(line.TokenAt(j).ToString());
}

void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
    IsElseActive isElseActive, Prescanner *prescanner,
    ProvenanceRange provenanceRange) {
  int nesting{0};
  while (!prescanner->IsAtEnd()) {
    if (!prescanner->IsNextLinePreprocessorDirective()) {
      prescanner->NextLine();
      continue;
    }
    TokenSequence line{prescanner->TokenizePreprocessorDirective()};
    std::size_t rest{0};
    std::string dn{GetDirectiveName(line, &rest)};
    if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
      ++nesting;
    } else if (dn == "endif") {
      if (nesting-- == 0) {
        return;
      }
    } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
      if (dn == "else") {
        ifStack_.push(CanDeadElseAppear::No);
        return;
      }
      if (dn == "elif" &&
          IsIfPredicateTrue(
              line, rest, line.SizeInTokens() - rest, prescanner)) {
        ifStack_.push(CanDeadElseAppear::Yes);
        return;
      }
    }
  }
  prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
}

// Precedence level codes used here to accommodate mixed Fortran and C:
// 15: parentheses and constants, logical !, bitwise ~
// 14: unary + and -
// 13: **
// 12: *, /, % (modulus)
// 11: + and -
// 10: << and >>
//  9: bitwise &
//  8: bitwise ^
//  7: bitwise |
//  6: relations (.EQ., ==, &c.)
//  5: .NOT.
//  4: .AND., &&
//  3: .OR., ||
//  2: .EQV. and .NEQV. / .XOR.
//  1: ? :
//  0: ,
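//
// Worked example (expository): under these levels, "#if 1 + 2 * 3 == 7"
// parses as ((1 + (2 * 3)) == 7), since '*' (level 12) binds tighter than '+'
// (level 11), which binds tighter than '==' (level 6).  Relational operators
// below yield -1 for true and 0 for false, and IsIfPredicateTrue() treats any
// nonzero result as a true predicate.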
static std::int64_t ExpressionValue(const TokenSequence &token,
    int minimumPrecedence, std::size_t *atToken,
    std::optional<Message> *error) {
  enum Operator {
    PARENS,
    CONST,
    NOTZERO, // !
    COMPLEMENT, // ~
    UPLUS,
    UMINUS,
    POWER,
    TIMES,
    DIVIDE,
    MODULUS,
    ADD,
    SUBTRACT,
    LEFTSHIFT,
    RIGHTSHIFT,
    BITAND,
    BITXOR,
    BITOR,
    LT,
    LE,
    EQ,
    NE,
    GE,
    GT,
    NOT,
    AND,
    OR,
    EQV,
    NEQV,
    SELECT,
    COMMA
  };
  static const int precedence[]{
      15, 15, 15, 15, // (), 6, !, ~
      14, 14, // unary +, -
      13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
      9, 8, 7, // &, ^, |
      6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
      5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
      1, 0 // ?: and ,
  };
  static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
      11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};

  static std::map<std::string, enum Operator> opNameMap;
  if (opNameMap.empty()) {
    opNameMap["("] = PARENS;
    opNameMap["!"] = NOTZERO;
    opNameMap["~"] = COMPLEMENT;
    opNameMap["**"] = POWER;
    opNameMap["*"] = TIMES;
    opNameMap["/"] = DIVIDE;
    opNameMap["%"] = MODULUS;
    opNameMap["+"] = ADD;
    opNameMap["-"] = SUBTRACT;
    opNameMap["<<"] = LEFTSHIFT;
    opNameMap[">>"] = RIGHTSHIFT;
    opNameMap["&"] = BITAND;
    opNameMap["^"] = BITXOR;
    opNameMap["|"] = BITOR;
    opNameMap[".lt."] = opNameMap["<"] = LT;
    opNameMap[".le."] = opNameMap["<="] = LE;
    opNameMap[".eq."] = opNameMap["=="] = EQ;
    opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
    opNameMap[".ge."] = opNameMap[">="] = GE;
    opNameMap[".gt."] = opNameMap[">"] = GT;
    opNameMap[".not."] = NOT;
    opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
    opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
    opNameMap[".eqv."] = EQV;
    opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
    opNameMap["?"] = SELECT;
    opNameMap[","] = COMMA;
  }
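
  // Expository note: the table above accepts both C and Fortran spellings of
  // the same operation -- ".ne.", "/=", and "!=" all map to NE, and ".and.",
  // ".a.", and "&&" all map to AND -- so #if expressions may mix the two
  // styles.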
  std::size_t tokens{token.SizeInTokens()};
  CHECK(tokens > 0);
  if (*atToken >= tokens) {
    *error =
        Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
    return 0;
  }

  // Parse and evaluate a primary or a unary operator and its operand.
  std::size_t opAt{*atToken};
  std::string t{token.TokenAt(opAt).ToString()};
  enum Operator op;
  std::int64_t left{0};
  if (t == "(") {
    op = PARENS;
  } else if (IsDecimalDigit(t[0])) {
    op = CONST;
    std::size_t consumed{0};
    left = std::stoll(t, &consumed, 0 /*base to be detected*/);
    if (consumed < t.size()) {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "Uninterpretable numeric constant '%s'"_err_en_US, t};
      return 0;
    }
  } else if (IsLegalIdentifierStart(t[0])) {
    // undefined macro name -> zero
    // TODO: BOZ constants?
    op = CONST;
  } else if (t == "+") {
    op = UPLUS;
  } else if (t == "-") {
    op = UMINUS;
  } else if (t == "." && *atToken + 2 < tokens &&
      ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
      token.TokenAt(*atToken + 2).ToString() == ".") {
    op = NOT;
    *atToken += 2;
  } else {
    auto it{opNameMap.find(t)};
    if (it != opNameMap.end()) {
      op = it->second;
    } else {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "operand expected in expression"_err_en_US};
      return 0;
    }
  }
  if (precedence[op] < minimumPrecedence) {
    *error = Message{token.GetTokenProvenanceRange(opAt),
        "operator precedence error"_err_en_US};
    return 0;
  }
  ++*atToken;
  if (op != CONST) {
    left = ExpressionValue(token, operandPrecedence[op], atToken, error);
    if (*error) {
      return 0;
    }
    switch (op) {
    case PARENS:
      if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
        ++*atToken;
        break;
      }
      if (*atToken >= tokens) {
        *error = Message{token.GetProvenanceRange(),
            "')' missing from expression"_err_en_US};
      } else {
        *error = Message{
            token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
      }
      return 0;
    case NOTZERO:
      left = !left;
      break;
    case COMPLEMENT:
      left = ~left;
      break;
    case UPLUS:
      break;
    case UMINUS:
      left = -left;
      break;
    case NOT:
      left = -!left;
      break;
    default:
      CRASH_NO_CASE;
    }
  }
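
  // Expository note: the loop below is a precedence-climbing parse.  A binary
  // (or ternary) operator is consumed only while precedence[op] is at least
  // minimumPrecedence, and its right operand is parsed recursively with
  // operandPrecedence[op] as the new minimum, which bounds how much of the
  // remaining input that operand may absorb.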
  // Parse and evaluate binary operators and their second operands, if present.
  while (*atToken < tokens) {
    int advance{1};
    t = token.TokenAt(*atToken).ToString();
    if (t == "." && *atToken + 2 < tokens &&
        token.TokenAt(*atToken + 2).ToString() == ".") {
      t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
      advance = 3;
    }
    auto it{opNameMap.find(t)};
    if (it == opNameMap.end()) {
      break;
    }
    op = it->second;
    if (op < POWER || precedence[op] < minimumPrecedence) {
      break;
    }
    opAt = *atToken;
    *atToken += advance;

    std::int64_t right{
        ExpressionValue(token, operandPrecedence[op], atToken, error)};
    if (*error) {
      return 0;
    }

    switch (op) {
    case POWER:
      if (left == 0) {
        if (right < 0) {
          *error = Message{token.GetTokenProvenanceRange(opAt),
              "0 ** negative power"_err_en_US};
        }
      } else if (left != 1 && right != 1) {
        if (right <= 0) {
          left = !right;
        } else {
          std::int64_t power{1};
          for (; right > 0; --right) {
            if ((power * left) / left != power) {
              *error = Message{token.GetTokenProvenanceRange(opAt),
                  "overflow in exponentiation"_err_en_US};
              left = 1;
            }
            power *= left;
          }
          left = power;
        }
      }
      break;
    case TIMES:
      if (left != 0 && right != 0 && ((left * right) / left) != right) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in multiplication"_err_en_US};
      }
      left = left * right;
      break;
    case DIVIDE:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
        left = 0;
      } else {
        left = left / right;
      }
      break;
    case MODULUS:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
        left = 0;
      } else {
        left = left % right;
      }
      break;
    case ADD:
      if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in addition"_err_en_US};
      }
      left = left + right;
      break;
    case SUBTRACT:
      if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in subtraction"_err_en_US};
      }
      left = left - right;
      break;
    case LEFTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad left shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left << right;
      break;
    case RIGHTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad right shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left >> right;
      break;
    case BITAND:
    case AND:
      left = left & right;
      break;
    case BITXOR:
      left = left ^ right;
      break;
    case BITOR:
    case OR:
      left = left | right;
      break;
    case LT:
      left = -(left < right);
      break;
    case LE:
      left = -(left <= right);
      break;
    case EQ:
      left = -(left == right);
      break;
    case NE:
      left = -(left != right);
      break;
    case GE:
      left = -(left >= right);
      break;
    case GT:
      left = -(left > right);
      break;
    case EQV:
      left = -(!left == !right);
      break;
    case NEQV:
      left = -(!left != !right);
      break;
    case SELECT:
      if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "':' required in selection expression"_err_en_US};
        return 0;
      } else {
        ++*atToken;
        std::int64_t third{
            ExpressionValue(token, operandPrecedence[op], atToken, error)};
        left = left != 0 ? right : third;
      }
      break;
    case COMMA:
      left = right;
      break;
    default:
      CRASH_NO_CASE;
    }
  }
  return left;
}

// Expository note: IsIfPredicateTrue() rewrites "defined NAME" and
// "defined(NAME)" into the literal '1' or '0' before macro replacement runs
// on the rest of the expression; e.g., when FOO is defined,
//   #if defined(FOO) && FOO > 1
// first becomes "1 && FOO > 1", is then macro-expanded, and is finally
// evaluated by ExpressionValue() above.
bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
    std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
  TokenSequence expr1{expr, first, exprTokens};
  if (expr1.HasBlanks()) {
    expr1.RemoveBlanks();
  }
  TokenSequence expr2;
  for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
    if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
      CharBlock name;
      if (j + 3 < expr1.SizeInTokens() &&
          expr1.TokenAt(j + 1).ToString() == "(" &&
          expr1.TokenAt(j + 3).ToString() == ")") {
        name = expr1.TokenAt(j + 2);
        j += 3;
      } else if (j + 1 < expr1.SizeInTokens() &&
          IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
        name = expr1.TokenAt(++j);
      }
      if (!name.empty()) {
        char truth{IsNameDefined(name) ? '1' : '0'};
        expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
        continue;
      }
    }
    expr2.Put(expr1, j);
  }
  TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
  if (expr3.HasBlanks()) {
    expr3.RemoveBlanks();
  }
  if (expr3.empty()) {
    prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
    return false;
  }
  std::size_t atToken{0};
  std::optional<Message> error;
  bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
  if (error) {
    prescanner->Say(std::move(*error));
  } else if (atToken < expr3.SizeInTokens() &&
      expr3.TokenAt(atToken).ToString() != "!") {
    prescanner->Say(expr3.GetIntervalProvenanceRange(
                        atToken, expr3.SizeInTokens() - atToken),
        atToken == 0 ? "could not parse any expression"_err_en_US
                     : "excess characters after expression"_err_en_US);
  }
  return result;
}
} // namespace Fortran::parser