1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "preprocessor.h" 10 #include "prescan.h" 11 #include "flang/Common/idioms.h" 12 #include "flang/Parser/characters.h" 13 #include "flang/Parser/message.h" 14 #include <algorithm> 15 #include <cinttypes> 16 #include <cstddef> 17 #include <ctime> 18 #include <map> 19 #include <memory> 20 #include <optional> 21 #include <set> 22 #include <sstream> 23 #include <utility> 24 25 namespace Fortran::parser { 26 27 Definition::Definition( 28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens) 29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {} 30 31 Definition::Definition(const std::vector<std::string> &argNames, 32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, 33 bool isVariadic) 34 : isFunctionLike_{true}, 35 argumentCount_(argNames.size()), isVariadic_{isVariadic}, 36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} 37 38 Definition::Definition(const std::string &predefined, AllSources &sources) 39 : isPredefined_{true}, replacement_{predefined, 40 sources.AddCompilerInsertion(predefined).start()} { 41 } 42 43 bool Definition::set_isDisabled(bool disable) { 44 bool was{isDisabled_}; 45 isDisabled_ = disable; 46 return was; 47 } 48 49 static bool IsLegalIdentifierStart(const CharBlock &cpl) { 50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); 51 } 52 53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, 54 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) { 55 std::map<std::string, std::string> args; 56 char argIndex{'A'}; 57 for (const std::string &arg : argNames) { 58 CHECK(args.find(arg) == args.end()); 59 args[arg] = "~"s + argIndex++; 60 } 61 TokenSequence result; 62 for (std::size_t j{0}; j < tokens; ++j) { 63 CharBlock tok{token.TokenAt(firstToken + j)}; 64 if (IsLegalIdentifierStart(tok)) { 65 auto it{args.find(tok.ToString())}; 66 if (it != args.end()) { 67 result.Put(it->second, token.GetTokenProvenance(j)); 68 continue; 69 } 70 } 71 result.Put(token, firstToken + j, 1); 72 } 73 return result; 74 } 75 76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) { 77 for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) { 78 if (!tokens.TokenAt(j - 1).IsBlank()) { 79 return j; 80 } 81 } 82 return 0; 83 } 84 85 static TokenSequence Stringify( 86 const TokenSequence &tokens, AllSources &allSources) { 87 TokenSequence result; 88 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')}; 89 result.PutNextTokenChar('"', quoteProvenance); 90 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) { 91 const CharBlock &token{tokens.TokenAt(j)}; 92 std::size_t bytes{token.size()}; 93 for (std::size_t k{0}; k < bytes; ++k) { 94 char ch{token[k]}; 95 Provenance from{tokens.GetTokenProvenance(j, k)}; 96 if (ch == '"' || ch == '\\') { 97 result.PutNextTokenChar(ch, from); 98 } 99 result.PutNextTokenChar(ch, from); 100 } 101 } 102 result.PutNextTokenChar('"', quoteProvenance); 103 result.CloseToken(); 104 return result; 105 } 106 107 TokenSequence Definition::Apply( 108 const std::vector<TokenSequence> &args, AllSources &allSources) { 109 TokenSequence result; 110 bool pasting{false}; 111 bool skipping{false}; 112 int parenthesesNesting{0}; 113 std::size_t tokens{replacement_.SizeInTokens()}; 114 for (std::size_t j{0}; j < tokens; ++j) { 115 const CharBlock &token{replacement_.TokenAt(j)}; 116 std::size_t bytes{token.size()}; 117 if (skipping) { 118 if (bytes == 1) { 119 if (token[0] == '(') { 120 ++parenthesesNesting; 121 } else if (token[0] == ')') { 122 skipping = --parenthesesNesting > 0; 123 } 124 } 125 continue; 126 } 127 if (bytes == 2 && token[0] == '~') { 128 std::size_t index = token[1] - 'A'; 129 if (index >= args.size()) { 130 continue; 131 } 132 std::size_t afterLastNonBlank{AfterLastNonBlank(result)}; 133 if (afterLastNonBlank > 0 && 134 result.TokenAt(afterLastNonBlank - 1).ToString() == "#") { 135 // stringifying 136 while (result.SizeInTokens() >= afterLastNonBlank) { 137 result.pop_back(); 138 } 139 result.Put(Stringify(args[index], allSources)); 140 } else { 141 std::size_t argTokens{args[index].SizeInTokens()}; 142 for (std::size_t k{0}; k < argTokens; ++k) { 143 if (!pasting || !args[index].TokenAt(k).IsBlank()) { 144 result.Put(args[index], k); 145 pasting = false; 146 } 147 } 148 } 149 } else if (bytes == 2 && token[0] == '#' && token[1] == '#') { 150 // Token pasting operator in body (not expanded argument); discard any 151 // immediately preceding white space, then reopen the last token. 152 while (!result.empty() && 153 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) { 154 result.pop_back(); 155 } 156 if (!result.empty()) { 157 result.ReopenLastToken(); 158 pasting = true; 159 } 160 } else if (pasting && token.IsBlank()) { 161 // Delete whitespace immediately following ## in the body. 162 } else if (bytes == 11 && isVariadic_ && 163 token.ToString() == "__VA_ARGS__") { 164 Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')}; 165 for (std::size_t k{argumentCount_}; k < args.size(); ++k) { 166 if (k > argumentCount_) { 167 result.Put(","s, commaProvenance); 168 } 169 result.Put(args[k]); 170 } 171 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && 172 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && 173 parenthesesNesting == 0) { 174 parenthesesNesting = 1; 175 skipping = args.size() == argumentCount_; 176 ++j; 177 } else { 178 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { 179 ++parenthesesNesting; 180 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { 181 if (--parenthesesNesting == 0) { 182 skipping = false; 183 continue; 184 } 185 } 186 result.Put(replacement_, j); 187 } 188 } 189 return result; 190 } 191 192 static std::string FormatTime(const std::time_t &now, const char *format) { 193 char buffer[16]; 194 return {buffer, 195 std::strftime(buffer, sizeof buffer, format, std::localtime(&now))}; 196 } 197 198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} { 199 // Capture current local date & time once now to avoid having the values 200 // of __DATE__ or __TIME__ change during compilation. 201 std::time_t now; 202 std::time(&now); 203 definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904" 204 Definition{FormatTime(now, "\"%h %e %Y\""), allSources}); 205 definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60" 206 Definition{FormatTime(now, "\"%T\""), allSources}); 207 // The values of these predefined macros depend on their invocation sites. 208 definitions_.emplace( 209 SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources}); 210 definitions_.emplace( 211 SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources}); 212 } 213 214 void Preprocessor::Define(std::string macro, std::string value) { 215 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); 216 } 217 218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } 219 220 std::optional<TokenSequence> Preprocessor::MacroReplacement( 221 const TokenSequence &input, const Prescanner &prescanner) { 222 // Do quick scan for any use of a defined name. 223 std::size_t tokens{input.SizeInTokens()}; 224 std::size_t j; 225 for (j = 0; j < tokens; ++j) { 226 CharBlock token{input.TokenAt(j)}; 227 if (!token.empty() && IsLegalIdentifierStart(token[0]) && 228 IsNameDefined(token)) { 229 break; 230 } 231 } 232 if (j == tokens) { 233 return std::nullopt; // input contains nothing that would be replaced 234 } 235 TokenSequence result{input, 0, j}; 236 for (; j < tokens; ++j) { 237 const CharBlock &token{input.TokenAt(j)}; 238 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { 239 result.Put(input, j); 240 continue; 241 } 242 auto it{definitions_.find(token)}; 243 if (it == definitions_.end()) { 244 result.Put(input, j); 245 continue; 246 } 247 Definition &def{it->second}; 248 if (def.isDisabled()) { 249 result.Put(input, j); 250 continue; 251 } 252 if (!def.isFunctionLike()) { 253 if (def.isPredefined()) { 254 std::string name{def.replacement().TokenAt(0).ToString()}; 255 std::string repl; 256 if (name == "__FILE__") { 257 repl = "\""s + 258 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"'; 259 } else if (name == "__LINE__") { 260 std::stringstream ss; 261 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance()); 262 repl = ss.str(); 263 } 264 if (!repl.empty()) { 265 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)}; 266 ProvenanceRange call{allSources_.AddMacroCall( 267 insert, input.GetTokenProvenanceRange(j), repl)}; 268 result.Put(repl, call.start()); 269 continue; 270 } 271 } 272 def.set_isDisabled(true); 273 TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)}; 274 def.set_isDisabled(false); 275 if (!replaced.empty()) { 276 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 277 ProvenanceRange use{input.GetTokenProvenanceRange(j)}; 278 ProvenanceRange newRange{ 279 allSources_.AddMacroCall(from, use, replaced.ToString())}; 280 result.Put(replaced, newRange); 281 } 282 continue; 283 } 284 // Possible function-like macro call. Skip spaces and newlines to see 285 // whether '(' is next. 286 std::size_t k{j}; 287 bool leftParen{false}; 288 while (++k < tokens) { 289 const CharBlock &lookAhead{input.TokenAt(k)}; 290 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { 291 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; 292 break; 293 } 294 } 295 if (!leftParen) { 296 result.Put(input, j); 297 continue; 298 } 299 std::vector<std::size_t> argStart{++k}; 300 for (int nesting{0}; k < tokens; ++k) { 301 CharBlock token{input.TokenAt(k)}; 302 if (token.size() == 1) { 303 char ch{token[0]}; 304 if (ch == '(') { 305 ++nesting; 306 } else if (ch == ')') { 307 if (nesting == 0) { 308 break; 309 } 310 --nesting; 311 } else if (ch == ',' && nesting == 0) { 312 argStart.push_back(k + 1); 313 } 314 } 315 } 316 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) { 317 // Subtle: () is zero arguments, not one empty argument, 318 // unless one argument was expected. 319 argStart.clear(); 320 } 321 if (k >= tokens || argStart.size() < def.argumentCount() || 322 (argStart.size() > def.argumentCount() && !def.isVariadic())) { 323 result.Put(input, j); 324 continue; 325 } 326 std::vector<TokenSequence> args; 327 for (std::size_t n{0}; n < argStart.size(); ++n) { 328 std::size_t at{argStart[n]}; 329 std::size_t count{ 330 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at}; 331 args.emplace_back(TokenSequence(input, at, count)); 332 } 333 def.set_isDisabled(true); 334 TokenSequence replaced{ 335 ReplaceMacros(def.Apply(args, allSources_), prescanner)}; 336 def.set_isDisabled(false); 337 if (!replaced.empty()) { 338 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 339 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)}; 340 ProvenanceRange newRange{ 341 allSources_.AddMacroCall(from, use, replaced.ToString())}; 342 result.Put(replaced, newRange); 343 } 344 j = k; // advance to the terminal ')' 345 } 346 return result; 347 } 348 349 TokenSequence Preprocessor::ReplaceMacros( 350 const TokenSequence &tokens, const Prescanner &prescanner) { 351 if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) { 352 return std::move(*repl); 353 } 354 return tokens; 355 } 356 357 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { 358 std::size_t tokens{dir.SizeInTokens()}; 359 std::size_t j{dir.SkipBlanks(0)}; 360 if (j == tokens) { 361 return; 362 } 363 if (dir.TokenAt(j).ToString() != "#") { 364 prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US); 365 return; 366 } 367 j = dir.SkipBlanks(j + 1); 368 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) { 369 --tokens; 370 } 371 if (j == tokens) { 372 return; 373 } 374 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { 375 return; // treat like #line, ignore it 376 } 377 std::size_t dirOffset{j}; 378 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; 379 j = dir.SkipBlanks(j + 1); 380 CharBlock nameToken; 381 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) { 382 nameToken = dir.TokenAt(j); 383 } 384 if (dirName == "line") { 385 // #line is ignored 386 } else if (dirName == "define") { 387 if (nameToken.empty()) { 388 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 389 "#define: missing or invalid name"_err_en_US); 390 return; 391 } 392 nameToken = SaveTokenAsName(nameToken); 393 definitions_.erase(nameToken); 394 if (++j < tokens && dir.TokenAt(j).size() == 1 && 395 dir.TokenAt(j)[0] == '(') { 396 j = dir.SkipBlanks(j + 1); 397 std::vector<std::string> argName; 398 bool isVariadic{false}; 399 if (dir.TokenAt(j).ToString() != ")") { 400 while (true) { 401 std::string an{dir.TokenAt(j).ToString()}; 402 if (an == "...") { 403 isVariadic = true; 404 } else { 405 if (an.empty() || !IsLegalIdentifierStart(an[0])) { 406 prescanner->Say(dir.GetTokenProvenanceRange(j), 407 "#define: missing or invalid argument name"_err_en_US); 408 return; 409 } 410 argName.push_back(an); 411 } 412 j = dir.SkipBlanks(j + 1); 413 if (j == tokens) { 414 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 415 "#define: malformed argument list"_err_en_US); 416 return; 417 } 418 std::string punc{dir.TokenAt(j).ToString()}; 419 if (punc == ")") { 420 break; 421 } 422 if (isVariadic || punc != ",") { 423 prescanner->Say(dir.GetTokenProvenanceRange(j), 424 "#define: malformed argument list"_err_en_US); 425 return; 426 } 427 j = dir.SkipBlanks(j + 1); 428 if (j == tokens) { 429 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 430 "#define: malformed argument list"_err_en_US); 431 return; 432 } 433 } 434 if (std::set<std::string>(argName.begin(), argName.end()).size() != 435 argName.size()) { 436 prescanner->Say(dir.GetTokenProvenance(dirOffset), 437 "#define: argument names are not distinct"_err_en_US); 438 return; 439 } 440 } 441 j = dir.SkipBlanks(j + 1); 442 definitions_.emplace(std::make_pair( 443 nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); 444 } else { 445 j = dir.SkipBlanks(j + 1); 446 definitions_.emplace( 447 std::make_pair(nameToken, Definition{dir, j, tokens - j})); 448 } 449 } else if (dirName == "undef") { 450 if (nameToken.empty()) { 451 prescanner->Say( 452 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 453 "# missing or invalid name"_err_en_US); 454 } else { 455 j = dir.SkipBlanks(j + 1); 456 if (j != tokens) { 457 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 458 "#undef: excess tokens at end of directive"_err_en_US); 459 } else { 460 definitions_.erase(nameToken); 461 } 462 } 463 } else if (dirName == "ifdef" || dirName == "ifndef") { 464 bool doThen{false}; 465 if (nameToken.empty()) { 466 prescanner->Say( 467 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 468 "#%s: missing name"_err_en_US, dirName); 469 } else { 470 j = dir.SkipBlanks(j + 1); 471 if (j != tokens) { 472 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 473 "#%s: excess tokens at end of directive"_en_US, dirName); 474 } 475 doThen = IsNameDefined(nameToken) == (dirName == "ifdef"); 476 } 477 if (doThen) { 478 ifStack_.push(CanDeadElseAppear::Yes); 479 } else { 480 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 481 dir.GetTokenProvenance(dirOffset)); 482 } 483 } else if (dirName == "if") { 484 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) { 485 ifStack_.push(CanDeadElseAppear::Yes); 486 } else { 487 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 488 dir.GetTokenProvenanceRange(dirOffset)); 489 } 490 } else if (dirName == "else") { 491 if (j != tokens) { 492 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 493 "#else: excess tokens at end of directive"_err_en_US); 494 } else if (ifStack_.empty()) { 495 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 496 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); 497 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 498 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 499 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US); 500 } else { 501 ifStack_.pop(); 502 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner, 503 dir.GetTokenProvenanceRange(dirOffset)); 504 } 505 } else if (dirName == "elif") { 506 if (ifStack_.empty()) { 507 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 508 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US); 509 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 510 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 511 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US); 512 } else { 513 ifStack_.pop(); 514 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner, 515 dir.GetTokenProvenanceRange(dirOffset)); 516 } 517 } else if (dirName == "endif") { 518 if (j != tokens) { 519 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 520 "#endif: excess tokens at end of directive"_err_en_US); 521 } else if (ifStack_.empty()) { 522 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 523 "#endif: no #if, #ifdef, or #ifndef"_err_en_US); 524 } else { 525 ifStack_.pop(); 526 } 527 } else if (dirName == "error") { 528 prescanner->Say( 529 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 530 "%s"_err_en_US, dir.ToString()); 531 } else if (dirName == "warning" || dirName == "comment" || 532 dirName == "note") { 533 prescanner->Say( 534 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 535 "%s"_en_US, dir.ToString()); 536 } else if (dirName == "include") { 537 if (j == tokens) { 538 prescanner->Say( 539 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 540 "#include: missing name of file to include"_err_en_US); 541 return; 542 } 543 std::string include; 544 if (dir.TokenAt(j).ToString() == "<") { 545 std::size_t k{j + 1}; 546 if (k >= tokens) { 547 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 548 "#include: file name missing"_err_en_US); 549 return; 550 } 551 while (k < tokens && dir.TokenAt(k) != ">") { 552 ++k; 553 } 554 if (k >= tokens) { 555 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 556 "#include: expected '>' at end of included file"_en_US); 557 } else if (k + 1 < tokens) { 558 prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1), 559 "#include: extra stuff ignored after '>'"_en_US); 560 } 561 TokenSequence braced{dir, j + 1, k - j - 1}; 562 include = ReplaceMacros(braced, *prescanner).ToString(); 563 } else if (j + 1 == tokens && 564 (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && 565 include.substr(include.size() - 1, 1) == "\"") { 566 include = include.substr(1, include.size() - 2); 567 } else { 568 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 569 "#include: expected name of file to include"_err_en_US); 570 return; 571 } 572 if (include.empty()) { 573 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 574 "#include: empty include file name"_err_en_US); 575 return; 576 } 577 std::stringstream error; 578 const SourceFile *included{allSources_.Open(include, &error)}; 579 if (!included) { 580 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 581 "#include: %s"_err_en_US, error.str()); 582 } else if (included->bytes() > 0) { 583 ProvenanceRange fileRange{ 584 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())}; 585 Prescanner{*prescanner} 586 .set_encoding(included->encoding()) 587 .Prescan(fileRange); 588 } 589 } else { 590 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 591 "#%s: unknown or unimplemented directive"_err_en_US, dirName); 592 } 593 } 594 595 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { 596 names_.push_back(t.ToString()); 597 return {names_.back().data(), names_.back().size()}; 598 } 599 600 bool Preprocessor::IsNameDefined(const CharBlock &token) { 601 return definitions_.find(token) != definitions_.end(); 602 } 603 604 static std::string GetDirectiveName( 605 const TokenSequence &line, std::size_t *rest) { 606 std::size_t tokens{line.SizeInTokens()}; 607 std::size_t j{line.SkipBlanks(0)}; 608 if (j == tokens || line.TokenAt(j).ToString() != "#") { 609 *rest = tokens; 610 return ""; 611 } 612 j = line.SkipBlanks(j + 1); 613 if (j == tokens) { 614 *rest = tokens; 615 return ""; 616 } 617 *rest = line.SkipBlanks(j + 1); 618 return ToLowerCaseLetters(line.TokenAt(j).ToString()); 619 } 620 621 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName, 622 IsElseActive isElseActive, Prescanner *prescanner, 623 ProvenanceRange provenanceRange) { 624 int nesting{0}; 625 while (!prescanner->IsAtEnd()) { 626 if (!prescanner->IsNextLinePreprocessorDirective()) { 627 prescanner->NextLine(); 628 continue; 629 } 630 TokenSequence line{prescanner->TokenizePreprocessorDirective()}; 631 std::size_t rest{0}; 632 std::string dn{GetDirectiveName(line, &rest)}; 633 if (dn == "ifdef" || dn == "ifndef" || dn == "if") { 634 ++nesting; 635 } else if (dn == "endif") { 636 if (nesting-- == 0) { 637 return; 638 } 639 } else if (isElseActive == IsElseActive::Yes && nesting == 0) { 640 if (dn == "else") { 641 ifStack_.push(CanDeadElseAppear::No); 642 return; 643 } 644 if (dn == "elif" && 645 IsIfPredicateTrue( 646 line, rest, line.SizeInTokens() - rest, prescanner)) { 647 ifStack_.push(CanDeadElseAppear::Yes); 648 return; 649 } 650 } 651 } 652 prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName); 653 } 654 655 // Precedence level codes used here to accommodate mixed Fortran and C: 656 // 15: parentheses and constants, logical !, bitwise ~ 657 // 14: unary + and - 658 // 13: ** 659 // 12: *, /, % (modulus) 660 // 11: + and - 661 // 10: << and >> 662 // 9: bitwise & 663 // 8: bitwise ^ 664 // 7: bitwise | 665 // 6: relations (.EQ., ==, &c.) 666 // 5: .NOT. 667 // 4: .AND., && 668 // 3: .OR., || 669 // 2: .EQV. and .NEQV. / .XOR. 670 // 1: ? : 671 // 0: , 672 static std::int64_t ExpressionValue(const TokenSequence &token, 673 int minimumPrecedence, std::size_t *atToken, 674 std::optional<Message> *error) { 675 enum Operator { 676 PARENS, 677 CONST, 678 NOTZERO, // ! 679 COMPLEMENT, // ~ 680 UPLUS, 681 UMINUS, 682 POWER, 683 TIMES, 684 DIVIDE, 685 MODULUS, 686 ADD, 687 SUBTRACT, 688 LEFTSHIFT, 689 RIGHTSHIFT, 690 BITAND, 691 BITXOR, 692 BITOR, 693 LT, 694 LE, 695 EQ, 696 NE, 697 GE, 698 GT, 699 NOT, 700 AND, 701 OR, 702 EQV, 703 NEQV, 704 SELECT, 705 COMMA 706 }; 707 static const int precedence[]{ 708 15, 15, 15, 15, // (), 6, !, ~ 709 14, 14, // unary +, - 710 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >> 711 9, 8, 7, // &, ^, | 712 6, 6, 6, 6, 6, 6, // relations .LT. to .GT. 713 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV. 714 1, 0 // ?: and , 715 }; 716 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12, 717 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0}; 718 719 static std::map<std::string, enum Operator> opNameMap; 720 if (opNameMap.empty()) { 721 opNameMap["("] = PARENS; 722 opNameMap["!"] = NOTZERO; 723 opNameMap["~"] = COMPLEMENT; 724 opNameMap["**"] = POWER; 725 opNameMap["*"] = TIMES; 726 opNameMap["/"] = DIVIDE; 727 opNameMap["%"] = MODULUS; 728 opNameMap["+"] = ADD; 729 opNameMap["-"] = SUBTRACT; 730 opNameMap["<<"] = LEFTSHIFT; 731 opNameMap[">>"] = RIGHTSHIFT; 732 opNameMap["&"] = BITAND; 733 opNameMap["^"] = BITXOR; 734 opNameMap["|"] = BITOR; 735 opNameMap[".lt."] = opNameMap["<"] = LT; 736 opNameMap[".le."] = opNameMap["<="] = LE; 737 opNameMap[".eq."] = opNameMap["=="] = EQ; 738 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE; 739 opNameMap[".ge."] = opNameMap[">="] = GE; 740 opNameMap[".gt."] = opNameMap[">"] = GT; 741 opNameMap[".not."] = NOT; 742 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND; 743 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR; 744 opNameMap[".eqv."] = EQV; 745 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV; 746 opNameMap["?"] = SELECT; 747 opNameMap[","] = COMMA; 748 } 749 750 std::size_t tokens{token.SizeInTokens()}; 751 CHECK(tokens > 0); 752 if (*atToken >= tokens) { 753 *error = 754 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US}; 755 return 0; 756 } 757 758 // Parse and evaluate a primary or a unary operator and its operand. 759 std::size_t opAt{*atToken}; 760 std::string t{token.TokenAt(opAt).ToString()}; 761 enum Operator op; 762 std::int64_t left{0}; 763 if (t == "(") { 764 op = PARENS; 765 } else if (IsDecimalDigit(t[0])) { 766 op = CONST; 767 std::size_t consumed{0}; 768 left = std::stoll(t, &consumed, 0 /*base to be detected*/); 769 if (consumed < t.size()) { 770 *error = Message{token.GetTokenProvenanceRange(opAt), 771 "Uninterpretable numeric constant '%s'"_err_en_US, t}; 772 return 0; 773 } 774 } else if (IsLegalIdentifierStart(t[0])) { 775 // undefined macro name -> zero 776 // TODO: BOZ constants? 777 op = CONST; 778 } else if (t == "+") { 779 op = UPLUS; 780 } else if (t == "-") { 781 op = UMINUS; 782 } else if (t == "." && *atToken + 2 < tokens && 783 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" && 784 token.TokenAt(*atToken + 2).ToString() == ".") { 785 op = NOT; 786 *atToken += 2; 787 } else { 788 auto it{opNameMap.find(t)}; 789 if (it != opNameMap.end()) { 790 op = it->second; 791 } else { 792 *error = Message{token.GetTokenProvenanceRange(opAt), 793 "operand expected in expression"_err_en_US}; 794 return 0; 795 } 796 } 797 if (precedence[op] < minimumPrecedence) { 798 *error = Message{token.GetTokenProvenanceRange(opAt), 799 "operator precedence error"_err_en_US}; 800 return 0; 801 } 802 ++*atToken; 803 if (op != CONST) { 804 left = ExpressionValue(token, operandPrecedence[op], atToken, error); 805 if (*error) { 806 return 0; 807 } 808 switch (op) { 809 case PARENS: 810 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { 811 ++*atToken; 812 break; 813 } 814 if (*atToken >= tokens) { 815 *error = Message{token.GetProvenanceRange(), 816 "')' missing from expression"_err_en_US}; 817 } else { 818 *error = Message{ 819 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US}; 820 } 821 return 0; 822 case NOTZERO: left = !left; break; 823 case COMPLEMENT: left = ~left; break; 824 case UPLUS: break; 825 case UMINUS: left = -left; break; 826 case NOT: left = -!left; break; 827 default: CRASH_NO_CASE; 828 } 829 } 830 831 // Parse and evaluate binary operators and their second operands, if present. 832 while (*atToken < tokens) { 833 int advance{1}; 834 t = token.TokenAt(*atToken).ToString(); 835 if (t == "." && *atToken + 2 < tokens && 836 token.TokenAt(*atToken + 2).ToString() == ".") { 837 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.'; 838 advance = 3; 839 } 840 auto it{opNameMap.find(t)}; 841 if (it == opNameMap.end()) { 842 break; 843 } 844 op = it->second; 845 if (op < POWER || precedence[op] < minimumPrecedence) { 846 break; 847 } 848 opAt = *atToken; 849 *atToken += advance; 850 851 std::int64_t right{ 852 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 853 if (*error) { 854 return 0; 855 } 856 857 switch (op) { 858 case POWER: 859 if (left == 0) { 860 if (right < 0) { 861 *error = Message{token.GetTokenProvenanceRange(opAt), 862 "0 ** negative power"_err_en_US}; 863 } 864 } else if (left != 1 && right != 1) { 865 if (right <= 0) { 866 left = !right; 867 } else { 868 std::int64_t power{1}; 869 for (; right > 0; --right) { 870 if ((power * left) / left != power) { 871 *error = Message{token.GetTokenProvenanceRange(opAt), 872 "overflow in exponentation"_err_en_US}; 873 left = 1; 874 } 875 power *= left; 876 } 877 left = power; 878 } 879 } 880 break; 881 case TIMES: 882 if (left != 0 && right != 0 && ((left * right) / left) != right) { 883 *error = Message{token.GetTokenProvenanceRange(opAt), 884 "overflow in multiplication"_err_en_US}; 885 } 886 left = left * right; 887 break; 888 case DIVIDE: 889 if (right == 0) { 890 *error = Message{ 891 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US}; 892 left = 0; 893 } else { 894 left = left / right; 895 } 896 break; 897 case MODULUS: 898 if (right == 0) { 899 *error = Message{ 900 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US}; 901 left = 0; 902 } else { 903 left = left % right; 904 } 905 break; 906 case ADD: 907 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) { 908 *error = Message{token.GetTokenProvenanceRange(opAt), 909 "overflow in addition"_err_en_US}; 910 } 911 left = left + right; 912 break; 913 case SUBTRACT: 914 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) { 915 *error = Message{token.GetTokenProvenanceRange(opAt), 916 "overflow in subtraction"_err_en_US}; 917 } 918 left = left - right; 919 break; 920 case LEFTSHIFT: 921 if (right < 0 || right > 64) { 922 *error = Message{token.GetTokenProvenanceRange(opAt), 923 "bad left shift count"_err_en_US}; 924 } 925 left = right >= 64 ? 0 : left << right; 926 break; 927 case RIGHTSHIFT: 928 if (right < 0 || right > 64) { 929 *error = Message{token.GetTokenProvenanceRange(opAt), 930 "bad right shift count"_err_en_US}; 931 } 932 left = right >= 64 ? 0 : left >> right; 933 break; 934 case BITAND: 935 case AND: left = left & right; break; 936 case BITXOR: left = left ^ right; break; 937 case BITOR: 938 case OR: left = left | right; break; 939 case LT: left = -(left < right); break; 940 case LE: left = -(left <= right); break; 941 case EQ: left = -(left == right); break; 942 case NE: left = -(left != right); break; 943 case GE: left = -(left >= right); break; 944 case GT: left = -(left > right); break; 945 case EQV: left = -(!left == !right); break; 946 case NEQV: left = -(!left != !right); break; 947 case SELECT: 948 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") { 949 *error = Message{token.GetTokenProvenanceRange(opAt), 950 "':' required in selection expression"_err_en_US}; 951 return 0; 952 } else { 953 ++*atToken; 954 std::int64_t third{ 955 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 956 left = left != 0 ? right : third; 957 } 958 break; 959 case COMMA: left = right; break; 960 default: CRASH_NO_CASE; 961 } 962 } 963 return left; 964 } 965 966 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, 967 std::size_t first, std::size_t exprTokens, Prescanner *prescanner) { 968 TokenSequence expr1{expr, first, exprTokens}; 969 if (expr1.HasBlanks()) { 970 expr1.RemoveBlanks(); 971 } 972 TokenSequence expr2; 973 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) { 974 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { 975 CharBlock name; 976 if (j + 3 < expr1.SizeInTokens() && 977 expr1.TokenAt(j + 1).ToString() == "(" && 978 expr1.TokenAt(j + 3).ToString() == ")") { 979 name = expr1.TokenAt(j + 2); 980 j += 3; 981 } else if (j + 1 < expr1.SizeInTokens() && 982 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) { 983 name = expr1.TokenAt(++j); 984 } 985 if (!name.empty()) { 986 char truth{IsNameDefined(name) ? '1' : '0'}; 987 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth)); 988 continue; 989 } 990 } 991 expr2.Put(expr1, j); 992 } 993 TokenSequence expr3{ReplaceMacros(expr2, *prescanner)}; 994 if (expr3.HasBlanks()) { 995 expr3.RemoveBlanks(); 996 } 997 if (expr3.empty()) { 998 prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US); 999 return false; 1000 } 1001 std::size_t atToken{0}; 1002 std::optional<Message> error; 1003 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0}; 1004 if (error) { 1005 prescanner->Say(std::move(*error)); 1006 } else if (atToken < expr3.SizeInTokens() && 1007 expr3.TokenAt(atToken).ToString() != "!") { 1008 prescanner->Say(expr3.GetIntervalProvenanceRange( 1009 atToken, expr3.SizeInTokens() - atToken), 1010 atToken == 0 ? "could not parse any expression"_err_en_US 1011 : "excess characters after expression"_err_en_US); 1012 } 1013 return result; 1014 } 1015 } 1016