1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "preprocessor.h" 10 #include "prescan.h" 11 #include "flang/Common/idioms.h" 12 #include "flang/Parser/characters.h" 13 #include "flang/Parser/message.h" 14 #include "llvm/Support/raw_ostream.h" 15 #include <algorithm> 16 #include <cinttypes> 17 #include <cstddef> 18 #include <ctime> 19 #include <map> 20 #include <memory> 21 #include <optional> 22 #include <set> 23 #include <utility> 24 25 namespace Fortran::parser { 26 27 Definition::Definition( 28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens) 29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {} 30 31 Definition::Definition(const std::vector<std::string> &argNames, 32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, 33 bool isVariadic) 34 : isFunctionLike_{true}, 35 argumentCount_(argNames.size()), isVariadic_{isVariadic}, 36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} 37 38 Definition::Definition(const std::string &predefined, AllSources &sources) 39 : isPredefined_{true}, replacement_{predefined, 40 sources.AddCompilerInsertion(predefined).start()} { 41 } 42 43 bool Definition::set_isDisabled(bool disable) { 44 bool was{isDisabled_}; 45 isDisabled_ = disable; 46 return was; 47 } 48 49 static bool IsLegalIdentifierStart(const CharBlock &cpl) { 50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); 51 } 52 53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, 54 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) { 55 std::map<std::string, std::string> args; 56 char argIndex{'A'}; 57 for (const std::string &arg : argNames) { 58 CHECK(args.find(arg) == args.end()); 59 args[arg] = "~"s + argIndex++; 60 } 61 TokenSequence result; 62 for (std::size_t j{0}; j < tokens; ++j) { 63 CharBlock tok{token.TokenAt(firstToken + j)}; 64 if (IsLegalIdentifierStart(tok)) { 65 auto it{args.find(tok.ToString())}; 66 if (it != args.end()) { 67 result.Put(it->second, token.GetTokenProvenance(j)); 68 continue; 69 } 70 } 71 result.Put(token, firstToken + j, 1); 72 } 73 return result; 74 } 75 76 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) { 77 for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) { 78 if (!tokens.TokenAt(j - 1).IsBlank()) { 79 return j; 80 } 81 } 82 return 0; 83 } 84 85 static TokenSequence Stringify( 86 const TokenSequence &tokens, AllSources &allSources) { 87 TokenSequence result; 88 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')}; 89 result.PutNextTokenChar('"', quoteProvenance); 90 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) { 91 const CharBlock &token{tokens.TokenAt(j)}; 92 std::size_t bytes{token.size()}; 93 for (std::size_t k{0}; k < bytes; ++k) { 94 char ch{token[k]}; 95 Provenance from{tokens.GetTokenProvenance(j, k)}; 96 if (ch == '"' || ch == '\\') { 97 result.PutNextTokenChar(ch, from); 98 } 99 result.PutNextTokenChar(ch, from); 100 } 101 } 102 result.PutNextTokenChar('"', quoteProvenance); 103 result.CloseToken(); 104 return result; 105 } 106 107 TokenSequence Definition::Apply( 108 const std::vector<TokenSequence> &args, AllSources &allSources) { 109 TokenSequence result; 110 bool pasting{false}; 111 bool skipping{false}; 112 int parenthesesNesting{0}; 113 std::size_t tokens{replacement_.SizeInTokens()}; 114 for (std::size_t j{0}; j < tokens; ++j) { 115 const CharBlock &token{replacement_.TokenAt(j)}; 116 std::size_t bytes{token.size()}; 117 if (skipping) { 118 if (bytes == 1) { 119 if (token[0] == '(') { 120 ++parenthesesNesting; 121 } else if (token[0] == ')') { 122 skipping = --parenthesesNesting > 0; 123 } 124 } 125 continue; 126 } 127 if (bytes == 2 && token[0] == '~') { 128 std::size_t index = token[1] - 'A'; 129 if (index >= args.size()) { 130 continue; 131 } 132 std::size_t afterLastNonBlank{AfterLastNonBlank(result)}; 133 if (afterLastNonBlank > 0 && 134 result.TokenAt(afterLastNonBlank - 1).ToString() == "#") { 135 // stringifying 136 while (result.SizeInTokens() >= afterLastNonBlank) { 137 result.pop_back(); 138 } 139 result.Put(Stringify(args[index], allSources)); 140 } else { 141 std::size_t argTokens{args[index].SizeInTokens()}; 142 for (std::size_t k{0}; k < argTokens; ++k) { 143 if (!pasting || !args[index].TokenAt(k).IsBlank()) { 144 result.Put(args[index], k); 145 pasting = false; 146 } 147 } 148 } 149 } else if (bytes == 2 && token[0] == '#' && token[1] == '#') { 150 // Token pasting operator in body (not expanded argument); discard any 151 // immediately preceding white space, then reopen the last token. 152 while (!result.empty() && 153 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) { 154 result.pop_back(); 155 } 156 if (!result.empty()) { 157 result.ReopenLastToken(); 158 pasting = true; 159 } 160 } else if (pasting && token.IsBlank()) { 161 // Delete whitespace immediately following ## in the body. 162 } else if (bytes == 11 && isVariadic_ && 163 token.ToString() == "__VA_ARGS__") { 164 Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')}; 165 for (std::size_t k{argumentCount_}; k < args.size(); ++k) { 166 if (k > argumentCount_) { 167 result.Put(","s, commaProvenance); 168 } 169 result.Put(args[k]); 170 } 171 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && 172 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && 173 parenthesesNesting == 0) { 174 parenthesesNesting = 1; 175 skipping = args.size() == argumentCount_; 176 ++j; 177 } else { 178 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { 179 ++parenthesesNesting; 180 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { 181 if (--parenthesesNesting == 0) { 182 skipping = false; 183 continue; 184 } 185 } 186 result.Put(replacement_, j); 187 } 188 } 189 return result; 190 } 191 192 static std::string FormatTime(const std::time_t &now, const char *format) { 193 char buffer[16]; 194 return {buffer, 195 std::strftime(buffer, sizeof buffer, format, std::localtime(&now))}; 196 } 197 198 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} { 199 // Capture current local date & time once now to avoid having the values 200 // of __DATE__ or __TIME__ change during compilation. 201 std::time_t now; 202 std::time(&now); 203 definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904" 204 Definition{FormatTime(now, "\"%h %e %Y\""), allSources}); 205 definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60" 206 Definition{FormatTime(now, "\"%T\""), allSources}); 207 // The values of these predefined macros depend on their invocation sites. 208 definitions_.emplace( 209 SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources}); 210 definitions_.emplace( 211 SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources}); 212 } 213 214 void Preprocessor::Define(std::string macro, std::string value) { 215 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); 216 } 217 218 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } 219 220 std::optional<TokenSequence> Preprocessor::MacroReplacement( 221 const TokenSequence &input, const Prescanner &prescanner) { 222 // Do quick scan for any use of a defined name. 223 std::size_t tokens{input.SizeInTokens()}; 224 std::size_t j; 225 for (j = 0; j < tokens; ++j) { 226 CharBlock token{input.TokenAt(j)}; 227 if (!token.empty() && IsLegalIdentifierStart(token[0]) && 228 IsNameDefined(token)) { 229 break; 230 } 231 } 232 if (j == tokens) { 233 return std::nullopt; // input contains nothing that would be replaced 234 } 235 TokenSequence result{input, 0, j}; 236 for (; j < tokens; ++j) { 237 const CharBlock &token{input.TokenAt(j)}; 238 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { 239 result.Put(input, j); 240 continue; 241 } 242 auto it{definitions_.find(token)}; 243 if (it == definitions_.end()) { 244 result.Put(input, j); 245 continue; 246 } 247 Definition &def{it->second}; 248 if (def.isDisabled()) { 249 result.Put(input, j); 250 continue; 251 } 252 if (!def.isFunctionLike()) { 253 if (def.isPredefined()) { 254 std::string name{def.replacement().TokenAt(0).ToString()}; 255 std::string repl; 256 if (name == "__FILE__") { 257 repl = "\""s + 258 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"'; 259 } else if (name == "__LINE__") { 260 std::string buf; 261 llvm::raw_string_ostream ss{buf}; 262 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance()); 263 repl = ss.str(); 264 } 265 if (!repl.empty()) { 266 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)}; 267 ProvenanceRange call{allSources_.AddMacroCall( 268 insert, input.GetTokenProvenanceRange(j), repl)}; 269 result.Put(repl, call.start()); 270 continue; 271 } 272 } 273 def.set_isDisabled(true); 274 TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)}; 275 def.set_isDisabled(false); 276 if (!replaced.empty()) { 277 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 278 ProvenanceRange use{input.GetTokenProvenanceRange(j)}; 279 ProvenanceRange newRange{ 280 allSources_.AddMacroCall(from, use, replaced.ToString())}; 281 result.Put(replaced, newRange); 282 } 283 continue; 284 } 285 // Possible function-like macro call. Skip spaces and newlines to see 286 // whether '(' is next. 287 std::size_t k{j}; 288 bool leftParen{false}; 289 while (++k < tokens) { 290 const CharBlock &lookAhead{input.TokenAt(k)}; 291 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { 292 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; 293 break; 294 } 295 } 296 if (!leftParen) { 297 result.Put(input, j); 298 continue; 299 } 300 std::vector<std::size_t> argStart{++k}; 301 for (int nesting{0}; k < tokens; ++k) { 302 CharBlock token{input.TokenAt(k)}; 303 if (token.size() == 1) { 304 char ch{token[0]}; 305 if (ch == '(') { 306 ++nesting; 307 } else if (ch == ')') { 308 if (nesting == 0) { 309 break; 310 } 311 --nesting; 312 } else if (ch == ',' && nesting == 0) { 313 argStart.push_back(k + 1); 314 } 315 } 316 } 317 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) { 318 // Subtle: () is zero arguments, not one empty argument, 319 // unless one argument was expected. 320 argStart.clear(); 321 } 322 if (k >= tokens || argStart.size() < def.argumentCount() || 323 (argStart.size() > def.argumentCount() && !def.isVariadic())) { 324 result.Put(input, j); 325 continue; 326 } 327 std::vector<TokenSequence> args; 328 for (std::size_t n{0}; n < argStart.size(); ++n) { 329 std::size_t at{argStart[n]}; 330 std::size_t count{ 331 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at}; 332 args.emplace_back(TokenSequence(input, at, count)); 333 } 334 def.set_isDisabled(true); 335 TokenSequence replaced{ 336 ReplaceMacros(def.Apply(args, allSources_), prescanner)}; 337 def.set_isDisabled(false); 338 if (!replaced.empty()) { 339 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 340 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)}; 341 ProvenanceRange newRange{ 342 allSources_.AddMacroCall(from, use, replaced.ToString())}; 343 result.Put(replaced, newRange); 344 } 345 j = k; // advance to the terminal ')' 346 } 347 return result; 348 } 349 350 TokenSequence Preprocessor::ReplaceMacros( 351 const TokenSequence &tokens, const Prescanner &prescanner) { 352 if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) { 353 return std::move(*repl); 354 } 355 return tokens; 356 } 357 358 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { 359 std::size_t tokens{dir.SizeInTokens()}; 360 std::size_t j{dir.SkipBlanks(0)}; 361 if (j == tokens) { 362 return; 363 } 364 if (dir.TokenAt(j).ToString() != "#") { 365 prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US); 366 return; 367 } 368 j = dir.SkipBlanks(j + 1); 369 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) { 370 --tokens; 371 } 372 if (j == tokens) { 373 return; 374 } 375 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { 376 return; // treat like #line, ignore it 377 } 378 std::size_t dirOffset{j}; 379 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; 380 j = dir.SkipBlanks(j + 1); 381 CharBlock nameToken; 382 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) { 383 nameToken = dir.TokenAt(j); 384 } 385 if (dirName == "line") { 386 // #line is ignored 387 } else if (dirName == "define") { 388 if (nameToken.empty()) { 389 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 390 "#define: missing or invalid name"_err_en_US); 391 return; 392 } 393 nameToken = SaveTokenAsName(nameToken); 394 definitions_.erase(nameToken); 395 if (++j < tokens && dir.TokenAt(j).size() == 1 && 396 dir.TokenAt(j)[0] == '(') { 397 j = dir.SkipBlanks(j + 1); 398 std::vector<std::string> argName; 399 bool isVariadic{false}; 400 if (dir.TokenAt(j).ToString() != ")") { 401 while (true) { 402 std::string an{dir.TokenAt(j).ToString()}; 403 if (an == "...") { 404 isVariadic = true; 405 } else { 406 if (an.empty() || !IsLegalIdentifierStart(an[0])) { 407 prescanner->Say(dir.GetTokenProvenanceRange(j), 408 "#define: missing or invalid argument name"_err_en_US); 409 return; 410 } 411 argName.push_back(an); 412 } 413 j = dir.SkipBlanks(j + 1); 414 if (j == tokens) { 415 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 416 "#define: malformed argument list"_err_en_US); 417 return; 418 } 419 std::string punc{dir.TokenAt(j).ToString()}; 420 if (punc == ")") { 421 break; 422 } 423 if (isVariadic || punc != ",") { 424 prescanner->Say(dir.GetTokenProvenanceRange(j), 425 "#define: malformed argument list"_err_en_US); 426 return; 427 } 428 j = dir.SkipBlanks(j + 1); 429 if (j == tokens) { 430 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 431 "#define: malformed argument list"_err_en_US); 432 return; 433 } 434 } 435 if (std::set<std::string>(argName.begin(), argName.end()).size() != 436 argName.size()) { 437 prescanner->Say(dir.GetTokenProvenance(dirOffset), 438 "#define: argument names are not distinct"_err_en_US); 439 return; 440 } 441 } 442 j = dir.SkipBlanks(j + 1); 443 definitions_.emplace(std::make_pair( 444 nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); 445 } else { 446 j = dir.SkipBlanks(j + 1); 447 definitions_.emplace( 448 std::make_pair(nameToken, Definition{dir, j, tokens - j})); 449 } 450 } else if (dirName == "undef") { 451 if (nameToken.empty()) { 452 prescanner->Say( 453 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 454 "# missing or invalid name"_err_en_US); 455 } else { 456 j = dir.SkipBlanks(j + 1); 457 if (j != tokens) { 458 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 459 "#undef: excess tokens at end of directive"_err_en_US); 460 } else { 461 definitions_.erase(nameToken); 462 } 463 } 464 } else if (dirName == "ifdef" || dirName == "ifndef") { 465 bool doThen{false}; 466 if (nameToken.empty()) { 467 prescanner->Say( 468 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 469 "#%s: missing name"_err_en_US, dirName); 470 } else { 471 j = dir.SkipBlanks(j + 1); 472 if (j != tokens) { 473 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 474 "#%s: excess tokens at end of directive"_en_US, dirName); 475 } 476 doThen = IsNameDefined(nameToken) == (dirName == "ifdef"); 477 } 478 if (doThen) { 479 ifStack_.push(CanDeadElseAppear::Yes); 480 } else { 481 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 482 dir.GetTokenProvenance(dirOffset)); 483 } 484 } else if (dirName == "if") { 485 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) { 486 ifStack_.push(CanDeadElseAppear::Yes); 487 } else { 488 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 489 dir.GetTokenProvenanceRange(dirOffset)); 490 } 491 } else if (dirName == "else") { 492 if (j != tokens) { 493 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 494 "#else: excess tokens at end of directive"_err_en_US); 495 } else if (ifStack_.empty()) { 496 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 497 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); 498 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 499 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 500 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US); 501 } else { 502 ifStack_.pop(); 503 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner, 504 dir.GetTokenProvenanceRange(dirOffset)); 505 } 506 } else if (dirName == "elif") { 507 if (ifStack_.empty()) { 508 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 509 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US); 510 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 511 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 512 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US); 513 } else { 514 ifStack_.pop(); 515 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner, 516 dir.GetTokenProvenanceRange(dirOffset)); 517 } 518 } else if (dirName == "endif") { 519 if (j != tokens) { 520 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 521 "#endif: excess tokens at end of directive"_err_en_US); 522 } else if (ifStack_.empty()) { 523 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 524 "#endif: no #if, #ifdef, or #ifndef"_err_en_US); 525 } else { 526 ifStack_.pop(); 527 } 528 } else if (dirName == "error") { 529 prescanner->Say( 530 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 531 "%s"_err_en_US, dir.ToString()); 532 } else if (dirName == "warning" || dirName == "comment" || 533 dirName == "note") { 534 prescanner->Say( 535 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 536 "%s"_en_US, dir.ToString()); 537 } else if (dirName == "include") { 538 if (j == tokens) { 539 prescanner->Say( 540 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 541 "#include: missing name of file to include"_err_en_US); 542 return; 543 } 544 std::string include; 545 if (dir.TokenAt(j).ToString() == "<") { 546 std::size_t k{j + 1}; 547 if (k >= tokens) { 548 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 549 "#include: file name missing"_err_en_US); 550 return; 551 } 552 while (k < tokens && dir.TokenAt(k) != ">") { 553 ++k; 554 } 555 if (k >= tokens) { 556 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 557 "#include: expected '>' at end of included file"_en_US); 558 } else if (k + 1 < tokens) { 559 prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1), 560 "#include: extra stuff ignored after '>'"_en_US); 561 } 562 TokenSequence braced{dir, j + 1, k - j - 1}; 563 include = ReplaceMacros(braced, *prescanner).ToString(); 564 } else if (j + 1 == tokens && 565 (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && 566 include.substr(include.size() - 1, 1) == "\"") { 567 include = include.substr(1, include.size() - 2); 568 } else { 569 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 570 "#include: expected name of file to include"_err_en_US); 571 return; 572 } 573 if (include.empty()) { 574 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 575 "#include: empty include file name"_err_en_US); 576 return; 577 } 578 std::string buf; 579 llvm::raw_string_ostream error{buf}; 580 const SourceFile *included{allSources_.Open(include, error)}; 581 if (!included) { 582 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 583 "#include: %s"_err_en_US, error.str()); 584 } else if (included->bytes() > 0) { 585 ProvenanceRange fileRange{ 586 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())}; 587 Prescanner{*prescanner} 588 .set_encoding(included->encoding()) 589 .Prescan(fileRange); 590 } 591 } else { 592 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 593 "#%s: unknown or unimplemented directive"_err_en_US, dirName); 594 } 595 } 596 597 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { 598 names_.push_back(t.ToString()); 599 return {names_.back().data(), names_.back().size()}; 600 } 601 602 bool Preprocessor::IsNameDefined(const CharBlock &token) { 603 return definitions_.find(token) != definitions_.end(); 604 } 605 606 static std::string GetDirectiveName( 607 const TokenSequence &line, std::size_t *rest) { 608 std::size_t tokens{line.SizeInTokens()}; 609 std::size_t j{line.SkipBlanks(0)}; 610 if (j == tokens || line.TokenAt(j).ToString() != "#") { 611 *rest = tokens; 612 return ""; 613 } 614 j = line.SkipBlanks(j + 1); 615 if (j == tokens) { 616 *rest = tokens; 617 return ""; 618 } 619 *rest = line.SkipBlanks(j + 1); 620 return ToLowerCaseLetters(line.TokenAt(j).ToString()); 621 } 622 623 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName, 624 IsElseActive isElseActive, Prescanner *prescanner, 625 ProvenanceRange provenanceRange) { 626 int nesting{0}; 627 while (!prescanner->IsAtEnd()) { 628 if (!prescanner->IsNextLinePreprocessorDirective()) { 629 prescanner->NextLine(); 630 continue; 631 } 632 TokenSequence line{prescanner->TokenizePreprocessorDirective()}; 633 std::size_t rest{0}; 634 std::string dn{GetDirectiveName(line, &rest)}; 635 if (dn == "ifdef" || dn == "ifndef" || dn == "if") { 636 ++nesting; 637 } else if (dn == "endif") { 638 if (nesting-- == 0) { 639 return; 640 } 641 } else if (isElseActive == IsElseActive::Yes && nesting == 0) { 642 if (dn == "else") { 643 ifStack_.push(CanDeadElseAppear::No); 644 return; 645 } 646 if (dn == "elif" && 647 IsIfPredicateTrue( 648 line, rest, line.SizeInTokens() - rest, prescanner)) { 649 ifStack_.push(CanDeadElseAppear::Yes); 650 return; 651 } 652 } 653 } 654 prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName); 655 } 656 657 // Precedence level codes used here to accommodate mixed Fortran and C: 658 // 15: parentheses and constants, logical !, bitwise ~ 659 // 14: unary + and - 660 // 13: ** 661 // 12: *, /, % (modulus) 662 // 11: + and - 663 // 10: << and >> 664 // 9: bitwise & 665 // 8: bitwise ^ 666 // 7: bitwise | 667 // 6: relations (.EQ., ==, &c.) 668 // 5: .NOT. 669 // 4: .AND., && 670 // 3: .OR., || 671 // 2: .EQV. and .NEQV. / .XOR. 672 // 1: ? : 673 // 0: , 674 static std::int64_t ExpressionValue(const TokenSequence &token, 675 int minimumPrecedence, std::size_t *atToken, 676 std::optional<Message> *error) { 677 enum Operator { 678 PARENS, 679 CONST, 680 NOTZERO, // ! 681 COMPLEMENT, // ~ 682 UPLUS, 683 UMINUS, 684 POWER, 685 TIMES, 686 DIVIDE, 687 MODULUS, 688 ADD, 689 SUBTRACT, 690 LEFTSHIFT, 691 RIGHTSHIFT, 692 BITAND, 693 BITXOR, 694 BITOR, 695 LT, 696 LE, 697 EQ, 698 NE, 699 GE, 700 GT, 701 NOT, 702 AND, 703 OR, 704 EQV, 705 NEQV, 706 SELECT, 707 COMMA 708 }; 709 static const int precedence[]{ 710 15, 15, 15, 15, // (), 6, !, ~ 711 14, 14, // unary +, - 712 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >> 713 9, 8, 7, // &, ^, | 714 6, 6, 6, 6, 6, 6, // relations .LT. to .GT. 715 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV. 716 1, 0 // ?: and , 717 }; 718 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12, 719 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0}; 720 721 static std::map<std::string, enum Operator> opNameMap; 722 if (opNameMap.empty()) { 723 opNameMap["("] = PARENS; 724 opNameMap["!"] = NOTZERO; 725 opNameMap["~"] = COMPLEMENT; 726 opNameMap["**"] = POWER; 727 opNameMap["*"] = TIMES; 728 opNameMap["/"] = DIVIDE; 729 opNameMap["%"] = MODULUS; 730 opNameMap["+"] = ADD; 731 opNameMap["-"] = SUBTRACT; 732 opNameMap["<<"] = LEFTSHIFT; 733 opNameMap[">>"] = RIGHTSHIFT; 734 opNameMap["&"] = BITAND; 735 opNameMap["^"] = BITXOR; 736 opNameMap["|"] = BITOR; 737 opNameMap[".lt."] = opNameMap["<"] = LT; 738 opNameMap[".le."] = opNameMap["<="] = LE; 739 opNameMap[".eq."] = opNameMap["=="] = EQ; 740 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE; 741 opNameMap[".ge."] = opNameMap[">="] = GE; 742 opNameMap[".gt."] = opNameMap[">"] = GT; 743 opNameMap[".not."] = NOT; 744 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND; 745 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR; 746 opNameMap[".eqv."] = EQV; 747 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV; 748 opNameMap["?"] = SELECT; 749 opNameMap[","] = COMMA; 750 } 751 752 std::size_t tokens{token.SizeInTokens()}; 753 CHECK(tokens > 0); 754 if (*atToken >= tokens) { 755 *error = 756 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US}; 757 return 0; 758 } 759 760 // Parse and evaluate a primary or a unary operator and its operand. 761 std::size_t opAt{*atToken}; 762 std::string t{token.TokenAt(opAt).ToString()}; 763 enum Operator op; 764 std::int64_t left{0}; 765 if (t == "(") { 766 op = PARENS; 767 } else if (IsDecimalDigit(t[0])) { 768 op = CONST; 769 std::size_t consumed{0}; 770 left = std::stoll(t, &consumed, 0 /*base to be detected*/); 771 if (consumed < t.size()) { 772 *error = Message{token.GetTokenProvenanceRange(opAt), 773 "Uninterpretable numeric constant '%s'"_err_en_US, t}; 774 return 0; 775 } 776 } else if (IsLegalIdentifierStart(t[0])) { 777 // undefined macro name -> zero 778 // TODO: BOZ constants? 779 op = CONST; 780 } else if (t == "+") { 781 op = UPLUS; 782 } else if (t == "-") { 783 op = UMINUS; 784 } else if (t == "." && *atToken + 2 < tokens && 785 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" && 786 token.TokenAt(*atToken + 2).ToString() == ".") { 787 op = NOT; 788 *atToken += 2; 789 } else { 790 auto it{opNameMap.find(t)}; 791 if (it != opNameMap.end()) { 792 op = it->second; 793 } else { 794 *error = Message{token.GetTokenProvenanceRange(opAt), 795 "operand expected in expression"_err_en_US}; 796 return 0; 797 } 798 } 799 if (precedence[op] < minimumPrecedence) { 800 *error = Message{token.GetTokenProvenanceRange(opAt), 801 "operator precedence error"_err_en_US}; 802 return 0; 803 } 804 ++*atToken; 805 if (op != CONST) { 806 left = ExpressionValue(token, operandPrecedence[op], atToken, error); 807 if (*error) { 808 return 0; 809 } 810 switch (op) { 811 case PARENS: 812 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { 813 ++*atToken; 814 break; 815 } 816 if (*atToken >= tokens) { 817 *error = Message{token.GetProvenanceRange(), 818 "')' missing from expression"_err_en_US}; 819 } else { 820 *error = Message{ 821 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US}; 822 } 823 return 0; 824 case NOTZERO: left = !left; break; 825 case COMPLEMENT: left = ~left; break; 826 case UPLUS: break; 827 case UMINUS: left = -left; break; 828 case NOT: left = -!left; break; 829 default: CRASH_NO_CASE; 830 } 831 } 832 833 // Parse and evaluate binary operators and their second operands, if present. 834 while (*atToken < tokens) { 835 int advance{1}; 836 t = token.TokenAt(*atToken).ToString(); 837 if (t == "." && *atToken + 2 < tokens && 838 token.TokenAt(*atToken + 2).ToString() == ".") { 839 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.'; 840 advance = 3; 841 } 842 auto it{opNameMap.find(t)}; 843 if (it == opNameMap.end()) { 844 break; 845 } 846 op = it->second; 847 if (op < POWER || precedence[op] < minimumPrecedence) { 848 break; 849 } 850 opAt = *atToken; 851 *atToken += advance; 852 853 std::int64_t right{ 854 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 855 if (*error) { 856 return 0; 857 } 858 859 switch (op) { 860 case POWER: 861 if (left == 0) { 862 if (right < 0) { 863 *error = Message{token.GetTokenProvenanceRange(opAt), 864 "0 ** negative power"_err_en_US}; 865 } 866 } else if (left != 1 && right != 1) { 867 if (right <= 0) { 868 left = !right; 869 } else { 870 std::int64_t power{1}; 871 for (; right > 0; --right) { 872 if ((power * left) / left != power) { 873 *error = Message{token.GetTokenProvenanceRange(opAt), 874 "overflow in exponentation"_err_en_US}; 875 left = 1; 876 } 877 power *= left; 878 } 879 left = power; 880 } 881 } 882 break; 883 case TIMES: 884 if (left != 0 && right != 0 && ((left * right) / left) != right) { 885 *error = Message{token.GetTokenProvenanceRange(opAt), 886 "overflow in multiplication"_err_en_US}; 887 } 888 left = left * right; 889 break; 890 case DIVIDE: 891 if (right == 0) { 892 *error = Message{ 893 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US}; 894 left = 0; 895 } else { 896 left = left / right; 897 } 898 break; 899 case MODULUS: 900 if (right == 0) { 901 *error = Message{ 902 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US}; 903 left = 0; 904 } else { 905 left = left % right; 906 } 907 break; 908 case ADD: 909 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) { 910 *error = Message{token.GetTokenProvenanceRange(opAt), 911 "overflow in addition"_err_en_US}; 912 } 913 left = left + right; 914 break; 915 case SUBTRACT: 916 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) { 917 *error = Message{token.GetTokenProvenanceRange(opAt), 918 "overflow in subtraction"_err_en_US}; 919 } 920 left = left - right; 921 break; 922 case LEFTSHIFT: 923 if (right < 0 || right > 64) { 924 *error = Message{token.GetTokenProvenanceRange(opAt), 925 "bad left shift count"_err_en_US}; 926 } 927 left = right >= 64 ? 0 : left << right; 928 break; 929 case RIGHTSHIFT: 930 if (right < 0 || right > 64) { 931 *error = Message{token.GetTokenProvenanceRange(opAt), 932 "bad right shift count"_err_en_US}; 933 } 934 left = right >= 64 ? 0 : left >> right; 935 break; 936 case BITAND: 937 case AND: left = left & right; break; 938 case BITXOR: left = left ^ right; break; 939 case BITOR: 940 case OR: left = left | right; break; 941 case LT: left = -(left < right); break; 942 case LE: left = -(left <= right); break; 943 case EQ: left = -(left == right); break; 944 case NE: left = -(left != right); break; 945 case GE: left = -(left >= right); break; 946 case GT: left = -(left > right); break; 947 case EQV: left = -(!left == !right); break; 948 case NEQV: left = -(!left != !right); break; 949 case SELECT: 950 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") { 951 *error = Message{token.GetTokenProvenanceRange(opAt), 952 "':' required in selection expression"_err_en_US}; 953 return 0; 954 } else { 955 ++*atToken; 956 std::int64_t third{ 957 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 958 left = left != 0 ? right : third; 959 } 960 break; 961 case COMMA: left = right; break; 962 default: CRASH_NO_CASE; 963 } 964 } 965 return left; 966 } 967 968 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, 969 std::size_t first, std::size_t exprTokens, Prescanner *prescanner) { 970 TokenSequence expr1{expr, first, exprTokens}; 971 if (expr1.HasBlanks()) { 972 expr1.RemoveBlanks(); 973 } 974 TokenSequence expr2; 975 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) { 976 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { 977 CharBlock name; 978 if (j + 3 < expr1.SizeInTokens() && 979 expr1.TokenAt(j + 1).ToString() == "(" && 980 expr1.TokenAt(j + 3).ToString() == ")") { 981 name = expr1.TokenAt(j + 2); 982 j += 3; 983 } else if (j + 1 < expr1.SizeInTokens() && 984 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) { 985 name = expr1.TokenAt(++j); 986 } 987 if (!name.empty()) { 988 char truth{IsNameDefined(name) ? '1' : '0'}; 989 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth)); 990 continue; 991 } 992 } 993 expr2.Put(expr1, j); 994 } 995 TokenSequence expr3{ReplaceMacros(expr2, *prescanner)}; 996 if (expr3.HasBlanks()) { 997 expr3.RemoveBlanks(); 998 } 999 if (expr3.empty()) { 1000 prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US); 1001 return false; 1002 } 1003 std::size_t atToken{0}; 1004 std::optional<Message> error; 1005 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0}; 1006 if (error) { 1007 prescanner->Say(std::move(*error)); 1008 } else if (atToken < expr3.SizeInTokens() && 1009 expr3.TokenAt(atToken).ToString() != "!") { 1010 prescanner->Say(expr3.GetIntervalProvenanceRange( 1011 atToken, expr3.SizeInTokens() - atToken), 1012 atToken == 0 ? "could not parse any expression"_err_en_US 1013 : "excess characters after expression"_err_en_US); 1014 } 1015 return result; 1016 } 1017 } 1018