1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "preprocessor.h" 10 #include "prescan.h" 11 #include "flang/Common/idioms.h" 12 #include "flang/Parser/characters.h" 13 #include "flang/Parser/message.h" 14 #include "llvm/Support/raw_ostream.h" 15 #include <algorithm> 16 #include <cinttypes> 17 #include <cstddef> 18 #include <ctime> 19 #include <map> 20 #include <memory> 21 #include <optional> 22 #include <set> 23 #include <utility> 24 25 namespace Fortran::parser { 26 27 Definition::Definition( 28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens) 29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {} 30 31 Definition::Definition(const std::vector<std::string> &argNames, 32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, 33 bool isVariadic) 34 : isFunctionLike_{true}, 35 argumentCount_(argNames.size()), isVariadic_{isVariadic}, 36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} 37 38 Definition::Definition(const std::string &predefined, AllSources &sources) 39 : isPredefined_{true}, 40 replacement_{ 41 predefined, sources.AddCompilerInsertion(predefined).start()} {} 42 43 bool Definition::set_isDisabled(bool disable) { 44 bool was{isDisabled_}; 45 isDisabled_ = disable; 46 return was; 47 } 48 49 static bool IsLegalIdentifierStart(const CharBlock &cpl) { 50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); 51 } 52 53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, 54 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) { 55 std::map<std::string, std::string> args; 56 char argIndex{'A'}; 57 for (const std::string &arg : argNames) { 58 CHECK(args.find(arg) == args.end()); 59 args[arg] = "~"s + argIndex++; 60 } 61 TokenSequence result; 62 for (std::size_t j{0}; j < tokens; ++j) { 63 CharBlock tok{token.TokenAt(firstToken + j)}; 64 if (IsLegalIdentifierStart(tok)) { 65 auto it{args.find(tok.ToString())}; 66 if (it != args.end()) { 67 result.Put(it->second, token.GetTokenProvenance(j)); 68 continue; 69 } 70 } 71 result.Put(token, firstToken + j, 1); 72 } 73 return result; 74 } 75 76 static TokenSequence Stringify( 77 const TokenSequence &tokens, AllSources &allSources) { 78 TokenSequence result; 79 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')}; 80 result.PutNextTokenChar('"', quoteProvenance); 81 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) { 82 const CharBlock &token{tokens.TokenAt(j)}; 83 std::size_t bytes{token.size()}; 84 for (std::size_t k{0}; k < bytes; ++k) { 85 char ch{token[k]}; 86 Provenance from{tokens.GetTokenProvenance(j, k)}; 87 if (ch == '"' || ch == '\\') { 88 result.PutNextTokenChar(ch, from); 89 } 90 result.PutNextTokenChar(ch, from); 91 } 92 } 93 result.PutNextTokenChar('"', quoteProvenance); 94 result.CloseToken(); 95 return result; 96 } 97 98 constexpr bool IsTokenPasting(CharBlock opr) { 99 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#'; 100 } 101 102 static bool AnyTokenPasting(const TokenSequence &text) { 103 std::size_t tokens{text.SizeInTokens()}; 104 for (std::size_t j{0}; j < tokens; ++j) { 105 if (IsTokenPasting(text.TokenAt(j))) { 106 return true; 107 } 108 } 109 return false; 110 } 111 112 static TokenSequence TokenPasting(TokenSequence &&text) { 113 if (!AnyTokenPasting(text)) { 114 return std::move(text); 115 } 116 TokenSequence result; 117 std::size_t tokens{text.SizeInTokens()}; 118 bool pasting{false}; 119 for (std::size_t j{0}; j < tokens; ++j) { 120 if (IsTokenPasting(text.TokenAt(j))) { 121 if (!pasting) { 122 while (!result.empty() && 123 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) { 124 result.pop_back(); 125 } 126 if (!result.empty()) { 127 result.ReopenLastToken(); 128 pasting = true; 129 } 130 } 131 } else if (pasting && text.TokenAt(j).IsBlank()) { 132 } else { 133 result.Put(text, j, 1); 134 pasting = false; 135 } 136 } 137 return result; 138 } 139 140 TokenSequence Definition::Apply( 141 const std::vector<TokenSequence> &args, Prescanner &prescanner) { 142 TokenSequence result; 143 bool skipping{false}; 144 int parenthesesNesting{0}; 145 std::size_t tokens{replacement_.SizeInTokens()}; 146 for (std::size_t j{0}; j < tokens; ++j) { 147 CharBlock token{replacement_.TokenAt(j)}; 148 std::size_t bytes{token.size()}; 149 if (skipping) { 150 if (bytes == 1) { 151 if (token[0] == '(') { 152 ++parenthesesNesting; 153 } else if (token[0] == ')') { 154 skipping = --parenthesesNesting > 0; 155 } 156 } 157 continue; 158 } 159 if (bytes == 2 && token[0] == '~') { // argument substitution 160 std::size_t index = token[1] - 'A'; 161 if (index >= args.size()) { 162 continue; 163 } 164 std::size_t prev{j}; 165 while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) { 166 --prev; 167 } 168 if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 && 169 replacement_.TokenAt(prev - 1)[0] == 170 '#') { // stringify argument without macro replacement 171 std::size_t resultSize{result.SizeInTokens()}; 172 while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) { 173 result.pop_back(); 174 } 175 CHECK(resultSize > 0 && 176 result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1)); 177 result.pop_back(); 178 result.Put(Stringify(args[index], prescanner.allSources())); 179 } else { 180 const TokenSequence *arg{&args[index]}; 181 std::optional<TokenSequence> replaced; 182 // Don't replace macros in the actual argument if it is preceded or 183 // followed by the token-pasting operator ## in the replacement text. 184 if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) { 185 auto next{replacement_.SkipBlanks(j + 1)}; 186 if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) { 187 // Apply macro replacement to the actual argument 188 replaced = 189 prescanner.preprocessor().MacroReplacement(*arg, prescanner); 190 if (replaced) { 191 arg = &*replaced; 192 } 193 } 194 } 195 result.Put(DEREF(arg)); 196 } 197 } else if (bytes == 11 && isVariadic_ && 198 token.ToString() == "__VA_ARGS__") { 199 Provenance commaProvenance{ 200 prescanner.preprocessor().allSources().CompilerInsertionProvenance( 201 ',')}; 202 for (std::size_t k{argumentCount_}; k < args.size(); ++k) { 203 if (k > argumentCount_) { 204 result.Put(","s, commaProvenance); 205 } 206 result.Put(args[k]); 207 } 208 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && 209 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && 210 parenthesesNesting == 0) { 211 parenthesesNesting = 1; 212 skipping = args.size() == argumentCount_; 213 ++j; 214 } else { 215 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { 216 ++parenthesesNesting; 217 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { 218 if (--parenthesesNesting == 0) { 219 skipping = false; 220 continue; 221 } 222 } 223 result.Put(replacement_, j); 224 } 225 } 226 return TokenPasting(std::move(result)); 227 } 228 229 static std::string FormatTime(const std::time_t &now, const char *format) { 230 char buffer[16]; 231 return {buffer, 232 std::strftime(buffer, sizeof buffer, format, std::localtime(&now))}; 233 } 234 235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {} 236 237 void Preprocessor::DefineStandardMacros() { 238 // Capture current local date & time once now to avoid having the values 239 // of __DATE__ or __TIME__ change during compilation. 240 std::time_t now; 241 std::time(&now); 242 Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904" 243 Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60" 244 // The values of these predefined macros depend on their invocation sites. 245 Define("__FILE__"s, "__FILE__"s); 246 Define("__LINE__"s, "__LINE__"s); 247 } 248 249 void Preprocessor::Define(std::string macro, std::string value) { 250 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); 251 } 252 253 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } 254 255 std::optional<TokenSequence> Preprocessor::MacroReplacement( 256 const TokenSequence &input, Prescanner &prescanner) { 257 // Do quick scan for any use of a defined name. 258 if (definitions_.empty()) { 259 return std::nullopt; 260 } 261 std::size_t tokens{input.SizeInTokens()}; 262 std::size_t j; 263 for (j = 0; j < tokens; ++j) { 264 CharBlock token{input.TokenAt(j)}; 265 if (!token.empty() && IsLegalIdentifierStart(token[0]) && 266 IsNameDefined(token)) { 267 break; 268 } 269 } 270 if (j == tokens) { 271 return std::nullopt; // input contains nothing that would be replaced 272 } 273 TokenSequence result{input, 0, j}; 274 for (; j < tokens; ++j) { 275 const CharBlock &token{input.TokenAt(j)}; 276 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { 277 result.Put(input, j); 278 continue; 279 } 280 auto it{definitions_.find(token)}; 281 if (it == definitions_.end()) { 282 result.Put(input, j); 283 continue; 284 } 285 Definition &def{it->second}; 286 if (def.isDisabled()) { 287 result.Put(input, j); 288 continue; 289 } 290 if (!def.isFunctionLike()) { 291 if (def.isPredefined()) { 292 std::string name{def.replacement().TokenAt(0).ToString()}; 293 std::string repl; 294 if (name == "__FILE__") { 295 repl = "\""s + 296 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"'; 297 } else if (name == "__LINE__") { 298 std::string buf; 299 llvm::raw_string_ostream ss{buf}; 300 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance()); 301 repl = ss.str(); 302 } 303 if (!repl.empty()) { 304 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)}; 305 ProvenanceRange call{allSources_.AddMacroCall( 306 insert, input.GetTokenProvenanceRange(j), repl)}; 307 result.Put(repl, call.start()); 308 continue; 309 } 310 } 311 def.set_isDisabled(true); 312 TokenSequence replaced{ 313 TokenPasting(ReplaceMacros(def.replacement(), prescanner))}; 314 def.set_isDisabled(false); 315 if (!replaced.empty()) { 316 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 317 ProvenanceRange use{input.GetTokenProvenanceRange(j)}; 318 ProvenanceRange newRange{ 319 allSources_.AddMacroCall(from, use, replaced.ToString())}; 320 result.Put(replaced, newRange); 321 } 322 continue; 323 } 324 // Possible function-like macro call. Skip spaces and newlines to see 325 // whether '(' is next. 326 std::size_t k{j}; 327 bool leftParen{false}; 328 while (++k < tokens) { 329 const CharBlock &lookAhead{input.TokenAt(k)}; 330 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { 331 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; 332 break; 333 } 334 } 335 if (!leftParen) { 336 result.Put(input, j); 337 continue; 338 } 339 std::vector<std::size_t> argStart{++k}; 340 for (int nesting{0}; k < tokens; ++k) { 341 CharBlock token{input.TokenAt(k)}; 342 if (token.size() == 1) { 343 char ch{token[0]}; 344 if (ch == '(') { 345 ++nesting; 346 } else if (ch == ')') { 347 if (nesting == 0) { 348 break; 349 } 350 --nesting; 351 } else if (ch == ',' && nesting == 0) { 352 argStart.push_back(k + 1); 353 } 354 } 355 } 356 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) { 357 // Subtle: () is zero arguments, not one empty argument, 358 // unless one argument was expected. 359 argStart.clear(); 360 } 361 if (k >= tokens || argStart.size() < def.argumentCount() || 362 (argStart.size() > def.argumentCount() && !def.isVariadic())) { 363 result.Put(input, j); 364 continue; 365 } 366 std::vector<TokenSequence> args; 367 for (std::size_t n{0}; n < argStart.size(); ++n) { 368 std::size_t at{argStart[n]}; 369 std::size_t count{ 370 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at}; 371 args.emplace_back(TokenSequence(input, at, count)); 372 } 373 def.set_isDisabled(true); 374 TokenSequence replaced{ 375 ReplaceMacros(def.Apply(args, prescanner), prescanner)}; 376 def.set_isDisabled(false); 377 if (!replaced.empty()) { 378 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 379 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)}; 380 ProvenanceRange newRange{ 381 allSources_.AddMacroCall(from, use, replaced.ToString())}; 382 result.Put(replaced, newRange); 383 } 384 j = k; // advance to the terminal ')' 385 } 386 return result; 387 } 388 389 TokenSequence Preprocessor::ReplaceMacros( 390 const TokenSequence &tokens, Prescanner &prescanner) { 391 if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) { 392 return std::move(*repl); 393 } 394 return tokens; 395 } 396 397 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { 398 std::size_t tokens{dir.SizeInTokens()}; 399 std::size_t j{dir.SkipBlanks(0)}; 400 if (j == tokens) { 401 return; 402 } 403 CHECK(prescanner); // TODO: change to reference 404 if (dir.TokenAt(j).ToString() != "#") { 405 prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US); 406 return; 407 } 408 j = dir.SkipBlanks(j + 1); 409 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) { 410 --tokens; 411 } 412 if (j == tokens) { 413 return; 414 } 415 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { 416 return; // treat like #line, ignore it 417 } 418 std::size_t dirOffset{j}; 419 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; 420 j = dir.SkipBlanks(j + 1); 421 CharBlock nameToken; 422 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) { 423 nameToken = dir.TokenAt(j); 424 } 425 if (dirName == "line") { 426 // #line is ignored 427 } else if (dirName == "define") { 428 if (nameToken.empty()) { 429 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 430 "#define: missing or invalid name"_err_en_US); 431 return; 432 } 433 nameToken = SaveTokenAsName(nameToken); 434 definitions_.erase(nameToken); 435 if (++j < tokens && dir.TokenAt(j).size() == 1 && 436 dir.TokenAt(j)[0] == '(') { 437 j = dir.SkipBlanks(j + 1); 438 std::vector<std::string> argName; 439 bool isVariadic{false}; 440 if (dir.TokenAt(j).ToString() != ")") { 441 while (true) { 442 std::string an{dir.TokenAt(j).ToString()}; 443 if (an == "...") { 444 isVariadic = true; 445 } else { 446 if (an.empty() || !IsLegalIdentifierStart(an[0])) { 447 prescanner->Say(dir.GetTokenProvenanceRange(j), 448 "#define: missing or invalid argument name"_err_en_US); 449 return; 450 } 451 argName.push_back(an); 452 } 453 j = dir.SkipBlanks(j + 1); 454 if (j == tokens) { 455 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 456 "#define: malformed argument list"_err_en_US); 457 return; 458 } 459 std::string punc{dir.TokenAt(j).ToString()}; 460 if (punc == ")") { 461 break; 462 } 463 if (isVariadic || punc != ",") { 464 prescanner->Say(dir.GetTokenProvenanceRange(j), 465 "#define: malformed argument list"_err_en_US); 466 return; 467 } 468 j = dir.SkipBlanks(j + 1); 469 if (j == tokens) { 470 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 471 "#define: malformed argument list"_err_en_US); 472 return; 473 } 474 } 475 if (std::set<std::string>(argName.begin(), argName.end()).size() != 476 argName.size()) { 477 prescanner->Say(dir.GetTokenProvenance(dirOffset), 478 "#define: argument names are not distinct"_err_en_US); 479 return; 480 } 481 } 482 j = dir.SkipBlanks(j + 1); 483 definitions_.emplace(std::make_pair( 484 nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); 485 } else { 486 j = dir.SkipBlanks(j + 1); 487 definitions_.emplace( 488 std::make_pair(nameToken, Definition{dir, j, tokens - j})); 489 } 490 } else if (dirName == "undef") { 491 if (nameToken.empty()) { 492 prescanner->Say( 493 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 494 "# missing or invalid name"_err_en_US); 495 } else { 496 if (dir.IsAnythingLeft(++j)) { 497 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 498 "#undef: excess tokens at end of directive"_en_US); 499 } else { 500 definitions_.erase(nameToken); 501 } 502 } 503 } else if (dirName == "ifdef" || dirName == "ifndef") { 504 bool doThen{false}; 505 if (nameToken.empty()) { 506 prescanner->Say( 507 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 508 "#%s: missing name"_err_en_US, dirName); 509 } else { 510 if (dir.IsAnythingLeft(++j)) { 511 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 512 "#%s: excess tokens at end of directive"_en_US, dirName); 513 } 514 doThen = IsNameDefined(nameToken) == (dirName == "ifdef"); 515 } 516 if (doThen) { 517 ifStack_.push(CanDeadElseAppear::Yes); 518 } else { 519 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 520 dir.GetTokenProvenance(dirOffset)); 521 } 522 } else if (dirName == "if") { 523 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) { 524 ifStack_.push(CanDeadElseAppear::Yes); 525 } else { 526 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 527 dir.GetTokenProvenanceRange(dirOffset)); 528 } 529 } else if (dirName == "else") { 530 if (dir.IsAnythingLeft(j)) { 531 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 532 "#else: excess tokens at end of directive"_en_US); 533 } else if (ifStack_.empty()) { 534 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 535 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); 536 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 537 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 538 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US); 539 } else { 540 ifStack_.pop(); 541 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner, 542 dir.GetTokenProvenanceRange(dirOffset)); 543 } 544 } else if (dirName == "elif") { 545 if (ifStack_.empty()) { 546 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 547 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US); 548 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 549 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 550 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US); 551 } else { 552 ifStack_.pop(); 553 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner, 554 dir.GetTokenProvenanceRange(dirOffset)); 555 } 556 } else if (dirName == "endif") { 557 if (dir.IsAnythingLeft(j)) { 558 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 559 "#endif: excess tokens at end of directive"_en_US); 560 } else if (ifStack_.empty()) { 561 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 562 "#endif: no #if, #ifdef, or #ifndef"_err_en_US); 563 } else { 564 ifStack_.pop(); 565 } 566 } else if (dirName == "error") { 567 prescanner->Say( 568 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 569 "%s"_err_en_US, dir.ToString()); 570 } else if (dirName == "warning" || dirName == "comment" || 571 dirName == "note") { 572 prescanner->Say( 573 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 574 "%s"_en_US, dir.ToString()); 575 } else if (dirName == "include") { 576 if (j == tokens) { 577 prescanner->Say( 578 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 579 "#include: missing name of file to include"_err_en_US); 580 return; 581 } 582 std::string include; 583 std::optional<std::string> prependPath; 584 if (dir.TokenAt(j).ToString() == "<") { // #include <foo> 585 std::size_t k{j + 1}; 586 if (k >= tokens) { 587 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 588 "#include: file name missing"_err_en_US); 589 return; 590 } 591 while (k < tokens && dir.TokenAt(k) != ">") { 592 ++k; 593 } 594 if (k >= tokens) { 595 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 596 "#include: expected '>' at end of included file"_en_US); 597 } 598 TokenSequence braced{dir, j + 1, k - j - 1}; 599 include = ReplaceMacros(braced, *prescanner).ToString(); 600 j = k; 601 } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && 602 include.substr(include.size() - 1, 1) == "\"") { // #include "foo" 603 include = include.substr(1, include.size() - 2); 604 // #include "foo" starts search in directory of file containing 605 // the directive 606 auto prov{dir.GetTokenProvenanceRange(dirOffset).start()}; 607 if (const auto *currentFile{allSources_.GetSourceFile(prov)}) { 608 prependPath = DirectoryName(currentFile->path()); 609 } 610 } else { 611 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 612 "#include: expected name of file to include"_err_en_US); 613 return; 614 } 615 if (include.empty()) { 616 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 617 "#include: empty include file name"_err_en_US); 618 return; 619 } 620 j = dir.SkipBlanks(j + 1); 621 if (j < tokens && dir.TokenAt(j).ToString() != "!") { 622 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 623 "#include: extra stuff ignored after file name"_en_US); 624 } 625 std::string buf; 626 llvm::raw_string_ostream error{buf}; 627 const SourceFile *included{ 628 allSources_.Open(include, error, std::move(prependPath))}; 629 if (!included) { 630 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 631 "#include: %s"_err_en_US, error.str()); 632 } else if (included->bytes() > 0) { 633 ProvenanceRange fileRange{ 634 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())}; 635 Prescanner{*prescanner} 636 .set_encoding(included->encoding()) 637 .Prescan(fileRange); 638 } 639 } else { 640 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 641 "#%s: unknown or unimplemented directive"_err_en_US, dirName); 642 } 643 } 644 645 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { 646 names_.push_back(t.ToString()); 647 return {names_.back().data(), names_.back().size()}; 648 } 649 650 bool Preprocessor::IsNameDefined(const CharBlock &token) { 651 return definitions_.find(token) != definitions_.end(); 652 } 653 654 static std::string GetDirectiveName( 655 const TokenSequence &line, std::size_t *rest) { 656 std::size_t tokens{line.SizeInTokens()}; 657 std::size_t j{line.SkipBlanks(0)}; 658 if (j == tokens || line.TokenAt(j).ToString() != "#") { 659 *rest = tokens; 660 return ""; 661 } 662 j = line.SkipBlanks(j + 1); 663 if (j == tokens) { 664 *rest = tokens; 665 return ""; 666 } 667 *rest = line.SkipBlanks(j + 1); 668 return ToLowerCaseLetters(line.TokenAt(j).ToString()); 669 } 670 671 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName, 672 IsElseActive isElseActive, Prescanner *prescanner, 673 ProvenanceRange provenanceRange) { 674 int nesting{0}; 675 while (!prescanner->IsAtEnd()) { 676 if (!prescanner->IsNextLinePreprocessorDirective()) { 677 prescanner->NextLine(); 678 continue; 679 } 680 TokenSequence line{prescanner->TokenizePreprocessorDirective()}; 681 std::size_t rest{0}; 682 std::string dn{GetDirectiveName(line, &rest)}; 683 if (dn == "ifdef" || dn == "ifndef" || dn == "if") { 684 ++nesting; 685 } else if (dn == "endif") { 686 if (nesting-- == 0) { 687 return; 688 } 689 } else if (isElseActive == IsElseActive::Yes && nesting == 0) { 690 if (dn == "else") { 691 ifStack_.push(CanDeadElseAppear::No); 692 return; 693 } 694 if (dn == "elif" && 695 IsIfPredicateTrue( 696 line, rest, line.SizeInTokens() - rest, prescanner)) { 697 ifStack_.push(CanDeadElseAppear::Yes); 698 return; 699 } 700 } 701 } 702 prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName); 703 } 704 705 // Precedence level codes used here to accommodate mixed Fortran and C: 706 // 15: parentheses and constants, logical !, bitwise ~ 707 // 14: unary + and - 708 // 13: ** 709 // 12: *, /, % (modulus) 710 // 11: + and - 711 // 10: << and >> 712 // 9: bitwise & 713 // 8: bitwise ^ 714 // 7: bitwise | 715 // 6: relations (.EQ., ==, &c.) 716 // 5: .NOT. 717 // 4: .AND., && 718 // 3: .OR., || 719 // 2: .EQV. and .NEQV. / .XOR. 720 // 1: ? : 721 // 0: , 722 static std::int64_t ExpressionValue(const TokenSequence &token, 723 int minimumPrecedence, std::size_t *atToken, 724 std::optional<Message> *error) { 725 enum Operator { 726 PARENS, 727 CONST, 728 NOTZERO, // ! 729 COMPLEMENT, // ~ 730 UPLUS, 731 UMINUS, 732 POWER, 733 TIMES, 734 DIVIDE, 735 MODULUS, 736 ADD, 737 SUBTRACT, 738 LEFTSHIFT, 739 RIGHTSHIFT, 740 BITAND, 741 BITXOR, 742 BITOR, 743 LT, 744 LE, 745 EQ, 746 NE, 747 GE, 748 GT, 749 NOT, 750 AND, 751 OR, 752 EQV, 753 NEQV, 754 SELECT, 755 COMMA 756 }; 757 static const int precedence[]{ 758 15, 15, 15, 15, // (), 6, !, ~ 759 14, 14, // unary +, - 760 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >> 761 9, 8, 7, // &, ^, | 762 6, 6, 6, 6, 6, 6, // relations .LT. to .GT. 763 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV. 764 1, 0 // ?: and , 765 }; 766 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12, 767 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0}; 768 769 static std::map<std::string, enum Operator> opNameMap; 770 if (opNameMap.empty()) { 771 opNameMap["("] = PARENS; 772 opNameMap["!"] = NOTZERO; 773 opNameMap["~"] = COMPLEMENT; 774 opNameMap["**"] = POWER; 775 opNameMap["*"] = TIMES; 776 opNameMap["/"] = DIVIDE; 777 opNameMap["%"] = MODULUS; 778 opNameMap["+"] = ADD; 779 opNameMap["-"] = SUBTRACT; 780 opNameMap["<<"] = LEFTSHIFT; 781 opNameMap[">>"] = RIGHTSHIFT; 782 opNameMap["&"] = BITAND; 783 opNameMap["^"] = BITXOR; 784 opNameMap["|"] = BITOR; 785 opNameMap[".lt."] = opNameMap["<"] = LT; 786 opNameMap[".le."] = opNameMap["<="] = LE; 787 opNameMap[".eq."] = opNameMap["=="] = EQ; 788 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE; 789 opNameMap[".ge."] = opNameMap[">="] = GE; 790 opNameMap[".gt."] = opNameMap[">"] = GT; 791 opNameMap[".not."] = NOT; 792 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND; 793 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR; 794 opNameMap[".eqv."] = EQV; 795 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV; 796 opNameMap["?"] = SELECT; 797 opNameMap[","] = COMMA; 798 } 799 800 std::size_t tokens{token.SizeInTokens()}; 801 CHECK(tokens > 0); 802 if (*atToken >= tokens) { 803 *error = 804 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US}; 805 return 0; 806 } 807 808 // Parse and evaluate a primary or a unary operator and its operand. 809 std::size_t opAt{*atToken}; 810 std::string t{token.TokenAt(opAt).ToString()}; 811 enum Operator op; 812 std::int64_t left{0}; 813 if (t == "(") { 814 op = PARENS; 815 } else if (IsDecimalDigit(t[0])) { 816 op = CONST; 817 std::size_t consumed{0}; 818 left = std::stoll(t, &consumed, 0 /*base to be detected*/); 819 if (consumed < t.size()) { 820 *error = Message{token.GetTokenProvenanceRange(opAt), 821 "Uninterpretable numeric constant '%s'"_err_en_US, t}; 822 return 0; 823 } 824 } else if (IsLegalIdentifierStart(t[0])) { 825 // undefined macro name -> zero 826 // TODO: BOZ constants? 827 op = CONST; 828 } else if (t == "+") { 829 op = UPLUS; 830 } else if (t == "-") { 831 op = UMINUS; 832 } else if (t == "." && *atToken + 2 < tokens && 833 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" && 834 token.TokenAt(*atToken + 2).ToString() == ".") { 835 op = NOT; 836 *atToken += 2; 837 } else { 838 auto it{opNameMap.find(t)}; 839 if (it != opNameMap.end()) { 840 op = it->second; 841 } else { 842 *error = Message{token.GetTokenProvenanceRange(opAt), 843 "operand expected in expression"_err_en_US}; 844 return 0; 845 } 846 } 847 if (precedence[op] < minimumPrecedence) { 848 *error = Message{token.GetTokenProvenanceRange(opAt), 849 "operator precedence error"_err_en_US}; 850 return 0; 851 } 852 ++*atToken; 853 if (op != CONST) { 854 left = ExpressionValue(token, operandPrecedence[op], atToken, error); 855 if (*error) { 856 return 0; 857 } 858 switch (op) { 859 case PARENS: 860 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { 861 ++*atToken; 862 break; 863 } 864 if (*atToken >= tokens) { 865 *error = Message{token.GetProvenanceRange(), 866 "')' missing from expression"_err_en_US}; 867 } else { 868 *error = Message{ 869 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US}; 870 } 871 return 0; 872 case NOTZERO: 873 left = !left; 874 break; 875 case COMPLEMENT: 876 left = ~left; 877 break; 878 case UPLUS: 879 break; 880 case UMINUS: 881 left = -left; 882 break; 883 case NOT: 884 left = -!left; 885 break; 886 default: 887 CRASH_NO_CASE; 888 } 889 } 890 891 // Parse and evaluate binary operators and their second operands, if present. 892 while (*atToken < tokens) { 893 int advance{1}; 894 t = token.TokenAt(*atToken).ToString(); 895 if (t == "." && *atToken + 2 < tokens && 896 token.TokenAt(*atToken + 2).ToString() == ".") { 897 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.'; 898 advance = 3; 899 } 900 auto it{opNameMap.find(t)}; 901 if (it == opNameMap.end()) { 902 break; 903 } 904 op = it->second; 905 if (op < POWER || precedence[op] < minimumPrecedence) { 906 break; 907 } 908 opAt = *atToken; 909 *atToken += advance; 910 911 std::int64_t right{ 912 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 913 if (*error) { 914 return 0; 915 } 916 917 switch (op) { 918 case POWER: 919 if (left == 0) { 920 if (right < 0) { 921 *error = Message{token.GetTokenProvenanceRange(opAt), 922 "0 ** negative power"_err_en_US}; 923 } 924 } else if (left != 1 && right != 1) { 925 if (right <= 0) { 926 left = !right; 927 } else { 928 std::int64_t power{1}; 929 for (; right > 0; --right) { 930 if ((power * left) / left != power) { 931 *error = Message{token.GetTokenProvenanceRange(opAt), 932 "overflow in exponentation"_err_en_US}; 933 left = 1; 934 } 935 power *= left; 936 } 937 left = power; 938 } 939 } 940 break; 941 case TIMES: 942 if (left != 0 && right != 0 && ((left * right) / left) != right) { 943 *error = Message{token.GetTokenProvenanceRange(opAt), 944 "overflow in multiplication"_err_en_US}; 945 } 946 left = left * right; 947 break; 948 case DIVIDE: 949 if (right == 0) { 950 *error = Message{ 951 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US}; 952 left = 0; 953 } else { 954 left = left / right; 955 } 956 break; 957 case MODULUS: 958 if (right == 0) { 959 *error = Message{ 960 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US}; 961 left = 0; 962 } else { 963 left = left % right; 964 } 965 break; 966 case ADD: 967 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) { 968 *error = Message{token.GetTokenProvenanceRange(opAt), 969 "overflow in addition"_err_en_US}; 970 } 971 left = left + right; 972 break; 973 case SUBTRACT: 974 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) { 975 *error = Message{token.GetTokenProvenanceRange(opAt), 976 "overflow in subtraction"_err_en_US}; 977 } 978 left = left - right; 979 break; 980 case LEFTSHIFT: 981 if (right < 0 || right > 64) { 982 *error = Message{token.GetTokenProvenanceRange(opAt), 983 "bad left shift count"_err_en_US}; 984 } 985 left = right >= 64 ? 0 : left << right; 986 break; 987 case RIGHTSHIFT: 988 if (right < 0 || right > 64) { 989 *error = Message{token.GetTokenProvenanceRange(opAt), 990 "bad right shift count"_err_en_US}; 991 } 992 left = right >= 64 ? 0 : left >> right; 993 break; 994 case BITAND: 995 case AND: 996 left = left & right; 997 break; 998 case BITXOR: 999 left = left ^ right; 1000 break; 1001 case BITOR: 1002 case OR: 1003 left = left | right; 1004 break; 1005 case LT: 1006 left = -(left < right); 1007 break; 1008 case LE: 1009 left = -(left <= right); 1010 break; 1011 case EQ: 1012 left = -(left == right); 1013 break; 1014 case NE: 1015 left = -(left != right); 1016 break; 1017 case GE: 1018 left = -(left >= right); 1019 break; 1020 case GT: 1021 left = -(left > right); 1022 break; 1023 case EQV: 1024 left = -(!left == !right); 1025 break; 1026 case NEQV: 1027 left = -(!left != !right); 1028 break; 1029 case SELECT: 1030 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") { 1031 *error = Message{token.GetTokenProvenanceRange(opAt), 1032 "':' required in selection expression"_err_en_US}; 1033 return 0; 1034 } else { 1035 ++*atToken; 1036 std::int64_t third{ 1037 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 1038 left = left != 0 ? right : third; 1039 } 1040 break; 1041 case COMMA: 1042 left = right; 1043 break; 1044 default: 1045 CRASH_NO_CASE; 1046 } 1047 } 1048 return left; 1049 } 1050 1051 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, 1052 std::size_t first, std::size_t exprTokens, Prescanner *prescanner) { 1053 TokenSequence expr1{expr, first, exprTokens}; 1054 if (expr1.HasBlanks()) { 1055 expr1.RemoveBlanks(); 1056 } 1057 TokenSequence expr2; 1058 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) { 1059 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { 1060 CharBlock name; 1061 if (j + 3 < expr1.SizeInTokens() && 1062 expr1.TokenAt(j + 1).ToString() == "(" && 1063 expr1.TokenAt(j + 3).ToString() == ")") { 1064 name = expr1.TokenAt(j + 2); 1065 j += 3; 1066 } else if (j + 1 < expr1.SizeInTokens() && 1067 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) { 1068 name = expr1.TokenAt(++j); 1069 } 1070 if (!name.empty()) { 1071 char truth{IsNameDefined(name) ? '1' : '0'}; 1072 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth)); 1073 continue; 1074 } 1075 } 1076 expr2.Put(expr1, j); 1077 } 1078 TokenSequence expr3{ReplaceMacros(expr2, *prescanner)}; 1079 if (expr3.HasBlanks()) { 1080 expr3.RemoveBlanks(); 1081 } 1082 if (expr3.empty()) { 1083 prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US); 1084 return false; 1085 } 1086 std::size_t atToken{0}; 1087 std::optional<Message> error; 1088 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0}; 1089 if (error) { 1090 prescanner->Say(std::move(*error)); 1091 } else if (atToken < expr3.SizeInTokens() && 1092 expr3.TokenAt(atToken).ToString() != "!") { 1093 prescanner->Say(expr3.GetIntervalProvenanceRange( 1094 atToken, expr3.SizeInTokens() - atToken), 1095 atToken == 0 ? "could not parse any expression"_err_en_US 1096 : "excess characters after expression"_err_en_US); 1097 } 1098 return result; 1099 } 1100 } // namespace Fortran::parser 1101