1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "preprocessor.h" 10 #include "prescan.h" 11 #include "flang/Common/idioms.h" 12 #include "flang/Parser/characters.h" 13 #include "flang/Parser/message.h" 14 #include "llvm/Support/raw_ostream.h" 15 #include <algorithm> 16 #include <cinttypes> 17 #include <cstddef> 18 #include <ctime> 19 #include <map> 20 #include <memory> 21 #include <optional> 22 #include <set> 23 #include <utility> 24 25 namespace Fortran::parser { 26 27 Definition::Definition( 28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens) 29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {} 30 31 Definition::Definition(const std::vector<std::string> &argNames, 32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, 33 bool isVariadic) 34 : isFunctionLike_{true}, 35 argumentCount_(argNames.size()), isVariadic_{isVariadic}, 36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} 37 38 Definition::Definition(const std::string &predefined, AllSources &sources) 39 : isPredefined_{true}, 40 replacement_{ 41 predefined, sources.AddCompilerInsertion(predefined).start()} {} 42 43 bool Definition::set_isDisabled(bool disable) { 44 bool was{isDisabled_}; 45 isDisabled_ = disable; 46 return was; 47 } 48 49 static bool IsLegalIdentifierStart(const CharBlock &cpl) { 50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); 51 } 52 53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, 54 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) { 55 std::map<std::string, std::string> args; 56 char argIndex{'A'}; 57 for (const std::string &arg : argNames) { 58 CHECK(args.find(arg) == args.end()); 59 args[arg] = "~"s + argIndex++; 60 } 61 TokenSequence result; 62 for (std::size_t j{0}; j < tokens; ++j) { 63 CharBlock tok{token.TokenAt(firstToken + j)}; 64 if (IsLegalIdentifierStart(tok)) { 65 auto it{args.find(tok.ToString())}; 66 if (it != args.end()) { 67 result.Put(it->second, token.GetTokenProvenance(j)); 68 continue; 69 } 70 } 71 result.Put(token, firstToken + j, 1); 72 } 73 return result; 74 } 75 76 static TokenSequence Stringify( 77 const TokenSequence &tokens, AllSources &allSources) { 78 TokenSequence result; 79 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')}; 80 result.PutNextTokenChar('"', quoteProvenance); 81 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) { 82 const CharBlock &token{tokens.TokenAt(j)}; 83 std::size_t bytes{token.size()}; 84 for (std::size_t k{0}; k < bytes; ++k) { 85 char ch{token[k]}; 86 Provenance from{tokens.GetTokenProvenance(j, k)}; 87 if (ch == '"' || ch == '\\') { 88 result.PutNextTokenChar(ch, from); 89 } 90 result.PutNextTokenChar(ch, from); 91 } 92 } 93 result.PutNextTokenChar('"', quoteProvenance); 94 result.CloseToken(); 95 return result; 96 } 97 98 constexpr bool IsTokenPasting(CharBlock opr) { 99 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#'; 100 } 101 102 static bool AnyTokenPasting(const TokenSequence &text) { 103 std::size_t tokens{text.SizeInTokens()}; 104 for (std::size_t j{0}; j < tokens; ++j) { 105 if (IsTokenPasting(text.TokenAt(j))) { 106 return true; 107 } 108 } 109 return false; 110 } 111 112 static TokenSequence TokenPasting(TokenSequence &&text) { 113 if (!AnyTokenPasting(text)) { 114 return std::move(text); 115 } 116 TokenSequence result; 117 std::size_t tokens{text.SizeInTokens()}; 118 bool pasting{false}; 119 for (std::size_t j{0}; j < tokens; ++j) { 120 if (IsTokenPasting(text.TokenAt(j))) { 121 if (!pasting) { 122 while (!result.empty() && 123 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) { 124 result.pop_back(); 125 } 126 if (!result.empty()) { 127 result.ReopenLastToken(); 128 pasting = true; 129 } 130 } 131 } else if (pasting && text.TokenAt(j).IsBlank()) { 132 } else { 133 result.Put(text, j, 1); 134 pasting = false; 135 } 136 } 137 return result; 138 } 139 140 TokenSequence Definition::Apply( 141 const std::vector<TokenSequence> &args, Prescanner &prescanner) { 142 TokenSequence result; 143 bool skipping{false}; 144 int parenthesesNesting{0}; 145 std::size_t tokens{replacement_.SizeInTokens()}; 146 for (std::size_t j{0}; j < tokens; ++j) { 147 CharBlock token{replacement_.TokenAt(j)}; 148 std::size_t bytes{token.size()}; 149 if (skipping) { 150 if (bytes == 1) { 151 if (token[0] == '(') { 152 ++parenthesesNesting; 153 } else if (token[0] == ')') { 154 skipping = --parenthesesNesting > 0; 155 } 156 } 157 continue; 158 } 159 if (bytes == 2 && token[0] == '~') { // argument substitution 160 std::size_t index = token[1] - 'A'; 161 if (index >= args.size()) { 162 continue; 163 } 164 std::size_t prev{j}; 165 while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) { 166 --prev; 167 } 168 if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 && 169 replacement_.TokenAt(prev - 1)[0] == 170 '#') { // stringify argument without macro replacement 171 std::size_t resultSize{result.SizeInTokens()}; 172 while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) { 173 result.pop_back(); 174 --resultSize; 175 } 176 CHECK(resultSize > 0 && 177 result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1)); 178 result.pop_back(); 179 result.Put(Stringify(args[index], prescanner.allSources())); 180 } else { 181 const TokenSequence *arg{&args[index]}; 182 std::optional<TokenSequence> replaced; 183 // Don't replace macros in the actual argument if it is preceded or 184 // followed by the token-pasting operator ## in the replacement text. 185 if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) { 186 auto next{replacement_.SkipBlanks(j + 1)}; 187 if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) { 188 // Apply macro replacement to the actual argument 189 replaced = 190 prescanner.preprocessor().MacroReplacement(*arg, prescanner); 191 if (replaced) { 192 arg = &*replaced; 193 } 194 } 195 } 196 result.Put(DEREF(arg)); 197 } 198 } else if (bytes == 11 && isVariadic_ && 199 token.ToString() == "__VA_ARGS__") { 200 Provenance commaProvenance{ 201 prescanner.preprocessor().allSources().CompilerInsertionProvenance( 202 ',')}; 203 for (std::size_t k{argumentCount_}; k < args.size(); ++k) { 204 if (k > argumentCount_) { 205 result.Put(","s, commaProvenance); 206 } 207 result.Put(args[k]); 208 } 209 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && 210 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && 211 parenthesesNesting == 0) { 212 parenthesesNesting = 1; 213 skipping = args.size() == argumentCount_; 214 ++j; 215 } else { 216 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { 217 ++parenthesesNesting; 218 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { 219 if (--parenthesesNesting == 0) { 220 skipping = false; 221 continue; 222 } 223 } 224 result.Put(replacement_, j); 225 } 226 } 227 return TokenPasting(std::move(result)); 228 } 229 230 static std::string FormatTime(const std::time_t &now, const char *format) { 231 char buffer[16]; 232 return {buffer, 233 std::strftime(buffer, sizeof buffer, format, std::localtime(&now))}; 234 } 235 236 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {} 237 238 void Preprocessor::DefineStandardMacros() { 239 // Capture current local date & time once now to avoid having the values 240 // of __DATE__ or __TIME__ change during compilation. 241 std::time_t now; 242 std::time(&now); 243 Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904" 244 Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60" 245 // The values of these predefined macros depend on their invocation sites. 246 Define("__FILE__"s, "__FILE__"s); 247 Define("__LINE__"s, "__LINE__"s); 248 } 249 250 void Preprocessor::Define(std::string macro, std::string value) { 251 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); 252 } 253 254 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } 255 256 std::optional<TokenSequence> Preprocessor::MacroReplacement( 257 const TokenSequence &input, Prescanner &prescanner) { 258 // Do quick scan for any use of a defined name. 259 if (definitions_.empty()) { 260 return std::nullopt; 261 } 262 std::size_t tokens{input.SizeInTokens()}; 263 std::size_t j; 264 for (j = 0; j < tokens; ++j) { 265 CharBlock token{input.TokenAt(j)}; 266 if (!token.empty() && IsLegalIdentifierStart(token[0]) && 267 IsNameDefined(token)) { 268 break; 269 } 270 } 271 if (j == tokens) { 272 return std::nullopt; // input contains nothing that would be replaced 273 } 274 TokenSequence result{input, 0, j}; 275 for (; j < tokens; ++j) { 276 const CharBlock &token{input.TokenAt(j)}; 277 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { 278 result.Put(input, j); 279 continue; 280 } 281 auto it{definitions_.find(token)}; 282 if (it == definitions_.end()) { 283 result.Put(input, j); 284 continue; 285 } 286 Definition &def{it->second}; 287 if (def.isDisabled()) { 288 result.Put(input, j); 289 continue; 290 } 291 if (!def.isFunctionLike()) { 292 if (def.isPredefined()) { 293 std::string name{def.replacement().TokenAt(0).ToString()}; 294 std::string repl; 295 if (name == "__FILE__") { 296 repl = "\""s + 297 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"'; 298 } else if (name == "__LINE__") { 299 std::string buf; 300 llvm::raw_string_ostream ss{buf}; 301 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance()); 302 repl = ss.str(); 303 } 304 if (!repl.empty()) { 305 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)}; 306 ProvenanceRange call{allSources_.AddMacroCall( 307 insert, input.GetTokenProvenanceRange(j), repl)}; 308 result.Put(repl, call.start()); 309 continue; 310 } 311 } 312 def.set_isDisabled(true); 313 TokenSequence replaced{ 314 TokenPasting(ReplaceMacros(def.replacement(), prescanner))}; 315 def.set_isDisabled(false); 316 if (!replaced.empty()) { 317 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 318 ProvenanceRange use{input.GetTokenProvenanceRange(j)}; 319 ProvenanceRange newRange{ 320 allSources_.AddMacroCall(from, use, replaced.ToString())}; 321 result.Put(replaced, newRange); 322 } 323 continue; 324 } 325 // Possible function-like macro call. Skip spaces and newlines to see 326 // whether '(' is next. 327 std::size_t k{j}; 328 bool leftParen{false}; 329 while (++k < tokens) { 330 const CharBlock &lookAhead{input.TokenAt(k)}; 331 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { 332 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; 333 break; 334 } 335 } 336 if (!leftParen) { 337 result.Put(input, j); 338 continue; 339 } 340 std::vector<std::size_t> argStart{++k}; 341 for (int nesting{0}; k < tokens; ++k) { 342 CharBlock token{input.TokenAt(k)}; 343 if (token.size() == 1) { 344 char ch{token[0]}; 345 if (ch == '(') { 346 ++nesting; 347 } else if (ch == ')') { 348 if (nesting == 0) { 349 break; 350 } 351 --nesting; 352 } else if (ch == ',' && nesting == 0) { 353 argStart.push_back(k + 1); 354 } 355 } 356 } 357 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) { 358 // Subtle: () is zero arguments, not one empty argument, 359 // unless one argument was expected. 360 argStart.clear(); 361 } 362 if (k >= tokens || argStart.size() < def.argumentCount() || 363 (argStart.size() > def.argumentCount() && !def.isVariadic())) { 364 result.Put(input, j); 365 continue; 366 } 367 std::vector<TokenSequence> args; 368 for (std::size_t n{0}; n < argStart.size(); ++n) { 369 std::size_t at{argStart[n]}; 370 std::size_t count{ 371 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at}; 372 args.emplace_back(TokenSequence(input, at, count)); 373 } 374 def.set_isDisabled(true); 375 TokenSequence replaced{ 376 ReplaceMacros(def.Apply(args, prescanner), prescanner)}; 377 def.set_isDisabled(false); 378 if (!replaced.empty()) { 379 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 380 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)}; 381 ProvenanceRange newRange{ 382 allSources_.AddMacroCall(from, use, replaced.ToString())}; 383 result.Put(replaced, newRange); 384 } 385 j = k; // advance to the terminal ')' 386 } 387 return result; 388 } 389 390 TokenSequence Preprocessor::ReplaceMacros( 391 const TokenSequence &tokens, Prescanner &prescanner) { 392 if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) { 393 return std::move(*repl); 394 } 395 return tokens; 396 } 397 398 void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { 399 std::size_t tokens{dir.SizeInTokens()}; 400 std::size_t j{dir.SkipBlanks(0)}; 401 if (j == tokens) { 402 return; 403 } 404 if (dir.TokenAt(j).ToString() != "#") { 405 prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US); 406 return; 407 } 408 j = dir.SkipBlanks(j + 1); 409 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) { 410 --tokens; 411 } 412 if (j == tokens) { 413 return; 414 } 415 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { 416 return; // treat like #line, ignore it 417 } 418 std::size_t dirOffset{j}; 419 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; 420 j = dir.SkipBlanks(j + 1); 421 CharBlock nameToken; 422 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) { 423 nameToken = dir.TokenAt(j); 424 } 425 if (dirName == "line") { 426 // #line is ignored 427 } else if (dirName == "define") { 428 if (nameToken.empty()) { 429 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 430 "#define: missing or invalid name"_err_en_US); 431 return; 432 } 433 nameToken = SaveTokenAsName(nameToken); 434 definitions_.erase(nameToken); 435 if (++j < tokens && dir.TokenAt(j).size() == 1 && 436 dir.TokenAt(j)[0] == '(') { 437 j = dir.SkipBlanks(j + 1); 438 std::vector<std::string> argName; 439 bool isVariadic{false}; 440 if (dir.TokenAt(j).ToString() != ")") { 441 while (true) { 442 std::string an{dir.TokenAt(j).ToString()}; 443 if (an == "...") { 444 isVariadic = true; 445 } else { 446 if (an.empty() || !IsLegalIdentifierStart(an[0])) { 447 prescanner.Say(dir.GetTokenProvenanceRange(j), 448 "#define: missing or invalid argument name"_err_en_US); 449 return; 450 } 451 argName.push_back(an); 452 } 453 j = dir.SkipBlanks(j + 1); 454 if (j == tokens) { 455 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1), 456 "#define: malformed argument list"_err_en_US); 457 return; 458 } 459 std::string punc{dir.TokenAt(j).ToString()}; 460 if (punc == ")") { 461 break; 462 } 463 if (isVariadic || punc != ",") { 464 prescanner.Say(dir.GetTokenProvenanceRange(j), 465 "#define: malformed argument list"_err_en_US); 466 return; 467 } 468 j = dir.SkipBlanks(j + 1); 469 if (j == tokens) { 470 prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1), 471 "#define: malformed argument list"_err_en_US); 472 return; 473 } 474 } 475 if (std::set<std::string>(argName.begin(), argName.end()).size() != 476 argName.size()) { 477 prescanner.Say(dir.GetTokenProvenance(dirOffset), 478 "#define: argument names are not distinct"_err_en_US); 479 return; 480 } 481 } 482 j = dir.SkipBlanks(j + 1); 483 definitions_.emplace(std::make_pair( 484 nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); 485 } else { 486 j = dir.SkipBlanks(j + 1); 487 definitions_.emplace( 488 std::make_pair(nameToken, Definition{dir, j, tokens - j})); 489 } 490 } else if (dirName == "undef") { 491 if (nameToken.empty()) { 492 prescanner.Say( 493 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 494 "# missing or invalid name"_err_en_US); 495 } else { 496 if (dir.IsAnythingLeft(++j)) { 497 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 498 "#undef: excess tokens at end of directive"_en_US); 499 } else { 500 definitions_.erase(nameToken); 501 } 502 } 503 } else if (dirName == "ifdef" || dirName == "ifndef") { 504 bool doThen{false}; 505 if (nameToken.empty()) { 506 prescanner.Say( 507 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 508 "#%s: missing name"_err_en_US, dirName); 509 } else { 510 if (dir.IsAnythingLeft(++j)) { 511 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 512 "#%s: excess tokens at end of directive"_en_US, dirName); 513 } 514 doThen = IsNameDefined(nameToken) == (dirName == "ifdef"); 515 } 516 if (doThen) { 517 ifStack_.push(CanDeadElseAppear::Yes); 518 } else { 519 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 520 dir.GetTokenProvenance(dirOffset)); 521 } 522 } else if (dirName == "if") { 523 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) { 524 ifStack_.push(CanDeadElseAppear::Yes); 525 } else { 526 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 527 dir.GetTokenProvenanceRange(dirOffset)); 528 } 529 } else if (dirName == "else") { 530 if (dir.IsAnythingLeft(j)) { 531 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 532 "#else: excess tokens at end of directive"_en_US); 533 } else if (ifStack_.empty()) { 534 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 535 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); 536 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 537 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 538 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US); 539 } else { 540 ifStack_.pop(); 541 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner, 542 dir.GetTokenProvenanceRange(dirOffset)); 543 } 544 } else if (dirName == "elif") { 545 if (ifStack_.empty()) { 546 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 547 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US); 548 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 549 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 550 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US); 551 } else { 552 ifStack_.pop(); 553 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner, 554 dir.GetTokenProvenanceRange(dirOffset)); 555 } 556 } else if (dirName == "endif") { 557 if (dir.IsAnythingLeft(j)) { 558 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 559 "#endif: excess tokens at end of directive"_en_US); 560 } else if (ifStack_.empty()) { 561 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 562 "#endif: no #if, #ifdef, or #ifndef"_err_en_US); 563 } else { 564 ifStack_.pop(); 565 } 566 } else if (dirName == "error") { 567 prescanner.Say( 568 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 569 "%s"_err_en_US, dir.ToString()); 570 } else if (dirName == "warning" || dirName == "comment" || 571 dirName == "note") { 572 prescanner.Say( 573 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 574 "%s"_en_US, dir.ToString()); 575 } else if (dirName == "include") { 576 if (j == tokens) { 577 prescanner.Say( 578 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 579 "#include: missing name of file to include"_err_en_US); 580 return; 581 } 582 std::string include; 583 std::optional<std::string> prependPath; 584 if (dir.TokenAt(j).ToString() == "<") { // #include <foo> 585 std::size_t k{j + 1}; 586 if (k >= tokens) { 587 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 588 "#include: file name missing"_err_en_US); 589 return; 590 } 591 while (k < tokens && dir.TokenAt(k) != ">") { 592 ++k; 593 } 594 if (k >= tokens) { 595 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 596 "#include: expected '>' at end of included file"_en_US); 597 } 598 TokenSequence braced{dir, j + 1, k - j - 1}; 599 include = ReplaceMacros(braced, prescanner).ToString(); 600 j = k; 601 } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && 602 include.substr(include.size() - 1, 1) == "\"") { // #include "foo" 603 include = include.substr(1, include.size() - 2); 604 // #include "foo" starts search in directory of file containing 605 // the directive 606 auto prov{dir.GetTokenProvenanceRange(dirOffset).start()}; 607 if (const auto *currentFile{allSources_.GetSourceFile(prov)}) { 608 prependPath = DirectoryName(currentFile->path()); 609 } 610 } else { 611 prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 612 "#include: expected name of file to include"_err_en_US); 613 return; 614 } 615 if (include.empty()) { 616 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 617 "#include: empty include file name"_err_en_US); 618 return; 619 } 620 j = dir.SkipBlanks(j + 1); 621 if (j < tokens && dir.TokenAt(j).ToString() != "!") { 622 prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j), 623 "#include: extra stuff ignored after file name"_en_US); 624 } 625 std::string buf; 626 llvm::raw_string_ostream error{buf}; 627 const SourceFile *included{ 628 allSources_.Open(include, error, std::move(prependPath))}; 629 if (!included) { 630 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 631 "#include: %s"_err_en_US, error.str()); 632 } else if (included->bytes() > 0) { 633 ProvenanceRange fileRange{ 634 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())}; 635 Prescanner{prescanner} 636 .set_encoding(included->encoding()) 637 .Prescan(fileRange); 638 } 639 } else { 640 prescanner.Say(dir.GetTokenProvenanceRange(dirOffset), 641 "#%s: unknown or unimplemented directive"_err_en_US, dirName); 642 } 643 } 644 645 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { 646 names_.push_back(t.ToString()); 647 return {names_.back().data(), names_.back().size()}; 648 } 649 650 bool Preprocessor::IsNameDefined(const CharBlock &token) { 651 return definitions_.find(token) != definitions_.end(); 652 } 653 654 static std::string GetDirectiveName( 655 const TokenSequence &line, std::size_t *rest) { 656 std::size_t tokens{line.SizeInTokens()}; 657 std::size_t j{line.SkipBlanks(0)}; 658 if (j == tokens || line.TokenAt(j).ToString() != "#") { 659 *rest = tokens; 660 return ""; 661 } 662 j = line.SkipBlanks(j + 1); 663 if (j == tokens) { 664 *rest = tokens; 665 return ""; 666 } 667 *rest = line.SkipBlanks(j + 1); 668 return ToLowerCaseLetters(line.TokenAt(j).ToString()); 669 } 670 671 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName, 672 IsElseActive isElseActive, Prescanner &prescanner, 673 ProvenanceRange provenanceRange) { 674 int nesting{0}; 675 while (!prescanner.IsAtEnd()) { 676 if (!prescanner.IsNextLinePreprocessorDirective()) { 677 prescanner.NextLine(); 678 continue; 679 } 680 TokenSequence line{prescanner.TokenizePreprocessorDirective()}; 681 std::size_t rest{0}; 682 std::string dn{GetDirectiveName(line, &rest)}; 683 if (dn == "ifdef" || dn == "ifndef" || dn == "if") { 684 ++nesting; 685 } else if (dn == "endif") { 686 if (nesting-- == 0) { 687 return; 688 } 689 } else if (isElseActive == IsElseActive::Yes && nesting == 0) { 690 if (dn == "else") { 691 ifStack_.push(CanDeadElseAppear::No); 692 return; 693 } 694 if (dn == "elif" && 695 IsIfPredicateTrue( 696 line, rest, line.SizeInTokens() - rest, prescanner)) { 697 ifStack_.push(CanDeadElseAppear::Yes); 698 return; 699 } 700 } 701 } 702 prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName); 703 } 704 705 // Precedence level codes used here to accommodate mixed Fortran and C: 706 // 15: parentheses and constants, logical !, bitwise ~ 707 // 14: unary + and - 708 // 13: ** 709 // 12: *, /, % (modulus) 710 // 11: + and - 711 // 10: << and >> 712 // 9: bitwise & 713 // 8: bitwise ^ 714 // 7: bitwise | 715 // 6: relations (.EQ., ==, &c.) 716 // 5: .NOT. 717 // 4: .AND., && 718 // 3: .OR., || 719 // 2: .EQV. and .NEQV. / .XOR. 720 // 1: ? : 721 // 0: , 722 static std::int64_t ExpressionValue(const TokenSequence &token, 723 int minimumPrecedence, std::size_t *atToken, 724 std::optional<Message> *error) { 725 enum Operator { 726 PARENS, 727 CONST, 728 NOTZERO, // ! 729 COMPLEMENT, // ~ 730 UPLUS, 731 UMINUS, 732 POWER, 733 TIMES, 734 DIVIDE, 735 MODULUS, 736 ADD, 737 SUBTRACT, 738 LEFTSHIFT, 739 RIGHTSHIFT, 740 BITAND, 741 BITXOR, 742 BITOR, 743 LT, 744 LE, 745 EQ, 746 NE, 747 GE, 748 GT, 749 NOT, 750 AND, 751 OR, 752 EQV, 753 NEQV, 754 SELECT, 755 COMMA 756 }; 757 static const int precedence[]{ 758 15, 15, 15, 15, // (), 6, !, ~ 759 14, 14, // unary +, - 760 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >> 761 9, 8, 7, // &, ^, | 762 6, 6, 6, 6, 6, 6, // relations .LT. to .GT. 763 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV. 764 1, 0 // ?: and , 765 }; 766 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12, 767 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0}; 768 769 static std::map<std::string, enum Operator> opNameMap; 770 if (opNameMap.empty()) { 771 opNameMap["("] = PARENS; 772 opNameMap["!"] = NOTZERO; 773 opNameMap["~"] = COMPLEMENT; 774 opNameMap["**"] = POWER; 775 opNameMap["*"] = TIMES; 776 opNameMap["/"] = DIVIDE; 777 opNameMap["%"] = MODULUS; 778 opNameMap["+"] = ADD; 779 opNameMap["-"] = SUBTRACT; 780 opNameMap["<<"] = LEFTSHIFT; 781 opNameMap[">>"] = RIGHTSHIFT; 782 opNameMap["&"] = BITAND; 783 opNameMap["^"] = BITXOR; 784 opNameMap["|"] = BITOR; 785 opNameMap[".lt."] = opNameMap["<"] = LT; 786 opNameMap[".le."] = opNameMap["<="] = LE; 787 opNameMap[".eq."] = opNameMap["=="] = EQ; 788 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE; 789 opNameMap[".ge."] = opNameMap[">="] = GE; 790 opNameMap[".gt."] = opNameMap[">"] = GT; 791 opNameMap[".not."] = NOT; 792 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND; 793 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR; 794 opNameMap[".eqv."] = EQV; 795 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV; 796 opNameMap["?"] = SELECT; 797 opNameMap[","] = COMMA; 798 } 799 800 std::size_t tokens{token.SizeInTokens()}; 801 CHECK(tokens > 0); 802 if (*atToken >= tokens) { 803 *error = 804 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US}; 805 return 0; 806 } 807 808 // Parse and evaluate a primary or a unary operator and its operand. 809 std::size_t opAt{*atToken}; 810 std::string t{token.TokenAt(opAt).ToString()}; 811 enum Operator op; 812 std::int64_t left{0}; 813 if (t == "(") { 814 op = PARENS; 815 } else if (IsDecimalDigit(t[0])) { 816 op = CONST; 817 std::size_t consumed{0}; 818 left = std::stoll(t, &consumed, 0 /*base to be detected*/); 819 if (consumed < t.size()) { 820 *error = Message{token.GetTokenProvenanceRange(opAt), 821 "Uninterpretable numeric constant '%s'"_err_en_US, t}; 822 return 0; 823 } 824 } else if (IsLegalIdentifierStart(t[0])) { 825 // undefined macro name -> zero 826 // TODO: BOZ constants? 827 op = CONST; 828 } else if (t == "+") { 829 op = UPLUS; 830 } else if (t == "-") { 831 op = UMINUS; 832 } else if (t == "." && *atToken + 2 < tokens && 833 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" && 834 token.TokenAt(*atToken + 2).ToString() == ".") { 835 op = NOT; 836 *atToken += 2; 837 } else { 838 auto it{opNameMap.find(t)}; 839 if (it != opNameMap.end()) { 840 op = it->second; 841 } else { 842 *error = Message{token.GetTokenProvenanceRange(opAt), 843 "operand expected in expression"_err_en_US}; 844 return 0; 845 } 846 } 847 if (precedence[op] < minimumPrecedence) { 848 *error = Message{token.GetTokenProvenanceRange(opAt), 849 "operator precedence error"_err_en_US}; 850 return 0; 851 } 852 ++*atToken; 853 if (op != CONST) { 854 left = ExpressionValue(token, operandPrecedence[op], atToken, error); 855 if (*error) { 856 return 0; 857 } 858 switch (op) { 859 case PARENS: 860 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { 861 ++*atToken; 862 break; 863 } 864 if (*atToken >= tokens) { 865 *error = Message{token.GetProvenanceRange(), 866 "')' missing from expression"_err_en_US}; 867 } else { 868 *error = Message{ 869 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US}; 870 } 871 return 0; 872 case NOTZERO: 873 left = !left; 874 break; 875 case COMPLEMENT: 876 left = ~left; 877 break; 878 case UPLUS: 879 break; 880 case UMINUS: 881 left = -left; 882 break; 883 case NOT: 884 left = -!left; 885 break; 886 default: 887 CRASH_NO_CASE; 888 } 889 } 890 891 // Parse and evaluate binary operators and their second operands, if present. 892 while (*atToken < tokens) { 893 int advance{1}; 894 t = token.TokenAt(*atToken).ToString(); 895 if (t == "." && *atToken + 2 < tokens && 896 token.TokenAt(*atToken + 2).ToString() == ".") { 897 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.'; 898 advance = 3; 899 } 900 auto it{opNameMap.find(t)}; 901 if (it == opNameMap.end()) { 902 break; 903 } 904 op = it->second; 905 if (op < POWER || precedence[op] < minimumPrecedence) { 906 break; 907 } 908 opAt = *atToken; 909 *atToken += advance; 910 911 std::int64_t right{ 912 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 913 if (*error) { 914 return 0; 915 } 916 917 switch (op) { 918 case POWER: 919 if (left == 0) { 920 if (right < 0) { 921 *error = Message{token.GetTokenProvenanceRange(opAt), 922 "0 ** negative power"_err_en_US}; 923 } 924 } else if (left != 1 && right != 1) { 925 if (right <= 0) { 926 left = !right; 927 } else { 928 std::int64_t power{1}; 929 for (; right > 0; --right) { 930 if ((power * left) / left != power) { 931 *error = Message{token.GetTokenProvenanceRange(opAt), 932 "overflow in exponentation"_err_en_US}; 933 left = 1; 934 } 935 power *= left; 936 } 937 left = power; 938 } 939 } 940 break; 941 case TIMES: 942 if (left != 0 && right != 0 && ((left * right) / left) != right) { 943 *error = Message{token.GetTokenProvenanceRange(opAt), 944 "overflow in multiplication"_err_en_US}; 945 } 946 left = left * right; 947 break; 948 case DIVIDE: 949 if (right == 0) { 950 *error = Message{ 951 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US}; 952 left = 0; 953 } else { 954 left = left / right; 955 } 956 break; 957 case MODULUS: 958 if (right == 0) { 959 *error = Message{ 960 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US}; 961 left = 0; 962 } else { 963 left = left % right; 964 } 965 break; 966 case ADD: 967 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) { 968 *error = Message{token.GetTokenProvenanceRange(opAt), 969 "overflow in addition"_err_en_US}; 970 } 971 left = left + right; 972 break; 973 case SUBTRACT: 974 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) { 975 *error = Message{token.GetTokenProvenanceRange(opAt), 976 "overflow in subtraction"_err_en_US}; 977 } 978 left = left - right; 979 break; 980 case LEFTSHIFT: 981 if (right < 0 || right > 64) { 982 *error = Message{token.GetTokenProvenanceRange(opAt), 983 "bad left shift count"_err_en_US}; 984 } 985 left = right >= 64 ? 0 : left << right; 986 break; 987 case RIGHTSHIFT: 988 if (right < 0 || right > 64) { 989 *error = Message{token.GetTokenProvenanceRange(opAt), 990 "bad right shift count"_err_en_US}; 991 } 992 left = right >= 64 ? 0 : left >> right; 993 break; 994 case BITAND: 995 case AND: 996 left = left & right; 997 break; 998 case BITXOR: 999 left = left ^ right; 1000 break; 1001 case BITOR: 1002 case OR: 1003 left = left | right; 1004 break; 1005 case LT: 1006 left = -(left < right); 1007 break; 1008 case LE: 1009 left = -(left <= right); 1010 break; 1011 case EQ: 1012 left = -(left == right); 1013 break; 1014 case NE: 1015 left = -(left != right); 1016 break; 1017 case GE: 1018 left = -(left >= right); 1019 break; 1020 case GT: 1021 left = -(left > right); 1022 break; 1023 case EQV: 1024 left = -(!left == !right); 1025 break; 1026 case NEQV: 1027 left = -(!left != !right); 1028 break; 1029 case SELECT: 1030 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") { 1031 *error = Message{token.GetTokenProvenanceRange(opAt), 1032 "':' required in selection expression"_err_en_US}; 1033 return 0; 1034 } else { 1035 ++*atToken; 1036 std::int64_t third{ 1037 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 1038 left = left != 0 ? right : third; 1039 } 1040 break; 1041 case COMMA: 1042 left = right; 1043 break; 1044 default: 1045 CRASH_NO_CASE; 1046 } 1047 } 1048 return left; 1049 } 1050 1051 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, 1052 std::size_t first, std::size_t exprTokens, Prescanner &prescanner) { 1053 TokenSequence expr1{expr, first, exprTokens}; 1054 if (expr1.HasBlanks()) { 1055 expr1.RemoveBlanks(); 1056 } 1057 TokenSequence expr2; 1058 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) { 1059 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { 1060 CharBlock name; 1061 if (j + 3 < expr1.SizeInTokens() && 1062 expr1.TokenAt(j + 1).ToString() == "(" && 1063 expr1.TokenAt(j + 3).ToString() == ")") { 1064 name = expr1.TokenAt(j + 2); 1065 j += 3; 1066 } else if (j + 1 < expr1.SizeInTokens() && 1067 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) { 1068 name = expr1.TokenAt(++j); 1069 } 1070 if (!name.empty()) { 1071 char truth{IsNameDefined(name) ? '1' : '0'}; 1072 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth)); 1073 continue; 1074 } 1075 } 1076 expr2.Put(expr1, j); 1077 } 1078 TokenSequence expr3{ReplaceMacros(expr2, prescanner)}; 1079 if (expr3.HasBlanks()) { 1080 expr3.RemoveBlanks(); 1081 } 1082 if (expr3.empty()) { 1083 prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US); 1084 return false; 1085 } 1086 std::size_t atToken{0}; 1087 std::optional<Message> error; 1088 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0}; 1089 if (error) { 1090 prescanner.Say(std::move(*error)); 1091 } else if (atToken < expr3.SizeInTokens() && 1092 expr3.TokenAt(atToken).ToString() != "!") { 1093 prescanner.Say(expr3.GetIntervalProvenanceRange( 1094 atToken, expr3.SizeInTokens() - atToken), 1095 atToken == 0 ? "could not parse any expression"_err_en_US 1096 : "excess characters after expression"_err_en_US); 1097 } 1098 return result; 1099 } 1100 } // namespace Fortran::parser 1101