1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "preprocessor.h" 10 #include "prescan.h" 11 #include "flang/Common/idioms.h" 12 #include "flang/Parser/characters.h" 13 #include "flang/Parser/message.h" 14 #include "llvm/Support/raw_ostream.h" 15 #include <algorithm> 16 #include <cinttypes> 17 #include <cstddef> 18 #include <ctime> 19 #include <map> 20 #include <memory> 21 #include <optional> 22 #include <set> 23 #include <utility> 24 25 namespace Fortran::parser { 26 27 Definition::Definition( 28 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens) 29 : replacement_{Tokenize({}, repl, firstToken, tokens)} {} 30 31 Definition::Definition(const std::vector<std::string> &argNames, 32 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens, 33 bool isVariadic) 34 : isFunctionLike_{true}, 35 argumentCount_(argNames.size()), isVariadic_{isVariadic}, 36 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {} 37 38 Definition::Definition(const std::string &predefined, AllSources &sources) 39 : isPredefined_{true}, 40 replacement_{ 41 predefined, sources.AddCompilerInsertion(predefined).start()} {} 42 43 bool Definition::set_isDisabled(bool disable) { 44 bool was{isDisabled_}; 45 isDisabled_ = disable; 46 return was; 47 } 48 49 static bool IsLegalIdentifierStart(const CharBlock &cpl) { 50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]); 51 } 52 53 TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames, 54 const TokenSequence &token, std::size_t firstToken, std::size_t tokens) { 55 std::map<std::string, std::string> args; 56 char argIndex{'A'}; 57 for (const std::string &arg : argNames) { 58 CHECK(args.find(arg) == args.end()); 59 args[arg] = "~"s + argIndex++; 60 } 61 TokenSequence result; 62 for (std::size_t j{0}; j < tokens; ++j) { 63 CharBlock tok{token.TokenAt(firstToken + j)}; 64 if (IsLegalIdentifierStart(tok)) { 65 auto it{args.find(tok.ToString())}; 66 if (it != args.end()) { 67 result.Put(it->second, token.GetTokenProvenance(j)); 68 continue; 69 } 70 } 71 result.Put(token, firstToken + j, 1); 72 } 73 return result; 74 } 75 76 static TokenSequence Stringify( 77 const TokenSequence &tokens, AllSources &allSources) { 78 TokenSequence result; 79 Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')}; 80 result.PutNextTokenChar('"', quoteProvenance); 81 for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) { 82 const CharBlock &token{tokens.TokenAt(j)}; 83 std::size_t bytes{token.size()}; 84 for (std::size_t k{0}; k < bytes; ++k) { 85 char ch{token[k]}; 86 Provenance from{tokens.GetTokenProvenance(j, k)}; 87 if (ch == '"' || ch == '\\') { 88 result.PutNextTokenChar(ch, from); 89 } 90 result.PutNextTokenChar(ch, from); 91 } 92 } 93 result.PutNextTokenChar('"', quoteProvenance); 94 result.CloseToken(); 95 return result; 96 } 97 98 constexpr bool IsTokenPasting(CharBlock opr) { 99 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#'; 100 } 101 102 static bool AnyTokenPasting(const TokenSequence &text) { 103 std::size_t tokens{text.SizeInTokens()}; 104 for (std::size_t j{0}; j < tokens; ++j) { 105 if (IsTokenPasting(text.TokenAt(j))) { 106 return true; 107 } 108 } 109 return false; 110 } 111 112 static TokenSequence TokenPasting(TokenSequence &&text) { 113 if (!AnyTokenPasting(text)) { 114 return std::move(text); 115 } 116 TokenSequence result; 117 std::size_t tokens{text.SizeInTokens()}; 118 bool pasting{false}; 119 for (std::size_t j{0}; j < tokens; ++j) { 120 if (IsTokenPasting(text.TokenAt(j))) { 121 if (!pasting) { 122 while (!result.empty() && 123 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) { 124 result.pop_back(); 125 } 126 if (!result.empty()) { 127 result.ReopenLastToken(); 128 pasting = true; 129 } 130 } 131 } else if (pasting && text.TokenAt(j).IsBlank()) { 132 } else { 133 result.Put(text, j, 1); 134 pasting = false; 135 } 136 } 137 return result; 138 } 139 140 TokenSequence Definition::Apply( 141 const std::vector<TokenSequence> &args, Prescanner &prescanner) { 142 TokenSequence result; 143 bool skipping{false}; 144 int parenthesesNesting{0}; 145 std::size_t tokens{replacement_.SizeInTokens()}; 146 for (std::size_t j{0}; j < tokens; ++j) { 147 CharBlock token{replacement_.TokenAt(j)}; 148 std::size_t bytes{token.size()}; 149 if (skipping) { 150 if (bytes == 1) { 151 if (token[0] == '(') { 152 ++parenthesesNesting; 153 } else if (token[0] == ')') { 154 skipping = --parenthesesNesting > 0; 155 } 156 } 157 continue; 158 } 159 if (bytes == 2 && token[0] == '~') { // argument substitution 160 std::size_t index = token[1] - 'A'; 161 if (index >= args.size()) { 162 continue; 163 } 164 std::size_t prev{j}; 165 while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) { 166 --prev; 167 } 168 if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 && 169 replacement_.TokenAt(prev - 1)[0] == 170 '#') { // stringify argument without macro replacement 171 std::size_t resultSize{result.SizeInTokens()}; 172 while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) { 173 result.pop_back(); 174 } 175 CHECK(resultSize > 0 && 176 result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1)); 177 result.pop_back(); 178 result.Put(Stringify(args[index], prescanner.allSources())); 179 } else { 180 const TokenSequence *arg{&args[index]}; 181 std::optional<TokenSequence> replaced; 182 // Don't replace macros in the actual argument if it is preceded or 183 // followed by the token-pasting operator ## in the replacement text. 184 if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) { 185 auto next{replacement_.SkipBlanks(j + 1)}; 186 if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) { 187 // Apply macro replacement to the actual argument 188 replaced = 189 prescanner.preprocessor().MacroReplacement(*arg, prescanner); 190 if (replaced) { 191 arg = &*replaced; 192 } 193 } 194 } 195 result.Put(DEREF(arg)); 196 } 197 } else if (bytes == 11 && isVariadic_ && 198 token.ToString() == "__VA_ARGS__") { 199 Provenance commaProvenance{ 200 prescanner.preprocessor().allSources().CompilerInsertionProvenance( 201 ',')}; 202 for (std::size_t k{argumentCount_}; k < args.size(); ++k) { 203 if (k > argumentCount_) { 204 result.Put(","s, commaProvenance); 205 } 206 result.Put(args[k]); 207 } 208 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && 209 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && 210 parenthesesNesting == 0) { 211 parenthesesNesting = 1; 212 skipping = args.size() == argumentCount_; 213 ++j; 214 } else { 215 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { 216 ++parenthesesNesting; 217 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { 218 if (--parenthesesNesting == 0) { 219 skipping = false; 220 continue; 221 } 222 } 223 result.Put(replacement_, j); 224 } 225 } 226 return TokenPasting(std::move(result)); 227 } 228 229 static std::string FormatTime(const std::time_t &now, const char *format) { 230 char buffer[16]; 231 return {buffer, 232 std::strftime(buffer, sizeof buffer, format, std::localtime(&now))}; 233 } 234 235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} { 236 // Capture current local date & time once now to avoid having the values 237 // of __DATE__ or __TIME__ change during compilation. 238 std::time_t now; 239 std::time(&now); 240 definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904" 241 Definition{FormatTime(now, "\"%h %e %Y\""), allSources}); 242 definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60" 243 Definition{FormatTime(now, "\"%T\""), allSources}); 244 // The values of these predefined macros depend on their invocation sites. 245 definitions_.emplace( 246 SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources}); 247 definitions_.emplace( 248 SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources}); 249 } 250 251 void Preprocessor::Define(std::string macro, std::string value) { 252 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); 253 } 254 255 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } 256 257 std::optional<TokenSequence> Preprocessor::MacroReplacement( 258 const TokenSequence &input, Prescanner &prescanner) { 259 // Do quick scan for any use of a defined name. 260 std::size_t tokens{input.SizeInTokens()}; 261 std::size_t j; 262 for (j = 0; j < tokens; ++j) { 263 CharBlock token{input.TokenAt(j)}; 264 if (!token.empty() && IsLegalIdentifierStart(token[0]) && 265 IsNameDefined(token)) { 266 break; 267 } 268 } 269 if (j == tokens) { 270 return std::nullopt; // input contains nothing that would be replaced 271 } 272 TokenSequence result{input, 0, j}; 273 for (; j < tokens; ++j) { 274 const CharBlock &token{input.TokenAt(j)}; 275 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) { 276 result.Put(input, j); 277 continue; 278 } 279 auto it{definitions_.find(token)}; 280 if (it == definitions_.end()) { 281 result.Put(input, j); 282 continue; 283 } 284 Definition &def{it->second}; 285 if (def.isDisabled()) { 286 result.Put(input, j); 287 continue; 288 } 289 if (!def.isFunctionLike()) { 290 if (def.isPredefined()) { 291 std::string name{def.replacement().TokenAt(0).ToString()}; 292 std::string repl; 293 if (name == "__FILE__") { 294 repl = "\""s + 295 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"'; 296 } else if (name == "__LINE__") { 297 std::string buf; 298 llvm::raw_string_ostream ss{buf}; 299 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance()); 300 repl = ss.str(); 301 } 302 if (!repl.empty()) { 303 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)}; 304 ProvenanceRange call{allSources_.AddMacroCall( 305 insert, input.GetTokenProvenanceRange(j), repl)}; 306 result.Put(repl, call.start()); 307 continue; 308 } 309 } 310 def.set_isDisabled(true); 311 TokenSequence replaced{ 312 TokenPasting(ReplaceMacros(def.replacement(), prescanner))}; 313 def.set_isDisabled(false); 314 if (!replaced.empty()) { 315 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 316 ProvenanceRange use{input.GetTokenProvenanceRange(j)}; 317 ProvenanceRange newRange{ 318 allSources_.AddMacroCall(from, use, replaced.ToString())}; 319 result.Put(replaced, newRange); 320 } 321 continue; 322 } 323 // Possible function-like macro call. Skip spaces and newlines to see 324 // whether '(' is next. 325 std::size_t k{j}; 326 bool leftParen{false}; 327 while (++k < tokens) { 328 const CharBlock &lookAhead{input.TokenAt(k)}; 329 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') { 330 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1; 331 break; 332 } 333 } 334 if (!leftParen) { 335 result.Put(input, j); 336 continue; 337 } 338 std::vector<std::size_t> argStart{++k}; 339 for (int nesting{0}; k < tokens; ++k) { 340 CharBlock token{input.TokenAt(k)}; 341 if (token.size() == 1) { 342 char ch{token[0]}; 343 if (ch == '(') { 344 ++nesting; 345 } else if (ch == ')') { 346 if (nesting == 0) { 347 break; 348 } 349 --nesting; 350 } else if (ch == ',' && nesting == 0) { 351 argStart.push_back(k + 1); 352 } 353 } 354 } 355 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) { 356 // Subtle: () is zero arguments, not one empty argument, 357 // unless one argument was expected. 358 argStart.clear(); 359 } 360 if (k >= tokens || argStart.size() < def.argumentCount() || 361 (argStart.size() > def.argumentCount() && !def.isVariadic())) { 362 result.Put(input, j); 363 continue; 364 } 365 std::vector<TokenSequence> args; 366 for (std::size_t n{0}; n < argStart.size(); ++n) { 367 std::size_t at{argStart[n]}; 368 std::size_t count{ 369 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at}; 370 args.emplace_back(TokenSequence(input, at, count)); 371 } 372 def.set_isDisabled(true); 373 TokenSequence replaced{ 374 ReplaceMacros(def.Apply(args, prescanner), prescanner)}; 375 def.set_isDisabled(false); 376 if (!replaced.empty()) { 377 ProvenanceRange from{def.replacement().GetProvenanceRange()}; 378 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)}; 379 ProvenanceRange newRange{ 380 allSources_.AddMacroCall(from, use, replaced.ToString())}; 381 result.Put(replaced, newRange); 382 } 383 j = k; // advance to the terminal ')' 384 } 385 return result; 386 } 387 388 TokenSequence Preprocessor::ReplaceMacros( 389 const TokenSequence &tokens, Prescanner &prescanner) { 390 if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) { 391 return std::move(*repl); 392 } 393 return tokens; 394 } 395 396 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { 397 std::size_t tokens{dir.SizeInTokens()}; 398 std::size_t j{dir.SkipBlanks(0)}; 399 if (j == tokens) { 400 return; 401 } 402 CHECK(prescanner); // TODO: change to reference 403 if (dir.TokenAt(j).ToString() != "#") { 404 prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US); 405 return; 406 } 407 j = dir.SkipBlanks(j + 1); 408 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) { 409 --tokens; 410 } 411 if (j == tokens) { 412 return; 413 } 414 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') { 415 return; // treat like #line, ignore it 416 } 417 std::size_t dirOffset{j}; 418 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())}; 419 j = dir.SkipBlanks(j + 1); 420 CharBlock nameToken; 421 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) { 422 nameToken = dir.TokenAt(j); 423 } 424 if (dirName == "line") { 425 // #line is ignored 426 } else if (dirName == "define") { 427 if (nameToken.empty()) { 428 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 429 "#define: missing or invalid name"_err_en_US); 430 return; 431 } 432 nameToken = SaveTokenAsName(nameToken); 433 definitions_.erase(nameToken); 434 if (++j < tokens && dir.TokenAt(j).size() == 1 && 435 dir.TokenAt(j)[0] == '(') { 436 j = dir.SkipBlanks(j + 1); 437 std::vector<std::string> argName; 438 bool isVariadic{false}; 439 if (dir.TokenAt(j).ToString() != ")") { 440 while (true) { 441 std::string an{dir.TokenAt(j).ToString()}; 442 if (an == "...") { 443 isVariadic = true; 444 } else { 445 if (an.empty() || !IsLegalIdentifierStart(an[0])) { 446 prescanner->Say(dir.GetTokenProvenanceRange(j), 447 "#define: missing or invalid argument name"_err_en_US); 448 return; 449 } 450 argName.push_back(an); 451 } 452 j = dir.SkipBlanks(j + 1); 453 if (j == tokens) { 454 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 455 "#define: malformed argument list"_err_en_US); 456 return; 457 } 458 std::string punc{dir.TokenAt(j).ToString()}; 459 if (punc == ")") { 460 break; 461 } 462 if (isVariadic || punc != ",") { 463 prescanner->Say(dir.GetTokenProvenanceRange(j), 464 "#define: malformed argument list"_err_en_US); 465 return; 466 } 467 j = dir.SkipBlanks(j + 1); 468 if (j == tokens) { 469 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1), 470 "#define: malformed argument list"_err_en_US); 471 return; 472 } 473 } 474 if (std::set<std::string>(argName.begin(), argName.end()).size() != 475 argName.size()) { 476 prescanner->Say(dir.GetTokenProvenance(dirOffset), 477 "#define: argument names are not distinct"_err_en_US); 478 return; 479 } 480 } 481 j = dir.SkipBlanks(j + 1); 482 definitions_.emplace(std::make_pair( 483 nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); 484 } else { 485 j = dir.SkipBlanks(j + 1); 486 definitions_.emplace( 487 std::make_pair(nameToken, Definition{dir, j, tokens - j})); 488 } 489 } else if (dirName == "undef") { 490 if (nameToken.empty()) { 491 prescanner->Say( 492 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 493 "# missing or invalid name"_err_en_US); 494 } else { 495 if (dir.IsAnythingLeft(++j)) { 496 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 497 "#undef: excess tokens at end of directive"_en_US); 498 } else { 499 definitions_.erase(nameToken); 500 } 501 } 502 } else if (dirName == "ifdef" || dirName == "ifndef") { 503 bool doThen{false}; 504 if (nameToken.empty()) { 505 prescanner->Say( 506 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 507 "#%s: missing name"_err_en_US, dirName); 508 } else { 509 if (dir.IsAnythingLeft(++j)) { 510 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 511 "#%s: excess tokens at end of directive"_en_US, dirName); 512 } 513 doThen = IsNameDefined(nameToken) == (dirName == "ifdef"); 514 } 515 if (doThen) { 516 ifStack_.push(CanDeadElseAppear::Yes); 517 } else { 518 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 519 dir.GetTokenProvenance(dirOffset)); 520 } 521 } else if (dirName == "if") { 522 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) { 523 ifStack_.push(CanDeadElseAppear::Yes); 524 } else { 525 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner, 526 dir.GetTokenProvenanceRange(dirOffset)); 527 } 528 } else if (dirName == "else") { 529 if (dir.IsAnythingLeft(j)) { 530 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 531 "#else: excess tokens at end of directive"_en_US); 532 } else if (ifStack_.empty()) { 533 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 534 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); 535 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 536 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 537 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US); 538 } else { 539 ifStack_.pop(); 540 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner, 541 dir.GetTokenProvenanceRange(dirOffset)); 542 } 543 } else if (dirName == "elif") { 544 if (ifStack_.empty()) { 545 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 546 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US); 547 } else if (ifStack_.top() != CanDeadElseAppear::Yes) { 548 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 549 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US); 550 } else { 551 ifStack_.pop(); 552 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner, 553 dir.GetTokenProvenanceRange(dirOffset)); 554 } 555 } else if (dirName == "endif") { 556 if (dir.IsAnythingLeft(j)) { 557 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 558 "#endif: excess tokens at end of directive"_en_US); 559 } else if (ifStack_.empty()) { 560 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 561 "#endif: no #if, #ifdef, or #ifndef"_err_en_US); 562 } else { 563 ifStack_.pop(); 564 } 565 } else if (dirName == "error") { 566 prescanner->Say( 567 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 568 "%s"_err_en_US, dir.ToString()); 569 } else if (dirName == "warning" || dirName == "comment" || 570 dirName == "note") { 571 prescanner->Say( 572 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 573 "%s"_en_US, dir.ToString()); 574 } else if (dirName == "include") { 575 if (j == tokens) { 576 prescanner->Say( 577 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), 578 "#include: missing name of file to include"_err_en_US); 579 return; 580 } 581 std::string include; 582 std::optional<std::string> prependPath; 583 if (dir.TokenAt(j).ToString() == "<") { // #include <foo> 584 std::size_t k{j + 1}; 585 if (k >= tokens) { 586 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 587 "#include: file name missing"_err_en_US); 588 return; 589 } 590 while (k < tokens && dir.TokenAt(k) != ">") { 591 ++k; 592 } 593 if (k >= tokens) { 594 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 595 "#include: expected '>' at end of included file"_en_US); 596 } 597 TokenSequence braced{dir, j + 1, k - j - 1}; 598 include = ReplaceMacros(braced, *prescanner).ToString(); 599 j = k; 600 } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" && 601 include.substr(include.size() - 1, 1) == "\"") { // #include "foo" 602 include = include.substr(1, include.size() - 2); 603 // #include "foo" starts search in directory of file containing 604 // the directive 605 auto prov{dir.GetTokenProvenanceRange(dirOffset).start()}; 606 if (const auto *currentFile{allSources_.GetSourceFile(prov)}) { 607 prependPath = DirectoryName(currentFile->path()); 608 } 609 } else { 610 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1), 611 "#include: expected name of file to include"_err_en_US); 612 return; 613 } 614 if (include.empty()) { 615 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 616 "#include: empty include file name"_err_en_US); 617 return; 618 } 619 j = dir.SkipBlanks(j + 1); 620 if (j < tokens && dir.TokenAt(j).ToString() != "!") { 621 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), 622 "#include: extra stuff ignored after file name"_en_US); 623 } 624 std::string buf; 625 llvm::raw_string_ostream error{buf}; 626 const SourceFile *included{allSources_.Open(include, error, prependPath)}; 627 if (!included) { 628 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 629 "#include: %s"_err_en_US, error.str()); 630 } else if (included->bytes() > 0) { 631 ProvenanceRange fileRange{ 632 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())}; 633 Prescanner{*prescanner} 634 .set_encoding(included->encoding()) 635 .Prescan(fileRange); 636 } 637 } else { 638 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), 639 "#%s: unknown or unimplemented directive"_err_en_US, dirName); 640 } 641 } 642 643 CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) { 644 names_.push_back(t.ToString()); 645 return {names_.back().data(), names_.back().size()}; 646 } 647 648 bool Preprocessor::IsNameDefined(const CharBlock &token) { 649 return definitions_.find(token) != definitions_.end(); 650 } 651 652 static std::string GetDirectiveName( 653 const TokenSequence &line, std::size_t *rest) { 654 std::size_t tokens{line.SizeInTokens()}; 655 std::size_t j{line.SkipBlanks(0)}; 656 if (j == tokens || line.TokenAt(j).ToString() != "#") { 657 *rest = tokens; 658 return ""; 659 } 660 j = line.SkipBlanks(j + 1); 661 if (j == tokens) { 662 *rest = tokens; 663 return ""; 664 } 665 *rest = line.SkipBlanks(j + 1); 666 return ToLowerCaseLetters(line.TokenAt(j).ToString()); 667 } 668 669 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName, 670 IsElseActive isElseActive, Prescanner *prescanner, 671 ProvenanceRange provenanceRange) { 672 int nesting{0}; 673 while (!prescanner->IsAtEnd()) { 674 if (!prescanner->IsNextLinePreprocessorDirective()) { 675 prescanner->NextLine(); 676 continue; 677 } 678 TokenSequence line{prescanner->TokenizePreprocessorDirective()}; 679 std::size_t rest{0}; 680 std::string dn{GetDirectiveName(line, &rest)}; 681 if (dn == "ifdef" || dn == "ifndef" || dn == "if") { 682 ++nesting; 683 } else if (dn == "endif") { 684 if (nesting-- == 0) { 685 return; 686 } 687 } else if (isElseActive == IsElseActive::Yes && nesting == 0) { 688 if (dn == "else") { 689 ifStack_.push(CanDeadElseAppear::No); 690 return; 691 } 692 if (dn == "elif" && 693 IsIfPredicateTrue( 694 line, rest, line.SizeInTokens() - rest, prescanner)) { 695 ifStack_.push(CanDeadElseAppear::Yes); 696 return; 697 } 698 } 699 } 700 prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName); 701 } 702 703 // Precedence level codes used here to accommodate mixed Fortran and C: 704 // 15: parentheses and constants, logical !, bitwise ~ 705 // 14: unary + and - 706 // 13: ** 707 // 12: *, /, % (modulus) 708 // 11: + and - 709 // 10: << and >> 710 // 9: bitwise & 711 // 8: bitwise ^ 712 // 7: bitwise | 713 // 6: relations (.EQ., ==, &c.) 714 // 5: .NOT. 715 // 4: .AND., && 716 // 3: .OR., || 717 // 2: .EQV. and .NEQV. / .XOR. 718 // 1: ? : 719 // 0: , 720 static std::int64_t ExpressionValue(const TokenSequence &token, 721 int minimumPrecedence, std::size_t *atToken, 722 std::optional<Message> *error) { 723 enum Operator { 724 PARENS, 725 CONST, 726 NOTZERO, // ! 727 COMPLEMENT, // ~ 728 UPLUS, 729 UMINUS, 730 POWER, 731 TIMES, 732 DIVIDE, 733 MODULUS, 734 ADD, 735 SUBTRACT, 736 LEFTSHIFT, 737 RIGHTSHIFT, 738 BITAND, 739 BITXOR, 740 BITOR, 741 LT, 742 LE, 743 EQ, 744 NE, 745 GE, 746 GT, 747 NOT, 748 AND, 749 OR, 750 EQV, 751 NEQV, 752 SELECT, 753 COMMA 754 }; 755 static const int precedence[]{ 756 15, 15, 15, 15, // (), 6, !, ~ 757 14, 14, // unary +, - 758 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >> 759 9, 8, 7, // &, ^, | 760 6, 6, 6, 6, 6, 6, // relations .LT. to .GT. 761 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV. 762 1, 0 // ?: and , 763 }; 764 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12, 765 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0}; 766 767 static std::map<std::string, enum Operator> opNameMap; 768 if (opNameMap.empty()) { 769 opNameMap["("] = PARENS; 770 opNameMap["!"] = NOTZERO; 771 opNameMap["~"] = COMPLEMENT; 772 opNameMap["**"] = POWER; 773 opNameMap["*"] = TIMES; 774 opNameMap["/"] = DIVIDE; 775 opNameMap["%"] = MODULUS; 776 opNameMap["+"] = ADD; 777 opNameMap["-"] = SUBTRACT; 778 opNameMap["<<"] = LEFTSHIFT; 779 opNameMap[">>"] = RIGHTSHIFT; 780 opNameMap["&"] = BITAND; 781 opNameMap["^"] = BITXOR; 782 opNameMap["|"] = BITOR; 783 opNameMap[".lt."] = opNameMap["<"] = LT; 784 opNameMap[".le."] = opNameMap["<="] = LE; 785 opNameMap[".eq."] = opNameMap["=="] = EQ; 786 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE; 787 opNameMap[".ge."] = opNameMap[">="] = GE; 788 opNameMap[".gt."] = opNameMap[">"] = GT; 789 opNameMap[".not."] = NOT; 790 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND; 791 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR; 792 opNameMap[".eqv."] = EQV; 793 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV; 794 opNameMap["?"] = SELECT; 795 opNameMap[","] = COMMA; 796 } 797 798 std::size_t tokens{token.SizeInTokens()}; 799 CHECK(tokens > 0); 800 if (*atToken >= tokens) { 801 *error = 802 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US}; 803 return 0; 804 } 805 806 // Parse and evaluate a primary or a unary operator and its operand. 807 std::size_t opAt{*atToken}; 808 std::string t{token.TokenAt(opAt).ToString()}; 809 enum Operator op; 810 std::int64_t left{0}; 811 if (t == "(") { 812 op = PARENS; 813 } else if (IsDecimalDigit(t[0])) { 814 op = CONST; 815 std::size_t consumed{0}; 816 left = std::stoll(t, &consumed, 0 /*base to be detected*/); 817 if (consumed < t.size()) { 818 *error = Message{token.GetTokenProvenanceRange(opAt), 819 "Uninterpretable numeric constant '%s'"_err_en_US, t}; 820 return 0; 821 } 822 } else if (IsLegalIdentifierStart(t[0])) { 823 // undefined macro name -> zero 824 // TODO: BOZ constants? 825 op = CONST; 826 } else if (t == "+") { 827 op = UPLUS; 828 } else if (t == "-") { 829 op = UMINUS; 830 } else if (t == "." && *atToken + 2 < tokens && 831 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" && 832 token.TokenAt(*atToken + 2).ToString() == ".") { 833 op = NOT; 834 *atToken += 2; 835 } else { 836 auto it{opNameMap.find(t)}; 837 if (it != opNameMap.end()) { 838 op = it->second; 839 } else { 840 *error = Message{token.GetTokenProvenanceRange(opAt), 841 "operand expected in expression"_err_en_US}; 842 return 0; 843 } 844 } 845 if (precedence[op] < minimumPrecedence) { 846 *error = Message{token.GetTokenProvenanceRange(opAt), 847 "operator precedence error"_err_en_US}; 848 return 0; 849 } 850 ++*atToken; 851 if (op != CONST) { 852 left = ExpressionValue(token, operandPrecedence[op], atToken, error); 853 if (*error) { 854 return 0; 855 } 856 switch (op) { 857 case PARENS: 858 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { 859 ++*atToken; 860 break; 861 } 862 if (*atToken >= tokens) { 863 *error = Message{token.GetProvenanceRange(), 864 "')' missing from expression"_err_en_US}; 865 } else { 866 *error = Message{ 867 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US}; 868 } 869 return 0; 870 case NOTZERO: 871 left = !left; 872 break; 873 case COMPLEMENT: 874 left = ~left; 875 break; 876 case UPLUS: 877 break; 878 case UMINUS: 879 left = -left; 880 break; 881 case NOT: 882 left = -!left; 883 break; 884 default: 885 CRASH_NO_CASE; 886 } 887 } 888 889 // Parse and evaluate binary operators and their second operands, if present. 890 while (*atToken < tokens) { 891 int advance{1}; 892 t = token.TokenAt(*atToken).ToString(); 893 if (t == "." && *atToken + 2 < tokens && 894 token.TokenAt(*atToken + 2).ToString() == ".") { 895 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.'; 896 advance = 3; 897 } 898 auto it{opNameMap.find(t)}; 899 if (it == opNameMap.end()) { 900 break; 901 } 902 op = it->second; 903 if (op < POWER || precedence[op] < minimumPrecedence) { 904 break; 905 } 906 opAt = *atToken; 907 *atToken += advance; 908 909 std::int64_t right{ 910 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 911 if (*error) { 912 return 0; 913 } 914 915 switch (op) { 916 case POWER: 917 if (left == 0) { 918 if (right < 0) { 919 *error = Message{token.GetTokenProvenanceRange(opAt), 920 "0 ** negative power"_err_en_US}; 921 } 922 } else if (left != 1 && right != 1) { 923 if (right <= 0) { 924 left = !right; 925 } else { 926 std::int64_t power{1}; 927 for (; right > 0; --right) { 928 if ((power * left) / left != power) { 929 *error = Message{token.GetTokenProvenanceRange(opAt), 930 "overflow in exponentation"_err_en_US}; 931 left = 1; 932 } 933 power *= left; 934 } 935 left = power; 936 } 937 } 938 break; 939 case TIMES: 940 if (left != 0 && right != 0 && ((left * right) / left) != right) { 941 *error = Message{token.GetTokenProvenanceRange(opAt), 942 "overflow in multiplication"_err_en_US}; 943 } 944 left = left * right; 945 break; 946 case DIVIDE: 947 if (right == 0) { 948 *error = Message{ 949 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US}; 950 left = 0; 951 } else { 952 left = left / right; 953 } 954 break; 955 case MODULUS: 956 if (right == 0) { 957 *error = Message{ 958 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US}; 959 left = 0; 960 } else { 961 left = left % right; 962 } 963 break; 964 case ADD: 965 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) { 966 *error = Message{token.GetTokenProvenanceRange(opAt), 967 "overflow in addition"_err_en_US}; 968 } 969 left = left + right; 970 break; 971 case SUBTRACT: 972 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) { 973 *error = Message{token.GetTokenProvenanceRange(opAt), 974 "overflow in subtraction"_err_en_US}; 975 } 976 left = left - right; 977 break; 978 case LEFTSHIFT: 979 if (right < 0 || right > 64) { 980 *error = Message{token.GetTokenProvenanceRange(opAt), 981 "bad left shift count"_err_en_US}; 982 } 983 left = right >= 64 ? 0 : left << right; 984 break; 985 case RIGHTSHIFT: 986 if (right < 0 || right > 64) { 987 *error = Message{token.GetTokenProvenanceRange(opAt), 988 "bad right shift count"_err_en_US}; 989 } 990 left = right >= 64 ? 0 : left >> right; 991 break; 992 case BITAND: 993 case AND: 994 left = left & right; 995 break; 996 case BITXOR: 997 left = left ^ right; 998 break; 999 case BITOR: 1000 case OR: 1001 left = left | right; 1002 break; 1003 case LT: 1004 left = -(left < right); 1005 break; 1006 case LE: 1007 left = -(left <= right); 1008 break; 1009 case EQ: 1010 left = -(left == right); 1011 break; 1012 case NE: 1013 left = -(left != right); 1014 break; 1015 case GE: 1016 left = -(left >= right); 1017 break; 1018 case GT: 1019 left = -(left > right); 1020 break; 1021 case EQV: 1022 left = -(!left == !right); 1023 break; 1024 case NEQV: 1025 left = -(!left != !right); 1026 break; 1027 case SELECT: 1028 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") { 1029 *error = Message{token.GetTokenProvenanceRange(opAt), 1030 "':' required in selection expression"_err_en_US}; 1031 return 0; 1032 } else { 1033 ++*atToken; 1034 std::int64_t third{ 1035 ExpressionValue(token, operandPrecedence[op], atToken, error)}; 1036 left = left != 0 ? right : third; 1037 } 1038 break; 1039 case COMMA: 1040 left = right; 1041 break; 1042 default: 1043 CRASH_NO_CASE; 1044 } 1045 } 1046 return left; 1047 } 1048 1049 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, 1050 std::size_t first, std::size_t exprTokens, Prescanner *prescanner) { 1051 TokenSequence expr1{expr, first, exprTokens}; 1052 if (expr1.HasBlanks()) { 1053 expr1.RemoveBlanks(); 1054 } 1055 TokenSequence expr2; 1056 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) { 1057 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { 1058 CharBlock name; 1059 if (j + 3 < expr1.SizeInTokens() && 1060 expr1.TokenAt(j + 1).ToString() == "(" && 1061 expr1.TokenAt(j + 3).ToString() == ")") { 1062 name = expr1.TokenAt(j + 2); 1063 j += 3; 1064 } else if (j + 1 < expr1.SizeInTokens() && 1065 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) { 1066 name = expr1.TokenAt(++j); 1067 } 1068 if (!name.empty()) { 1069 char truth{IsNameDefined(name) ? '1' : '0'}; 1070 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth)); 1071 continue; 1072 } 1073 } 1074 expr2.Put(expr1, j); 1075 } 1076 TokenSequence expr3{ReplaceMacros(expr2, *prescanner)}; 1077 if (expr3.HasBlanks()) { 1078 expr3.RemoveBlanks(); 1079 } 1080 if (expr3.empty()) { 1081 prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US); 1082 return false; 1083 } 1084 std::size_t atToken{0}; 1085 std::optional<Message> error; 1086 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0}; 1087 if (error) { 1088 prescanner->Say(std::move(*error)); 1089 } else if (atToken < expr3.SizeInTokens() && 1090 expr3.TokenAt(atToken).ToString() != "!") { 1091 prescanner->Say(expr3.GetIntervalProvenanceRange( 1092 atToken, expr3.SizeInTokens() - atToken), 1093 atToken == 0 ? "could not parse any expression"_err_en_US 1094 : "excess characters after expression"_err_en_US); 1095 } 1096 return result; 1097 } 1098 } // namespace Fortran::parser 1099