1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APInt.h" 47 #include "llvm/ADT/DenseMap.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/SmallVector.h" 50 #include "llvm/ADT/STLExtras.h" 51 #include "llvm/ADT/StringRef.h" 52 #include "llvm/ADT/StringSwitch.h" 53 #include "llvm/Support/Capacity.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/Support/MemoryBuffer.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <memory> 60 #include <string> 61 #include <utility> 62 #include <vector> 63 64 using namespace clang; 65 66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 67 68 //===----------------------------------------------------------------------===// 69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 70 71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 72 DiagnosticsEngine &diags, LangOptions &opts, 73 SourceManager &SM, MemoryBufferCache &PCMCache, 74 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 75 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 76 TranslationUnitKind TUKind) 77 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 78 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 79 PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)), 80 HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), 81 ExternalSource(nullptr), Identifiers(opts, IILookup), 82 PragmaHandlers(new PragmaNamespace(StringRef())), 83 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 84 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 85 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 86 CodeCompletionReached(false), CodeCompletionII(nullptr), 87 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 88 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), 89 CurLexerSubmodule(nullptr), Callbacks(nullptr), 90 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 91 Record(nullptr), MIChainHead(nullptr) { 92 OwnsHeaderSearch = OwnsHeaders; 93 94 CounterValue = 0; // __COUNTER__ starts at 0. 95 96 // Clear stats. 97 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 98 NumIf = NumElse = NumEndif = 0; 99 NumEnteredSourceFiles = 0; 100 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 101 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 102 MaxIncludeStackDepth = 0; 103 NumSkipped = 0; 104 105 // Default to discarding comments. 106 KeepComments = false; 107 KeepMacroComments = false; 108 SuppressIncludeNotFoundError = false; 109 110 // Macro expansion is enabled. 111 DisableMacroExpansion = false; 112 MacroExpansionInDirectivesOverride = false; 113 InMacroArgs = false; 114 InMacroArgPreExpansion = false; 115 NumCachedTokenLexers = 0; 116 PragmasEnabled = true; 117 ParsingIfOrElifDirective = false; 118 PreprocessedOutput = false; 119 120 CachedLexPos = 0; 121 122 // We haven't read anything from the external source. 123 ReadMacrosFromExternalSource = false; 124 125 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of 126 // a macro. They get unpoisoned where it is allowed. 127 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 128 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 129 if (getLangOpts().CPlusPlus2a) { 130 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); 131 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); 132 } else { 133 Ident__VA_OPT__ = nullptr; 134 } 135 136 // Initialize the pragma handlers. 137 RegisterBuiltinPragmas(); 138 139 // Initialize builtin macros like __LINE__ and friends. 140 RegisterBuiltinMacros(); 141 142 if(LangOpts.Borland) { 143 Ident__exception_info = getIdentifierInfo("_exception_info"); 144 Ident___exception_info = getIdentifierInfo("__exception_info"); 145 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 146 Ident__exception_code = getIdentifierInfo("_exception_code"); 147 Ident___exception_code = getIdentifierInfo("__exception_code"); 148 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 149 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 150 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 151 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 152 } else { 153 Ident__exception_info = Ident__exception_code = nullptr; 154 Ident__abnormal_termination = Ident___exception_info = nullptr; 155 Ident___exception_code = Ident___abnormal_termination = nullptr; 156 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 157 Ident_AbnormalTermination = nullptr; 158 } 159 160 if (this->PPOpts->GeneratePreamble) 161 PreambleConditionalStack.startRecording(); 162 } 163 164 Preprocessor::~Preprocessor() { 165 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 166 167 IncludeMacroStack.clear(); 168 169 // Destroy any macro definitions. 170 while (MacroInfoChain *I = MIChainHead) { 171 MIChainHead = I->Next; 172 I->~MacroInfoChain(); 173 } 174 175 // Free any cached macro expanders. 176 // This populates MacroArgCache, so all TokenLexers need to be destroyed 177 // before the code below that frees up the MacroArgCache list. 178 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 179 CurTokenLexer.reset(); 180 181 // Free any cached MacroArgs. 182 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 183 ArgList = ArgList->deallocate(); 184 185 // Delete the header search info, if we own it. 186 if (OwnsHeaderSearch) 187 delete &HeaderInfo; 188 } 189 190 void Preprocessor::Initialize(const TargetInfo &Target, 191 const TargetInfo *AuxTarget) { 192 assert((!this->Target || this->Target == &Target) && 193 "Invalid override of target information"); 194 this->Target = &Target; 195 196 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 197 "Invalid override of aux target information."); 198 this->AuxTarget = AuxTarget; 199 200 // Initialize information about built-ins. 201 BuiltinInfo.InitializeTarget(Target, AuxTarget); 202 HeaderInfo.setTarget(Target); 203 } 204 205 void Preprocessor::InitializeForModelFile() { 206 NumEnteredSourceFiles = 0; 207 208 // Reset pragmas 209 PragmaHandlersBackup = std::move(PragmaHandlers); 210 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 211 RegisterBuiltinPragmas(); 212 213 // Reset PredefinesFileID 214 PredefinesFileID = FileID(); 215 } 216 217 void Preprocessor::FinalizeForModelFile() { 218 NumEnteredSourceFiles = 1; 219 220 PragmaHandlers = std::move(PragmaHandlersBackup); 221 } 222 223 void Preprocessor::setPTHManager(PTHManager* pm) { 224 PTH.reset(pm); 225 FileMgr.addStatCache(PTH->createStatCache()); 226 } 227 228 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 229 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 230 << getSpelling(Tok) << "'"; 231 232 if (!DumpFlags) return; 233 234 llvm::errs() << "\t"; 235 if (Tok.isAtStartOfLine()) 236 llvm::errs() << " [StartOfLine]"; 237 if (Tok.hasLeadingSpace()) 238 llvm::errs() << " [LeadingSpace]"; 239 if (Tok.isExpandDisabled()) 240 llvm::errs() << " [ExpandDisabled]"; 241 if (Tok.needsCleaning()) { 242 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 243 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 244 << "']"; 245 } 246 247 llvm::errs() << "\tLoc=<"; 248 DumpLocation(Tok.getLocation()); 249 llvm::errs() << ">"; 250 } 251 252 void Preprocessor::DumpLocation(SourceLocation Loc) const { 253 Loc.dump(SourceMgr); 254 } 255 256 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 257 llvm::errs() << "MACRO: "; 258 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 259 DumpToken(MI.getReplacementToken(i)); 260 llvm::errs() << " "; 261 } 262 llvm::errs() << "\n"; 263 } 264 265 void Preprocessor::PrintStats() { 266 llvm::errs() << "\n*** Preprocessor Stats:\n"; 267 llvm::errs() << NumDirectives << " directives found:\n"; 268 llvm::errs() << " " << NumDefined << " #define.\n"; 269 llvm::errs() << " " << NumUndefined << " #undef.\n"; 270 llvm::errs() << " #include/#include_next/#import:\n"; 271 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 272 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 273 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 274 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 275 llvm::errs() << " " << NumEndif << " #endif.\n"; 276 llvm::errs() << " " << NumPragma << " #pragma.\n"; 277 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 278 279 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 280 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 281 << NumFastMacroExpanded << " on the fast path.\n"; 282 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 283 << " token paste (##) operations performed, " 284 << NumFastTokenPaste << " on the fast path.\n"; 285 286 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 287 288 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 289 llvm::errs() << "\n Macro Expanded Tokens: " 290 << llvm::capacity_in_bytes(MacroExpandedTokens); 291 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 292 // FIXME: List information for all submodules. 293 llvm::errs() << "\n Macros: " 294 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 295 llvm::errs() << "\n #pragma push_macro Info: " 296 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 297 llvm::errs() << "\n Poison Reasons: " 298 << llvm::capacity_in_bytes(PoisonReasons); 299 llvm::errs() << "\n Comment Handlers: " 300 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 301 } 302 303 Preprocessor::macro_iterator 304 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 305 if (IncludeExternalMacros && ExternalSource && 306 !ReadMacrosFromExternalSource) { 307 ReadMacrosFromExternalSource = true; 308 ExternalSource->ReadDefinedMacros(); 309 } 310 311 // Make sure we cover all macros in visible modules. 312 for (const ModuleMacro &Macro : ModuleMacros) 313 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 314 315 return CurSubmoduleState->Macros.begin(); 316 } 317 318 size_t Preprocessor::getTotalMemory() const { 319 return BP.getTotalMemory() 320 + llvm::capacity_in_bytes(MacroExpandedTokens) 321 + Predefines.capacity() /* Predefines buffer. */ 322 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 323 // and ModuleMacros. 324 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 325 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 326 + llvm::capacity_in_bytes(PoisonReasons) 327 + llvm::capacity_in_bytes(CommentHandlers); 328 } 329 330 Preprocessor::macro_iterator 331 Preprocessor::macro_end(bool IncludeExternalMacros) const { 332 if (IncludeExternalMacros && ExternalSource && 333 !ReadMacrosFromExternalSource) { 334 ReadMacrosFromExternalSource = true; 335 ExternalSource->ReadDefinedMacros(); 336 } 337 338 return CurSubmoduleState->Macros.end(); 339 } 340 341 /// \brief Compares macro tokens with a specified token value sequence. 342 static bool MacroDefinitionEquals(const MacroInfo *MI, 343 ArrayRef<TokenValue> Tokens) { 344 return Tokens.size() == MI->getNumTokens() && 345 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 346 } 347 348 StringRef Preprocessor::getLastMacroWithSpelling( 349 SourceLocation Loc, 350 ArrayRef<TokenValue> Tokens) const { 351 SourceLocation BestLocation; 352 StringRef BestSpelling; 353 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 354 I != E; ++I) { 355 const MacroDirective::DefInfo 356 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 357 if (!Def || !Def.getMacroInfo()) 358 continue; 359 if (!Def.getMacroInfo()->isObjectLike()) 360 continue; 361 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 362 continue; 363 SourceLocation Location = Def.getLocation(); 364 // Choose the macro defined latest. 365 if (BestLocation.isInvalid() || 366 (Location.isValid() && 367 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 368 BestLocation = Location; 369 BestSpelling = I->first->getName(); 370 } 371 } 372 return BestSpelling; 373 } 374 375 void Preprocessor::recomputeCurLexerKind() { 376 if (CurLexer) 377 CurLexerKind = CLK_Lexer; 378 else if (CurPTHLexer) 379 CurLexerKind = CLK_PTHLexer; 380 else if (CurTokenLexer) 381 CurLexerKind = CLK_TokenLexer; 382 else 383 CurLexerKind = CLK_CachingLexer; 384 } 385 386 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 387 unsigned CompleteLine, 388 unsigned CompleteColumn) { 389 assert(File); 390 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 391 assert(!CodeCompletionFile && "Already set"); 392 393 using llvm::MemoryBuffer; 394 395 // Load the actual file's contents. 396 bool Invalid = false; 397 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 398 if (Invalid) 399 return true; 400 401 // Find the byte position of the truncation point. 402 const char *Position = Buffer->getBufferStart(); 403 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 404 for (; *Position; ++Position) { 405 if (*Position != '\r' && *Position != '\n') 406 continue; 407 408 // Eat \r\n or \n\r as a single line. 409 if ((Position[1] == '\r' || Position[1] == '\n') && 410 Position[0] != Position[1]) 411 ++Position; 412 ++Position; 413 break; 414 } 415 } 416 417 Position += CompleteColumn - 1; 418 419 // If pointing inside the preamble, adjust the position at the beginning of 420 // the file after the preamble. 421 if (SkipMainFilePreamble.first && 422 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 423 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 424 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 425 } 426 427 if (Position > Buffer->getBufferEnd()) 428 Position = Buffer->getBufferEnd(); 429 430 CodeCompletionFile = File; 431 CodeCompletionOffset = Position - Buffer->getBufferStart(); 432 433 std::unique_ptr<MemoryBuffer> NewBuffer = 434 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 435 Buffer->getBufferIdentifier()); 436 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 437 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 438 *NewPos = '\0'; 439 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 440 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 441 442 return false; 443 } 444 445 void Preprocessor::CodeCompleteNaturalLanguage() { 446 if (CodeComplete) 447 CodeComplete->CodeCompleteNaturalLanguage(); 448 setCodeCompletionReached(); 449 } 450 451 /// getSpelling - This method is used to get the spelling of a token into a 452 /// SmallVector. Note that the returned StringRef may not point to the 453 /// supplied buffer if a copy can be avoided. 454 StringRef Preprocessor::getSpelling(const Token &Tok, 455 SmallVectorImpl<char> &Buffer, 456 bool *Invalid) const { 457 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 458 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 459 // Try the fast path. 460 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 461 return II->getName(); 462 } 463 464 // Resize the buffer if we need to copy into it. 465 if (Tok.needsCleaning()) 466 Buffer.resize(Tok.getLength()); 467 468 const char *Ptr = Buffer.data(); 469 unsigned Len = getSpelling(Tok, Ptr, Invalid); 470 return StringRef(Ptr, Len); 471 } 472 473 /// CreateString - Plop the specified string into a scratch buffer and return a 474 /// location for it. If specified, the source location provides a source 475 /// location for the token. 476 void Preprocessor::CreateString(StringRef Str, Token &Tok, 477 SourceLocation ExpansionLocStart, 478 SourceLocation ExpansionLocEnd) { 479 Tok.setLength(Str.size()); 480 481 const char *DestPtr; 482 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 483 484 if (ExpansionLocStart.isValid()) 485 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 486 ExpansionLocEnd, Str.size()); 487 Tok.setLocation(Loc); 488 489 // If this is a raw identifier or a literal token, set the pointer data. 490 if (Tok.is(tok::raw_identifier)) 491 Tok.setRawIdentifierData(DestPtr); 492 else if (Tok.isLiteral()) 493 Tok.setLiteralData(DestPtr); 494 } 495 496 Module *Preprocessor::getCurrentModule() { 497 if (!getLangOpts().isCompilingModule()) 498 return nullptr; 499 500 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 501 } 502 503 //===----------------------------------------------------------------------===// 504 // Preprocessor Initialization Methods 505 //===----------------------------------------------------------------------===// 506 507 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 508 /// which implicitly adds the builtin defines etc. 509 void Preprocessor::EnterMainSourceFile() { 510 // We do not allow the preprocessor to reenter the main file. Doing so will 511 // cause FileID's to accumulate information from both runs (e.g. #line 512 // information) and predefined macros aren't guaranteed to be set properly. 513 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 514 FileID MainFileID = SourceMgr.getMainFileID(); 515 516 // If MainFileID is loaded it means we loaded an AST file, no need to enter 517 // a main file. 518 if (!SourceMgr.isLoadedFileID(MainFileID)) { 519 // Enter the main file source buffer. 520 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 521 522 // If we've been asked to skip bytes in the main file (e.g., as part of a 523 // precompiled preamble), do so now. 524 if (SkipMainFilePreamble.first > 0) 525 CurLexer->SetByteOffset(SkipMainFilePreamble.first, 526 SkipMainFilePreamble.second); 527 528 // Tell the header info that the main file was entered. If the file is later 529 // #imported, it won't be re-entered. 530 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 531 HeaderInfo.IncrementIncludeCount(FE); 532 } 533 534 // Preprocess Predefines to populate the initial preprocessor state. 535 std::unique_ptr<llvm::MemoryBuffer> SB = 536 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 537 assert(SB && "Cannot create predefined source buffer"); 538 FileID FID = SourceMgr.createFileID(std::move(SB)); 539 assert(FID.isValid() && "Could not create FileID for predefines?"); 540 setPredefinesFileID(FID); 541 542 // Start parsing the predefines. 543 EnterSourceFile(FID, nullptr, SourceLocation()); 544 } 545 546 void Preprocessor::replayPreambleConditionalStack() { 547 // Restore the conditional stack from the preamble, if there is one. 548 if (PreambleConditionalStack.isReplaying()) { 549 assert(CurPPLexer && 550 "CurPPLexer is null when calling replayPreambleConditionalStack."); 551 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); 552 PreambleConditionalStack.doneReplaying(); 553 if (PreambleConditionalStack.reachedEOFWhileSkipping()) 554 SkipExcludedConditionalBlock( 555 PreambleConditionalStack.SkipInfo->HashTokenLoc, 556 PreambleConditionalStack.SkipInfo->IfTokenLoc, 557 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, 558 PreambleConditionalStack.SkipInfo->FoundElse, 559 PreambleConditionalStack.SkipInfo->ElseLoc); 560 } 561 } 562 563 void Preprocessor::EndSourceFile() { 564 // Notify the client that we reached the end of the source file. 565 if (Callbacks) 566 Callbacks->EndOfMainFile(); 567 } 568 569 //===----------------------------------------------------------------------===// 570 // Lexer Event Handling. 571 //===----------------------------------------------------------------------===// 572 573 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 574 /// identifier information for the token and install it into the token, 575 /// updating the token kind accordingly. 576 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 577 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 578 579 // Look up this token, see if it is a macro, or if it is a language keyword. 580 IdentifierInfo *II; 581 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 582 // No cleaning needed, just use the characters from the lexed buffer. 583 II = getIdentifierInfo(Identifier.getRawIdentifier()); 584 } else { 585 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 586 SmallString<64> IdentifierBuffer; 587 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 588 589 if (Identifier.hasUCN()) { 590 SmallString<64> UCNIdentifierBuffer; 591 expandUCNs(UCNIdentifierBuffer, CleanedStr); 592 II = getIdentifierInfo(UCNIdentifierBuffer); 593 } else { 594 II = getIdentifierInfo(CleanedStr); 595 } 596 } 597 598 // Update the token info (identifier info and appropriate token kind). 599 Identifier.setIdentifierInfo(II); 600 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && 601 getSourceManager().isInSystemHeader(Identifier.getLocation())) 602 Identifier.setKind(clang::tok::identifier); 603 else 604 Identifier.setKind(II->getTokenID()); 605 606 return II; 607 } 608 609 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 610 PoisonReasons[II] = DiagID; 611 } 612 613 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 614 assert(Ident__exception_code && Ident__exception_info); 615 assert(Ident___exception_code && Ident___exception_info); 616 Ident__exception_code->setIsPoisoned(Poison); 617 Ident___exception_code->setIsPoisoned(Poison); 618 Ident_GetExceptionCode->setIsPoisoned(Poison); 619 Ident__exception_info->setIsPoisoned(Poison); 620 Ident___exception_info->setIsPoisoned(Poison); 621 Ident_GetExceptionInfo->setIsPoisoned(Poison); 622 Ident__abnormal_termination->setIsPoisoned(Poison); 623 Ident___abnormal_termination->setIsPoisoned(Poison); 624 Ident_AbnormalTermination->setIsPoisoned(Poison); 625 } 626 627 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 628 assert(Identifier.getIdentifierInfo() && 629 "Can't handle identifiers without identifier info!"); 630 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 631 PoisonReasons.find(Identifier.getIdentifierInfo()); 632 if(it == PoisonReasons.end()) 633 Diag(Identifier, diag::err_pp_used_poisoned_id); 634 else 635 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 636 } 637 638 /// \brief Returns a diagnostic message kind for reporting a future keyword as 639 /// appropriate for the identifier and specified language. 640 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 641 const LangOptions &LangOpts) { 642 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 643 644 if (LangOpts.CPlusPlus) 645 return llvm::StringSwitch<diag::kind>(II.getName()) 646 #define CXX11_KEYWORD(NAME, FLAGS) \ 647 .Case(#NAME, diag::warn_cxx11_keyword) 648 #define CXX2A_KEYWORD(NAME, FLAGS) \ 649 .Case(#NAME, diag::warn_cxx2a_keyword) 650 #include "clang/Basic/TokenKinds.def" 651 ; 652 653 llvm_unreachable( 654 "Keyword not known to come from a newer Standard or proposed Standard"); 655 } 656 657 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 658 assert(II.isOutOfDate() && "not out of date"); 659 getExternalSource()->updateOutOfDateIdentifier(II); 660 } 661 662 /// HandleIdentifier - This callback is invoked when the lexer reads an 663 /// identifier. This callback looks up the identifier in the map and/or 664 /// potentially macro expands it or turns it into a named token (like 'for'). 665 /// 666 /// Note that callers of this method are guarded by checking the 667 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 668 /// IdentifierInfo methods that compute these properties will need to change to 669 /// match. 670 bool Preprocessor::HandleIdentifier(Token &Identifier) { 671 assert(Identifier.getIdentifierInfo() && 672 "Can't handle identifiers without identifier info!"); 673 674 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 675 676 // If the information about this identifier is out of date, update it from 677 // the external source. 678 // We have to treat __VA_ARGS__ in a special way, since it gets 679 // serialized with isPoisoned = true, but our preprocessor may have 680 // unpoisoned it if we're defining a C99 macro. 681 if (II.isOutOfDate()) { 682 bool CurrentIsPoisoned = false; 683 const bool IsSpecialVariadicMacro = 684 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; 685 if (IsSpecialVariadicMacro) 686 CurrentIsPoisoned = II.isPoisoned(); 687 688 updateOutOfDateIdentifier(II); 689 Identifier.setKind(II.getTokenID()); 690 691 if (IsSpecialVariadicMacro) 692 II.setIsPoisoned(CurrentIsPoisoned); 693 } 694 695 // If this identifier was poisoned, and if it was not produced from a macro 696 // expansion, emit an error. 697 if (II.isPoisoned() && CurPPLexer) { 698 HandlePoisonedIdentifier(Identifier); 699 } 700 701 // If this is a macro to be expanded, do it. 702 if (MacroDefinition MD = getMacroDefinition(&II)) { 703 auto *MI = MD.getMacroInfo(); 704 assert(MI && "macro definition with no macro info?"); 705 if (!DisableMacroExpansion) { 706 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 707 // C99 6.10.3p10: If the preprocessing token immediately after the 708 // macro name isn't a '(', this macro should not be expanded. 709 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 710 return HandleMacroExpandedIdentifier(Identifier, MD); 711 } else { 712 // C99 6.10.3.4p2 says that a disabled macro may never again be 713 // expanded, even if it's in a context where it could be expanded in the 714 // future. 715 Identifier.setFlag(Token::DisableExpand); 716 if (MI->isObjectLike() || isNextPPTokenLParen()) 717 Diag(Identifier, diag::pp_disabled_macro_expansion); 718 } 719 } 720 } 721 722 // If this identifier is a keyword in a newer Standard or proposed Standard, 723 // produce a warning. Don't warn if we're not considering macro expansion, 724 // since this identifier might be the name of a macro. 725 // FIXME: This warning is disabled in cases where it shouldn't be, like 726 // "#define constexpr constexpr", "int constexpr;" 727 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 728 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 729 << II.getName(); 730 // Don't diagnose this keyword again in this translation unit. 731 II.setIsFutureCompatKeyword(false); 732 } 733 734 // If this is an extension token, diagnose its use. 735 // We avoid diagnosing tokens that originate from macro definitions. 736 // FIXME: This warning is disabled in cases where it shouldn't be, 737 // like "#define TY typeof", "TY(1) x". 738 if (II.isExtensionToken() && !DisableMacroExpansion) 739 Diag(Identifier, diag::ext_token_used); 740 741 // If this is the 'import' contextual keyword following an '@', note 742 // that the next token indicates a module name. 743 // 744 // Note that we do not treat 'import' as a contextual 745 // keyword when we're in a caching lexer, because caching lexers only get 746 // used in contexts where import declarations are disallowed. 747 // 748 // Likewise if this is the C++ Modules TS import keyword. 749 if (((LastTokenWasAt && II.isModulesImport()) || 750 Identifier.is(tok::kw_import)) && 751 !InMacroArgs && !DisableMacroExpansion && 752 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 753 CurLexerKind != CLK_CachingLexer) { 754 ModuleImportLoc = Identifier.getLocation(); 755 ModuleImportPath.clear(); 756 ModuleImportExpectsIdentifier = true; 757 CurLexerKind = CLK_LexAfterModuleImport; 758 } 759 return true; 760 } 761 762 void Preprocessor::Lex(Token &Result) { 763 // We loop here until a lex function returns a token; this avoids recursion. 764 bool ReturnedToken; 765 do { 766 switch (CurLexerKind) { 767 case CLK_Lexer: 768 ReturnedToken = CurLexer->Lex(Result); 769 break; 770 case CLK_PTHLexer: 771 ReturnedToken = CurPTHLexer->Lex(Result); 772 break; 773 case CLK_TokenLexer: 774 ReturnedToken = CurTokenLexer->Lex(Result); 775 break; 776 case CLK_CachingLexer: 777 CachingLex(Result); 778 ReturnedToken = true; 779 break; 780 case CLK_LexAfterModuleImport: 781 LexAfterModuleImport(Result); 782 ReturnedToken = true; 783 break; 784 } 785 } while (!ReturnedToken); 786 787 if (Result.is(tok::code_completion)) 788 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 789 790 LastTokenWasAt = Result.is(tok::at); 791 } 792 793 /// \brief Lex a token following the 'import' contextual keyword. 794 /// 795 void Preprocessor::LexAfterModuleImport(Token &Result) { 796 // Figure out what kind of lexer we actually have. 797 recomputeCurLexerKind(); 798 799 // Lex the next token. 800 Lex(Result); 801 802 // The token sequence 803 // 804 // import identifier (. identifier)* 805 // 806 // indicates a module import directive. We already saw the 'import' 807 // contextual keyword, so now we're looking for the identifiers. 808 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 809 // We expected to see an identifier here, and we did; continue handling 810 // identifiers. 811 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 812 Result.getLocation())); 813 ModuleImportExpectsIdentifier = false; 814 CurLexerKind = CLK_LexAfterModuleImport; 815 return; 816 } 817 818 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 819 // see the next identifier. (We can also see a '[[' that begins an 820 // attribute-specifier-seq here under the C++ Modules TS.) 821 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 822 ModuleImportExpectsIdentifier = true; 823 CurLexerKind = CLK_LexAfterModuleImport; 824 return; 825 } 826 827 // If we have a non-empty module path, load the named module. 828 if (!ModuleImportPath.empty()) { 829 // Under the Modules TS, the dot is just part of the module name, and not 830 // a real hierarachy separator. Flatten such module names now. 831 // 832 // FIXME: Is this the right level to be performing this transformation? 833 std::string FlatModuleName; 834 if (getLangOpts().ModulesTS) { 835 for (auto &Piece : ModuleImportPath) { 836 if (!FlatModuleName.empty()) 837 FlatModuleName += "."; 838 FlatModuleName += Piece.first->getName(); 839 } 840 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 841 ModuleImportPath.clear(); 842 ModuleImportPath.push_back( 843 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 844 } 845 846 Module *Imported = nullptr; 847 if (getLangOpts().Modules) { 848 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 849 ModuleImportPath, 850 Module::Hidden, 851 /*IsIncludeDirective=*/false); 852 if (Imported) 853 makeModuleVisible(Imported, ModuleImportLoc); 854 } 855 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 856 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 857 } 858 } 859 860 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 861 CurSubmoduleState->VisibleModules.setVisible( 862 M, Loc, [](Module *) {}, 863 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 864 // FIXME: Include the path in the diagnostic. 865 // FIXME: Include the import location for the conflicting module. 866 Diag(ModuleImportLoc, diag::warn_module_conflict) 867 << Path[0]->getFullModuleName() 868 << Conflict->getFullModuleName() 869 << Message; 870 }); 871 872 // Add this module to the imports list of the currently-built submodule. 873 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 874 BuildingSubmoduleStack.back().M->Imports.insert(M); 875 } 876 877 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 878 const char *DiagnosticTag, 879 bool AllowMacroExpansion) { 880 // We need at least one string literal. 881 if (Result.isNot(tok::string_literal)) { 882 Diag(Result, diag::err_expected_string_literal) 883 << /*Source='in...'*/0 << DiagnosticTag; 884 return false; 885 } 886 887 // Lex string literal tokens, optionally with macro expansion. 888 SmallVector<Token, 4> StrToks; 889 do { 890 StrToks.push_back(Result); 891 892 if (Result.hasUDSuffix()) 893 Diag(Result, diag::err_invalid_string_udl); 894 895 if (AllowMacroExpansion) 896 Lex(Result); 897 else 898 LexUnexpandedToken(Result); 899 } while (Result.is(tok::string_literal)); 900 901 // Concatenate and parse the strings. 902 StringLiteralParser Literal(StrToks, *this); 903 assert(Literal.isAscii() && "Didn't allow wide strings in"); 904 905 if (Literal.hadError) 906 return false; 907 908 if (Literal.Pascal) { 909 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 910 << /*Source='in...'*/0 << DiagnosticTag; 911 return false; 912 } 913 914 String = Literal.GetString(); 915 return true; 916 } 917 918 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 919 assert(Tok.is(tok::numeric_constant)); 920 SmallString<8> IntegerBuffer; 921 bool NumberInvalid = false; 922 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 923 if (NumberInvalid) 924 return false; 925 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 926 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 927 return false; 928 llvm::APInt APVal(64, 0); 929 if (Literal.GetIntegerValue(APVal)) 930 return false; 931 Lex(Tok); 932 Value = APVal.getLimitedValue(); 933 return true; 934 } 935 936 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 937 assert(Handler && "NULL comment handler"); 938 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 939 CommentHandlers.end() && "Comment handler already registered"); 940 CommentHandlers.push_back(Handler); 941 } 942 943 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 944 std::vector<CommentHandler *>::iterator Pos 945 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 946 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 947 CommentHandlers.erase(Pos); 948 } 949 950 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 951 bool AnyPendingTokens = false; 952 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 953 HEnd = CommentHandlers.end(); 954 H != HEnd; ++H) { 955 if ((*H)->HandleComment(*this, Comment)) 956 AnyPendingTokens = true; 957 } 958 if (!AnyPendingTokens || getCommentRetentionState()) 959 return false; 960 Lex(result); 961 return true; 962 } 963 964 ModuleLoader::~ModuleLoader() { } 965 966 CommentHandler::~CommentHandler() { } 967 968 CodeCompletionHandler::~CodeCompletionHandler() { } 969 970 void Preprocessor::createPreprocessingRecord() { 971 if (Record) 972 return; 973 974 Record = new PreprocessingRecord(getSourceManager()); 975 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 976 } 977