1 //===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/IdentifierTable.h" 32 #include "clang/Basic/LLVM.h" 33 #include "clang/Basic/LangOptions.h" 34 #include "clang/Basic/Module.h" 35 #include "clang/Basic/SourceLocation.h" 36 #include "clang/Basic/SourceManager.h" 37 #include "clang/Basic/TargetInfo.h" 38 #include "clang/Lex/CodeCompletionHandler.h" 39 #include "clang/Lex/ExternalPreprocessorSource.h" 40 #include "clang/Lex/HeaderSearch.h" 41 #include "clang/Lex/LexDiagnostic.h" 42 #include "clang/Lex/Lexer.h" 43 #include "clang/Lex/LiteralSupport.h" 44 #include "clang/Lex/MacroArgs.h" 45 #include "clang/Lex/MacroInfo.h" 46 #include "clang/Lex/ModuleLoader.h" 47 #include "clang/Lex/PTHLexer.h" 48 #include "clang/Lex/PTHManager.h" 49 #include "clang/Lex/Pragma.h" 50 #include "clang/Lex/PreprocessingRecord.h" 51 #include "clang/Lex/PreprocessorLexer.h" 52 #include "clang/Lex/PreprocessorOptions.h" 53 #include "clang/Lex/ScratchBuffer.h" 54 #include "clang/Lex/Token.h" 55 #include "clang/Lex/TokenLexer.h" 56 #include "llvm/ADT/APInt.h" 57 #include "llvm/ADT/ArrayRef.h" 58 #include "llvm/ADT/DenseMap.h" 59 #include "llvm/ADT/SmallString.h" 60 #include "llvm/ADT/SmallVector.h" 61 #include "llvm/ADT/STLExtras.h" 62 #include "llvm/ADT/StringRef.h" 63 #include "llvm/ADT/StringSwitch.h" 64 #include "llvm/Support/Capacity.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/MemoryBuffer.h" 67 #include "llvm/Support/raw_ostream.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <memory> 71 #include <string> 72 #include <utility> 73 #include <vector> 74 75 using namespace clang; 76 77 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 78 79 ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; 80 81 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 82 DiagnosticsEngine &diags, LangOptions &opts, 83 SourceManager &SM, MemoryBufferCache &PCMCache, 84 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 85 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 86 TranslationUnitKind TUKind) 87 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), 88 FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache), 89 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 90 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 91 // As the language options may have not been loaded yet (when 92 // deserializing an ASTUnit), adding keywords to the identifier table is 93 // deferred to Preprocessor::Initialize(). 94 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), 95 TUKind(TUKind), SkipMainFilePreamble(0, true), 96 CurSubmoduleState(&NullSubmoduleState) { 97 OwnsHeaderSearch = OwnsHeaders; 98 99 // Default to discarding comments. 100 KeepComments = false; 101 KeepMacroComments = false; 102 SuppressIncludeNotFoundError = false; 103 104 // Macro expansion is enabled. 105 DisableMacroExpansion = false; 106 MacroExpansionInDirectivesOverride = false; 107 InMacroArgs = false; 108 InMacroArgPreExpansion = false; 109 NumCachedTokenLexers = 0; 110 PragmasEnabled = true; 111 ParsingIfOrElifDirective = false; 112 PreprocessedOutput = false; 113 114 // We haven't read anything from the external source. 115 ReadMacrosFromExternalSource = false; 116 117 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of 118 // a macro. They get unpoisoned where it is allowed. 119 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 120 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 121 if (getLangOpts().CPlusPlus2a) { 122 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); 123 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); 124 } else { 125 Ident__VA_OPT__ = nullptr; 126 } 127 128 // Initialize the pragma handlers. 129 RegisterBuiltinPragmas(); 130 131 // Initialize builtin macros like __LINE__ and friends. 132 RegisterBuiltinMacros(); 133 134 if(LangOpts.Borland) { 135 Ident__exception_info = getIdentifierInfo("_exception_info"); 136 Ident___exception_info = getIdentifierInfo("__exception_info"); 137 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 138 Ident__exception_code = getIdentifierInfo("_exception_code"); 139 Ident___exception_code = getIdentifierInfo("__exception_code"); 140 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 141 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 142 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 143 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 144 } else { 145 Ident__exception_info = Ident__exception_code = nullptr; 146 Ident__abnormal_termination = Ident___exception_info = nullptr; 147 Ident___exception_code = Ident___abnormal_termination = nullptr; 148 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 149 Ident_AbnormalTermination = nullptr; 150 } 151 152 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens. 153 if (usingPCHWithPragmaHdrStop()) 154 SkippingUntilPragmaHdrStop = true; 155 156 // If using a PCH with a through header, start skipping tokens. 157 if (!this->PPOpts->PCHThroughHeader.empty() && 158 !this->PPOpts->ImplicitPCHInclude.empty()) 159 SkippingUntilPCHThroughHeader = true; 160 161 if (this->PPOpts->GeneratePreamble) 162 PreambleConditionalStack.startRecording(); 163 } 164 165 Preprocessor::~Preprocessor() { 166 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 167 168 IncludeMacroStack.clear(); 169 170 // Destroy any macro definitions. 171 while (MacroInfoChain *I = MIChainHead) { 172 MIChainHead = I->Next; 173 I->~MacroInfoChain(); 174 } 175 176 // Free any cached macro expanders. 177 // This populates MacroArgCache, so all TokenLexers need to be destroyed 178 // before the code below that frees up the MacroArgCache list. 179 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 180 CurTokenLexer.reset(); 181 182 // Free any cached MacroArgs. 183 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 184 ArgList = ArgList->deallocate(); 185 186 // Delete the header search info, if we own it. 187 if (OwnsHeaderSearch) 188 delete &HeaderInfo; 189 } 190 191 void Preprocessor::Initialize(const TargetInfo &Target, 192 const TargetInfo *AuxTarget) { 193 assert((!this->Target || this->Target == &Target) && 194 "Invalid override of target information"); 195 this->Target = &Target; 196 197 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 198 "Invalid override of aux target information."); 199 this->AuxTarget = AuxTarget; 200 201 // Initialize information about built-ins. 202 BuiltinInfo.InitializeTarget(Target, AuxTarget); 203 HeaderInfo.setTarget(Target); 204 205 // Populate the identifier table with info about keywords for the current language. 206 Identifiers.AddKeywords(LangOpts); 207 } 208 209 void Preprocessor::InitializeForModelFile() { 210 NumEnteredSourceFiles = 0; 211 212 // Reset pragmas 213 PragmaHandlersBackup = std::move(PragmaHandlers); 214 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 215 RegisterBuiltinPragmas(); 216 217 // Reset PredefinesFileID 218 PredefinesFileID = FileID(); 219 } 220 221 void Preprocessor::FinalizeForModelFile() { 222 NumEnteredSourceFiles = 1; 223 224 PragmaHandlers = std::move(PragmaHandlersBackup); 225 } 226 227 void Preprocessor::setPTHManager(PTHManager* pm) { 228 PTH.reset(pm); 229 FileMgr.addStatCache(PTH->createStatCache()); 230 } 231 232 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 233 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 234 << getSpelling(Tok) << "'"; 235 236 if (!DumpFlags) return; 237 238 llvm::errs() << "\t"; 239 if (Tok.isAtStartOfLine()) 240 llvm::errs() << " [StartOfLine]"; 241 if (Tok.hasLeadingSpace()) 242 llvm::errs() << " [LeadingSpace]"; 243 if (Tok.isExpandDisabled()) 244 llvm::errs() << " [ExpandDisabled]"; 245 if (Tok.needsCleaning()) { 246 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 247 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 248 << "']"; 249 } 250 251 llvm::errs() << "\tLoc=<"; 252 DumpLocation(Tok.getLocation()); 253 llvm::errs() << ">"; 254 } 255 256 void Preprocessor::DumpLocation(SourceLocation Loc) const { 257 Loc.print(llvm::errs(), SourceMgr); 258 } 259 260 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 261 llvm::errs() << "MACRO: "; 262 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 263 DumpToken(MI.getReplacementToken(i)); 264 llvm::errs() << " "; 265 } 266 llvm::errs() << "\n"; 267 } 268 269 void Preprocessor::PrintStats() { 270 llvm::errs() << "\n*** Preprocessor Stats:\n"; 271 llvm::errs() << NumDirectives << " directives found:\n"; 272 llvm::errs() << " " << NumDefined << " #define.\n"; 273 llvm::errs() << " " << NumUndefined << " #undef.\n"; 274 llvm::errs() << " #include/#include_next/#import:\n"; 275 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 276 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 277 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 278 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 279 llvm::errs() << " " << NumEndif << " #endif.\n"; 280 llvm::errs() << " " << NumPragma << " #pragma.\n"; 281 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 282 283 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 284 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 285 << NumFastMacroExpanded << " on the fast path.\n"; 286 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 287 << " token paste (##) operations performed, " 288 << NumFastTokenPaste << " on the fast path.\n"; 289 290 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 291 292 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 293 llvm::errs() << "\n Macro Expanded Tokens: " 294 << llvm::capacity_in_bytes(MacroExpandedTokens); 295 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 296 // FIXME: List information for all submodules. 297 llvm::errs() << "\n Macros: " 298 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 299 llvm::errs() << "\n #pragma push_macro Info: " 300 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 301 llvm::errs() << "\n Poison Reasons: " 302 << llvm::capacity_in_bytes(PoisonReasons); 303 llvm::errs() << "\n Comment Handlers: " 304 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 305 } 306 307 Preprocessor::macro_iterator 308 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 309 if (IncludeExternalMacros && ExternalSource && 310 !ReadMacrosFromExternalSource) { 311 ReadMacrosFromExternalSource = true; 312 ExternalSource->ReadDefinedMacros(); 313 } 314 315 // Make sure we cover all macros in visible modules. 316 for (const ModuleMacro &Macro : ModuleMacros) 317 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 318 319 return CurSubmoduleState->Macros.begin(); 320 } 321 322 size_t Preprocessor::getTotalMemory() const { 323 return BP.getTotalMemory() 324 + llvm::capacity_in_bytes(MacroExpandedTokens) 325 + Predefines.capacity() /* Predefines buffer. */ 326 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 327 // and ModuleMacros. 328 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 329 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 330 + llvm::capacity_in_bytes(PoisonReasons) 331 + llvm::capacity_in_bytes(CommentHandlers); 332 } 333 334 Preprocessor::macro_iterator 335 Preprocessor::macro_end(bool IncludeExternalMacros) const { 336 if (IncludeExternalMacros && ExternalSource && 337 !ReadMacrosFromExternalSource) { 338 ReadMacrosFromExternalSource = true; 339 ExternalSource->ReadDefinedMacros(); 340 } 341 342 return CurSubmoduleState->Macros.end(); 343 } 344 345 /// Compares macro tokens with a specified token value sequence. 346 static bool MacroDefinitionEquals(const MacroInfo *MI, 347 ArrayRef<TokenValue> Tokens) { 348 return Tokens.size() == MI->getNumTokens() && 349 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 350 } 351 352 StringRef Preprocessor::getLastMacroWithSpelling( 353 SourceLocation Loc, 354 ArrayRef<TokenValue> Tokens) const { 355 SourceLocation BestLocation; 356 StringRef BestSpelling; 357 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 358 I != E; ++I) { 359 const MacroDirective::DefInfo 360 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 361 if (!Def || !Def.getMacroInfo()) 362 continue; 363 if (!Def.getMacroInfo()->isObjectLike()) 364 continue; 365 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 366 continue; 367 SourceLocation Location = Def.getLocation(); 368 // Choose the macro defined latest. 369 if (BestLocation.isInvalid() || 370 (Location.isValid() && 371 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 372 BestLocation = Location; 373 BestSpelling = I->first->getName(); 374 } 375 } 376 return BestSpelling; 377 } 378 379 void Preprocessor::recomputeCurLexerKind() { 380 if (CurLexer) 381 CurLexerKind = CLK_Lexer; 382 else if (CurPTHLexer) 383 CurLexerKind = CLK_PTHLexer; 384 else if (CurTokenLexer) 385 CurLexerKind = CLK_TokenLexer; 386 else 387 CurLexerKind = CLK_CachingLexer; 388 } 389 390 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 391 unsigned CompleteLine, 392 unsigned CompleteColumn) { 393 assert(File); 394 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 395 assert(!CodeCompletionFile && "Already set"); 396 397 using llvm::MemoryBuffer; 398 399 // Load the actual file's contents. 400 bool Invalid = false; 401 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 402 if (Invalid) 403 return true; 404 405 // Find the byte position of the truncation point. 406 const char *Position = Buffer->getBufferStart(); 407 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 408 for (; *Position; ++Position) { 409 if (*Position != '\r' && *Position != '\n') 410 continue; 411 412 // Eat \r\n or \n\r as a single line. 413 if ((Position[1] == '\r' || Position[1] == '\n') && 414 Position[0] != Position[1]) 415 ++Position; 416 ++Position; 417 break; 418 } 419 } 420 421 Position += CompleteColumn - 1; 422 423 // If pointing inside the preamble, adjust the position at the beginning of 424 // the file after the preamble. 425 if (SkipMainFilePreamble.first && 426 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 427 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 428 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 429 } 430 431 if (Position > Buffer->getBufferEnd()) 432 Position = Buffer->getBufferEnd(); 433 434 CodeCompletionFile = File; 435 CodeCompletionOffset = Position - Buffer->getBufferStart(); 436 437 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer( 438 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier()); 439 char *NewBuf = NewBuffer->getBufferStart(); 440 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 441 *NewPos = '\0'; 442 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 443 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 444 445 return false; 446 } 447 448 void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir, 449 bool IsAngled) { 450 if (CodeComplete) 451 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled); 452 setCodeCompletionReached(); 453 } 454 455 void Preprocessor::CodeCompleteNaturalLanguage() { 456 if (CodeComplete) 457 CodeComplete->CodeCompleteNaturalLanguage(); 458 setCodeCompletionReached(); 459 } 460 461 /// getSpelling - This method is used to get the spelling of a token into a 462 /// SmallVector. Note that the returned StringRef may not point to the 463 /// supplied buffer if a copy can be avoided. 464 StringRef Preprocessor::getSpelling(const Token &Tok, 465 SmallVectorImpl<char> &Buffer, 466 bool *Invalid) const { 467 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 468 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 469 // Try the fast path. 470 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 471 return II->getName(); 472 } 473 474 // Resize the buffer if we need to copy into it. 475 if (Tok.needsCleaning()) 476 Buffer.resize(Tok.getLength()); 477 478 const char *Ptr = Buffer.data(); 479 unsigned Len = getSpelling(Tok, Ptr, Invalid); 480 return StringRef(Ptr, Len); 481 } 482 483 /// CreateString - Plop the specified string into a scratch buffer and return a 484 /// location for it. If specified, the source location provides a source 485 /// location for the token. 486 void Preprocessor::CreateString(StringRef Str, Token &Tok, 487 SourceLocation ExpansionLocStart, 488 SourceLocation ExpansionLocEnd) { 489 Tok.setLength(Str.size()); 490 491 const char *DestPtr; 492 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 493 494 if (ExpansionLocStart.isValid()) 495 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 496 ExpansionLocEnd, Str.size()); 497 Tok.setLocation(Loc); 498 499 // If this is a raw identifier or a literal token, set the pointer data. 500 if (Tok.is(tok::raw_identifier)) 501 Tok.setRawIdentifierData(DestPtr); 502 else if (Tok.isLiteral()) 503 Tok.setLiteralData(DestPtr); 504 } 505 506 SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) { 507 auto &SM = getSourceManager(); 508 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 509 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc); 510 bool Invalid = false; 511 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 512 if (Invalid) 513 return SourceLocation(); 514 515 // FIXME: We could consider re-using spelling for tokens we see repeatedly. 516 const char *DestPtr; 517 SourceLocation Spelling = 518 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr); 519 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length)); 520 } 521 522 Module *Preprocessor::getCurrentModule() { 523 if (!getLangOpts().isCompilingModule()) 524 return nullptr; 525 526 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 527 } 528 529 //===----------------------------------------------------------------------===// 530 // Preprocessor Initialization Methods 531 //===----------------------------------------------------------------------===// 532 533 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 534 /// which implicitly adds the builtin defines etc. 535 void Preprocessor::EnterMainSourceFile() { 536 // We do not allow the preprocessor to reenter the main file. Doing so will 537 // cause FileID's to accumulate information from both runs (e.g. #line 538 // information) and predefined macros aren't guaranteed to be set properly. 539 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 540 FileID MainFileID = SourceMgr.getMainFileID(); 541 542 // If MainFileID is loaded it means we loaded an AST file, no need to enter 543 // a main file. 544 if (!SourceMgr.isLoadedFileID(MainFileID)) { 545 // Enter the main file source buffer. 546 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 547 548 // If we've been asked to skip bytes in the main file (e.g., as part of a 549 // precompiled preamble), do so now. 550 if (SkipMainFilePreamble.first > 0) 551 CurLexer->SetByteOffset(SkipMainFilePreamble.first, 552 SkipMainFilePreamble.second); 553 554 // Tell the header info that the main file was entered. If the file is later 555 // #imported, it won't be re-entered. 556 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 557 HeaderInfo.IncrementIncludeCount(FE); 558 } 559 560 // Preprocess Predefines to populate the initial preprocessor state. 561 std::unique_ptr<llvm::MemoryBuffer> SB = 562 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 563 assert(SB && "Cannot create predefined source buffer"); 564 FileID FID = SourceMgr.createFileID(std::move(SB)); 565 assert(FID.isValid() && "Could not create FileID for predefines?"); 566 setPredefinesFileID(FID); 567 568 // Start parsing the predefines. 569 EnterSourceFile(FID, nullptr, SourceLocation()); 570 571 if (!PPOpts->PCHThroughHeader.empty()) { 572 // Lookup and save the FileID for the through header. If it isn't found 573 // in the search path, it's a fatal error. 574 const DirectoryLookup *CurDir; 575 const FileEntry *File = LookupFile( 576 SourceLocation(), PPOpts->PCHThroughHeader, 577 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, 578 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, 579 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr); 580 if (!File) { 581 Diag(SourceLocation(), diag::err_pp_through_header_not_found) 582 << PPOpts->PCHThroughHeader; 583 return; 584 } 585 setPCHThroughHeaderFileID( 586 SourceMgr.createFileID(File, SourceLocation(), SrcMgr::C_User)); 587 } 588 589 // Skip tokens from the Predefines and if needed the main file. 590 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) || 591 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop)) 592 SkipTokensWhileUsingPCH(); 593 } 594 595 void Preprocessor::setPCHThroughHeaderFileID(FileID FID) { 596 assert(PCHThroughHeaderFileID.isInvalid() && 597 "PCHThroughHeaderFileID already set!"); 598 PCHThroughHeaderFileID = FID; 599 } 600 601 bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) { 602 assert(PCHThroughHeaderFileID.isValid() && 603 "Invalid PCH through header FileID"); 604 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID); 605 } 606 607 bool Preprocessor::creatingPCHWithThroughHeader() { 608 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() && 609 PCHThroughHeaderFileID.isValid(); 610 } 611 612 bool Preprocessor::usingPCHWithThroughHeader() { 613 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() && 614 PCHThroughHeaderFileID.isValid(); 615 } 616 617 bool Preprocessor::creatingPCHWithPragmaHdrStop() { 618 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop; 619 } 620 621 bool Preprocessor::usingPCHWithPragmaHdrStop() { 622 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop; 623 } 624 625 /// Skip tokens until after the #include of the through header or 626 /// until after a #pragma hdrstop is seen. Tokens in the predefines file 627 /// and the main file may be skipped. If the end of the predefines file 628 /// is reached, skipping continues into the main file. If the end of the 629 /// main file is reached, it's a fatal error. 630 void Preprocessor::SkipTokensWhileUsingPCH() { 631 bool ReachedMainFileEOF = false; 632 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; 633 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; 634 Token Tok; 635 while (true) { 636 bool InPredefines = (CurLexer->getFileID() == getPredefinesFileID()); 637 CurLexer->Lex(Tok); 638 if (Tok.is(tok::eof) && !InPredefines) { 639 ReachedMainFileEOF = true; 640 break; 641 } 642 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) 643 break; 644 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) 645 break; 646 } 647 if (ReachedMainFileEOF) { 648 if (UsingPCHThroughHeader) 649 Diag(SourceLocation(), diag::err_pp_through_header_not_seen) 650 << PPOpts->PCHThroughHeader << 1; 651 else if (!PPOpts->PCHWithHdrStopCreate) 652 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); 653 } 654 } 655 656 void Preprocessor::replayPreambleConditionalStack() { 657 // Restore the conditional stack from the preamble, if there is one. 658 if (PreambleConditionalStack.isReplaying()) { 659 assert(CurPPLexer && 660 "CurPPLexer is null when calling replayPreambleConditionalStack."); 661 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); 662 PreambleConditionalStack.doneReplaying(); 663 if (PreambleConditionalStack.reachedEOFWhileSkipping()) 664 SkipExcludedConditionalBlock( 665 PreambleConditionalStack.SkipInfo->HashTokenLoc, 666 PreambleConditionalStack.SkipInfo->IfTokenLoc, 667 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, 668 PreambleConditionalStack.SkipInfo->FoundElse, 669 PreambleConditionalStack.SkipInfo->ElseLoc); 670 } 671 } 672 673 void Preprocessor::EndSourceFile() { 674 // Notify the client that we reached the end of the source file. 675 if (Callbacks) 676 Callbacks->EndOfMainFile(); 677 } 678 679 //===----------------------------------------------------------------------===// 680 // Lexer Event Handling. 681 //===----------------------------------------------------------------------===// 682 683 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 684 /// identifier information for the token and install it into the token, 685 /// updating the token kind accordingly. 686 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 687 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 688 689 // Look up this token, see if it is a macro, or if it is a language keyword. 690 IdentifierInfo *II; 691 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 692 // No cleaning needed, just use the characters from the lexed buffer. 693 II = getIdentifierInfo(Identifier.getRawIdentifier()); 694 } else { 695 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 696 SmallString<64> IdentifierBuffer; 697 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 698 699 if (Identifier.hasUCN()) { 700 SmallString<64> UCNIdentifierBuffer; 701 expandUCNs(UCNIdentifierBuffer, CleanedStr); 702 II = getIdentifierInfo(UCNIdentifierBuffer); 703 } else { 704 II = getIdentifierInfo(CleanedStr); 705 } 706 } 707 708 // Update the token info (identifier info and appropriate token kind). 709 Identifier.setIdentifierInfo(II); 710 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && 711 getSourceManager().isInSystemHeader(Identifier.getLocation())) 712 Identifier.setKind(tok::identifier); 713 else 714 Identifier.setKind(II->getTokenID()); 715 716 return II; 717 } 718 719 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 720 PoisonReasons[II] = DiagID; 721 } 722 723 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 724 assert(Ident__exception_code && Ident__exception_info); 725 assert(Ident___exception_code && Ident___exception_info); 726 Ident__exception_code->setIsPoisoned(Poison); 727 Ident___exception_code->setIsPoisoned(Poison); 728 Ident_GetExceptionCode->setIsPoisoned(Poison); 729 Ident__exception_info->setIsPoisoned(Poison); 730 Ident___exception_info->setIsPoisoned(Poison); 731 Ident_GetExceptionInfo->setIsPoisoned(Poison); 732 Ident__abnormal_termination->setIsPoisoned(Poison); 733 Ident___abnormal_termination->setIsPoisoned(Poison); 734 Ident_AbnormalTermination->setIsPoisoned(Poison); 735 } 736 737 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 738 assert(Identifier.getIdentifierInfo() && 739 "Can't handle identifiers without identifier info!"); 740 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 741 PoisonReasons.find(Identifier.getIdentifierInfo()); 742 if(it == PoisonReasons.end()) 743 Diag(Identifier, diag::err_pp_used_poisoned_id); 744 else 745 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 746 } 747 748 /// Returns a diagnostic message kind for reporting a future keyword as 749 /// appropriate for the identifier and specified language. 750 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 751 const LangOptions &LangOpts) { 752 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 753 754 if (LangOpts.CPlusPlus) 755 return llvm::StringSwitch<diag::kind>(II.getName()) 756 #define CXX11_KEYWORD(NAME, FLAGS) \ 757 .Case(#NAME, diag::warn_cxx11_keyword) 758 #define CXX2A_KEYWORD(NAME, FLAGS) \ 759 .Case(#NAME, diag::warn_cxx2a_keyword) 760 #include "clang/Basic/TokenKinds.def" 761 ; 762 763 llvm_unreachable( 764 "Keyword not known to come from a newer Standard or proposed Standard"); 765 } 766 767 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 768 assert(II.isOutOfDate() && "not out of date"); 769 getExternalSource()->updateOutOfDateIdentifier(II); 770 } 771 772 /// HandleIdentifier - This callback is invoked when the lexer reads an 773 /// identifier. This callback looks up the identifier in the map and/or 774 /// potentially macro expands it or turns it into a named token (like 'for'). 775 /// 776 /// Note that callers of this method are guarded by checking the 777 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 778 /// IdentifierInfo methods that compute these properties will need to change to 779 /// match. 780 bool Preprocessor::HandleIdentifier(Token &Identifier) { 781 assert(Identifier.getIdentifierInfo() && 782 "Can't handle identifiers without identifier info!"); 783 784 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 785 786 // If the information about this identifier is out of date, update it from 787 // the external source. 788 // We have to treat __VA_ARGS__ in a special way, since it gets 789 // serialized with isPoisoned = true, but our preprocessor may have 790 // unpoisoned it if we're defining a C99 macro. 791 if (II.isOutOfDate()) { 792 bool CurrentIsPoisoned = false; 793 const bool IsSpecialVariadicMacro = 794 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; 795 if (IsSpecialVariadicMacro) 796 CurrentIsPoisoned = II.isPoisoned(); 797 798 updateOutOfDateIdentifier(II); 799 Identifier.setKind(II.getTokenID()); 800 801 if (IsSpecialVariadicMacro) 802 II.setIsPoisoned(CurrentIsPoisoned); 803 } 804 805 // If this identifier was poisoned, and if it was not produced from a macro 806 // expansion, emit an error. 807 if (II.isPoisoned() && CurPPLexer) { 808 HandlePoisonedIdentifier(Identifier); 809 } 810 811 // If this is a macro to be expanded, do it. 812 if (MacroDefinition MD = getMacroDefinition(&II)) { 813 auto *MI = MD.getMacroInfo(); 814 assert(MI && "macro definition with no macro info?"); 815 if (!DisableMacroExpansion) { 816 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 817 // C99 6.10.3p10: If the preprocessing token immediately after the 818 // macro name isn't a '(', this macro should not be expanded. 819 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 820 return HandleMacroExpandedIdentifier(Identifier, MD); 821 } else { 822 // C99 6.10.3.4p2 says that a disabled macro may never again be 823 // expanded, even if it's in a context where it could be expanded in the 824 // future. 825 Identifier.setFlag(Token::DisableExpand); 826 if (MI->isObjectLike() || isNextPPTokenLParen()) 827 Diag(Identifier, diag::pp_disabled_macro_expansion); 828 } 829 } 830 } 831 832 // If this identifier is a keyword in a newer Standard or proposed Standard, 833 // produce a warning. Don't warn if we're not considering macro expansion, 834 // since this identifier might be the name of a macro. 835 // FIXME: This warning is disabled in cases where it shouldn't be, like 836 // "#define constexpr constexpr", "int constexpr;" 837 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 838 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 839 << II.getName(); 840 // Don't diagnose this keyword again in this translation unit. 841 II.setIsFutureCompatKeyword(false); 842 } 843 844 // If this is an extension token, diagnose its use. 845 // We avoid diagnosing tokens that originate from macro definitions. 846 // FIXME: This warning is disabled in cases where it shouldn't be, 847 // like "#define TY typeof", "TY(1) x". 848 if (II.isExtensionToken() && !DisableMacroExpansion) 849 Diag(Identifier, diag::ext_token_used); 850 851 // If this is the 'import' contextual keyword following an '@', note 852 // that the next token indicates a module name. 853 // 854 // Note that we do not treat 'import' as a contextual 855 // keyword when we're in a caching lexer, because caching lexers only get 856 // used in contexts where import declarations are disallowed. 857 // 858 // Likewise if this is the C++ Modules TS import keyword. 859 if (((LastTokenWasAt && II.isModulesImport()) || 860 Identifier.is(tok::kw_import)) && 861 !InMacroArgs && !DisableMacroExpansion && 862 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 863 CurLexerKind != CLK_CachingLexer) { 864 ModuleImportLoc = Identifier.getLocation(); 865 ModuleImportPath.clear(); 866 ModuleImportExpectsIdentifier = true; 867 CurLexerKind = CLK_LexAfterModuleImport; 868 } 869 return true; 870 } 871 872 void Preprocessor::Lex(Token &Result) { 873 // We loop here until a lex function returns a token; this avoids recursion. 874 bool ReturnedToken; 875 do { 876 switch (CurLexerKind) { 877 case CLK_Lexer: 878 ReturnedToken = CurLexer->Lex(Result); 879 break; 880 case CLK_PTHLexer: 881 ReturnedToken = CurPTHLexer->Lex(Result); 882 break; 883 case CLK_TokenLexer: 884 ReturnedToken = CurTokenLexer->Lex(Result); 885 break; 886 case CLK_CachingLexer: 887 CachingLex(Result); 888 ReturnedToken = true; 889 break; 890 case CLK_LexAfterModuleImport: 891 LexAfterModuleImport(Result); 892 ReturnedToken = true; 893 break; 894 } 895 } while (!ReturnedToken); 896 897 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { 898 // Remember the identifier before code completion token. 899 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 900 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); 901 // Set IdenfitierInfo to null to avoid confusing code that handles both 902 // identifiers and completion tokens. 903 Result.setIdentifierInfo(nullptr); 904 } 905 906 LastTokenWasAt = Result.is(tok::at); 907 } 908 909 /// Lex a token following the 'import' contextual keyword. 910 /// 911 void Preprocessor::LexAfterModuleImport(Token &Result) { 912 // Figure out what kind of lexer we actually have. 913 recomputeCurLexerKind(); 914 915 // Lex the next token. 916 Lex(Result); 917 918 // The token sequence 919 // 920 // import identifier (. identifier)* 921 // 922 // indicates a module import directive. We already saw the 'import' 923 // contextual keyword, so now we're looking for the identifiers. 924 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 925 // We expected to see an identifier here, and we did; continue handling 926 // identifiers. 927 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 928 Result.getLocation())); 929 ModuleImportExpectsIdentifier = false; 930 CurLexerKind = CLK_LexAfterModuleImport; 931 return; 932 } 933 934 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 935 // see the next identifier. (We can also see a '[[' that begins an 936 // attribute-specifier-seq here under the C++ Modules TS.) 937 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 938 ModuleImportExpectsIdentifier = true; 939 CurLexerKind = CLK_LexAfterModuleImport; 940 return; 941 } 942 943 // If we have a non-empty module path, load the named module. 944 if (!ModuleImportPath.empty()) { 945 // Under the Modules TS, the dot is just part of the module name, and not 946 // a real hierarachy separator. Flatten such module names now. 947 // 948 // FIXME: Is this the right level to be performing this transformation? 949 std::string FlatModuleName; 950 if (getLangOpts().ModulesTS) { 951 for (auto &Piece : ModuleImportPath) { 952 if (!FlatModuleName.empty()) 953 FlatModuleName += "."; 954 FlatModuleName += Piece.first->getName(); 955 } 956 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 957 ModuleImportPath.clear(); 958 ModuleImportPath.push_back( 959 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 960 } 961 962 Module *Imported = nullptr; 963 if (getLangOpts().Modules) { 964 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 965 ModuleImportPath, 966 Module::Hidden, 967 /*IsIncludeDirective=*/false); 968 if (Imported) 969 makeModuleVisible(Imported, ModuleImportLoc); 970 } 971 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 972 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 973 } 974 } 975 976 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 977 CurSubmoduleState->VisibleModules.setVisible( 978 M, Loc, [](Module *) {}, 979 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 980 // FIXME: Include the path in the diagnostic. 981 // FIXME: Include the import location for the conflicting module. 982 Diag(ModuleImportLoc, diag::warn_module_conflict) 983 << Path[0]->getFullModuleName() 984 << Conflict->getFullModuleName() 985 << Message; 986 }); 987 988 // Add this module to the imports list of the currently-built submodule. 989 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 990 BuildingSubmoduleStack.back().M->Imports.insert(M); 991 } 992 993 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 994 const char *DiagnosticTag, 995 bool AllowMacroExpansion) { 996 // We need at least one string literal. 997 if (Result.isNot(tok::string_literal)) { 998 Diag(Result, diag::err_expected_string_literal) 999 << /*Source='in...'*/0 << DiagnosticTag; 1000 return false; 1001 } 1002 1003 // Lex string literal tokens, optionally with macro expansion. 1004 SmallVector<Token, 4> StrToks; 1005 do { 1006 StrToks.push_back(Result); 1007 1008 if (Result.hasUDSuffix()) 1009 Diag(Result, diag::err_invalid_string_udl); 1010 1011 if (AllowMacroExpansion) 1012 Lex(Result); 1013 else 1014 LexUnexpandedToken(Result); 1015 } while (Result.is(tok::string_literal)); 1016 1017 // Concatenate and parse the strings. 1018 StringLiteralParser Literal(StrToks, *this); 1019 assert(Literal.isAscii() && "Didn't allow wide strings in"); 1020 1021 if (Literal.hadError) 1022 return false; 1023 1024 if (Literal.Pascal) { 1025 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 1026 << /*Source='in...'*/0 << DiagnosticTag; 1027 return false; 1028 } 1029 1030 String = Literal.GetString(); 1031 return true; 1032 } 1033 1034 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 1035 assert(Tok.is(tok::numeric_constant)); 1036 SmallString<8> IntegerBuffer; 1037 bool NumberInvalid = false; 1038 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 1039 if (NumberInvalid) 1040 return false; 1041 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 1042 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 1043 return false; 1044 llvm::APInt APVal(64, 0); 1045 if (Literal.GetIntegerValue(APVal)) 1046 return false; 1047 Lex(Tok); 1048 Value = APVal.getLimitedValue(); 1049 return true; 1050 } 1051 1052 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 1053 assert(Handler && "NULL comment handler"); 1054 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 1055 CommentHandlers.end() && "Comment handler already registered"); 1056 CommentHandlers.push_back(Handler); 1057 } 1058 1059 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 1060 std::vector<CommentHandler *>::iterator Pos = 1061 std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 1062 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 1063 CommentHandlers.erase(Pos); 1064 } 1065 1066 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 1067 bool AnyPendingTokens = false; 1068 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 1069 HEnd = CommentHandlers.end(); 1070 H != HEnd; ++H) { 1071 if ((*H)->HandleComment(*this, Comment)) 1072 AnyPendingTokens = true; 1073 } 1074 if (!AnyPendingTokens || getCommentRetentionState()) 1075 return false; 1076 Lex(result); 1077 return true; 1078 } 1079 1080 ModuleLoader::~ModuleLoader() = default; 1081 1082 CommentHandler::~CommentHandler() = default; 1083 1084 CodeCompletionHandler::~CodeCompletionHandler() = default; 1085 1086 void Preprocessor::createPreprocessingRecord() { 1087 if (Record) 1088 return; 1089 1090 Record = new PreprocessingRecord(getSourceManager()); 1091 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 1092 } 1093