1 //===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/IdentifierTable.h" 32 #include "clang/Basic/LLVM.h" 33 #include "clang/Basic/LangOptions.h" 34 #include "clang/Basic/Module.h" 35 #include "clang/Basic/SourceLocation.h" 36 #include "clang/Basic/SourceManager.h" 37 #include "clang/Basic/TargetInfo.h" 38 #include "clang/Lex/CodeCompletionHandler.h" 39 #include "clang/Lex/ExternalPreprocessorSource.h" 40 #include "clang/Lex/HeaderSearch.h" 41 #include "clang/Lex/LexDiagnostic.h" 42 #include "clang/Lex/Lexer.h" 43 #include "clang/Lex/LiteralSupport.h" 44 #include "clang/Lex/MacroArgs.h" 45 #include "clang/Lex/MacroInfo.h" 46 #include "clang/Lex/ModuleLoader.h" 47 #include "clang/Lex/PTHLexer.h" 48 #include "clang/Lex/PTHManager.h" 49 #include "clang/Lex/Pragma.h" 50 #include "clang/Lex/PreprocessingRecord.h" 51 #include "clang/Lex/PreprocessorLexer.h" 52 #include "clang/Lex/PreprocessorOptions.h" 53 #include "clang/Lex/ScratchBuffer.h" 54 #include "clang/Lex/Token.h" 55 #include "clang/Lex/TokenLexer.h" 56 #include "llvm/ADT/APInt.h" 57 #include "llvm/ADT/ArrayRef.h" 58 #include "llvm/ADT/DenseMap.h" 59 #include "llvm/ADT/SmallString.h" 60 #include "llvm/ADT/SmallVector.h" 61 #include "llvm/ADT/STLExtras.h" 62 #include "llvm/ADT/StringRef.h" 63 #include "llvm/ADT/StringSwitch.h" 64 #include "llvm/Support/Capacity.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/MemoryBuffer.h" 67 #include "llvm/Support/raw_ostream.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <memory> 71 #include <string> 72 #include <utility> 73 #include <vector> 74 75 using namespace clang; 76 77 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 78 79 ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; 80 81 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 82 DiagnosticsEngine &diags, LangOptions &opts, 83 SourceManager &SM, MemoryBufferCache &PCMCache, 84 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 85 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 86 TranslationUnitKind TUKind) 87 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), 88 FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache), 89 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 90 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 91 // As the language options may have not been loaded yet (when 92 // deserializing an ASTUnit), adding keywords to the identifier table is 93 // deferred to Preprocessor::Initialize(). 94 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), 95 TUKind(TUKind), SkipMainFilePreamble(0, true), 96 CurSubmoduleState(&NullSubmoduleState) { 97 OwnsHeaderSearch = OwnsHeaders; 98 99 // Default to discarding comments. 100 KeepComments = false; 101 KeepMacroComments = false; 102 SuppressIncludeNotFoundError = false; 103 104 // Macro expansion is enabled. 105 DisableMacroExpansion = false; 106 MacroExpansionInDirectivesOverride = false; 107 InMacroArgs = false; 108 InMacroArgPreExpansion = false; 109 NumCachedTokenLexers = 0; 110 PragmasEnabled = true; 111 ParsingIfOrElifDirective = false; 112 PreprocessedOutput = false; 113 114 // We haven't read anything from the external source. 115 ReadMacrosFromExternalSource = false; 116 117 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of 118 // a macro. They get unpoisoned where it is allowed. 119 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 120 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 121 if (getLangOpts().CPlusPlus2a) { 122 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); 123 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); 124 } else { 125 Ident__VA_OPT__ = nullptr; 126 } 127 128 // Initialize the pragma handlers. 129 RegisterBuiltinPragmas(); 130 131 // Initialize builtin macros like __LINE__ and friends. 132 RegisterBuiltinMacros(); 133 134 if(LangOpts.Borland) { 135 Ident__exception_info = getIdentifierInfo("_exception_info"); 136 Ident___exception_info = getIdentifierInfo("__exception_info"); 137 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 138 Ident__exception_code = getIdentifierInfo("_exception_code"); 139 Ident___exception_code = getIdentifierInfo("__exception_code"); 140 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 141 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 142 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 143 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 144 } else { 145 Ident__exception_info = Ident__exception_code = nullptr; 146 Ident__abnormal_termination = Ident___exception_info = nullptr; 147 Ident___exception_code = Ident___abnormal_termination = nullptr; 148 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 149 Ident_AbnormalTermination = nullptr; 150 } 151 152 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens. 153 if (usingPCHWithPragmaHdrStop()) 154 SkippingUntilPragmaHdrStop = true; 155 156 // If using a PCH with a through header, start skipping tokens. 157 if (!this->PPOpts->PCHThroughHeader.empty() && 158 !this->PPOpts->ImplicitPCHInclude.empty()) 159 SkippingUntilPCHThroughHeader = true; 160 161 if (this->PPOpts->GeneratePreamble) 162 PreambleConditionalStack.startRecording(); 163 } 164 165 Preprocessor::~Preprocessor() { 166 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 167 168 IncludeMacroStack.clear(); 169 170 // Destroy any macro definitions. 171 while (MacroInfoChain *I = MIChainHead) { 172 MIChainHead = I->Next; 173 I->~MacroInfoChain(); 174 } 175 176 // Free any cached macro expanders. 177 // This populates MacroArgCache, so all TokenLexers need to be destroyed 178 // before the code below that frees up the MacroArgCache list. 179 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 180 CurTokenLexer.reset(); 181 182 // Free any cached MacroArgs. 183 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 184 ArgList = ArgList->deallocate(); 185 186 // Delete the header search info, if we own it. 187 if (OwnsHeaderSearch) 188 delete &HeaderInfo; 189 } 190 191 void Preprocessor::Initialize(const TargetInfo &Target, 192 const TargetInfo *AuxTarget) { 193 assert((!this->Target || this->Target == &Target) && 194 "Invalid override of target information"); 195 this->Target = &Target; 196 197 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 198 "Invalid override of aux target information."); 199 this->AuxTarget = AuxTarget; 200 201 // Initialize information about built-ins. 202 BuiltinInfo.InitializeTarget(Target, AuxTarget); 203 HeaderInfo.setTarget(Target); 204 205 // Populate the identifier table with info about keywords for the current language. 206 Identifiers.AddKeywords(LangOpts); 207 } 208 209 void Preprocessor::InitializeForModelFile() { 210 NumEnteredSourceFiles = 0; 211 212 // Reset pragmas 213 PragmaHandlersBackup = std::move(PragmaHandlers); 214 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 215 RegisterBuiltinPragmas(); 216 217 // Reset PredefinesFileID 218 PredefinesFileID = FileID(); 219 } 220 221 void Preprocessor::FinalizeForModelFile() { 222 NumEnteredSourceFiles = 1; 223 224 PragmaHandlers = std::move(PragmaHandlersBackup); 225 } 226 227 void Preprocessor::setPTHManager(PTHManager* pm) { 228 PTH.reset(pm); 229 FileMgr.addStatCache(PTH->createStatCache()); 230 } 231 232 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 233 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 234 << getSpelling(Tok) << "'"; 235 236 if (!DumpFlags) return; 237 238 llvm::errs() << "\t"; 239 if (Tok.isAtStartOfLine()) 240 llvm::errs() << " [StartOfLine]"; 241 if (Tok.hasLeadingSpace()) 242 llvm::errs() << " [LeadingSpace]"; 243 if (Tok.isExpandDisabled()) 244 llvm::errs() << " [ExpandDisabled]"; 245 if (Tok.needsCleaning()) { 246 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 247 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 248 << "']"; 249 } 250 251 llvm::errs() << "\tLoc=<"; 252 DumpLocation(Tok.getLocation()); 253 llvm::errs() << ">"; 254 } 255 256 void Preprocessor::DumpLocation(SourceLocation Loc) const { 257 Loc.print(llvm::errs(), SourceMgr); 258 } 259 260 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 261 llvm::errs() << "MACRO: "; 262 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 263 DumpToken(MI.getReplacementToken(i)); 264 llvm::errs() << " "; 265 } 266 llvm::errs() << "\n"; 267 } 268 269 void Preprocessor::PrintStats() { 270 llvm::errs() << "\n*** Preprocessor Stats:\n"; 271 llvm::errs() << NumDirectives << " directives found:\n"; 272 llvm::errs() << " " << NumDefined << " #define.\n"; 273 llvm::errs() << " " << NumUndefined << " #undef.\n"; 274 llvm::errs() << " #include/#include_next/#import:\n"; 275 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 276 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 277 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 278 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 279 llvm::errs() << " " << NumEndif << " #endif.\n"; 280 llvm::errs() << " " << NumPragma << " #pragma.\n"; 281 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 282 283 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 284 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 285 << NumFastMacroExpanded << " on the fast path.\n"; 286 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 287 << " token paste (##) operations performed, " 288 << NumFastTokenPaste << " on the fast path.\n"; 289 290 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 291 292 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 293 llvm::errs() << "\n Macro Expanded Tokens: " 294 << llvm::capacity_in_bytes(MacroExpandedTokens); 295 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 296 // FIXME: List information for all submodules. 297 llvm::errs() << "\n Macros: " 298 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 299 llvm::errs() << "\n #pragma push_macro Info: " 300 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 301 llvm::errs() << "\n Poison Reasons: " 302 << llvm::capacity_in_bytes(PoisonReasons); 303 llvm::errs() << "\n Comment Handlers: " 304 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 305 } 306 307 Preprocessor::macro_iterator 308 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 309 if (IncludeExternalMacros && ExternalSource && 310 !ReadMacrosFromExternalSource) { 311 ReadMacrosFromExternalSource = true; 312 ExternalSource->ReadDefinedMacros(); 313 } 314 315 // Make sure we cover all macros in visible modules. 316 for (const ModuleMacro &Macro : ModuleMacros) 317 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 318 319 return CurSubmoduleState->Macros.begin(); 320 } 321 322 size_t Preprocessor::getTotalMemory() const { 323 return BP.getTotalMemory() 324 + llvm::capacity_in_bytes(MacroExpandedTokens) 325 + Predefines.capacity() /* Predefines buffer. */ 326 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 327 // and ModuleMacros. 328 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 329 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 330 + llvm::capacity_in_bytes(PoisonReasons) 331 + llvm::capacity_in_bytes(CommentHandlers); 332 } 333 334 Preprocessor::macro_iterator 335 Preprocessor::macro_end(bool IncludeExternalMacros) const { 336 if (IncludeExternalMacros && ExternalSource && 337 !ReadMacrosFromExternalSource) { 338 ReadMacrosFromExternalSource = true; 339 ExternalSource->ReadDefinedMacros(); 340 } 341 342 return CurSubmoduleState->Macros.end(); 343 } 344 345 /// Compares macro tokens with a specified token value sequence. 346 static bool MacroDefinitionEquals(const MacroInfo *MI, 347 ArrayRef<TokenValue> Tokens) { 348 return Tokens.size() == MI->getNumTokens() && 349 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 350 } 351 352 StringRef Preprocessor::getLastMacroWithSpelling( 353 SourceLocation Loc, 354 ArrayRef<TokenValue> Tokens) const { 355 SourceLocation BestLocation; 356 StringRef BestSpelling; 357 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 358 I != E; ++I) { 359 const MacroDirective::DefInfo 360 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 361 if (!Def || !Def.getMacroInfo()) 362 continue; 363 if (!Def.getMacroInfo()->isObjectLike()) 364 continue; 365 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 366 continue; 367 SourceLocation Location = Def.getLocation(); 368 // Choose the macro defined latest. 369 if (BestLocation.isInvalid() || 370 (Location.isValid() && 371 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 372 BestLocation = Location; 373 BestSpelling = I->first->getName(); 374 } 375 } 376 return BestSpelling; 377 } 378 379 void Preprocessor::recomputeCurLexerKind() { 380 if (CurLexer) 381 CurLexerKind = CLK_Lexer; 382 else if (CurPTHLexer) 383 CurLexerKind = CLK_PTHLexer; 384 else if (CurTokenLexer) 385 CurLexerKind = CLK_TokenLexer; 386 else 387 CurLexerKind = CLK_CachingLexer; 388 } 389 390 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 391 unsigned CompleteLine, 392 unsigned CompleteColumn) { 393 assert(File); 394 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 395 assert(!CodeCompletionFile && "Already set"); 396 397 using llvm::MemoryBuffer; 398 399 // Load the actual file's contents. 400 bool Invalid = false; 401 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 402 if (Invalid) 403 return true; 404 405 // Find the byte position of the truncation point. 406 const char *Position = Buffer->getBufferStart(); 407 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 408 for (; *Position; ++Position) { 409 if (*Position != '\r' && *Position != '\n') 410 continue; 411 412 // Eat \r\n or \n\r as a single line. 413 if ((Position[1] == '\r' || Position[1] == '\n') && 414 Position[0] != Position[1]) 415 ++Position; 416 ++Position; 417 break; 418 } 419 } 420 421 Position += CompleteColumn - 1; 422 423 // If pointing inside the preamble, adjust the position at the beginning of 424 // the file after the preamble. 425 if (SkipMainFilePreamble.first && 426 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 427 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 428 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 429 } 430 431 if (Position > Buffer->getBufferEnd()) 432 Position = Buffer->getBufferEnd(); 433 434 CodeCompletionFile = File; 435 CodeCompletionOffset = Position - Buffer->getBufferStart(); 436 437 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer( 438 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier()); 439 char *NewBuf = NewBuffer->getBufferStart(); 440 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 441 *NewPos = '\0'; 442 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 443 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 444 445 return false; 446 } 447 448 void Preprocessor::CodeCompleteNaturalLanguage() { 449 if (CodeComplete) 450 CodeComplete->CodeCompleteNaturalLanguage(); 451 setCodeCompletionReached(); 452 } 453 454 /// getSpelling - This method is used to get the spelling of a token into a 455 /// SmallVector. Note that the returned StringRef may not point to the 456 /// supplied buffer if a copy can be avoided. 457 StringRef Preprocessor::getSpelling(const Token &Tok, 458 SmallVectorImpl<char> &Buffer, 459 bool *Invalid) const { 460 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 461 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 462 // Try the fast path. 463 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 464 return II->getName(); 465 } 466 467 // Resize the buffer if we need to copy into it. 468 if (Tok.needsCleaning()) 469 Buffer.resize(Tok.getLength()); 470 471 const char *Ptr = Buffer.data(); 472 unsigned Len = getSpelling(Tok, Ptr, Invalid); 473 return StringRef(Ptr, Len); 474 } 475 476 /// CreateString - Plop the specified string into a scratch buffer and return a 477 /// location for it. If specified, the source location provides a source 478 /// location for the token. 479 void Preprocessor::CreateString(StringRef Str, Token &Tok, 480 SourceLocation ExpansionLocStart, 481 SourceLocation ExpansionLocEnd) { 482 Tok.setLength(Str.size()); 483 484 const char *DestPtr; 485 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 486 487 if (ExpansionLocStart.isValid()) 488 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 489 ExpansionLocEnd, Str.size()); 490 Tok.setLocation(Loc); 491 492 // If this is a raw identifier or a literal token, set the pointer data. 493 if (Tok.is(tok::raw_identifier)) 494 Tok.setRawIdentifierData(DestPtr); 495 else if (Tok.isLiteral()) 496 Tok.setLiteralData(DestPtr); 497 } 498 499 SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) { 500 auto &SM = getSourceManager(); 501 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 502 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc); 503 bool Invalid = false; 504 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); 505 if (Invalid) 506 return SourceLocation(); 507 508 // FIXME: We could consider re-using spelling for tokens we see repeatedly. 509 const char *DestPtr; 510 SourceLocation Spelling = 511 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr); 512 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length)); 513 } 514 515 Module *Preprocessor::getCurrentModule() { 516 if (!getLangOpts().isCompilingModule()) 517 return nullptr; 518 519 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 520 } 521 522 //===----------------------------------------------------------------------===// 523 // Preprocessor Initialization Methods 524 //===----------------------------------------------------------------------===// 525 526 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 527 /// which implicitly adds the builtin defines etc. 528 void Preprocessor::EnterMainSourceFile() { 529 // We do not allow the preprocessor to reenter the main file. Doing so will 530 // cause FileID's to accumulate information from both runs (e.g. #line 531 // information) and predefined macros aren't guaranteed to be set properly. 532 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 533 FileID MainFileID = SourceMgr.getMainFileID(); 534 535 // If MainFileID is loaded it means we loaded an AST file, no need to enter 536 // a main file. 537 if (!SourceMgr.isLoadedFileID(MainFileID)) { 538 // Enter the main file source buffer. 539 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 540 541 // If we've been asked to skip bytes in the main file (e.g., as part of a 542 // precompiled preamble), do so now. 543 if (SkipMainFilePreamble.first > 0) 544 CurLexer->SetByteOffset(SkipMainFilePreamble.first, 545 SkipMainFilePreamble.second); 546 547 // Tell the header info that the main file was entered. If the file is later 548 // #imported, it won't be re-entered. 549 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 550 HeaderInfo.IncrementIncludeCount(FE); 551 } 552 553 // Preprocess Predefines to populate the initial preprocessor state. 554 std::unique_ptr<llvm::MemoryBuffer> SB = 555 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 556 assert(SB && "Cannot create predefined source buffer"); 557 FileID FID = SourceMgr.createFileID(std::move(SB)); 558 assert(FID.isValid() && "Could not create FileID for predefines?"); 559 setPredefinesFileID(FID); 560 561 // Start parsing the predefines. 562 EnterSourceFile(FID, nullptr, SourceLocation()); 563 564 if (!PPOpts->PCHThroughHeader.empty()) { 565 // Lookup and save the FileID for the through header. If it isn't found 566 // in the search path, it's a fatal error. 567 const DirectoryLookup *CurDir; 568 const FileEntry *File = LookupFile( 569 SourceLocation(), PPOpts->PCHThroughHeader, 570 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, 571 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, 572 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr); 573 if (!File) { 574 Diag(SourceLocation(), diag::err_pp_through_header_not_found) 575 << PPOpts->PCHThroughHeader; 576 return; 577 } 578 setPCHThroughHeaderFileID( 579 SourceMgr.createFileID(File, SourceLocation(), SrcMgr::C_User)); 580 } 581 582 // Skip tokens from the Predefines and if needed the main file. 583 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) || 584 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop)) 585 SkipTokensWhileUsingPCH(); 586 } 587 588 void Preprocessor::setPCHThroughHeaderFileID(FileID FID) { 589 assert(PCHThroughHeaderFileID.isInvalid() && 590 "PCHThroughHeaderFileID already set!"); 591 PCHThroughHeaderFileID = FID; 592 } 593 594 bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) { 595 assert(PCHThroughHeaderFileID.isValid() && 596 "Invalid PCH through header FileID"); 597 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID); 598 } 599 600 bool Preprocessor::creatingPCHWithThroughHeader() { 601 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() && 602 PCHThroughHeaderFileID.isValid(); 603 } 604 605 bool Preprocessor::usingPCHWithThroughHeader() { 606 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() && 607 PCHThroughHeaderFileID.isValid(); 608 } 609 610 bool Preprocessor::creatingPCHWithPragmaHdrStop() { 611 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop; 612 } 613 614 bool Preprocessor::usingPCHWithPragmaHdrStop() { 615 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop; 616 } 617 618 /// Skip tokens until after the #include of the through header or 619 /// until after a #pragma hdrstop is seen. Tokens in the predefines file 620 /// and the main file may be skipped. If the end of the predefines file 621 /// is reached, skipping continues into the main file. If the end of the 622 /// main file is reached, it's a fatal error. 623 void Preprocessor::SkipTokensWhileUsingPCH() { 624 bool ReachedMainFileEOF = false; 625 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; 626 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; 627 Token Tok; 628 while (true) { 629 bool InPredefines = (CurLexer->getFileID() == getPredefinesFileID()); 630 CurLexer->Lex(Tok); 631 if (Tok.is(tok::eof) && !InPredefines) { 632 ReachedMainFileEOF = true; 633 break; 634 } 635 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) 636 break; 637 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) 638 break; 639 } 640 if (ReachedMainFileEOF) { 641 if (UsingPCHThroughHeader) 642 Diag(SourceLocation(), diag::err_pp_through_header_not_seen) 643 << PPOpts->PCHThroughHeader << 1; 644 else if (!PPOpts->PCHWithHdrStopCreate) 645 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); 646 } 647 } 648 649 void Preprocessor::replayPreambleConditionalStack() { 650 // Restore the conditional stack from the preamble, if there is one. 651 if (PreambleConditionalStack.isReplaying()) { 652 assert(CurPPLexer && 653 "CurPPLexer is null when calling replayPreambleConditionalStack."); 654 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); 655 PreambleConditionalStack.doneReplaying(); 656 if (PreambleConditionalStack.reachedEOFWhileSkipping()) 657 SkipExcludedConditionalBlock( 658 PreambleConditionalStack.SkipInfo->HashTokenLoc, 659 PreambleConditionalStack.SkipInfo->IfTokenLoc, 660 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, 661 PreambleConditionalStack.SkipInfo->FoundElse, 662 PreambleConditionalStack.SkipInfo->ElseLoc); 663 } 664 } 665 666 void Preprocessor::EndSourceFile() { 667 // Notify the client that we reached the end of the source file. 668 if (Callbacks) 669 Callbacks->EndOfMainFile(); 670 } 671 672 //===----------------------------------------------------------------------===// 673 // Lexer Event Handling. 674 //===----------------------------------------------------------------------===// 675 676 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 677 /// identifier information for the token and install it into the token, 678 /// updating the token kind accordingly. 679 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 680 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 681 682 // Look up this token, see if it is a macro, or if it is a language keyword. 683 IdentifierInfo *II; 684 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 685 // No cleaning needed, just use the characters from the lexed buffer. 686 II = getIdentifierInfo(Identifier.getRawIdentifier()); 687 } else { 688 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 689 SmallString<64> IdentifierBuffer; 690 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 691 692 if (Identifier.hasUCN()) { 693 SmallString<64> UCNIdentifierBuffer; 694 expandUCNs(UCNIdentifierBuffer, CleanedStr); 695 II = getIdentifierInfo(UCNIdentifierBuffer); 696 } else { 697 II = getIdentifierInfo(CleanedStr); 698 } 699 } 700 701 // Update the token info (identifier info and appropriate token kind). 702 Identifier.setIdentifierInfo(II); 703 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && 704 getSourceManager().isInSystemHeader(Identifier.getLocation())) 705 Identifier.setKind(tok::identifier); 706 else 707 Identifier.setKind(II->getTokenID()); 708 709 return II; 710 } 711 712 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 713 PoisonReasons[II] = DiagID; 714 } 715 716 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 717 assert(Ident__exception_code && Ident__exception_info); 718 assert(Ident___exception_code && Ident___exception_info); 719 Ident__exception_code->setIsPoisoned(Poison); 720 Ident___exception_code->setIsPoisoned(Poison); 721 Ident_GetExceptionCode->setIsPoisoned(Poison); 722 Ident__exception_info->setIsPoisoned(Poison); 723 Ident___exception_info->setIsPoisoned(Poison); 724 Ident_GetExceptionInfo->setIsPoisoned(Poison); 725 Ident__abnormal_termination->setIsPoisoned(Poison); 726 Ident___abnormal_termination->setIsPoisoned(Poison); 727 Ident_AbnormalTermination->setIsPoisoned(Poison); 728 } 729 730 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 731 assert(Identifier.getIdentifierInfo() && 732 "Can't handle identifiers without identifier info!"); 733 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 734 PoisonReasons.find(Identifier.getIdentifierInfo()); 735 if(it == PoisonReasons.end()) 736 Diag(Identifier, diag::err_pp_used_poisoned_id); 737 else 738 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 739 } 740 741 /// Returns a diagnostic message kind for reporting a future keyword as 742 /// appropriate for the identifier and specified language. 743 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 744 const LangOptions &LangOpts) { 745 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 746 747 if (LangOpts.CPlusPlus) 748 return llvm::StringSwitch<diag::kind>(II.getName()) 749 #define CXX11_KEYWORD(NAME, FLAGS) \ 750 .Case(#NAME, diag::warn_cxx11_keyword) 751 #define CXX2A_KEYWORD(NAME, FLAGS) \ 752 .Case(#NAME, diag::warn_cxx2a_keyword) 753 #include "clang/Basic/TokenKinds.def" 754 ; 755 756 llvm_unreachable( 757 "Keyword not known to come from a newer Standard or proposed Standard"); 758 } 759 760 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 761 assert(II.isOutOfDate() && "not out of date"); 762 getExternalSource()->updateOutOfDateIdentifier(II); 763 } 764 765 /// HandleIdentifier - This callback is invoked when the lexer reads an 766 /// identifier. This callback looks up the identifier in the map and/or 767 /// potentially macro expands it or turns it into a named token (like 'for'). 768 /// 769 /// Note that callers of this method are guarded by checking the 770 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 771 /// IdentifierInfo methods that compute these properties will need to change to 772 /// match. 773 bool Preprocessor::HandleIdentifier(Token &Identifier) { 774 assert(Identifier.getIdentifierInfo() && 775 "Can't handle identifiers without identifier info!"); 776 777 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 778 779 // If the information about this identifier is out of date, update it from 780 // the external source. 781 // We have to treat __VA_ARGS__ in a special way, since it gets 782 // serialized with isPoisoned = true, but our preprocessor may have 783 // unpoisoned it if we're defining a C99 macro. 784 if (II.isOutOfDate()) { 785 bool CurrentIsPoisoned = false; 786 const bool IsSpecialVariadicMacro = 787 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; 788 if (IsSpecialVariadicMacro) 789 CurrentIsPoisoned = II.isPoisoned(); 790 791 updateOutOfDateIdentifier(II); 792 Identifier.setKind(II.getTokenID()); 793 794 if (IsSpecialVariadicMacro) 795 II.setIsPoisoned(CurrentIsPoisoned); 796 } 797 798 // If this identifier was poisoned, and if it was not produced from a macro 799 // expansion, emit an error. 800 if (II.isPoisoned() && CurPPLexer) { 801 HandlePoisonedIdentifier(Identifier); 802 } 803 804 // If this is a macro to be expanded, do it. 805 if (MacroDefinition MD = getMacroDefinition(&II)) { 806 auto *MI = MD.getMacroInfo(); 807 assert(MI && "macro definition with no macro info?"); 808 if (!DisableMacroExpansion) { 809 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 810 // C99 6.10.3p10: If the preprocessing token immediately after the 811 // macro name isn't a '(', this macro should not be expanded. 812 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 813 return HandleMacroExpandedIdentifier(Identifier, MD); 814 } else { 815 // C99 6.10.3.4p2 says that a disabled macro may never again be 816 // expanded, even if it's in a context where it could be expanded in the 817 // future. 818 Identifier.setFlag(Token::DisableExpand); 819 if (MI->isObjectLike() || isNextPPTokenLParen()) 820 Diag(Identifier, diag::pp_disabled_macro_expansion); 821 } 822 } 823 } 824 825 // If this identifier is a keyword in a newer Standard or proposed Standard, 826 // produce a warning. Don't warn if we're not considering macro expansion, 827 // since this identifier might be the name of a macro. 828 // FIXME: This warning is disabled in cases where it shouldn't be, like 829 // "#define constexpr constexpr", "int constexpr;" 830 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 831 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 832 << II.getName(); 833 // Don't diagnose this keyword again in this translation unit. 834 II.setIsFutureCompatKeyword(false); 835 } 836 837 // If this is an extension token, diagnose its use. 838 // We avoid diagnosing tokens that originate from macro definitions. 839 // FIXME: This warning is disabled in cases where it shouldn't be, 840 // like "#define TY typeof", "TY(1) x". 841 if (II.isExtensionToken() && !DisableMacroExpansion) 842 Diag(Identifier, diag::ext_token_used); 843 844 // If this is the 'import' contextual keyword following an '@', note 845 // that the next token indicates a module name. 846 // 847 // Note that we do not treat 'import' as a contextual 848 // keyword when we're in a caching lexer, because caching lexers only get 849 // used in contexts where import declarations are disallowed. 850 // 851 // Likewise if this is the C++ Modules TS import keyword. 852 if (((LastTokenWasAt && II.isModulesImport()) || 853 Identifier.is(tok::kw_import)) && 854 !InMacroArgs && !DisableMacroExpansion && 855 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 856 CurLexerKind != CLK_CachingLexer) { 857 ModuleImportLoc = Identifier.getLocation(); 858 ModuleImportPath.clear(); 859 ModuleImportExpectsIdentifier = true; 860 CurLexerKind = CLK_LexAfterModuleImport; 861 } 862 return true; 863 } 864 865 void Preprocessor::Lex(Token &Result) { 866 // We loop here until a lex function returns a token; this avoids recursion. 867 bool ReturnedToken; 868 do { 869 switch (CurLexerKind) { 870 case CLK_Lexer: 871 ReturnedToken = CurLexer->Lex(Result); 872 break; 873 case CLK_PTHLexer: 874 ReturnedToken = CurPTHLexer->Lex(Result); 875 break; 876 case CLK_TokenLexer: 877 ReturnedToken = CurTokenLexer->Lex(Result); 878 break; 879 case CLK_CachingLexer: 880 CachingLex(Result); 881 ReturnedToken = true; 882 break; 883 case CLK_LexAfterModuleImport: 884 LexAfterModuleImport(Result); 885 ReturnedToken = true; 886 break; 887 } 888 } while (!ReturnedToken); 889 890 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { 891 // Remember the identifier before code completion token. 892 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 893 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); 894 // Set IdenfitierInfo to null to avoid confusing code that handles both 895 // identifiers and completion tokens. 896 Result.setIdentifierInfo(nullptr); 897 } 898 899 LastTokenWasAt = Result.is(tok::at); 900 } 901 902 /// Lex a token following the 'import' contextual keyword. 903 /// 904 void Preprocessor::LexAfterModuleImport(Token &Result) { 905 // Figure out what kind of lexer we actually have. 906 recomputeCurLexerKind(); 907 908 // Lex the next token. 909 Lex(Result); 910 911 // The token sequence 912 // 913 // import identifier (. identifier)* 914 // 915 // indicates a module import directive. We already saw the 'import' 916 // contextual keyword, so now we're looking for the identifiers. 917 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 918 // We expected to see an identifier here, and we did; continue handling 919 // identifiers. 920 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 921 Result.getLocation())); 922 ModuleImportExpectsIdentifier = false; 923 CurLexerKind = CLK_LexAfterModuleImport; 924 return; 925 } 926 927 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 928 // see the next identifier. (We can also see a '[[' that begins an 929 // attribute-specifier-seq here under the C++ Modules TS.) 930 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 931 ModuleImportExpectsIdentifier = true; 932 CurLexerKind = CLK_LexAfterModuleImport; 933 return; 934 } 935 936 // If we have a non-empty module path, load the named module. 937 if (!ModuleImportPath.empty()) { 938 // Under the Modules TS, the dot is just part of the module name, and not 939 // a real hierarachy separator. Flatten such module names now. 940 // 941 // FIXME: Is this the right level to be performing this transformation? 942 std::string FlatModuleName; 943 if (getLangOpts().ModulesTS) { 944 for (auto &Piece : ModuleImportPath) { 945 if (!FlatModuleName.empty()) 946 FlatModuleName += "."; 947 FlatModuleName += Piece.first->getName(); 948 } 949 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 950 ModuleImportPath.clear(); 951 ModuleImportPath.push_back( 952 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 953 } 954 955 Module *Imported = nullptr; 956 if (getLangOpts().Modules) { 957 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 958 ModuleImportPath, 959 Module::Hidden, 960 /*IsIncludeDirective=*/false); 961 if (Imported) 962 makeModuleVisible(Imported, ModuleImportLoc); 963 } 964 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 965 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 966 } 967 } 968 969 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 970 CurSubmoduleState->VisibleModules.setVisible( 971 M, Loc, [](Module *) {}, 972 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 973 // FIXME: Include the path in the diagnostic. 974 // FIXME: Include the import location for the conflicting module. 975 Diag(ModuleImportLoc, diag::warn_module_conflict) 976 << Path[0]->getFullModuleName() 977 << Conflict->getFullModuleName() 978 << Message; 979 }); 980 981 // Add this module to the imports list of the currently-built submodule. 982 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 983 BuildingSubmoduleStack.back().M->Imports.insert(M); 984 } 985 986 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 987 const char *DiagnosticTag, 988 bool AllowMacroExpansion) { 989 // We need at least one string literal. 990 if (Result.isNot(tok::string_literal)) { 991 Diag(Result, diag::err_expected_string_literal) 992 << /*Source='in...'*/0 << DiagnosticTag; 993 return false; 994 } 995 996 // Lex string literal tokens, optionally with macro expansion. 997 SmallVector<Token, 4> StrToks; 998 do { 999 StrToks.push_back(Result); 1000 1001 if (Result.hasUDSuffix()) 1002 Diag(Result, diag::err_invalid_string_udl); 1003 1004 if (AllowMacroExpansion) 1005 Lex(Result); 1006 else 1007 LexUnexpandedToken(Result); 1008 } while (Result.is(tok::string_literal)); 1009 1010 // Concatenate and parse the strings. 1011 StringLiteralParser Literal(StrToks, *this); 1012 assert(Literal.isAscii() && "Didn't allow wide strings in"); 1013 1014 if (Literal.hadError) 1015 return false; 1016 1017 if (Literal.Pascal) { 1018 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 1019 << /*Source='in...'*/0 << DiagnosticTag; 1020 return false; 1021 } 1022 1023 String = Literal.GetString(); 1024 return true; 1025 } 1026 1027 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 1028 assert(Tok.is(tok::numeric_constant)); 1029 SmallString<8> IntegerBuffer; 1030 bool NumberInvalid = false; 1031 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 1032 if (NumberInvalid) 1033 return false; 1034 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 1035 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 1036 return false; 1037 llvm::APInt APVal(64, 0); 1038 if (Literal.GetIntegerValue(APVal)) 1039 return false; 1040 Lex(Tok); 1041 Value = APVal.getLimitedValue(); 1042 return true; 1043 } 1044 1045 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 1046 assert(Handler && "NULL comment handler"); 1047 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 1048 CommentHandlers.end() && "Comment handler already registered"); 1049 CommentHandlers.push_back(Handler); 1050 } 1051 1052 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 1053 std::vector<CommentHandler *>::iterator Pos = 1054 std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 1055 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 1056 CommentHandlers.erase(Pos); 1057 } 1058 1059 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 1060 bool AnyPendingTokens = false; 1061 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 1062 HEnd = CommentHandlers.end(); 1063 H != HEnd; ++H) { 1064 if ((*H)->HandleComment(*this, Comment)) 1065 AnyPendingTokens = true; 1066 } 1067 if (!AnyPendingTokens || getCommentRetentionState()) 1068 return false; 1069 Lex(result); 1070 return true; 1071 } 1072 1073 ModuleLoader::~ModuleLoader() = default; 1074 1075 CommentHandler::~CommentHandler() = default; 1076 1077 CodeCompletionHandler::~CodeCompletionHandler() = default; 1078 1079 void Preprocessor::createPreprocessingRecord() { 1080 if (Record) 1081 return; 1082 1083 Record = new PreprocessingRecord(getSourceManager()); 1084 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 1085 } 1086