1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APInt.h" 47 #include "llvm/ADT/DenseMap.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/SmallVector.h" 50 #include "llvm/ADT/STLExtras.h" 51 #include "llvm/ADT/StringRef.h" 52 #include "llvm/ADT/StringSwitch.h" 53 #include "llvm/Support/Capacity.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/Support/MemoryBuffer.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <memory> 60 #include <string> 61 #include <utility> 62 #include <vector> 63 64 using namespace clang; 65 66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 67 68 //===----------------------------------------------------------------------===// 69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 70 71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 72 DiagnosticsEngine &diags, LangOptions &opts, 73 SourceManager &SM, MemoryBufferCache &PCMCache, 74 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 75 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 76 TranslationUnitKind TUKind) 77 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 78 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 79 PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)), 80 HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), 81 ExternalSource(nullptr), Identifiers(opts, IILookup), 82 PragmaHandlers(new PragmaNamespace(StringRef())), 83 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 84 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 85 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 86 CodeCompletionReached(false), CodeCompletionII(nullptr), 87 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 88 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), 89 CurLexerSubmodule(nullptr), Callbacks(nullptr), 90 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 91 Record(nullptr), MIChainHead(nullptr) { 92 OwnsHeaderSearch = OwnsHeaders; 93 94 CounterValue = 0; // __COUNTER__ starts at 0. 95 96 // Clear stats. 97 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 98 NumIf = NumElse = NumEndif = 0; 99 NumEnteredSourceFiles = 0; 100 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 101 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 102 MaxIncludeStackDepth = 0; 103 NumSkipped = 0; 104 105 // Default to discarding comments. 106 KeepComments = false; 107 KeepMacroComments = false; 108 SuppressIncludeNotFoundError = false; 109 110 // Macro expansion is enabled. 111 DisableMacroExpansion = false; 112 MacroExpansionInDirectivesOverride = false; 113 InMacroArgs = false; 114 InMacroArgPreExpansion = false; 115 NumCachedTokenLexers = 0; 116 PragmasEnabled = true; 117 ParsingIfOrElifDirective = false; 118 PreprocessedOutput = false; 119 120 CachedLexPos = 0; 121 122 // We haven't read anything from the external source. 123 ReadMacrosFromExternalSource = false; 124 125 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of 126 // a macro. They get unpoisoned where it is allowed. 127 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 128 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 129 if (getLangOpts().CPlusPlus2a) { 130 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); 131 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); 132 } else { 133 Ident__VA_OPT__ = nullptr; 134 } 135 136 // Initialize the pragma handlers. 137 RegisterBuiltinPragmas(); 138 139 // Initialize builtin macros like __LINE__ and friends. 140 RegisterBuiltinMacros(); 141 142 if(LangOpts.Borland) { 143 Ident__exception_info = getIdentifierInfo("_exception_info"); 144 Ident___exception_info = getIdentifierInfo("__exception_info"); 145 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 146 Ident__exception_code = getIdentifierInfo("_exception_code"); 147 Ident___exception_code = getIdentifierInfo("__exception_code"); 148 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 149 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 150 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 151 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 152 } else { 153 Ident__exception_info = Ident__exception_code = nullptr; 154 Ident__abnormal_termination = Ident___exception_info = nullptr; 155 Ident___exception_code = Ident___abnormal_termination = nullptr; 156 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 157 Ident_AbnormalTermination = nullptr; 158 } 159 160 if (this->PPOpts->GeneratePreamble) 161 PreambleConditionalStack.startRecording(); 162 } 163 164 Preprocessor::~Preprocessor() { 165 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 166 167 IncludeMacroStack.clear(); 168 169 // Destroy any macro definitions. 170 while (MacroInfoChain *I = MIChainHead) { 171 MIChainHead = I->Next; 172 I->~MacroInfoChain(); 173 } 174 175 // Free any cached macro expanders. 176 // This populates MacroArgCache, so all TokenLexers need to be destroyed 177 // before the code below that frees up the MacroArgCache list. 178 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 179 CurTokenLexer.reset(); 180 181 // Free any cached MacroArgs. 182 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 183 ArgList = ArgList->deallocate(); 184 185 // Delete the header search info, if we own it. 186 if (OwnsHeaderSearch) 187 delete &HeaderInfo; 188 } 189 190 void Preprocessor::Initialize(const TargetInfo &Target, 191 const TargetInfo *AuxTarget) { 192 assert((!this->Target || this->Target == &Target) && 193 "Invalid override of target information"); 194 this->Target = &Target; 195 196 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 197 "Invalid override of aux target information."); 198 this->AuxTarget = AuxTarget; 199 200 // Initialize information about built-ins. 201 BuiltinInfo.InitializeTarget(Target, AuxTarget); 202 HeaderInfo.setTarget(Target); 203 } 204 205 void Preprocessor::InitializeForModelFile() { 206 NumEnteredSourceFiles = 0; 207 208 // Reset pragmas 209 PragmaHandlersBackup = std::move(PragmaHandlers); 210 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 211 RegisterBuiltinPragmas(); 212 213 // Reset PredefinesFileID 214 PredefinesFileID = FileID(); 215 } 216 217 void Preprocessor::FinalizeForModelFile() { 218 NumEnteredSourceFiles = 1; 219 220 PragmaHandlers = std::move(PragmaHandlersBackup); 221 } 222 223 void Preprocessor::setPTHManager(PTHManager* pm) { 224 PTH.reset(pm); 225 FileMgr.addStatCache(PTH->createStatCache()); 226 } 227 228 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 229 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 230 << getSpelling(Tok) << "'"; 231 232 if (!DumpFlags) return; 233 234 llvm::errs() << "\t"; 235 if (Tok.isAtStartOfLine()) 236 llvm::errs() << " [StartOfLine]"; 237 if (Tok.hasLeadingSpace()) 238 llvm::errs() << " [LeadingSpace]"; 239 if (Tok.isExpandDisabled()) 240 llvm::errs() << " [ExpandDisabled]"; 241 if (Tok.needsCleaning()) { 242 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 243 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 244 << "']"; 245 } 246 247 llvm::errs() << "\tLoc=<"; 248 DumpLocation(Tok.getLocation()); 249 llvm::errs() << ">"; 250 } 251 252 void Preprocessor::DumpLocation(SourceLocation Loc) const { 253 Loc.dump(SourceMgr); 254 } 255 256 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 257 llvm::errs() << "MACRO: "; 258 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 259 DumpToken(MI.getReplacementToken(i)); 260 llvm::errs() << " "; 261 } 262 llvm::errs() << "\n"; 263 } 264 265 void Preprocessor::PrintStats() { 266 llvm::errs() << "\n*** Preprocessor Stats:\n"; 267 llvm::errs() << NumDirectives << " directives found:\n"; 268 llvm::errs() << " " << NumDefined << " #define.\n"; 269 llvm::errs() << " " << NumUndefined << " #undef.\n"; 270 llvm::errs() << " #include/#include_next/#import:\n"; 271 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 272 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 273 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 274 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 275 llvm::errs() << " " << NumEndif << " #endif.\n"; 276 llvm::errs() << " " << NumPragma << " #pragma.\n"; 277 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 278 279 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 280 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 281 << NumFastMacroExpanded << " on the fast path.\n"; 282 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 283 << " token paste (##) operations performed, " 284 << NumFastTokenPaste << " on the fast path.\n"; 285 286 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 287 288 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 289 llvm::errs() << "\n Macro Expanded Tokens: " 290 << llvm::capacity_in_bytes(MacroExpandedTokens); 291 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 292 // FIXME: List information for all submodules. 293 llvm::errs() << "\n Macros: " 294 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 295 llvm::errs() << "\n #pragma push_macro Info: " 296 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 297 llvm::errs() << "\n Poison Reasons: " 298 << llvm::capacity_in_bytes(PoisonReasons); 299 llvm::errs() << "\n Comment Handlers: " 300 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 301 } 302 303 Preprocessor::macro_iterator 304 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 305 if (IncludeExternalMacros && ExternalSource && 306 !ReadMacrosFromExternalSource) { 307 ReadMacrosFromExternalSource = true; 308 ExternalSource->ReadDefinedMacros(); 309 } 310 311 // Make sure we cover all macros in visible modules. 312 for (const ModuleMacro &Macro : ModuleMacros) 313 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 314 315 return CurSubmoduleState->Macros.begin(); 316 } 317 318 size_t Preprocessor::getTotalMemory() const { 319 return BP.getTotalMemory() 320 + llvm::capacity_in_bytes(MacroExpandedTokens) 321 + Predefines.capacity() /* Predefines buffer. */ 322 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 323 // and ModuleMacros. 324 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 325 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 326 + llvm::capacity_in_bytes(PoisonReasons) 327 + llvm::capacity_in_bytes(CommentHandlers); 328 } 329 330 Preprocessor::macro_iterator 331 Preprocessor::macro_end(bool IncludeExternalMacros) const { 332 if (IncludeExternalMacros && ExternalSource && 333 !ReadMacrosFromExternalSource) { 334 ReadMacrosFromExternalSource = true; 335 ExternalSource->ReadDefinedMacros(); 336 } 337 338 return CurSubmoduleState->Macros.end(); 339 } 340 341 /// \brief Compares macro tokens with a specified token value sequence. 342 static bool MacroDefinitionEquals(const MacroInfo *MI, 343 ArrayRef<TokenValue> Tokens) { 344 return Tokens.size() == MI->getNumTokens() && 345 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 346 } 347 348 StringRef Preprocessor::getLastMacroWithSpelling( 349 SourceLocation Loc, 350 ArrayRef<TokenValue> Tokens) const { 351 SourceLocation BestLocation; 352 StringRef BestSpelling; 353 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 354 I != E; ++I) { 355 const MacroDirective::DefInfo 356 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 357 if (!Def || !Def.getMacroInfo()) 358 continue; 359 if (!Def.getMacroInfo()->isObjectLike()) 360 continue; 361 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 362 continue; 363 SourceLocation Location = Def.getLocation(); 364 // Choose the macro defined latest. 365 if (BestLocation.isInvalid() || 366 (Location.isValid() && 367 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 368 BestLocation = Location; 369 BestSpelling = I->first->getName(); 370 } 371 } 372 return BestSpelling; 373 } 374 375 void Preprocessor::recomputeCurLexerKind() { 376 if (CurLexer) 377 CurLexerKind = CLK_Lexer; 378 else if (CurPTHLexer) 379 CurLexerKind = CLK_PTHLexer; 380 else if (CurTokenLexer) 381 CurLexerKind = CLK_TokenLexer; 382 else 383 CurLexerKind = CLK_CachingLexer; 384 } 385 386 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 387 unsigned CompleteLine, 388 unsigned CompleteColumn) { 389 assert(File); 390 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 391 assert(!CodeCompletionFile && "Already set"); 392 393 using llvm::MemoryBuffer; 394 395 // Load the actual file's contents. 396 bool Invalid = false; 397 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 398 if (Invalid) 399 return true; 400 401 // Find the byte position of the truncation point. 402 const char *Position = Buffer->getBufferStart(); 403 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 404 for (; *Position; ++Position) { 405 if (*Position != '\r' && *Position != '\n') 406 continue; 407 408 // Eat \r\n or \n\r as a single line. 409 if ((Position[1] == '\r' || Position[1] == '\n') && 410 Position[0] != Position[1]) 411 ++Position; 412 ++Position; 413 break; 414 } 415 } 416 417 Position += CompleteColumn - 1; 418 419 // If pointing inside the preamble, adjust the position at the beginning of 420 // the file after the preamble. 421 if (SkipMainFilePreamble.first && 422 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 423 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 424 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 425 } 426 427 if (Position > Buffer->getBufferEnd()) 428 Position = Buffer->getBufferEnd(); 429 430 CodeCompletionFile = File; 431 CodeCompletionOffset = Position - Buffer->getBufferStart(); 432 433 std::unique_ptr<MemoryBuffer> NewBuffer = 434 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 435 Buffer->getBufferIdentifier()); 436 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 437 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 438 *NewPos = '\0'; 439 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 440 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 441 442 return false; 443 } 444 445 void Preprocessor::CodeCompleteNaturalLanguage() { 446 if (CodeComplete) 447 CodeComplete->CodeCompleteNaturalLanguage(); 448 setCodeCompletionReached(); 449 } 450 451 /// getSpelling - This method is used to get the spelling of a token into a 452 /// SmallVector. Note that the returned StringRef may not point to the 453 /// supplied buffer if a copy can be avoided. 454 StringRef Preprocessor::getSpelling(const Token &Tok, 455 SmallVectorImpl<char> &Buffer, 456 bool *Invalid) const { 457 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 458 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 459 // Try the fast path. 460 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 461 return II->getName(); 462 } 463 464 // Resize the buffer if we need to copy into it. 465 if (Tok.needsCleaning()) 466 Buffer.resize(Tok.getLength()); 467 468 const char *Ptr = Buffer.data(); 469 unsigned Len = getSpelling(Tok, Ptr, Invalid); 470 return StringRef(Ptr, Len); 471 } 472 473 /// CreateString - Plop the specified string into a scratch buffer and return a 474 /// location for it. If specified, the source location provides a source 475 /// location for the token. 476 void Preprocessor::CreateString(StringRef Str, Token &Tok, 477 SourceLocation ExpansionLocStart, 478 SourceLocation ExpansionLocEnd) { 479 Tok.setLength(Str.size()); 480 481 const char *DestPtr; 482 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 483 484 if (ExpansionLocStart.isValid()) 485 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 486 ExpansionLocEnd, Str.size()); 487 Tok.setLocation(Loc); 488 489 // If this is a raw identifier or a literal token, set the pointer data. 490 if (Tok.is(tok::raw_identifier)) 491 Tok.setRawIdentifierData(DestPtr); 492 else if (Tok.isLiteral()) 493 Tok.setLiteralData(DestPtr); 494 } 495 496 Module *Preprocessor::getCurrentModule() { 497 if (!getLangOpts().isCompilingModule()) 498 return nullptr; 499 500 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 501 } 502 503 //===----------------------------------------------------------------------===// 504 // Preprocessor Initialization Methods 505 //===----------------------------------------------------------------------===// 506 507 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 508 /// which implicitly adds the builtin defines etc. 509 void Preprocessor::EnterMainSourceFile() { 510 // We do not allow the preprocessor to reenter the main file. Doing so will 511 // cause FileID's to accumulate information from both runs (e.g. #line 512 // information) and predefined macros aren't guaranteed to be set properly. 513 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 514 FileID MainFileID = SourceMgr.getMainFileID(); 515 516 // If MainFileID is loaded it means we loaded an AST file, no need to enter 517 // a main file. 518 if (!SourceMgr.isLoadedFileID(MainFileID)) { 519 // Enter the main file source buffer. 520 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 521 522 // If we've been asked to skip bytes in the main file (e.g., as part of a 523 // precompiled preamble), do so now. 524 if (SkipMainFilePreamble.first > 0) 525 CurLexer->SetByteOffset(SkipMainFilePreamble.first, 526 SkipMainFilePreamble.second); 527 528 // Tell the header info that the main file was entered. If the file is later 529 // #imported, it won't be re-entered. 530 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 531 HeaderInfo.IncrementIncludeCount(FE); 532 } 533 534 // Preprocess Predefines to populate the initial preprocessor state. 535 std::unique_ptr<llvm::MemoryBuffer> SB = 536 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 537 assert(SB && "Cannot create predefined source buffer"); 538 FileID FID = SourceMgr.createFileID(std::move(SB)); 539 assert(FID.isValid() && "Could not create FileID for predefines?"); 540 setPredefinesFileID(FID); 541 542 // Start parsing the predefines. 543 EnterSourceFile(FID, nullptr, SourceLocation()); 544 } 545 546 void Preprocessor::replayPreambleConditionalStack() { 547 // Restore the conditional stack from the preamble, if there is one. 548 if (PreambleConditionalStack.isReplaying()) { 549 assert(CurPPLexer && 550 "CurPPLexer is null when calling replayPreambleConditionalStack."); 551 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); 552 PreambleConditionalStack.doneReplaying(); 553 } 554 } 555 556 void Preprocessor::EndSourceFile() { 557 // Notify the client that we reached the end of the source file. 558 if (Callbacks) 559 Callbacks->EndOfMainFile(); 560 } 561 562 //===----------------------------------------------------------------------===// 563 // Lexer Event Handling. 564 //===----------------------------------------------------------------------===// 565 566 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 567 /// identifier information for the token and install it into the token, 568 /// updating the token kind accordingly. 569 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 570 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 571 572 // Look up this token, see if it is a macro, or if it is a language keyword. 573 IdentifierInfo *II; 574 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 575 // No cleaning needed, just use the characters from the lexed buffer. 576 II = getIdentifierInfo(Identifier.getRawIdentifier()); 577 } else { 578 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 579 SmallString<64> IdentifierBuffer; 580 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 581 582 if (Identifier.hasUCN()) { 583 SmallString<64> UCNIdentifierBuffer; 584 expandUCNs(UCNIdentifierBuffer, CleanedStr); 585 II = getIdentifierInfo(UCNIdentifierBuffer); 586 } else { 587 II = getIdentifierInfo(CleanedStr); 588 } 589 } 590 591 // Update the token info (identifier info and appropriate token kind). 592 Identifier.setIdentifierInfo(II); 593 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && 594 getSourceManager().isInSystemHeader(Identifier.getLocation())) 595 Identifier.setKind(clang::tok::identifier); 596 else 597 Identifier.setKind(II->getTokenID()); 598 599 return II; 600 } 601 602 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 603 PoisonReasons[II] = DiagID; 604 } 605 606 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 607 assert(Ident__exception_code && Ident__exception_info); 608 assert(Ident___exception_code && Ident___exception_info); 609 Ident__exception_code->setIsPoisoned(Poison); 610 Ident___exception_code->setIsPoisoned(Poison); 611 Ident_GetExceptionCode->setIsPoisoned(Poison); 612 Ident__exception_info->setIsPoisoned(Poison); 613 Ident___exception_info->setIsPoisoned(Poison); 614 Ident_GetExceptionInfo->setIsPoisoned(Poison); 615 Ident__abnormal_termination->setIsPoisoned(Poison); 616 Ident___abnormal_termination->setIsPoisoned(Poison); 617 Ident_AbnormalTermination->setIsPoisoned(Poison); 618 } 619 620 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 621 assert(Identifier.getIdentifierInfo() && 622 "Can't handle identifiers without identifier info!"); 623 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 624 PoisonReasons.find(Identifier.getIdentifierInfo()); 625 if(it == PoisonReasons.end()) 626 Diag(Identifier, diag::err_pp_used_poisoned_id); 627 else 628 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 629 } 630 631 /// \brief Returns a diagnostic message kind for reporting a future keyword as 632 /// appropriate for the identifier and specified language. 633 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 634 const LangOptions &LangOpts) { 635 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 636 637 if (LangOpts.CPlusPlus) 638 return llvm::StringSwitch<diag::kind>(II.getName()) 639 #define CXX11_KEYWORD(NAME, FLAGS) \ 640 .Case(#NAME, diag::warn_cxx11_keyword) 641 #define CXX2A_KEYWORD(NAME, FLAGS) \ 642 .Case(#NAME, diag::warn_cxx2a_keyword) 643 #include "clang/Basic/TokenKinds.def" 644 ; 645 646 llvm_unreachable( 647 "Keyword not known to come from a newer Standard or proposed Standard"); 648 } 649 650 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 651 assert(II.isOutOfDate() && "not out of date"); 652 getExternalSource()->updateOutOfDateIdentifier(II); 653 } 654 655 /// HandleIdentifier - This callback is invoked when the lexer reads an 656 /// identifier. This callback looks up the identifier in the map and/or 657 /// potentially macro expands it or turns it into a named token (like 'for'). 658 /// 659 /// Note that callers of this method are guarded by checking the 660 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 661 /// IdentifierInfo methods that compute these properties will need to change to 662 /// match. 663 bool Preprocessor::HandleIdentifier(Token &Identifier) { 664 assert(Identifier.getIdentifierInfo() && 665 "Can't handle identifiers without identifier info!"); 666 667 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 668 669 // If the information about this identifier is out of date, update it from 670 // the external source. 671 // We have to treat __VA_ARGS__ in a special way, since it gets 672 // serialized with isPoisoned = true, but our preprocessor may have 673 // unpoisoned it if we're defining a C99 macro. 674 if (II.isOutOfDate()) { 675 bool CurrentIsPoisoned = false; 676 const bool IsSpecialVariadicMacro = 677 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; 678 if (IsSpecialVariadicMacro) 679 CurrentIsPoisoned = II.isPoisoned(); 680 681 updateOutOfDateIdentifier(II); 682 Identifier.setKind(II.getTokenID()); 683 684 if (IsSpecialVariadicMacro) 685 II.setIsPoisoned(CurrentIsPoisoned); 686 } 687 688 // If this identifier was poisoned, and if it was not produced from a macro 689 // expansion, emit an error. 690 if (II.isPoisoned() && CurPPLexer) { 691 HandlePoisonedIdentifier(Identifier); 692 } 693 694 // If this is a macro to be expanded, do it. 695 if (MacroDefinition MD = getMacroDefinition(&II)) { 696 auto *MI = MD.getMacroInfo(); 697 assert(MI && "macro definition with no macro info?"); 698 if (!DisableMacroExpansion) { 699 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 700 // C99 6.10.3p10: If the preprocessing token immediately after the 701 // macro name isn't a '(', this macro should not be expanded. 702 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 703 return HandleMacroExpandedIdentifier(Identifier, MD); 704 } else { 705 // C99 6.10.3.4p2 says that a disabled macro may never again be 706 // expanded, even if it's in a context where it could be expanded in the 707 // future. 708 Identifier.setFlag(Token::DisableExpand); 709 if (MI->isObjectLike() || isNextPPTokenLParen()) 710 Diag(Identifier, diag::pp_disabled_macro_expansion); 711 } 712 } 713 } 714 715 // If this identifier is a keyword in a newer Standard or proposed Standard, 716 // produce a warning. Don't warn if we're not considering macro expansion, 717 // since this identifier might be the name of a macro. 718 // FIXME: This warning is disabled in cases where it shouldn't be, like 719 // "#define constexpr constexpr", "int constexpr;" 720 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 721 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 722 << II.getName(); 723 // Don't diagnose this keyword again in this translation unit. 724 II.setIsFutureCompatKeyword(false); 725 } 726 727 // If this is an extension token, diagnose its use. 728 // We avoid diagnosing tokens that originate from macro definitions. 729 // FIXME: This warning is disabled in cases where it shouldn't be, 730 // like "#define TY typeof", "TY(1) x". 731 if (II.isExtensionToken() && !DisableMacroExpansion) 732 Diag(Identifier, diag::ext_token_used); 733 734 // If this is the 'import' contextual keyword following an '@', note 735 // that the next token indicates a module name. 736 // 737 // Note that we do not treat 'import' as a contextual 738 // keyword when we're in a caching lexer, because caching lexers only get 739 // used in contexts where import declarations are disallowed. 740 // 741 // Likewise if this is the C++ Modules TS import keyword. 742 if (((LastTokenWasAt && II.isModulesImport()) || 743 Identifier.is(tok::kw_import)) && 744 !InMacroArgs && !DisableMacroExpansion && 745 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 746 CurLexerKind != CLK_CachingLexer) { 747 ModuleImportLoc = Identifier.getLocation(); 748 ModuleImportPath.clear(); 749 ModuleImportExpectsIdentifier = true; 750 CurLexerKind = CLK_LexAfterModuleImport; 751 } 752 return true; 753 } 754 755 void Preprocessor::Lex(Token &Result) { 756 // We loop here until a lex function returns a token; this avoids recursion. 757 bool ReturnedToken; 758 do { 759 switch (CurLexerKind) { 760 case CLK_Lexer: 761 ReturnedToken = CurLexer->Lex(Result); 762 break; 763 case CLK_PTHLexer: 764 ReturnedToken = CurPTHLexer->Lex(Result); 765 break; 766 case CLK_TokenLexer: 767 ReturnedToken = CurTokenLexer->Lex(Result); 768 break; 769 case CLK_CachingLexer: 770 CachingLex(Result); 771 ReturnedToken = true; 772 break; 773 case CLK_LexAfterModuleImport: 774 LexAfterModuleImport(Result); 775 ReturnedToken = true; 776 break; 777 } 778 } while (!ReturnedToken); 779 780 if (Result.is(tok::code_completion)) 781 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 782 783 LastTokenWasAt = Result.is(tok::at); 784 } 785 786 /// \brief Lex a token following the 'import' contextual keyword. 787 /// 788 void Preprocessor::LexAfterModuleImport(Token &Result) { 789 // Figure out what kind of lexer we actually have. 790 recomputeCurLexerKind(); 791 792 // Lex the next token. 793 Lex(Result); 794 795 // The token sequence 796 // 797 // import identifier (. identifier)* 798 // 799 // indicates a module import directive. We already saw the 'import' 800 // contextual keyword, so now we're looking for the identifiers. 801 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 802 // We expected to see an identifier here, and we did; continue handling 803 // identifiers. 804 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 805 Result.getLocation())); 806 ModuleImportExpectsIdentifier = false; 807 CurLexerKind = CLK_LexAfterModuleImport; 808 return; 809 } 810 811 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 812 // see the next identifier. (We can also see a '[[' that begins an 813 // attribute-specifier-seq here under the C++ Modules TS.) 814 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 815 ModuleImportExpectsIdentifier = true; 816 CurLexerKind = CLK_LexAfterModuleImport; 817 return; 818 } 819 820 // If we have a non-empty module path, load the named module. 821 if (!ModuleImportPath.empty()) { 822 // Under the Modules TS, the dot is just part of the module name, and not 823 // a real hierarachy separator. Flatten such module names now. 824 // 825 // FIXME: Is this the right level to be performing this transformation? 826 std::string FlatModuleName; 827 if (getLangOpts().ModulesTS) { 828 for (auto &Piece : ModuleImportPath) { 829 if (!FlatModuleName.empty()) 830 FlatModuleName += "."; 831 FlatModuleName += Piece.first->getName(); 832 } 833 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 834 ModuleImportPath.clear(); 835 ModuleImportPath.push_back( 836 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 837 } 838 839 Module *Imported = nullptr; 840 if (getLangOpts().Modules) { 841 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 842 ModuleImportPath, 843 Module::Hidden, 844 /*IsIncludeDirective=*/false); 845 if (Imported) 846 makeModuleVisible(Imported, ModuleImportLoc); 847 } 848 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 849 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 850 } 851 } 852 853 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 854 CurSubmoduleState->VisibleModules.setVisible( 855 M, Loc, [](Module *) {}, 856 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 857 // FIXME: Include the path in the diagnostic. 858 // FIXME: Include the import location for the conflicting module. 859 Diag(ModuleImportLoc, diag::warn_module_conflict) 860 << Path[0]->getFullModuleName() 861 << Conflict->getFullModuleName() 862 << Message; 863 }); 864 865 // Add this module to the imports list of the currently-built submodule. 866 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 867 BuildingSubmoduleStack.back().M->Imports.insert(M); 868 } 869 870 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 871 const char *DiagnosticTag, 872 bool AllowMacroExpansion) { 873 // We need at least one string literal. 874 if (Result.isNot(tok::string_literal)) { 875 Diag(Result, diag::err_expected_string_literal) 876 << /*Source='in...'*/0 << DiagnosticTag; 877 return false; 878 } 879 880 // Lex string literal tokens, optionally with macro expansion. 881 SmallVector<Token, 4> StrToks; 882 do { 883 StrToks.push_back(Result); 884 885 if (Result.hasUDSuffix()) 886 Diag(Result, diag::err_invalid_string_udl); 887 888 if (AllowMacroExpansion) 889 Lex(Result); 890 else 891 LexUnexpandedToken(Result); 892 } while (Result.is(tok::string_literal)); 893 894 // Concatenate and parse the strings. 895 StringLiteralParser Literal(StrToks, *this); 896 assert(Literal.isAscii() && "Didn't allow wide strings in"); 897 898 if (Literal.hadError) 899 return false; 900 901 if (Literal.Pascal) { 902 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 903 << /*Source='in...'*/0 << DiagnosticTag; 904 return false; 905 } 906 907 String = Literal.GetString(); 908 return true; 909 } 910 911 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 912 assert(Tok.is(tok::numeric_constant)); 913 SmallString<8> IntegerBuffer; 914 bool NumberInvalid = false; 915 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 916 if (NumberInvalid) 917 return false; 918 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 919 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 920 return false; 921 llvm::APInt APVal(64, 0); 922 if (Literal.GetIntegerValue(APVal)) 923 return false; 924 Lex(Tok); 925 Value = APVal.getLimitedValue(); 926 return true; 927 } 928 929 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 930 assert(Handler && "NULL comment handler"); 931 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 932 CommentHandlers.end() && "Comment handler already registered"); 933 CommentHandlers.push_back(Handler); 934 } 935 936 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 937 std::vector<CommentHandler *>::iterator Pos 938 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 939 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 940 CommentHandlers.erase(Pos); 941 } 942 943 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 944 bool AnyPendingTokens = false; 945 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 946 HEnd = CommentHandlers.end(); 947 H != HEnd; ++H) { 948 if ((*H)->HandleComment(*this, Comment)) 949 AnyPendingTokens = true; 950 } 951 if (!AnyPendingTokens || getCommentRetentionState()) 952 return false; 953 Lex(result); 954 return true; 955 } 956 957 ModuleLoader::~ModuleLoader() { } 958 959 CommentHandler::~CommentHandler() { } 960 961 CodeCompletionHandler::~CodeCompletionHandler() { } 962 963 void Preprocessor::createPreprocessingRecord() { 964 if (Record) 965 return; 966 967 Record = new PreprocessingRecord(getSourceManager()); 968 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 969 } 970