//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
//   -H       - Print the name of each header file used.
//   -d[DNI] - Dump various things.
//   -fworking-directory - #line's with preprocessor's working dir.
//   -fpreprocessed
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
//   -W*
//   -w
//
// Messages to emit:
//   "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/PTHManager.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace clang;

LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)

//===----------------------------------------------------------------------===//
// Out-of-line definition of the ExternalPreprocessorSource destructor.
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }

/// Construct a preprocessor around the supplied collaborators.
///
/// The initializer list stores the options, diagnostics engine, language
/// options, source manager, PCM cache, header search and module loader, and
/// creates a fresh scratch buffer and an unnamed top-level pragma namespace.
/// The body then resets the statistics counters and lexer-state flags, poisons
/// __VA_ARGS__, registers the built-in pragmas and macros, and looks up the
/// SEH identifiers when LangOpts.Borland is set.
///
/// \param OwnsHeaders when true, the destructor deletes \p Headers.
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, MemoryBufferCache &PCMCache,
                           HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
      AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
      HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
      ExternalSource(nullptr), Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
      CodeCompletionFile(nullptr), CodeCompletionOffset(0),
      LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
      CodeCompletionReached(false), CodeCompletionII(nullptr),
      MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
      CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
      CurLexerSubmodule(nullptr), Callbacks(nullptr),
      CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
      Record(nullptr), MIChainHead(nullptr) {
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // The SEH identifiers are only looked up in Borland mode; otherwise every
  // one of the nine pointers is explicitly nulled.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }

  // The PPOpts parameter was moved from in the initializer list, so the
  // member has to be named explicitly through 'this' here.
  if (this->PPOpts->GeneratePreamble)
    PreambleConditionalStack.startRecording();
}

/// Tear down the preprocessor: clear the include stack, destroy the chained
/// macro definitions, release cached token lexers before the MacroArgs free
/// list, and delete the header search object if it is owned.
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Destroy any macro definitions.
  // Only the destructor is run for each chain node; no delete is issued here.
  while (MacroInfoChain *I = MIChainHead) {
    MIChainHead = I->Next;
    I->~MacroInfoChain();
  }

  // Free any cached macro expanders.
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
  // before the code below that frees up the MacroArgCache list.
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
  CurTokenLexer.reset();

  // Free any cached MacroArgs.
  // deallocate() returns the next list entry, which drives the loop.
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    ArgList = ArgList->deallocate();

  // Delete the header search info, if we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}

/// Record the target (and optional auxiliary target) and forward them to the
/// builtin table and the header search object.
///
/// Both asserts permit re-initialization only with the same object that was
/// set previously.
void Preprocessor::Initialize(const TargetInfo &Target,
                              const TargetInfo *AuxTarget) {
  assert((!this->Target || this->Target == &Target) &&
         "Invalid override of target information");
  this->Target = &Target;

  assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
         "Invalid override of aux target information.");
  this->AuxTarget = AuxTarget;

  // Initialize information about built-ins.
  BuiltinInfo.InitializeTarget(Target, AuxTarget);
  HeaderInfo.setTarget(Target);
}

/// Prepare the preprocessor for processing a model file: zero the count of
/// entered source files, stash the current pragma handlers in
/// PragmaHandlersBackup and install a fresh set of built-in ones, and reset
/// the predefines FileID. Undone by FinalizeForModelFile().
void Preprocessor::InitializeForModelFile() {
  NumEnteredSourceFiles = 0;

  // Reset pragmas
  PragmaHandlersBackup = std::move(PragmaHandlers);
  PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
  RegisterBuiltinPragmas();

  // Reset PredefinesFileID
  PredefinesFileID = FileID();
}

/// Restore the pragma handlers saved by InitializeForModelFile() and set the
/// entered-source-file count back to 1.
void Preprocessor::FinalizeForModelFile() {
  NumEnteredSourceFiles = 1;

  PragmaHandlers = std::move(PragmaHandlersBackup);
}

/// Take ownership of \p pm and register the stat cache it creates with the
/// file manager.
///
/// \p pm is dereferenced unconditionally right after being stored, so it must
/// not be null.
void Preprocessor::setPTHManager(PTHManager* pm) {
  PTH.reset(pm);
  FileMgr.addStatCache(PTH->createStatCache());
}

/// Print the kind name and quoted spelling of \p Tok to llvm::errs().
///
/// When \p DumpFlags is true, also print the start-of-line, leading-space,
/// expand-disabled and needs-cleaning markers followed by the token location.
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
               << getSpelling(Tok) << "'";

  if (!DumpFlags) return;

  llvm::errs() << "\t";
  if (Tok.isAtStartOfLine())
    llvm::errs() << " [StartOfLine]";
  if (Tok.hasLeadingSpace())
    llvm::errs() << " [LeadingSpace]";
  if (Tok.isExpandDisabled())
    llvm::errs() << " [ExpandDisabled]";
  if (Tok.needsCleaning()) {
    // Show the raw source characters, which differ from the cleaned spelling
    // printed above.
    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
                 << "']";
  }

  llvm::errs() << "\tLoc=<";
  DumpLocation(Tok.getLocation());
  llvm::errs() << ">";
}

/// Dump \p Loc using this preprocessor's source manager.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  Loc.dump(SourceMgr);
}

/// Print "MACRO: " followed by every replacement token of \p MI (each followed
/// by two spaces) and a newline to llvm::errs().
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
  llvm::errs() << "MACRO: ";
  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
    DumpToken(MI.getReplacementToken(i));
    llvm::errs() << "  ";
  }
  llvm::errs() << "\n";
}

/// Write the directive, macro-expansion and token-paste counters, followed by
/// a memory usage breakdown, to llvm::errs().
///
/// The memory section lists the same components that getTotalMemory() sums.
// NOTE(review): the string literals below are reproduced exactly as they
// appear in the reviewed text, including their leading whitespace; confirm
// the intended indentation against the checked-in file.
void Preprocessor::PrintStats() {
  llvm::errs() << "\n*** Preprocessor Stats:\n";
  llvm::errs() << NumDirectives << " directives found:\n";
  llvm::errs() << " " << NumDefined << " #define.\n";
  llvm::errs() << " " << NumUndefined << " #undef.\n";
  llvm::errs() << " #include/#include_next/#import:\n";
  llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
  llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
  llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
  llvm::errs() << " " << NumElse << " #else/#elif.\n";
  llvm::errs() << " " << NumEndif << " #endif.\n";
  llvm::errs() << " " << NumPragma << " #pragma.\n";
  // NOTE(review): this label lacks the '/' between "#ifndef" and "#ifdef"
  // that the NumIf line above has; left untouched because it is runtime
  // output.
  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";

  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
             << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
             << NumFastMacroExpanded << " on the fast path.\n";
  // NumTokenPaste and NumFastTokenPaste are added together for the total, so
  // the two counters are reported here as disjoint.
  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
             << " token paste (##) operations performed, "
             << NumFastTokenPaste << " on the fast path.\n";

  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";

  llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
  llvm::errs() << "\n Macro Expanded Tokens: "
               << llvm::capacity_in_bytes(MacroExpandedTokens);
  llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
  // FIXME: List information for all submodules.
  llvm::errs() << "\n Macros: "
               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
  llvm::errs() << "\n #pragma push_macro Info: "
               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
  llvm::errs() << "\n Poison Reasons: "
               << llvm::capacity_in_bytes(PoisonReasons);
  llvm::errs() << "\n Comment Handlers: "
               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
}

/// Return an iterator to the first entry of the current submodule's macro
/// table.
///
/// Although declared const, this may set ReadMacrosFromExternalSource (after
/// asking the external source to read its macros, at most once) and it inserts
/// an entry for every identifier found in ModuleMacros.
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  // Make sure we cover all macros in visible modules.
  for (const ModuleMacro &Macro : ModuleMacros)
    CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));

  return CurSubmoduleState->Macros.begin();
}

/// Sum the memory figures reported by the bump allocator, the
/// macro-expanded-token storage, the predefines buffer, the current
/// submodule's macro table, the push_macro info, the poison reasons and the
/// comment handler list.
size_t Preprocessor::getTotalMemory() const {
  return BP.getTotalMemory()
    + llvm::capacity_in_bytes(MacroExpandedTokens)
    + Predefines.capacity() /* Predefines buffer. */
    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
    // and ModuleMacros.
    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
    + llvm::capacity_in_bytes(PoisonReasons)
    + llvm::capacity_in_bytes(CommentHandlers);
}

/// Return the past-the-end iterator of the current submodule's macro table.
///
/// Performs the same one-time external-source read as macro_begin(), but does
/// not insert entries for ModuleMacros.
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  return CurSubmoduleState->Macros.end();
}

/// \brief Compares macro tokens with a specified token value sequence.
336 static bool MacroDefinitionEquals(const MacroInfo *MI, 337 ArrayRef<TokenValue> Tokens) { 338 return Tokens.size() == MI->getNumTokens() && 339 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 340 } 341 342 StringRef Preprocessor::getLastMacroWithSpelling( 343 SourceLocation Loc, 344 ArrayRef<TokenValue> Tokens) const { 345 SourceLocation BestLocation; 346 StringRef BestSpelling; 347 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 348 I != E; ++I) { 349 const MacroDirective::DefInfo 350 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 351 if (!Def || !Def.getMacroInfo()) 352 continue; 353 if (!Def.getMacroInfo()->isObjectLike()) 354 continue; 355 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 356 continue; 357 SourceLocation Location = Def.getLocation(); 358 // Choose the macro defined latest. 359 if (BestLocation.isInvalid() || 360 (Location.isValid() && 361 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 362 BestLocation = Location; 363 BestSpelling = I->first->getName(); 364 } 365 } 366 return BestSpelling; 367 } 368 369 void Preprocessor::recomputeCurLexerKind() { 370 if (CurLexer) 371 CurLexerKind = CLK_Lexer; 372 else if (CurPTHLexer) 373 CurLexerKind = CLK_PTHLexer; 374 else if (CurTokenLexer) 375 CurLexerKind = CLK_TokenLexer; 376 else 377 CurLexerKind = CLK_CachingLexer; 378 } 379 380 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 381 unsigned CompleteLine, 382 unsigned CompleteColumn) { 383 assert(File); 384 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 385 assert(!CodeCompletionFile && "Already set"); 386 387 using llvm::MemoryBuffer; 388 389 // Load the actual file's contents. 390 bool Invalid = false; 391 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 392 if (Invalid) 393 return true; 394 395 // Find the byte position of the truncation point. 
396 const char *Position = Buffer->getBufferStart(); 397 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 398 for (; *Position; ++Position) { 399 if (*Position != '\r' && *Position != '\n') 400 continue; 401 402 // Eat \r\n or \n\r as a single line. 403 if ((Position[1] == '\r' || Position[1] == '\n') && 404 Position[0] != Position[1]) 405 ++Position; 406 ++Position; 407 break; 408 } 409 } 410 411 Position += CompleteColumn - 1; 412 413 // If pointing inside the preamble, adjust the position at the beginning of 414 // the file after the preamble. 415 if (SkipMainFilePreamble.first && 416 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 417 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 418 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 419 } 420 421 if (Position > Buffer->getBufferEnd()) 422 Position = Buffer->getBufferEnd(); 423 424 CodeCompletionFile = File; 425 CodeCompletionOffset = Position - Buffer->getBufferStart(); 426 427 std::unique_ptr<MemoryBuffer> NewBuffer = 428 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 429 Buffer->getBufferIdentifier()); 430 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 431 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 432 *NewPos = '\0'; 433 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 434 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 435 436 return false; 437 } 438 439 void Preprocessor::CodeCompleteNaturalLanguage() { 440 if (CodeComplete) 441 CodeComplete->CodeCompleteNaturalLanguage(); 442 setCodeCompletionReached(); 443 } 444 445 /// getSpelling - This method is used to get the spelling of a token into a 446 /// SmallVector. Note that the returned StringRef may not point to the 447 /// supplied buffer if a copy can be avoided. 
StringRef Preprocessor::getSpelling(const Token &Tok,
                                          SmallVectorImpl<char> &Buffer,
                                          bool *Invalid) const {
  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
  if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
    // Try the fast path.
    // The identifier's stored name is returned directly; Buffer is untouched.
    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
      return II->getName();
  }

  // Resize the buffer if we need to copy into it.
  if (Tok.needsCleaning())
    Buffer.resize(Tok.getLength());

  // Ptr starts out pointing at Buffer; the pointer-based overload presumably
  // redirects it when no copy is needed (see the function comment) -- confirm
  // against that overload.
  const char *Ptr = Buffer.data();
  unsigned Len = getSpelling(Tok, Ptr, Invalid);
  return StringRef(Ptr, Len);
}

/// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it.  If specified, the source location provides a source
/// location for the token.
///
/// The token's length and location are always updated. Its data pointer is
/// updated only when the token kind is already a raw identifier or a literal,
/// so the kind has to be set by the caller beforehand for that to happen.
void Preprocessor::CreateString(StringRef Str, Token &Tok,
                                SourceLocation ExpansionLocStart,
                                SourceLocation ExpansionLocEnd) {
  Tok.setLength(Str.size());

  const char *DestPtr;
  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);

  // Only wrap the scratch location in an expansion location when the caller
  // supplied a valid expansion start.
  if (ExpansionLocStart.isValid())
    Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
                                       ExpansionLocEnd, Str.size());
  Tok.setLocation(Loc);

  // If this is a raw identifier or a literal token, set the pointer data.
  if (Tok.is(tok::raw_identifier))
    Tok.setRawIdentifierData(DestPtr);
  else if (Tok.isLiteral())
    Tok.setLiteralData(DestPtr);
}

/// Look up the module named by LangOpts.CurrentModule in the header search
/// info. Returns null when the language options say no module is being
/// compiled.
Module *Preprocessor::getCurrentModule() {
  if (!getLangOpts().isCompilingModule())
    return nullptr;

  return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
}

//===----------------------------------------------------------------------===//
// Preprocessor Initialization Methods
//===----------------------------------------------------------------------===//

/// EnterMainSourceFile - Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
///
/// The predefines buffer is entered after the main file, so it sits on top of
/// the include stack when this function returns.
void Preprocessor::EnterMainSourceFile() {
  // We do not allow the preprocessor to reenter the main file.  Doing so will
  // cause FileID's to accumulate information from both runs (e.g. #line
  // information) and predefined macros aren't guaranteed to be set properly.
  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
  FileID MainFileID = SourceMgr.getMainFileID();

  // If MainFileID is loaded it means we loaded an AST file, no need to enter
  // a main file.
  if (!SourceMgr.isLoadedFileID(MainFileID)) {
    // Enter the main file source buffer.
    EnterSourceFile(MainFileID, nullptr, SourceLocation());

    // If we've been asked to skip bytes in the main file (e.g., as part of a
    // precompiled preamble), do so now.
    if (SkipMainFilePreamble.first > 0)
      CurLexer->SkipBytes(SkipMainFilePreamble.first,
                          SkipMainFilePreamble.second);

    // Tell the header info that the main file was entered.  If the file is later
    // #imported, it won't be re-entered.
    if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
      HeaderInfo.IncrementIncludeCount(FE);
  }

  // Preprocess Predefines to populate the initial preprocessor state.
  std::unique_ptr<llvm::MemoryBuffer> SB =
    llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
  assert(SB && "Cannot create predefined source buffer");
  FileID FID = SourceMgr.createFileID(std::move(SB));
  assert(FID.isValid() && "Could not create FileID for predefines?");
  setPredefinesFileID(FID);

  // Start parsing the predefines.
  EnterSourceFile(FID, nullptr, SourceLocation());
}

/// If a preamble conditional stack is waiting to be replayed, install it on
/// the current preprocessor lexer and mark the replay as done. Does nothing
/// otherwise.
void Preprocessor::replayPreambleConditionalStack() {
  // Restore the conditional stack from the preamble, if there is one.
  if (PreambleConditionalStack.isReplaying()) {
    assert(CurPPLexer &&
           "CurPPLexer is null when calling replayPreambleConditionalStack.");
    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
    PreambleConditionalStack.doneReplaying();
  }
}

/// Signal the end of the main source file to the registered callbacks, if any.
void Preprocessor::EndSourceFile() {
  // Notify the client that we reached the end of the source file.
  if (Callbacks)
    Callbacks->EndOfMainFile();
}

//===----------------------------------------------------------------------===//
// Lexer Event Handling.
//===----------------------------------------------------------------------===//

/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
///
/// \returns the IdentifierInfo that was installed into \p Identifier.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");

  // Look up this token, see if it is a macro, or if it is a language keyword.
  IdentifierInfo *II;
  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
    // No cleaning needed, just use the characters from the lexed buffer.
    II = getIdentifierInfo(Identifier.getRawIdentifier());
  } else {
    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    SmallString<64> IdentifierBuffer;
    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);

    if (Identifier.hasUCN()) {
      // Expand universal character names before the lookup so the identifier
      // table is keyed on the expanded spelling.
      SmallString<64> UCNIdentifierBuffer;
      expandUCNs(UCNIdentifierBuffer, CleanedStr);
      II = getIdentifierInfo(UCNIdentifierBuffer);
    } else {
      II = getIdentifierInfo(CleanedStr);
    }
  }

  // Update the token info (identifier info and appropriate token kind).
  Identifier.setIdentifierInfo(II);
  // In MSVC-compatibility mode, C++ operator keywords that appear in system
  // headers are kept as plain identifiers.
  if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
      getSourceManager().isInSystemHeader(Identifier.getLocation()))
    Identifier.setKind(clang::tok::identifier);
  else
    Identifier.setKind(II->getTokenID());

  return II;
}

/// Remember \p DiagID as the diagnostic to emit when the poisoned identifier
/// \p II is used; any previously stored reason for \p II is overwritten.
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
  PoisonReasons[II] = DiagID;
}

/// Set or clear the poisoned flag on all nine SEH-related identifiers.
///
/// Only four of the nine pointers are asserted non-null, but all nine are
/// dereferenced; the constructor shown in this file initializes them together
/// (all looked up, or all null).
void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
  assert(Ident__exception_code && Ident__exception_info);
  assert(Ident___exception_code && Ident___exception_info);
  Ident__exception_code->setIsPoisoned(Poison);
  Ident___exception_code->setIsPoisoned(Poison);
  Ident_GetExceptionCode->setIsPoisoned(Poison);
  Ident__exception_info->setIsPoisoned(Poison);
  Ident___exception_info->setIsPoisoned(Poison);
  Ident_GetExceptionInfo->setIsPoisoned(Poison);
  Ident__abnormal_termination->setIsPoisoned(Poison);
  Ident___abnormal_termination->setIsPoisoned(Poison);
  Ident_AbnormalTermination->setIsPoisoned(Poison);
}

/// Emit the diagnostic for a use of the poisoned identifier in \p Identifier:
/// the reason registered via SetPoisonReason() if there is one, otherwise the
/// generic err_pp_used_poisoned_id.
void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");
  llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
    PoisonReasons.find(Identifier.getIdentifierInfo());
  if(it == PoisonReasons.end())
    Diag(Identifier, diag::err_pp_used_poisoned_id);
  else
    Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
}

/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
///
/// Only C++ is handled: the .Case entries are generated from the
/// CXX11_KEYWORD and CXX2A_KEYWORD entries of TokenKinds.def. Outside C++ the
/// function runs into llvm_unreachable. The StringSwitch has no Default, so a
/// name that matches no generated case is not given a fallback value here.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  if (LangOpts.CPlusPlus)
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#define CXX2A_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx2a_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}

/// Ask the external source to refresh the out-of-date identifier \p II.
///
/// The result of getExternalSource() is dereferenced without a null check.
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
  assert(II.isOutOfDate() && "not out of date");
  getExternalSource()->updateOutOfDateIdentifier(II);
}

/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
/// is expanded as a macro, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    updateOutOfDateIdentifier(II);
    // The update may have changed the identifier's token ID, so re-sync the
    // token kind.
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  //
  // Likewise if this is the C++ Modules TS import keyword.
  if (((LastTokenWasAt && II.isModulesImport()) ||
       Identifier.is(tok::kw_import)) &&
      !InMacroArgs && !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Switch Lex() into the module-import state; LexAfterModuleImport()
    // collects the path from the following tokens.
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}

/// Lex the next token into \p Result, dispatching on CurLexerKind.
///
/// After a token is produced, the code-completion identifier is recorded if
/// the token is a code_completion token, and LastTokenWasAt is updated for the
/// '@import' detection in HandleIdentifier().
void Preprocessor::Lex(Token &Result) {
  // We loop here until a lex function returns a token; this avoids recursion.
  bool ReturnedToken;
  do {
    switch (CurLexerKind) {
    case CLK_Lexer:
      ReturnedToken = CurLexer->Lex(Result);
      break;
    case CLK_PTHLexer:
      ReturnedToken = CurPTHLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      ReturnedToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      // CachingLex() and LexAfterModuleImport() return void, so they are
      // treated as always having produced a token.
      CachingLex(Result);
      ReturnedToken = true;
      break;
    case CLK_LexAfterModuleImport:
      LexAfterModuleImport(Result);
      ReturnedToken = true;
      break;
    }
  } while (!ReturnedToken);

  if (Result.is(tok::code_completion))
    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());

  LastTokenWasAt = Result.is(tok::at);
}

/// \brief Lex a token following the 'import' contextual keyword.
///
/// Each call lexes exactly one token into \p Result. While the tokens keep
/// matching "identifier (. identifier)*", the lexer kind is set back to
/// CLK_LexAfterModuleImport so the next Lex() call returns here. On the first
/// token that does not fit, the collected path (if any) is loaded as a module
/// and the lexer kind computed by recomputeCurLexerKind() stays in effect.
void Preprocessor::LexAfterModuleImport(Token &Result) {
  // Figure out what kind of lexer we actually have.
  recomputeCurLexerKind();

  // Lex the next token.
  Lex(Result);

  // The token sequence
  //
  //   import identifier (. identifier)*
  //
  // indicates a module import directive. We already saw the 'import'
  // contextual keyword, so now we're looking for the identifiers.
  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
    // We expected to see an identifier here, and we did; continue handling
    // identifiers.
    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
                                              Result.getLocation()));
    ModuleImportExpectsIdentifier = false;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
  // see the next identifier. (We can also see a '[[' that begins an
  // attribute-specifier-seq here under the C++ Modules TS.)
  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we have a non-empty module path, load the named module.
  if (!ModuleImportPath.empty()) {
    // Under the Modules TS, the dot is just part of the module name, and not
    // a real hierarchy separator. Flatten such module names now.
    //
    // FIXME: Is this the right level to be performing this transformation?
    std::string FlatModuleName;
    if (getLangOpts().ModulesTS) {
      for (auto &Piece : ModuleImportPath) {
        if (!FlatModuleName.empty())
          FlatModuleName += ".";
        FlatModuleName += Piece.first->getName();
      }
      // The flattened name keeps the source location of the first component.
      SourceLocation FirstPathLoc = ModuleImportPath[0].second;
      ModuleImportPath.clear();
      ModuleImportPath.push_back(
          std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
    }

    Module *Imported = nullptr;
    if (getLangOpts().Modules) {
      Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                            ModuleImportPath,
                                            Module::Hidden,
                                            /*IsIncludeDirective=*/false);
      if (Imported)
        makeModuleVisible(Imported, ModuleImportLoc);
    }
    // Callbacks are notified even when loading failed or was skipped, in
    // which case Imported is null.
    if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
      Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
  }
}

/// Make module \p M visible in the current submodule state as of \p Loc and,
/// when a submodule is being built, record \p M among that submodule's
/// imports.
///
/// Conflicts reported by the visibility update produce a
/// warn_module_conflict diagnostic. That diagnostic is attached to
/// ModuleImportLoc, not to \p Loc.
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
  CurSubmoduleState->VisibleModules.setVisible(
      M, Loc, [](Module *) {},
      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
        // FIXME: Include the path in the diagnostic.
        // FIXME: Include the import location for the conflicting module.
        Diag(ModuleImportLoc, diag::warn_module_conflict)
            << Path[0]->getFullModuleName()
            << Conflict->getFullModuleName()
            << Message;
      });

  // Add this module to the imports list of the currently-built submodule.
  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
    BuildingSubmoduleStack.back().M->Imports.insert(M);
}

/// Finish lexing a sequence of adjacent string literals whose first token is
/// already in \p Result, and store the concatenated value in \p String.
///
/// \param DiagnosticTag streamed into the "expected string literal"
///        diagnostic.
/// \param AllowMacroExpansion selects Lex() over LexUnexpandedToken() for
///        fetching the following tokens.
/// \returns true on success. In that case \p Result holds the first token
/// after the literal sequence. Returns false if \p Result is not a string
/// literal, if the literal parser reports an error, or if the literal is a
/// Pascal string.
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
                                          const char *DiagnosticTag,
                                          bool AllowMacroExpansion) {
  // We need at least one string literal.
  if (Result.isNot(tok::string_literal)) {
    Diag(Result, diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  // Lex string literal tokens, optionally with macro expansion.
  SmallVector<Token, 4> StrToks;
  do {
    StrToks.push_back(Result);

    // A user-defined-literal suffix is diagnosed, but the token is still
    // collected and lexing continues.
    if (Result.hasUDSuffix())
      Diag(Result, diag::err_invalid_string_udl);

    if (AllowMacroExpansion)
      Lex(Result);
    else
      LexUnexpandedToken(Result);
  } while (Result.is(tok::string_literal));

  // Concatenate and parse the strings.
  StringLiteralParser Literal(StrToks, *this);
  assert(Literal.isAscii() && "Didn't allow wide strings in");

  if (Literal.hadError)
    return false;

  if (Literal.Pascal) {
    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  String = Literal.GetString();
  return true;
}

/// Parse the numeric_constant in \p Tok as a plain integer literal.
///
/// On success, stores the value in \p Value, lexes the next token into
/// \p Tok, and returns true. On any failure (invalid spelling, parse error,
/// non-integer literal, user-defined suffix, or GetIntegerValue() reporting
/// failure) returns false and leaves both \p Tok and \p Value unchanged.
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
  assert(Tok.is(tok::numeric_constant));
  SmallString<8> IntegerBuffer;
  bool NumberInvalid = false;
  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
  if (NumberInvalid)
    return false;
  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
    return false;
  // A true result from GetIntegerValue() is treated as failure -- presumably
  // it signals that the value does not fit in 64 bits; confirm against
  // NumericLiteralParser.
  llvm::APInt APVal(64, 0);
  if (Literal.GetIntegerValue(APVal))
    return false;
  Lex(Tok);
  Value = APVal.getLimitedValue();
  return true;
}

/// Register \p Handler to be called for comments. The handler must be
/// non-null and not already registered (both checked by assertion only).
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
  assert(Handler && "NULL comment handler");
  assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
         CommentHandlers.end() && "Comment handler already registered");
  CommentHandlers.push_back(Handler);
}

/// Unregister \p Handler. It must currently be registered; this is checked by
/// assertion only, and the erase below is unconditional.
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
  std::vector<CommentHandler *>::iterator Pos
  = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
  CommentHandlers.erase(Pos);
}

/// Pass the comment range \p Comment to every registered comment handler.
///
/// \returns true, after lexing the next token into \p result, when at least
/// one handler returned true and getCommentRetentionState() is false.
/// Otherwise returns false and leaves \p result untouched.
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
  bool AnyPendingTokens = false;
  // Every handler is invoked; a true result from one does not stop the loop.
  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
       HEnd = CommentHandlers.end();
       H != HEnd; ++H) {
    if ((*H)->HandleComment(*this, Comment))
      AnyPendingTokens = true;
  }
  if (!AnyPendingTokens || getCommentRetentionState())
    return false;
  Lex(result);
  return true;
}

// Out-of-line definitions of the destructors of the handler/loader interfaces.
ModuleLoader::~ModuleLoader() { }

CommentHandler::~CommentHandler() { }

CodeCompletionHandler::~CodeCompletionHandler() { }

/// Create the preprocessing record if there is none yet.
///
/// The new record is handed to addPPCallbacks() inside a unique_ptr, while
/// Record keeps a raw pointer to the same object.
void Preprocessor::createPreprocessingRecord() {
  if (Record)
    return;

  Record = new PreprocessingRecord(getSourceManager());
  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
}