1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include "llvm/ADT/SmallString.h" 48 #include "llvm/ADT/StringExtras.h" 49 #include "llvm/ADT/StringSwitch.h" 50 #include "llvm/Support/Capacity.h" 51 #include "llvm/Support/ConvertUTF.h" 52 #include "llvm/Support/MemoryBuffer.h" 53 #include 
"llvm/Support/raw_ostream.h" 54 #include <utility> 55 using namespace clang; 56 57 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 58 59 //===----------------------------------------------------------------------===// 60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 61 62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 63 DiagnosticsEngine &diags, LangOptions &opts, 64 SourceManager &SM, HeaderSearch &Headers, 65 ModuleLoader &TheModuleLoader, 66 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 67 TranslationUnitKind TUKind) 68 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 69 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 70 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 71 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 72 Identifiers(opts, IILookup), 73 PragmaHandlers(new PragmaNamespace(StringRef())), 74 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 75 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 76 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 77 CodeCompletionReached(0), CodeCompletionII(0), MainFileDir(nullptr), 78 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), 79 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), 80 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 81 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 82 OwnsHeaderSearch = OwnsHeaders; 83 84 CounterValue = 0; // __COUNTER__ starts at 0. 85 86 // Clear stats. 87 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 88 NumIf = NumElse = NumEndif = 0; 89 NumEnteredSourceFiles = 0; 90 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 91 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 92 MaxIncludeStackDepth = 0; 93 NumSkipped = 0; 94 95 // Default to discarding comments. 
96 KeepComments = false; 97 KeepMacroComments = false; 98 SuppressIncludeNotFoundError = false; 99 100 // Macro expansion is enabled. 101 DisableMacroExpansion = false; 102 MacroExpansionInDirectivesOverride = false; 103 InMacroArgs = false; 104 InMacroArgPreExpansion = false; 105 NumCachedTokenLexers = 0; 106 PragmasEnabled = true; 107 ParsingIfOrElifDirective = false; 108 PreprocessedOutput = false; 109 110 CachedLexPos = 0; 111 112 // We haven't read anything from the external source. 113 ReadMacrosFromExternalSource = false; 114 115 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 116 // This gets unpoisoned where it is allowed. 117 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 118 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 119 120 // Initialize the pragma handlers. 121 RegisterBuiltinPragmas(); 122 123 // Initialize builtin macros like __LINE__ and friends. 124 RegisterBuiltinMacros(); 125 126 if(LangOpts.Borland) { 127 Ident__exception_info = getIdentifierInfo("_exception_info"); 128 Ident___exception_info = getIdentifierInfo("__exception_info"); 129 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 130 Ident__exception_code = getIdentifierInfo("_exception_code"); 131 Ident___exception_code = getIdentifierInfo("__exception_code"); 132 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 133 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 134 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 135 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 136 } else { 137 Ident__exception_info = Ident__exception_code = nullptr; 138 Ident__abnormal_termination = Ident___exception_info = nullptr; 139 Ident___exception_code = Ident___abnormal_termination = nullptr; 140 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 141 Ident_AbnormalTermination = nullptr; 142 } 143 } 144 145 
Preprocessor::~Preprocessor() { 146 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 147 148 IncludeMacroStack.clear(); 149 150 // Destroy any macro definitions. 151 while (MacroInfoChain *I = MIChainHead) { 152 MIChainHead = I->Next; 153 I->~MacroInfoChain(); 154 } 155 156 // Free any cached macro expanders. 157 // This populates MacroArgCache, so all TokenLexers need to be destroyed 158 // before the code below that frees up the MacroArgCache list. 159 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 160 CurTokenLexer.reset(); 161 162 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 163 DeserialMIChainHead = I->Next; 164 I->~DeserializedMacroInfoChain(); 165 } 166 167 // Free any cached MacroArgs. 168 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 169 ArgList = ArgList->deallocate(); 170 171 // Delete the header search info, if we own it. 172 if (OwnsHeaderSearch) 173 delete &HeaderInfo; 174 } 175 176 void Preprocessor::Initialize(const TargetInfo &Target, 177 const TargetInfo *AuxTarget) { 178 assert((!this->Target || this->Target == &Target) && 179 "Invalid override of target information"); 180 this->Target = &Target; 181 182 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 183 "Invalid override of aux target information."); 184 this->AuxTarget = AuxTarget; 185 186 // Initialize information about built-ins. 
187 BuiltinInfo.InitializeTarget(Target, AuxTarget); 188 HeaderInfo.setTarget(Target); 189 } 190 191 void Preprocessor::InitializeForModelFile() { 192 NumEnteredSourceFiles = 0; 193 194 // Reset pragmas 195 PragmaHandlersBackup = std::move(PragmaHandlers); 196 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 197 RegisterBuiltinPragmas(); 198 199 // Reset PredefinesFileID 200 PredefinesFileID = FileID(); 201 } 202 203 void Preprocessor::FinalizeForModelFile() { 204 NumEnteredSourceFiles = 1; 205 206 PragmaHandlers = std::move(PragmaHandlersBackup); 207 } 208 209 void Preprocessor::setPTHManager(PTHManager* pm) { 210 PTH.reset(pm); 211 FileMgr.addStatCache(PTH->createStatCache()); 212 } 213 214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 215 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 216 << getSpelling(Tok) << "'"; 217 218 if (!DumpFlags) return; 219 220 llvm::errs() << "\t"; 221 if (Tok.isAtStartOfLine()) 222 llvm::errs() << " [StartOfLine]"; 223 if (Tok.hasLeadingSpace()) 224 llvm::errs() << " [LeadingSpace]"; 225 if (Tok.isExpandDisabled()) 226 llvm::errs() << " [ExpandDisabled]"; 227 if (Tok.needsCleaning()) { 228 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 229 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 230 << "']"; 231 } 232 233 llvm::errs() << "\tLoc=<"; 234 DumpLocation(Tok.getLocation()); 235 llvm::errs() << ">"; 236 } 237 238 void Preprocessor::DumpLocation(SourceLocation Loc) const { 239 Loc.dump(SourceMgr); 240 } 241 242 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 243 llvm::errs() << "MACRO: "; 244 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 245 DumpToken(MI.getReplacementToken(i)); 246 llvm::errs() << " "; 247 } 248 llvm::errs() << "\n"; 249 } 250 251 void Preprocessor::PrintStats() { 252 llvm::errs() << "\n*** Preprocessor Stats:\n"; 253 llvm::errs() << NumDirectives << " directives found:\n"; 254 llvm::errs() << " " << 
NumDefined << " #define.\n"; 255 llvm::errs() << " " << NumUndefined << " #undef.\n"; 256 llvm::errs() << " #include/#include_next/#import:\n"; 257 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 258 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 259 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 260 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 261 llvm::errs() << " " << NumEndif << " #endif.\n"; 262 llvm::errs() << " " << NumPragma << " #pragma.\n"; 263 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 264 265 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 266 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 267 << NumFastMacroExpanded << " on the fast path.\n"; 268 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 269 << " token paste (##) operations performed, " 270 << NumFastTokenPaste << " on the fast path.\n"; 271 272 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 273 274 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 275 llvm::errs() << "\n Macro Expanded Tokens: " 276 << llvm::capacity_in_bytes(MacroExpandedTokens); 277 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 278 // FIXME: List information for all submodules. 
279 llvm::errs() << "\n Macros: " 280 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 281 llvm::errs() << "\n #pragma push_macro Info: " 282 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 283 llvm::errs() << "\n Poison Reasons: " 284 << llvm::capacity_in_bytes(PoisonReasons); 285 llvm::errs() << "\n Comment Handlers: " 286 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 287 } 288 289 Preprocessor::macro_iterator 290 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 291 if (IncludeExternalMacros && ExternalSource && 292 !ReadMacrosFromExternalSource) { 293 ReadMacrosFromExternalSource = true; 294 ExternalSource->ReadDefinedMacros(); 295 } 296 297 // Make sure we cover all macros in visible modules. 298 for (const ModuleMacro &Macro : ModuleMacros) 299 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 300 301 return CurSubmoduleState->Macros.begin(); 302 } 303 304 size_t Preprocessor::getTotalMemory() const { 305 return BP.getTotalMemory() 306 + llvm::capacity_in_bytes(MacroExpandedTokens) 307 + Predefines.capacity() /* Predefines buffer. */ 308 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 309 // and ModuleMacros. 310 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 311 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 312 + llvm::capacity_in_bytes(PoisonReasons) 313 + llvm::capacity_in_bytes(CommentHandlers); 314 } 315 316 Preprocessor::macro_iterator 317 Preprocessor::macro_end(bool IncludeExternalMacros) const { 318 if (IncludeExternalMacros && ExternalSource && 319 !ReadMacrosFromExternalSource) { 320 ReadMacrosFromExternalSource = true; 321 ExternalSource->ReadDefinedMacros(); 322 } 323 324 return CurSubmoduleState->Macros.end(); 325 } 326 327 /// \brief Compares macro tokens with a specified token value sequence. 
328 static bool MacroDefinitionEquals(const MacroInfo *MI, 329 ArrayRef<TokenValue> Tokens) { 330 return Tokens.size() == MI->getNumTokens() && 331 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 332 } 333 334 StringRef Preprocessor::getLastMacroWithSpelling( 335 SourceLocation Loc, 336 ArrayRef<TokenValue> Tokens) const { 337 SourceLocation BestLocation; 338 StringRef BestSpelling; 339 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 340 I != E; ++I) { 341 const MacroDirective::DefInfo 342 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 343 if (!Def || !Def.getMacroInfo()) 344 continue; 345 if (!Def.getMacroInfo()->isObjectLike()) 346 continue; 347 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 348 continue; 349 SourceLocation Location = Def.getLocation(); 350 // Choose the macro defined latest. 351 if (BestLocation.isInvalid() || 352 (Location.isValid() && 353 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 354 BestLocation = Location; 355 BestSpelling = I->first->getName(); 356 } 357 } 358 return BestSpelling; 359 } 360 361 void Preprocessor::recomputeCurLexerKind() { 362 if (CurLexer) 363 CurLexerKind = CLK_Lexer; 364 else if (CurPTHLexer) 365 CurLexerKind = CLK_PTHLexer; 366 else if (CurTokenLexer) 367 CurLexerKind = CLK_TokenLexer; 368 else 369 CurLexerKind = CLK_CachingLexer; 370 } 371 372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 373 unsigned CompleteLine, 374 unsigned CompleteColumn) { 375 assert(File); 376 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 377 assert(!CodeCompletionFile && "Already set"); 378 379 using llvm::MemoryBuffer; 380 381 // Load the actual file's contents. 382 bool Invalid = false; 383 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 384 if (Invalid) 385 return true; 386 387 // Find the byte position of the truncation point. 
388 const char *Position = Buffer->getBufferStart(); 389 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 390 for (; *Position; ++Position) { 391 if (*Position != '\r' && *Position != '\n') 392 continue; 393 394 // Eat \r\n or \n\r as a single line. 395 if ((Position[1] == '\r' || Position[1] == '\n') && 396 Position[0] != Position[1]) 397 ++Position; 398 ++Position; 399 break; 400 } 401 } 402 403 Position += CompleteColumn - 1; 404 405 // If pointing inside the preamble, adjust the position at the beginning of 406 // the file after the preamble. 407 if (SkipMainFilePreamble.first && 408 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 409 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 410 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 411 } 412 413 if (Position > Buffer->getBufferEnd()) 414 Position = Buffer->getBufferEnd(); 415 416 CodeCompletionFile = File; 417 CodeCompletionOffset = Position - Buffer->getBufferStart(); 418 419 std::unique_ptr<MemoryBuffer> NewBuffer = 420 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 421 Buffer->getBufferIdentifier()); 422 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 423 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 424 *NewPos = '\0'; 425 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 426 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 427 428 return false; 429 } 430 431 void Preprocessor::CodeCompleteNaturalLanguage() { 432 if (CodeComplete) 433 CodeComplete->CodeCompleteNaturalLanguage(); 434 setCodeCompletionReached(); 435 } 436 437 /// getSpelling - This method is used to get the spelling of a token into a 438 /// SmallVector. Note that the returned StringRef may not point to the 439 /// supplied buffer if a copy can be avoided. 
440 StringRef Preprocessor::getSpelling(const Token &Tok, 441 SmallVectorImpl<char> &Buffer, 442 bool *Invalid) const { 443 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 444 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 445 // Try the fast path. 446 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 447 return II->getName(); 448 } 449 450 // Resize the buffer if we need to copy into it. 451 if (Tok.needsCleaning()) 452 Buffer.resize(Tok.getLength()); 453 454 const char *Ptr = Buffer.data(); 455 unsigned Len = getSpelling(Tok, Ptr, Invalid); 456 return StringRef(Ptr, Len); 457 } 458 459 /// CreateString - Plop the specified string into a scratch buffer and return a 460 /// location for it. If specified, the source location provides a source 461 /// location for the token. 462 void Preprocessor::CreateString(StringRef Str, Token &Tok, 463 SourceLocation ExpansionLocStart, 464 SourceLocation ExpansionLocEnd) { 465 Tok.setLength(Str.size()); 466 467 const char *DestPtr; 468 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 469 470 if (ExpansionLocStart.isValid()) 471 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 472 ExpansionLocEnd, Str.size()); 473 Tok.setLocation(Loc); 474 475 // If this is a raw identifier or a literal token, set the pointer data. 
476 if (Tok.is(tok::raw_identifier)) 477 Tok.setRawIdentifierData(DestPtr); 478 else if (Tok.isLiteral()) 479 Tok.setLiteralData(DestPtr); 480 } 481 482 Module *Preprocessor::getCurrentModule() { 483 if (!getLangOpts().CompilingModule) 484 return nullptr; 485 486 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 487 } 488 489 //===----------------------------------------------------------------------===// 490 // Preprocessor Initialization Methods 491 //===----------------------------------------------------------------------===// 492 493 494 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 495 /// which implicitly adds the builtin defines etc. 496 void Preprocessor::EnterMainSourceFile() { 497 // We do not allow the preprocessor to reenter the main file. Doing so will 498 // cause FileID's to accumulate information from both runs (e.g. #line 499 // information) and predefined macros aren't guaranteed to be set properly. 500 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 501 FileID MainFileID = SourceMgr.getMainFileID(); 502 503 // If MainFileID is loaded it means we loaded an AST file, no need to enter 504 // a main file. 505 if (!SourceMgr.isLoadedFileID(MainFileID)) { 506 // Enter the main file source buffer. 507 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 508 509 // If we've been asked to skip bytes in the main file (e.g., as part of a 510 // precompiled preamble), do so now. 511 if (SkipMainFilePreamble.first > 0) 512 CurLexer->SkipBytes(SkipMainFilePreamble.first, 513 SkipMainFilePreamble.second); 514 515 // Tell the header info that the main file was entered. If the file is later 516 // #imported, it won't be re-entered. 517 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 518 HeaderInfo.IncrementIncludeCount(FE); 519 } 520 521 // Preprocess Predefines to populate the initial preprocessor state. 
522 std::unique_ptr<llvm::MemoryBuffer> SB = 523 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 524 assert(SB && "Cannot create predefined source buffer"); 525 FileID FID = SourceMgr.createFileID(std::move(SB)); 526 assert(FID.isValid() && "Could not create FileID for predefines?"); 527 setPredefinesFileID(FID); 528 529 // Start parsing the predefines. 530 EnterSourceFile(FID, nullptr, SourceLocation()); 531 } 532 533 void Preprocessor::EndSourceFile() { 534 // Notify the client that we reached the end of the source file. 535 if (Callbacks) 536 Callbacks->EndOfMainFile(); 537 } 538 539 //===----------------------------------------------------------------------===// 540 // Lexer Event Handling. 541 //===----------------------------------------------------------------------===// 542 543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 544 /// identifier information for the token and install it into the token, 545 /// updating the token kind accordingly. 546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 547 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 548 549 // Look up this token, see if it is a macro, or if it is a language keyword. 550 IdentifierInfo *II; 551 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 552 // No cleaning needed, just use the characters from the lexed buffer. 553 II = getIdentifierInfo(Identifier.getRawIdentifier()); 554 } else { 555 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 556 SmallString<64> IdentifierBuffer; 557 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 558 559 if (Identifier.hasUCN()) { 560 SmallString<64> UCNIdentifierBuffer; 561 expandUCNs(UCNIdentifierBuffer, CleanedStr); 562 II = getIdentifierInfo(UCNIdentifierBuffer); 563 } else { 564 II = getIdentifierInfo(CleanedStr); 565 } 566 } 567 568 // Update the token info (identifier info and appropriate token kind). 
569 Identifier.setIdentifierInfo(II); 570 Identifier.setKind(II->getTokenID()); 571 572 return II; 573 } 574 575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 576 PoisonReasons[II] = DiagID; 577 } 578 579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 580 assert(Ident__exception_code && Ident__exception_info); 581 assert(Ident___exception_code && Ident___exception_info); 582 Ident__exception_code->setIsPoisoned(Poison); 583 Ident___exception_code->setIsPoisoned(Poison); 584 Ident_GetExceptionCode->setIsPoisoned(Poison); 585 Ident__exception_info->setIsPoisoned(Poison); 586 Ident___exception_info->setIsPoisoned(Poison); 587 Ident_GetExceptionInfo->setIsPoisoned(Poison); 588 Ident__abnormal_termination->setIsPoisoned(Poison); 589 Ident___abnormal_termination->setIsPoisoned(Poison); 590 Ident_AbnormalTermination->setIsPoisoned(Poison); 591 } 592 593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 594 assert(Identifier.getIdentifierInfo() && 595 "Can't handle identifiers without identifier info!"); 596 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 597 PoisonReasons.find(Identifier.getIdentifierInfo()); 598 if(it == PoisonReasons.end()) 599 Diag(Identifier, diag::err_pp_used_poisoned_id); 600 else 601 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 602 } 603 604 /// \brief Returns a diagnostic message kind for reporting a future keyword as 605 /// appropriate for the identifier and specified language. 
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // The CXX11_KEYWORD entries of TokenKinds.def expand into one .Case(...)
  // per keyword, all mapping to the same C++11 keyword warning.
  if (LangOpts.CPlusPlus)
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}

/// Ask the external source to refresh \p II, which must currently be marked
/// out of date.
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
  assert(II.isOutOfDate() && "not out of date");
  getExternalSource()->updateOutOfDateIdentifier(II);
}

/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \returns the result of HandleMacroExpandedIdentifier when the identifier
/// is macro-expanded, and true otherwise.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Save the local poison state of __VA_ARGS__ so it can be restored after
    // the update.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  //
  // Likewise if this is the C++ Modules TS import keyword.
  if (((LastTokenWasAt && II.isModulesImport()) ||
       Identifier.is(tok::kw_import)) &&
      !InMacroArgs && !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Start collecting the module path; Lex() will dispatch the following
    // tokens to LexAfterModuleImport.
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}

/// Lex the next token into \p Result, dispatching on CurLexerKind to the
/// active lexer. Afterwards, record the code-completion identifier when a
/// code_completion token was produced, and remember whether the token was an
/// '@'.
void Preprocessor::Lex(Token &Result) {
  // We loop here until a lex function returns a token; this avoids recursion.
  bool ReturnedToken;
  do {
    switch (CurLexerKind) {
    case CLK_Lexer:
      ReturnedToken = CurLexer->Lex(Result);
      break;
    case CLK_PTHLexer:
      ReturnedToken = CurPTHLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      ReturnedToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      CachingLex(Result);
      ReturnedToken = true;
      break;
    case CLK_LexAfterModuleImport:
      LexAfterModuleImport(Result);
      ReturnedToken = true;
      break;
    }
  } while (!ReturnedToken);

  if (Result.is(tok::code_completion))
    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());

  LastTokenWasAt = Result.is(tok::at);
}


/// \brief Lex a token following the 'import' contextual keyword.
///
/// Identifiers and '.' separators are accumulated into ModuleImportPath; the
/// first token that does not continue the path triggers the module load (when
/// modules are enabled) and the moduleImport callback.
void Preprocessor::LexAfterModuleImport(Token &Result) {
  // Figure out what kind of lexer we actually have.
  recomputeCurLexerKind();

  // Lex the next token.
  Lex(Result);

  // The token sequence
  //
  //   import identifier (. identifier)*
  //
  // indicates a module import directive. We already saw the 'import'
  // contextual keyword, so now we're looking for the identifiers.
  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
    // We expected to see an identifier here, and we did; continue handling
    // identifiers.
    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
                                              Result.getLocation()));
    ModuleImportExpectsIdentifier = false;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
  // see the next identifier. (We can also see a '[[' that begins an
  // attribute-specifier-seq here under the C++ Modules TS.)
  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we have a non-empty module path, load the named module.
  if (!ModuleImportPath.empty()) {
    Module *Imported = nullptr;
    if (getLangOpts().Modules) {
      Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                            ModuleImportPath,
                                            Module::Hidden,
                                            /*IsIncludeDirective=*/false);
      if (Imported)
        makeModuleVisible(Imported, ModuleImportLoc);
    }
    // Imported stays null here when modules are disabled or the load failed.
    if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
      Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
  }
}

/// Make module \p M visible in the current submodule state as of \p Loc,
/// warning about module conflicts, and record it as an import of the
/// submodule currently being built (if any, and if it is not \p M itself).
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
  CurSubmoduleState->VisibleModules.setVisible(
      M, Loc, [](Module *) {},
      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
        // FIXME: Include the path in the diagnostic.
        // FIXME: Include the import location for the conflicting module.
        // NOTE(review): the warning is reported at the ModuleImportLoc member
        // rather than at the Loc parameter -- confirm this is intended for
        // callers that pass a different location.
        Diag(ModuleImportLoc, diag::warn_module_conflict)
          << Path[0]->getFullModuleName()
          << Conflict->getFullModuleName()
          << Message;
      });

  // Add this module to the imports list of the currently-built submodule.
  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
    BuildingSubmoduleStack.back().M->Imports.insert(M);
}

/// Lex the run of adjacent string literal tokens that starts at \p Result and
/// store their concatenated value in \p String.
///
/// \param Result              On entry, the first token; on return, the first
///                            token lexed after the string literals.
/// \param String              Receives the concatenated string on success.
/// \param DiagnosticTag       Passed to the "expected string literal"
///                            diagnostic.
/// \param AllowMacroExpansion Whether following tokens are lexed with macro
///                            expansion.
/// \returns false (after diagnosing) when \p Result is not a string literal,
/// the literal parser reports an error, or the literal is a Pascal string;
/// true otherwise.
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
                                          const char *DiagnosticTag,
                                          bool AllowMacroExpansion) {
  // We need at least one string literal.
  if (Result.isNot(tok::string_literal)) {
    Diag(Result, diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  // Lex string literal tokens, optionally with macro expansion.
  SmallVector<Token, 4> StrToks;
  do {
    StrToks.push_back(Result);

    // User-defined-literal suffixes are diagnosed, but lexing continues.
    if (Result.hasUDSuffix())
      Diag(Result, diag::err_invalid_string_udl);

    if (AllowMacroExpansion)
      Lex(Result);
    else
      LexUnexpandedToken(Result);
  } while (Result.is(tok::string_literal));

  // Concatenate and parse the strings.
  StringLiteralParser Literal(StrToks, *this);
  assert(Literal.isAscii() && "Didn't allow wide strings in");

  if (Literal.hadError)
    return false;

  if (Literal.Pascal) {
    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  String = Literal.GetString();
  return true;
}

/// Parse the numeric_constant token \p Tok as a plain integer literal.
///
/// On success, stores the value in \p Value, lexes the next token into
/// \p Tok and returns true. Returns false, leaving \p Tok unchanged, when the
/// spelling is invalid, the literal has an error, is not an integer, carries
/// a user-defined suffix, or GetIntegerValue returns true for a 64-bit value.
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
  assert(Tok.is(tok::numeric_constant));
  SmallString<8> IntegerBuffer;
  bool NumberInvalid = false;
  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
  if (NumberInvalid)
    return false;
  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
    return false;
  llvm::APInt APVal(64, 0);
  if (Literal.GetIntegerValue(APVal))
    return false;
  Lex(Tok);
  Value = APVal.getLimitedValue();
  return true;
}

/// Register \p Handler to be called for comments. The handler must be
/// non-null and not already registered (both are asserted).
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
  assert(Handler && "NULL comment handler");
  assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
         CommentHandlers.end() && "Comment handler already registered");
  CommentHandlers.push_back(Handler);
}

/// Unregister \p Handler, which must have been registered earlier.
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
  std::vector<CommentHandler *>::iterator Pos
  = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
  // NOTE(review): the registration check is assert-only; with asserts
  // compiled out an unregistered handler reaches erase() with the end
  // iterator -- confirm callers always pass a registered handler.
  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
  CommentHandlers.erase(Pos);
}
901 902 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 903 bool AnyPendingTokens = false; 904 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 905 HEnd = CommentHandlers.end(); 906 H != HEnd; ++H) { 907 if ((*H)->HandleComment(*this, Comment)) 908 AnyPendingTokens = true; 909 } 910 if (!AnyPendingTokens || getCommentRetentionState()) 911 return false; 912 Lex(result); 913 return true; 914 } 915 916 ModuleLoader::~ModuleLoader() { } 917 918 CommentHandler::~CommentHandler() { } 919 920 CodeCompletionHandler::~CodeCompletionHandler() { } 921 922 void Preprocessor::createPreprocessingRecord() { 923 if (Record) 924 return; 925 926 Record = new PreprocessingRecord(getSourceManager()); 927 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 928 } 929