1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include "llvm/ADT/SmallString.h" 48 #include "llvm/ADT/StringExtras.h" 49 #include "llvm/ADT/StringSwitch.h" 50 #include "llvm/Support/Capacity.h" 51 #include "llvm/Support/ConvertUTF.h" 52 #include "llvm/Support/MemoryBuffer.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <utility> 55 using namespace clang; 56 57 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 58 59 //===----------------------------------------------------------------------===// 60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 61 62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 63 DiagnosticsEngine &diags, LangOptions &opts, 64 SourceManager &SM, HeaderSearch &Headers, 65 ModuleLoader &TheModuleLoader, 66 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 67 TranslationUnitKind TUKind) 68 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 69 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 70 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 71 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 72 Identifiers(opts, IILookup), 73 PragmaHandlers(new PragmaNamespace(StringRef())), 74 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 75 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 76 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 77 CodeCompletionReached(0), CodeCompletionII(0), MainFileDir(nullptr), 78 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), 79 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), 80 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 81 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 82 OwnsHeaderSearch = OwnsHeaders; 83 84 CounterValue = 0; // __COUNTER__ starts at 0. 85 86 // Clear stats. 87 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 88 NumIf = NumElse = NumEndif = 0; 89 NumEnteredSourceFiles = 0; 90 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 91 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 92 MaxIncludeStackDepth = 0; 93 NumSkipped = 0; 94 95 // Default to discarding comments. 96 KeepComments = false; 97 KeepMacroComments = false; 98 SuppressIncludeNotFoundError = false; 99 100 // Macro expansion is enabled. 101 DisableMacroExpansion = false; 102 MacroExpansionInDirectivesOverride = false; 103 InMacroArgs = false; 104 InMacroArgPreExpansion = false; 105 NumCachedTokenLexers = 0; 106 PragmasEnabled = true; 107 ParsingIfOrElifDirective = false; 108 PreprocessedOutput = false; 109 110 CachedLexPos = 0; 111 112 // We haven't read anything from the external source. 113 ReadMacrosFromExternalSource = false; 114 115 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 116 // This gets unpoisoned where it is allowed. 117 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 118 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 119 120 // Initialize the pragma handlers. 121 RegisterBuiltinPragmas(); 122 123 // Initialize builtin macros like __LINE__ and friends. 124 RegisterBuiltinMacros(); 125 126 if(LangOpts.Borland) { 127 Ident__exception_info = getIdentifierInfo("_exception_info"); 128 Ident___exception_info = getIdentifierInfo("__exception_info"); 129 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 130 Ident__exception_code = getIdentifierInfo("_exception_code"); 131 Ident___exception_code = getIdentifierInfo("__exception_code"); 132 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 133 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 134 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 135 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 136 } else { 137 Ident__exception_info = Ident__exception_code = nullptr; 138 Ident__abnormal_termination = Ident___exception_info = nullptr; 139 Ident___exception_code = Ident___abnormal_termination = nullptr; 140 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 141 Ident_AbnormalTermination = nullptr; 142 } 143 } 144 145 Preprocessor::~Preprocessor() { 146 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 147 148 IncludeMacroStack.clear(); 149 150 // Destroy any macro definitions. 151 while (MacroInfoChain *I = MIChainHead) { 152 MIChainHead = I->Next; 153 I->~MacroInfoChain(); 154 } 155 156 // Free any cached macro expanders. 157 // This populates MacroArgCache, so all TokenLexers need to be destroyed 158 // before the code below that frees up the MacroArgCache list. 159 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 160 CurTokenLexer.reset(); 161 162 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 163 DeserialMIChainHead = I->Next; 164 I->~DeserializedMacroInfoChain(); 165 } 166 167 // Free any cached MacroArgs. 168 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 169 ArgList = ArgList->deallocate(); 170 171 // Delete the header search info, if we own it. 172 if (OwnsHeaderSearch) 173 delete &HeaderInfo; 174 } 175 176 void Preprocessor::Initialize(const TargetInfo &Target, 177 const TargetInfo *AuxTarget) { 178 assert((!this->Target || this->Target == &Target) && 179 "Invalid override of target information"); 180 this->Target = &Target; 181 182 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 183 "Invalid override of aux target information."); 184 this->AuxTarget = AuxTarget; 185 186 // Initialize information about built-ins. 187 BuiltinInfo.InitializeTarget(Target, AuxTarget); 188 HeaderInfo.setTarget(Target); 189 } 190 191 void Preprocessor::InitializeForModelFile() { 192 NumEnteredSourceFiles = 0; 193 194 // Reset pragmas 195 PragmaHandlersBackup = std::move(PragmaHandlers); 196 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 197 RegisterBuiltinPragmas(); 198 199 // Reset PredefinesFileID 200 PredefinesFileID = FileID(); 201 } 202 203 void Preprocessor::FinalizeForModelFile() { 204 NumEnteredSourceFiles = 1; 205 206 PragmaHandlers = std::move(PragmaHandlersBackup); 207 } 208 209 void Preprocessor::setPTHManager(PTHManager* pm) { 210 PTH.reset(pm); 211 FileMgr.addStatCache(PTH->createStatCache()); 212 } 213 214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 215 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 216 << getSpelling(Tok) << "'"; 217 218 if (!DumpFlags) return; 219 220 llvm::errs() << "\t"; 221 if (Tok.isAtStartOfLine()) 222 llvm::errs() << " [StartOfLine]"; 223 if (Tok.hasLeadingSpace()) 224 llvm::errs() << " [LeadingSpace]"; 225 if (Tok.isExpandDisabled()) 226 llvm::errs() << " [ExpandDisabled]"; 227 if (Tok.needsCleaning()) { 228 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 229 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 230 << "']"; 231 } 232 233 llvm::errs() << "\tLoc=<"; 234 DumpLocation(Tok.getLocation()); 235 llvm::errs() << ">"; 236 } 237 238 void Preprocessor::DumpLocation(SourceLocation Loc) const { 239 Loc.dump(SourceMgr); 240 } 241 242 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 243 llvm::errs() << "MACRO: "; 244 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 245 DumpToken(MI.getReplacementToken(i)); 246 llvm::errs() << " "; 247 } 248 llvm::errs() << "\n"; 249 } 250 251 void Preprocessor::PrintStats() { 252 llvm::errs() << "\n*** Preprocessor Stats:\n"; 253 llvm::errs() << NumDirectives << " directives found:\n"; 254 llvm::errs() << " " << NumDefined << " #define.\n"; 255 llvm::errs() << " " << NumUndefined << " #undef.\n"; 256 llvm::errs() << " #include/#include_next/#import:\n"; 257 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 258 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 259 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 260 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 261 llvm::errs() << " " << NumEndif << " #endif.\n"; 262 llvm::errs() << " " << NumPragma << " #pragma.\n"; 263 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 264 265 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 266 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 267 << NumFastMacroExpanded << " on the fast path.\n"; 268 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 269 << " token paste (##) operations performed, " 270 << NumFastTokenPaste << " on the fast path.\n"; 271 272 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 273 274 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 275 llvm::errs() << "\n Macro Expanded Tokens: " 276 << llvm::capacity_in_bytes(MacroExpandedTokens); 277 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 278 // FIXME: List information for all submodules. 279 llvm::errs() << "\n Macros: " 280 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 281 llvm::errs() << "\n #pragma push_macro Info: " 282 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 283 llvm::errs() << "\n Poison Reasons: " 284 << llvm::capacity_in_bytes(PoisonReasons); 285 llvm::errs() << "\n Comment Handlers: " 286 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 287 } 288 289 Preprocessor::macro_iterator 290 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 291 if (IncludeExternalMacros && ExternalSource && 292 !ReadMacrosFromExternalSource) { 293 ReadMacrosFromExternalSource = true; 294 ExternalSource->ReadDefinedMacros(); 295 } 296 297 // Make sure we cover all macros in visible modules. 298 for (const ModuleMacro &Macro : ModuleMacros) 299 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 300 301 return CurSubmoduleState->Macros.begin(); 302 } 303 304 size_t Preprocessor::getTotalMemory() const { 305 return BP.getTotalMemory() 306 + llvm::capacity_in_bytes(MacroExpandedTokens) 307 + Predefines.capacity() /* Predefines buffer. */ 308 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 309 // and ModuleMacros. 310 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 311 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 312 + llvm::capacity_in_bytes(PoisonReasons) 313 + llvm::capacity_in_bytes(CommentHandlers); 314 } 315 316 Preprocessor::macro_iterator 317 Preprocessor::macro_end(bool IncludeExternalMacros) const { 318 if (IncludeExternalMacros && ExternalSource && 319 !ReadMacrosFromExternalSource) { 320 ReadMacrosFromExternalSource = true; 321 ExternalSource->ReadDefinedMacros(); 322 } 323 324 return CurSubmoduleState->Macros.end(); 325 } 326 327 /// \brief Compares macro tokens with a specified token value sequence. 328 static bool MacroDefinitionEquals(const MacroInfo *MI, 329 ArrayRef<TokenValue> Tokens) { 330 return Tokens.size() == MI->getNumTokens() && 331 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 332 } 333 334 StringRef Preprocessor::getLastMacroWithSpelling( 335 SourceLocation Loc, 336 ArrayRef<TokenValue> Tokens) const { 337 SourceLocation BestLocation; 338 StringRef BestSpelling; 339 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 340 I != E; ++I) { 341 const MacroDirective::DefInfo 342 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 343 if (!Def || !Def.getMacroInfo()) 344 continue; 345 if (!Def.getMacroInfo()->isObjectLike()) 346 continue; 347 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 348 continue; 349 SourceLocation Location = Def.getLocation(); 350 // Choose the macro defined latest. 351 if (BestLocation.isInvalid() || 352 (Location.isValid() && 353 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 354 BestLocation = Location; 355 BestSpelling = I->first->getName(); 356 } 357 } 358 return BestSpelling; 359 } 360 361 void Preprocessor::recomputeCurLexerKind() { 362 if (CurLexer) 363 CurLexerKind = CLK_Lexer; 364 else if (CurPTHLexer) 365 CurLexerKind = CLK_PTHLexer; 366 else if (CurTokenLexer) 367 CurLexerKind = CLK_TokenLexer; 368 else 369 CurLexerKind = CLK_CachingLexer; 370 } 371 372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 373 unsigned CompleteLine, 374 unsigned CompleteColumn) { 375 assert(File); 376 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 377 assert(!CodeCompletionFile && "Already set"); 378 379 using llvm::MemoryBuffer; 380 381 // Load the actual file's contents. 382 bool Invalid = false; 383 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 384 if (Invalid) 385 return true; 386 387 // Find the byte position of the truncation point. 388 const char *Position = Buffer->getBufferStart(); 389 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 390 for (; *Position; ++Position) { 391 if (*Position != '\r' && *Position != '\n') 392 continue; 393 394 // Eat \r\n or \n\r as a single line. 395 if ((Position[1] == '\r' || Position[1] == '\n') && 396 Position[0] != Position[1]) 397 ++Position; 398 ++Position; 399 break; 400 } 401 } 402 403 Position += CompleteColumn - 1; 404 405 // If pointing inside the preamble, adjust the position at the beginning of 406 // the file after the preamble. 407 if (SkipMainFilePreamble.first && 408 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 409 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 410 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 411 } 412 413 if (Position > Buffer->getBufferEnd()) 414 Position = Buffer->getBufferEnd(); 415 416 CodeCompletionFile = File; 417 CodeCompletionOffset = Position - Buffer->getBufferStart(); 418 419 std::unique_ptr<MemoryBuffer> NewBuffer = 420 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 421 Buffer->getBufferIdentifier()); 422 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 423 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 424 *NewPos = '\0'; 425 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 426 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 427 428 return false; 429 } 430 431 void Preprocessor::CodeCompleteNaturalLanguage() { 432 if (CodeComplete) 433 CodeComplete->CodeCompleteNaturalLanguage(); 434 setCodeCompletionReached(); 435 } 436 437 /// getSpelling - This method is used to get the spelling of a token into a 438 /// SmallVector. Note that the returned StringRef may not point to the 439 /// supplied buffer if a copy can be avoided. 440 StringRef Preprocessor::getSpelling(const Token &Tok, 441 SmallVectorImpl<char> &Buffer, 442 bool *Invalid) const { 443 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 444 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 445 // Try the fast path. 446 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 447 return II->getName(); 448 } 449 450 // Resize the buffer if we need to copy into it. 451 if (Tok.needsCleaning()) 452 Buffer.resize(Tok.getLength()); 453 454 const char *Ptr = Buffer.data(); 455 unsigned Len = getSpelling(Tok, Ptr, Invalid); 456 return StringRef(Ptr, Len); 457 } 458 459 /// CreateString - Plop the specified string into a scratch buffer and return a 460 /// location for it. If specified, the source location provides a source 461 /// location for the token. 462 void Preprocessor::CreateString(StringRef Str, Token &Tok, 463 SourceLocation ExpansionLocStart, 464 SourceLocation ExpansionLocEnd) { 465 Tok.setLength(Str.size()); 466 467 const char *DestPtr; 468 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 469 470 if (ExpansionLocStart.isValid()) 471 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 472 ExpansionLocEnd, Str.size()); 473 Tok.setLocation(Loc); 474 475 // If this is a raw identifier or a literal token, set the pointer data. 476 if (Tok.is(tok::raw_identifier)) 477 Tok.setRawIdentifierData(DestPtr); 478 else if (Tok.isLiteral()) 479 Tok.setLiteralData(DestPtr); 480 } 481 482 Module *Preprocessor::getCurrentModule() { 483 if (!getLangOpts().CompilingModule) 484 return nullptr; 485 486 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 487 } 488 489 //===----------------------------------------------------------------------===// 490 // Preprocessor Initialization Methods 491 //===----------------------------------------------------------------------===// 492 493 494 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 495 /// which implicitly adds the builtin defines etc. 496 void Preprocessor::EnterMainSourceFile() { 497 // We do not allow the preprocessor to reenter the main file. Doing so will 498 // cause FileID's to accumulate information from both runs (e.g. #line 499 // information) and predefined macros aren't guaranteed to be set properly. 500 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 501 FileID MainFileID = SourceMgr.getMainFileID(); 502 503 // If MainFileID is loaded it means we loaded an AST file, no need to enter 504 // a main file. 505 if (!SourceMgr.isLoadedFileID(MainFileID)) { 506 // Enter the main file source buffer. 507 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 508 509 // If we've been asked to skip bytes in the main file (e.g., as part of a 510 // precompiled preamble), do so now. 511 if (SkipMainFilePreamble.first > 0) 512 CurLexer->SkipBytes(SkipMainFilePreamble.first, 513 SkipMainFilePreamble.second); 514 515 // Tell the header info that the main file was entered. If the file is later 516 // #imported, it won't be re-entered. 517 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 518 HeaderInfo.IncrementIncludeCount(FE); 519 } 520 521 // Preprocess Predefines to populate the initial preprocessor state. 522 std::unique_ptr<llvm::MemoryBuffer> SB = 523 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 524 assert(SB && "Cannot create predefined source buffer"); 525 FileID FID = SourceMgr.createFileID(std::move(SB)); 526 assert(FID.isValid() && "Could not create FileID for predefines?"); 527 setPredefinesFileID(FID); 528 529 // Start parsing the predefines. 530 EnterSourceFile(FID, nullptr, SourceLocation()); 531 } 532 533 void Preprocessor::EndSourceFile() { 534 // Notify the client that we reached the end of the source file. 535 if (Callbacks) 536 Callbacks->EndOfMainFile(); 537 } 538 539 //===----------------------------------------------------------------------===// 540 // Lexer Event Handling. 541 //===----------------------------------------------------------------------===// 542 543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 544 /// identifier information for the token and install it into the token, 545 /// updating the token kind accordingly. 546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 547 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 548 549 // Look up this token, see if it is a macro, or if it is a language keyword. 550 IdentifierInfo *II; 551 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 552 // No cleaning needed, just use the characters from the lexed buffer. 553 II = getIdentifierInfo(Identifier.getRawIdentifier()); 554 } else { 555 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 556 SmallString<64> IdentifierBuffer; 557 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 558 559 if (Identifier.hasUCN()) { 560 SmallString<64> UCNIdentifierBuffer; 561 expandUCNs(UCNIdentifierBuffer, CleanedStr); 562 II = getIdentifierInfo(UCNIdentifierBuffer); 563 } else { 564 II = getIdentifierInfo(CleanedStr); 565 } 566 } 567 568 // Update the token info (identifier info and appropriate token kind). 569 Identifier.setIdentifierInfo(II); 570 Identifier.setKind(II->getTokenID()); 571 572 return II; 573 } 574 575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 576 PoisonReasons[II] = DiagID; 577 } 578 579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 580 assert(Ident__exception_code && Ident__exception_info); 581 assert(Ident___exception_code && Ident___exception_info); 582 Ident__exception_code->setIsPoisoned(Poison); 583 Ident___exception_code->setIsPoisoned(Poison); 584 Ident_GetExceptionCode->setIsPoisoned(Poison); 585 Ident__exception_info->setIsPoisoned(Poison); 586 Ident___exception_info->setIsPoisoned(Poison); 587 Ident_GetExceptionInfo->setIsPoisoned(Poison); 588 Ident__abnormal_termination->setIsPoisoned(Poison); 589 Ident___abnormal_termination->setIsPoisoned(Poison); 590 Ident_AbnormalTermination->setIsPoisoned(Poison); 591 } 592 593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 594 assert(Identifier.getIdentifierInfo() && 595 "Can't handle identifiers without identifier info!"); 596 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 597 PoisonReasons.find(Identifier.getIdentifierInfo()); 598 if(it == PoisonReasons.end()) 599 Diag(Identifier, diag::err_pp_used_poisoned_id); 600 else 601 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 602 } 603 604 /// \brief Returns a diagnostic message kind for reporting a future keyword as 605 /// appropriate for the identifier and specified language. 606 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 607 const LangOptions &LangOpts) { 608 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 609 610 if (LangOpts.CPlusPlus) 611 return llvm::StringSwitch<diag::kind>(II.getName()) 612 #define CXX11_KEYWORD(NAME, FLAGS) \ 613 .Case(#NAME, diag::warn_cxx11_keyword) 614 #include "clang/Basic/TokenKinds.def" 615 ; 616 617 llvm_unreachable( 618 "Keyword not known to come from a newer Standard or proposed Standard"); 619 } 620 621 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 622 assert(II.isOutOfDate() && "not out of date"); 623 getExternalSource()->updateOutOfDateIdentifier(II); 624 } 625 626 /// HandleIdentifier - This callback is invoked when the lexer reads an 627 /// identifier. This callback looks up the identifier in the map and/or 628 /// potentially macro expands it or turns it into a named token (like 'for'). 629 /// 630 /// Note that callers of this method are guarded by checking the 631 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 632 /// IdentifierInfo methods that compute these properties will need to change to 633 /// match. 634 bool Preprocessor::HandleIdentifier(Token &Identifier) { 635 assert(Identifier.getIdentifierInfo() && 636 "Can't handle identifiers without identifier info!"); 637 638 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 639 640 // If the information about this identifier is out of date, update it from 641 // the external source. 642 // We have to treat __VA_ARGS__ in a special way, since it gets 643 // serialized with isPoisoned = true, but our preprocessor may have 644 // unpoisoned it if we're defining a C99 macro. 645 if (II.isOutOfDate()) { 646 bool CurrentIsPoisoned = false; 647 if (&II == Ident__VA_ARGS__) 648 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 649 650 updateOutOfDateIdentifier(II); 651 Identifier.setKind(II.getTokenID()); 652 653 if (&II == Ident__VA_ARGS__) 654 II.setIsPoisoned(CurrentIsPoisoned); 655 } 656 657 // If this identifier was poisoned, and if it was not produced from a macro 658 // expansion, emit an error. 659 if (II.isPoisoned() && CurPPLexer) { 660 HandlePoisonedIdentifier(Identifier); 661 } 662 663 // If this is a macro to be expanded, do it. 664 if (MacroDefinition MD = getMacroDefinition(&II)) { 665 auto *MI = MD.getMacroInfo(); 666 assert(MI && "macro definition with no macro info?"); 667 if (!DisableMacroExpansion) { 668 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 669 // C99 6.10.3p10: If the preprocessing token immediately after the 670 // macro name isn't a '(', this macro should not be expanded. 671 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 672 return HandleMacroExpandedIdentifier(Identifier, MD); 673 } else { 674 // C99 6.10.3.4p2 says that a disabled macro may never again be 675 // expanded, even if it's in a context where it could be expanded in the 676 // future. 677 Identifier.setFlag(Token::DisableExpand); 678 if (MI->isObjectLike() || isNextPPTokenLParen()) 679 Diag(Identifier, diag::pp_disabled_macro_expansion); 680 } 681 } 682 } 683 684 // If this identifier is a keyword in a newer Standard or proposed Standard, 685 // produce a warning. Don't warn if we're not considering macro expansion, 686 // since this identifier might be the name of a macro. 687 // FIXME: This warning is disabled in cases where it shouldn't be, like 688 // "#define constexpr constexpr", "int constexpr;" 689 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 690 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 691 << II.getName(); 692 // Don't diagnose this keyword again in this translation unit. 693 II.setIsFutureCompatKeyword(false); 694 } 695 696 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 697 // then we act as if it is the actual operator and not the textual 698 // representation of it. 699 if (II.isCPlusPlusOperatorKeyword()) 700 Identifier.setIdentifierInfo(nullptr); 701 702 // If this is an extension token, diagnose its use. 703 // We avoid diagnosing tokens that originate from macro definitions. 704 // FIXME: This warning is disabled in cases where it shouldn't be, 705 // like "#define TY typeof", "TY(1) x". 706 if (II.isExtensionToken() && !DisableMacroExpansion) 707 Diag(Identifier, diag::ext_token_used); 708 709 // If this is the 'import' contextual keyword following an '@', note 710 // that the next token indicates a module name. 711 // 712 // Note that we do not treat 'import' as a contextual 713 // keyword when we're in a caching lexer, because caching lexers only get 714 // used in contexts where import declarations are disallowed. 715 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 716 !DisableMacroExpansion && 717 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 718 CurLexerKind != CLK_CachingLexer) { 719 ModuleImportLoc = Identifier.getLocation(); 720 ModuleImportPath.clear(); 721 ModuleImportExpectsIdentifier = true; 722 CurLexerKind = CLK_LexAfterModuleImport; 723 } 724 return true; 725 } 726 727 void Preprocessor::Lex(Token &Result) { 728 // We loop here until a lex function returns a token; this avoids recursion. 729 bool ReturnedToken; 730 do { 731 switch (CurLexerKind) { 732 case CLK_Lexer: 733 ReturnedToken = CurLexer->Lex(Result); 734 break; 735 case CLK_PTHLexer: 736 ReturnedToken = CurPTHLexer->Lex(Result); 737 break; 738 case CLK_TokenLexer: 739 ReturnedToken = CurTokenLexer->Lex(Result); 740 break; 741 case CLK_CachingLexer: 742 CachingLex(Result); 743 ReturnedToken = true; 744 break; 745 case CLK_LexAfterModuleImport: 746 LexAfterModuleImport(Result); 747 ReturnedToken = true; 748 break; 749 } 750 } while (!ReturnedToken); 751 752 if (Result.is(tok::code_completion)) 753 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 754 755 LastTokenWasAt = Result.is(tok::at); 756 } 757 758 759 /// \brief Lex a token following the 'import' contextual keyword. 760 /// 761 void Preprocessor::LexAfterModuleImport(Token &Result) { 762 // Figure out what kind of lexer we actually have. 763 recomputeCurLexerKind(); 764 765 // Lex the next token. 766 Lex(Result); 767 768 // The token sequence 769 // 770 // import identifier (. identifier)* 771 // 772 // indicates a module import directive. We already saw the 'import' 773 // contextual keyword, so now we're looking for the identifiers. 774 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 775 // We expected to see an identifier here, and we did; continue handling 776 // identifiers. 777 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 778 Result.getLocation())); 779 ModuleImportExpectsIdentifier = false; 780 CurLexerKind = CLK_LexAfterModuleImport; 781 return; 782 } 783 784 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 785 // see the next identifier. 786 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 787 ModuleImportExpectsIdentifier = true; 788 CurLexerKind = CLK_LexAfterModuleImport; 789 return; 790 } 791 792 // If we have a non-empty module path, load the named module. 793 if (!ModuleImportPath.empty()) { 794 Module *Imported = nullptr; 795 if (getLangOpts().Modules) { 796 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 797 ModuleImportPath, 798 Module::Hidden, 799 /*IsIncludeDirective=*/false); 800 if (Imported) 801 makeModuleVisible(Imported, ModuleImportLoc); 802 } 803 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 804 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 805 } 806 } 807 808 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 809 CurSubmoduleState->VisibleModules.setVisible( 810 M, Loc, [](Module *) {}, 811 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 812 // FIXME: Include the path in the diagnostic. 813 // FIXME: Include the import location for the conflicting module. 814 Diag(ModuleImportLoc, diag::warn_module_conflict) 815 << Path[0]->getFullModuleName() 816 << Conflict->getFullModuleName() 817 << Message; 818 }); 819 820 // Add this module to the imports list of the currently-built submodule. 821 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 822 BuildingSubmoduleStack.back().M->Imports.insert(M); 823 } 824 825 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 826 const char *DiagnosticTag, 827 bool AllowMacroExpansion) { 828 // We need at least one string literal. 829 if (Result.isNot(tok::string_literal)) { 830 Diag(Result, diag::err_expected_string_literal) 831 << /*Source='in...'*/0 << DiagnosticTag; 832 return false; 833 } 834 835 // Lex string literal tokens, optionally with macro expansion. 836 SmallVector<Token, 4> StrToks; 837 do { 838 StrToks.push_back(Result); 839 840 if (Result.hasUDSuffix()) 841 Diag(Result, diag::err_invalid_string_udl); 842 843 if (AllowMacroExpansion) 844 Lex(Result); 845 else 846 LexUnexpandedToken(Result); 847 } while (Result.is(tok::string_literal)); 848 849 // Concatenate and parse the strings. 850 StringLiteralParser Literal(StrToks, *this); 851 assert(Literal.isAscii() && "Didn't allow wide strings in"); 852 853 if (Literal.hadError) 854 return false; 855 856 if (Literal.Pascal) { 857 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 858 << /*Source='in...'*/0 << DiagnosticTag; 859 return false; 860 } 861 862 String = Literal.GetString(); 863 return true; 864 } 865 866 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 867 assert(Tok.is(tok::numeric_constant)); 868 SmallString<8> IntegerBuffer; 869 bool NumberInvalid = false; 870 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 871 if (NumberInvalid) 872 return false; 873 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 874 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 875 return false; 876 llvm::APInt APVal(64, 0); 877 if (Literal.GetIntegerValue(APVal)) 878 return false; 879 Lex(Tok); 880 Value = APVal.getLimitedValue(); 881 return true; 882 } 883 884 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 885 assert(Handler && "NULL comment handler"); 886 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 887 CommentHandlers.end() && "Comment handler already registered"); 888 CommentHandlers.push_back(Handler); 889 } 890 891 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 892 std::vector<CommentHandler *>::iterator Pos 893 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 894 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 895 CommentHandlers.erase(Pos); 896 } 897 898 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 899 bool AnyPendingTokens = false; 900 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 901 HEnd = CommentHandlers.end(); 902 H != HEnd; ++H) { 903 if ((*H)->HandleComment(*this, Comment)) 904 AnyPendingTokens = true; 905 } 906 if (!AnyPendingTokens || getCommentRetentionState()) 907 return false; 908 Lex(result); 909 return true; 910 } 911 912 ModuleLoader::~ModuleLoader() { } 913 914 CommentHandler::~CommentHandler() { } 915 916 CodeCompletionHandler::~CodeCompletionHandler() { } 917 918 void Preprocessor::createPreprocessingRecord() { 919 if (Record) 920 return; 921 922 Record = new PreprocessingRecord(getSourceManager()); 923 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 924 } 925