1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APInt.h" 47 #include "llvm/ADT/DenseMap.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/SmallVector.h" 50 #include "llvm/ADT/STLExtras.h" 51 #include "llvm/ADT/StringRef.h" 52 #include "llvm/ADT/StringSwitch.h" 53 #include "llvm/Support/Capacity.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/Support/MemoryBuffer.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <memory> 60 #include <string> 61 #include <utility> 62 #include <vector> 63 64 using namespace clang; 65 66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 67 68 //===----------------------------------------------------------------------===// 69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 70 71 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 72 DiagnosticsEngine &diags, LangOptions &opts, 73 SourceManager &SM, HeaderSearch &Headers, 74 ModuleLoader &TheModuleLoader, 75 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 76 TranslationUnitKind TUKind) 77 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 78 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 79 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 80 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 81 Identifiers(opts, IILookup), 82 PragmaHandlers(new PragmaNamespace(StringRef())), 83 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 84 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 85 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 86 CodeCompletionReached(false), CodeCompletionII(nullptr), 87 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 88 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), 89 Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState), 90 MacroArgCache(nullptr), Record(nullptr), MIChainHead(nullptr), 91 DeserialMIChainHead(nullptr) { 92 OwnsHeaderSearch = OwnsHeaders; 93 94 CounterValue = 0; // __COUNTER__ starts at 0. 95 96 // Clear stats. 97 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 98 NumIf = NumElse = NumEndif = 0; 99 NumEnteredSourceFiles = 0; 100 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 101 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 102 MaxIncludeStackDepth = 0; 103 NumSkipped = 0; 104 105 // Default to discarding comments. 106 KeepComments = false; 107 KeepMacroComments = false; 108 SuppressIncludeNotFoundError = false; 109 110 // Macro expansion is enabled. 111 DisableMacroExpansion = false; 112 MacroExpansionInDirectivesOverride = false; 113 InMacroArgs = false; 114 InMacroArgPreExpansion = false; 115 NumCachedTokenLexers = 0; 116 PragmasEnabled = true; 117 ParsingIfOrElifDirective = false; 118 PreprocessedOutput = false; 119 120 CachedLexPos = 0; 121 122 // We haven't read anything from the external source. 123 ReadMacrosFromExternalSource = false; 124 125 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 126 // This gets unpoisoned where it is allowed. 127 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 128 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 129 130 // Initialize the pragma handlers. 131 RegisterBuiltinPragmas(); 132 133 // Initialize builtin macros like __LINE__ and friends. 134 RegisterBuiltinMacros(); 135 136 if(LangOpts.Borland) { 137 Ident__exception_info = getIdentifierInfo("_exception_info"); 138 Ident___exception_info = getIdentifierInfo("__exception_info"); 139 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 140 Ident__exception_code = getIdentifierInfo("_exception_code"); 141 Ident___exception_code = getIdentifierInfo("__exception_code"); 142 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 143 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 144 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 145 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 146 } else { 147 Ident__exception_info = Ident__exception_code = nullptr; 148 Ident__abnormal_termination = Ident___exception_info = nullptr; 149 Ident___exception_code = Ident___abnormal_termination = nullptr; 150 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 151 Ident_AbnormalTermination = nullptr; 152 } 153 } 154 155 Preprocessor::~Preprocessor() { 156 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 157 158 IncludeMacroStack.clear(); 159 160 // Destroy any macro definitions. 161 while (MacroInfoChain *I = MIChainHead) { 162 MIChainHead = I->Next; 163 I->~MacroInfoChain(); 164 } 165 166 // Free any cached macro expanders. 167 // This populates MacroArgCache, so all TokenLexers need to be destroyed 168 // before the code below that frees up the MacroArgCache list. 169 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 170 CurTokenLexer.reset(); 171 172 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 173 DeserialMIChainHead = I->Next; 174 I->~DeserializedMacroInfoChain(); 175 } 176 177 // Free any cached MacroArgs. 178 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 179 ArgList = ArgList->deallocate(); 180 181 // Delete the header search info, if we own it. 182 if (OwnsHeaderSearch) 183 delete &HeaderInfo; 184 } 185 186 void Preprocessor::Initialize(const TargetInfo &Target, 187 const TargetInfo *AuxTarget) { 188 assert((!this->Target || this->Target == &Target) && 189 "Invalid override of target information"); 190 this->Target = &Target; 191 192 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 193 "Invalid override of aux target information."); 194 this->AuxTarget = AuxTarget; 195 196 // Initialize information about built-ins. 197 BuiltinInfo.InitializeTarget(Target, AuxTarget); 198 HeaderInfo.setTarget(Target); 199 } 200 201 void Preprocessor::InitializeForModelFile() { 202 NumEnteredSourceFiles = 0; 203 204 // Reset pragmas 205 PragmaHandlersBackup = std::move(PragmaHandlers); 206 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 207 RegisterBuiltinPragmas(); 208 209 // Reset PredefinesFileID 210 PredefinesFileID = FileID(); 211 } 212 213 void Preprocessor::FinalizeForModelFile() { 214 NumEnteredSourceFiles = 1; 215 216 PragmaHandlers = std::move(PragmaHandlersBackup); 217 } 218 219 void Preprocessor::setPTHManager(PTHManager* pm) { 220 PTH.reset(pm); 221 FileMgr.addStatCache(PTH->createStatCache()); 222 } 223 224 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 225 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 226 << getSpelling(Tok) << "'"; 227 228 if (!DumpFlags) return; 229 230 llvm::errs() << "\t"; 231 if (Tok.isAtStartOfLine()) 232 llvm::errs() << " [StartOfLine]"; 233 if (Tok.hasLeadingSpace()) 234 llvm::errs() << " [LeadingSpace]"; 235 if (Tok.isExpandDisabled()) 236 llvm::errs() << " [ExpandDisabled]"; 237 if (Tok.needsCleaning()) { 238 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 239 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 240 << "']"; 241 } 242 243 llvm::errs() << "\tLoc=<"; 244 DumpLocation(Tok.getLocation()); 245 llvm::errs() << ">"; 246 } 247 248 void Preprocessor::DumpLocation(SourceLocation Loc) const { 249 Loc.dump(SourceMgr); 250 } 251 252 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 253 llvm::errs() << "MACRO: "; 254 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 255 DumpToken(MI.getReplacementToken(i)); 256 llvm::errs() << " "; 257 } 258 llvm::errs() << "\n"; 259 } 260 261 void Preprocessor::PrintStats() { 262 llvm::errs() << "\n*** Preprocessor Stats:\n"; 263 llvm::errs() << NumDirectives << " directives found:\n"; 264 llvm::errs() << " " << NumDefined << " #define.\n"; 265 llvm::errs() << " " << NumUndefined << " #undef.\n"; 266 llvm::errs() << " #include/#include_next/#import:\n"; 267 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 268 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 269 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 270 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 271 llvm::errs() << " " << NumEndif << " #endif.\n"; 272 llvm::errs() << " " << NumPragma << " #pragma.\n"; 273 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 274 275 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 276 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 277 << NumFastMacroExpanded << " on the fast path.\n"; 278 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 279 << " token paste (##) operations performed, " 280 << NumFastTokenPaste << " on the fast path.\n"; 281 282 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 283 284 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 285 llvm::errs() << "\n Macro Expanded Tokens: " 286 << llvm::capacity_in_bytes(MacroExpandedTokens); 287 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 288 // FIXME: List information for all submodules. 289 llvm::errs() << "\n Macros: " 290 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 291 llvm::errs() << "\n #pragma push_macro Info: " 292 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 293 llvm::errs() << "\n Poison Reasons: " 294 << llvm::capacity_in_bytes(PoisonReasons); 295 llvm::errs() << "\n Comment Handlers: " 296 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 297 } 298 299 Preprocessor::macro_iterator 300 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 301 if (IncludeExternalMacros && ExternalSource && 302 !ReadMacrosFromExternalSource) { 303 ReadMacrosFromExternalSource = true; 304 ExternalSource->ReadDefinedMacros(); 305 } 306 307 // Make sure we cover all macros in visible modules. 308 for (const ModuleMacro &Macro : ModuleMacros) 309 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 310 311 return CurSubmoduleState->Macros.begin(); 312 } 313 314 size_t Preprocessor::getTotalMemory() const { 315 return BP.getTotalMemory() 316 + llvm::capacity_in_bytes(MacroExpandedTokens) 317 + Predefines.capacity() /* Predefines buffer. */ 318 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 319 // and ModuleMacros. 320 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 321 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 322 + llvm::capacity_in_bytes(PoisonReasons) 323 + llvm::capacity_in_bytes(CommentHandlers); 324 } 325 326 Preprocessor::macro_iterator 327 Preprocessor::macro_end(bool IncludeExternalMacros) const { 328 if (IncludeExternalMacros && ExternalSource && 329 !ReadMacrosFromExternalSource) { 330 ReadMacrosFromExternalSource = true; 331 ExternalSource->ReadDefinedMacros(); 332 } 333 334 return CurSubmoduleState->Macros.end(); 335 } 336 337 /// \brief Compares macro tokens with a specified token value sequence. 338 static bool MacroDefinitionEquals(const MacroInfo *MI, 339 ArrayRef<TokenValue> Tokens) { 340 return Tokens.size() == MI->getNumTokens() && 341 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 342 } 343 344 StringRef Preprocessor::getLastMacroWithSpelling( 345 SourceLocation Loc, 346 ArrayRef<TokenValue> Tokens) const { 347 SourceLocation BestLocation; 348 StringRef BestSpelling; 349 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 350 I != E; ++I) { 351 const MacroDirective::DefInfo 352 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 353 if (!Def || !Def.getMacroInfo()) 354 continue; 355 if (!Def.getMacroInfo()->isObjectLike()) 356 continue; 357 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 358 continue; 359 SourceLocation Location = Def.getLocation(); 360 // Choose the macro defined latest. 361 if (BestLocation.isInvalid() || 362 (Location.isValid() && 363 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 364 BestLocation = Location; 365 BestSpelling = I->first->getName(); 366 } 367 } 368 return BestSpelling; 369 } 370 371 void Preprocessor::recomputeCurLexerKind() { 372 if (CurLexer) 373 CurLexerKind = CLK_Lexer; 374 else if (CurPTHLexer) 375 CurLexerKind = CLK_PTHLexer; 376 else if (CurTokenLexer) 377 CurLexerKind = CLK_TokenLexer; 378 else 379 CurLexerKind = CLK_CachingLexer; 380 } 381 382 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 383 unsigned CompleteLine, 384 unsigned CompleteColumn) { 385 assert(File); 386 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 387 assert(!CodeCompletionFile && "Already set"); 388 389 using llvm::MemoryBuffer; 390 391 // Load the actual file's contents. 392 bool Invalid = false; 393 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 394 if (Invalid) 395 return true; 396 397 // Find the byte position of the truncation point. 398 const char *Position = Buffer->getBufferStart(); 399 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 400 for (; *Position; ++Position) { 401 if (*Position != '\r' && *Position != '\n') 402 continue; 403 404 // Eat \r\n or \n\r as a single line. 405 if ((Position[1] == '\r' || Position[1] == '\n') && 406 Position[0] != Position[1]) 407 ++Position; 408 ++Position; 409 break; 410 } 411 } 412 413 Position += CompleteColumn - 1; 414 415 // If pointing inside the preamble, adjust the position at the beginning of 416 // the file after the preamble. 417 if (SkipMainFilePreamble.first && 418 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 419 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 420 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 421 } 422 423 if (Position > Buffer->getBufferEnd()) 424 Position = Buffer->getBufferEnd(); 425 426 CodeCompletionFile = File; 427 CodeCompletionOffset = Position - Buffer->getBufferStart(); 428 429 std::unique_ptr<MemoryBuffer> NewBuffer = 430 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 431 Buffer->getBufferIdentifier()); 432 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 433 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 434 *NewPos = '\0'; 435 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 436 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 437 438 return false; 439 } 440 441 void Preprocessor::CodeCompleteNaturalLanguage() { 442 if (CodeComplete) 443 CodeComplete->CodeCompleteNaturalLanguage(); 444 setCodeCompletionReached(); 445 } 446 447 /// getSpelling - This method is used to get the spelling of a token into a 448 /// SmallVector. Note that the returned StringRef may not point to the 449 /// supplied buffer if a copy can be avoided. 450 StringRef Preprocessor::getSpelling(const Token &Tok, 451 SmallVectorImpl<char> &Buffer, 452 bool *Invalid) const { 453 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 454 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 455 // Try the fast path. 456 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 457 return II->getName(); 458 } 459 460 // Resize the buffer if we need to copy into it. 461 if (Tok.needsCleaning()) 462 Buffer.resize(Tok.getLength()); 463 464 const char *Ptr = Buffer.data(); 465 unsigned Len = getSpelling(Tok, Ptr, Invalid); 466 return StringRef(Ptr, Len); 467 } 468 469 /// CreateString - Plop the specified string into a scratch buffer and return a 470 /// location for it. If specified, the source location provides a source 471 /// location for the token. 472 void Preprocessor::CreateString(StringRef Str, Token &Tok, 473 SourceLocation ExpansionLocStart, 474 SourceLocation ExpansionLocEnd) { 475 Tok.setLength(Str.size()); 476 477 const char *DestPtr; 478 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 479 480 if (ExpansionLocStart.isValid()) 481 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 482 ExpansionLocEnd, Str.size()); 483 Tok.setLocation(Loc); 484 485 // If this is a raw identifier or a literal token, set the pointer data. 486 if (Tok.is(tok::raw_identifier)) 487 Tok.setRawIdentifierData(DestPtr); 488 else if (Tok.isLiteral()) 489 Tok.setLiteralData(DestPtr); 490 } 491 492 Module *Preprocessor::getCurrentModule() { 493 if (!getLangOpts().isCompilingModule()) 494 return nullptr; 495 496 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 497 } 498 499 //===----------------------------------------------------------------------===// 500 // Preprocessor Initialization Methods 501 //===----------------------------------------------------------------------===// 502 503 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 504 /// which implicitly adds the builtin defines etc. 505 void Preprocessor::EnterMainSourceFile() { 506 // We do not allow the preprocessor to reenter the main file. Doing so will 507 // cause FileID's to accumulate information from both runs (e.g. #line 508 // information) and predefined macros aren't guaranteed to be set properly. 509 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 510 FileID MainFileID = SourceMgr.getMainFileID(); 511 512 // If MainFileID is loaded it means we loaded an AST file, no need to enter 513 // a main file. 514 if (!SourceMgr.isLoadedFileID(MainFileID)) { 515 // Enter the main file source buffer. 516 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 517 518 // If we've been asked to skip bytes in the main file (e.g., as part of a 519 // precompiled preamble), do so now. 520 if (SkipMainFilePreamble.first > 0) 521 CurLexer->SkipBytes(SkipMainFilePreamble.first, 522 SkipMainFilePreamble.second); 523 524 // Tell the header info that the main file was entered. If the file is later 525 // #imported, it won't be re-entered. 526 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 527 HeaderInfo.IncrementIncludeCount(FE); 528 } 529 530 // Preprocess Predefines to populate the initial preprocessor state. 531 std::unique_ptr<llvm::MemoryBuffer> SB = 532 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 533 assert(SB && "Cannot create predefined source buffer"); 534 FileID FID = SourceMgr.createFileID(std::move(SB)); 535 assert(FID.isValid() && "Could not create FileID for predefines?"); 536 setPredefinesFileID(FID); 537 538 // Start parsing the predefines. 539 EnterSourceFile(FID, nullptr, SourceLocation()); 540 } 541 542 void Preprocessor::EndSourceFile() { 543 // Notify the client that we reached the end of the source file. 544 if (Callbacks) 545 Callbacks->EndOfMainFile(); 546 } 547 548 //===----------------------------------------------------------------------===// 549 // Lexer Event Handling. 550 //===----------------------------------------------------------------------===// 551 552 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 553 /// identifier information for the token and install it into the token, 554 /// updating the token kind accordingly. 555 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 556 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 557 558 // Look up this token, see if it is a macro, or if it is a language keyword. 559 IdentifierInfo *II; 560 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 561 // No cleaning needed, just use the characters from the lexed buffer. 562 II = getIdentifierInfo(Identifier.getRawIdentifier()); 563 } else { 564 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 565 SmallString<64> IdentifierBuffer; 566 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 567 568 if (Identifier.hasUCN()) { 569 SmallString<64> UCNIdentifierBuffer; 570 expandUCNs(UCNIdentifierBuffer, CleanedStr); 571 II = getIdentifierInfo(UCNIdentifierBuffer); 572 } else { 573 II = getIdentifierInfo(CleanedStr); 574 } 575 } 576 577 // Update the token info (identifier info and appropriate token kind). 578 Identifier.setIdentifierInfo(II); 579 Identifier.setKind(II->getTokenID()); 580 581 return II; 582 } 583 584 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 585 PoisonReasons[II] = DiagID; 586 } 587 588 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 589 assert(Ident__exception_code && Ident__exception_info); 590 assert(Ident___exception_code && Ident___exception_info); 591 Ident__exception_code->setIsPoisoned(Poison); 592 Ident___exception_code->setIsPoisoned(Poison); 593 Ident_GetExceptionCode->setIsPoisoned(Poison); 594 Ident__exception_info->setIsPoisoned(Poison); 595 Ident___exception_info->setIsPoisoned(Poison); 596 Ident_GetExceptionInfo->setIsPoisoned(Poison); 597 Ident__abnormal_termination->setIsPoisoned(Poison); 598 Ident___abnormal_termination->setIsPoisoned(Poison); 599 Ident_AbnormalTermination->setIsPoisoned(Poison); 600 } 601 602 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 603 assert(Identifier.getIdentifierInfo() && 604 "Can't handle identifiers without identifier info!"); 605 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 606 PoisonReasons.find(Identifier.getIdentifierInfo()); 607 if(it == PoisonReasons.end()) 608 Diag(Identifier, diag::err_pp_used_poisoned_id); 609 else 610 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 611 } 612 613 /// \brief Returns a diagnostic message kind for reporting a future keyword as 614 /// appropriate for the identifier and specified language. 615 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 616 const LangOptions &LangOpts) { 617 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 618 619 if (LangOpts.CPlusPlus) 620 return llvm::StringSwitch<diag::kind>(II.getName()) 621 #define CXX11_KEYWORD(NAME, FLAGS) \ 622 .Case(#NAME, diag::warn_cxx11_keyword) 623 #include "clang/Basic/TokenKinds.def" 624 ; 625 626 llvm_unreachable( 627 "Keyword not known to come from a newer Standard or proposed Standard"); 628 } 629 630 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 631 assert(II.isOutOfDate() && "not out of date"); 632 getExternalSource()->updateOutOfDateIdentifier(II); 633 } 634 635 /// HandleIdentifier - This callback is invoked when the lexer reads an 636 /// identifier. This callback looks up the identifier in the map and/or 637 /// potentially macro expands it or turns it into a named token (like 'for'). 638 /// 639 /// Note that callers of this method are guarded by checking the 640 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 641 /// IdentifierInfo methods that compute these properties will need to change to 642 /// match. 643 bool Preprocessor::HandleIdentifier(Token &Identifier) { 644 assert(Identifier.getIdentifierInfo() && 645 "Can't handle identifiers without identifier info!"); 646 647 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 648 649 // If the information about this identifier is out of date, update it from 650 // the external source. 651 // We have to treat __VA_ARGS__ in a special way, since it gets 652 // serialized with isPoisoned = true, but our preprocessor may have 653 // unpoisoned it if we're defining a C99 macro. 654 if (II.isOutOfDate()) { 655 bool CurrentIsPoisoned = false; 656 if (&II == Ident__VA_ARGS__) 657 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 658 659 updateOutOfDateIdentifier(II); 660 Identifier.setKind(II.getTokenID()); 661 662 if (&II == Ident__VA_ARGS__) 663 II.setIsPoisoned(CurrentIsPoisoned); 664 } 665 666 // If this identifier was poisoned, and if it was not produced from a macro 667 // expansion, emit an error. 668 if (II.isPoisoned() && CurPPLexer) { 669 HandlePoisonedIdentifier(Identifier); 670 } 671 672 // If this is a macro to be expanded, do it. 673 if (MacroDefinition MD = getMacroDefinition(&II)) { 674 auto *MI = MD.getMacroInfo(); 675 assert(MI && "macro definition with no macro info?"); 676 if (!DisableMacroExpansion) { 677 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 678 // C99 6.10.3p10: If the preprocessing token immediately after the 679 // macro name isn't a '(', this macro should not be expanded. 680 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 681 return HandleMacroExpandedIdentifier(Identifier, MD); 682 } else { 683 // C99 6.10.3.4p2 says that a disabled macro may never again be 684 // expanded, even if it's in a context where it could be expanded in the 685 // future. 686 Identifier.setFlag(Token::DisableExpand); 687 if (MI->isObjectLike() || isNextPPTokenLParen()) 688 Diag(Identifier, diag::pp_disabled_macro_expansion); 689 } 690 } 691 } 692 693 // If this identifier is a keyword in a newer Standard or proposed Standard, 694 // produce a warning. Don't warn if we're not considering macro expansion, 695 // since this identifier might be the name of a macro. 696 // FIXME: This warning is disabled in cases where it shouldn't be, like 697 // "#define constexpr constexpr", "int constexpr;" 698 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 699 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 700 << II.getName(); 701 // Don't diagnose this keyword again in this translation unit. 702 II.setIsFutureCompatKeyword(false); 703 } 704 705 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 706 // then we act as if it is the actual operator and not the textual 707 // representation of it. 708 if (II.isCPlusPlusOperatorKeyword()) 709 Identifier.setIdentifierInfo(nullptr); 710 711 // If this is an extension token, diagnose its use. 712 // We avoid diagnosing tokens that originate from macro definitions. 713 // FIXME: This warning is disabled in cases where it shouldn't be, 714 // like "#define TY typeof", "TY(1) x". 715 if (II.isExtensionToken() && !DisableMacroExpansion) 716 Diag(Identifier, diag::ext_token_used); 717 718 // If this is the 'import' contextual keyword following an '@', note 719 // that the next token indicates a module name. 720 // 721 // Note that we do not treat 'import' as a contextual 722 // keyword when we're in a caching lexer, because caching lexers only get 723 // used in contexts where import declarations are disallowed. 724 // 725 // Likewise if this is the C++ Modules TS import keyword. 726 if (((LastTokenWasAt && II.isModulesImport()) || 727 Identifier.is(tok::kw_import)) && 728 !InMacroArgs && !DisableMacroExpansion && 729 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 730 CurLexerKind != CLK_CachingLexer) { 731 ModuleImportLoc = Identifier.getLocation(); 732 ModuleImportPath.clear(); 733 ModuleImportExpectsIdentifier = true; 734 CurLexerKind = CLK_LexAfterModuleImport; 735 } 736 return true; 737 } 738 739 void Preprocessor::Lex(Token &Result) { 740 // We loop here until a lex function returns a token; this avoids recursion. 741 bool ReturnedToken; 742 do { 743 switch (CurLexerKind) { 744 case CLK_Lexer: 745 ReturnedToken = CurLexer->Lex(Result); 746 break; 747 case CLK_PTHLexer: 748 ReturnedToken = CurPTHLexer->Lex(Result); 749 break; 750 case CLK_TokenLexer: 751 ReturnedToken = CurTokenLexer->Lex(Result); 752 break; 753 case CLK_CachingLexer: 754 CachingLex(Result); 755 ReturnedToken = true; 756 break; 757 case CLK_LexAfterModuleImport: 758 LexAfterModuleImport(Result); 759 ReturnedToken = true; 760 break; 761 } 762 } while (!ReturnedToken); 763 764 if (Result.is(tok::code_completion)) 765 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 766 767 LastTokenWasAt = Result.is(tok::at); 768 } 769 770 /// \brief Lex a token following the 'import' contextual keyword. 771 /// 772 void Preprocessor::LexAfterModuleImport(Token &Result) { 773 // Figure out what kind of lexer we actually have. 774 recomputeCurLexerKind(); 775 776 // Lex the next token. 777 Lex(Result); 778 779 // The token sequence 780 // 781 // import identifier (. identifier)* 782 // 783 // indicates a module import directive. We already saw the 'import' 784 // contextual keyword, so now we're looking for the identifiers. 785 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 786 // We expected to see an identifier here, and we did; continue handling 787 // identifiers. 788 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 789 Result.getLocation())); 790 ModuleImportExpectsIdentifier = false; 791 CurLexerKind = CLK_LexAfterModuleImport; 792 return; 793 } 794 795 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 796 // see the next identifier. (We can also see a '[[' that begins an 797 // attribute-specifier-seq here under the C++ Modules TS.) 798 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 799 ModuleImportExpectsIdentifier = true; 800 CurLexerKind = CLK_LexAfterModuleImport; 801 return; 802 } 803 804 // If we have a non-empty module path, load the named module. 805 if (!ModuleImportPath.empty()) { 806 // Under the Modules TS, the dot is just part of the module name, and not 807 // a real hierarachy separator. Flatten such module names now. 808 // 809 // FIXME: Is this the right level to be performing this transformation? 810 std::string FlatModuleName; 811 if (getLangOpts().ModulesTS) { 812 for (auto &Piece : ModuleImportPath) { 813 if (!FlatModuleName.empty()) 814 FlatModuleName += "."; 815 FlatModuleName += Piece.first->getName(); 816 } 817 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 818 ModuleImportPath.clear(); 819 ModuleImportPath.push_back( 820 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 821 } 822 823 Module *Imported = nullptr; 824 if (getLangOpts().Modules) { 825 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 826 ModuleImportPath, 827 Module::Hidden, 828 /*IsIncludeDirective=*/false); 829 if (Imported) 830 makeModuleVisible(Imported, ModuleImportLoc); 831 } 832 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 833 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 834 } 835 } 836 837 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 838 CurSubmoduleState->VisibleModules.setVisible( 839 M, Loc, [](Module *) {}, 840 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 841 // FIXME: Include the path in the diagnostic. 842 // FIXME: Include the import location for the conflicting module. 843 Diag(ModuleImportLoc, diag::warn_module_conflict) 844 << Path[0]->getFullModuleName() 845 << Conflict->getFullModuleName() 846 << Message; 847 }); 848 849 // Add this module to the imports list of the currently-built submodule. 850 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 851 BuildingSubmoduleStack.back().M->Imports.insert(M); 852 } 853 854 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 855 const char *DiagnosticTag, 856 bool AllowMacroExpansion) { 857 // We need at least one string literal. 858 if (Result.isNot(tok::string_literal)) { 859 Diag(Result, diag::err_expected_string_literal) 860 << /*Source='in...'*/0 << DiagnosticTag; 861 return false; 862 } 863 864 // Lex string literal tokens, optionally with macro expansion. 865 SmallVector<Token, 4> StrToks; 866 do { 867 StrToks.push_back(Result); 868 869 if (Result.hasUDSuffix()) 870 Diag(Result, diag::err_invalid_string_udl); 871 872 if (AllowMacroExpansion) 873 Lex(Result); 874 else 875 LexUnexpandedToken(Result); 876 } while (Result.is(tok::string_literal)); 877 878 // Concatenate and parse the strings. 879 StringLiteralParser Literal(StrToks, *this); 880 assert(Literal.isAscii() && "Didn't allow wide strings in"); 881 882 if (Literal.hadError) 883 return false; 884 885 if (Literal.Pascal) { 886 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 887 << /*Source='in...'*/0 << DiagnosticTag; 888 return false; 889 } 890 891 String = Literal.GetString(); 892 return true; 893 } 894 895 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 896 assert(Tok.is(tok::numeric_constant)); 897 SmallString<8> IntegerBuffer; 898 bool NumberInvalid = false; 899 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 900 if (NumberInvalid) 901 return false; 902 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 903 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 904 return false; 905 llvm::APInt APVal(64, 0); 906 if (Literal.GetIntegerValue(APVal)) 907 return false; 908 Lex(Tok); 909 Value = APVal.getLimitedValue(); 910 return true; 911 } 912 913 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 914 assert(Handler && "NULL comment handler"); 915 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 916 CommentHandlers.end() && "Comment handler already registered"); 917 CommentHandlers.push_back(Handler); 918 } 919 920 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 921 std::vector<CommentHandler *>::iterator Pos 922 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 923 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 924 CommentHandlers.erase(Pos); 925 } 926 927 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 928 bool AnyPendingTokens = false; 929 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 930 HEnd = CommentHandlers.end(); 931 H != HEnd; ++H) { 932 if ((*H)->HandleComment(*this, Comment)) 933 AnyPendingTokens = true; 934 } 935 if (!AnyPendingTokens || getCommentRetentionState()) 936 return false; 937 Lex(result); 938 return true; 939 } 940 941 ModuleLoader::~ModuleLoader() { } 942 943 CommentHandler::~CommentHandler() { } 944 945 CodeCompletionHandler::~CodeCompletionHandler() { } 946 947 void Preprocessor::createPreprocessingRecord() { 948 if (Record) 949 return; 950 951 Record = new PreprocessingRecord(getSourceManager()); 952 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 953 } 954