1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APFloat.h" 47 #include "llvm/ADT/STLExtras.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/StringExtras.h" 50 #include "llvm/Support/Capacity.h" 51 #include "llvm/Support/ConvertUTF.h" 52 #include "llvm/Support/MemoryBuffer.h" 53 #include "llvm/Support/raw_ostream.h" 54 using namespace clang; 55 56 template class llvm::Registry<clang::PragmaHandler>; 57 58 //===----------------------------------------------------------------------===// 59 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 60 61 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 62 DiagnosticsEngine &diags, LangOptions &opts, 63 SourceManager &SM, HeaderSearch &Headers, 64 ModuleLoader &TheModuleLoader, 65 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 66 TranslationUnitKind TUKind) 67 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), 68 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 69 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 70 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 71 Identifiers(opts, IILookup), 72 PragmaHandlers(new PragmaNamespace(StringRef())), 73 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 74 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 75 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 76 CodeCompletionReached(0), MainFileDir(nullptr), 77 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), 78 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), 79 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 80 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 81 OwnsHeaderSearch = OwnsHeaders; 82 83 CounterValue = 0; // __COUNTER__ starts at 0. 84 85 // Clear stats. 86 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 87 NumIf = NumElse = NumEndif = 0; 88 NumEnteredSourceFiles = 0; 89 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 90 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 91 MaxIncludeStackDepth = 0; 92 NumSkipped = 0; 93 94 // Default to discarding comments. 95 KeepComments = false; 96 KeepMacroComments = false; 97 SuppressIncludeNotFoundError = false; 98 99 // Macro expansion is enabled. 100 DisableMacroExpansion = false; 101 MacroExpansionInDirectivesOverride = false; 102 InMacroArgs = false; 103 InMacroArgPreExpansion = false; 104 NumCachedTokenLexers = 0; 105 PragmasEnabled = true; 106 ParsingIfOrElifDirective = false; 107 PreprocessedOutput = false; 108 109 CachedLexPos = 0; 110 111 // We haven't read anything from the external source. 112 ReadMacrosFromExternalSource = false; 113 114 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 115 // This gets unpoisoned where it is allowed. 116 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 117 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 118 119 // Initialize the pragma handlers. 120 RegisterBuiltinPragmas(); 121 122 // Initialize builtin macros like __LINE__ and friends. 123 RegisterBuiltinMacros(); 124 125 if(LangOpts.Borland) { 126 Ident__exception_info = getIdentifierInfo("_exception_info"); 127 Ident___exception_info = getIdentifierInfo("__exception_info"); 128 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 129 Ident__exception_code = getIdentifierInfo("_exception_code"); 130 Ident___exception_code = getIdentifierInfo("__exception_code"); 131 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 132 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 133 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 134 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 135 } else { 136 Ident__exception_info = Ident__exception_code = nullptr; 137 Ident__abnormal_termination = Ident___exception_info = nullptr; 138 Ident___exception_code = Ident___abnormal_termination = nullptr; 139 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 140 Ident_AbnormalTermination = nullptr; 141 } 142 } 143 144 Preprocessor::~Preprocessor() { 145 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 146 147 IncludeMacroStack.clear(); 148 149 // Destroy any macro definitions. 150 while (MacroInfoChain *I = MIChainHead) { 151 MIChainHead = I->Next; 152 I->~MacroInfoChain(); 153 } 154 155 // Free any cached macro expanders. 156 // This populates MacroArgCache, so all TokenLexers need to be destroyed 157 // before the code below that frees up the MacroArgCache list. 158 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 159 CurTokenLexer.reset(); 160 161 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 162 DeserialMIChainHead = I->Next; 163 I->~DeserializedMacroInfoChain(); 164 } 165 166 // Free any cached MacroArgs. 167 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 168 ArgList = ArgList->deallocate(); 169 170 // Delete the header search info, if we own it. 171 if (OwnsHeaderSearch) 172 delete &HeaderInfo; 173 } 174 175 void Preprocessor::Initialize(const TargetInfo &Target, 176 const TargetInfo *AuxTarget) { 177 assert((!this->Target || this->Target == &Target) && 178 "Invalid override of target information"); 179 this->Target = &Target; 180 181 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 182 "Invalid override of aux target information."); 183 this->AuxTarget = AuxTarget; 184 185 // Initialize information about built-ins. 186 BuiltinInfo.InitializeTarget(Target, AuxTarget); 187 HeaderInfo.setTarget(Target); 188 } 189 190 void Preprocessor::InitializeForModelFile() { 191 NumEnteredSourceFiles = 0; 192 193 // Reset pragmas 194 PragmaHandlersBackup = std::move(PragmaHandlers); 195 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 196 RegisterBuiltinPragmas(); 197 198 // Reset PredefinesFileID 199 PredefinesFileID = FileID(); 200 } 201 202 void Preprocessor::FinalizeForModelFile() { 203 NumEnteredSourceFiles = 1; 204 205 PragmaHandlers = std::move(PragmaHandlersBackup); 206 } 207 208 void Preprocessor::setPTHManager(PTHManager* pm) { 209 PTH.reset(pm); 210 FileMgr.addStatCache(PTH->createStatCache()); 211 } 212 213 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 214 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 215 << getSpelling(Tok) << "'"; 216 217 if (!DumpFlags) return; 218 219 llvm::errs() << "\t"; 220 if (Tok.isAtStartOfLine()) 221 llvm::errs() << " [StartOfLine]"; 222 if (Tok.hasLeadingSpace()) 223 llvm::errs() << " [LeadingSpace]"; 224 if (Tok.isExpandDisabled()) 225 llvm::errs() << " [ExpandDisabled]"; 226 if (Tok.needsCleaning()) { 227 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 228 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 229 << "']"; 230 } 231 232 llvm::errs() << "\tLoc=<"; 233 DumpLocation(Tok.getLocation()); 234 llvm::errs() << ">"; 235 } 236 237 void Preprocessor::DumpLocation(SourceLocation Loc) const { 238 Loc.dump(SourceMgr); 239 } 240 241 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 242 llvm::errs() << "MACRO: "; 243 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 244 DumpToken(MI.getReplacementToken(i)); 245 llvm::errs() << " "; 246 } 247 llvm::errs() << "\n"; 248 } 249 250 void Preprocessor::PrintStats() { 251 llvm::errs() << "\n*** Preprocessor Stats:\n"; 252 llvm::errs() << NumDirectives << " directives found:\n"; 253 llvm::errs() << " " << NumDefined << " #define.\n"; 254 llvm::errs() << " " << NumUndefined << " #undef.\n"; 255 llvm::errs() << " #include/#include_next/#import:\n"; 256 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 257 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 258 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 259 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 260 llvm::errs() << " " << NumEndif << " #endif.\n"; 261 llvm::errs() << " " << NumPragma << " #pragma.\n"; 262 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 263 264 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 265 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 266 << NumFastMacroExpanded << " on the fast path.\n"; 267 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 268 << " token paste (##) operations performed, " 269 << NumFastTokenPaste << " on the fast path.\n"; 270 271 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 272 273 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 274 llvm::errs() << "\n Macro Expanded Tokens: " 275 << llvm::capacity_in_bytes(MacroExpandedTokens); 276 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 277 // FIXME: List information for all submodules. 278 llvm::errs() << "\n Macros: " 279 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 280 llvm::errs() << "\n #pragma push_macro Info: " 281 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 282 llvm::errs() << "\n Poison Reasons: " 283 << llvm::capacity_in_bytes(PoisonReasons); 284 llvm::errs() << "\n Comment Handlers: " 285 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 286 } 287 288 Preprocessor::macro_iterator 289 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 290 if (IncludeExternalMacros && ExternalSource && 291 !ReadMacrosFromExternalSource) { 292 ReadMacrosFromExternalSource = true; 293 ExternalSource->ReadDefinedMacros(); 294 } 295 296 // Make sure we cover all macros in visible modules. 297 for (const ModuleMacro &Macro : ModuleMacros) 298 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 299 300 return CurSubmoduleState->Macros.begin(); 301 } 302 303 size_t Preprocessor::getTotalMemory() const { 304 return BP.getTotalMemory() 305 + llvm::capacity_in_bytes(MacroExpandedTokens) 306 + Predefines.capacity() /* Predefines buffer. */ 307 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 308 // and ModuleMacros. 309 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 310 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 311 + llvm::capacity_in_bytes(PoisonReasons) 312 + llvm::capacity_in_bytes(CommentHandlers); 313 } 314 315 Preprocessor::macro_iterator 316 Preprocessor::macro_end(bool IncludeExternalMacros) const { 317 if (IncludeExternalMacros && ExternalSource && 318 !ReadMacrosFromExternalSource) { 319 ReadMacrosFromExternalSource = true; 320 ExternalSource->ReadDefinedMacros(); 321 } 322 323 return CurSubmoduleState->Macros.end(); 324 } 325 326 /// \brief Compares macro tokens with a specified token value sequence. 327 static bool MacroDefinitionEquals(const MacroInfo *MI, 328 ArrayRef<TokenValue> Tokens) { 329 return Tokens.size() == MI->getNumTokens() && 330 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 331 } 332 333 StringRef Preprocessor::getLastMacroWithSpelling( 334 SourceLocation Loc, 335 ArrayRef<TokenValue> Tokens) const { 336 SourceLocation BestLocation; 337 StringRef BestSpelling; 338 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 339 I != E; ++I) { 340 const MacroDirective::DefInfo 341 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 342 if (!Def || !Def.getMacroInfo()) 343 continue; 344 if (!Def.getMacroInfo()->isObjectLike()) 345 continue; 346 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 347 continue; 348 SourceLocation Location = Def.getLocation(); 349 // Choose the macro defined latest. 350 if (BestLocation.isInvalid() || 351 (Location.isValid() && 352 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 353 BestLocation = Location; 354 BestSpelling = I->first->getName(); 355 } 356 } 357 return BestSpelling; 358 } 359 360 void Preprocessor::recomputeCurLexerKind() { 361 if (CurLexer) 362 CurLexerKind = CLK_Lexer; 363 else if (CurPTHLexer) 364 CurLexerKind = CLK_PTHLexer; 365 else if (CurTokenLexer) 366 CurLexerKind = CLK_TokenLexer; 367 else 368 CurLexerKind = CLK_CachingLexer; 369 } 370 371 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 372 unsigned CompleteLine, 373 unsigned CompleteColumn) { 374 assert(File); 375 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 376 assert(!CodeCompletionFile && "Already set"); 377 378 using llvm::MemoryBuffer; 379 380 // Load the actual file's contents. 381 bool Invalid = false; 382 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 383 if (Invalid) 384 return true; 385 386 // Find the byte position of the truncation point. 387 const char *Position = Buffer->getBufferStart(); 388 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 389 for (; *Position; ++Position) { 390 if (*Position != '\r' && *Position != '\n') 391 continue; 392 393 // Eat \r\n or \n\r as a single line. 394 if ((Position[1] == '\r' || Position[1] == '\n') && 395 Position[0] != Position[1]) 396 ++Position; 397 ++Position; 398 break; 399 } 400 } 401 402 Position += CompleteColumn - 1; 403 404 // If pointing inside the preamble, adjust the position at the beginning of 405 // the file after the preamble. 406 if (SkipMainFilePreamble.first && 407 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 408 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 409 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 410 } 411 412 if (Position > Buffer->getBufferEnd()) 413 Position = Buffer->getBufferEnd(); 414 415 CodeCompletionFile = File; 416 CodeCompletionOffset = Position - Buffer->getBufferStart(); 417 418 std::unique_ptr<MemoryBuffer> NewBuffer = 419 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 420 Buffer->getBufferIdentifier()); 421 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 422 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 423 *NewPos = '\0'; 424 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 425 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 426 427 return false; 428 } 429 430 void Preprocessor::CodeCompleteNaturalLanguage() { 431 if (CodeComplete) 432 CodeComplete->CodeCompleteNaturalLanguage(); 433 setCodeCompletionReached(); 434 } 435 436 /// getSpelling - This method is used to get the spelling of a token into a 437 /// SmallVector. Note that the returned StringRef may not point to the 438 /// supplied buffer if a copy can be avoided. 439 StringRef Preprocessor::getSpelling(const Token &Tok, 440 SmallVectorImpl<char> &Buffer, 441 bool *Invalid) const { 442 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 443 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 444 // Try the fast path. 445 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 446 return II->getName(); 447 } 448 449 // Resize the buffer if we need to copy into it. 450 if (Tok.needsCleaning()) 451 Buffer.resize(Tok.getLength()); 452 453 const char *Ptr = Buffer.data(); 454 unsigned Len = getSpelling(Tok, Ptr, Invalid); 455 return StringRef(Ptr, Len); 456 } 457 458 /// CreateString - Plop the specified string into a scratch buffer and return a 459 /// location for it. If specified, the source location provides a source 460 /// location for the token. 461 void Preprocessor::CreateString(StringRef Str, Token &Tok, 462 SourceLocation ExpansionLocStart, 463 SourceLocation ExpansionLocEnd) { 464 Tok.setLength(Str.size()); 465 466 const char *DestPtr; 467 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 468 469 if (ExpansionLocStart.isValid()) 470 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 471 ExpansionLocEnd, Str.size()); 472 Tok.setLocation(Loc); 473 474 // If this is a raw identifier or a literal token, set the pointer data. 475 if (Tok.is(tok::raw_identifier)) 476 Tok.setRawIdentifierData(DestPtr); 477 else if (Tok.isLiteral()) 478 Tok.setLiteralData(DestPtr); 479 } 480 481 Module *Preprocessor::getCurrentModule() { 482 if (!getLangOpts().CompilingModule) 483 return nullptr; 484 485 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 486 } 487 488 //===----------------------------------------------------------------------===// 489 // Preprocessor Initialization Methods 490 //===----------------------------------------------------------------------===// 491 492 493 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 494 /// which implicitly adds the builtin defines etc. 495 void Preprocessor::EnterMainSourceFile() { 496 // We do not allow the preprocessor to reenter the main file. Doing so will 497 // cause FileID's to accumulate information from both runs (e.g. #line 498 // information) and predefined macros aren't guaranteed to be set properly. 499 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 500 FileID MainFileID = SourceMgr.getMainFileID(); 501 502 // If MainFileID is loaded it means we loaded an AST file, no need to enter 503 // a main file. 504 if (!SourceMgr.isLoadedFileID(MainFileID)) { 505 // Enter the main file source buffer. 506 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 507 508 // If we've been asked to skip bytes in the main file (e.g., as part of a 509 // precompiled preamble), do so now. 510 if (SkipMainFilePreamble.first > 0) 511 CurLexer->SkipBytes(SkipMainFilePreamble.first, 512 SkipMainFilePreamble.second); 513 514 // Tell the header info that the main file was entered. If the file is later 515 // #imported, it won't be re-entered. 516 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 517 HeaderInfo.IncrementIncludeCount(FE); 518 } 519 520 // Preprocess Predefines to populate the initial preprocessor state. 521 std::unique_ptr<llvm::MemoryBuffer> SB = 522 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 523 assert(SB && "Cannot create predefined source buffer"); 524 FileID FID = SourceMgr.createFileID(std::move(SB)); 525 assert(FID.isValid() && "Could not create FileID for predefines?"); 526 setPredefinesFileID(FID); 527 528 // Start parsing the predefines. 529 EnterSourceFile(FID, nullptr, SourceLocation()); 530 } 531 532 void Preprocessor::EndSourceFile() { 533 // Notify the client that we reached the end of the source file. 534 if (Callbacks) 535 Callbacks->EndOfMainFile(); 536 } 537 538 //===----------------------------------------------------------------------===// 539 // Lexer Event Handling. 540 //===----------------------------------------------------------------------===// 541 542 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 543 /// identifier information for the token and install it into the token, 544 /// updating the token kind accordingly. 545 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 546 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 547 548 // Look up this token, see if it is a macro, or if it is a language keyword. 549 IdentifierInfo *II; 550 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 551 // No cleaning needed, just use the characters from the lexed buffer. 552 II = getIdentifierInfo(Identifier.getRawIdentifier()); 553 } else { 554 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 555 SmallString<64> IdentifierBuffer; 556 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 557 558 if (Identifier.hasUCN()) { 559 SmallString<64> UCNIdentifierBuffer; 560 expandUCNs(UCNIdentifierBuffer, CleanedStr); 561 II = getIdentifierInfo(UCNIdentifierBuffer); 562 } else { 563 II = getIdentifierInfo(CleanedStr); 564 } 565 } 566 567 // Update the token info (identifier info and appropriate token kind). 568 Identifier.setIdentifierInfo(II); 569 Identifier.setKind(II->getTokenID()); 570 571 return II; 572 } 573 574 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 575 PoisonReasons[II] = DiagID; 576 } 577 578 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 579 assert(Ident__exception_code && Ident__exception_info); 580 assert(Ident___exception_code && Ident___exception_info); 581 Ident__exception_code->setIsPoisoned(Poison); 582 Ident___exception_code->setIsPoisoned(Poison); 583 Ident_GetExceptionCode->setIsPoisoned(Poison); 584 Ident__exception_info->setIsPoisoned(Poison); 585 Ident___exception_info->setIsPoisoned(Poison); 586 Ident_GetExceptionInfo->setIsPoisoned(Poison); 587 Ident__abnormal_termination->setIsPoisoned(Poison); 588 Ident___abnormal_termination->setIsPoisoned(Poison); 589 Ident_AbnormalTermination->setIsPoisoned(Poison); 590 } 591 592 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 593 assert(Identifier.getIdentifierInfo() && 594 "Can't handle identifiers without identifier info!"); 595 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 596 PoisonReasons.find(Identifier.getIdentifierInfo()); 597 if(it == PoisonReasons.end()) 598 Diag(Identifier, diag::err_pp_used_poisoned_id); 599 else 600 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 601 } 602 603 /// \brief Returns a diagnostic message kind for reporting a future keyword as 604 /// appropriate for the identifier and specified language. 605 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 606 const LangOptions &LangOpts) { 607 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 608 609 if (LangOpts.CPlusPlus) 610 return llvm::StringSwitch<diag::kind>(II.getName()) 611 #define CXX11_KEYWORD(NAME, FLAGS) \ 612 .Case(#NAME, diag::warn_cxx11_keyword) 613 #include "clang/Basic/TokenKinds.def" 614 ; 615 616 llvm_unreachable( 617 "Keyword not known to come from a newer Standard or proposed Standard"); 618 } 619 620 /// HandleIdentifier - This callback is invoked when the lexer reads an 621 /// identifier. This callback looks up the identifier in the map and/or 622 /// potentially macro expands it or turns it into a named token (like 'for'). 623 /// 624 /// Note that callers of this method are guarded by checking the 625 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 626 /// IdentifierInfo methods that compute these properties will need to change to 627 /// match. 628 bool Preprocessor::HandleIdentifier(Token &Identifier) { 629 assert(Identifier.getIdentifierInfo() && 630 "Can't handle identifiers without identifier info!"); 631 632 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 633 634 // If the information about this identifier is out of date, update it from 635 // the external source. 636 // We have to treat __VA_ARGS__ in a special way, since it gets 637 // serialized with isPoisoned = true, but our preprocessor may have 638 // unpoisoned it if we're defining a C99 macro. 639 if (II.isOutOfDate()) { 640 bool CurrentIsPoisoned = false; 641 if (&II == Ident__VA_ARGS__) 642 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 643 644 ExternalSource->updateOutOfDateIdentifier(II); 645 Identifier.setKind(II.getTokenID()); 646 647 if (&II == Ident__VA_ARGS__) 648 II.setIsPoisoned(CurrentIsPoisoned); 649 } 650 651 // If this identifier was poisoned, and if it was not produced from a macro 652 // expansion, emit an error. 653 if (II.isPoisoned() && CurPPLexer) { 654 HandlePoisonedIdentifier(Identifier); 655 } 656 657 // If this is a macro to be expanded, do it. 658 if (MacroDefinition MD = getMacroDefinition(&II)) { 659 auto *MI = MD.getMacroInfo(); 660 assert(MI && "macro definition with no macro info?"); 661 if (!DisableMacroExpansion) { 662 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 663 // C99 6.10.3p10: If the preprocessing token immediately after the 664 // macro name isn't a '(', this macro should not be expanded. 665 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 666 return HandleMacroExpandedIdentifier(Identifier, MD); 667 } else { 668 // C99 6.10.3.4p2 says that a disabled macro may never again be 669 // expanded, even if it's in a context where it could be expanded in the 670 // future. 671 Identifier.setFlag(Token::DisableExpand); 672 if (MI->isObjectLike() || isNextPPTokenLParen()) 673 Diag(Identifier, diag::pp_disabled_macro_expansion); 674 } 675 } 676 } 677 678 // If this identifier is a keyword in a newer Standard or proposed Standard, 679 // produce a warning. Don't warn if we're not considering macro expansion, 680 // since this identifier might be the name of a macro. 681 // FIXME: This warning is disabled in cases where it shouldn't be, like 682 // "#define constexpr constexpr", "int constexpr;" 683 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 684 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 685 << II.getName(); 686 // Don't diagnose this keyword again in this translation unit. 687 II.setIsFutureCompatKeyword(false); 688 } 689 690 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 691 // then we act as if it is the actual operator and not the textual 692 // representation of it. 693 if (II.isCPlusPlusOperatorKeyword()) 694 Identifier.setIdentifierInfo(nullptr); 695 696 // If this is an extension token, diagnose its use. 697 // We avoid diagnosing tokens that originate from macro definitions. 698 // FIXME: This warning is disabled in cases where it shouldn't be, 699 // like "#define TY typeof", "TY(1) x". 700 if (II.isExtensionToken() && !DisableMacroExpansion) 701 Diag(Identifier, diag::ext_token_used); 702 703 // If this is the 'import' contextual keyword following an '@', note 704 // that the next token indicates a module name. 705 // 706 // Note that we do not treat 'import' as a contextual 707 // keyword when we're in a caching lexer, because caching lexers only get 708 // used in contexts where import declarations are disallowed. 709 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 710 !DisableMacroExpansion && 711 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 712 CurLexerKind != CLK_CachingLexer) { 713 ModuleImportLoc = Identifier.getLocation(); 714 ModuleImportPath.clear(); 715 ModuleImportExpectsIdentifier = true; 716 CurLexerKind = CLK_LexAfterModuleImport; 717 } 718 return true; 719 } 720 721 void Preprocessor::Lex(Token &Result) { 722 // We loop here until a lex function returns a token; this avoids recursion. 723 bool ReturnedToken; 724 do { 725 switch (CurLexerKind) { 726 case CLK_Lexer: 727 ReturnedToken = CurLexer->Lex(Result); 728 break; 729 case CLK_PTHLexer: 730 ReturnedToken = CurPTHLexer->Lex(Result); 731 break; 732 case CLK_TokenLexer: 733 ReturnedToken = CurTokenLexer->Lex(Result); 734 break; 735 case CLK_CachingLexer: 736 CachingLex(Result); 737 ReturnedToken = true; 738 break; 739 case CLK_LexAfterModuleImport: 740 LexAfterModuleImport(Result); 741 ReturnedToken = true; 742 break; 743 } 744 } while (!ReturnedToken); 745 746 LastTokenWasAt = Result.is(tok::at); 747 } 748 749 750 /// \brief Lex a token following the 'import' contextual keyword. 751 /// 752 void Preprocessor::LexAfterModuleImport(Token &Result) { 753 // Figure out what kind of lexer we actually have. 754 recomputeCurLexerKind(); 755 756 // Lex the next token. 757 Lex(Result); 758 759 // The token sequence 760 // 761 // import identifier (. identifier)* 762 // 763 // indicates a module import directive. We already saw the 'import' 764 // contextual keyword, so now we're looking for the identifiers. 765 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 766 // We expected to see an identifier here, and we did; continue handling 767 // identifiers. 768 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 769 Result.getLocation())); 770 ModuleImportExpectsIdentifier = false; 771 CurLexerKind = CLK_LexAfterModuleImport; 772 return; 773 } 774 775 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 776 // see the next identifier. 777 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 778 ModuleImportExpectsIdentifier = true; 779 CurLexerKind = CLK_LexAfterModuleImport; 780 return; 781 } 782 783 // If we have a non-empty module path, load the named module. 784 if (!ModuleImportPath.empty()) { 785 Module *Imported = nullptr; 786 if (getLangOpts().Modules) { 787 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 788 ModuleImportPath, 789 Module::Hidden, 790 /*IsIncludeDirective=*/false); 791 if (Imported) 792 makeModuleVisible(Imported, ModuleImportLoc); 793 } 794 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 795 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 796 } 797 } 798 799 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 800 CurSubmoduleState->VisibleModules.setVisible( 801 M, Loc, [](Module *) {}, 802 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 803 // FIXME: Include the path in the diagnostic. 804 // FIXME: Include the import location for the conflicting module. 805 Diag(ModuleImportLoc, diag::warn_module_conflict) 806 << Path[0]->getFullModuleName() 807 << Conflict->getFullModuleName() 808 << Message; 809 }); 810 811 // Add this module to the imports list of the currently-built submodule. 812 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 813 BuildingSubmoduleStack.back().M->Imports.insert(M); 814 } 815 816 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 817 const char *DiagnosticTag, 818 bool AllowMacroExpansion) { 819 // We need at least one string literal. 820 if (Result.isNot(tok::string_literal)) { 821 Diag(Result, diag::err_expected_string_literal) 822 << /*Source='in...'*/0 << DiagnosticTag; 823 return false; 824 } 825 826 // Lex string literal tokens, optionally with macro expansion. 827 SmallVector<Token, 4> StrToks; 828 do { 829 StrToks.push_back(Result); 830 831 if (Result.hasUDSuffix()) 832 Diag(Result, diag::err_invalid_string_udl); 833 834 if (AllowMacroExpansion) 835 Lex(Result); 836 else 837 LexUnexpandedToken(Result); 838 } while (Result.is(tok::string_literal)); 839 840 // Concatenate and parse the strings. 841 StringLiteralParser Literal(StrToks, *this); 842 assert(Literal.isAscii() && "Didn't allow wide strings in"); 843 844 if (Literal.hadError) 845 return false; 846 847 if (Literal.Pascal) { 848 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 849 << /*Source='in...'*/0 << DiagnosticTag; 850 return false; 851 } 852 853 String = Literal.GetString(); 854 return true; 855 } 856 857 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 858 assert(Tok.is(tok::numeric_constant)); 859 SmallString<8> IntegerBuffer; 860 bool NumberInvalid = false; 861 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 862 if (NumberInvalid) 863 return false; 864 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 865 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 866 return false; 867 llvm::APInt APVal(64, 0); 868 if (Literal.GetIntegerValue(APVal)) 869 return false; 870 Lex(Tok); 871 Value = APVal.getLimitedValue(); 872 return true; 873 } 874 875 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 876 assert(Handler && "NULL comment handler"); 877 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 878 CommentHandlers.end() && "Comment handler already registered"); 879 CommentHandlers.push_back(Handler); 880 } 881 882 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 883 std::vector<CommentHandler *>::iterator Pos 884 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 885 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 886 CommentHandlers.erase(Pos); 887 } 888 889 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 890 bool AnyPendingTokens = false; 891 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 892 HEnd = CommentHandlers.end(); 893 H != HEnd; ++H) { 894 if ((*H)->HandleComment(*this, Comment)) 895 AnyPendingTokens = true; 896 } 897 if (!AnyPendingTokens || getCommentRetentionState()) 898 return false; 899 Lex(result); 900 return true; 901 } 902 903 ModuleLoader::~ModuleLoader() { } 904 905 CommentHandler::~CommentHandler() { } 906 907 CodeCompletionHandler::~CodeCompletionHandler() { } 908 909 void Preprocessor::createPreprocessingRecord() { 910 if (Record) 911 return; 912 913 Record = new PreprocessingRecord(getSourceManager()); 914 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 915 } 916