1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/PTHManager.h" 42 #include "clang/Lex/Pragma.h" 43 #include "clang/Lex/PreprocessingRecord.h" 44 #include "clang/Lex/PreprocessorOptions.h" 45 #include "clang/Lex/ScratchBuffer.h" 46 #include "llvm/ADT/APInt.h" 47 #include "llvm/ADT/DenseMap.h" 48 #include "llvm/ADT/SmallString.h" 49 #include "llvm/ADT/SmallVector.h" 50 #include "llvm/ADT/STLExtras.h" 51 #include "llvm/ADT/StringRef.h" 52 #include "llvm/ADT/StringSwitch.h" 53 #include "llvm/Support/Capacity.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/Support/MemoryBuffer.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <memory> 60 #include <string> 61 #include <utility> 62 #include <vector> 63 64 using namespace clang; 65 66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) 67 68 //===----------------------------------------------------------------------===// 69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 70 71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 72 DiagnosticsEngine &diags, LangOptions &opts, 73 SourceManager &SM, MemoryBufferCache &PCMCache, 74 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 75 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 76 TranslationUnitKind TUKind) 77 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr), 78 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 79 PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)), 80 HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), 81 ExternalSource(nullptr), Identifiers(opts, IILookup), 82 PragmaHandlers(new PragmaNamespace(StringRef())), 83 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 84 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 85 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 86 CodeCompletionReached(false), CodeCompletionII(nullptr), 87 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 88 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), 89 CurLexerSubmodule(nullptr), Callbacks(nullptr), 90 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 91 Record(nullptr), MIChainHead(nullptr) { 92 OwnsHeaderSearch = OwnsHeaders; 93 94 CounterValue = 0; // __COUNTER__ starts at 0. 95 96 // Clear stats. 97 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 98 NumIf = NumElse = NumEndif = 0; 99 NumEnteredSourceFiles = 0; 100 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 101 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 102 MaxIncludeStackDepth = 0; 103 NumSkipped = 0; 104 105 // Default to discarding comments. 106 KeepComments = false; 107 KeepMacroComments = false; 108 SuppressIncludeNotFoundError = false; 109 110 // Macro expansion is enabled. 111 DisableMacroExpansion = false; 112 MacroExpansionInDirectivesOverride = false; 113 InMacroArgs = false; 114 InMacroArgPreExpansion = false; 115 NumCachedTokenLexers = 0; 116 PragmasEnabled = true; 117 ParsingIfOrElifDirective = false; 118 PreprocessedOutput = false; 119 120 CachedLexPos = 0; 121 122 // We haven't read anything from the external source. 123 ReadMacrosFromExternalSource = false; 124 125 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 126 // This gets unpoisoned where it is allowed. 127 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 128 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 129 130 // Initialize the pragma handlers. 131 RegisterBuiltinPragmas(); 132 133 // Initialize builtin macros like __LINE__ and friends. 134 RegisterBuiltinMacros(); 135 136 if(LangOpts.Borland) { 137 Ident__exception_info = getIdentifierInfo("_exception_info"); 138 Ident___exception_info = getIdentifierInfo("__exception_info"); 139 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 140 Ident__exception_code = getIdentifierInfo("_exception_code"); 141 Ident___exception_code = getIdentifierInfo("__exception_code"); 142 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 143 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 144 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 145 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 146 } else { 147 Ident__exception_info = Ident__exception_code = nullptr; 148 Ident__abnormal_termination = Ident___exception_info = nullptr; 149 Ident___exception_code = Ident___abnormal_termination = nullptr; 150 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 151 Ident_AbnormalTermination = nullptr; 152 } 153 } 154 155 Preprocessor::~Preprocessor() { 156 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 157 158 IncludeMacroStack.clear(); 159 160 // Destroy any macro definitions. 161 while (MacroInfoChain *I = MIChainHead) { 162 MIChainHead = I->Next; 163 I->~MacroInfoChain(); 164 } 165 166 // Free any cached macro expanders. 167 // This populates MacroArgCache, so all TokenLexers need to be destroyed 168 // before the code below that frees up the MacroArgCache list. 169 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 170 CurTokenLexer.reset(); 171 172 // Free any cached MacroArgs. 173 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 174 ArgList = ArgList->deallocate(); 175 176 // Delete the header search info, if we own it. 177 if (OwnsHeaderSearch) 178 delete &HeaderInfo; 179 } 180 181 void Preprocessor::Initialize(const TargetInfo &Target, 182 const TargetInfo *AuxTarget) { 183 assert((!this->Target || this->Target == &Target) && 184 "Invalid override of target information"); 185 this->Target = &Target; 186 187 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 188 "Invalid override of aux target information."); 189 this->AuxTarget = AuxTarget; 190 191 // Initialize information about built-ins. 192 BuiltinInfo.InitializeTarget(Target, AuxTarget); 193 HeaderInfo.setTarget(Target); 194 } 195 196 void Preprocessor::InitializeForModelFile() { 197 NumEnteredSourceFiles = 0; 198 199 // Reset pragmas 200 PragmaHandlersBackup = std::move(PragmaHandlers); 201 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 202 RegisterBuiltinPragmas(); 203 204 // Reset PredefinesFileID 205 PredefinesFileID = FileID(); 206 } 207 208 void Preprocessor::FinalizeForModelFile() { 209 NumEnteredSourceFiles = 1; 210 211 PragmaHandlers = std::move(PragmaHandlersBackup); 212 } 213 214 void Preprocessor::setPTHManager(PTHManager* pm) { 215 PTH.reset(pm); 216 FileMgr.addStatCache(PTH->createStatCache()); 217 } 218 219 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 220 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 221 << getSpelling(Tok) << "'"; 222 223 if (!DumpFlags) return; 224 225 llvm::errs() << "\t"; 226 if (Tok.isAtStartOfLine()) 227 llvm::errs() << " [StartOfLine]"; 228 if (Tok.hasLeadingSpace()) 229 llvm::errs() << " [LeadingSpace]"; 230 if (Tok.isExpandDisabled()) 231 llvm::errs() << " [ExpandDisabled]"; 232 if (Tok.needsCleaning()) { 233 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 234 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 235 << "']"; 236 } 237 238 llvm::errs() << "\tLoc=<"; 239 DumpLocation(Tok.getLocation()); 240 llvm::errs() << ">"; 241 } 242 243 void Preprocessor::DumpLocation(SourceLocation Loc) const { 244 Loc.dump(SourceMgr); 245 } 246 247 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 248 llvm::errs() << "MACRO: "; 249 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 250 DumpToken(MI.getReplacementToken(i)); 251 llvm::errs() << " "; 252 } 253 llvm::errs() << "\n"; 254 } 255 256 void Preprocessor::PrintStats() { 257 llvm::errs() << "\n*** Preprocessor Stats:\n"; 258 llvm::errs() << NumDirectives << " directives found:\n"; 259 llvm::errs() << " " << NumDefined << " #define.\n"; 260 llvm::errs() << " " << NumUndefined << " #undef.\n"; 261 llvm::errs() << " #include/#include_next/#import:\n"; 262 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 263 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 264 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 265 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 266 llvm::errs() << " " << NumEndif << " #endif.\n"; 267 llvm::errs() << " " << NumPragma << " #pragma.\n"; 268 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 269 270 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 271 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 272 << NumFastMacroExpanded << " on the fast path.\n"; 273 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 274 << " token paste (##) operations performed, " 275 << NumFastTokenPaste << " on the fast path.\n"; 276 277 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 278 279 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 280 llvm::errs() << "\n Macro Expanded Tokens: " 281 << llvm::capacity_in_bytes(MacroExpandedTokens); 282 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 283 // FIXME: List information for all submodules. 284 llvm::errs() << "\n Macros: " 285 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 286 llvm::errs() << "\n #pragma push_macro Info: " 287 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 288 llvm::errs() << "\n Poison Reasons: " 289 << llvm::capacity_in_bytes(PoisonReasons); 290 llvm::errs() << "\n Comment Handlers: " 291 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 292 } 293 294 Preprocessor::macro_iterator 295 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 296 if (IncludeExternalMacros && ExternalSource && 297 !ReadMacrosFromExternalSource) { 298 ReadMacrosFromExternalSource = true; 299 ExternalSource->ReadDefinedMacros(); 300 } 301 302 // Make sure we cover all macros in visible modules. 303 for (const ModuleMacro &Macro : ModuleMacros) 304 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 305 306 return CurSubmoduleState->Macros.begin(); 307 } 308 309 size_t Preprocessor::getTotalMemory() const { 310 return BP.getTotalMemory() 311 + llvm::capacity_in_bytes(MacroExpandedTokens) 312 + Predefines.capacity() /* Predefines buffer. */ 313 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 314 // and ModuleMacros. 315 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 316 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 317 + llvm::capacity_in_bytes(PoisonReasons) 318 + llvm::capacity_in_bytes(CommentHandlers); 319 } 320 321 Preprocessor::macro_iterator 322 Preprocessor::macro_end(bool IncludeExternalMacros) const { 323 if (IncludeExternalMacros && ExternalSource && 324 !ReadMacrosFromExternalSource) { 325 ReadMacrosFromExternalSource = true; 326 ExternalSource->ReadDefinedMacros(); 327 } 328 329 return CurSubmoduleState->Macros.end(); 330 } 331 332 /// \brief Compares macro tokens with a specified token value sequence. 333 static bool MacroDefinitionEquals(const MacroInfo *MI, 334 ArrayRef<TokenValue> Tokens) { 335 return Tokens.size() == MI->getNumTokens() && 336 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 337 } 338 339 StringRef Preprocessor::getLastMacroWithSpelling( 340 SourceLocation Loc, 341 ArrayRef<TokenValue> Tokens) const { 342 SourceLocation BestLocation; 343 StringRef BestSpelling; 344 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 345 I != E; ++I) { 346 const MacroDirective::DefInfo 347 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 348 if (!Def || !Def.getMacroInfo()) 349 continue; 350 if (!Def.getMacroInfo()->isObjectLike()) 351 continue; 352 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 353 continue; 354 SourceLocation Location = Def.getLocation(); 355 // Choose the macro defined latest. 356 if (BestLocation.isInvalid() || 357 (Location.isValid() && 358 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 359 BestLocation = Location; 360 BestSpelling = I->first->getName(); 361 } 362 } 363 return BestSpelling; 364 } 365 366 void Preprocessor::recomputeCurLexerKind() { 367 if (CurLexer) 368 CurLexerKind = CLK_Lexer; 369 else if (CurPTHLexer) 370 CurLexerKind = CLK_PTHLexer; 371 else if (CurTokenLexer) 372 CurLexerKind = CLK_TokenLexer; 373 else 374 CurLexerKind = CLK_CachingLexer; 375 } 376 377 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 378 unsigned CompleteLine, 379 unsigned CompleteColumn) { 380 assert(File); 381 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 382 assert(!CodeCompletionFile && "Already set"); 383 384 using llvm::MemoryBuffer; 385 386 // Load the actual file's contents. 387 bool Invalid = false; 388 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 389 if (Invalid) 390 return true; 391 392 // Find the byte position of the truncation point. 393 const char *Position = Buffer->getBufferStart(); 394 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 395 for (; *Position; ++Position) { 396 if (*Position != '\r' && *Position != '\n') 397 continue; 398 399 // Eat \r\n or \n\r as a single line. 400 if ((Position[1] == '\r' || Position[1] == '\n') && 401 Position[0] != Position[1]) 402 ++Position; 403 ++Position; 404 break; 405 } 406 } 407 408 Position += CompleteColumn - 1; 409 410 // If pointing inside the preamble, adjust the position at the beginning of 411 // the file after the preamble. 412 if (SkipMainFilePreamble.first && 413 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 414 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 415 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 416 } 417 418 if (Position > Buffer->getBufferEnd()) 419 Position = Buffer->getBufferEnd(); 420 421 CodeCompletionFile = File; 422 CodeCompletionOffset = Position - Buffer->getBufferStart(); 423 424 std::unique_ptr<MemoryBuffer> NewBuffer = 425 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 426 Buffer->getBufferIdentifier()); 427 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 428 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 429 *NewPos = '\0'; 430 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 431 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 432 433 return false; 434 } 435 436 void Preprocessor::CodeCompleteNaturalLanguage() { 437 if (CodeComplete) 438 CodeComplete->CodeCompleteNaturalLanguage(); 439 setCodeCompletionReached(); 440 } 441 442 /// getSpelling - This method is used to get the spelling of a token into a 443 /// SmallVector. Note that the returned StringRef may not point to the 444 /// supplied buffer if a copy can be avoided. 445 StringRef Preprocessor::getSpelling(const Token &Tok, 446 SmallVectorImpl<char> &Buffer, 447 bool *Invalid) const { 448 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 449 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 450 // Try the fast path. 451 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 452 return II->getName(); 453 } 454 455 // Resize the buffer if we need to copy into it. 456 if (Tok.needsCleaning()) 457 Buffer.resize(Tok.getLength()); 458 459 const char *Ptr = Buffer.data(); 460 unsigned Len = getSpelling(Tok, Ptr, Invalid); 461 return StringRef(Ptr, Len); 462 } 463 464 /// CreateString - Plop the specified string into a scratch buffer and return a 465 /// location for it. If specified, the source location provides a source 466 /// location for the token. 467 void Preprocessor::CreateString(StringRef Str, Token &Tok, 468 SourceLocation ExpansionLocStart, 469 SourceLocation ExpansionLocEnd) { 470 Tok.setLength(Str.size()); 471 472 const char *DestPtr; 473 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 474 475 if (ExpansionLocStart.isValid()) 476 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 477 ExpansionLocEnd, Str.size()); 478 Tok.setLocation(Loc); 479 480 // If this is a raw identifier or a literal token, set the pointer data. 481 if (Tok.is(tok::raw_identifier)) 482 Tok.setRawIdentifierData(DestPtr); 483 else if (Tok.isLiteral()) 484 Tok.setLiteralData(DestPtr); 485 } 486 487 Module *Preprocessor::getCurrentModule() { 488 if (!getLangOpts().isCompilingModule()) 489 return nullptr; 490 491 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 492 } 493 494 //===----------------------------------------------------------------------===// 495 // Preprocessor Initialization Methods 496 //===----------------------------------------------------------------------===// 497 498 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 499 /// which implicitly adds the builtin defines etc. 500 void Preprocessor::EnterMainSourceFile() { 501 // We do not allow the preprocessor to reenter the main file. Doing so will 502 // cause FileID's to accumulate information from both runs (e.g. #line 503 // information) and predefined macros aren't guaranteed to be set properly. 504 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 505 FileID MainFileID = SourceMgr.getMainFileID(); 506 507 // If MainFileID is loaded it means we loaded an AST file, no need to enter 508 // a main file. 509 if (!SourceMgr.isLoadedFileID(MainFileID)) { 510 // Enter the main file source buffer. 511 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 512 513 // If we've been asked to skip bytes in the main file (e.g., as part of a 514 // precompiled preamble), do so now. 515 if (SkipMainFilePreamble.first > 0) 516 CurLexer->SkipBytes(SkipMainFilePreamble.first, 517 SkipMainFilePreamble.second); 518 519 // Tell the header info that the main file was entered. If the file is later 520 // #imported, it won't be re-entered. 521 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 522 HeaderInfo.IncrementIncludeCount(FE); 523 } 524 525 // Preprocess Predefines to populate the initial preprocessor state. 526 std::unique_ptr<llvm::MemoryBuffer> SB = 527 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 528 assert(SB && "Cannot create predefined source buffer"); 529 FileID FID = SourceMgr.createFileID(std::move(SB)); 530 assert(FID.isValid() && "Could not create FileID for predefines?"); 531 setPredefinesFileID(FID); 532 533 // Start parsing the predefines. 534 EnterSourceFile(FID, nullptr, SourceLocation()); 535 } 536 537 void Preprocessor::EndSourceFile() { 538 // Notify the client that we reached the end of the source file. 539 if (Callbacks) 540 Callbacks->EndOfMainFile(); 541 } 542 543 //===----------------------------------------------------------------------===// 544 // Lexer Event Handling. 545 //===----------------------------------------------------------------------===// 546 547 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 548 /// identifier information for the token and install it into the token, 549 /// updating the token kind accordingly. 550 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 551 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 552 553 // Look up this token, see if it is a macro, or if it is a language keyword. 554 IdentifierInfo *II; 555 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 556 // No cleaning needed, just use the characters from the lexed buffer. 557 II = getIdentifierInfo(Identifier.getRawIdentifier()); 558 } else { 559 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 560 SmallString<64> IdentifierBuffer; 561 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 562 563 if (Identifier.hasUCN()) { 564 SmallString<64> UCNIdentifierBuffer; 565 expandUCNs(UCNIdentifierBuffer, CleanedStr); 566 II = getIdentifierInfo(UCNIdentifierBuffer); 567 } else { 568 II = getIdentifierInfo(CleanedStr); 569 } 570 } 571 572 // Update the token info (identifier info and appropriate token kind). 573 Identifier.setIdentifierInfo(II); 574 Identifier.setKind(II->getTokenID()); 575 576 return II; 577 } 578 579 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 580 PoisonReasons[II] = DiagID; 581 } 582 583 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 584 assert(Ident__exception_code && Ident__exception_info); 585 assert(Ident___exception_code && Ident___exception_info); 586 Ident__exception_code->setIsPoisoned(Poison); 587 Ident___exception_code->setIsPoisoned(Poison); 588 Ident_GetExceptionCode->setIsPoisoned(Poison); 589 Ident__exception_info->setIsPoisoned(Poison); 590 Ident___exception_info->setIsPoisoned(Poison); 591 Ident_GetExceptionInfo->setIsPoisoned(Poison); 592 Ident__abnormal_termination->setIsPoisoned(Poison); 593 Ident___abnormal_termination->setIsPoisoned(Poison); 594 Ident_AbnormalTermination->setIsPoisoned(Poison); 595 } 596 597 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 598 assert(Identifier.getIdentifierInfo() && 599 "Can't handle identifiers without identifier info!"); 600 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 601 PoisonReasons.find(Identifier.getIdentifierInfo()); 602 if(it == PoisonReasons.end()) 603 Diag(Identifier, diag::err_pp_used_poisoned_id); 604 else 605 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 606 } 607 608 /// \brief Returns a diagnostic message kind for reporting a future keyword as 609 /// appropriate for the identifier and specified language. 610 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 611 const LangOptions &LangOpts) { 612 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 613 614 if (LangOpts.CPlusPlus) 615 return llvm::StringSwitch<diag::kind>(II.getName()) 616 #define CXX11_KEYWORD(NAME, FLAGS) \ 617 .Case(#NAME, diag::warn_cxx11_keyword) 618 #include "clang/Basic/TokenKinds.def" 619 ; 620 621 llvm_unreachable( 622 "Keyword not known to come from a newer Standard or proposed Standard"); 623 } 624 625 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { 626 assert(II.isOutOfDate() && "not out of date"); 627 getExternalSource()->updateOutOfDateIdentifier(II); 628 } 629 630 /// HandleIdentifier - This callback is invoked when the lexer reads an 631 /// identifier. This callback looks up the identifier in the map and/or 632 /// potentially macro expands it or turns it into a named token (like 'for'). 633 /// 634 /// Note that callers of this method are guarded by checking the 635 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 636 /// IdentifierInfo methods that compute these properties will need to change to 637 /// match. 638 bool Preprocessor::HandleIdentifier(Token &Identifier) { 639 assert(Identifier.getIdentifierInfo() && 640 "Can't handle identifiers without identifier info!"); 641 642 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 643 644 // If the information about this identifier is out of date, update it from 645 // the external source. 646 // We have to treat __VA_ARGS__ in a special way, since it gets 647 // serialized with isPoisoned = true, but our preprocessor may have 648 // unpoisoned it if we're defining a C99 macro. 649 if (II.isOutOfDate()) { 650 bool CurrentIsPoisoned = false; 651 if (&II == Ident__VA_ARGS__) 652 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 653 654 updateOutOfDateIdentifier(II); 655 Identifier.setKind(II.getTokenID()); 656 657 if (&II == Ident__VA_ARGS__) 658 II.setIsPoisoned(CurrentIsPoisoned); 659 } 660 661 // If this identifier was poisoned, and if it was not produced from a macro 662 // expansion, emit an error. 663 if (II.isPoisoned() && CurPPLexer) { 664 HandlePoisonedIdentifier(Identifier); 665 } 666 667 // If this is a macro to be expanded, do it. 668 if (MacroDefinition MD = getMacroDefinition(&II)) { 669 auto *MI = MD.getMacroInfo(); 670 assert(MI && "macro definition with no macro info?"); 671 if (!DisableMacroExpansion) { 672 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 673 // C99 6.10.3p10: If the preprocessing token immediately after the 674 // macro name isn't a '(', this macro should not be expanded. 675 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 676 return HandleMacroExpandedIdentifier(Identifier, MD); 677 } else { 678 // C99 6.10.3.4p2 says that a disabled macro may never again be 679 // expanded, even if it's in a context where it could be expanded in the 680 // future. 681 Identifier.setFlag(Token::DisableExpand); 682 if (MI->isObjectLike() || isNextPPTokenLParen()) 683 Diag(Identifier, diag::pp_disabled_macro_expansion); 684 } 685 } 686 } 687 688 // If this identifier is a keyword in a newer Standard or proposed Standard, 689 // produce a warning. Don't warn if we're not considering macro expansion, 690 // since this identifier might be the name of a macro. 691 // FIXME: This warning is disabled in cases where it shouldn't be, like 692 // "#define constexpr constexpr", "int constexpr;" 693 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 694 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 695 << II.getName(); 696 // Don't diagnose this keyword again in this translation unit. 697 II.setIsFutureCompatKeyword(false); 698 } 699 700 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 701 // then we act as if it is the actual operator and not the textual 702 // representation of it. 703 if (II.isCPlusPlusOperatorKeyword()) 704 Identifier.setIdentifierInfo(nullptr); 705 706 // If this is an extension token, diagnose its use. 707 // We avoid diagnosing tokens that originate from macro definitions. 708 // FIXME: This warning is disabled in cases where it shouldn't be, 709 // like "#define TY typeof", "TY(1) x". 710 if (II.isExtensionToken() && !DisableMacroExpansion) 711 Diag(Identifier, diag::ext_token_used); 712 713 // If this is the 'import' contextual keyword following an '@', note 714 // that the next token indicates a module name. 715 // 716 // Note that we do not treat 'import' as a contextual 717 // keyword when we're in a caching lexer, because caching lexers only get 718 // used in contexts where import declarations are disallowed. 719 // 720 // Likewise if this is the C++ Modules TS import keyword. 721 if (((LastTokenWasAt && II.isModulesImport()) || 722 Identifier.is(tok::kw_import)) && 723 !InMacroArgs && !DisableMacroExpansion && 724 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 725 CurLexerKind != CLK_CachingLexer) { 726 ModuleImportLoc = Identifier.getLocation(); 727 ModuleImportPath.clear(); 728 ModuleImportExpectsIdentifier = true; 729 CurLexerKind = CLK_LexAfterModuleImport; 730 } 731 return true; 732 } 733 734 void Preprocessor::Lex(Token &Result) { 735 // We loop here until a lex function returns a token; this avoids recursion. 736 bool ReturnedToken; 737 do { 738 switch (CurLexerKind) { 739 case CLK_Lexer: 740 ReturnedToken = CurLexer->Lex(Result); 741 break; 742 case CLK_PTHLexer: 743 ReturnedToken = CurPTHLexer->Lex(Result); 744 break; 745 case CLK_TokenLexer: 746 ReturnedToken = CurTokenLexer->Lex(Result); 747 break; 748 case CLK_CachingLexer: 749 CachingLex(Result); 750 ReturnedToken = true; 751 break; 752 case CLK_LexAfterModuleImport: 753 LexAfterModuleImport(Result); 754 ReturnedToken = true; 755 break; 756 } 757 } while (!ReturnedToken); 758 759 if (Result.is(tok::code_completion)) 760 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); 761 762 LastTokenWasAt = Result.is(tok::at); 763 } 764 765 /// \brief Lex a token following the 'import' contextual keyword. 766 /// 767 void Preprocessor::LexAfterModuleImport(Token &Result) { 768 // Figure out what kind of lexer we actually have. 769 recomputeCurLexerKind(); 770 771 // Lex the next token. 772 Lex(Result); 773 774 // The token sequence 775 // 776 // import identifier (. identifier)* 777 // 778 // indicates a module import directive. We already saw the 'import' 779 // contextual keyword, so now we're looking for the identifiers. 780 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 781 // We expected to see an identifier here, and we did; continue handling 782 // identifiers. 783 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 784 Result.getLocation())); 785 ModuleImportExpectsIdentifier = false; 786 CurLexerKind = CLK_LexAfterModuleImport; 787 return; 788 } 789 790 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 791 // see the next identifier. (We can also see a '[[' that begins an 792 // attribute-specifier-seq here under the C++ Modules TS.) 793 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 794 ModuleImportExpectsIdentifier = true; 795 CurLexerKind = CLK_LexAfterModuleImport; 796 return; 797 } 798 799 // If we have a non-empty module path, load the named module. 800 if (!ModuleImportPath.empty()) { 801 // Under the Modules TS, the dot is just part of the module name, and not 802 // a real hierarachy separator. Flatten such module names now. 803 // 804 // FIXME: Is this the right level to be performing this transformation? 805 std::string FlatModuleName; 806 if (getLangOpts().ModulesTS) { 807 for (auto &Piece : ModuleImportPath) { 808 if (!FlatModuleName.empty()) 809 FlatModuleName += "."; 810 FlatModuleName += Piece.first->getName(); 811 } 812 SourceLocation FirstPathLoc = ModuleImportPath[0].second; 813 ModuleImportPath.clear(); 814 ModuleImportPath.push_back( 815 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); 816 } 817 818 Module *Imported = nullptr; 819 if (getLangOpts().Modules) { 820 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 821 ModuleImportPath, 822 Module::Hidden, 823 /*IsIncludeDirective=*/false); 824 if (Imported) 825 makeModuleVisible(Imported, ModuleImportLoc); 826 } 827 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 828 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 829 } 830 } 831 832 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 833 CurSubmoduleState->VisibleModules.setVisible( 834 M, Loc, [](Module *) {}, 835 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 836 // FIXME: Include the path in the diagnostic. 837 // FIXME: Include the import location for the conflicting module. 838 Diag(ModuleImportLoc, diag::warn_module_conflict) 839 << Path[0]->getFullModuleName() 840 << Conflict->getFullModuleName() 841 << Message; 842 }); 843 844 // Add this module to the imports list of the currently-built submodule. 845 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 846 BuildingSubmoduleStack.back().M->Imports.insert(M); 847 } 848 849 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 850 const char *DiagnosticTag, 851 bool AllowMacroExpansion) { 852 // We need at least one string literal. 853 if (Result.isNot(tok::string_literal)) { 854 Diag(Result, diag::err_expected_string_literal) 855 << /*Source='in...'*/0 << DiagnosticTag; 856 return false; 857 } 858 859 // Lex string literal tokens, optionally with macro expansion. 860 SmallVector<Token, 4> StrToks; 861 do { 862 StrToks.push_back(Result); 863 864 if (Result.hasUDSuffix()) 865 Diag(Result, diag::err_invalid_string_udl); 866 867 if (AllowMacroExpansion) 868 Lex(Result); 869 else 870 LexUnexpandedToken(Result); 871 } while (Result.is(tok::string_literal)); 872 873 // Concatenate and parse the strings. 874 StringLiteralParser Literal(StrToks, *this); 875 assert(Literal.isAscii() && "Didn't allow wide strings in"); 876 877 if (Literal.hadError) 878 return false; 879 880 if (Literal.Pascal) { 881 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 882 << /*Source='in...'*/0 << DiagnosticTag; 883 return false; 884 } 885 886 String = Literal.GetString(); 887 return true; 888 } 889 890 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 891 assert(Tok.is(tok::numeric_constant)); 892 SmallString<8> IntegerBuffer; 893 bool NumberInvalid = false; 894 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 895 if (NumberInvalid) 896 return false; 897 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 898 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 899 return false; 900 llvm::APInt APVal(64, 0); 901 if (Literal.GetIntegerValue(APVal)) 902 return false; 903 Lex(Tok); 904 Value = APVal.getLimitedValue(); 905 return true; 906 } 907 908 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 909 assert(Handler && "NULL comment handler"); 910 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 911 CommentHandlers.end() && "Comment handler already registered"); 912 CommentHandlers.push_back(Handler); 913 } 914 915 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 916 std::vector<CommentHandler *>::iterator Pos 917 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 918 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 919 CommentHandlers.erase(Pos); 920 } 921 922 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 923 bool AnyPendingTokens = false; 924 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 925 HEnd = CommentHandlers.end(); 926 H != HEnd; ++H) { 927 if ((*H)->HandleComment(*this, Comment)) 928 AnyPendingTokens = true; 929 } 930 if (!AnyPendingTokens || getCommentRetentionState()) 931 return false; 932 Lex(result); 933 return true; 934 } 935 936 ModuleLoader::~ModuleLoader() { } 937 938 CommentHandler::~CommentHandler() { } 939 940 CodeCompletionHandler::~CodeCompletionHandler() { } 941 942 void Preprocessor::createPreprocessingRecord() { 943 if (Record) 944 return; 945 946 Record = new PreprocessingRecord(getSourceManager()); 947 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 948 } 949