//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
//   -H       - Print the name of each header file used.
//   -d[DNI] - Dump various things.
//   -fworking-directory - #line's with preprocessor's working dir.
//   -fpreprocessed
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
//   -W*
//   -w
//
// Messages to emit:
//   "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;

//===----------------------------------------------------------------------===// 56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 57 58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 59 DiagnosticsEngine &diags, LangOptions &opts, 60 SourceManager &SM, HeaderSearch &Headers, 61 ModuleLoader &TheModuleLoader, 62 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 63 TranslationUnitKind TUKind) 64 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), 65 FileMgr(Headers.getFileMgr()), SourceMgr(SM), 66 ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers), 67 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 68 Identifiers(opts, IILookup), 69 PragmaHandlers(new PragmaNamespace(StringRef())), 70 IncrementalProcessing(false), TUKind(TUKind), 71 CodeComplete(nullptr), CodeCompletionFile(nullptr), 72 CodeCompletionOffset(0), LastTokenWasAt(false), 73 ModuleImportExpectsIdentifier(false), CodeCompletionReached(0), 74 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 75 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), 76 Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState), 77 MacroArgCache(nullptr), Record(nullptr), 78 MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 79 OwnsHeaderSearch = OwnsHeaders; 80 81 CounterValue = 0; // __COUNTER__ starts at 0. 82 83 // Clear stats. 84 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 85 NumIf = NumElse = NumEndif = 0; 86 NumEnteredSourceFiles = 0; 87 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 88 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 89 MaxIncludeStackDepth = 0; 90 NumSkipped = 0; 91 92 // Default to discarding comments. 93 KeepComments = false; 94 KeepMacroComments = false; 95 SuppressIncludeNotFoundError = false; 96 97 // Macro expansion is enabled. 
98 DisableMacroExpansion = false; 99 MacroExpansionInDirectivesOverride = false; 100 InMacroArgs = false; 101 InMacroArgPreExpansion = false; 102 NumCachedTokenLexers = 0; 103 PragmasEnabled = true; 104 ParsingIfOrElifDirective = false; 105 PreprocessedOutput = false; 106 107 CachedLexPos = 0; 108 109 // We haven't read anything from the external source. 110 ReadMacrosFromExternalSource = false; 111 112 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 113 // This gets unpoisoned where it is allowed. 114 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 115 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 116 117 // Initialize the pragma handlers. 118 RegisterBuiltinPragmas(); 119 120 // Initialize builtin macros like __LINE__ and friends. 121 RegisterBuiltinMacros(); 122 123 if(LangOpts.Borland) { 124 Ident__exception_info = getIdentifierInfo("_exception_info"); 125 Ident___exception_info = getIdentifierInfo("__exception_info"); 126 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 127 Ident__exception_code = getIdentifierInfo("_exception_code"); 128 Ident___exception_code = getIdentifierInfo("__exception_code"); 129 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 130 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 131 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 132 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 133 } else { 134 Ident__exception_info = Ident__exception_code = nullptr; 135 Ident__abnormal_termination = Ident___exception_info = nullptr; 136 Ident___exception_code = Ident___abnormal_termination = nullptr; 137 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 138 Ident_AbnormalTermination = nullptr; 139 } 140 } 141 142 Preprocessor::~Preprocessor() { 143 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 144 145 IncludeMacroStack.clear(); 
146 147 // Destroy any macro definitions. 148 while (MacroInfoChain *I = MIChainHead) { 149 MIChainHead = I->Next; 150 I->~MacroInfoChain(); 151 } 152 153 // Free any cached macro expanders. 154 // This populates MacroArgCache, so all TokenLexers need to be destroyed 155 // before the code below that frees up the MacroArgCache list. 156 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 157 CurTokenLexer.reset(); 158 159 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 160 DeserialMIChainHead = I->Next; 161 I->~DeserializedMacroInfoChain(); 162 } 163 164 // Free any cached MacroArgs. 165 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 166 ArgList = ArgList->deallocate(); 167 168 // Delete the header search info, if we own it. 169 if (OwnsHeaderSearch) 170 delete &HeaderInfo; 171 } 172 173 void Preprocessor::Initialize(const TargetInfo &Target) { 174 assert((!this->Target || this->Target == &Target) && 175 "Invalid override of target information"); 176 this->Target = &Target; 177 178 // Initialize information about built-ins. 
179 BuiltinInfo.InitializeTarget(Target); 180 HeaderInfo.setTarget(Target); 181 } 182 183 void Preprocessor::InitializeForModelFile() { 184 NumEnteredSourceFiles = 0; 185 186 // Reset pragmas 187 PragmaHandlersBackup = std::move(PragmaHandlers); 188 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 189 RegisterBuiltinPragmas(); 190 191 // Reset PredefinesFileID 192 PredefinesFileID = FileID(); 193 } 194 195 void Preprocessor::FinalizeForModelFile() { 196 NumEnteredSourceFiles = 1; 197 198 PragmaHandlers = std::move(PragmaHandlersBackup); 199 } 200 201 void Preprocessor::setPTHManager(PTHManager* pm) { 202 PTH.reset(pm); 203 FileMgr.addStatCache(PTH->createStatCache()); 204 } 205 206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 207 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 208 << getSpelling(Tok) << "'"; 209 210 if (!DumpFlags) return; 211 212 llvm::errs() << "\t"; 213 if (Tok.isAtStartOfLine()) 214 llvm::errs() << " [StartOfLine]"; 215 if (Tok.hasLeadingSpace()) 216 llvm::errs() << " [LeadingSpace]"; 217 if (Tok.isExpandDisabled()) 218 llvm::errs() << " [ExpandDisabled]"; 219 if (Tok.needsCleaning()) { 220 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 221 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 222 << "']"; 223 } 224 225 llvm::errs() << "\tLoc=<"; 226 DumpLocation(Tok.getLocation()); 227 llvm::errs() << ">"; 228 } 229 230 void Preprocessor::DumpLocation(SourceLocation Loc) const { 231 Loc.dump(SourceMgr); 232 } 233 234 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 235 llvm::errs() << "MACRO: "; 236 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 237 DumpToken(MI.getReplacementToken(i)); 238 llvm::errs() << " "; 239 } 240 llvm::errs() << "\n"; 241 } 242 243 void Preprocessor::PrintStats() { 244 llvm::errs() << "\n*** Preprocessor Stats:\n"; 245 llvm::errs() << NumDirectives << " directives found:\n"; 246 llvm::errs() << " " << NumDefined << 
" #define.\n"; 247 llvm::errs() << " " << NumUndefined << " #undef.\n"; 248 llvm::errs() << " #include/#include_next/#import:\n"; 249 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 250 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 251 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 252 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 253 llvm::errs() << " " << NumEndif << " #endif.\n"; 254 llvm::errs() << " " << NumPragma << " #pragma.\n"; 255 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 256 257 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 258 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 259 << NumFastMacroExpanded << " on the fast path.\n"; 260 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 261 << " token paste (##) operations performed, " 262 << NumFastTokenPaste << " on the fast path.\n"; 263 264 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 265 266 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 267 llvm::errs() << "\n Macro Expanded Tokens: " 268 << llvm::capacity_in_bytes(MacroExpandedTokens); 269 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 270 // FIXME: List information for all submodules. 
271 llvm::errs() << "\n Macros: " 272 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 273 llvm::errs() << "\n #pragma push_macro Info: " 274 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 275 llvm::errs() << "\n Poison Reasons: " 276 << llvm::capacity_in_bytes(PoisonReasons); 277 llvm::errs() << "\n Comment Handlers: " 278 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 279 } 280 281 Preprocessor::macro_iterator 282 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 283 if (IncludeExternalMacros && ExternalSource && 284 !ReadMacrosFromExternalSource) { 285 ReadMacrosFromExternalSource = true; 286 ExternalSource->ReadDefinedMacros(); 287 } 288 289 return CurSubmoduleState->Macros.begin(); 290 } 291 292 size_t Preprocessor::getTotalMemory() const { 293 return BP.getTotalMemory() 294 + llvm::capacity_in_bytes(MacroExpandedTokens) 295 + Predefines.capacity() /* Predefines buffer. */ 296 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 297 // and ModuleMacros. 298 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 299 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 300 + llvm::capacity_in_bytes(PoisonReasons) 301 + llvm::capacity_in_bytes(CommentHandlers); 302 } 303 304 Preprocessor::macro_iterator 305 Preprocessor::macro_end(bool IncludeExternalMacros) const { 306 if (IncludeExternalMacros && ExternalSource && 307 !ReadMacrosFromExternalSource) { 308 ReadMacrosFromExternalSource = true; 309 ExternalSource->ReadDefinedMacros(); 310 } 311 312 return CurSubmoduleState->Macros.end(); 313 } 314 315 /// \brief Compares macro tokens with a specified token value sequence. 
316 static bool MacroDefinitionEquals(const MacroInfo *MI, 317 ArrayRef<TokenValue> Tokens) { 318 return Tokens.size() == MI->getNumTokens() && 319 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 320 } 321 322 StringRef Preprocessor::getLastMacroWithSpelling( 323 SourceLocation Loc, 324 ArrayRef<TokenValue> Tokens) const { 325 SourceLocation BestLocation; 326 StringRef BestSpelling; 327 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 328 I != E; ++I) { 329 const MacroDirective::DefInfo 330 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 331 if (!Def || !Def.getMacroInfo()) 332 continue; 333 if (!Def.getMacroInfo()->isObjectLike()) 334 continue; 335 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 336 continue; 337 SourceLocation Location = Def.getLocation(); 338 // Choose the macro defined latest. 339 if (BestLocation.isInvalid() || 340 (Location.isValid() && 341 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 342 BestLocation = Location; 343 BestSpelling = I->first->getName(); 344 } 345 } 346 return BestSpelling; 347 } 348 349 void Preprocessor::recomputeCurLexerKind() { 350 if (CurLexer) 351 CurLexerKind = CLK_Lexer; 352 else if (CurPTHLexer) 353 CurLexerKind = CLK_PTHLexer; 354 else if (CurTokenLexer) 355 CurLexerKind = CLK_TokenLexer; 356 else 357 CurLexerKind = CLK_CachingLexer; 358 } 359 360 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 361 unsigned CompleteLine, 362 unsigned CompleteColumn) { 363 assert(File); 364 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 365 assert(!CodeCompletionFile && "Already set"); 366 367 using llvm::MemoryBuffer; 368 369 // Load the actual file's contents. 370 bool Invalid = false; 371 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 372 if (Invalid) 373 return true; 374 375 // Find the byte position of the truncation point. 
376 const char *Position = Buffer->getBufferStart(); 377 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 378 for (; *Position; ++Position) { 379 if (*Position != '\r' && *Position != '\n') 380 continue; 381 382 // Eat \r\n or \n\r as a single line. 383 if ((Position[1] == '\r' || Position[1] == '\n') && 384 Position[0] != Position[1]) 385 ++Position; 386 ++Position; 387 break; 388 } 389 } 390 391 Position += CompleteColumn - 1; 392 393 // If pointing inside the preamble, adjust the position at the beginning of 394 // the file after the preamble. 395 if (SkipMainFilePreamble.first && 396 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 397 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 398 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 399 } 400 401 if (Position > Buffer->getBufferEnd()) 402 Position = Buffer->getBufferEnd(); 403 404 CodeCompletionFile = File; 405 CodeCompletionOffset = Position - Buffer->getBufferStart(); 406 407 std::unique_ptr<MemoryBuffer> NewBuffer = 408 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 409 Buffer->getBufferIdentifier()); 410 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 411 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 412 *NewPos = '\0'; 413 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 414 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 415 416 return false; 417 } 418 419 void Preprocessor::CodeCompleteNaturalLanguage() { 420 if (CodeComplete) 421 CodeComplete->CodeCompleteNaturalLanguage(); 422 setCodeCompletionReached(); 423 } 424 425 /// getSpelling - This method is used to get the spelling of a token into a 426 /// SmallVector. Note that the returned StringRef may not point to the 427 /// supplied buffer if a copy can be avoided. 
428 StringRef Preprocessor::getSpelling(const Token &Tok, 429 SmallVectorImpl<char> &Buffer, 430 bool *Invalid) const { 431 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 432 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 433 // Try the fast path. 434 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 435 return II->getName(); 436 } 437 438 // Resize the buffer if we need to copy into it. 439 if (Tok.needsCleaning()) 440 Buffer.resize(Tok.getLength()); 441 442 const char *Ptr = Buffer.data(); 443 unsigned Len = getSpelling(Tok, Ptr, Invalid); 444 return StringRef(Ptr, Len); 445 } 446 447 /// CreateString - Plop the specified string into a scratch buffer and return a 448 /// location for it. If specified, the source location provides a source 449 /// location for the token. 450 void Preprocessor::CreateString(StringRef Str, Token &Tok, 451 SourceLocation ExpansionLocStart, 452 SourceLocation ExpansionLocEnd) { 453 Tok.setLength(Str.size()); 454 455 const char *DestPtr; 456 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 457 458 if (ExpansionLocStart.isValid()) 459 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 460 ExpansionLocEnd, Str.size()); 461 Tok.setLocation(Loc); 462 463 // If this is a raw identifier or a literal token, set the pointer data. 
464 if (Tok.is(tok::raw_identifier)) 465 Tok.setRawIdentifierData(DestPtr); 466 else if (Tok.isLiteral()) 467 Tok.setLiteralData(DestPtr); 468 } 469 470 Module *Preprocessor::getCurrentModule() { 471 if (getLangOpts().CurrentModule.empty()) 472 return nullptr; 473 474 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 475 } 476 477 //===----------------------------------------------------------------------===// 478 // Preprocessor Initialization Methods 479 //===----------------------------------------------------------------------===// 480 481 482 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 483 /// which implicitly adds the builtin defines etc. 484 void Preprocessor::EnterMainSourceFile() { 485 // We do not allow the preprocessor to reenter the main file. Doing so will 486 // cause FileID's to accumulate information from both runs (e.g. #line 487 // information) and predefined macros aren't guaranteed to be set properly. 488 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 489 FileID MainFileID = SourceMgr.getMainFileID(); 490 491 // If MainFileID is loaded it means we loaded an AST file, no need to enter 492 // a main file. 493 if (!SourceMgr.isLoadedFileID(MainFileID)) { 494 // Enter the main file source buffer. 495 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 496 497 // If we've been asked to skip bytes in the main file (e.g., as part of a 498 // precompiled preamble), do so now. 499 if (SkipMainFilePreamble.first > 0) 500 CurLexer->SkipBytes(SkipMainFilePreamble.first, 501 SkipMainFilePreamble.second); 502 503 // Tell the header info that the main file was entered. If the file is later 504 // #imported, it won't be re-entered. 505 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 506 HeaderInfo.IncrementIncludeCount(FE); 507 } 508 509 // Preprocess Predefines to populate the initial preprocessor state. 
510 std::unique_ptr<llvm::MemoryBuffer> SB = 511 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 512 assert(SB && "Cannot create predefined source buffer"); 513 FileID FID = SourceMgr.createFileID(std::move(SB)); 514 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 515 setPredefinesFileID(FID); 516 517 // Start parsing the predefines. 518 EnterSourceFile(FID, nullptr, SourceLocation()); 519 } 520 521 void Preprocessor::EndSourceFile() { 522 // Notify the client that we reached the end of the source file. 523 if (Callbacks) 524 Callbacks->EndOfMainFile(); 525 } 526 527 //===----------------------------------------------------------------------===// 528 // Lexer Event Handling. 529 //===----------------------------------------------------------------------===// 530 531 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 532 /// identifier information for the token and install it into the token, 533 /// updating the token kind accordingly. 534 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 535 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 536 537 // Look up this token, see if it is a macro, or if it is a language keyword. 538 IdentifierInfo *II; 539 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 540 // No cleaning needed, just use the characters from the lexed buffer. 541 II = getIdentifierInfo(Identifier.getRawIdentifier()); 542 } else { 543 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 544 SmallString<64> IdentifierBuffer; 545 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 546 547 if (Identifier.hasUCN()) { 548 SmallString<64> UCNIdentifierBuffer; 549 expandUCNs(UCNIdentifierBuffer, CleanedStr); 550 II = getIdentifierInfo(UCNIdentifierBuffer); 551 } else { 552 II = getIdentifierInfo(CleanedStr); 553 } 554 } 555 556 // Update the token info (identifier info and appropriate token kind). 
557 Identifier.setIdentifierInfo(II); 558 Identifier.setKind(II->getTokenID()); 559 560 return II; 561 } 562 563 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 564 PoisonReasons[II] = DiagID; 565 } 566 567 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 568 assert(Ident__exception_code && Ident__exception_info); 569 assert(Ident___exception_code && Ident___exception_info); 570 Ident__exception_code->setIsPoisoned(Poison); 571 Ident___exception_code->setIsPoisoned(Poison); 572 Ident_GetExceptionCode->setIsPoisoned(Poison); 573 Ident__exception_info->setIsPoisoned(Poison); 574 Ident___exception_info->setIsPoisoned(Poison); 575 Ident_GetExceptionInfo->setIsPoisoned(Poison); 576 Ident__abnormal_termination->setIsPoisoned(Poison); 577 Ident___abnormal_termination->setIsPoisoned(Poison); 578 Ident_AbnormalTermination->setIsPoisoned(Poison); 579 } 580 581 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 582 assert(Identifier.getIdentifierInfo() && 583 "Can't handle identifiers without identifier info!"); 584 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 585 PoisonReasons.find(Identifier.getIdentifierInfo()); 586 if(it == PoisonReasons.end()) 587 Diag(Identifier, diag::err_pp_used_poisoned_id); 588 else 589 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 590 } 591 592 /// \brief Returns a diagnostic message kind for reporting a future keyword as 593 /// appropriate for the identifier and specified language. 
594 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 595 const LangOptions &LangOpts) { 596 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 597 598 if (LangOpts.CPlusPlus) 599 return llvm::StringSwitch<diag::kind>(II.getName()) 600 #define CXX11_KEYWORD(NAME, FLAGS) \ 601 .Case(#NAME, diag::warn_cxx11_keyword) 602 #include "clang/Basic/TokenKinds.def" 603 ; 604 605 llvm_unreachable( 606 "Keyword not known to come from a newer Standard or proposed Standard"); 607 } 608 609 /// HandleIdentifier - This callback is invoked when the lexer reads an 610 /// identifier. This callback looks up the identifier in the map and/or 611 /// potentially macro expands it or turns it into a named token (like 'for'). 612 /// 613 /// Note that callers of this method are guarded by checking the 614 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 615 /// IdentifierInfo methods that compute these properties will need to change to 616 /// match. 617 bool Preprocessor::HandleIdentifier(Token &Identifier) { 618 assert(Identifier.getIdentifierInfo() && 619 "Can't handle identifiers without identifier info!"); 620 621 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 622 623 // If the information about this identifier is out of date, update it from 624 // the external source. 625 // We have to treat __VA_ARGS__ in a special way, since it gets 626 // serialized with isPoisoned = true, but our preprocessor may have 627 // unpoisoned it if we're defining a C99 macro. 628 if (II.isOutOfDate()) { 629 bool CurrentIsPoisoned = false; 630 if (&II == Ident__VA_ARGS__) 631 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 632 633 ExternalSource->updateOutOfDateIdentifier(II); 634 Identifier.setKind(II.getTokenID()); 635 636 if (&II == Ident__VA_ARGS__) 637 II.setIsPoisoned(CurrentIsPoisoned); 638 } 639 640 // If this identifier was poisoned, and if it was not produced from a macro 641 // expansion, emit an error. 
642 if (II.isPoisoned() && CurPPLexer) { 643 HandlePoisonedIdentifier(Identifier); 644 } 645 646 // If this is a macro to be expanded, do it. 647 if (MacroDefinition MD = getMacroDefinition(&II)) { 648 auto *MI = MD.getMacroInfo(); 649 assert(MI && "macro definition with no macro info?"); 650 if (!DisableMacroExpansion) { 651 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 652 // C99 6.10.3p10: If the preprocessing token immediately after the 653 // macro name isn't a '(', this macro should not be expanded. 654 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 655 return HandleMacroExpandedIdentifier(Identifier, MD); 656 } else { 657 // C99 6.10.3.4p2 says that a disabled macro may never again be 658 // expanded, even if it's in a context where it could be expanded in the 659 // future. 660 Identifier.setFlag(Token::DisableExpand); 661 if (MI->isObjectLike() || isNextPPTokenLParen()) 662 Diag(Identifier, diag::pp_disabled_macro_expansion); 663 } 664 } 665 } 666 667 // If this identifier is a keyword in a newer Standard or proposed Standard, 668 // produce a warning. Don't warn if we're not considering macro expansion, 669 // since this identifier might be the name of a macro. 670 // FIXME: This warning is disabled in cases where it shouldn't be, like 671 // "#define constexpr constexpr", "int constexpr;" 672 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 673 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 674 << II.getName(); 675 // Don't diagnose this keyword again in this translation unit. 676 II.setIsFutureCompatKeyword(false); 677 } 678 679 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 680 // then we act as if it is the actual operator and not the textual 681 // representation of it. 682 if (II.isCPlusPlusOperatorKeyword()) 683 Identifier.setIdentifierInfo(nullptr); 684 685 // If this is an extension token, diagnose its use. 686 // We avoid diagnosing tokens that originate from macro definitions. 
687 // FIXME: This warning is disabled in cases where it shouldn't be, 688 // like "#define TY typeof", "TY(1) x". 689 if (II.isExtensionToken() && !DisableMacroExpansion) 690 Diag(Identifier, diag::ext_token_used); 691 692 // If this is the 'import' contextual keyword following an '@', note 693 // that the next token indicates a module name. 694 // 695 // Note that we do not treat 'import' as a contextual 696 // keyword when we're in a caching lexer, because caching lexers only get 697 // used in contexts where import declarations are disallowed. 698 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 699 !DisableMacroExpansion && 700 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 701 CurLexerKind != CLK_CachingLexer) { 702 ModuleImportLoc = Identifier.getLocation(); 703 ModuleImportPath.clear(); 704 ModuleImportExpectsIdentifier = true; 705 CurLexerKind = CLK_LexAfterModuleImport; 706 } 707 return true; 708 } 709 710 void Preprocessor::Lex(Token &Result) { 711 // We loop here until a lex function retuns a token; this avoids recursion. 712 bool ReturnedToken; 713 do { 714 switch (CurLexerKind) { 715 case CLK_Lexer: 716 ReturnedToken = CurLexer->Lex(Result); 717 break; 718 case CLK_PTHLexer: 719 ReturnedToken = CurPTHLexer->Lex(Result); 720 break; 721 case CLK_TokenLexer: 722 ReturnedToken = CurTokenLexer->Lex(Result); 723 break; 724 case CLK_CachingLexer: 725 CachingLex(Result); 726 ReturnedToken = true; 727 break; 728 case CLK_LexAfterModuleImport: 729 LexAfterModuleImport(Result); 730 ReturnedToken = true; 731 break; 732 } 733 } while (!ReturnedToken); 734 735 LastTokenWasAt = Result.is(tok::at); 736 } 737 738 739 /// \brief Lex a token following the 'import' contextual keyword. 740 /// 741 void Preprocessor::LexAfterModuleImport(Token &Result) { 742 // Figure out what kind of lexer we actually have. 743 recomputeCurLexerKind(); 744 745 // Lex the next token. 746 Lex(Result); 747 748 // The token sequence 749 // 750 // import identifier (. 
identifier)* 751 // 752 // indicates a module import directive. We already saw the 'import' 753 // contextual keyword, so now we're looking for the identifiers. 754 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 755 // We expected to see an identifier here, and we did; continue handling 756 // identifiers. 757 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 758 Result.getLocation())); 759 ModuleImportExpectsIdentifier = false; 760 CurLexerKind = CLK_LexAfterModuleImport; 761 return; 762 } 763 764 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 765 // see the next identifier. 766 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 767 ModuleImportExpectsIdentifier = true; 768 CurLexerKind = CLK_LexAfterModuleImport; 769 return; 770 } 771 772 // If we have a non-empty module path, load the named module. 773 if (!ModuleImportPath.empty()) { 774 Module *Imported = nullptr; 775 if (getLangOpts().Modules) { 776 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 777 ModuleImportPath, 778 Module::Hidden, 779 /*IsIncludeDirective=*/false); 780 if (Imported) 781 makeModuleVisible(Imported, ModuleImportLoc); 782 } 783 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 784 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 785 } 786 } 787 788 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 789 CurSubmoduleState->VisibleModules.setVisible( 790 M, Loc, [](Module *) {}, 791 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 792 // FIXME: Include the path in the diagnostic. 793 // FIXME: Include the import location for the conflicting module. 794 Diag(ModuleImportLoc, diag::warn_module_conflict) 795 << Path[0]->getFullModuleName() 796 << Conflict->getFullModuleName() 797 << Message; 798 }); 799 800 // Add this module to the imports list of the currently-built submodule. 
801 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 802 BuildingSubmoduleStack.back().M->Imports.insert(M); 803 } 804 805 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 806 const char *DiagnosticTag, 807 bool AllowMacroExpansion) { 808 // We need at least one string literal. 809 if (Result.isNot(tok::string_literal)) { 810 Diag(Result, diag::err_expected_string_literal) 811 << /*Source='in...'*/0 << DiagnosticTag; 812 return false; 813 } 814 815 // Lex string literal tokens, optionally with macro expansion. 816 SmallVector<Token, 4> StrToks; 817 do { 818 StrToks.push_back(Result); 819 820 if (Result.hasUDSuffix()) 821 Diag(Result, diag::err_invalid_string_udl); 822 823 if (AllowMacroExpansion) 824 Lex(Result); 825 else 826 LexUnexpandedToken(Result); 827 } while (Result.is(tok::string_literal)); 828 829 // Concatenate and parse the strings. 830 StringLiteralParser Literal(StrToks, *this); 831 assert(Literal.isAscii() && "Didn't allow wide strings in"); 832 833 if (Literal.hadError) 834 return false; 835 836 if (Literal.Pascal) { 837 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 838 << /*Source='in...'*/0 << DiagnosticTag; 839 return false; 840 } 841 842 String = Literal.GetString(); 843 return true; 844 } 845 846 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 847 assert(Tok.is(tok::numeric_constant)); 848 SmallString<8> IntegerBuffer; 849 bool NumberInvalid = false; 850 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 851 if (NumberInvalid) 852 return false; 853 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 854 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 855 return false; 856 llvm::APInt APVal(64, 0); 857 if (Literal.GetIntegerValue(APVal)) 858 return false; 859 Lex(Tok); 860 Value = APVal.getLimitedValue(); 861 return true; 862 } 863 864 void 
Preprocessor::addCommentHandler(CommentHandler *Handler) { 865 assert(Handler && "NULL comment handler"); 866 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 867 CommentHandlers.end() && "Comment handler already registered"); 868 CommentHandlers.push_back(Handler); 869 } 870 871 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 872 std::vector<CommentHandler *>::iterator Pos 873 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 874 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 875 CommentHandlers.erase(Pos); 876 } 877 878 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 879 bool AnyPendingTokens = false; 880 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 881 HEnd = CommentHandlers.end(); 882 H != HEnd; ++H) { 883 if ((*H)->HandleComment(*this, Comment)) 884 AnyPendingTokens = true; 885 } 886 if (!AnyPendingTokens || getCommentRetentionState()) 887 return false; 888 Lex(result); 889 return true; 890 } 891 892 ModuleLoader::~ModuleLoader() { } 893 894 CommentHandler::~CommentHandler() { } 895 896 CodeCompletionHandler::~CodeCompletionHandler() { } 897 898 void Preprocessor::createPreprocessingRecord() { 899 if (Record) 900 return; 901 902 Record = new PreprocessingRecord(getSourceManager()); 903 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 904 } 905