1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // Options to support: 15 // -H - Print the name of each header file used. 16 // -d[DNI] - Dump various things. 17 // -fworking-directory - #line's with preprocessor's working dir. 18 // -fpreprocessed 19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD 20 // -W* 21 // -w 22 // 23 // Messages to emit: 24 // "Multiple include guards may be useful for:\n" 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "clang/Lex/Preprocessor.h" 29 #include "clang/Basic/FileManager.h" 30 #include "clang/Basic/FileSystemStatCache.h" 31 #include "clang/Basic/SourceManager.h" 32 #include "clang/Basic/TargetInfo.h" 33 #include "clang/Lex/CodeCompletionHandler.h" 34 #include "clang/Lex/ExternalPreprocessorSource.h" 35 #include "clang/Lex/HeaderSearch.h" 36 #include "clang/Lex/LexDiagnostic.h" 37 #include "clang/Lex/LiteralSupport.h" 38 #include "clang/Lex/MacroArgs.h" 39 #include "clang/Lex/MacroInfo.h" 40 #include "clang/Lex/ModuleLoader.h" 41 #include "clang/Lex/Pragma.h" 42 #include "clang/Lex/PreprocessingRecord.h" 43 #include "clang/Lex/PreprocessorOptions.h" 44 #include "clang/Lex/ScratchBuffer.h" 45 #include "llvm/ADT/APFloat.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include "llvm/ADT/SmallString.h" 48 #include "llvm/ADT/StringExtras.h" 49 #include "llvm/Support/Capacity.h" 50 #include "llvm/Support/ConvertUTF.h" 51 #include "llvm/Support/MemoryBuffer.h" 52 #include "llvm/Support/raw_ostream.h" 53 using namespace clang; 54 55 //===----------------------------------------------------------------------===// 56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 57 58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 59 DiagnosticsEngine &diags, LangOptions &opts, 60 SourceManager &SM, HeaderSearch &Headers, 61 ModuleLoader &TheModuleLoader, 62 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 63 TranslationUnitKind TUKind) 64 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), 65 FileMgr(Headers.getFileMgr()), SourceMgr(SM), 66 ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers), 67 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 68 Identifiers(opts, IILookup), 69 PragmaHandlers(new PragmaNamespace(StringRef())), 70 IncrementalProcessing(false), TUKind(TUKind), 71 CodeComplete(nullptr), CodeCompletionFile(nullptr), 72 CodeCompletionOffset(0), LastTokenWasAt(false), 73 ModuleImportExpectsIdentifier(false), CodeCompletionReached(0), 74 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), 75 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), 76 Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState), 77 MacroArgCache(nullptr), Record(nullptr), 78 MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 79 OwnsHeaderSearch = OwnsHeaders; 80 81 CounterValue = 0; // __COUNTER__ starts at 0. 82 83 // Clear stats. 84 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 85 NumIf = NumElse = NumEndif = 0; 86 NumEnteredSourceFiles = 0; 87 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 88 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 89 MaxIncludeStackDepth = 0; 90 NumSkipped = 0; 91 92 // Default to discarding comments. 93 KeepComments = false; 94 KeepMacroComments = false; 95 SuppressIncludeNotFoundError = false; 96 97 // Macro expansion is enabled. 98 DisableMacroExpansion = false; 99 MacroExpansionInDirectivesOverride = false; 100 InMacroArgs = false; 101 InMacroArgPreExpansion = false; 102 NumCachedTokenLexers = 0; 103 PragmasEnabled = true; 104 ParsingIfOrElifDirective = false; 105 PreprocessedOutput = false; 106 107 CachedLexPos = 0; 108 109 // We haven't read anything from the external source. 110 ReadMacrosFromExternalSource = false; 111 112 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 113 // This gets unpoisoned where it is allowed. 114 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 115 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 116 117 // Initialize the pragma handlers. 118 RegisterBuiltinPragmas(); 119 120 // Initialize builtin macros like __LINE__ and friends. 121 RegisterBuiltinMacros(); 122 123 if(LangOpts.Borland) { 124 Ident__exception_info = getIdentifierInfo("_exception_info"); 125 Ident___exception_info = getIdentifierInfo("__exception_info"); 126 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 127 Ident__exception_code = getIdentifierInfo("_exception_code"); 128 Ident___exception_code = getIdentifierInfo("__exception_code"); 129 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 130 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 131 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 132 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 133 } else { 134 Ident__exception_info = Ident__exception_code = nullptr; 135 Ident__abnormal_termination = Ident___exception_info = nullptr; 136 Ident___exception_code = Ident___abnormal_termination = nullptr; 137 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 138 Ident_AbnormalTermination = nullptr; 139 } 140 } 141 142 Preprocessor::~Preprocessor() { 143 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 144 145 IncludeMacroStack.clear(); 146 147 // Destroy any macro definitions. 148 while (MacroInfoChain *I = MIChainHead) { 149 MIChainHead = I->Next; 150 I->~MacroInfoChain(); 151 } 152 153 // Free any cached macro expanders. 154 // This populates MacroArgCache, so all TokenLexers need to be destroyed 155 // before the code below that frees up the MacroArgCache list. 156 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 157 CurTokenLexer.reset(); 158 159 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 160 DeserialMIChainHead = I->Next; 161 I->~DeserializedMacroInfoChain(); 162 } 163 164 // Free any cached MacroArgs. 165 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 166 ArgList = ArgList->deallocate(); 167 168 // Delete the header search info, if we own it. 169 if (OwnsHeaderSearch) 170 delete &HeaderInfo; 171 } 172 173 void Preprocessor::Initialize(const TargetInfo &Target) { 174 assert((!this->Target || this->Target == &Target) && 175 "Invalid override of target information"); 176 this->Target = &Target; 177 178 // Initialize information about built-ins. 179 BuiltinInfo.initializeTarget(Target); 180 HeaderInfo.setTarget(Target); 181 } 182 183 void Preprocessor::InitializeForModelFile() { 184 NumEnteredSourceFiles = 0; 185 186 // Reset pragmas 187 PragmaHandlersBackup = std::move(PragmaHandlers); 188 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 189 RegisterBuiltinPragmas(); 190 191 // Reset PredefinesFileID 192 PredefinesFileID = FileID(); 193 } 194 195 void Preprocessor::FinalizeForModelFile() { 196 NumEnteredSourceFiles = 1; 197 198 PragmaHandlers = std::move(PragmaHandlersBackup); 199 } 200 201 void Preprocessor::setPTHManager(PTHManager* pm) { 202 PTH.reset(pm); 203 FileMgr.addStatCache(PTH->createStatCache()); 204 } 205 206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 207 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 208 << getSpelling(Tok) << "'"; 209 210 if (!DumpFlags) return; 211 212 llvm::errs() << "\t"; 213 if (Tok.isAtStartOfLine()) 214 llvm::errs() << " [StartOfLine]"; 215 if (Tok.hasLeadingSpace()) 216 llvm::errs() << " [LeadingSpace]"; 217 if (Tok.isExpandDisabled()) 218 llvm::errs() << " [ExpandDisabled]"; 219 if (Tok.needsCleaning()) { 220 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 221 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 222 << "']"; 223 } 224 225 llvm::errs() << "\tLoc=<"; 226 DumpLocation(Tok.getLocation()); 227 llvm::errs() << ">"; 228 } 229 230 void Preprocessor::DumpLocation(SourceLocation Loc) const { 231 Loc.dump(SourceMgr); 232 } 233 234 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 235 llvm::errs() << "MACRO: "; 236 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 237 DumpToken(MI.getReplacementToken(i)); 238 llvm::errs() << " "; 239 } 240 llvm::errs() << "\n"; 241 } 242 243 void Preprocessor::PrintStats() { 244 llvm::errs() << "\n*** Preprocessor Stats:\n"; 245 llvm::errs() << NumDirectives << " directives found:\n"; 246 llvm::errs() << " " << NumDefined << " #define.\n"; 247 llvm::errs() << " " << NumUndefined << " #undef.\n"; 248 llvm::errs() << " #include/#include_next/#import:\n"; 249 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 250 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 251 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 252 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 253 llvm::errs() << " " << NumEndif << " #endif.\n"; 254 llvm::errs() << " " << NumPragma << " #pragma.\n"; 255 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 256 257 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 258 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 259 << NumFastMacroExpanded << " on the fast path.\n"; 260 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 261 << " token paste (##) operations performed, " 262 << NumFastTokenPaste << " on the fast path.\n"; 263 264 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 265 266 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 267 llvm::errs() << "\n Macro Expanded Tokens: " 268 << llvm::capacity_in_bytes(MacroExpandedTokens); 269 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 270 // FIXME: List information for all submodules. 271 llvm::errs() << "\n Macros: " 272 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 273 llvm::errs() << "\n #pragma push_macro Info: " 274 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 275 llvm::errs() << "\n Poison Reasons: " 276 << llvm::capacity_in_bytes(PoisonReasons); 277 llvm::errs() << "\n Comment Handlers: " 278 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 279 } 280 281 Preprocessor::macro_iterator 282 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 283 if (IncludeExternalMacros && ExternalSource && 284 !ReadMacrosFromExternalSource) { 285 ReadMacrosFromExternalSource = true; 286 ExternalSource->ReadDefinedMacros(); 287 } 288 289 // Make sure we cover all macros in visible modules. 290 for (const ModuleMacro &Macro : ModuleMacros) 291 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 292 293 return CurSubmoduleState->Macros.begin(); 294 } 295 296 size_t Preprocessor::getTotalMemory() const { 297 return BP.getTotalMemory() 298 + llvm::capacity_in_bytes(MacroExpandedTokens) 299 + Predefines.capacity() /* Predefines buffer. */ 300 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 301 // and ModuleMacros. 302 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 303 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 304 + llvm::capacity_in_bytes(PoisonReasons) 305 + llvm::capacity_in_bytes(CommentHandlers); 306 } 307 308 Preprocessor::macro_iterator 309 Preprocessor::macro_end(bool IncludeExternalMacros) const { 310 if (IncludeExternalMacros && ExternalSource && 311 !ReadMacrosFromExternalSource) { 312 ReadMacrosFromExternalSource = true; 313 ExternalSource->ReadDefinedMacros(); 314 } 315 316 return CurSubmoduleState->Macros.end(); 317 } 318 319 /// \brief Compares macro tokens with a specified token value sequence. 320 static bool MacroDefinitionEquals(const MacroInfo *MI, 321 ArrayRef<TokenValue> Tokens) { 322 return Tokens.size() == MI->getNumTokens() && 323 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 324 } 325 326 StringRef Preprocessor::getLastMacroWithSpelling( 327 SourceLocation Loc, 328 ArrayRef<TokenValue> Tokens) const { 329 SourceLocation BestLocation; 330 StringRef BestSpelling; 331 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 332 I != E; ++I) { 333 const MacroDirective::DefInfo 334 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 335 if (!Def || !Def.getMacroInfo()) 336 continue; 337 if (!Def.getMacroInfo()->isObjectLike()) 338 continue; 339 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 340 continue; 341 SourceLocation Location = Def.getLocation(); 342 // Choose the macro defined latest. 343 if (BestLocation.isInvalid() || 344 (Location.isValid() && 345 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 346 BestLocation = Location; 347 BestSpelling = I->first->getName(); 348 } 349 } 350 return BestSpelling; 351 } 352 353 void Preprocessor::recomputeCurLexerKind() { 354 if (CurLexer) 355 CurLexerKind = CLK_Lexer; 356 else if (CurPTHLexer) 357 CurLexerKind = CLK_PTHLexer; 358 else if (CurTokenLexer) 359 CurLexerKind = CLK_TokenLexer; 360 else 361 CurLexerKind = CLK_CachingLexer; 362 } 363 364 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 365 unsigned CompleteLine, 366 unsigned CompleteColumn) { 367 assert(File); 368 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 369 assert(!CodeCompletionFile && "Already set"); 370 371 using llvm::MemoryBuffer; 372 373 // Load the actual file's contents. 374 bool Invalid = false; 375 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 376 if (Invalid) 377 return true; 378 379 // Find the byte position of the truncation point. 380 const char *Position = Buffer->getBufferStart(); 381 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 382 for (; *Position; ++Position) { 383 if (*Position != '\r' && *Position != '\n') 384 continue; 385 386 // Eat \r\n or \n\r as a single line. 387 if ((Position[1] == '\r' || Position[1] == '\n') && 388 Position[0] != Position[1]) 389 ++Position; 390 ++Position; 391 break; 392 } 393 } 394 395 Position += CompleteColumn - 1; 396 397 // If pointing inside the preamble, adjust the position at the beginning of 398 // the file after the preamble. 399 if (SkipMainFilePreamble.first && 400 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 401 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 402 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 403 } 404 405 if (Position > Buffer->getBufferEnd()) 406 Position = Buffer->getBufferEnd(); 407 408 CodeCompletionFile = File; 409 CodeCompletionOffset = Position - Buffer->getBufferStart(); 410 411 std::unique_ptr<MemoryBuffer> NewBuffer = 412 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 413 Buffer->getBufferIdentifier()); 414 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 415 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 416 *NewPos = '\0'; 417 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 418 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 419 420 return false; 421 } 422 423 void Preprocessor::CodeCompleteNaturalLanguage() { 424 if (CodeComplete) 425 CodeComplete->CodeCompleteNaturalLanguage(); 426 setCodeCompletionReached(); 427 } 428 429 /// getSpelling - This method is used to get the spelling of a token into a 430 /// SmallVector. Note that the returned StringRef may not point to the 431 /// supplied buffer if a copy can be avoided. 432 StringRef Preprocessor::getSpelling(const Token &Tok, 433 SmallVectorImpl<char> &Buffer, 434 bool *Invalid) const { 435 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 436 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 437 // Try the fast path. 438 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 439 return II->getName(); 440 } 441 442 // Resize the buffer if we need to copy into it. 443 if (Tok.needsCleaning()) 444 Buffer.resize(Tok.getLength()); 445 446 const char *Ptr = Buffer.data(); 447 unsigned Len = getSpelling(Tok, Ptr, Invalid); 448 return StringRef(Ptr, Len); 449 } 450 451 /// CreateString - Plop the specified string into a scratch buffer and return a 452 /// location for it. If specified, the source location provides a source 453 /// location for the token. 454 void Preprocessor::CreateString(StringRef Str, Token &Tok, 455 SourceLocation ExpansionLocStart, 456 SourceLocation ExpansionLocEnd) { 457 Tok.setLength(Str.size()); 458 459 const char *DestPtr; 460 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 461 462 if (ExpansionLocStart.isValid()) 463 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 464 ExpansionLocEnd, Str.size()); 465 Tok.setLocation(Loc); 466 467 // If this is a raw identifier or a literal token, set the pointer data. 468 if (Tok.is(tok::raw_identifier)) 469 Tok.setRawIdentifierData(DestPtr); 470 else if (Tok.isLiteral()) 471 Tok.setLiteralData(DestPtr); 472 } 473 474 Module *Preprocessor::getCurrentModule() { 475 if (getLangOpts().CurrentModule.empty()) 476 return nullptr; 477 478 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 479 } 480 481 //===----------------------------------------------------------------------===// 482 // Preprocessor Initialization Methods 483 //===----------------------------------------------------------------------===// 484 485 486 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 487 /// which implicitly adds the builtin defines etc. 488 void Preprocessor::EnterMainSourceFile() { 489 // We do not allow the preprocessor to reenter the main file. Doing so will 490 // cause FileID's to accumulate information from both runs (e.g. #line 491 // information) and predefined macros aren't guaranteed to be set properly. 492 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 493 FileID MainFileID = SourceMgr.getMainFileID(); 494 495 // If MainFileID is loaded it means we loaded an AST file, no need to enter 496 // a main file. 497 if (!SourceMgr.isLoadedFileID(MainFileID)) { 498 // Enter the main file source buffer. 499 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 500 501 // If we've been asked to skip bytes in the main file (e.g., as part of a 502 // precompiled preamble), do so now. 503 if (SkipMainFilePreamble.first > 0) 504 CurLexer->SkipBytes(SkipMainFilePreamble.first, 505 SkipMainFilePreamble.second); 506 507 // Tell the header info that the main file was entered. If the file is later 508 // #imported, it won't be re-entered. 509 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 510 HeaderInfo.IncrementIncludeCount(FE); 511 } 512 513 // Preprocess Predefines to populate the initial preprocessor state. 514 std::unique_ptr<llvm::MemoryBuffer> SB = 515 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 516 assert(SB && "Cannot create predefined source buffer"); 517 FileID FID = SourceMgr.createFileID(std::move(SB)); 518 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 519 setPredefinesFileID(FID); 520 521 // Start parsing the predefines. 522 EnterSourceFile(FID, nullptr, SourceLocation()); 523 } 524 525 void Preprocessor::EndSourceFile() { 526 // Notify the client that we reached the end of the source file. 527 if (Callbacks) 528 Callbacks->EndOfMainFile(); 529 } 530 531 //===----------------------------------------------------------------------===// 532 // Lexer Event Handling. 533 //===----------------------------------------------------------------------===// 534 535 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 536 /// identifier information for the token and install it into the token, 537 /// updating the token kind accordingly. 538 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 539 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 540 541 // Look up this token, see if it is a macro, or if it is a language keyword. 542 IdentifierInfo *II; 543 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 544 // No cleaning needed, just use the characters from the lexed buffer. 545 II = getIdentifierInfo(Identifier.getRawIdentifier()); 546 } else { 547 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 548 SmallString<64> IdentifierBuffer; 549 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 550 551 if (Identifier.hasUCN()) { 552 SmallString<64> UCNIdentifierBuffer; 553 expandUCNs(UCNIdentifierBuffer, CleanedStr); 554 II = getIdentifierInfo(UCNIdentifierBuffer); 555 } else { 556 II = getIdentifierInfo(CleanedStr); 557 } 558 } 559 560 // Update the token info (identifier info and appropriate token kind). 561 Identifier.setIdentifierInfo(II); 562 Identifier.setKind(II->getTokenID()); 563 564 return II; 565 } 566 567 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 568 PoisonReasons[II] = DiagID; 569 } 570 571 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 572 assert(Ident__exception_code && Ident__exception_info); 573 assert(Ident___exception_code && Ident___exception_info); 574 Ident__exception_code->setIsPoisoned(Poison); 575 Ident___exception_code->setIsPoisoned(Poison); 576 Ident_GetExceptionCode->setIsPoisoned(Poison); 577 Ident__exception_info->setIsPoisoned(Poison); 578 Ident___exception_info->setIsPoisoned(Poison); 579 Ident_GetExceptionInfo->setIsPoisoned(Poison); 580 Ident__abnormal_termination->setIsPoisoned(Poison); 581 Ident___abnormal_termination->setIsPoisoned(Poison); 582 Ident_AbnormalTermination->setIsPoisoned(Poison); 583 } 584 585 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 586 assert(Identifier.getIdentifierInfo() && 587 "Can't handle identifiers without identifier info!"); 588 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 589 PoisonReasons.find(Identifier.getIdentifierInfo()); 590 if(it == PoisonReasons.end()) 591 Diag(Identifier, diag::err_pp_used_poisoned_id); 592 else 593 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 594 } 595 596 /// \brief Returns a diagnostic message kind for reporting a future keyword as 597 /// appropriate for the identifier and specified language. 598 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 599 const LangOptions &LangOpts) { 600 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 601 602 if (LangOpts.CPlusPlus) 603 return llvm::StringSwitch<diag::kind>(II.getName()) 604 #define CXX11_KEYWORD(NAME, FLAGS) \ 605 .Case(#NAME, diag::warn_cxx11_keyword) 606 #include "clang/Basic/TokenKinds.def" 607 ; 608 609 llvm_unreachable( 610 "Keyword not known to come from a newer Standard or proposed Standard"); 611 } 612 613 /// HandleIdentifier - This callback is invoked when the lexer reads an 614 /// identifier. This callback looks up the identifier in the map and/or 615 /// potentially macro expands it or turns it into a named token (like 'for'). 616 /// 617 /// Note that callers of this method are guarded by checking the 618 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 619 /// IdentifierInfo methods that compute these properties will need to change to 620 /// match. 621 bool Preprocessor::HandleIdentifier(Token &Identifier) { 622 assert(Identifier.getIdentifierInfo() && 623 "Can't handle identifiers without identifier info!"); 624 625 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 626 627 // If the information about this identifier is out of date, update it from 628 // the external source. 629 // We have to treat __VA_ARGS__ in a special way, since it gets 630 // serialized with isPoisoned = true, but our preprocessor may have 631 // unpoisoned it if we're defining a C99 macro. 632 if (II.isOutOfDate()) { 633 bool CurrentIsPoisoned = false; 634 if (&II == Ident__VA_ARGS__) 635 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 636 637 ExternalSource->updateOutOfDateIdentifier(II); 638 Identifier.setKind(II.getTokenID()); 639 640 if (&II == Ident__VA_ARGS__) 641 II.setIsPoisoned(CurrentIsPoisoned); 642 } 643 644 // If this identifier was poisoned, and if it was not produced from a macro 645 // expansion, emit an error. 646 if (II.isPoisoned() && CurPPLexer) { 647 HandlePoisonedIdentifier(Identifier); 648 } 649 650 // If this is a macro to be expanded, do it. 651 if (MacroDefinition MD = getMacroDefinition(&II)) { 652 auto *MI = MD.getMacroInfo(); 653 assert(MI && "macro definition with no macro info?"); 654 if (!DisableMacroExpansion) { 655 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 656 // C99 6.10.3p10: If the preprocessing token immediately after the 657 // macro name isn't a '(', this macro should not be expanded. 658 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 659 return HandleMacroExpandedIdentifier(Identifier, MD); 660 } else { 661 // C99 6.10.3.4p2 says that a disabled macro may never again be 662 // expanded, even if it's in a context where it could be expanded in the 663 // future. 664 Identifier.setFlag(Token::DisableExpand); 665 if (MI->isObjectLike() || isNextPPTokenLParen()) 666 Diag(Identifier, diag::pp_disabled_macro_expansion); 667 } 668 } 669 } 670 671 // If this identifier is a keyword in a newer Standard or proposed Standard, 672 // produce a warning. Don't warn if we're not considering macro expansion, 673 // since this identifier might be the name of a macro. 674 // FIXME: This warning is disabled in cases where it shouldn't be, like 675 // "#define constexpr constexpr", "int constexpr;" 676 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { 677 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) 678 << II.getName(); 679 // Don't diagnose this keyword again in this translation unit. 680 II.setIsFutureCompatKeyword(false); 681 } 682 683 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 684 // then we act as if it is the actual operator and not the textual 685 // representation of it. 686 if (II.isCPlusPlusOperatorKeyword()) 687 Identifier.setIdentifierInfo(nullptr); 688 689 // If this is an extension token, diagnose its use. 690 // We avoid diagnosing tokens that originate from macro definitions. 691 // FIXME: This warning is disabled in cases where it shouldn't be, 692 // like "#define TY typeof", "TY(1) x". 693 if (II.isExtensionToken() && !DisableMacroExpansion) 694 Diag(Identifier, diag::ext_token_used); 695 696 // If this is the 'import' contextual keyword following an '@', note 697 // that the next token indicates a module name. 698 // 699 // Note that we do not treat 'import' as a contextual 700 // keyword when we're in a caching lexer, because caching lexers only get 701 // used in contexts where import declarations are disallowed. 702 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 703 !DisableMacroExpansion && 704 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && 705 CurLexerKind != CLK_CachingLexer) { 706 ModuleImportLoc = Identifier.getLocation(); 707 ModuleImportPath.clear(); 708 ModuleImportExpectsIdentifier = true; 709 CurLexerKind = CLK_LexAfterModuleImport; 710 } 711 return true; 712 } 713 714 void Preprocessor::Lex(Token &Result) { 715 // We loop here until a lex function retuns a token; this avoids recursion. 716 bool ReturnedToken; 717 do { 718 switch (CurLexerKind) { 719 case CLK_Lexer: 720 ReturnedToken = CurLexer->Lex(Result); 721 break; 722 case CLK_PTHLexer: 723 ReturnedToken = CurPTHLexer->Lex(Result); 724 break; 725 case CLK_TokenLexer: 726 ReturnedToken = CurTokenLexer->Lex(Result); 727 break; 728 case CLK_CachingLexer: 729 CachingLex(Result); 730 ReturnedToken = true; 731 break; 732 case CLK_LexAfterModuleImport: 733 LexAfterModuleImport(Result); 734 ReturnedToken = true; 735 break; 736 } 737 } while (!ReturnedToken); 738 739 LastTokenWasAt = Result.is(tok::at); 740 } 741 742 743 /// \brief Lex a token following the 'import' contextual keyword. 744 /// 745 void Preprocessor::LexAfterModuleImport(Token &Result) { 746 // Figure out what kind of lexer we actually have. 747 recomputeCurLexerKind(); 748 749 // Lex the next token. 750 Lex(Result); 751 752 // The token sequence 753 // 754 // import identifier (. identifier)* 755 // 756 // indicates a module import directive. We already saw the 'import' 757 // contextual keyword, so now we're looking for the identifiers. 758 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 759 // We expected to see an identifier here, and we did; continue handling 760 // identifiers. 761 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 762 Result.getLocation())); 763 ModuleImportExpectsIdentifier = false; 764 CurLexerKind = CLK_LexAfterModuleImport; 765 return; 766 } 767 768 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 769 // see the next identifier. 770 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 771 ModuleImportExpectsIdentifier = true; 772 CurLexerKind = CLK_LexAfterModuleImport; 773 return; 774 } 775 776 // If we have a non-empty module path, load the named module. 777 if (!ModuleImportPath.empty()) { 778 Module *Imported = nullptr; 779 if (getLangOpts().Modules) { 780 Imported = TheModuleLoader.loadModule(ModuleImportLoc, 781 ModuleImportPath, 782 Module::Hidden, 783 /*IsIncludeDirective=*/false); 784 if (Imported) 785 makeModuleVisible(Imported, ModuleImportLoc); 786 } 787 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) 788 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 789 } 790 } 791 792 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { 793 CurSubmoduleState->VisibleModules.setVisible( 794 M, Loc, [](Module *) {}, 795 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { 796 // FIXME: Include the path in the diagnostic. 797 // FIXME: Include the import location for the conflicting module. 798 Diag(ModuleImportLoc, diag::warn_module_conflict) 799 << Path[0]->getFullModuleName() 800 << Conflict->getFullModuleName() 801 << Message; 802 }); 803 804 // Add this module to the imports list of the currently-built submodule. 805 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) 806 BuildingSubmoduleStack.back().M->Imports.insert(M); 807 } 808 809 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 810 const char *DiagnosticTag, 811 bool AllowMacroExpansion) { 812 // We need at least one string literal. 813 if (Result.isNot(tok::string_literal)) { 814 Diag(Result, diag::err_expected_string_literal) 815 << /*Source='in...'*/0 << DiagnosticTag; 816 return false; 817 } 818 819 // Lex string literal tokens, optionally with macro expansion. 820 SmallVector<Token, 4> StrToks; 821 do { 822 StrToks.push_back(Result); 823 824 if (Result.hasUDSuffix()) 825 Diag(Result, diag::err_invalid_string_udl); 826 827 if (AllowMacroExpansion) 828 Lex(Result); 829 else 830 LexUnexpandedToken(Result); 831 } while (Result.is(tok::string_literal)); 832 833 // Concatenate and parse the strings. 834 StringLiteralParser Literal(StrToks, *this); 835 assert(Literal.isAscii() && "Didn't allow wide strings in"); 836 837 if (Literal.hadError) 838 return false; 839 840 if (Literal.Pascal) { 841 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 842 << /*Source='in...'*/0 << DiagnosticTag; 843 return false; 844 } 845 846 String = Literal.GetString(); 847 return true; 848 } 849 850 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 851 assert(Tok.is(tok::numeric_constant)); 852 SmallString<8> IntegerBuffer; 853 bool NumberInvalid = false; 854 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 855 if (NumberInvalid) 856 return false; 857 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 858 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 859 return false; 860 llvm::APInt APVal(64, 0); 861 if (Literal.GetIntegerValue(APVal)) 862 return false; 863 Lex(Tok); 864 Value = APVal.getLimitedValue(); 865 return true; 866 } 867 868 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 869 assert(Handler && "NULL comment handler"); 870 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 871 CommentHandlers.end() && "Comment handler already registered"); 872 CommentHandlers.push_back(Handler); 873 } 874 875 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 876 std::vector<CommentHandler *>::iterator Pos 877 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 878 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 879 CommentHandlers.erase(Pos); 880 } 881 882 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 883 bool AnyPendingTokens = false; 884 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 885 HEnd = CommentHandlers.end(); 886 H != HEnd; ++H) { 887 if ((*H)->HandleComment(*this, Comment)) 888 AnyPendingTokens = true; 889 } 890 if (!AnyPendingTokens || getCommentRetentionState()) 891 return false; 892 Lex(result); 893 return true; 894 } 895 896 ModuleLoader::~ModuleLoader() { } 897 898 CommentHandler::~CommentHandler() { } 899 900 CodeCompletionHandler::~CodeCompletionHandler() { } 901 902 void Preprocessor::createPreprocessingRecord() { 903 if (Record) 904 return; 905 906 Record = new PreprocessingRecord(getSourceManager()); 907 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 908 } 909