//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
// -H - Print the name of each header file used.
// -d[DNI] - Dump various things.
// -fworking-directory - #line's with preprocessor's working dir.
// -fpreprocessed
// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
// -W*
// -w
//
// Messages to emit:
// "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;

//===----------------------------------------------------------------------===// 56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { } 57 58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts, 59 DiagnosticsEngine &diags, LangOptions &opts, 60 SourceManager &SM, HeaderSearch &Headers, 61 ModuleLoader &TheModuleLoader, 62 IdentifierInfoLookup *IILookup, bool OwnsHeaders, 63 TranslationUnitKind TUKind) 64 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), 65 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), 66 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), 67 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), 68 Identifiers(opts, IILookup), 69 PragmaHandlers(new PragmaNamespace(StringRef())), 70 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), 71 CodeCompletionFile(nullptr), CodeCompletionOffset(0), 72 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), 73 CodeCompletionReached(0), MainFileDir(nullptr), 74 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), 75 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), 76 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), 77 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { 78 OwnsHeaderSearch = OwnsHeaders; 79 80 CounterValue = 0; // __COUNTER__ starts at 0. 81 82 // Clear stats. 83 NumDirectives = NumDefined = NumUndefined = NumPragma = 0; 84 NumIf = NumElse = NumEndif = 0; 85 NumEnteredSourceFiles = 0; 86 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; 87 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; 88 MaxIncludeStackDepth = 0; 89 NumSkipped = 0; 90 91 // Default to discarding comments. 92 KeepComments = false; 93 KeepMacroComments = false; 94 SuppressIncludeNotFoundError = false; 95 96 // Macro expansion is enabled. 
97 DisableMacroExpansion = false; 98 MacroExpansionInDirectivesOverride = false; 99 InMacroArgs = false; 100 InMacroArgPreExpansion = false; 101 NumCachedTokenLexers = 0; 102 PragmasEnabled = true; 103 ParsingIfOrElifDirective = false; 104 PreprocessedOutput = false; 105 106 CachedLexPos = 0; 107 108 // We haven't read anything from the external source. 109 ReadMacrosFromExternalSource = false; 110 111 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. 112 // This gets unpoisoned where it is allowed. 113 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); 114 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); 115 116 // Initialize the pragma handlers. 117 RegisterBuiltinPragmas(); 118 119 // Initialize builtin macros like __LINE__ and friends. 120 RegisterBuiltinMacros(); 121 122 if(LangOpts.Borland) { 123 Ident__exception_info = getIdentifierInfo("_exception_info"); 124 Ident___exception_info = getIdentifierInfo("__exception_info"); 125 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); 126 Ident__exception_code = getIdentifierInfo("_exception_code"); 127 Ident___exception_code = getIdentifierInfo("__exception_code"); 128 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); 129 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); 130 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); 131 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); 132 } else { 133 Ident__exception_info = Ident__exception_code = nullptr; 134 Ident__abnormal_termination = Ident___exception_info = nullptr; 135 Ident___exception_code = Ident___abnormal_termination = nullptr; 136 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; 137 Ident_AbnormalTermination = nullptr; 138 } 139 } 140 141 Preprocessor::~Preprocessor() { 142 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); 143 144 IncludeMacroStack.clear(); 
145 146 // Destroy any macro definitions. 147 while (MacroInfoChain *I = MIChainHead) { 148 MIChainHead = I->Next; 149 I->~MacroInfoChain(); 150 } 151 152 // Free any cached macro expanders. 153 // This populates MacroArgCache, so all TokenLexers need to be destroyed 154 // before the code below that frees up the MacroArgCache list. 155 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); 156 CurTokenLexer.reset(); 157 158 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) { 159 DeserialMIChainHead = I->Next; 160 I->~DeserializedMacroInfoChain(); 161 } 162 163 // Free any cached MacroArgs. 164 for (MacroArgs *ArgList = MacroArgCache; ArgList;) 165 ArgList = ArgList->deallocate(); 166 167 // Delete the header search info, if we own it. 168 if (OwnsHeaderSearch) 169 delete &HeaderInfo; 170 } 171 172 void Preprocessor::Initialize(const TargetInfo &Target, 173 const TargetInfo *AuxTarget) { 174 assert((!this->Target || this->Target == &Target) && 175 "Invalid override of target information"); 176 this->Target = &Target; 177 178 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && 179 "Invalid override of aux target information."); 180 this->AuxTarget = AuxTarget; 181 182 // Initialize information about built-ins. 
183 BuiltinInfo.InitializeTarget(Target, AuxTarget); 184 HeaderInfo.setTarget(Target); 185 } 186 187 void Preprocessor::InitializeForModelFile() { 188 NumEnteredSourceFiles = 0; 189 190 // Reset pragmas 191 PragmaHandlersBackup = std::move(PragmaHandlers); 192 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef()); 193 RegisterBuiltinPragmas(); 194 195 // Reset PredefinesFileID 196 PredefinesFileID = FileID(); 197 } 198 199 void Preprocessor::FinalizeForModelFile() { 200 NumEnteredSourceFiles = 1; 201 202 PragmaHandlers = std::move(PragmaHandlersBackup); 203 } 204 205 void Preprocessor::setPTHManager(PTHManager* pm) { 206 PTH.reset(pm); 207 FileMgr.addStatCache(PTH->createStatCache()); 208 } 209 210 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { 211 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" 212 << getSpelling(Tok) << "'"; 213 214 if (!DumpFlags) return; 215 216 llvm::errs() << "\t"; 217 if (Tok.isAtStartOfLine()) 218 llvm::errs() << " [StartOfLine]"; 219 if (Tok.hasLeadingSpace()) 220 llvm::errs() << " [LeadingSpace]"; 221 if (Tok.isExpandDisabled()) 222 llvm::errs() << " [ExpandDisabled]"; 223 if (Tok.needsCleaning()) { 224 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); 225 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) 226 << "']"; 227 } 228 229 llvm::errs() << "\tLoc=<"; 230 DumpLocation(Tok.getLocation()); 231 llvm::errs() << ">"; 232 } 233 234 void Preprocessor::DumpLocation(SourceLocation Loc) const { 235 Loc.dump(SourceMgr); 236 } 237 238 void Preprocessor::DumpMacro(const MacroInfo &MI) const { 239 llvm::errs() << "MACRO: "; 240 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { 241 DumpToken(MI.getReplacementToken(i)); 242 llvm::errs() << " "; 243 } 244 llvm::errs() << "\n"; 245 } 246 247 void Preprocessor::PrintStats() { 248 llvm::errs() << "\n*** Preprocessor Stats:\n"; 249 llvm::errs() << NumDirectives << " directives found:\n"; 250 llvm::errs() << " " << 
NumDefined << " #define.\n"; 251 llvm::errs() << " " << NumUndefined << " #undef.\n"; 252 llvm::errs() << " #include/#include_next/#import:\n"; 253 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; 254 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; 255 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; 256 llvm::errs() << " " << NumElse << " #else/#elif.\n"; 257 llvm::errs() << " " << NumEndif << " #endif.\n"; 258 llvm::errs() << " " << NumPragma << " #pragma.\n"; 259 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; 260 261 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" 262 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " 263 << NumFastMacroExpanded << " on the fast path.\n"; 264 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) 265 << " token paste (##) operations performed, " 266 << NumFastTokenPaste << " on the fast path.\n"; 267 268 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; 269 270 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); 271 llvm::errs() << "\n Macro Expanded Tokens: " 272 << llvm::capacity_in_bytes(MacroExpandedTokens); 273 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); 274 // FIXME: List information for all submodules. 
275 llvm::errs() << "\n Macros: " 276 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); 277 llvm::errs() << "\n #pragma push_macro Info: " 278 << llvm::capacity_in_bytes(PragmaPushMacroInfo); 279 llvm::errs() << "\n Poison Reasons: " 280 << llvm::capacity_in_bytes(PoisonReasons); 281 llvm::errs() << "\n Comment Handlers: " 282 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; 283 } 284 285 Preprocessor::macro_iterator 286 Preprocessor::macro_begin(bool IncludeExternalMacros) const { 287 if (IncludeExternalMacros && ExternalSource && 288 !ReadMacrosFromExternalSource) { 289 ReadMacrosFromExternalSource = true; 290 ExternalSource->ReadDefinedMacros(); 291 } 292 293 // Make sure we cover all macros in visible modules. 294 for (const ModuleMacro &Macro : ModuleMacros) 295 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); 296 297 return CurSubmoduleState->Macros.begin(); 298 } 299 300 size_t Preprocessor::getTotalMemory() const { 301 return BP.getTotalMemory() 302 + llvm::capacity_in_bytes(MacroExpandedTokens) 303 + Predefines.capacity() /* Predefines buffer. */ 304 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, 305 // and ModuleMacros. 306 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) 307 + llvm::capacity_in_bytes(PragmaPushMacroInfo) 308 + llvm::capacity_in_bytes(PoisonReasons) 309 + llvm::capacity_in_bytes(CommentHandlers); 310 } 311 312 Preprocessor::macro_iterator 313 Preprocessor::macro_end(bool IncludeExternalMacros) const { 314 if (IncludeExternalMacros && ExternalSource && 315 !ReadMacrosFromExternalSource) { 316 ReadMacrosFromExternalSource = true; 317 ExternalSource->ReadDefinedMacros(); 318 } 319 320 return CurSubmoduleState->Macros.end(); 321 } 322 323 /// \brief Compares macro tokens with a specified token value sequence. 
324 static bool MacroDefinitionEquals(const MacroInfo *MI, 325 ArrayRef<TokenValue> Tokens) { 326 return Tokens.size() == MI->getNumTokens() && 327 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 328 } 329 330 StringRef Preprocessor::getLastMacroWithSpelling( 331 SourceLocation Loc, 332 ArrayRef<TokenValue> Tokens) const { 333 SourceLocation BestLocation; 334 StringRef BestSpelling; 335 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 336 I != E; ++I) { 337 const MacroDirective::DefInfo 338 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); 339 if (!Def || !Def.getMacroInfo()) 340 continue; 341 if (!Def.getMacroInfo()->isObjectLike()) 342 continue; 343 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 344 continue; 345 SourceLocation Location = Def.getLocation(); 346 // Choose the macro defined latest. 347 if (BestLocation.isInvalid() || 348 (Location.isValid() && 349 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 350 BestLocation = Location; 351 BestSpelling = I->first->getName(); 352 } 353 } 354 return BestSpelling; 355 } 356 357 void Preprocessor::recomputeCurLexerKind() { 358 if (CurLexer) 359 CurLexerKind = CLK_Lexer; 360 else if (CurPTHLexer) 361 CurLexerKind = CLK_PTHLexer; 362 else if (CurTokenLexer) 363 CurLexerKind = CLK_TokenLexer; 364 else 365 CurLexerKind = CLK_CachingLexer; 366 } 367 368 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 369 unsigned CompleteLine, 370 unsigned CompleteColumn) { 371 assert(File); 372 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 373 assert(!CodeCompletionFile && "Already set"); 374 375 using llvm::MemoryBuffer; 376 377 // Load the actual file's contents. 378 bool Invalid = false; 379 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 380 if (Invalid) 381 return true; 382 383 // Find the byte position of the truncation point. 
384 const char *Position = Buffer->getBufferStart(); 385 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 386 for (; *Position; ++Position) { 387 if (*Position != '\r' && *Position != '\n') 388 continue; 389 390 // Eat \r\n or \n\r as a single line. 391 if ((Position[1] == '\r' || Position[1] == '\n') && 392 Position[0] != Position[1]) 393 ++Position; 394 ++Position; 395 break; 396 } 397 } 398 399 Position += CompleteColumn - 1; 400 401 // If pointing inside the preamble, adjust the position at the beginning of 402 // the file after the preamble. 403 if (SkipMainFilePreamble.first && 404 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 405 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 406 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 407 } 408 409 if (Position > Buffer->getBufferEnd()) 410 Position = Buffer->getBufferEnd(); 411 412 CodeCompletionFile = File; 413 CodeCompletionOffset = Position - Buffer->getBufferStart(); 414 415 std::unique_ptr<MemoryBuffer> NewBuffer = 416 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 417 Buffer->getBufferIdentifier()); 418 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 419 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 420 *NewPos = '\0'; 421 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 422 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 423 424 return false; 425 } 426 427 void Preprocessor::CodeCompleteNaturalLanguage() { 428 if (CodeComplete) 429 CodeComplete->CodeCompleteNaturalLanguage(); 430 setCodeCompletionReached(); 431 } 432 433 /// getSpelling - This method is used to get the spelling of a token into a 434 /// SmallVector. Note that the returned StringRef may not point to the 435 /// supplied buffer if a copy can be avoided. 
436 StringRef Preprocessor::getSpelling(const Token &Tok, 437 SmallVectorImpl<char> &Buffer, 438 bool *Invalid) const { 439 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 440 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 441 // Try the fast path. 442 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 443 return II->getName(); 444 } 445 446 // Resize the buffer if we need to copy into it. 447 if (Tok.needsCleaning()) 448 Buffer.resize(Tok.getLength()); 449 450 const char *Ptr = Buffer.data(); 451 unsigned Len = getSpelling(Tok, Ptr, Invalid); 452 return StringRef(Ptr, Len); 453 } 454 455 /// CreateString - Plop the specified string into a scratch buffer and return a 456 /// location for it. If specified, the source location provides a source 457 /// location for the token. 458 void Preprocessor::CreateString(StringRef Str, Token &Tok, 459 SourceLocation ExpansionLocStart, 460 SourceLocation ExpansionLocEnd) { 461 Tok.setLength(Str.size()); 462 463 const char *DestPtr; 464 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 465 466 if (ExpansionLocStart.isValid()) 467 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 468 ExpansionLocEnd, Str.size()); 469 Tok.setLocation(Loc); 470 471 // If this is a raw identifier or a literal token, set the pointer data. 
472 if (Tok.is(tok::raw_identifier)) 473 Tok.setRawIdentifierData(DestPtr); 474 else if (Tok.isLiteral()) 475 Tok.setLiteralData(DestPtr); 476 } 477 478 Module *Preprocessor::getCurrentModule() { 479 if (getLangOpts().CurrentModule.empty()) 480 return nullptr; 481 482 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 483 } 484 485 //===----------------------------------------------------------------------===// 486 // Preprocessor Initialization Methods 487 //===----------------------------------------------------------------------===// 488 489 490 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 491 /// which implicitly adds the builtin defines etc. 492 void Preprocessor::EnterMainSourceFile() { 493 // We do not allow the preprocessor to reenter the main file. Doing so will 494 // cause FileID's to accumulate information from both runs (e.g. #line 495 // information) and predefined macros aren't guaranteed to be set properly. 496 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 497 FileID MainFileID = SourceMgr.getMainFileID(); 498 499 // If MainFileID is loaded it means we loaded an AST file, no need to enter 500 // a main file. 501 if (!SourceMgr.isLoadedFileID(MainFileID)) { 502 // Enter the main file source buffer. 503 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 504 505 // If we've been asked to skip bytes in the main file (e.g., as part of a 506 // precompiled preamble), do so now. 507 if (SkipMainFilePreamble.first > 0) 508 CurLexer->SkipBytes(SkipMainFilePreamble.first, 509 SkipMainFilePreamble.second); 510 511 // Tell the header info that the main file was entered. If the file is later 512 // #imported, it won't be re-entered. 513 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 514 HeaderInfo.IncrementIncludeCount(FE); 515 } 516 517 // Preprocess Predefines to populate the initial preprocessor state. 
518 std::unique_ptr<llvm::MemoryBuffer> SB = 519 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 520 assert(SB && "Cannot create predefined source buffer"); 521 FileID FID = SourceMgr.createFileID(std::move(SB)); 522 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 523 setPredefinesFileID(FID); 524 525 // Start parsing the predefines. 526 EnterSourceFile(FID, nullptr, SourceLocation()); 527 } 528 529 void Preprocessor::EndSourceFile() { 530 // Notify the client that we reached the end of the source file. 531 if (Callbacks) 532 Callbacks->EndOfMainFile(); 533 } 534 535 //===----------------------------------------------------------------------===// 536 // Lexer Event Handling. 537 //===----------------------------------------------------------------------===// 538 539 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 540 /// identifier information for the token and install it into the token, 541 /// updating the token kind accordingly. 542 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 543 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 544 545 // Look up this token, see if it is a macro, or if it is a language keyword. 546 IdentifierInfo *II; 547 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 548 // No cleaning needed, just use the characters from the lexed buffer. 549 II = getIdentifierInfo(Identifier.getRawIdentifier()); 550 } else { 551 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 552 SmallString<64> IdentifierBuffer; 553 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 554 555 if (Identifier.hasUCN()) { 556 SmallString<64> UCNIdentifierBuffer; 557 expandUCNs(UCNIdentifierBuffer, CleanedStr); 558 II = getIdentifierInfo(UCNIdentifierBuffer); 559 } else { 560 II = getIdentifierInfo(CleanedStr); 561 } 562 } 563 564 // Update the token info (identifier info and appropriate token kind). 
565 Identifier.setIdentifierInfo(II); 566 Identifier.setKind(II->getTokenID()); 567 568 return II; 569 } 570 571 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 572 PoisonReasons[II] = DiagID; 573 } 574 575 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 576 assert(Ident__exception_code && Ident__exception_info); 577 assert(Ident___exception_code && Ident___exception_info); 578 Ident__exception_code->setIsPoisoned(Poison); 579 Ident___exception_code->setIsPoisoned(Poison); 580 Ident_GetExceptionCode->setIsPoisoned(Poison); 581 Ident__exception_info->setIsPoisoned(Poison); 582 Ident___exception_info->setIsPoisoned(Poison); 583 Ident_GetExceptionInfo->setIsPoisoned(Poison); 584 Ident__abnormal_termination->setIsPoisoned(Poison); 585 Ident___abnormal_termination->setIsPoisoned(Poison); 586 Ident_AbnormalTermination->setIsPoisoned(Poison); 587 } 588 589 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 590 assert(Identifier.getIdentifierInfo() && 591 "Can't handle identifiers without identifier info!"); 592 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 593 PoisonReasons.find(Identifier.getIdentifierInfo()); 594 if(it == PoisonReasons.end()) 595 Diag(Identifier, diag::err_pp_used_poisoned_id); 596 else 597 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 598 } 599 600 /// \brief Returns a diagnostic message kind for reporting a future keyword as 601 /// appropriate for the identifier and specified language. 
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // In C++ mode, map every keyword that TokenKinds.def lists through
  // CXX11_KEYWORD to warn_cxx11_keyword. The .Case() chain is generated by
  // including the .def file with the macro defined just above it.
  if (LangOpts.CPlusPlus)
    return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
        ;

  // Only reached when not compiling C++.
  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}

/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier. This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
///          is expanded as a macro, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    // ExternalSource is dereferenced unconditionally here.
    ExternalSource->updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    // Restore the poison state __VA_ARGS__ had before the update.
    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDefinition MD = getMacroDefinition(&II)) {
    auto *MI = MD.getMacroInfo();
    assert(MI && "macro definition with no macro info?");
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in a newer Standard or proposed Standard,
  // produce a warning. Don't warn if we're not considering macro expansion,
  // since this identifier might be the name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
        << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsFutureCompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    // The next call to Lex() is routed through LexAfterModuleImport().
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}

/// Lex the next token into \p Result by dispatching on CurLexerKind, repeating
/// until the selected lexer reports that it produced a token. Afterwards
/// LastTokenWasAt records whether that token was '@'.
void Preprocessor::Lex(Token &Result) {
  // We loop here until a lex function returns a token; this avoids recursion.
  bool ReturnedToken;
  do {
    switch (CurLexerKind) {
    case CLK_Lexer:
      ReturnedToken = CurLexer->Lex(Result);
      break;
    case CLK_PTHLexer:
      ReturnedToken = CurPTHLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      ReturnedToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      CachingLex(Result);
      ReturnedToken = true;
      break;
    case CLK_LexAfterModuleImport:
      LexAfterModuleImport(Result);
      ReturnedToken = true;
      break;
    }
  } while (!ReturnedToken);

  LastTokenWasAt = Result.is(tok::at);
}


/// \brief Lex a token following the 'import' contextual keyword.
///
/// Collects the dotted module path one token per call. While more of the path
/// is expected, CurLexerKind is set back to CLK_LexAfterModuleImport so the
/// next Lex() call comes here again. On the first token that does not continue
/// the path, a non-empty path is loaded (when modules are enabled) and
/// reported to the callbacks.
void Preprocessor::LexAfterModuleImport(Token &Result) {
  // Figure out what kind of lexer we actually have.
  recomputeCurLexerKind();

  // Lex the next token.
  Lex(Result);

  // The token sequence
  //
  //   import identifier (. identifier)*
  //
  // indicates a module import directive. We already saw the 'import'
  // contextual keyword, so now we're looking for the identifiers.
  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
    // We expected to see an identifier here, and we did; continue handling
    // identifiers.
    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
                                              Result.getLocation()));
    ModuleImportExpectsIdentifier = false;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
  // see the next identifier.
  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
    return;
  }

  // If we have a non-empty module path, load the named module.
  if (!ModuleImportPath.empty()) {
    Module *Imported = nullptr;
    if (getLangOpts().Modules) {
      Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                            ModuleImportPath,
                                            Module::Hidden,
                                            /*IsIncludeDirective=*/false);
      if (Imported)
        makeModuleVisible(Imported, ModuleImportLoc);
    }
    // Imported stays null here when modules are disabled or loading failed.
    if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
      Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
  }
}

/// Mark module \p M visible at \p Loc in the current submodule state, warning
/// about any module conflict that is reported, and add \p M to the imports of
/// the submodule currently being built (if there is one and it is not \p M
/// itself).
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
  CurSubmoduleState->VisibleModules.setVisible(
      M, Loc, [](Module *) {},
      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
        // FIXME: Include the path in the diagnostic.
        // FIXME: Include the import location for the conflicting module.
        // NOTE(review): the warning is attached to the ModuleImportLoc member,
        // not to the Loc parameter -- confirm this is intended for callers
        // that pass a different location.
        Diag(ModuleImportLoc, diag::warn_module_conflict)
          << Path[0]->getFullModuleName()
          << Conflict->getFullModuleName()
          << Message;
      });

  // Add this module to the imports list of the currently-built submodule.
  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
    BuildingSubmoduleStack.back().M->Imports.insert(M);
}

/// Finish lexing a run of adjacent string literal tokens that starts with
/// \p Result and store the concatenated contents in \p String.
///
/// On return \p Result holds the first token after the string literals.
/// \p DiagnosticTag is streamed into the "expected string literal"
/// diagnostic. \p AllowMacroExpansion selects Lex() over LexUnexpandedToken()
/// for fetching the following tokens.
///
/// \returns false if \p Result is not a string literal, if parsing the
///          literals failed, or if the literal is a Pascal string; true
///          otherwise.
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
                                          const char *DiagnosticTag,
                                          bool AllowMacroExpansion) {
  // We need at least one string literal.
  if (Result.isNot(tok::string_literal)) {
    Diag(Result, diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  // Lex string literal tokens, optionally with macro expansion.
  SmallVector<Token, 4> StrToks;
  do {
    StrToks.push_back(Result);

    // A user-defined-literal suffix is diagnosed, but lexing continues.
    if (Result.hasUDSuffix())
      Diag(Result, diag::err_invalid_string_udl);

    if (AllowMacroExpansion)
      Lex(Result);
    else
      LexUnexpandedToken(Result);
  } while (Result.is(tok::string_literal));

  // Concatenate and parse the strings.
  StringLiteralParser Literal(StrToks, *this);
  assert(Literal.isAscii() && "Didn't allow wide strings in");

  if (Literal.hadError)
    return false;

  if (Literal.Pascal) {
    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
      << /*Source='in...'*/0 << DiagnosticTag;
    return false;
  }

  String = Literal.GetString();
  return true;
}

/// Parse the numeric_constant token \p Tok as a plain integer literal.
///
/// On success the value is stored in \p Value and \p Tok is advanced to the
/// next token. On failure (invalid spelling, parse error, not an integer
/// literal, user-defined suffix, or GetIntegerValue() reporting failure for
/// the 64-bit result) \p Tok and \p Value are left untouched.
///
/// \returns true on success, false on failure.
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
  assert(Tok.is(tok::numeric_constant));
  SmallString<8> IntegerBuffer;
  bool NumberInvalid = false;
  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
  if (NumberInvalid)
    return false;
  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
    return false;
  llvm::APInt APVal(64, 0);
  if (Literal.GetIntegerValue(APVal))
    return false;
  Lex(Tok);
  Value = APVal.getLimitedValue();
  return true;
}

/// Append \p Handler to the list of comment handlers. Asserts that it is
/// non-null and not already registered.
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
  assert(Handler && "NULL comment handler");
  assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
         CommentHandlers.end() && "Comment handler already registered");
  CommentHandlers.push_back(Handler);
}

/// Remove \p Handler from the list of comment handlers. The handler must be
/// registered: this is only checked by an assert, and the iterator is erased
/// unconditionally.
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
  std::vector<CommentHandler *>::iterator Pos
  = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
  CommentHandlers.erase(Pos);
}

/// Pass the comment range \p Comment to every registered comment handler.
///
/// \returns true, after lexing the next token into \p result, if at least one
///          handler returned true and getCommentRetentionState() is false;
///          false otherwise, in which case \p result is not touched.
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
  bool AnyPendingTokens = false;
  // Every handler is called, even after one has already returned true.
  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
       HEnd = CommentHandlers.end();
       H != HEnd; ++H) {
    if ((*H)->HandleComment(*this, Comment))
      AnyPendingTokens = true;
  }
  if (!AnyPendingTokens || getCommentRetentionState())
    return false;
  Lex(result);
  return true;
}

// Out-of-line destructor definitions for the handler interfaces.
ModuleLoader::~ModuleLoader() { }

CommentHandler::~CommentHandler() { }

CodeCompletionHandler::~CodeCompletionHandler() { }

/// Create the preprocessing record and register it as a PPCallbacks client.
/// Does nothing if a record already exists.
void Preprocessor::createPreprocessingRecord() {
  if (Record)
    return;

  // The raw pointer is kept in Record and the same object is also handed to
  // addPPCallbacks() wrapped in a unique_ptr.
  Record = new PreprocessingRecord(getSourceManager());
  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
}