1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APInt.h"
47 #include "llvm/ADT/DenseMap.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/SmallVector.h"
50 #include "llvm/ADT/STLExtras.h"
51 #include "llvm/ADT/StringRef.h"
52 #include "llvm/ADT/StringSwitch.h"
53 #include "llvm/Support/Capacity.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <memory>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace clang;
65 
66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
67 
68 //===----------------------------------------------------------------------===//
69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
70 
71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
72                            DiagnosticsEngine &diags, LangOptions &opts,
73                            SourceManager &SM, MemoryBufferCache &PCMCache,
74                            HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
75                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
76                            TranslationUnitKind TUKind)
77     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
78       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
79       PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
80       HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
81       ExternalSource(nullptr), Identifiers(opts, IILookup),
82       PragmaHandlers(new PragmaNamespace(StringRef())),
83       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
84       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
85       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
86       CodeCompletionReached(false), CodeCompletionII(nullptr),
87       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
88       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
89       CurLexerSubmodule(nullptr), Callbacks(nullptr),
90       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
91       Record(nullptr), MIChainHead(nullptr) {
92   OwnsHeaderSearch = OwnsHeaders;
93 
94   CounterValue = 0; // __COUNTER__ starts at 0.
95 
96   // Clear stats.
97   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
98   NumIf = NumElse = NumEndif = 0;
99   NumEnteredSourceFiles = 0;
100   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
101   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
102   MaxIncludeStackDepth = 0;
103   NumSkipped = 0;
104 
105   // Default to discarding comments.
106   KeepComments = false;
107   KeepMacroComments = false;
108   SuppressIncludeNotFoundError = false;
109 
110   // Macro expansion is enabled.
111   DisableMacroExpansion = false;
112   MacroExpansionInDirectivesOverride = false;
113   InMacroArgs = false;
114   InMacroArgPreExpansion = false;
115   NumCachedTokenLexers = 0;
116   PragmasEnabled = true;
117   ParsingIfOrElifDirective = false;
118   PreprocessedOutput = false;
119 
120   CachedLexPos = 0;
121 
122   // We haven't read anything from the external source.
123   ReadMacrosFromExternalSource = false;
124 
125   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
126   // This gets unpoisoned where it is allowed.
127   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
128   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
129 
130   // Initialize the pragma handlers.
131   RegisterBuiltinPragmas();
132 
133   // Initialize builtin macros like __LINE__ and friends.
134   RegisterBuiltinMacros();
135 
136   if(LangOpts.Borland) {
137     Ident__exception_info        = getIdentifierInfo("_exception_info");
138     Ident___exception_info       = getIdentifierInfo("__exception_info");
139     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
140     Ident__exception_code        = getIdentifierInfo("_exception_code");
141     Ident___exception_code       = getIdentifierInfo("__exception_code");
142     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
143     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
144     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
145     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
146   } else {
147     Ident__exception_info = Ident__exception_code = nullptr;
148     Ident__abnormal_termination = Ident___exception_info = nullptr;
149     Ident___exception_code = Ident___abnormal_termination = nullptr;
150     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
151     Ident_AbnormalTermination = nullptr;
152   }
153 
154   if (this->PPOpts->GeneratePreamble)
155     PreambleConditionalStack.startRecording();
156 }
157 
158 Preprocessor::~Preprocessor() {
159   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
160 
161   IncludeMacroStack.clear();
162 
163   // Destroy any macro definitions.
164   while (MacroInfoChain *I = MIChainHead) {
165     MIChainHead = I->Next;
166     I->~MacroInfoChain();
167   }
168 
169   // Free any cached macro expanders.
170   // This populates MacroArgCache, so all TokenLexers need to be destroyed
171   // before the code below that frees up the MacroArgCache list.
172   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
173   CurTokenLexer.reset();
174 
175   // Free any cached MacroArgs.
176   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
177     ArgList = ArgList->deallocate();
178 
179   // Delete the header search info, if we own it.
180   if (OwnsHeaderSearch)
181     delete &HeaderInfo;
182 }
183 
184 void Preprocessor::Initialize(const TargetInfo &Target,
185                               const TargetInfo *AuxTarget) {
186   assert((!this->Target || this->Target == &Target) &&
187          "Invalid override of target information");
188   this->Target = &Target;
189 
190   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
191          "Invalid override of aux target information.");
192   this->AuxTarget = AuxTarget;
193 
194   // Initialize information about built-ins.
195   BuiltinInfo.InitializeTarget(Target, AuxTarget);
196   HeaderInfo.setTarget(Target);
197 }
198 
199 void Preprocessor::InitializeForModelFile() {
200   NumEnteredSourceFiles = 0;
201 
202   // Reset pragmas
203   PragmaHandlersBackup = std::move(PragmaHandlers);
204   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
205   RegisterBuiltinPragmas();
206 
207   // Reset PredefinesFileID
208   PredefinesFileID = FileID();
209 }
210 
211 void Preprocessor::FinalizeForModelFile() {
212   NumEnteredSourceFiles = 1;
213 
214   PragmaHandlers = std::move(PragmaHandlersBackup);
215 }
216 
217 void Preprocessor::setPTHManager(PTHManager* pm) {
218   PTH.reset(pm);
219   FileMgr.addStatCache(PTH->createStatCache());
220 }
221 
222 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
223   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
224                << getSpelling(Tok) << "'";
225 
226   if (!DumpFlags) return;
227 
228   llvm::errs() << "\t";
229   if (Tok.isAtStartOfLine())
230     llvm::errs() << " [StartOfLine]";
231   if (Tok.hasLeadingSpace())
232     llvm::errs() << " [LeadingSpace]";
233   if (Tok.isExpandDisabled())
234     llvm::errs() << " [ExpandDisabled]";
235   if (Tok.needsCleaning()) {
236     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
237     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
238                  << "']";
239   }
240 
241   llvm::errs() << "\tLoc=<";
242   DumpLocation(Tok.getLocation());
243   llvm::errs() << ">";
244 }
245 
246 void Preprocessor::DumpLocation(SourceLocation Loc) const {
247   Loc.dump(SourceMgr);
248 }
249 
250 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
251   llvm::errs() << "MACRO: ";
252   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
253     DumpToken(MI.getReplacementToken(i));
254     llvm::errs() << "  ";
255   }
256   llvm::errs() << "\n";
257 }
258 
259 void Preprocessor::PrintStats() {
260   llvm::errs() << "\n*** Preprocessor Stats:\n";
261   llvm::errs() << NumDirectives << " directives found:\n";
262   llvm::errs() << "  " << NumDefined << " #define.\n";
263   llvm::errs() << "  " << NumUndefined << " #undef.\n";
264   llvm::errs() << "  #include/#include_next/#import:\n";
265   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
266   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
267   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
268   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
269   llvm::errs() << "  " << NumEndif << " #endif.\n";
270   llvm::errs() << "  " << NumPragma << " #pragma.\n";
271   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
272 
273   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
274              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
275              << NumFastMacroExpanded << " on the fast path.\n";
276   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
277              << " token paste (##) operations performed, "
278              << NumFastTokenPaste << " on the fast path.\n";
279 
280   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
281 
282   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
283   llvm::errs() << "\n  Macro Expanded Tokens: "
284                << llvm::capacity_in_bytes(MacroExpandedTokens);
285   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
286   // FIXME: List information for all submodules.
287   llvm::errs() << "\n  Macros: "
288                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
289   llvm::errs() << "\n  #pragma push_macro Info: "
290                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
291   llvm::errs() << "\n  Poison Reasons: "
292                << llvm::capacity_in_bytes(PoisonReasons);
293   llvm::errs() << "\n  Comment Handlers: "
294                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
295 }
296 
297 Preprocessor::macro_iterator
298 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
299   if (IncludeExternalMacros && ExternalSource &&
300       !ReadMacrosFromExternalSource) {
301     ReadMacrosFromExternalSource = true;
302     ExternalSource->ReadDefinedMacros();
303   }
304 
305   // Make sure we cover all macros in visible modules.
306   for (const ModuleMacro &Macro : ModuleMacros)
307     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
308 
309   return CurSubmoduleState->Macros.begin();
310 }
311 
312 size_t Preprocessor::getTotalMemory() const {
313   return BP.getTotalMemory()
314     + llvm::capacity_in_bytes(MacroExpandedTokens)
315     + Predefines.capacity() /* Predefines buffer. */
316     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
317     // and ModuleMacros.
318     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
319     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
320     + llvm::capacity_in_bytes(PoisonReasons)
321     + llvm::capacity_in_bytes(CommentHandlers);
322 }
323 
324 Preprocessor::macro_iterator
325 Preprocessor::macro_end(bool IncludeExternalMacros) const {
326   if (IncludeExternalMacros && ExternalSource &&
327       !ReadMacrosFromExternalSource) {
328     ReadMacrosFromExternalSource = true;
329     ExternalSource->ReadDefinedMacros();
330   }
331 
332   return CurSubmoduleState->Macros.end();
333 }
334 
335 /// \brief Compares macro tokens with a specified token value sequence.
336 static bool MacroDefinitionEquals(const MacroInfo *MI,
337                                   ArrayRef<TokenValue> Tokens) {
338   return Tokens.size() == MI->getNumTokens() &&
339       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
340 }
341 
342 StringRef Preprocessor::getLastMacroWithSpelling(
343                                     SourceLocation Loc,
344                                     ArrayRef<TokenValue> Tokens) const {
345   SourceLocation BestLocation;
346   StringRef BestSpelling;
347   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
348        I != E; ++I) {
349     const MacroDirective::DefInfo
350       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
351     if (!Def || !Def.getMacroInfo())
352       continue;
353     if (!Def.getMacroInfo()->isObjectLike())
354       continue;
355     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
356       continue;
357     SourceLocation Location = Def.getLocation();
358     // Choose the macro defined latest.
359     if (BestLocation.isInvalid() ||
360         (Location.isValid() &&
361          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
362       BestLocation = Location;
363       BestSpelling = I->first->getName();
364     }
365   }
366   return BestSpelling;
367 }
368 
369 void Preprocessor::recomputeCurLexerKind() {
370   if (CurLexer)
371     CurLexerKind = CLK_Lexer;
372   else if (CurPTHLexer)
373     CurLexerKind = CLK_PTHLexer;
374   else if (CurTokenLexer)
375     CurLexerKind = CLK_TokenLexer;
376   else
377     CurLexerKind = CLK_CachingLexer;
378 }
379 
380 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
381                                           unsigned CompleteLine,
382                                           unsigned CompleteColumn) {
383   assert(File);
384   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
385   assert(!CodeCompletionFile && "Already set");
386 
387   using llvm::MemoryBuffer;
388 
389   // Load the actual file's contents.
390   bool Invalid = false;
391   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
392   if (Invalid)
393     return true;
394 
395   // Find the byte position of the truncation point.
396   const char *Position = Buffer->getBufferStart();
397   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
398     for (; *Position; ++Position) {
399       if (*Position != '\r' && *Position != '\n')
400         continue;
401 
402       // Eat \r\n or \n\r as a single line.
403       if ((Position[1] == '\r' || Position[1] == '\n') &&
404           Position[0] != Position[1])
405         ++Position;
406       ++Position;
407       break;
408     }
409   }
410 
411   Position += CompleteColumn - 1;
412 
413   // If pointing inside the preamble, adjust the position at the beginning of
414   // the file after the preamble.
415   if (SkipMainFilePreamble.first &&
416       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
417     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
418       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
419   }
420 
421   if (Position > Buffer->getBufferEnd())
422     Position = Buffer->getBufferEnd();
423 
424   CodeCompletionFile = File;
425   CodeCompletionOffset = Position - Buffer->getBufferStart();
426 
427   std::unique_ptr<MemoryBuffer> NewBuffer =
428       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
429                                           Buffer->getBufferIdentifier());
430   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
431   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
432   *NewPos = '\0';
433   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
434   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
435 
436   return false;
437 }
438 
439 void Preprocessor::CodeCompleteNaturalLanguage() {
440   if (CodeComplete)
441     CodeComplete->CodeCompleteNaturalLanguage();
442   setCodeCompletionReached();
443 }
444 
445 /// getSpelling - This method is used to get the spelling of a token into a
446 /// SmallVector. Note that the returned StringRef may not point to the
447 /// supplied buffer if a copy can be avoided.
448 StringRef Preprocessor::getSpelling(const Token &Tok,
449                                           SmallVectorImpl<char> &Buffer,
450                                           bool *Invalid) const {
451   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
452   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
453     // Try the fast path.
454     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
455       return II->getName();
456   }
457 
458   // Resize the buffer if we need to copy into it.
459   if (Tok.needsCleaning())
460     Buffer.resize(Tok.getLength());
461 
462   const char *Ptr = Buffer.data();
463   unsigned Len = getSpelling(Tok, Ptr, Invalid);
464   return StringRef(Ptr, Len);
465 }
466 
467 /// CreateString - Plop the specified string into a scratch buffer and return a
468 /// location for it.  If specified, the source location provides a source
469 /// location for the token.
470 void Preprocessor::CreateString(StringRef Str, Token &Tok,
471                                 SourceLocation ExpansionLocStart,
472                                 SourceLocation ExpansionLocEnd) {
473   Tok.setLength(Str.size());
474 
475   const char *DestPtr;
476   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
477 
478   if (ExpansionLocStart.isValid())
479     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
480                                        ExpansionLocEnd, Str.size());
481   Tok.setLocation(Loc);
482 
483   // If this is a raw identifier or a literal token, set the pointer data.
484   if (Tok.is(tok::raw_identifier))
485     Tok.setRawIdentifierData(DestPtr);
486   else if (Tok.isLiteral())
487     Tok.setLiteralData(DestPtr);
488 }
489 
490 Module *Preprocessor::getCurrentModule() {
491   if (!getLangOpts().isCompilingModule())
492     return nullptr;
493 
494   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
495 }
496 
497 //===----------------------------------------------------------------------===//
498 // Preprocessor Initialization Methods
499 //===----------------------------------------------------------------------===//
500 
501 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
502 /// which implicitly adds the builtin defines etc.
503 void Preprocessor::EnterMainSourceFile() {
504   // We do not allow the preprocessor to reenter the main file.  Doing so will
505   // cause FileID's to accumulate information from both runs (e.g. #line
506   // information) and predefined macros aren't guaranteed to be set properly.
507   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
508   FileID MainFileID = SourceMgr.getMainFileID();
509 
510   // If MainFileID is loaded it means we loaded an AST file, no need to enter
511   // a main file.
512   if (!SourceMgr.isLoadedFileID(MainFileID)) {
513     // Enter the main file source buffer.
514     EnterSourceFile(MainFileID, nullptr, SourceLocation());
515 
516     // If we've been asked to skip bytes in the main file (e.g., as part of a
517     // precompiled preamble), do so now.
518     if (SkipMainFilePreamble.first > 0)
519       CurLexer->SkipBytes(SkipMainFilePreamble.first,
520                           SkipMainFilePreamble.second);
521 
522     // Tell the header info that the main file was entered.  If the file is later
523     // #imported, it won't be re-entered.
524     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
525       HeaderInfo.IncrementIncludeCount(FE);
526   }
527 
528   // Preprocess Predefines to populate the initial preprocessor state.
529   std::unique_ptr<llvm::MemoryBuffer> SB =
530     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
531   assert(SB && "Cannot create predefined source buffer");
532   FileID FID = SourceMgr.createFileID(std::move(SB));
533   assert(FID.isValid() && "Could not create FileID for predefines?");
534   setPredefinesFileID(FID);
535 
536   // Start parsing the predefines.
537   EnterSourceFile(FID, nullptr, SourceLocation());
538 }
539 
540 void Preprocessor::replayPreambleConditionalStack() {
541   // Restore the conditional stack from the preamble, if there is one.
542   if (PreambleConditionalStack.isReplaying()) {
543     assert(CurPPLexer &&
544            "CurPPLexer is null when calling replayPreambleConditionalStack.");
545     CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
546     PreambleConditionalStack.doneReplaying();
547   }
548 }
549 
550 void Preprocessor::EndSourceFile() {
551   // Notify the client that we reached the end of the source file.
552   if (Callbacks)
553     Callbacks->EndOfMainFile();
554 }
555 
556 //===----------------------------------------------------------------------===//
557 // Lexer Event Handling.
558 //===----------------------------------------------------------------------===//
559 
560 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
561 /// identifier information for the token and install it into the token,
562 /// updating the token kind accordingly.
563 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
564   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
565 
566   // Look up this token, see if it is a macro, or if it is a language keyword.
567   IdentifierInfo *II;
568   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
569     // No cleaning needed, just use the characters from the lexed buffer.
570     II = getIdentifierInfo(Identifier.getRawIdentifier());
571   } else {
572     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
573     SmallString<64> IdentifierBuffer;
574     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
575 
576     if (Identifier.hasUCN()) {
577       SmallString<64> UCNIdentifierBuffer;
578       expandUCNs(UCNIdentifierBuffer, CleanedStr);
579       II = getIdentifierInfo(UCNIdentifierBuffer);
580     } else {
581       II = getIdentifierInfo(CleanedStr);
582     }
583   }
584 
585   // Update the token info (identifier info and appropriate token kind).
586   Identifier.setIdentifierInfo(II);
587   if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
588       getSourceManager().isInSystemHeader(Identifier.getLocation()))
589     Identifier.setKind(clang::tok::identifier);
590   else
591     Identifier.setKind(II->getTokenID());
592 
593   return II;
594 }
595 
596 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
597   PoisonReasons[II] = DiagID;
598 }
599 
600 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
601   assert(Ident__exception_code && Ident__exception_info);
602   assert(Ident___exception_code && Ident___exception_info);
603   Ident__exception_code->setIsPoisoned(Poison);
604   Ident___exception_code->setIsPoisoned(Poison);
605   Ident_GetExceptionCode->setIsPoisoned(Poison);
606   Ident__exception_info->setIsPoisoned(Poison);
607   Ident___exception_info->setIsPoisoned(Poison);
608   Ident_GetExceptionInfo->setIsPoisoned(Poison);
609   Ident__abnormal_termination->setIsPoisoned(Poison);
610   Ident___abnormal_termination->setIsPoisoned(Poison);
611   Ident_AbnormalTermination->setIsPoisoned(Poison);
612 }
613 
614 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
615   assert(Identifier.getIdentifierInfo() &&
616          "Can't handle identifiers without identifier info!");
617   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
618     PoisonReasons.find(Identifier.getIdentifierInfo());
619   if(it == PoisonReasons.end())
620     Diag(Identifier, diag::err_pp_used_poisoned_id);
621   else
622     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
623 }
624 
625 /// \brief Returns a diagnostic message kind for reporting a future keyword as
626 /// appropriate for the identifier and specified language.
627 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
628                                           const LangOptions &LangOpts) {
629   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
630 
631   if (LangOpts.CPlusPlus)
632     return llvm::StringSwitch<diag::kind>(II.getName())
633 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
634         .Case(#NAME, diag::warn_cxx11_keyword)
635 #define CXX2A_KEYWORD(NAME, FLAGS)                                             \
636         .Case(#NAME, diag::warn_cxx2a_keyword)
637 #include "clang/Basic/TokenKinds.def"
638         ;
639 
640   llvm_unreachable(
641       "Keyword not known to come from a newer Standard or proposed Standard");
642 }
643 
644 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
645   assert(II.isOutOfDate() && "not out of date");
646   getExternalSource()->updateOutOfDateIdentifier(II);
647 }
648 
649 /// HandleIdentifier - This callback is invoked when the lexer reads an
650 /// identifier.  This callback looks up the identifier in the map and/or
651 /// potentially macro expands it or turns it into a named token (like 'for').
652 ///
653 /// Note that callers of this method are guarded by checking the
654 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
655 /// IdentifierInfo methods that compute these properties will need to change to
656 /// match.
657 bool Preprocessor::HandleIdentifier(Token &Identifier) {
658   assert(Identifier.getIdentifierInfo() &&
659          "Can't handle identifiers without identifier info!");
660 
661   IdentifierInfo &II = *Identifier.getIdentifierInfo();
662 
663   // If the information about this identifier is out of date, update it from
664   // the external source.
665   // We have to treat __VA_ARGS__ in a special way, since it gets
666   // serialized with isPoisoned = true, but our preprocessor may have
667   // unpoisoned it if we're defining a C99 macro.
668   if (II.isOutOfDate()) {
669     bool CurrentIsPoisoned = false;
670     if (&II == Ident__VA_ARGS__)
671       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
672 
673     updateOutOfDateIdentifier(II);
674     Identifier.setKind(II.getTokenID());
675 
676     if (&II == Ident__VA_ARGS__)
677       II.setIsPoisoned(CurrentIsPoisoned);
678   }
679 
680   // If this identifier was poisoned, and if it was not produced from a macro
681   // expansion, emit an error.
682   if (II.isPoisoned() && CurPPLexer) {
683     HandlePoisonedIdentifier(Identifier);
684   }
685 
686   // If this is a macro to be expanded, do it.
687   if (MacroDefinition MD = getMacroDefinition(&II)) {
688     auto *MI = MD.getMacroInfo();
689     assert(MI && "macro definition with no macro info?");
690     if (!DisableMacroExpansion) {
691       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
692         // C99 6.10.3p10: If the preprocessing token immediately after the
693         // macro name isn't a '(', this macro should not be expanded.
694         if (!MI->isFunctionLike() || isNextPPTokenLParen())
695           return HandleMacroExpandedIdentifier(Identifier, MD);
696       } else {
697         // C99 6.10.3.4p2 says that a disabled macro may never again be
698         // expanded, even if it's in a context where it could be expanded in the
699         // future.
700         Identifier.setFlag(Token::DisableExpand);
701         if (MI->isObjectLike() || isNextPPTokenLParen())
702           Diag(Identifier, diag::pp_disabled_macro_expansion);
703       }
704     }
705   }
706 
707   // If this identifier is a keyword in a newer Standard or proposed Standard,
708   // produce a warning. Don't warn if we're not considering macro expansion,
709   // since this identifier might be the name of a macro.
710   // FIXME: This warning is disabled in cases where it shouldn't be, like
711   //   "#define constexpr constexpr", "int constexpr;"
712   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
713     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
714         << II.getName();
715     // Don't diagnose this keyword again in this translation unit.
716     II.setIsFutureCompatKeyword(false);
717   }
718 
719   // If this is an extension token, diagnose its use.
720   // We avoid diagnosing tokens that originate from macro definitions.
721   // FIXME: This warning is disabled in cases where it shouldn't be,
722   // like "#define TY typeof", "TY(1) x".
723   if (II.isExtensionToken() && !DisableMacroExpansion)
724     Diag(Identifier, diag::ext_token_used);
725 
726   // If this is the 'import' contextual keyword following an '@', note
727   // that the next token indicates a module name.
728   //
729   // Note that we do not treat 'import' as a contextual
730   // keyword when we're in a caching lexer, because caching lexers only get
731   // used in contexts where import declarations are disallowed.
732   //
733   // Likewise if this is the C++ Modules TS import keyword.
734   if (((LastTokenWasAt && II.isModulesImport()) ||
735        Identifier.is(tok::kw_import)) &&
736       !InMacroArgs && !DisableMacroExpansion &&
737       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
738       CurLexerKind != CLK_CachingLexer) {
739     ModuleImportLoc = Identifier.getLocation();
740     ModuleImportPath.clear();
741     ModuleImportExpectsIdentifier = true;
742     CurLexerKind = CLK_LexAfterModuleImport;
743   }
744   return true;
745 }
746 
747 void Preprocessor::Lex(Token &Result) {
748   // We loop here until a lex function returns a token; this avoids recursion.
749   bool ReturnedToken;
750   do {
751     switch (CurLexerKind) {
752     case CLK_Lexer:
753       ReturnedToken = CurLexer->Lex(Result);
754       break;
755     case CLK_PTHLexer:
756       ReturnedToken = CurPTHLexer->Lex(Result);
757       break;
758     case CLK_TokenLexer:
759       ReturnedToken = CurTokenLexer->Lex(Result);
760       break;
761     case CLK_CachingLexer:
762       CachingLex(Result);
763       ReturnedToken = true;
764       break;
765     case CLK_LexAfterModuleImport:
766       LexAfterModuleImport(Result);
767       ReturnedToken = true;
768       break;
769     }
770   } while (!ReturnedToken);
771 
772   if (Result.is(tok::code_completion))
773     setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
774 
775   LastTokenWasAt = Result.is(tok::at);
776 }
777 
778 /// \brief Lex a token following the 'import' contextual keyword.
779 ///
780 void Preprocessor::LexAfterModuleImport(Token &Result) {
781   // Figure out what kind of lexer we actually have.
782   recomputeCurLexerKind();
783 
784   // Lex the next token.
785   Lex(Result);
786 
787   // The token sequence
788   //
789   //   import identifier (. identifier)*
790   //
791   // indicates a module import directive. We already saw the 'import'
792   // contextual keyword, so now we're looking for the identifiers.
793   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
794     // We expected to see an identifier here, and we did; continue handling
795     // identifiers.
796     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
797                                               Result.getLocation()));
798     ModuleImportExpectsIdentifier = false;
799     CurLexerKind = CLK_LexAfterModuleImport;
800     return;
801   }
802 
803   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
804   // see the next identifier. (We can also see a '[[' that begins an
805   // attribute-specifier-seq here under the C++ Modules TS.)
806   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
807     ModuleImportExpectsIdentifier = true;
808     CurLexerKind = CLK_LexAfterModuleImport;
809     return;
810   }
811 
812   // If we have a non-empty module path, load the named module.
813   if (!ModuleImportPath.empty()) {
814     // Under the Modules TS, the dot is just part of the module name, and not
815     // a real hierarachy separator. Flatten such module names now.
816     //
817     // FIXME: Is this the right level to be performing this transformation?
818     std::string FlatModuleName;
819     if (getLangOpts().ModulesTS) {
820       for (auto &Piece : ModuleImportPath) {
821         if (!FlatModuleName.empty())
822           FlatModuleName += ".";
823         FlatModuleName += Piece.first->getName();
824       }
825       SourceLocation FirstPathLoc = ModuleImportPath[0].second;
826       ModuleImportPath.clear();
827       ModuleImportPath.push_back(
828           std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
829     }
830 
831     Module *Imported = nullptr;
832     if (getLangOpts().Modules) {
833       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
834                                             ModuleImportPath,
835                                             Module::Hidden,
836                                             /*IsIncludeDirective=*/false);
837       if (Imported)
838         makeModuleVisible(Imported, ModuleImportLoc);
839     }
840     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
841       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
842   }
843 }
844 
845 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
846   CurSubmoduleState->VisibleModules.setVisible(
847       M, Loc, [](Module *) {},
848       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
849         // FIXME: Include the path in the diagnostic.
850         // FIXME: Include the import location for the conflicting module.
851         Diag(ModuleImportLoc, diag::warn_module_conflict)
852             << Path[0]->getFullModuleName()
853             << Conflict->getFullModuleName()
854             << Message;
855       });
856 
857   // Add this module to the imports list of the currently-built submodule.
858   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
859     BuildingSubmoduleStack.back().M->Imports.insert(M);
860 }
861 
862 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
863                                           const char *DiagnosticTag,
864                                           bool AllowMacroExpansion) {
865   // We need at least one string literal.
866   if (Result.isNot(tok::string_literal)) {
867     Diag(Result, diag::err_expected_string_literal)
868       << /*Source='in...'*/0 << DiagnosticTag;
869     return false;
870   }
871 
872   // Lex string literal tokens, optionally with macro expansion.
873   SmallVector<Token, 4> StrToks;
874   do {
875     StrToks.push_back(Result);
876 
877     if (Result.hasUDSuffix())
878       Diag(Result, diag::err_invalid_string_udl);
879 
880     if (AllowMacroExpansion)
881       Lex(Result);
882     else
883       LexUnexpandedToken(Result);
884   } while (Result.is(tok::string_literal));
885 
886   // Concatenate and parse the strings.
887   StringLiteralParser Literal(StrToks, *this);
888   assert(Literal.isAscii() && "Didn't allow wide strings in");
889 
890   if (Literal.hadError)
891     return false;
892 
893   if (Literal.Pascal) {
894     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
895       << /*Source='in...'*/0 << DiagnosticTag;
896     return false;
897   }
898 
899   String = Literal.GetString();
900   return true;
901 }
902 
903 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
904   assert(Tok.is(tok::numeric_constant));
905   SmallString<8> IntegerBuffer;
906   bool NumberInvalid = false;
907   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
908   if (NumberInvalid)
909     return false;
910   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
911   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
912     return false;
913   llvm::APInt APVal(64, 0);
914   if (Literal.GetIntegerValue(APVal))
915     return false;
916   Lex(Tok);
917   Value = APVal.getLimitedValue();
918   return true;
919 }
920 
921 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
922   assert(Handler && "NULL comment handler");
923   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
924          CommentHandlers.end() && "Comment handler already registered");
925   CommentHandlers.push_back(Handler);
926 }
927 
928 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
929   std::vector<CommentHandler *>::iterator Pos
930   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
931   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
932   CommentHandlers.erase(Pos);
933 }
934 
935 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
936   bool AnyPendingTokens = false;
937   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
938        HEnd = CommentHandlers.end();
939        H != HEnd; ++H) {
940     if ((*H)->HandleComment(*this, Comment))
941       AnyPendingTokens = true;
942   }
943   if (!AnyPendingTokens || getCommentRetentionState())
944     return false;
945   Lex(result);
946   return true;
947 }
948 
949 ModuleLoader::~ModuleLoader() { }
950 
951 CommentHandler::~CommentHandler() { }
952 
953 CodeCompletionHandler::~CodeCompletionHandler() { }
954 
955 void Preprocessor::createPreprocessingRecord() {
956   if (Record)
957     return;
958 
959   Record = new PreprocessingRecord(getSourceManager());
960   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
961 }
962